1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * libata-eh.c - libata error handling
4 *
5 * Maintained by: Tejun Heo <tj@kernel.org>
6 * Please ALWAYS copy linux-ide@vger.kernel.org
7 * on emails.
8 *
9 * Copyright 2006 Tejun Heo <htejun@gmail.com>
10 *
11 * libata documentation is available via 'make {ps|pdf}docs',
12 * as Documentation/driver-api/libata.rst
13 *
14 * Hardware documentation available from http://www.t13.org/ and
15 * http://www.sata-io.org/
16 */
17
18 #include <linux/kernel.h>
19 #include <linux/blkdev.h>
20 #include <linux/export.h>
21 #include <linux/pci.h>
22 #include <scsi/scsi.h>
23 #include <scsi/scsi_host.h>
24 #include <scsi/scsi_eh.h>
25 #include <scsi/scsi_device.h>
26 #include <scsi/scsi_cmnd.h>
27 #include <scsi/scsi_dbg.h>
28 #include "../scsi/scsi_transport_api.h"
29
30 #include <linux/libata.h>
31
32 #include <trace/events/libata.h>
33 #include "libata.h"
34
enum {
	/* speed down verdicts (independent bit flags) */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags, stored in ata_ering_entry->eflags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	/* OR-ed into already recorded ring entries by ata_ering_clear() */
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories; DUBIOUS_* mirror the first four, offset by 4 */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	/* Timeout for internal commands which have no entry in
	 * ata_eh_cmd_timeout_table; same unit as the table entries.
	 */
	ATA_EH_CMD_DFL_TIMEOUT		=  5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		=  5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	/* interval handed to ata_deadline() when arming the fast drain timer */
	ATA_EH_FASTDRAIN_INTERVAL	=  3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};
77
/* The following table determines how we sequence resets.  Each entry
 * represents the timeout for that try.  The first try can be a soft or
 * a hard reset.  All others are hard resets if available.  In most
 * cases the first reset w/ 10sec timeout should succeed.  The following
 * entries are mostly for error handling, hotplug and those outlier
 * devices that take an exceptionally long time to recover from reset.
 *
 * Going by the per-entry notes, values are in milliseconds
 * (10000 == 10sec).  The trailing ULONG_MAX marks the point where
 * resetting is given up.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};
92
/* Timeouts for the IDENTIFY command class (see ata_eh_cmd_timeout_table),
 * one entry per successive try.  ULONG_MAX terminates the list.
 */
static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,  /* combined time till here is enough even for media access */
	30000,	/* last resort for exceptionally slow devices */
	ULONG_MAX,
};
99
/* Timeouts for the FLUSH command class (see ata_eh_cmd_timeout_table),
 * one entry per successive try.  ULONG_MAX terminates the list.
 */
static const unsigned long ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,  /* ditto */
	30000,	/* and even more generous */
	ULONG_MAX,
};
106
/* Timeouts shared by the remaining command classes listed in
 * ata_eh_cmd_timeout_table, one entry per successive try.
 * ULONG_MAX terminates the list.
 */
static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};
113
/* One command class of ata_eh_cmd_timeout_table. */
struct ata_eh_cmd_timeout_ent {
	/* command opcodes of this class; the list is terminated by 0 */
	const u8		*commands;
	/* per-try timeouts for this class; terminated by ULONG_MAX */
	const unsigned long	*timeouts;
};
118
/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class: it lists the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries
 * (i.e. the next one is the ULONG_MAX terminator), the last value is
 * used again.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 *
 * CMDS() builds an anonymous u8 array from its arguments and appends
 * the 0 terminator which ata_lookup_timeout_table() stops on.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS
148
/* Defined later in this file but used before its definition. */
static void __ata_port_freeze(struct ata_port *ap);

/*
 * Port suspend/resume hooks invoked from the port error handler.
 * Without CONFIG_PM they are empty stubs so that callers need no
 * conditional compilation; with CONFIG_PM only the prototypes are
 * given here.
 */
#ifndef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
}

static void ata_eh_handle_port_resume(struct ata_port *ap)
{
}
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#endif /* CONFIG_PM */
160
__ata_ehi_pushv_desc(struct ata_eh_info * ehi,const char * fmt,va_list args)161 static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
162 const char *fmt, va_list args)
163 {
164 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
165 ATA_EH_DESC_LEN - ehi->desc_len,
166 fmt, args);
167 }
168
169 /**
170 * __ata_ehi_push_desc - push error description without adding separator
171 * @ehi: target EHI
172 * @fmt: printf format string
173 *
174 * Format string according to @fmt and append it to @ehi->desc.
175 *
176 * LOCKING:
177 * spin_lock_irqsave(host lock)
178 */
__ata_ehi_push_desc(struct ata_eh_info * ehi,const char * fmt,...)179 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
180 {
181 va_list args;
182
183 va_start(args, fmt);
184 __ata_ehi_pushv_desc(ehi, fmt, args);
185 va_end(args);
186 }
187
188 /**
189 * ata_ehi_push_desc - push error description with separator
190 * @ehi: target EHI
191 * @fmt: printf format string
192 *
193 * Format string according to @fmt and append it to @ehi->desc.
194 * If @ehi->desc is not empty, ", " is added in-between.
195 *
196 * LOCKING:
197 * spin_lock_irqsave(host lock)
198 */
ata_ehi_push_desc(struct ata_eh_info * ehi,const char * fmt,...)199 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
200 {
201 va_list args;
202
203 if (ehi->desc_len)
204 __ata_ehi_push_desc(ehi, ", ");
205
206 va_start(args, fmt);
207 __ata_ehi_pushv_desc(ehi, fmt, args);
208 va_end(args);
209 }
210
211 /**
212 * ata_ehi_clear_desc - clean error description
213 * @ehi: target EHI
214 *
215 * Clear @ehi->desc.
216 *
217 * LOCKING:
218 * spin_lock_irqsave(host lock)
219 */
ata_ehi_clear_desc(struct ata_eh_info * ehi)220 void ata_ehi_clear_desc(struct ata_eh_info *ehi)
221 {
222 ehi->desc[0] = '\0';
223 ehi->desc_len = 0;
224 }
225
226 /**
227 * ata_port_desc - append port description
228 * @ap: target ATA port
229 * @fmt: printf format string
230 *
231 * Format string according to @fmt and append it to port
232 * description. If port description is not empty, " " is added
233 * in-between. This function is to be used while initializing
234 * ata_host. The description is printed on host registration.
235 *
236 * LOCKING:
237 * None.
238 */
ata_port_desc(struct ata_port * ap,const char * fmt,...)239 void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
240 {
241 va_list args;
242
243 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));
244
245 if (ap->link.eh_info.desc_len)
246 __ata_ehi_push_desc(&ap->link.eh_info, " ");
247
248 va_start(args, fmt);
249 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
250 va_end(args);
251 }
252
253 #ifdef CONFIG_PCI
254
255 /**
256 * ata_port_pbar_desc - append PCI BAR description
257 * @ap: target ATA port
258 * @bar: target PCI BAR
259 * @offset: offset into PCI BAR
260 * @name: name of the area
261 *
262 * If @offset is negative, this function formats a string which
263 * contains the name, address, size and type of the BAR and
264 * appends it to the port description. If @offset is zero or
265 * positive, only name and offsetted address is appended.
266 *
267 * LOCKING:
268 * None.
269 */
ata_port_pbar_desc(struct ata_port * ap,int bar,ssize_t offset,const char * name)270 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
271 const char *name)
272 {
273 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
274 char *type = "";
275 unsigned long long start, len;
276
277 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
278 type = "m";
279 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
280 type = "i";
281
282 start = (unsigned long long)pci_resource_start(pdev, bar);
283 len = (unsigned long long)pci_resource_len(pdev, bar);
284
285 if (offset < 0)
286 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
287 else
288 ata_port_desc(ap, "%s 0x%llx", name,
289 start + (unsigned long long)offset);
290 }
291
292 #endif /* CONFIG_PCI */
293
/*
 * Find the command class @cmd belongs to.  Returns the index of the
 * matching ata_eh_cmd_timeout_table entry, or -1 if no class lists
 * @cmd.  Each class' command list is scanned up to its 0 terminator.
 */
static int ata_lookup_timeout_table(u8 cmd)
{
	int ent;

	for (ent = 0; ent < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; ent++) {
		const u8 *op = ata_eh_cmd_timeout_table[ent].commands;

		while (*op) {
			if (*op == cmd)
				return ent;
			op++;
		}
	}

	return -1;
}
308
309 /**
310 * ata_internal_cmd_timeout - determine timeout for an internal command
311 * @dev: target device
312 * @cmd: internal command to be issued
313 *
314 * Determine timeout for internal command @cmd for @dev.
315 *
316 * LOCKING:
317 * EH context.
318 *
319 * RETURNS:
320 * Determined timeout.
321 */
ata_internal_cmd_timeout(struct ata_device * dev,u8 cmd)322 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
323 {
324 struct ata_eh_context *ehc = &dev->link->eh_context;
325 int ent = ata_lookup_timeout_table(cmd);
326 int idx;
327
328 if (ent < 0)
329 return ATA_EH_CMD_DFL_TIMEOUT;
330
331 idx = ehc->cmd_timeout_idx[dev->devno][ent];
332 return ata_eh_cmd_timeout_table[ent].timeouts[idx];
333 }
334
335 /**
336 * ata_internal_cmd_timed_out - notification for internal command timeout
337 * @dev: target device
338 * @cmd: internal command which timed out
339 *
340 * Notify EH that internal command @cmd for @dev timed out. This
341 * function should be called only for commands whose timeouts are
342 * determined using ata_internal_cmd_timeout().
343 *
344 * LOCKING:
345 * EH context.
346 */
ata_internal_cmd_timed_out(struct ata_device * dev,u8 cmd)347 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
348 {
349 struct ata_eh_context *ehc = &dev->link->eh_context;
350 int ent = ata_lookup_timeout_table(cmd);
351 int idx;
352
353 if (ent < 0)
354 return;
355
356 idx = ehc->cmd_timeout_idx[dev->devno][ent];
357 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
358 ehc->cmd_timeout_idx[dev->devno][ent]++;
359 }
360
ata_ering_record(struct ata_ering * ering,unsigned int eflags,unsigned int err_mask)361 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
362 unsigned int err_mask)
363 {
364 struct ata_ering_entry *ent;
365
366 WARN_ON(!err_mask);
367
368 ering->cursor++;
369 ering->cursor %= ATA_ERING_SIZE;
370
371 ent = &ering->ring[ering->cursor];
372 ent->eflags = eflags;
373 ent->err_mask = err_mask;
374 ent->timestamp = get_jiffies_64();
375 }
376
ata_ering_top(struct ata_ering * ering)377 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
378 {
379 struct ata_ering_entry *ent = &ering->ring[ering->cursor];
380
381 if (ent->err_mask)
382 return ent;
383 return NULL;
384 }
385
ata_ering_map(struct ata_ering * ering,int (* map_fn)(struct ata_ering_entry *,void *),void * arg)386 int ata_ering_map(struct ata_ering *ering,
387 int (*map_fn)(struct ata_ering_entry *, void *),
388 void *arg)
389 {
390 int idx, rc = 0;
391 struct ata_ering_entry *ent;
392
393 idx = ering->cursor;
394 do {
395 ent = &ering->ring[idx];
396 if (!ent->err_mask)
397 break;
398 rc = map_fn(ent, arg);
399 if (rc)
400 break;
401 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
402 } while (idx != ering->cursor);
403
404 return rc;
405 }
406
ata_ering_clear_cb(struct ata_ering_entry * ent,void * void_arg)407 static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
408 {
409 ent->eflags |= ATA_EFLAG_OLD_ER;
410 return 0;
411 }
412
ata_ering_clear(struct ata_ering * ering)413 static void ata_ering_clear(struct ata_ering *ering)
414 {
415 ata_ering_map(ering, ata_ering_clear_cb, NULL);
416 }
417
ata_eh_dev_action(struct ata_device * dev)418 static unsigned int ata_eh_dev_action(struct ata_device *dev)
419 {
420 struct ata_eh_context *ehc = &dev->link->eh_context;
421
422 return ehc->i.action | ehc->i.dev_action[dev->devno];
423 }
424
ata_eh_clear_action(struct ata_link * link,struct ata_device * dev,struct ata_eh_info * ehi,unsigned int action)425 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
426 struct ata_eh_info *ehi, unsigned int action)
427 {
428 struct ata_device *tdev;
429
430 if (!dev) {
431 ehi->action &= ~action;
432 ata_for_each_dev(tdev, link, ALL)
433 ehi->dev_action[tdev->devno] &= ~action;
434 } else {
435 /* doesn't make sense for port-wide EH actions */
436 WARN_ON(!(action & ATA_EH_PERDEV_MASK));
437
438 /* break ehi->action into ehi->dev_action */
439 if (ehi->action & action) {
440 ata_for_each_dev(tdev, link, ALL)
441 ehi->dev_action[tdev->devno] |=
442 ehi->action & action;
443 ehi->action &= ~action;
444 }
445
446 /* turn off the specified per-dev action */
447 ehi->dev_action[dev->devno] &= ~action;
448 }
449 }
450
451 /**
452 * ata_eh_acquire - acquire EH ownership
453 * @ap: ATA port to acquire EH ownership for
454 *
455 * Acquire EH ownership for @ap. This is the basic exclusion
456 * mechanism for ports sharing a host. Only one port hanging off
457 * the same host can claim the ownership of EH.
458 *
459 * LOCKING:
460 * EH context.
461 */
ata_eh_acquire(struct ata_port * ap)462 void ata_eh_acquire(struct ata_port *ap)
463 {
464 mutex_lock(&ap->host->eh_mutex);
465 WARN_ON_ONCE(ap->host->eh_owner);
466 ap->host->eh_owner = current;
467 }
468
469 /**
470 * ata_eh_release - release EH ownership
471 * @ap: ATA port to release EH ownership for
472 *
473 * Release EH ownership for @ap if the caller. The caller must
474 * have acquired EH ownership using ata_eh_acquire() previously.
475 *
476 * LOCKING:
477 * EH context.
478 */
ata_eh_release(struct ata_port * ap)479 void ata_eh_release(struct ata_port *ap)
480 {
481 WARN_ON_ONCE(ap->host->eh_owner != current);
482 ap->host->eh_owner = NULL;
483 mutex_unlock(&ap->host->eh_mutex);
484 }
485
ata_eh_unload(struct ata_port * ap)486 static void ata_eh_unload(struct ata_port *ap)
487 {
488 struct ata_link *link;
489 struct ata_device *dev;
490 unsigned long flags;
491
492 /* Restore SControl IPM and SPD for the next driver and
493 * disable attached devices.
494 */
495 ata_for_each_link(link, ap, PMP_FIRST) {
496 sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
497 ata_for_each_dev(dev, link, ALL)
498 ata_dev_disable(dev);
499 }
500
501 /* freeze and set UNLOADED */
502 spin_lock_irqsave(ap->lock, flags);
503
504 ata_port_freeze(ap); /* won't be thawed */
505 ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */
506 ap->pflags |= ATA_PFLAG_UNLOADED;
507
508 spin_unlock_irqrestore(ap->lock, flags);
509 }
510
511 /**
512 * ata_scsi_error - SCSI layer error handler callback
513 * @host: SCSI host on which error occurred
514 *
515 * Handles SCSI-layer-thrown error events.
516 *
517 * LOCKING:
518 * Inherited from SCSI layer (none, can sleep)
519 *
520 * RETURNS:
521 * Zero.
522 */
ata_scsi_error(struct Scsi_Host * host)523 void ata_scsi_error(struct Scsi_Host *host)
524 {
525 struct ata_port *ap = ata_shost_to_port(host);
526 unsigned long flags;
527 LIST_HEAD(eh_work_q);
528
529 DPRINTK("ENTER\n");
530
531 spin_lock_irqsave(host->host_lock, flags);
532 list_splice_init(&host->eh_cmd_q, &eh_work_q);
533 spin_unlock_irqrestore(host->host_lock, flags);
534
535 ata_scsi_cmd_error_handler(host, ap, &eh_work_q);
536
537 /* If we timed raced normal completion and there is nothing to
538 recover nr_timedout == 0 why exactly are we doing error recovery ? */
539 ata_scsi_port_error_handler(host, ap);
540
541 /* finish or retry handled scmd's and clean up */
542 WARN_ON(!list_empty(&eh_work_q));
543
544 DPRINTK("EXIT\n");
545 }
546
/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host:	scsi host containing the port (not referenced by the body)
 * @ap:		ATA port within the host
 * @eh_work_q:	list of commands to process
 *
 * First part of the libata error handler: process the given list of
 * failed commands.  Commands which turn out to have completed normally
 * are finished onto ap->eh_done_q; commands whose qc is still active
 * and has not failed yet are marked as timed out.  If any command timed
 * out, the port is frozen.  All of this, including the reset of
 * ap->eh_tries, only happens for ports providing ->error_handler.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		/* This must occur under the ap->lock as we don't want
		 * a polled recovery to race the real interrupt handler.
		 *
		 * The lost_interrupt handler checks for any completed but
		 * non-notified command and completes much like an IRQ
		 * handler.
		 *
		 * We then fall into the error recovery code which will
		 * treat this as if normal completion won the race.
		 */

		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
			struct ata_queued_cmd *qc;

			/* look for an active qc which carries this scmd */
			ata_qc_for_each_raw(ap, qc, i) {
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			/* NOTE(review): relies on the raw iterator leaving
			 * i == ATA_MAX_QUEUE when nothing matched - confirm
			 * against the macro definition.
			 */
			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs.  They belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);


		/* initialize eh_tries; ata_scsi_port_error_handler()
		 * counts it down while EH keeps getting re-requested
		 */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	}
	spin_unlock_irqrestore(ap->lock, flags);

}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
641
/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host:	SCSI host containing the port (not referenced by the body)
 * @ap:		the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 *
 * For ports with ->error_handler, EH ownership is acquired and the
 * following is repeated while ATA_PFLAG_EH_PENDING gets set again and
 * ap->eh_tries has not run out: snapshot each link's eh_info into its
 * eh_context, run ->error_handler (or the unload/finish path when the
 * port is unloading or suspended) and handle port suspend.  Ports
 * without ->error_handler get ->eng_timeout called instead.  In both
 * cases the done queue is flushed afterwards, hotplug work is
 * scheduled if requested, and waiters on ap->eh_wait_q are woken up.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			/* start from a clean context which takes over the
			 * info gathered so far; eh_info itself is reset so
			 * that it can collect new events
			 */
			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			/* remember transfer mode and NCQ state on EH entry */
			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			/* eh_tries was set up by ata_scsi_cmd_error_handler() */
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* end eh (clear host_eh_scheduled) while holding
		 * ap->lock such that if exception occurs after this
		 * point but before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		ap->ops->end_eh(ap);

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		/* port without ->error_handler: warn if there is no qc
		 * for the active tag, then let the driver's ->eng_timeout
		 * deal with it
		 */
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	/* a loading port only drops the LOADING flag; otherwise kick the
	 * hotplug work if requested, except on SAS hosts
	 */
	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		!(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);
765
766 /**
767 * ata_port_wait_eh - Wait for the currently pending EH to complete
768 * @ap: Port to wait EH for
769 *
770 * Wait until the currently pending EH is complete.
771 *
772 * LOCKING:
773 * Kernel thread context (may sleep).
774 */
ata_port_wait_eh(struct ata_port * ap)775 void ata_port_wait_eh(struct ata_port *ap)
776 {
777 unsigned long flags;
778 DEFINE_WAIT(wait);
779
780 retry:
781 spin_lock_irqsave(ap->lock, flags);
782
783 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
784 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
785 spin_unlock_irqrestore(ap->lock, flags);
786 schedule();
787 spin_lock_irqsave(ap->lock, flags);
788 }
789 finish_wait(&ap->eh_wait_q, &wait);
790
791 spin_unlock_irqrestore(ap->lock, flags);
792
793 /* make sure SCSI EH is complete */
794 if (scsi_host_in_recovery(ap->scsi_host)) {
795 ata_msleep(ap, 10);
796 goto retry;
797 }
798 }
799 EXPORT_SYMBOL_GPL(ata_port_wait_eh);
800
ata_eh_nr_in_flight(struct ata_port * ap)801 static int ata_eh_nr_in_flight(struct ata_port *ap)
802 {
803 struct ata_queued_cmd *qc;
804 unsigned int tag;
805 int nr = 0;
806
807 /* count only non-internal commands */
808 ata_qc_for_each(ap, qc, tag) {
809 if (qc)
810 nr++;
811 }
812
813 return nr;
814 }
815
ata_eh_fastdrain_timerfn(struct timer_list * t)816 void ata_eh_fastdrain_timerfn(struct timer_list *t)
817 {
818 struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
819 unsigned long flags;
820 int cnt;
821
822 spin_lock_irqsave(ap->lock, flags);
823
824 cnt = ata_eh_nr_in_flight(ap);
825
826 /* are we done? */
827 if (!cnt)
828 goto out_unlock;
829
830 if (cnt == ap->fastdrain_cnt) {
831 struct ata_queued_cmd *qc;
832 unsigned int tag;
833
834 /* No progress during the last interval, tag all
835 * in-flight qcs as timed out and freeze the port.
836 */
837 ata_qc_for_each(ap, qc, tag) {
838 if (qc)
839 qc->err_mask |= AC_ERR_TIMEOUT;
840 }
841
842 ata_port_freeze(ap);
843 } else {
844 /* some qcs have finished, give it another chance */
845 ap->fastdrain_cnt = cnt;
846 ap->fastdrain_timer.expires =
847 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
848 add_timer(&ap->fastdrain_timer);
849 }
850
851 out_unlock:
852 spin_unlock_irqrestore(ap->lock, flags);
853 }
854
855 /**
856 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
857 * @ap: target ATA port
858 * @fastdrain: activate fast drain
859 *
860 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
861 * is non-zero and EH wasn't pending before. Fast drain ensures
862 * that EH kicks in in timely manner.
863 *
864 * LOCKING:
865 * spin_lock_irqsave(host lock)
866 */
ata_eh_set_pending(struct ata_port * ap,int fastdrain)867 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
868 {
869 int cnt;
870
871 /* already scheduled? */
872 if (ap->pflags & ATA_PFLAG_EH_PENDING)
873 return;
874
875 ap->pflags |= ATA_PFLAG_EH_PENDING;
876
877 if (!fastdrain)
878 return;
879
880 /* do we have in-flight qcs? */
881 cnt = ata_eh_nr_in_flight(ap);
882 if (!cnt)
883 return;
884
885 /* activate fast drain */
886 ap->fastdrain_cnt = cnt;
887 ap->fastdrain_timer.expires =
888 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
889 add_timer(&ap->fastdrain_timer);
890 }
891
892 /**
893 * ata_qc_schedule_eh - schedule qc for error handling
894 * @qc: command to schedule error handling for
895 *
896 * Schedule error handling for @qc. EH will kick in as soon as
897 * other commands are drained.
898 *
899 * LOCKING:
900 * spin_lock_irqsave(host lock)
901 */
ata_qc_schedule_eh(struct ata_queued_cmd * qc)902 void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
903 {
904 struct ata_port *ap = qc->ap;
905
906 WARN_ON(!ap->ops->error_handler);
907
908 qc->flags |= ATA_QCFLAG_FAILED;
909 ata_eh_set_pending(ap, 1);
910
911 /* The following will fail if timeout has already expired.
912 * ata_scsi_error() takes care of such scmds on EH entry.
913 * Note that ATA_QCFLAG_FAILED is unconditionally set after
914 * this function completes.
915 */
916 blk_abort_request(qc->scsicmd->request);
917 }
918
919 /**
920 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
921 * @ap: ATA port to schedule EH for
922 *
923 * LOCKING: inherited from ata_port_schedule_eh
924 * spin_lock_irqsave(host lock)
925 */
ata_std_sched_eh(struct ata_port * ap)926 void ata_std_sched_eh(struct ata_port *ap)
927 {
928 WARN_ON(!ap->ops->error_handler);
929
930 if (ap->pflags & ATA_PFLAG_INITIALIZING)
931 return;
932
933 ata_eh_set_pending(ap, 1);
934 scsi_schedule_eh(ap->scsi_host);
935
936 DPRINTK("port EH scheduled\n");
937 }
938 EXPORT_SYMBOL_GPL(ata_std_sched_eh);
939
940 /**
941 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
942 * @ap: ATA port to end EH for
943 *
944 * In the libata object model there is a 1:1 mapping of ata_port to
945 * shost, so host fields can be directly manipulated under ap->lock, in
946 * the libsas case we need to hold a lock at the ha->level to coordinate
947 * these events.
948 *
949 * LOCKING:
950 * spin_lock_irqsave(host lock)
951 */
ata_std_end_eh(struct ata_port * ap)952 void ata_std_end_eh(struct ata_port *ap)
953 {
954 struct Scsi_Host *host = ap->scsi_host;
955
956 host->host_eh_scheduled = 0;
957 }
958 EXPORT_SYMBOL(ata_std_end_eh);
959
960
961 /**
962 * ata_port_schedule_eh - schedule error handling without a qc
963 * @ap: ATA port to schedule EH for
964 *
965 * Schedule error handling for @ap. EH will kick in as soon as
966 * all commands are drained.
967 *
968 * LOCKING:
969 * spin_lock_irqsave(host lock)
970 */
ata_port_schedule_eh(struct ata_port * ap)971 void ata_port_schedule_eh(struct ata_port *ap)
972 {
973 /* see: ata_std_sched_eh, unless you know better */
974 ap->ops->sched_eh(ap);
975 }
976
ata_do_link_abort(struct ata_port * ap,struct ata_link * link)977 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
978 {
979 struct ata_queued_cmd *qc;
980 int tag, nr_aborted = 0;
981
982 WARN_ON(!ap->ops->error_handler);
983
984 /* we're gonna abort all commands, no need for fast drain */
985 ata_eh_set_pending(ap, 0);
986
987 /* include internal tag in iteration */
988 ata_qc_for_each_with_internal(ap, qc, tag) {
989 if (qc && (!link || qc->dev->link == link)) {
990 qc->flags |= ATA_QCFLAG_FAILED;
991 ata_qc_complete(qc);
992 nr_aborted++;
993 }
994 }
995
996 if (!nr_aborted)
997 ata_port_schedule_eh(ap);
998
999 return nr_aborted;
1000 }
1001
1002 /**
1003 * ata_link_abort - abort all qc's on the link
1004 * @link: ATA link to abort qc's for
1005 *
1006 * Abort all active qc's active on @link and schedule EH.
1007 *
1008 * LOCKING:
1009 * spin_lock_irqsave(host lock)
1010 *
1011 * RETURNS:
1012 * Number of aborted qc's.
1013 */
ata_link_abort(struct ata_link * link)1014 int ata_link_abort(struct ata_link *link)
1015 {
1016 return ata_do_link_abort(link->ap, link);
1017 }
1018
1019 /**
1020 * ata_port_abort - abort all qc's on the port
1021 * @ap: ATA port to abort qc's for
1022 *
1023 * Abort all active qc's of @ap and schedule EH.
1024 *
1025 * LOCKING:
1026 * spin_lock_irqsave(host_set lock)
1027 *
1028 * RETURNS:
1029 * Number of aborted qc's.
1030 */
ata_port_abort(struct ata_port * ap)1031 int ata_port_abort(struct ata_port *ap)
1032 {
1033 return ata_do_link_abort(ap, NULL);
1034 }
1035
1036 /**
1037 * __ata_port_freeze - freeze port
1038 * @ap: ATA port to freeze
1039 *
1040 * This function is called when HSM violation or some other
1041 * condition disrupts normal operation of the port. Frozen port
1042 * is not allowed to perform any operation until the port is
1043 * thawed, which usually follows a successful reset.
1044 *
1045 * ap->ops->freeze() callback can be used for freezing the port
1046 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
1047 * port cannot be frozen hardware-wise, the interrupt handler
1048 * must ack and clear interrupts unconditionally while the port
1049 * is frozen.
1050 *
1051 * LOCKING:
1052 * spin_lock_irqsave(host lock)
1053 */
__ata_port_freeze(struct ata_port * ap)1054 static void __ata_port_freeze(struct ata_port *ap)
1055 {
1056 WARN_ON(!ap->ops->error_handler);
1057
1058 if (ap->ops->freeze)
1059 ap->ops->freeze(ap);
1060
1061 ap->pflags |= ATA_PFLAG_FROZEN;
1062
1063 DPRINTK("ata%u port frozen\n", ap->print_id);
1064 }
1065
1066 /**
1067 * ata_port_freeze - abort & freeze port
1068 * @ap: ATA port to freeze
1069 *
1070 * Abort and freeze @ap. The freeze operation must be called
1071 * first, because some hardware requires special operations
1072 * before the taskfile registers are accessible.
1073 *
1074 * LOCKING:
1075 * spin_lock_irqsave(host lock)
1076 *
1077 * RETURNS:
1078 * Number of aborted commands.
1079 */
ata_port_freeze(struct ata_port * ap)1080 int ata_port_freeze(struct ata_port *ap)
1081 {
1082 int nr_aborted;
1083
1084 WARN_ON(!ap->ops->error_handler);
1085
1086 __ata_port_freeze(ap);
1087 nr_aborted = ata_port_abort(ap);
1088
1089 return nr_aborted;
1090 }
1091
1092 /**
1093 * sata_async_notification - SATA async notification handler
1094 * @ap: ATA port where async notification is received
1095 *
1096 * Handler to be called when async notification via SDB FIS is
1097 * received. This function schedules EH if necessary.
1098 *
1099 * LOCKING:
1100 * spin_lock_irqsave(host lock)
1101 *
1102 * RETURNS:
1103 * 1 if EH is scheduled, 0 otherwise.
1104 */
sata_async_notification(struct ata_port * ap)1105 int sata_async_notification(struct ata_port *ap)
1106 {
1107 u32 sntf;
1108 int rc;
1109
1110 if (!(ap->flags & ATA_FLAG_AN))
1111 return 0;
1112
1113 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
1114 if (rc == 0)
1115 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);
1116
1117 if (!sata_pmp_attached(ap) || rc) {
1118 /* PMP is not attached or SNTF is not available */
1119 if (!sata_pmp_attached(ap)) {
1120 /* PMP is not attached. Check whether ATAPI
1121 * AN is configured. If so, notify media
1122 * change.
1123 */
1124 struct ata_device *dev = ap->link.device;
1125
1126 if ((dev->class == ATA_DEV_ATAPI) &&
1127 (dev->flags & ATA_DFLAG_AN))
1128 ata_scsi_media_change_notify(dev);
1129 return 0;
1130 } else {
1131 /* PMP is attached but SNTF is not available.
1132 * ATAPI async media change notification is
1133 * not used. The PMP must be reporting PHY
1134 * status change, schedule EH.
1135 */
1136 ata_port_schedule_eh(ap);
1137 return 1;
1138 }
1139 } else {
1140 /* PMP is attached and SNTF is available */
1141 struct ata_link *link;
1142
1143 /* check and notify ATAPI AN */
1144 ata_for_each_link(link, ap, EDGE) {
1145 if (!(sntf & (1 << link->pmp)))
1146 continue;
1147
1148 if ((link->device->class == ATA_DEV_ATAPI) &&
1149 (link->device->flags & ATA_DFLAG_AN))
1150 ata_scsi_media_change_notify(link->device);
1151 }
1152
1153 /* If PMP is reporting that PHY status of some
1154 * downstream ports has changed, schedule EH.
1155 */
1156 if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
1157 ata_port_schedule_eh(ap);
1158 return 1;
1159 }
1160
1161 return 0;
1162 }
1163 }
1164
1165 /**
1166 * ata_eh_freeze_port - EH helper to freeze port
1167 * @ap: ATA port to freeze
1168 *
1169 * Freeze @ap.
1170 *
1171 * LOCKING:
1172 * None.
1173 */
ata_eh_freeze_port(struct ata_port * ap)1174 void ata_eh_freeze_port(struct ata_port *ap)
1175 {
1176 unsigned long flags;
1177
1178 if (!ap->ops->error_handler)
1179 return;
1180
1181 spin_lock_irqsave(ap->lock, flags);
1182 __ata_port_freeze(ap);
1183 spin_unlock_irqrestore(ap->lock, flags);
1184 }
1185
1186 /**
1187 * ata_port_thaw_port - EH helper to thaw port
1188 * @ap: ATA port to thaw
1189 *
1190 * Thaw frozen port @ap.
1191 *
1192 * LOCKING:
1193 * None.
1194 */
ata_eh_thaw_port(struct ata_port * ap)1195 void ata_eh_thaw_port(struct ata_port *ap)
1196 {
1197 unsigned long flags;
1198
1199 if (!ap->ops->error_handler)
1200 return;
1201
1202 spin_lock_irqsave(ap->lock, flags);
1203
1204 ap->pflags &= ~ATA_PFLAG_FROZEN;
1205
1206 if (ap->ops->thaw)
1207 ap->ops->thaw(ap);
1208
1209 spin_unlock_irqrestore(ap->lock, flags);
1210
1211 DPRINTK("ata%u port thawed\n", ap->print_id);
1212 }
1213
/*
 * Intentionally empty completion callback.  __ata_eh_qc_complete()
 * installs it as qc->scsidone before calling __ata_qc_complete() and
 * afterwards finishes the SCSI command itself through
 * scsi_eh_finish_cmd().
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}
1218
__ata_eh_qc_complete(struct ata_queued_cmd * qc)1219 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
1220 {
1221 struct ata_port *ap = qc->ap;
1222 struct scsi_cmnd *scmd = qc->scsicmd;
1223 unsigned long flags;
1224
1225 spin_lock_irqsave(ap->lock, flags);
1226 qc->scsidone = ata_eh_scsidone;
1227 __ata_qc_complete(qc);
1228 WARN_ON(ata_tag_valid(qc->tag));
1229 spin_unlock_irqrestore(ap->lock, flags);
1230
1231 scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
1232 }
1233
1234 /**
1235 * ata_eh_qc_complete - Complete an active ATA command from EH
1236 * @qc: Command to complete
1237 *
1238 * Indicate to the mid and upper layers that an ATA command has
1239 * completed. To be used from EH.
1240 */
ata_eh_qc_complete(struct ata_queued_cmd * qc)1241 void ata_eh_qc_complete(struct ata_queued_cmd *qc)
1242 {
1243 struct scsi_cmnd *scmd = qc->scsicmd;
1244 scmd->retries = scmd->allowed;
1245 __ata_eh_qc_complete(qc);
1246 }
1247
1248 /**
1249 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
1250 * @qc: Command to retry
1251 *
1252 * Indicate to the mid and upper layers that an ATA command
1253 * should be retried. To be used from EH.
1254 *
1255 * SCSI midlayer limits the number of retries to scmd->allowed.
1256 * scmd->allowed is incremented for commands which get retried
1257 * due to unrelated failures (qc->err_mask is zero).
1258 */
ata_eh_qc_retry(struct ata_queued_cmd * qc)1259 void ata_eh_qc_retry(struct ata_queued_cmd *qc)
1260 {
1261 struct scsi_cmnd *scmd = qc->scsicmd;
1262 if (!qc->err_mask)
1263 scmd->allowed++;
1264 __ata_eh_qc_complete(qc);
1265 }
1266
1267 /**
1268 * ata_dev_disable - disable ATA device
1269 * @dev: ATA device to disable
1270 *
1271 * Disable @dev.
1272 *
1273 * Locking:
1274 * EH context.
1275 */
ata_dev_disable(struct ata_device * dev)1276 void ata_dev_disable(struct ata_device *dev)
1277 {
1278 if (!ata_dev_enabled(dev))
1279 return;
1280
1281 if (ata_msg_drv(dev->link->ap))
1282 ata_dev_warn(dev, "disabled\n");
1283 ata_acpi_on_disable(dev);
1284 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
1285 dev->class++;
1286
1287 /* From now till the next successful probe, ering is used to
1288 * track probe failures. Clear accumulated device error info.
1289 */
1290 ata_ering_clear(&dev->ering);
1291 }
1292
1293 /**
1294 * ata_eh_detach_dev - detach ATA device
1295 * @dev: ATA device to detach
1296 *
1297 * Detach @dev.
1298 *
1299 * LOCKING:
1300 * None.
1301 */
ata_eh_detach_dev(struct ata_device * dev)1302 void ata_eh_detach_dev(struct ata_device *dev)
1303 {
1304 struct ata_link *link = dev->link;
1305 struct ata_port *ap = link->ap;
1306 struct ata_eh_context *ehc = &link->eh_context;
1307 unsigned long flags;
1308
1309 ata_dev_disable(dev);
1310
1311 spin_lock_irqsave(ap->lock, flags);
1312
1313 dev->flags &= ~ATA_DFLAG_DETACH;
1314
1315 if (ata_scsi_offline_dev(dev)) {
1316 dev->flags |= ATA_DFLAG_DETACHED;
1317 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
1318 }
1319
1320 /* clear per-dev EH info */
1321 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
1322 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
1323 ehc->saved_xfer_mode[dev->devno] = 0;
1324 ehc->saved_ncq_enabled &= ~(1 << dev->devno);
1325
1326 spin_unlock_irqrestore(ap->lock, flags);
1327 }
1328
1329 /**
1330 * ata_eh_about_to_do - about to perform eh_action
1331 * @link: target ATA link
1332 * @dev: target ATA dev for per-dev action (can be NULL)
1333 * @action: action about to be performed
1334 *
1335 * Called just before performing EH actions to clear related bits
1336 * in @link->eh_info such that eh actions are not unnecessarily
1337 * repeated.
1338 *
1339 * LOCKING:
1340 * None.
1341 */
ata_eh_about_to_do(struct ata_link * link,struct ata_device * dev,unsigned int action)1342 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
1343 unsigned int action)
1344 {
1345 struct ata_port *ap = link->ap;
1346 struct ata_eh_info *ehi = &link->eh_info;
1347 struct ata_eh_context *ehc = &link->eh_context;
1348 unsigned long flags;
1349
1350 spin_lock_irqsave(ap->lock, flags);
1351
1352 ata_eh_clear_action(link, dev, ehi, action);
1353
1354 /* About to take EH action, set RECOVERED. Ignore actions on
1355 * slave links as master will do them again.
1356 */
1357 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
1358 ap->pflags |= ATA_PFLAG_RECOVERED;
1359
1360 spin_unlock_irqrestore(ap->lock, flags);
1361 }
1362
1363 /**
1364 * ata_eh_done - EH action complete
1365 * @link: ATA link for which EH actions are complete
1366 * @dev: target ATA dev for per-dev action (can be NULL)
1367 * @action: action just completed
1368 *
1369 * Called right after performing EH actions to clear related bits
1370 * in @link->eh_context.
1371 *
1372 * LOCKING:
1373 * None.
1374 */
ata_eh_done(struct ata_link * link,struct ata_device * dev,unsigned int action)1375 void ata_eh_done(struct ata_link *link, struct ata_device *dev,
1376 unsigned int action)
1377 {
1378 struct ata_eh_context *ehc = &link->eh_context;
1379
1380 ata_eh_clear_action(link, dev, &ehc->i, action);
1381 }
1382
1383 /**
1384 * ata_err_string - convert err_mask to descriptive string
1385 * @err_mask: error mask to convert to string
1386 *
1387 * Convert @err_mask to descriptive string. Errors are
1388 * prioritized according to severity and only the most severe
1389 * error is reported.
1390 *
1391 * LOCKING:
1392 * None.
1393 *
1394 * RETURNS:
1395 * Descriptive string for @err_mask
1396 */
ata_err_string(unsigned int err_mask)1397 static const char *ata_err_string(unsigned int err_mask)
1398 {
1399 if (err_mask & AC_ERR_HOST_BUS)
1400 return "host bus error";
1401 if (err_mask & AC_ERR_ATA_BUS)
1402 return "ATA bus error";
1403 if (err_mask & AC_ERR_TIMEOUT)
1404 return "timeout";
1405 if (err_mask & AC_ERR_HSM)
1406 return "HSM violation";
1407 if (err_mask & AC_ERR_SYSTEM)
1408 return "internal error";
1409 if (err_mask & AC_ERR_MEDIA)
1410 return "media error";
1411 if (err_mask & AC_ERR_INVALID)
1412 return "invalid argument";
1413 if (err_mask & AC_ERR_DEV)
1414 return "device error";
1415 if (err_mask & AC_ERR_NCQ)
1416 return "NCQ error";
1417 if (err_mask & AC_ERR_NODEV_HINT)
1418 return "Polling detection error";
1419 return "unknown error";
1420 }
1421
1422 /**
1423 * ata_eh_read_log_10h - Read log page 10h for NCQ error details
1424 * @dev: Device to read log page 10h from
1425 * @tag: Resulting tag of the failed command
1426 * @tf: Resulting taskfile registers of the failed command
1427 *
1428 * Read log page 10h to obtain NCQ error details and clear error
1429 * condition.
1430 *
1431 * LOCKING:
1432 * Kernel thread context (may sleep).
1433 *
1434 * RETURNS:
1435 * 0 on success, -errno otherwise.
1436 */
ata_eh_read_log_10h(struct ata_device * dev,int * tag,struct ata_taskfile * tf)1437 static int ata_eh_read_log_10h(struct ata_device *dev,
1438 int *tag, struct ata_taskfile *tf)
1439 {
1440 u8 *buf = dev->link->ap->sector_buf;
1441 unsigned int err_mask;
1442 u8 csum;
1443 int i;
1444
1445 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
1446 if (err_mask)
1447 return -EIO;
1448
1449 csum = 0;
1450 for (i = 0; i < ATA_SECT_SIZE; i++)
1451 csum += buf[i];
1452 if (csum)
1453 ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
1454 csum);
1455
1456 if (buf[0] & 0x80)
1457 return -ENOENT;
1458
1459 *tag = buf[0] & 0x1f;
1460
1461 tf->command = buf[2];
1462 tf->feature = buf[3];
1463 tf->lbal = buf[4];
1464 tf->lbam = buf[5];
1465 tf->lbah = buf[6];
1466 tf->device = buf[7];
1467 tf->hob_lbal = buf[8];
1468 tf->hob_lbam = buf[9];
1469 tf->hob_lbah = buf[10];
1470 tf->nsect = buf[12];
1471 tf->hob_nsect = buf[13];
1472 if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id))
1473 tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];
1474
1475 return 0;
1476 }
1477
1478 /**
1479 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY
1480 * @dev: target ATAPI device
1481 * @r_sense_key: out parameter for sense_key
1482 *
1483 * Perform ATAPI TEST_UNIT_READY.
1484 *
1485 * LOCKING:
1486 * EH context (may sleep).
1487 *
1488 * RETURNS:
1489 * 0 on success, AC_ERR_* mask on failure.
1490 */
atapi_eh_tur(struct ata_device * dev,u8 * r_sense_key)1491 unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
1492 {
1493 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
1494 struct ata_taskfile tf;
1495 unsigned int err_mask;
1496
1497 ata_tf_init(dev, &tf);
1498
1499 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1500 tf.command = ATA_CMD_PACKET;
1501 tf.protocol = ATAPI_PROT_NODATA;
1502
1503 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
1504 if (err_mask == AC_ERR_DEV)
1505 *r_sense_key = tf.feature >> 4;
1506 return err_mask;
1507 }
1508
1509 /**
1510 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
1511 * @qc: qc to perform REQUEST_SENSE_SENSE_DATA_EXT to
1512 * @cmd: scsi command for which the sense code should be set
1513 *
1514 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
1515 * SENSE. This function is an EH helper.
1516 *
1517 * LOCKING:
1518 * Kernel thread context (may sleep).
1519 */
ata_eh_request_sense(struct ata_queued_cmd * qc,struct scsi_cmnd * cmd)1520 static void ata_eh_request_sense(struct ata_queued_cmd *qc,
1521 struct scsi_cmnd *cmd)
1522 {
1523 struct ata_device *dev = qc->dev;
1524 struct ata_taskfile tf;
1525 unsigned int err_mask;
1526
1527 if (qc->ap->pflags & ATA_PFLAG_FROZEN) {
1528 ata_dev_warn(dev, "sense data available but port frozen\n");
1529 return;
1530 }
1531
1532 if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID)
1533 return;
1534
1535 if (!ata_id_sense_reporting_enabled(dev->id)) {
1536 ata_dev_warn(qc->dev, "sense data reporting disabled\n");
1537 return;
1538 }
1539
1540 DPRINTK("ATA request sense\n");
1541
1542 ata_tf_init(dev, &tf);
1543 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1544 tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
1545 tf.command = ATA_CMD_REQ_SENSE_DATA;
1546 tf.protocol = ATA_PROT_NODATA;
1547
1548 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
1549 /* Ignore err_mask; ATA_ERR might be set */
1550 if (tf.command & ATA_SENSE) {
1551 ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
1552 qc->flags |= ATA_QCFLAG_SENSE_VALID;
1553 } else {
1554 ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
1555 tf.command, err_mask);
1556 }
1557 }
1558
1559 /**
1560 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
1561 * @dev: device to perform REQUEST_SENSE to
1562 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
1563 * @dfl_sense_key: default sense key to use
1564 *
1565 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
1566 * SENSE. This function is EH helper.
1567 *
1568 * LOCKING:
1569 * Kernel thread context (may sleep).
1570 *
1571 * RETURNS:
1572 * 0 on success, AC_ERR_* mask on failure
1573 */
atapi_eh_request_sense(struct ata_device * dev,u8 * sense_buf,u8 dfl_sense_key)1574 unsigned int atapi_eh_request_sense(struct ata_device *dev,
1575 u8 *sense_buf, u8 dfl_sense_key)
1576 {
1577 u8 cdb[ATAPI_CDB_LEN] =
1578 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
1579 struct ata_port *ap = dev->link->ap;
1580 struct ata_taskfile tf;
1581
1582 DPRINTK("ATAPI request sense\n");
1583
1584 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
1585
1586 /* initialize sense_buf with the error register,
1587 * for the case where they are -not- overwritten
1588 */
1589 sense_buf[0] = 0x70;
1590 sense_buf[2] = dfl_sense_key;
1591
1592 /* some devices time out if garbage left in tf */
1593 ata_tf_init(dev, &tf);
1594
1595 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1596 tf.command = ATA_CMD_PACKET;
1597
1598 /* is it pointless to prefer PIO for "safety reasons"? */
1599 if (ap->flags & ATA_FLAG_PIO_DMA) {
1600 tf.protocol = ATAPI_PROT_DMA;
1601 tf.feature |= ATAPI_PKT_DMA;
1602 } else {
1603 tf.protocol = ATAPI_PROT_PIO;
1604 tf.lbam = SCSI_SENSE_BUFFERSIZE;
1605 tf.lbah = 0;
1606 }
1607
1608 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
1609 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
1610 }
1611
1612 /**
1613 * ata_eh_analyze_serror - analyze SError for a failed port
1614 * @link: ATA link to analyze SError for
1615 *
1616 * Analyze SError if available and further determine cause of
1617 * failure.
1618 *
1619 * LOCKING:
1620 * None.
1621 */
ata_eh_analyze_serror(struct ata_link * link)1622 static void ata_eh_analyze_serror(struct ata_link *link)
1623 {
1624 struct ata_eh_context *ehc = &link->eh_context;
1625 u32 serror = ehc->i.serror;
1626 unsigned int err_mask = 0, action = 0;
1627 u32 hotplug_mask;
1628
1629 if (serror & (SERR_PERSISTENT | SERR_DATA)) {
1630 err_mask |= AC_ERR_ATA_BUS;
1631 action |= ATA_EH_RESET;
1632 }
1633 if (serror & SERR_PROTOCOL) {
1634 err_mask |= AC_ERR_HSM;
1635 action |= ATA_EH_RESET;
1636 }
1637 if (serror & SERR_INTERNAL) {
1638 err_mask |= AC_ERR_SYSTEM;
1639 action |= ATA_EH_RESET;
1640 }
1641
1642 /* Determine whether a hotplug event has occurred. Both
1643 * SError.N/X are considered hotplug events for enabled or
1644 * host links. For disabled PMP links, only N bit is
1645 * considered as X bit is left at 1 for link plugging.
1646 */
1647 if (link->lpm_policy > ATA_LPM_MAX_POWER)
1648 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */
1649 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
1650 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
1651 else
1652 hotplug_mask = SERR_PHYRDY_CHG;
1653
1654 if (serror & hotplug_mask)
1655 ata_ehi_hotplugged(&ehc->i);
1656
1657 ehc->i.err_mask |= err_mask;
1658 ehc->i.action |= action;
1659 }
1660
1661 /**
1662 * ata_eh_analyze_ncq_error - analyze NCQ error
1663 * @link: ATA link to analyze NCQ error for
1664 *
1665 * Read log page 10h, determine the offending qc and acquire
1666 * error status TF. For NCQ device errors, all LLDDs have to do
1667 * is setting AC_ERR_DEV in ehi->err_mask. This function takes
1668 * care of the rest.
1669 *
1670 * LOCKING:
1671 * Kernel thread context (may sleep).
1672 */
ata_eh_analyze_ncq_error(struct ata_link * link)1673 void ata_eh_analyze_ncq_error(struct ata_link *link)
1674 {
1675 struct ata_port *ap = link->ap;
1676 struct ata_eh_context *ehc = &link->eh_context;
1677 struct ata_device *dev = link->device;
1678 struct ata_queued_cmd *qc;
1679 struct ata_taskfile tf;
1680 int tag, rc;
1681
1682 /* if frozen, we can't do much */
1683 if (ap->pflags & ATA_PFLAG_FROZEN)
1684 return;
1685
1686 /* is it NCQ device error? */
1687 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
1688 return;
1689
1690 /* has LLDD analyzed already? */
1691 ata_qc_for_each_raw(ap, qc, tag) {
1692 if (!(qc->flags & ATA_QCFLAG_FAILED))
1693 continue;
1694
1695 if (qc->err_mask)
1696 return;
1697 }
1698
1699 /* okay, this error is ours */
1700 memset(&tf, 0, sizeof(tf));
1701 rc = ata_eh_read_log_10h(dev, &tag, &tf);
1702 if (rc) {
1703 ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
1704 rc);
1705 return;
1706 }
1707
1708 if (!(link->sactive & (1 << tag))) {
1709 ata_link_err(link, "log page 10h reported inactive tag %d\n",
1710 tag);
1711 return;
1712 }
1713
1714 /* we've got the perpetrator, condemn it */
1715 qc = __ata_qc_from_tag(ap, tag);
1716 memcpy(&qc->result_tf, &tf, sizeof(tf));
1717 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
1718 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
1719 if (dev->class == ATA_DEV_ZAC &&
1720 ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) {
1721 char sense_key, asc, ascq;
1722
1723 sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
1724 asc = (qc->result_tf.auxiliary >> 8) & 0xff;
1725 ascq = qc->result_tf.auxiliary & 0xff;
1726 ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
1727 ata_scsi_set_sense_information(dev, qc->scsicmd,
1728 &qc->result_tf);
1729 qc->flags |= ATA_QCFLAG_SENSE_VALID;
1730 }
1731
1732 ehc->i.err_mask &= ~AC_ERR_DEV;
1733 }
1734
1735 /**
1736 * ata_eh_analyze_tf - analyze taskfile of a failed qc
1737 * @qc: qc to analyze
1738 * @tf: Taskfile registers to analyze
1739 *
1740 * Analyze taskfile of @qc and further determine cause of
1741 * failure. This function also requests ATAPI sense data if
1742 * available.
1743 *
1744 * LOCKING:
1745 * Kernel thread context (may sleep).
1746 *
1747 * RETURNS:
1748 * Determined recovery action
1749 */
ata_eh_analyze_tf(struct ata_queued_cmd * qc,const struct ata_taskfile * tf)1750 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1751 const struct ata_taskfile *tf)
1752 {
1753 unsigned int tmp, action = 0;
1754 u8 stat = tf->command, err = tf->feature;
1755
1756 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
1757 qc->err_mask |= AC_ERR_HSM;
1758 return ATA_EH_RESET;
1759 }
1760
1761 if (stat & (ATA_ERR | ATA_DF)) {
1762 qc->err_mask |= AC_ERR_DEV;
1763 /*
1764 * Sense data reporting does not work if the
1765 * device fault bit is set.
1766 */
1767 if (stat & ATA_DF)
1768 stat &= ~ATA_SENSE;
1769 } else {
1770 return 0;
1771 }
1772
1773 switch (qc->dev->class) {
1774 case ATA_DEV_ZAC:
1775 if (stat & ATA_SENSE)
1776 ata_eh_request_sense(qc, qc->scsicmd);
1777 /* fall through */
1778 case ATA_DEV_ATA:
1779 if (err & ATA_ICRC)
1780 qc->err_mask |= AC_ERR_ATA_BUS;
1781 if (err & (ATA_UNC | ATA_AMNF))
1782 qc->err_mask |= AC_ERR_MEDIA;
1783 if (err & ATA_IDNF)
1784 qc->err_mask |= AC_ERR_INVALID;
1785 break;
1786
1787 case ATA_DEV_ATAPI:
1788 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
1789 tmp = atapi_eh_request_sense(qc->dev,
1790 qc->scsicmd->sense_buffer,
1791 qc->result_tf.feature >> 4);
1792 if (!tmp)
1793 qc->flags |= ATA_QCFLAG_SENSE_VALID;
1794 else
1795 qc->err_mask |= tmp;
1796 }
1797 }
1798
1799 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
1800 int ret = scsi_check_sense(qc->scsicmd);
1801 /*
1802 * SUCCESS here means that the sense code could be
1803 * evaluated and should be passed to the upper layers
1804 * for correct evaluation.
1805 * FAILED means the sense code could not be interpreted
1806 * and the device would need to be reset.
1807 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
1808 * command would need to be retried.
1809 */
1810 if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
1811 qc->flags |= ATA_QCFLAG_RETRY;
1812 qc->err_mask |= AC_ERR_OTHER;
1813 } else if (ret != SUCCESS) {
1814 qc->err_mask |= AC_ERR_HSM;
1815 }
1816 }
1817 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
1818 action |= ATA_EH_RESET;
1819
1820 return action;
1821 }
1822
ata_eh_categorize_error(unsigned int eflags,unsigned int err_mask,int * xfer_ok)1823 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
1824 int *xfer_ok)
1825 {
1826 int base = 0;
1827
1828 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
1829 *xfer_ok = 1;
1830
1831 if (!*xfer_ok)
1832 base = ATA_ECAT_DUBIOUS_NONE;
1833
1834 if (err_mask & AC_ERR_ATA_BUS)
1835 return base + ATA_ECAT_ATA_BUS;
1836
1837 if (err_mask & AC_ERR_TIMEOUT)
1838 return base + ATA_ECAT_TOUT_HSM;
1839
1840 if (eflags & ATA_EFLAG_IS_IO) {
1841 if (err_mask & AC_ERR_HSM)
1842 return base + ATA_ECAT_TOUT_HSM;
1843 if ((err_mask &
1844 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1845 return base + ATA_ECAT_UNK_DEV;
1846 }
1847
1848 return 0;
1849 }
1850
/*
 * Scan state handed to speed_down_verdict_cb() through
 * ata_ering_map() by ata_eh_speed_down_verdict().
 */
struct speed_down_verdict_arg {
	u64 since;			/* callback rejects entries with an older timestamp */
	int xfer_ok;			/* passed by address to ata_eh_categorize_error() */
	int nr_errors[ATA_ECAT_NR];	/* error count per ATA_ECAT_* category */
};
1856
speed_down_verdict_cb(struct ata_ering_entry * ent,void * void_arg)1857 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1858 {
1859 struct speed_down_verdict_arg *arg = void_arg;
1860 int cat;
1861
1862 if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
1863 return -1;
1864
1865 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
1866 &arg->xfer_ok);
1867 arg->nr_errors[cat]++;
1868
1869 return 0;
1870 }
1871
1872 /**
1873 * ata_eh_speed_down_verdict - Determine speed down verdict
1874 * @dev: Device of interest
1875 *
1876 * This function examines error ring of @dev and determines
1877 * whether NCQ needs to be turned off, transfer speed should be
1878 * stepped down, or falling back to PIO is necessary.
1879 *
1880 * ECAT_ATA_BUS : ATA_BUS error for any command
1881 *
1882 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
1883 * IO commands
1884 *
1885 * ECAT_UNK_DEV : Unknown DEV error for IO commands
1886 *
1887 * ECAT_DUBIOUS_* : Identical to above three but occurred while
1888 * data transfer hasn't been verified.
1889 *
1890 * Verdicts are
1891 *
1892 * NCQ_OFF : Turn off NCQ.
1893 *
1894 * SPEED_DOWN : Speed down transfer speed but don't fall back
1895 * to PIO.
1896 *
1897 * FALLBACK_TO_PIO : Fall back to PIO.
1898 *
1899 * Even if multiple verdicts are returned, only one action is
1900 * taken per error. An action triggered by non-DUBIOUS errors
1901 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
1902 * This is to expedite speed down decisions right after device is
1903 * initially configured.
1904 *
1905 * The following are speed down rules. #1 and #2 deal with
1906 * DUBIOUS errors.
1907 *
1908 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
1909 * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
1910 *
1911 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
1912 * occurred during last 5 mins, NCQ_OFF.
1913 *
1914 * 3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors
1915 * occurred during last 5 mins, FALLBACK_TO_PIO
1916 *
1917 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
1918 * during last 10 mins, NCQ_OFF.
1919 *
1920 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
1921 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
1922 *
1923 * LOCKING:
1924 * Inherited from caller.
1925 *
1926 * RETURNS:
1927 * OR of ATA_EH_SPDN_* flags.
1928 */
ata_eh_speed_down_verdict(struct ata_device * dev)1929 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1930 {
1931 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
1932 u64 j64 = get_jiffies_64();
1933 struct speed_down_verdict_arg arg;
1934 unsigned int verdict = 0;
1935
1936 /* scan past 5 mins of error history */
1937 memset(&arg, 0, sizeof(arg));
1938 arg.since = j64 - min(j64, j5mins);
1939 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1940
1941 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
1942 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
1943 verdict |= ATA_EH_SPDN_SPEED_DOWN |
1944 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;
1945
1946 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
1947 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
1948 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;
1949
1950 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1951 arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1952 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
1953 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
1954
1955 /* scan past 10 mins of error history */
1956 memset(&arg, 0, sizeof(arg));
1957 arg.since = j64 - min(j64, j10mins);
1958 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1959
1960 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
1961 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
1962 verdict |= ATA_EH_SPDN_NCQ_OFF;
1963
1964 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
1965 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
1966 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
1967 verdict |= ATA_EH_SPDN_SPEED_DOWN;
1968
1969 return verdict;
1970 }
1971
1972 /**
1973 * ata_eh_speed_down - record error and speed down if necessary
1974 * @dev: Failed device
1975 * @eflags: mask of ATA_EFLAG_* flags
1976 * @err_mask: err_mask of the error
1977 *
1978 * Record error and examine error history to determine whether
1979 * adjusting transmission speed is necessary. It also sets
1980 * transmission limits appropriately if such adjustment is
1981 * necessary.
1982 *
1983 * LOCKING:
1984 * Kernel thread context (may sleep).
1985 *
1986 * RETURNS:
1987 * Determined recovery action.
1988 */
ata_eh_speed_down(struct ata_device * dev,unsigned int eflags,unsigned int err_mask)1989 static unsigned int ata_eh_speed_down(struct ata_device *dev,
1990 unsigned int eflags, unsigned int err_mask)
1991 {
1992 struct ata_link *link = ata_dev_phys_link(dev);
1993 int xfer_ok = 0;
1994 unsigned int verdict;
1995 unsigned int action = 0;
1996
1997 /* don't bother if Cat-0 error */
1998 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
1999 return 0;
2000
2001 /* record error and determine whether speed down is necessary */
2002 ata_ering_record(&dev->ering, eflags, err_mask);
2003 verdict = ata_eh_speed_down_verdict(dev);
2004
2005 /* turn off NCQ? */
2006 if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
2007 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
2008 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
2009 dev->flags |= ATA_DFLAG_NCQ_OFF;
2010 ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
2011 goto done;
2012 }
2013
2014 /* speed down? */
2015 if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
2016 /* speed down SATA link speed if possible */
2017 if (sata_down_spd_limit(link, 0) == 0) {
2018 action |= ATA_EH_RESET;
2019 goto done;
2020 }
2021
2022 /* lower transfer mode */
2023 if (dev->spdn_cnt < 2) {
2024 static const int dma_dnxfer_sel[] =
2025 { ATA_DNXFER_DMA, ATA_DNXFER_40C };
2026 static const int pio_dnxfer_sel[] =
2027 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
2028 int sel;
2029
2030 if (dev->xfer_shift != ATA_SHIFT_PIO)
2031 sel = dma_dnxfer_sel[dev->spdn_cnt];
2032 else
2033 sel = pio_dnxfer_sel[dev->spdn_cnt];
2034
2035 dev->spdn_cnt++;
2036
2037 if (ata_down_xfermask_limit(dev, sel) == 0) {
2038 action |= ATA_EH_RESET;
2039 goto done;
2040 }
2041 }
2042 }
2043
2044 /* Fall back to PIO? Slowing down to PIO is meaningless for
2045 * SATA ATA devices. Consider it only for PATA and SATAPI.
2046 */
2047 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
2048 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
2049 (dev->xfer_shift != ATA_SHIFT_PIO)) {
2050 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
2051 dev->spdn_cnt = 0;
2052 action |= ATA_EH_RESET;
2053 goto done;
2054 }
2055 }
2056
2057 return 0;
2058 done:
2059 /* device has been slowed down, blow error history */
2060 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
2061 ata_ering_clear(&dev->ering);
2062 return action;
2063 }
2064
2065 /**
2066 * ata_eh_worth_retry - analyze error and decide whether to retry
2067 * @qc: qc to possibly retry
2068 *
2069 * Look at the cause of the error and decide if a retry
2070 * might be useful or not. We don't want to retry media errors
2071 * because the drive itself has probably already taken 10-30 seconds
2072 * doing its own internal retries before reporting the failure.
2073 */
ata_eh_worth_retry(struct ata_queued_cmd * qc)2074 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
2075 {
2076 if (qc->err_mask & AC_ERR_MEDIA)
2077 return 0; /* don't retry media errors */
2078 if (qc->flags & ATA_QCFLAG_IO)
2079 return 1; /* otherwise retry anything from fs stack */
2080 if (qc->err_mask & AC_ERR_INVALID)
2081 return 0; /* don't retry these */
2082 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */
2083 }
2084
2085 /**
2086 * ata_eh_quiet - check if we need to be quiet about a command error
2087 * @qc: qc to check
2088 *
2089 * Look at the qc flags anbd its scsi command request flags to determine
2090 * if we need to be quiet about the command failure.
2091 */
ata_eh_quiet(struct ata_queued_cmd * qc)2092 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
2093 {
2094 if (qc->scsicmd &&
2095 qc->scsicmd->request->rq_flags & RQF_QUIET)
2096 qc->flags |= ATA_QCFLAG_QUIET;
2097 return qc->flags & ATA_QCFLAG_QUIET;
2098 }
2099
2100 /**
2101 * ata_eh_link_autopsy - analyze error and determine recovery action
2102 * @link: host link to perform autopsy on
2103 *
2104 * Analyze why @link failed and determine which recovery actions
2105 * are needed. This function also sets more detailed AC_ERR_*
2106 * values and fills sense data for ATAPI CHECK SENSE.
2107 *
2108 * LOCKING:
2109 * Kernel thread context (may sleep).
2110 */
ata_eh_link_autopsy(struct ata_link * link)2111 static void ata_eh_link_autopsy(struct ata_link *link)
2112 {
2113 struct ata_port *ap = link->ap;
2114 struct ata_eh_context *ehc = &link->eh_context;
2115 struct ata_queued_cmd *qc;
2116 struct ata_device *dev;
2117 unsigned int all_err_mask = 0, eflags = 0;
2118 int tag, nr_failed = 0, nr_quiet = 0;
2119 u32 serror;
2120 int rc;
2121
2122 DPRINTK("ENTER\n");
2123
2124 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
2125 return;
2126
2127 /* obtain and analyze SError */
2128 rc = sata_scr_read(link, SCR_ERROR, &serror);
2129 if (rc == 0) {
2130 ehc->i.serror |= serror;
2131 ata_eh_analyze_serror(link);
2132 } else if (rc != -EOPNOTSUPP) {
2133 /* SError read failed, force reset and probing */
2134 ehc->i.probe_mask |= ATA_ALL_DEVICES;
2135 ehc->i.action |= ATA_EH_RESET;
2136 ehc->i.err_mask |= AC_ERR_OTHER;
2137 }
2138
2139 /* analyze NCQ failure */
2140 ata_eh_analyze_ncq_error(link);
2141
2142 /* any real error trumps AC_ERR_OTHER */
2143 if (ehc->i.err_mask & ~AC_ERR_OTHER)
2144 ehc->i.err_mask &= ~AC_ERR_OTHER;
2145
2146 all_err_mask |= ehc->i.err_mask;
2147
2148 ata_qc_for_each_raw(ap, qc, tag) {
2149 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2150 ata_dev_phys_link(qc->dev) != link)
2151 continue;
2152
2153 /* inherit upper level err_mask */
2154 qc->err_mask |= ehc->i.err_mask;
2155
2156 /* analyze TF */
2157 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);
2158
2159 /* DEV errors are probably spurious in case of ATA_BUS error */
2160 if (qc->err_mask & AC_ERR_ATA_BUS)
2161 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
2162 AC_ERR_INVALID);
2163
2164 /* any real error trumps unknown error */
2165 if (qc->err_mask & ~AC_ERR_OTHER)
2166 qc->err_mask &= ~AC_ERR_OTHER;
2167
2168 /*
2169 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
2170 * layers will determine whether the command is worth retrying
2171 * based on the sense data and device class/type. Otherwise,
2172 * determine directly if the command is worth retrying using its
2173 * error mask and flags.
2174 */
2175 if (qc->flags & ATA_QCFLAG_SENSE_VALID)
2176 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
2177 else if (ata_eh_worth_retry(qc))
2178 qc->flags |= ATA_QCFLAG_RETRY;
2179
2180 /* accumulate error info */
2181 ehc->i.dev = qc->dev;
2182 all_err_mask |= qc->err_mask;
2183 if (qc->flags & ATA_QCFLAG_IO)
2184 eflags |= ATA_EFLAG_IS_IO;
2185 trace_ata_eh_link_autopsy_qc(qc);
2186
2187 /* Count quiet errors */
2188 if (ata_eh_quiet(qc))
2189 nr_quiet++;
2190 nr_failed++;
2191 }
2192
2193 /* If all failed commands requested silence, then be quiet */
2194 if (nr_quiet == nr_failed)
2195 ehc->i.flags |= ATA_EHI_QUIET;
2196
2197 /* enforce default EH actions */
2198 if (ap->pflags & ATA_PFLAG_FROZEN ||
2199 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
2200 ehc->i.action |= ATA_EH_RESET;
2201 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
2202 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
2203 ehc->i.action |= ATA_EH_REVALIDATE;
2204
2205 /* If we have offending qcs and the associated failed device,
2206 * perform per-dev EH action only on the offending device.
2207 */
2208 if (ehc->i.dev) {
2209 ehc->i.dev_action[ehc->i.dev->devno] |=
2210 ehc->i.action & ATA_EH_PERDEV_MASK;
2211 ehc->i.action &= ~ATA_EH_PERDEV_MASK;
2212 }
2213
2214 /* propagate timeout to host link */
2215 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
2216 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;
2217
2218 /* record error and consider speeding down */
2219 dev = ehc->i.dev;
2220 if (!dev && ((ata_link_max_devices(link) == 1 &&
2221 ata_dev_enabled(link->device))))
2222 dev = link->device;
2223
2224 if (dev) {
2225 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
2226 eflags |= ATA_EFLAG_DUBIOUS_XFER;
2227 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
2228 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
2229 }
2230 DPRINTK("EXIT\n");
2231 }
2232
2233 /**
2234 * ata_eh_autopsy - analyze error and determine recovery action
2235 * @ap: host port to perform autopsy on
2236 *
2237 * Analyze all links of @ap and determine why they failed and
2238 * which recovery actions are needed.
2239 *
2240 * LOCKING:
2241 * Kernel thread context (may sleep).
2242 */
ata_eh_autopsy(struct ata_port * ap)2243 void ata_eh_autopsy(struct ata_port *ap)
2244 {
2245 struct ata_link *link;
2246
2247 ata_for_each_link(link, ap, EDGE)
2248 ata_eh_link_autopsy(link);
2249
2250 /* Handle the frigging slave link. Autopsy is done similarly
2251 * but actions and flags are transferred over to the master
2252 * link and handled from there.
2253 */
2254 if (ap->slave_link) {
2255 struct ata_eh_context *mehc = &ap->link.eh_context;
2256 struct ata_eh_context *sehc = &ap->slave_link->eh_context;
2257
2258 /* transfer control flags from master to slave */
2259 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;
2260
2261 /* perform autopsy on the slave link */
2262 ata_eh_link_autopsy(ap->slave_link);
2263
2264 /* transfer actions from slave to master and clear slave */
2265 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2266 mehc->i.action |= sehc->i.action;
2267 mehc->i.dev_action[1] |= sehc->i.dev_action[1];
2268 mehc->i.flags |= sehc->i.flags;
2269 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2270 }
2271
2272 /* Autopsy of fanout ports can affect host link autopsy.
2273 * Perform host link autopsy last.
2274 */
2275 if (sata_pmp_attached(ap))
2276 ata_eh_link_autopsy(&ap->link);
2277 }
2278
2279 /**
2280 * ata_get_cmd_descript - get description for ATA command
2281 * @command: ATA command code to get description for
2282 *
2283 * Return a textual description of the given command, or NULL if the
2284 * command is not known.
2285 *
2286 * LOCKING:
2287 * None
2288 */
ata_get_cmd_descript(u8 command)2289 const char *ata_get_cmd_descript(u8 command)
2290 {
2291 #ifdef CONFIG_ATA_VERBOSE_ERROR
2292 static const struct
2293 {
2294 u8 command;
2295 const char *text;
2296 } cmd_descr[] = {
2297 { ATA_CMD_DEV_RESET, "DEVICE RESET" },
2298 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" },
2299 { ATA_CMD_STANDBY, "STANDBY" },
2300 { ATA_CMD_IDLE, "IDLE" },
2301 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" },
2302 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" },
2303 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" },
2304 { ATA_CMD_NOP, "NOP" },
2305 { ATA_CMD_FLUSH, "FLUSH CACHE" },
2306 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" },
2307 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" },
2308 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" },
2309 { ATA_CMD_SERVICE, "SERVICE" },
2310 { ATA_CMD_READ, "READ DMA" },
2311 { ATA_CMD_READ_EXT, "READ DMA EXT" },
2312 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" },
2313 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" },
2314 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" },
2315 { ATA_CMD_WRITE, "WRITE DMA" },
2316 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" },
2317 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" },
2318 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" },
2319 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" },
2320 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" },
2321 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
2322 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" },
2323 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" },
2324 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" },
2325 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" },
2326 { ATA_CMD_PIO_READ, "READ SECTOR(S)" },
2327 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" },
2328 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" },
2329 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" },
2330 { ATA_CMD_READ_MULTI, "READ MULTIPLE" },
2331 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" },
2332 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" },
2333 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" },
2334 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" },
2335 { ATA_CMD_SET_FEATURES, "SET FEATURES" },
2336 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" },
2337 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" },
2338 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" },
2339 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" },
2340 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" },
2341 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" },
2342 { ATA_CMD_SLEEP, "SLEEP" },
2343 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" },
2344 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" },
2345 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" },
2346 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" },
2347 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" },
2348 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" },
2349 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" },
2350 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" },
2351 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" },
2352 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" },
2353 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" },
2354 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" },
2355 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" },
2356 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" },
2357 { ATA_CMD_PMP_READ, "READ BUFFER" },
2358 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" },
2359 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" },
2360 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" },
2361 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" },
2362 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" },
2363 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" },
2364 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" },
2365 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" },
2366 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" },
2367 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" },
2368 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" },
2369 { ATA_CMD_SMART, "SMART" },
2370 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" },
2371 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" },
2372 { ATA_CMD_DSM, "DATA SET MANAGEMENT" },
2373 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" },
2374 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" },
2375 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" },
2376 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" },
2377 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" },
2378 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" },
2379 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" },
2380 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" },
2381 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" },
2382 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" },
2383 { ATA_CMD_READ_LONG, "READ LONG (with retries)" },
2384 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" },
2385 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" },
2386 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" },
2387 { ATA_CMD_RESTORE, "RECALIBRATE" },
2388 { 0, NULL } /* terminate list */
2389 };
2390
2391 unsigned int i;
2392 for (i = 0; cmd_descr[i].text; i++)
2393 if (cmd_descr[i].command == command)
2394 return cmd_descr[i].text;
2395 #endif
2396
2397 return NULL;
2398 }
2399 EXPORT_SYMBOL_GPL(ata_get_cmd_descript);
2400
2401 /**
2402 * ata_eh_link_report - report error handling to user
2403 * @link: ATA link EH is going on
2404 *
2405 * Report EH to user.
2406 *
2407 * LOCKING:
2408 * None.
2409 */
ata_eh_link_report(struct ata_link * link)2410 static void ata_eh_link_report(struct ata_link *link)
2411 {
2412 struct ata_port *ap = link->ap;
2413 struct ata_eh_context *ehc = &link->eh_context;
2414 struct ata_queued_cmd *qc;
2415 const char *frozen, *desc;
2416 char tries_buf[6] = "";
2417 int tag, nr_failed = 0;
2418
2419 if (ehc->i.flags & ATA_EHI_QUIET)
2420 return;
2421
2422 desc = NULL;
2423 if (ehc->i.desc[0] != '\0')
2424 desc = ehc->i.desc;
2425
2426 ata_qc_for_each_raw(ap, qc, tag) {
2427 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2428 ata_dev_phys_link(qc->dev) != link ||
2429 ((qc->flags & ATA_QCFLAG_QUIET) &&
2430 qc->err_mask == AC_ERR_DEV))
2431 continue;
2432 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
2433 continue;
2434
2435 nr_failed++;
2436 }
2437
2438 if (!nr_failed && !ehc->i.err_mask)
2439 return;
2440
2441 frozen = "";
2442 if (ap->pflags & ATA_PFLAG_FROZEN)
2443 frozen = " frozen";
2444
2445 if (ap->eh_tries < ATA_EH_MAX_TRIES)
2446 snprintf(tries_buf, sizeof(tries_buf), " t%d",
2447 ap->eh_tries);
2448
2449 if (ehc->i.dev) {
2450 ata_dev_err(ehc->i.dev, "exception Emask 0x%x "
2451 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
2452 ehc->i.err_mask, link->sactive, ehc->i.serror,
2453 ehc->i.action, frozen, tries_buf);
2454 if (desc)
2455 ata_dev_err(ehc->i.dev, "%s\n", desc);
2456 } else {
2457 ata_link_err(link, "exception Emask 0x%x "
2458 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
2459 ehc->i.err_mask, link->sactive, ehc->i.serror,
2460 ehc->i.action, frozen, tries_buf);
2461 if (desc)
2462 ata_link_err(link, "%s\n", desc);
2463 }
2464
2465 #ifdef CONFIG_ATA_VERBOSE_ERROR
2466 if (ehc->i.serror)
2467 ata_link_err(link,
2468 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
2469 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
2470 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
2471 ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
2472 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
2473 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
2474 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
2475 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
2476 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
2477 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
2478 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
2479 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
2480 ehc->i.serror & SERR_CRC ? "BadCRC " : "",
2481 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
2482 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
2483 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
2484 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
2485 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
2486 #endif
2487
2488 ata_qc_for_each_raw(ap, qc, tag) {
2489 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
2490 char data_buf[20] = "";
2491 char cdb_buf[70] = "";
2492
2493 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2494 ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
2495 continue;
2496
2497 if (qc->dma_dir != DMA_NONE) {
2498 static const char *dma_str[] = {
2499 [DMA_BIDIRECTIONAL] = "bidi",
2500 [DMA_TO_DEVICE] = "out",
2501 [DMA_FROM_DEVICE] = "in",
2502 };
2503 const char *prot_str = NULL;
2504
2505 switch (qc->tf.protocol) {
2506 case ATA_PROT_UNKNOWN:
2507 prot_str = "unknown";
2508 break;
2509 case ATA_PROT_NODATA:
2510 prot_str = "nodata";
2511 break;
2512 case ATA_PROT_PIO:
2513 prot_str = "pio";
2514 break;
2515 case ATA_PROT_DMA:
2516 prot_str = "dma";
2517 break;
2518 case ATA_PROT_NCQ:
2519 prot_str = "ncq dma";
2520 break;
2521 case ATA_PROT_NCQ_NODATA:
2522 prot_str = "ncq nodata";
2523 break;
2524 case ATAPI_PROT_NODATA:
2525 prot_str = "nodata";
2526 break;
2527 case ATAPI_PROT_PIO:
2528 prot_str = "pio";
2529 break;
2530 case ATAPI_PROT_DMA:
2531 prot_str = "dma";
2532 break;
2533 }
2534 snprintf(data_buf, sizeof(data_buf), " %s %u %s",
2535 prot_str, qc->nbytes, dma_str[qc->dma_dir]);
2536 }
2537
2538 if (ata_is_atapi(qc->tf.protocol)) {
2539 const u8 *cdb = qc->cdb;
2540 size_t cdb_len = qc->dev->cdb_len;
2541
2542 if (qc->scsicmd) {
2543 cdb = qc->scsicmd->cmnd;
2544 cdb_len = qc->scsicmd->cmd_len;
2545 }
2546 __scsi_format_command(cdb_buf, sizeof(cdb_buf),
2547 cdb, cdb_len);
2548 } else {
2549 const char *descr = ata_get_cmd_descript(cmd->command);
2550 if (descr)
2551 ata_dev_err(qc->dev, "failed command: %s\n",
2552 descr);
2553 }
2554
2555 ata_dev_err(qc->dev,
2556 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
2557 "tag %d%s\n %s"
2558 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
2559 "Emask 0x%x (%s)%s\n",
2560 cmd->command, cmd->feature, cmd->nsect,
2561 cmd->lbal, cmd->lbam, cmd->lbah,
2562 cmd->hob_feature, cmd->hob_nsect,
2563 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
2564 cmd->device, qc->tag, data_buf, cdb_buf,
2565 res->command, res->feature, res->nsect,
2566 res->lbal, res->lbam, res->lbah,
2567 res->hob_feature, res->hob_nsect,
2568 res->hob_lbal, res->hob_lbam, res->hob_lbah,
2569 res->device, qc->err_mask, ata_err_string(qc->err_mask),
2570 qc->err_mask & AC_ERR_NCQ ? " <F>" : "");
2571
2572 #ifdef CONFIG_ATA_VERBOSE_ERROR
2573 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
2574 ATA_SENSE | ATA_ERR)) {
2575 if (res->command & ATA_BUSY)
2576 ata_dev_err(qc->dev, "status: { Busy }\n");
2577 else
2578 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n",
2579 res->command & ATA_DRDY ? "DRDY " : "",
2580 res->command & ATA_DF ? "DF " : "",
2581 res->command & ATA_DRQ ? "DRQ " : "",
2582 res->command & ATA_SENSE ? "SENSE " : "",
2583 res->command & ATA_ERR ? "ERR " : "");
2584 }
2585
2586 if (cmd->command != ATA_CMD_PACKET &&
2587 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF |
2588 ATA_IDNF | ATA_ABORTED)))
2589 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n",
2590 res->feature & ATA_ICRC ? "ICRC " : "",
2591 res->feature & ATA_UNC ? "UNC " : "",
2592 res->feature & ATA_AMNF ? "AMNF " : "",
2593 res->feature & ATA_IDNF ? "IDNF " : "",
2594 res->feature & ATA_ABORTED ? "ABRT " : "");
2595 #endif
2596 }
2597 }
2598
2599 /**
2600 * ata_eh_report - report error handling to user
2601 * @ap: ATA port to report EH about
2602 *
2603 * Report EH to user.
2604 *
2605 * LOCKING:
2606 * None.
2607 */
ata_eh_report(struct ata_port * ap)2608 void ata_eh_report(struct ata_port *ap)
2609 {
2610 struct ata_link *link;
2611
2612 ata_for_each_link(link, ap, HOST_FIRST)
2613 ata_eh_link_report(link);
2614 }
2615
/*
 * Invoke @reset on @link.  When @clear_classes is set, the class of every
 * device on the link is first put back to ATA_DEV_UNKNOWN.  The return
 * value of @reset is passed through unchanged.
 */
static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
			unsigned int *classes, unsigned long deadline,
			bool clear_classes)
{
	if (clear_classes) {
		struct ata_device *cur;

		ata_for_each_dev(cur, link, ALL) {
			classes[cur->devno] = ATA_DEV_UNKNOWN;
		}
	}

	return reset(link, classes, deadline);
}
2628
ata_eh_followup_srst_needed(struct ata_link * link,int rc)2629 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc)
2630 {
2631 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
2632 return 0;
2633 if (rc == -EAGAIN)
2634 return 1;
2635 if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
2636 return 1;
2637 return 0;
2638 }
2639
/**
 *	ata_eh_reset - reset a link (and its slave link, if any) during EH
 *	@link: link to reset
 *	@classify: when non-zero, retry the reset if devices on an online
 *		   link are still unclassified and tries remain
 *	@prereset: optional hook called before the reset; it may return
 *		   -ENOENT or clear ATA_EH_RESET to have the reset skipped
 *	@softreset: softreset method (ignored with ATA_LFLAG_NO_SRST)
 *	@hardreset: hardreset method (ignored with ATA_LFLAG_NO_HRST);
 *		    preferred over @softreset when both are available
 *	@postreset: optional hook called after the reset
 *
 *	Run prereset, the reset proper (with an optional follow-up
 *	softreset after a hardreset) and postreset, then cross check link
 *	onlineness against the classification result.  A failed attempt is
 *	retried according to ata_eh_reset_timeouts[], a single try being
 *	made when ATA_LFLAG_RST_ONCE is set.
 *
 *	ATA_PFLAG_RESETTING is set on the port for the duration of the
 *	call.
 *
 *	LOCKING:
 *	May sleep.  Takes ap->lock internally.
 *
 *	RETURNS:
 *	0 on success, including the case where prereset decided that no
 *	reset is needed; negative errno otherwise.
 */
int ata_eh_reset(struct ata_link *link, int classify,
		 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
		 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_port *ap = link->ap;
	struct ata_link *slave = ap->slave_link;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL;
	unsigned int *classes = ehc->classes;
	unsigned int lflags = link->flags;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	int max_tries = 0, try = 0;
	struct ata_link *failed_link;
	struct ata_device *dev;
	unsigned long deadline, now;
	ata_reset_fn_t reset;
	unsigned long flags;
	u32 sstatus;
	int nr_unknown, rc;

	/*
	 * Prepare to reset
	 */

	/* the timeout table is terminated by ULONG_MAX */
	while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
		max_tries++;
	if (link->flags & ATA_LFLAG_RST_ONCE)
		max_tries = 1;
	if (link->flags & ATA_LFLAG_NO_HRST)
		hardreset = NULL;
	if (link->flags & ATA_LFLAG_NO_SRST)
		softreset = NULL;

	/* make sure each reset attempt is at least COOL_DOWN apart */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		now = jiffies;
		WARN_ON(time_after(ehc->last_reset, now));
		deadline = ata_deadline(ehc->last_reset,
					ATA_EH_RESET_COOL_DOWN);
		if (time_before(now, deadline))
			schedule_timeout_uninterruptible(deadline - now);
	}

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags |= ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);

	ata_for_each_dev(dev, link, ALL) {
		/* If we issue an SRST then an ATA drive (not ATAPI)
		 * may change configuration and be in PIO0 timing. If
		 * we do a hard reset (or are coming from power on)
		 * this is true for ATA or ATAPI. Until we've set a
		 * suitable controller mode we should not touch the
		 * bus as we may be talking too fast.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->dma_mode = 0xff;

		/* If the controller has a pio mode setup function
		 * then use it to set the chipset to rights. Don't
		 * touch the DMA setup as that will be dealt with when
		 * configuring devices.
		 */
		if (ap->ops->set_piomode)
			ap->ops->set_piomode(ap, dev);
	}

	/* prefer hardreset */
	reset = NULL;
	ehc->i.action &= ~ATA_EH_RESET;
	if (hardreset) {
		reset = hardreset;
		ehc->i.action |= ATA_EH_HARDRESET;
	} else if (softreset) {
		reset = softreset;
		ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (prereset) {
		/*
		 * Separate from the per-attempt 'deadline' used by the
		 * reset loop below; a distinct name avoids shadowing it.
		 */
		unsigned long prereset_deadline = ata_deadline(jiffies,
						ATA_EH_PRERESET_TIMEOUT);

		if (slave) {
			sehc->i.action &= ~ATA_EH_RESET;
			sehc->i.action |= ehc->i.action;
		}

		rc = prereset(link, prereset_deadline);

		/* If present, do prereset on slave link too.  Reset
		 * is skipped iff both master and slave links report
		 * -ENOENT or clear ATA_EH_RESET.
		 */
		if (slave && (rc == 0 || rc == -ENOENT)) {
			int tmp;

			tmp = prereset(slave, prereset_deadline);
			if (tmp != -ENOENT)
				rc = tmp;

			ehc->i.action |= sehc->i.action;
		}

		if (rc) {
			if (rc == -ENOENT) {
				ata_link_dbg(link, "port disabled--ignoring\n");
				ehc->i.action &= ~ATA_EH_RESET;

				ata_for_each_dev(dev, link, ALL)
					classes[dev->devno] = ATA_DEV_NONE;

				rc = 0;
			} else
				ata_link_err(link,
					     "prereset failed (errno=%d)\n",
					     rc);
			goto out;
		}

		/* prereset() might have cleared ATA_EH_RESET.  If so,
		 * bang classes, thaw and return.
		 */
		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
			ata_for_each_dev(dev, link, ALL)
				classes[dev->devno] = ATA_DEV_NONE;
			if ((ap->pflags & ATA_PFLAG_FROZEN) &&
			    ata_is_host_link(link))
				ata_eh_thaw_port(ap);
			rc = 0;
			goto out;
		}
	}

 retry:
	/*
	 * Perform reset
	 */
	if (ata_is_host_link(link))
		ata_eh_freeze_port(ap);

	/* each attempt gets the next timeout from the table */
	deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);

	if (reset) {
		if (verbose)
			ata_link_info(link, "%s resetting link\n",
				      reset == softreset ? "soft" : "hard");

		/* mark that this EH session started with reset */
		ehc->last_reset = jiffies;
		if (reset == hardreset)
			ehc->i.flags |= ATA_EHI_DID_HARDRESET;
		else
			ehc->i.flags |= ATA_EHI_DID_SOFTRESET;

		/* -EAGAIN is not a failure; it requests a follow-up SRST */
		rc = ata_do_reset(link, reset, classes, deadline, true);
		if (rc && rc != -EAGAIN) {
			failed_link = link;
			goto fail;
		}

		/* hardreset slave link if existent */
		if (slave && reset == hardreset) {
			int tmp;

			if (verbose)
				ata_link_info(slave, "hard resetting link\n");

			ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
			tmp = ata_do_reset(slave, reset, classes, deadline,
					   false);
			switch (tmp) {
			case -EAGAIN:
				/* remember the follow-up request */
				rc = -EAGAIN;
				break;
			case 0:
				break;
			default:
				failed_link = slave;
				rc = tmp;
				goto fail;
			}
		}

		/* perform follow-up SRST if necessary */
		if (reset == hardreset &&
		    ata_eh_followup_srst_needed(link, rc)) {
			reset = softreset;

			if (!reset) {
				ata_link_err(link,
	     "follow-up softreset required but no softreset available\n");
				failed_link = link;
				rc = -EINVAL;
				goto fail;
			}

			ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
			rc = ata_do_reset(link, reset, classes, deadline, true);
			if (rc) {
				failed_link = link;
				goto fail;
			}
		}
	} else {
		if (verbose)
			ata_link_info(link,
	"no reset method available, skipping reset\n");
		if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
			lflags |= ATA_LFLAG_ASSUME_ATA;
	}

	/*
	 * Post-reset processing
	 */
	ata_for_each_dev(dev, link, ALL) {
		/* After the reset, the device state is PIO 0 and the
		 * controller state is undefined.  Reset also wakes up
		 * drives from sleeping mode.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->flags &= ~ATA_DFLAG_SLEEPING;

		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			continue;

		/* apply class override */
		if (lflags & ATA_LFLAG_ASSUME_ATA)
			classes[dev->devno] = ATA_DEV_ATA;
		else if (lflags & ATA_LFLAG_ASSUME_SEMB)
			classes[dev->devno] = ATA_DEV_SEMB_UNSUP;
	}

	/* record current link speed */
	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
		link->sata_spd = (sstatus >> 4) & 0xf;
	if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
		slave->sata_spd = (sstatus >> 4) & 0xf;

	/* thaw the port */
	if (ata_is_host_link(link))
		ata_eh_thaw_port(ap);

	/* postreset() should clear hardware SError.  Although SError
	 * is cleared during link resume, clearing SError here is
	 * necessary as some PHYs raise hotplug events after SRST.
	 * This introduces race condition where hotplug occurs between
	 * reset and here.  This race is mediated by cross checking
	 * link onlineness and classification result later.
	 */
	if (postreset) {
		postreset(link, classes);
		if (slave)
			postreset(slave, classes);
	}

	/*
	 * Some controllers can't be frozen very well and may set spurious
	 * error conditions during reset.  Clear accumulated error
	 * information and re-thaw the port if frozen.  As reset is the
	 * final recovery action and we cross check link onlineness against
	 * device classification later, no hotplug event is lost by this.
	 */
	spin_lock_irqsave(link->ap->lock, flags);
	memset(&link->eh_info, 0, sizeof(link->eh_info));
	if (slave)
		memset(&slave->eh_info, 0, sizeof(slave->eh_info));
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	spin_unlock_irqrestore(link->ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_eh_thaw_port(ap);

	/*
	 * Make sure onlineness and classification result correspond.
	 * Hotplug could have happened during reset and some
	 * controllers fail to wait while a drive is spinning up after
	 * being hotplugged causing misdetection.  By cross checking
	 * link on/offlineness and classification result, those
	 * conditions can be reliably detected and retried.
	 */
	nr_unknown = 0;
	ata_for_each_dev(dev, link, ALL) {
		if (ata_phys_link_online(ata_dev_phys_link(dev))) {
			if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
				ata_dev_dbg(dev, "link online but device misclassified\n");
				classes[dev->devno] = ATA_DEV_NONE;
				nr_unknown++;
			}
		} else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
			if (ata_class_enabled(classes[dev->devno]))
				ata_dev_dbg(dev,
					    "link offline, clearing class %d to NONE\n",
					    classes[dev->devno]);
			classes[dev->devno] = ATA_DEV_NONE;
		} else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
			ata_dev_dbg(dev,
				    "link status unknown, clearing UNKNOWN to NONE\n");
			classes[dev->devno] = ATA_DEV_NONE;
		}
	}

	if (classify && nr_unknown) {
		if (try < max_tries) {
			ata_link_warn(link,
				      "link online but %d devices misclassified, retrying\n",
				      nr_unknown);
			failed_link = link;
			rc = -EAGAIN;
			goto fail;
		}
		/* out of tries: carry on with what we have */
		ata_link_warn(link,
			      "link online but %d devices misclassified, "
			      "device detection might fail\n", nr_unknown);
	}

	/* reset successful, schedule revalidation */
	ata_eh_done(link, NULL, ATA_EH_RESET);
	if (slave)
		ata_eh_done(slave, NULL, ATA_EH_RESET);
	ehc->last_reset = jiffies;		/* update to completion time */
	ehc->i.action |= ATA_EH_REVALIDATE;
	link->lpm_policy = ATA_LPM_UNKNOWN;	/* reset LPM state */

	rc = 0;
 out:
	/* clear hotplug flag */
	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
	if (slave)
		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	return rc;

 fail:
	/* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
	if (!ata_is_host_link(link) &&
	    sata_scr_read(link, SCR_STATUS, &sstatus))
		rc = -ERESTART;

	if (try >= max_tries) {
		/*
		 * Thaw host port even if reset failed, so that the port
		 * can be retried on the next phy event.  This risks
		 * repeated EH runs but seems to be a better tradeoff than
		 * shutting down a port after a botched hotplug attempt.
		 */
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		goto out;
	}

	/* wait out the remainder of this attempt's timeout before retrying */
	now = jiffies;
	if (time_before(now, deadline)) {
		unsigned long delta = deadline - now;

		ata_link_warn(failed_link,
			"reset failed (errno=%d), retrying in %u secs\n",
			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));

		ata_eh_release(ap);
		while (delta)
			delta = schedule_timeout_uninterruptible(delta);
		ata_eh_acquire(ap);
	}

	/*
	 * While disks spinup behind PMP, some controllers fail sending SRST.
	 * They need to be reset - as well as the PMP - before retrying.
	 */
	if (rc == -ERESTART) {
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		goto out;
	}

	/* lower the link speed before the last try, or right away on -EPIPE */
	if (try == max_tries - 1) {
		sata_down_spd_limit(link, 0);
		if (slave)
			sata_down_spd_limit(slave, 0);
	} else if (rc == -EPIPE)
		sata_down_spd_limit(failed_link, 0);

	/* a follow-up SRST may have switched 'reset'; go back to hardreset */
	if (hardreset)
		reset = hardreset;
	goto retry;
}
3029
ata_eh_pull_park_action(struct ata_port * ap)3030 static inline void ata_eh_pull_park_action(struct ata_port *ap)
3031 {
3032 struct ata_link *link;
3033 struct ata_device *dev;
3034 unsigned long flags;
3035
3036 /*
3037 * This function can be thought of as an extended version of
3038 * ata_eh_about_to_do() specially crafted to accommodate the
3039 * requirements of ATA_EH_PARK handling. Since the EH thread
3040 * does not leave the do {} while () loop in ata_eh_recover as
3041 * long as the timeout for a park request to *one* device on
3042 * the port has not expired, and since we still want to pick
3043 * up park requests to other devices on the same port or
3044 * timeout updates for the same device, we have to pull
3045 * ATA_EH_PARK actions from eh_info into eh_context.i
3046 * ourselves at the beginning of each pass over the loop.
3047 *
3048 * Additionally, all write accesses to &ap->park_req_pending
3049 * through reinit_completion() (see below) or complete_all()
3050 * (see ata_scsi_park_store()) are protected by the host lock.
3051 * As a result we have that park_req_pending.done is zero on
3052 * exit from this function, i.e. when ATA_EH_PARK actions for
3053 * *all* devices on port ap have been pulled into the
3054 * respective eh_context structs. If, and only if,
3055 * park_req_pending.done is non-zero by the time we reach
3056 * wait_for_completion_timeout(), another ATA_EH_PARK action
3057 * has been scheduled for at least one of the devices on port
3058 * ap and we have to cycle over the do {} while () loop in
3059 * ata_eh_recover() again.
3060 */
3061
3062 spin_lock_irqsave(ap->lock, flags);
3063 reinit_completion(&ap->park_req_pending);
3064 ata_for_each_link(link, ap, EDGE) {
3065 ata_for_each_dev(dev, link, ALL) {
3066 struct ata_eh_info *ehi = &link->eh_info;
3067
3068 link->eh_context.i.dev_action[dev->devno] |=
3069 ehi->dev_action[dev->devno] & ATA_EH_PARK;
3070 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
3071 }
3072 }
3073 spin_unlock_irqrestore(ap->lock, flags);
3074 }
3075
ata_eh_park_issue_cmd(struct ata_device * dev,int park)3076 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
3077 {
3078 struct ata_eh_context *ehc = &dev->link->eh_context;
3079 struct ata_taskfile tf;
3080 unsigned int err_mask;
3081
3082 ata_tf_init(dev, &tf);
3083 if (park) {
3084 ehc->unloaded_mask |= 1 << dev->devno;
3085 tf.command = ATA_CMD_IDLEIMMEDIATE;
3086 tf.feature = 0x44;
3087 tf.lbal = 0x4c;
3088 tf.lbam = 0x4e;
3089 tf.lbah = 0x55;
3090 } else {
3091 ehc->unloaded_mask &= ~(1 << dev->devno);
3092 tf.command = ATA_CMD_CHK_POWER;
3093 }
3094
3095 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
3096 tf.protocol = ATA_PROT_NODATA;
3097 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3098 if (park && (err_mask || tf.lbal != 0xc4)) {
3099 ata_dev_err(dev, "head unload failed!\n");
3100 ehc->unloaded_mask &= ~(1 << dev->devno);
3101 }
3102 }
3103
/**
 *	ata_eh_revalidate_and_attach - revalidate known devices, attach new ones
 *	@link: link whose devices are processed
 *	@r_failed_dev: out parameter for the device being handled on failure
 *
 *	Walks the devices on @link in reverse order.  Enabled devices with
 *	ATA_EH_REVALIDATE pending are revalidated.  Slots whose class is
 *	ATA_DEV_UNKNOWN, which still have tries left and for which an
 *	enabled class was recorded in the EH context, are identified (or
 *	attached as PMP).  Devices identified successfully are afterwards
 *	configured in forward order, PMP devices excepted.
 *
 *	RETURNS:
 *	0 on success.  On failure a negative errno is returned and
 *	*@r_failed_dev is set to the device that was being processed;
 *	*@r_failed_dev is not written on success.
 */
static int ata_eh_revalidate_and_attach(struct ata_link *link,
					struct ata_device **r_failed_dev)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	/* one bit per devno that was newly identified in the first pass */
	unsigned int new_mask = 0;
	unsigned long flags;
	int rc = 0;

	DPRINTK("ENTER\n");

	/* For PATA drive side cable detection to work, IDENTIFY must
	 * be done backwards such that PDIAG- is released by the slave
	 * device before the master device is identified.
	 */
	ata_for_each_dev(dev, link, ALL_REVERSE) {
		unsigned int action = ata_eh_dev_action(dev);
		unsigned int readid_flags = 0;

		/* let the IDENTIFY path know that a reset preceded it */
		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
			WARN_ON(dev->class == ATA_DEV_PMP);

			/* cannot revalidate over an offline physical link */
			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
				rc = -EIO;
				goto err;
			}

			ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
						readid_flags);
			if (rc)
				goto err;

			ata_eh_done(link, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			schedule_work(&(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			/* Temporarily set dev->class, it will be
			 * permanently set once all configurations are
			 * complete.  This is necessary because new
			 * device configuration is done in two
			 * separate loops.
			 */
			dev->class = ehc->classes[dev->devno];

			if (dev->class == ATA_DEV_PMP)
				rc = sata_pmp_attach(dev);
			else
				rc = ata_dev_read_id(dev, &dev->class,
						     readid_flags, dev->id);

			/* read_id might have changed class, store and reset */
			ehc->classes[dev->devno] = dev->class;
			dev->class = ATA_DEV_UNKNOWN;

			switch (rc) {
			case 0:
				/* clear error info accumulated during probe */
				ata_ering_clear(&dev->ering);
				new_mask |= 1 << dev->devno;
				break;
			case -ENOENT:
				/* IDENTIFY was issued to non-existent
				 * device.  No need to reset.  Just
				 * thaw and ignore the device.
				 */
				ata_eh_thaw_port(ap);
				break;
			default:
				/* any other error fails the whole pass */
				goto err;
			}
		}
	}

	/* PDIAG- should have been released, ask cable type if post-reset */
	if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
		if (ap->ops->cable_detect)
			ap->cbl = ap->ops->cable_detect(ap);
		ata_force_cbl(ap);
	}

	/* Configure new devices forward such that user doesn't see
	 * device detection messages backwards.
	 */
	ata_for_each_dev(dev, link, ALL) {
		/* only devices identified in the first pass are configured */
		if (!(new_mask & (1 << dev->devno)))
			continue;

		/* make the class stored by the first pass permanent */
		dev->class = ehc->classes[dev->devno];

		if (dev->class == ATA_DEV_PMP)
			continue;

		/* ATA_EHI_PRINTINFO is set only around ata_dev_configure() */
		ehc->i.flags |= ATA_EHI_PRINTINFO;
		rc = ata_dev_configure(dev);
		ehc->i.flags &= ~ATA_EHI_PRINTINFO;
		if (rc) {
			/* configuration failed, the slot is vacant again */
			dev->class = ATA_DEV_UNKNOWN;
			goto err;
		}

		spin_lock_irqsave(ap->lock, flags);
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
		spin_unlock_irqrestore(ap->lock, flags);

		/* new device discovered, configure xfermode */
		ehc->i.flags |= ATA_EHI_SETMODE;
	}

	return 0;

 err:
	/* dev is the device that was being processed when the error hit */
	*r_failed_dev = dev;
	DPRINTK("EXIT rc=%d\n", rc);
	return rc;
}
3232
3233 /**
3234 * ata_set_mode - Program timings and issue SET FEATURES - XFER
3235 * @link: link on which timings will be programmed
3236 * @r_failed_dev: out parameter for failed device
3237 *
3238 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If
3239 * ata_set_mode() fails, pointer to the failing device is
3240 * returned in @r_failed_dev.
3241 *
3242 * LOCKING:
3243 * PCI/etc. bus probe sem.
3244 *
3245 * RETURNS:
3246 * 0 on success, negative errno otherwise
3247 */
ata_set_mode(struct ata_link * link,struct ata_device ** r_failed_dev)3248 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
3249 {
3250 struct ata_port *ap = link->ap;
3251 struct ata_device *dev;
3252 int rc;
3253
3254 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */
3255 ata_for_each_dev(dev, link, ENABLED) {
3256 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
3257 struct ata_ering_entry *ent;
3258
3259 ent = ata_ering_top(&dev->ering);
3260 if (ent)
3261 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
3262 }
3263 }
3264
3265 /* has private set_mode? */
3266 if (ap->ops->set_mode)
3267 rc = ap->ops->set_mode(link, r_failed_dev);
3268 else
3269 rc = ata_do_set_mode(link, r_failed_dev);
3270
3271 /* if transfer mode has changed, set DUBIOUS_XFER on device */
3272 ata_for_each_dev(dev, link, ENABLED) {
3273 struct ata_eh_context *ehc = &link->eh_context;
3274 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
3275 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
3276
3277 if (dev->xfer_mode != saved_xfer_mode ||
3278 ata_ncq_enabled(dev) != saved_ncq)
3279 dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
3280 }
3281
3282 return rc;
3283 }
3284
3285 /**
3286 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
3287 * @dev: ATAPI device to clear UA for
3288 *
3289 * Resets and other operations can make an ATAPI device raise
3290 * UNIT ATTENTION which causes the next operation to fail. This
3291 * function clears UA.
3292 *
3293 * LOCKING:
3294 * EH context (may sleep).
3295 *
3296 * RETURNS:
3297 * 0 on success, -errno on failure.
3298 */
atapi_eh_clear_ua(struct ata_device * dev)3299 static int atapi_eh_clear_ua(struct ata_device *dev)
3300 {
3301 int i;
3302
3303 for (i = 0; i < ATA_EH_UA_TRIES; i++) {
3304 u8 *sense_buffer = dev->link->ap->sector_buf;
3305 u8 sense_key = 0;
3306 unsigned int err_mask;
3307
3308 err_mask = atapi_eh_tur(dev, &sense_key);
3309 if (err_mask != 0 && err_mask != AC_ERR_DEV) {
3310 ata_dev_warn(dev,
3311 "TEST_UNIT_READY failed (err_mask=0x%x)\n",
3312 err_mask);
3313 return -EIO;
3314 }
3315
3316 if (!err_mask || sense_key != UNIT_ATTENTION)
3317 return 0;
3318
3319 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
3320 if (err_mask) {
3321 ata_dev_warn(dev, "failed to clear "
3322 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
3323 return -EIO;
3324 }
3325 }
3326
3327 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n",
3328 ATA_EH_UA_TRIES);
3329
3330 return 0;
3331 }
3332
3333 /**
3334 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary
3335 * @dev: ATA device which may need FLUSH retry
3336 *
3337 * If @dev failed FLUSH, it needs to be reported upper layer
3338 * immediately as it means that @dev failed to remap and already
3339 * lost at least a sector and further FLUSH retrials won't make
3340 * any difference to the lost sector. However, if FLUSH failed
3341 * for other reasons, for example transmission error, FLUSH needs
3342 * to be retried.
3343 *
3344 * This function determines whether FLUSH failure retry is
3345 * necessary and performs it if so.
3346 *
3347 * RETURNS:
3348 * 0 if EH can continue, -errno if EH needs to be repeated.
3349 */
ata_eh_maybe_retry_flush(struct ata_device * dev)3350 static int ata_eh_maybe_retry_flush(struct ata_device *dev)
3351 {
3352 struct ata_link *link = dev->link;
3353 struct ata_port *ap = link->ap;
3354 struct ata_queued_cmd *qc;
3355 struct ata_taskfile tf;
3356 unsigned int err_mask;
3357 int rc = 0;
3358
3359 /* did flush fail for this device? */
3360 if (!ata_tag_valid(link->active_tag))
3361 return 0;
3362
3363 qc = __ata_qc_from_tag(ap, link->active_tag);
3364 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
3365 qc->tf.command != ATA_CMD_FLUSH))
3366 return 0;
3367
3368 /* if the device failed it, it should be reported to upper layers */
3369 if (qc->err_mask & AC_ERR_DEV)
3370 return 0;
3371
3372 /* flush failed for some other reason, give it another shot */
3373 ata_tf_init(dev, &tf);
3374
3375 tf.command = qc->tf.command;
3376 tf.flags |= ATA_TFLAG_DEVICE;
3377 tf.protocol = ATA_PROT_NODATA;
3378
3379 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n",
3380 tf.command, qc->err_mask);
3381
3382 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3383 if (!err_mask) {
3384 /*
3385 * FLUSH is complete but there's no way to
3386 * successfully complete a failed command from EH.
3387 * Making sure retry is allowed at least once and
3388 * retrying it should do the trick - whatever was in
3389 * the cache is already on the platter and this won't
3390 * cause infinite loop.
3391 */
3392 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
3393 } else {
3394 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n",
3395 err_mask);
3396 rc = -EIO;
3397
3398 /* if device failed it, report it to upper layers */
3399 if (err_mask & AC_ERR_DEV) {
3400 qc->err_mask |= AC_ERR_DEV;
3401 qc->result_tf = tf;
3402 if (!(ap->pflags & ATA_PFLAG_FROZEN))
3403 rc = 0;
3404 }
3405 }
3406 return rc;
3407 }
3408
/**
 *	ata_eh_set_lpm - configure SATA interface power management
 *	@link: link to configure power management
 *	@policy: the link power management policy
 *	@r_failed_dev: out parameter for failed device (may be NULL)
 *
 *	Enable SATA Interface power management.  This will enable
 *	Device Interface Power Management (DIPM) for min_power and
 *	medium_power_with_dipm policies, and then call driver specific
 *	callbacks for enabling Host Initiated Power management.
 *
 *	If the link is flagged ATA_LFLAG_NO_LPM, or it is a host link
 *	whose port has no ->set_lpm() callback, nothing is done.  If the
 *	link configuration step returns -EOPNOTSUPP, the link is flagged
 *	ATA_LFLAG_NO_LPM and 0 is returned.
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	0 on success, -errno on failure.  On failure link->lpm_policy is
 *	restored to its previous value and, if @r_failed_dev is not NULL,
 *	*@r_failed_dev is set to the device the failure is attributed to
 *	(which may be NULL).
 */
static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
			  struct ata_device **r_failed_dev)
{
	/* ap is non-NULL only for host links; PMP links go via sata_pmp */
	struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
	enum ata_lpm_policy old_policy = link->lpm_policy;
	bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
	/* start with both hints set and clear them as devices are seen */
	unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
	unsigned int err_mask;
	int rc;

	/* if the link or host doesn't do LPM, noop */
	if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
		return 0;

	/*
	 * DIPM is enabled only for policies at or above
	 * MED_POWER_WITH_DIPM as some devices misbehave when the host
	 * NACKs transition to SLUMBER.  Order device and link
	 * configurations such that the host always allows DIPM requests.
	 */
	ata_for_each_dev(dev, link, ENABLED) {
		bool hipm = ata_id_has_hipm(dev->id);
		bool dipm = ata_id_has_dipm(dev->id) && !no_dipm;

		/* find the first enabled and LPM enabled devices */
		if (!link_dev)
			link_dev = dev;

		if (!lpm_dev && (hipm || dipm))
			lpm_dev = dev;

		/* an enabled device exists, so the link is not empty */
		hints &= ~ATA_LPM_EMPTY;
		/* the HIPM hint survives only if every device has HIPM */
		if (!hipm)
			hints &= ~ATA_LPM_HIPM;

		/* disable DIPM before changing link config */
		if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) {
			err_mask = ata_dev_set_feature(dev,
					SETFEATURES_SATA_DISABLE, SATA_DIPM);
			/* a plain device error (AC_ERR_DEV) is tolerated */
			if (err_mask && err_mask != AC_ERR_DEV) {
				ata_dev_warn(dev,
					     "failed to disable DIPM, Emask 0x%x\n",
					     err_mask);
				rc = -EIO;
				goto fail;
			}
		}
	}

	if (ap) {
		rc = ap->ops->set_lpm(link, policy, hints);
		/* a slave link, if any, gets the same configuration */
		if (!rc && ap->slave_link)
			rc = ap->ops->set_lpm(ap->slave_link, policy, hints);
	} else
		rc = sata_pmp_set_lpm(link, policy, hints);

	/*
	 * Attribute link config failure to the first (LPM) enabled
	 * device on the link.
	 */
	if (rc) {
		if (rc == -EOPNOTSUPP) {
			/* not supported: stop trying, but do not fail EH */
			link->flags |= ATA_LFLAG_NO_LPM;
			return 0;
		}
		dev = lpm_dev ? lpm_dev : link_dev;
		goto fail;
	}

	/*
	 * Low level driver acked the transition.  Issue DIPM command
	 * with the new policy set.
	 */
	link->lpm_policy = policy;
	if (ap && ap->slave_link)
		ap->slave_link->lpm_policy = policy;

	/*
	 * host config updated, enable DIPM if transitioning to
	 * MED_POWER_WITH_DIPM or above
	 */
	ata_for_each_dev(dev, link, ENABLED) {
		if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm &&
		    ata_id_has_dipm(dev->id)) {
			err_mask = ata_dev_set_feature(dev,
					SETFEATURES_SATA_ENABLE, SATA_DIPM);
			/* a plain device error (AC_ERR_DEV) is tolerated */
			if (err_mask && err_mask != AC_ERR_DEV) {
				ata_dev_warn(dev,
					"failed to enable DIPM, Emask 0x%x\n",
					err_mask);
				rc = -EIO;
				goto fail;
			}
		}
	}

	link->last_lpm_change = jiffies;
	link->flags |= ATA_LFLAG_CHANGED;

	return 0;

fail:
	/* restore the old policy */
	link->lpm_policy = old_policy;
	if (ap && ap->slave_link)
		ap->slave_link->lpm_policy = old_policy;

	/* if no device or only one more chance is left, disable LPM */
	if (!dev || ehc->tries[dev->devno] <= 2) {
		ata_link_warn(link, "disabling LPM on the link\n");
		link->flags |= ATA_LFLAG_NO_LPM;
	}
	if (r_failed_dev)
		*r_failed_dev = dev;
	return rc;
}
3541
ata_link_nr_enabled(struct ata_link * link)3542 int ata_link_nr_enabled(struct ata_link *link)
3543 {
3544 struct ata_device *dev;
3545 int cnt = 0;
3546
3547 ata_for_each_dev(dev, link, ENABLED)
3548 cnt++;
3549 return cnt;
3550 }
3551
ata_link_nr_vacant(struct ata_link * link)3552 static int ata_link_nr_vacant(struct ata_link *link)
3553 {
3554 struct ata_device *dev;
3555 int cnt = 0;
3556
3557 ata_for_each_dev(dev, link, ALL)
3558 if (dev->class == ATA_DEV_UNKNOWN)
3559 cnt++;
3560 return cnt;
3561 }
3562
ata_eh_skip_recovery(struct ata_link * link)3563 static int ata_eh_skip_recovery(struct ata_link *link)
3564 {
3565 struct ata_port *ap = link->ap;
3566 struct ata_eh_context *ehc = &link->eh_context;
3567 struct ata_device *dev;
3568
3569 /* skip disabled links */
3570 if (link->flags & ATA_LFLAG_DISABLED)
3571 return 1;
3572
3573 /* skip if explicitly requested */
3574 if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
3575 return 1;
3576
3577 /* thaw frozen port and recover failed devices */
3578 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
3579 return 0;
3580
3581 /* reset at least once if reset is requested */
3582 if ((ehc->i.action & ATA_EH_RESET) &&
3583 !(ehc->i.flags & ATA_EHI_DID_RESET))
3584 return 0;
3585
3586 /* skip if class codes for all vacant slots are ATA_DEV_NONE */
3587 ata_for_each_dev(dev, link, ALL) {
3588 if (dev->class == ATA_DEV_UNKNOWN &&
3589 ehc->classes[dev->devno] != ATA_DEV_NONE)
3590 return 0;
3591 }
3592
3593 return 1;
3594 }
3595
ata_count_probe_trials_cb(struct ata_ering_entry * ent,void * void_arg)3596 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
3597 {
3598 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
3599 u64 now = get_jiffies_64();
3600 int *trials = void_arg;
3601
3602 if ((ent->eflags & ATA_EFLAG_OLD_ER) ||
3603 (ent->timestamp < now - min(now, interval)))
3604 return -1;
3605
3606 (*trials)++;
3607 return 0;
3608 }
3609
ata_eh_schedule_probe(struct ata_device * dev)3610 static int ata_eh_schedule_probe(struct ata_device *dev)
3611 {
3612 struct ata_eh_context *ehc = &dev->link->eh_context;
3613 struct ata_link *link = ata_dev_phys_link(dev);
3614 int trials = 0;
3615
3616 if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
3617 (ehc->did_probe_mask & (1 << dev->devno)))
3618 return 0;
3619
3620 ata_eh_detach_dev(dev);
3621 ata_dev_init(dev);
3622 ehc->did_probe_mask |= (1 << dev->devno);
3623 ehc->i.action |= ATA_EH_RESET;
3624 ehc->saved_xfer_mode[dev->devno] = 0;
3625 ehc->saved_ncq_enabled &= ~(1 << dev->devno);
3626
3627 /* the link maybe in a deep sleep, wake it up */
3628 if (link->lpm_policy > ATA_LPM_MAX_POWER) {
3629 if (ata_is_host_link(link))
3630 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER,
3631 ATA_LPM_EMPTY);
3632 else
3633 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
3634 ATA_LPM_EMPTY);
3635 }
3636
3637 /* Record and count probe trials on the ering. The specific
3638 * error mask used is irrelevant. Because a successful device
3639 * detection clears the ering, this count accumulates only if
3640 * there are consecutive failed probes.
3641 *
3642 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS
3643 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is
3644 * forced to 1.5Gbps.
3645 *
3646 * This is to work around cases where failed link speed
3647 * negotiation results in device misdetection leading to
3648 * infinite DEVXCHG or PHRDY CHG events.
3649 */
3650 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
3651 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);
3652
3653 if (trials > ATA_EH_PROBE_TRIALS)
3654 sata_down_spd_limit(link, 1);
3655
3656 return 1;
3657 }
3658
ata_eh_handle_dev_fail(struct ata_device * dev,int err)3659 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
3660 {
3661 struct ata_eh_context *ehc = &dev->link->eh_context;
3662
3663 /* -EAGAIN from EH routine indicates retry without prejudice.
3664 * The requester is responsible for ensuring forward progress.
3665 */
3666 if (err != -EAGAIN)
3667 ehc->tries[dev->devno]--;
3668
3669 switch (err) {
3670 case -ENODEV:
3671 /* device missing or wrong IDENTIFY data, schedule probing */
3672 ehc->i.probe_mask |= (1 << dev->devno);
3673 /* fall through */
3674 case -EINVAL:
3675 /* give it just one more chance */
3676 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
3677 /* fall through */
3678 case -EIO:
3679 if (ehc->tries[dev->devno] == 1) {
3680 /* This is the last chance, better to slow
3681 * down than lose it.
3682 */
3683 sata_down_spd_limit(ata_dev_phys_link(dev), 0);
3684 if (dev->pio_mode > XFER_PIO_0)
3685 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
3686 }
3687 }
3688
3689 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
3690 /* disable device if it has used up all its chances */
3691 ata_dev_disable(dev);
3692
3693 /* detach if offline */
3694 if (ata_phys_link_offline(ata_dev_phys_link(dev)))
3695 ata_eh_detach_dev(dev);
3696
3697 /* schedule probe if necessary */
3698 if (ata_eh_schedule_probe(dev)) {
3699 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
3700 memset(ehc->cmd_timeout_idx[dev->devno], 0,
3701 sizeof(ehc->cmd_timeout_idx[dev->devno]));
3702 }
3703
3704 return 1;
3705 } else {
3706 ehc->i.action |= ATA_EH_RESET;
3707 return 0;
3708 }
3709 }
3710
/**
 *	ata_eh_recover - recover host port after error
 *	@ap: host port to recover
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *	@r_failed_link: out parameter for failed link (can be NULL)
 *
 *	This is the alpha and omega, eum and yang, heart and soul of
 *	libata exception handling.  On entry, actions required to
 *	recover each link and hotplug requests are recorded in the
 *	link's eh_context.  This function executes all the operations
 *	with appropriate retrials and fallbacks to resurrect failed
 *	devices, detach goners and greet newcomers.
 *
 *	The sequence is: prepare every link, reset the links that ask
 *	for it, service head park requests, then revalidate/attach
 *	devices, set the transfer mode, clear ATAPI UNIT ATTENTION,
 *	retry FLUSH and configure link power management.  If any link
 *	fails in the last stage the whole sequence is retried from the
 *	"retry" label.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.  When a non-zero value is
 *	returned through the "out" label and @r_failed_link is not
 *	NULL, *@r_failed_link is set to the link being processed at
 *	that point.
 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
		   ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
		   ata_postreset_fn_t postreset,
		   struct ata_link **r_failed_link)
{
	struct ata_link *link;
	struct ata_device *dev;
	int rc, nr_fails;
	unsigned long flags, deadline;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* re-enable link? */
		if (ehc->i.action & ATA_EH_ENABLE_LINK) {
			ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
			spin_lock_irqsave(ap->lock, flags);
			link->flags &= ~ATA_LFLAG_DISABLED;
			spin_unlock_irqrestore(ap->lock, flags);
			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
		}

		ata_for_each_dev(dev, link, ALL) {
			/* a link flagged NO_RETRY gets a single try */
			if (link->flags & ATA_LFLAG_NO_RETRY)
				ehc->tries[dev->devno] = 1;
			else
				ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

			/* collect port action mask recorded in dev actions */
			ehc->i.action |= ehc->i.dev_action[dev->devno] &
					 ~ATA_EH_PERDEV_MASK;
			ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

			/* process hotplug request */
			if (dev->flags & ATA_DFLAG_DETACH)
				ata_eh_detach_dev(dev);

			/* schedule probe if necessary */
			if (!ata_dev_enabled(dev))
				ata_eh_schedule_probe(dev);
		}
	}

 retry:
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for EH */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* skip EH if possible. */
		if (ata_eh_skip_recovery(link))
			ehc->i.action = 0;

		/* start every pass with unknown device classes */
		ata_for_each_dev(dev, link, ALL)
			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
	}

	/* reset */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (!(ehc->i.action & ATA_EH_RESET))
			continue;

		rc = ata_eh_reset(link, ata_link_nr_vacant(link),
				  prereset, softreset, hardreset, postreset);
		if (rc) {
			ata_link_err(link, "reset failed, giving up\n");
			goto out;
		}
	}

	/*
	 * Park handling: issue park commands to devices with a pending
	 * ATA_EH_PARK request and wait until the latest unpark deadline
	 * has passed, picking up new park requests on every pass.
	 */
	do {
		unsigned long now;

		/*
		 * clears ATA_EH_PARK in eh_info and resets
		 * ap->park_req_pending
		 */
		ata_eh_pull_park_action(ap);

		/* deadline becomes the latest unpark_deadline seen below */
		deadline = jiffies;
		ata_for_each_link(link, ap, EDGE) {
			ata_for_each_dev(dev, link, ALL) {
				struct ata_eh_context *ehc = &link->eh_context;
				unsigned long tmp;

				/* only ATA and ZAC devices are parked */
				if (dev->class != ATA_DEV_ATA &&
				    dev->class != ATA_DEV_ZAC)
					continue;
				if (!(ehc->i.dev_action[dev->devno] &
				      ATA_EH_PARK))
					continue;
				tmp = dev->unpark_deadline;
				if (time_before(deadline, tmp))
					deadline = tmp;
				else if (time_before_eq(tmp, jiffies))
					continue;
				/* already unloaded, no need to issue again */
				if (ehc->unloaded_mask & (1 << dev->devno))
					continue;

				ata_eh_park_issue_cmd(dev, 1);
			}
		}

		/* nothing left to wait for */
		now = jiffies;
		if (time_before_eq(deadline, now))
			break;

		/*
		 * deadline is reused for the wait's return value; a
		 * non-zero result sends us around the loop again.
		 */
		ata_eh_release(ap);
		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
		ata_eh_acquire(ap);
	} while (deadline);
	/* unpark every device recorded in unloaded_mask */
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			if (!(link->eh_context.unloaded_mask &
			      (1 << dev->devno)))
				continue;

			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
		}
	}

	/* the rest */
	nr_fails = 0;
	ata_for_each_link(link, ap, PMP_FIRST) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* with a PMP attached, the host link only gets LPM config */
		if (sata_pmp_attached(ap) && ata_is_host_link(link))
			goto config_lpm;

		/* revalidate existing devices and attach new ones */
		rc = ata_eh_revalidate_and_attach(link, &dev);
		if (rc)
			goto rest_fail;

		/* if PMP got attached, return, pmp EH will take care of it */
		if (link->device->class == ATA_DEV_PMP) {
			ehc->i.action = 0;
			return 0;
		}

		/* configure transfer mode if necessary */
		if (ehc->i.flags & ATA_EHI_SETMODE) {
			rc = ata_set_mode(link, &dev);
			if (rc)
				goto rest_fail;
			ehc->i.flags &= ~ATA_EHI_SETMODE;
		}

		/* If reset has been issued, clear UA to avoid
		 * disrupting the current users of the device.
		 */
		if (ehc->i.flags & ATA_EHI_DID_RESET) {
			ata_for_each_dev(dev, link, ALL) {
				if (dev->class != ATA_DEV_ATAPI)
					continue;
				rc = atapi_eh_clear_ua(dev);
				if (rc)
					goto rest_fail;
				if (zpodd_dev_enabled(dev))
					zpodd_post_poweron(dev);
			}
		}

		/* retry flush if necessary */
		ata_for_each_dev(dev, link, ALL) {
			if (dev->class != ATA_DEV_ATA &&
			    dev->class != ATA_DEV_ZAC)
				continue;
			rc = ata_eh_maybe_retry_flush(dev);
			if (rc)
				goto rest_fail;
		}

	config_lpm:
		/* configure link power saving */
		if (link->lpm_policy != ap->target_lpm_policy) {
			rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev);
			if (rc)
				goto rest_fail;
		}

		/* this link is okay now */
		ehc->i.flags = 0;
		continue;

	rest_fail:
		/* dev is the device the failing step stopped at; may be NULL */
		nr_fails++;
		if (dev)
			ata_eh_handle_dev_fail(dev, rc);

		if (ap->pflags & ATA_PFLAG_FROZEN) {
			/* PMP reset requires working host port.
			 * Can't retry if it's frozen.
			 */
			if (sata_pmp_attached(ap))
				goto out;
			break;
		}
	}

	/* at least one link failed, run the whole sequence again */
	if (nr_fails)
		goto retry;

 out:
	if (rc && r_failed_link)
		*r_failed_link = link;

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}
3955
3956 /**
3957 * ata_eh_finish - finish up EH
3958 * @ap: host port to finish EH for
3959 *
3960 * Recovery is complete. Clean up EH states and retry or finish
3961 * failed qcs.
3962 *
3963 * LOCKING:
3964 * None.
3965 */
ata_eh_finish(struct ata_port * ap)3966 void ata_eh_finish(struct ata_port *ap)
3967 {
3968 struct ata_queued_cmd *qc;
3969 int tag;
3970
3971 /* retry or finish qcs */
3972 ata_qc_for_each_raw(ap, qc, tag) {
3973 if (!(qc->flags & ATA_QCFLAG_FAILED))
3974 continue;
3975
3976 if (qc->err_mask) {
3977 /* FIXME: Once EH migration is complete,
3978 * generate sense data in this function,
3979 * considering both err_mask and tf.
3980 */
3981 if (qc->flags & ATA_QCFLAG_RETRY)
3982 ata_eh_qc_retry(qc);
3983 else
3984 ata_eh_qc_complete(qc);
3985 } else {
3986 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
3987 ata_eh_qc_complete(qc);
3988 } else {
3989 /* feed zero TF to sense generation */
3990 memset(&qc->result_tf, 0, sizeof(qc->result_tf));
3991 ata_eh_qc_retry(qc);
3992 }
3993 }
3994 }
3995
3996 /* make sure nr_active_links is zero after EH */
3997 WARN_ON(ap->nr_active_links);
3998 ap->nr_active_links = 0;
3999 }
4000
4001 /**
4002 * ata_do_eh - do standard error handling
4003 * @ap: host port to handle error for
4004 *
4005 * @prereset: prereset method (can be NULL)
4006 * @softreset: softreset method (can be NULL)
4007 * @hardreset: hardreset method (can be NULL)
4008 * @postreset: postreset method (can be NULL)
4009 *
4010 * Perform standard error handling sequence.
4011 *
4012 * LOCKING:
4013 * Kernel thread context (may sleep).
4014 */
ata_do_eh(struct ata_port * ap,ata_prereset_fn_t prereset,ata_reset_fn_t softreset,ata_reset_fn_t hardreset,ata_postreset_fn_t postreset)4015 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
4016 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
4017 ata_postreset_fn_t postreset)
4018 {
4019 struct ata_device *dev;
4020 int rc;
4021
4022 ata_eh_autopsy(ap);
4023 ata_eh_report(ap);
4024
4025 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
4026 NULL);
4027 if (rc) {
4028 ata_for_each_dev(dev, &ap->link, ALL)
4029 ata_dev_disable(dev);
4030 }
4031
4032 ata_eh_finish(ap);
4033 }
4034
4035 /**
4036 * ata_std_error_handler - standard error handler
4037 * @ap: host port to handle error for
4038 *
4039 * Standard error handler
4040 *
4041 * LOCKING:
4042 * Kernel thread context (may sleep).
4043 */
ata_std_error_handler(struct ata_port * ap)4044 void ata_std_error_handler(struct ata_port *ap)
4045 {
4046 struct ata_port_operations *ops = ap->ops;
4047 ata_reset_fn_t hardreset = ops->hardreset;
4048
4049 /* ignore built-in hardreset if SCR access is not available */
4050 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
4051 hardreset = NULL;
4052
4053 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
4054 }
4055
4056 #ifdef CONFIG_PM
4057 /**
4058 * ata_eh_handle_port_suspend - perform port suspend operation
4059 * @ap: port to suspend
4060 *
4061 * Suspend @ap.
4062 *
4063 * LOCKING:
4064 * Kernel thread context (may sleep).
4065 */
ata_eh_handle_port_suspend(struct ata_port * ap)4066 static void ata_eh_handle_port_suspend(struct ata_port *ap)
4067 {
4068 unsigned long flags;
4069 int rc = 0;
4070 struct ata_device *dev;
4071
4072 /* are we suspending? */
4073 spin_lock_irqsave(ap->lock, flags);
4074 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
4075 ap->pm_mesg.event & PM_EVENT_RESUME) {
4076 spin_unlock_irqrestore(ap->lock, flags);
4077 return;
4078 }
4079 spin_unlock_irqrestore(ap->lock, flags);
4080
4081 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
4082
4083 /*
4084 * If we have a ZPODD attached, check its zero
4085 * power ready status before the port is frozen.
4086 * Only needed for runtime suspend.
4087 */
4088 if (PMSG_IS_AUTO(ap->pm_mesg)) {
4089 ata_for_each_dev(dev, &ap->link, ENABLED) {
4090 if (zpodd_dev_enabled(dev))
4091 zpodd_on_suspend(dev);
4092 }
4093 }
4094
4095 /* tell ACPI we're suspending */
4096 rc = ata_acpi_on_suspend(ap);
4097 if (rc)
4098 goto out;
4099
4100 /* suspend */
4101 ata_eh_freeze_port(ap);
4102
4103 if (ap->ops->port_suspend)
4104 rc = ap->ops->port_suspend(ap, ap->pm_mesg);
4105
4106 ata_acpi_set_state(ap, ap->pm_mesg);
4107 out:
4108 /* update the flags */
4109 spin_lock_irqsave(ap->lock, flags);
4110
4111 ap->pflags &= ~ATA_PFLAG_PM_PENDING;
4112 if (rc == 0)
4113 ap->pflags |= ATA_PFLAG_SUSPENDED;
4114 else if (ap->pflags & ATA_PFLAG_FROZEN)
4115 ata_port_schedule_eh(ap);
4116
4117 spin_unlock_irqrestore(ap->lock, flags);
4118
4119 return;
4120 }
4121
4122 /**
4123 * ata_eh_handle_port_resume - perform port resume operation
4124 * @ap: port to resume
4125 *
4126 * Resume @ap.
4127 *
4128 * LOCKING:
4129 * Kernel thread context (may sleep).
4130 */
ata_eh_handle_port_resume(struct ata_port * ap)4131 static void ata_eh_handle_port_resume(struct ata_port *ap)
4132 {
4133 struct ata_link *link;
4134 struct ata_device *dev;
4135 unsigned long flags;
4136
4137 /* are we resuming? */
4138 spin_lock_irqsave(ap->lock, flags);
4139 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
4140 !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
4141 spin_unlock_irqrestore(ap->lock, flags);
4142 return;
4143 }
4144 spin_unlock_irqrestore(ap->lock, flags);
4145
4146 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));
4147
4148 /*
4149 * Error timestamps are in jiffies which doesn't run while
4150 * suspended and PHY events during resume isn't too uncommon.
4151 * When the two are combined, it can lead to unnecessary speed
4152 * downs if the machine is suspended and resumed repeatedly.
4153 * Clear error history.
4154 */
4155 ata_for_each_link(link, ap, HOST_FIRST)
4156 ata_for_each_dev(dev, link, ALL)
4157 ata_ering_clear(&dev->ering);
4158
4159 ata_acpi_set_state(ap, ap->pm_mesg);
4160
4161 if (ap->ops->port_resume)
4162 ap->ops->port_resume(ap);
4163
4164 /* tell ACPI that we're resuming */
4165 ata_acpi_on_resume(ap);
4166
4167 /* update the flags */
4168 spin_lock_irqsave(ap->lock, flags);
4169 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
4170 spin_unlock_irqrestore(ap->lock, flags);
4171 }
4172 #endif /* CONFIG_PM */
4173