Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * libata-eh.c - libata error handling
4 *
5 * Maintained by: Tejun Heo <tj@kernel.org>
6 * Please ALWAYS copy linux-ide@vger.kernel.org
7 * on emails.
8 *
9 * Copyright 2006 Tejun Heo <htejun@gmail.com>
10 *
11 * libata documentation is available via 'make {ps|pdf}docs',
12 * as Documentation/driver-api/libata.rst
13 *
14 * Hardware documentation available from http://www.t13.org/ and
15 * http://www.sata-io.org/
16 */
17
18#include <linux/kernel.h>
19#include <linux/blkdev.h>
20#include <linux/export.h>
21#include <linux/pci.h>
22#include <scsi/scsi.h>
23#include <scsi/scsi_host.h>
24#include <scsi/scsi_eh.h>
25#include <scsi/scsi_device.h>
26#include <scsi/scsi_cmnd.h>
27#include <scsi/scsi_dbg.h>
28#include "../scsi/scsi_transport_api.h"
29
30#include <linux/libata.h>
31
32#include <trace/events/libata.h>
33#include "libata.h"
34
enum {
	/* speed down verdicts — returned by the speed-down decision logic
	 * to tell EH how to degrade a misbehaving device */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags recorded with each error-ring entry */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),	/* entry predates last clear */

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	/* default timeout for internal commands not in the timeout table,
	 * in milliseconds (as are all timeout values below) */
	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable. It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting. Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	/* retries for Unit Attention handling */
	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};
77
/* The following table determines how we sequence resets. Each entry
 * represents timeout for that try. The first try can be soft or
 * hardreset. All others are hardreset if available. In most cases
 * the first reset w/ 10sec timeout should succeed. Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 *
 * All timeouts below are in milliseconds; ULONG_MAX terminates a table
 * and means "no further retry with a longer timeout".
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

/* timeouts for IDENTIFY (DEVICE) commands, per retry */
static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,  /* combined time till here is enough even for media access */
	30000,	/* for outliers that take exceptionally long */
	ULONG_MAX,
};

/* timeouts for FLUSH CACHE commands, which may legitimately take long */
static const unsigned long ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,  /* ditto */
	30000,	/* and even more generous */
	ULONG_MAX,
};

/* timeouts for the remaining classified internal commands */
static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

/* one command class: a zero-terminated list of opcodes plus the timeout
 * table shared by those opcodes */
struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands. Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used. If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS
148
/* defined below but needed by the command error handler above it */
static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
/* without power management, port suspend/resume handling is a no-op */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */
160
161static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
162 const char *fmt, va_list args)
163{
164 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
165 ATA_EH_DESC_LEN - ehi->desc_len,
166 fmt, args);
167}
168
/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	Unlike ata_ehi_push_desc(), no ", " separator is inserted.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
187
/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	/* separate from any earlier description fragment */
	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
210
211/**
212 * ata_ehi_clear_desc - clean error description
213 * @ehi: target EHI
214 *
215 * Clear @ehi->desc.
216 *
217 * LOCKING:
218 * spin_lock_irqsave(host lock)
219 */
220void ata_ehi_clear_desc(struct ata_eh_info *ehi)
221{
222 ehi->desc[0] = '\0';
223 ehi->desc_len = 0;
224}
225
/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description. If port description is not empty, " " is added
 *	in-between. This function is to be used while initializing
 *	ata_host. The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	/* only valid before the port goes live */
	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}
252
253#ifdef CONFIG_PCI
254
255/**
256 * ata_port_pbar_desc - append PCI BAR description
257 * @ap: target ATA port
258 * @bar: target PCI BAR
259 * @offset: offset into PCI BAR
260 * @name: name of the area
261 *
262 * If @offset is negative, this function formats a string which
263 * contains the name, address, size and type of the BAR and
264 * appends it to the port description. If @offset is zero or
265 * positive, only name and offsetted address is appended.
266 *
267 * LOCKING:
268 * None.
269 */
270void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
271 const char *name)
272{
273 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
274 char *type = "";
275 unsigned long long start, len;
276
277 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
278 type = "m";
279 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
280 type = "i";
281
282 start = (unsigned long long)pci_resource_start(pdev, bar);
283 len = (unsigned long long)pci_resource_len(pdev, bar);
284
285 if (offset < 0)
286 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
287 else
288 ata_port_desc(ap, "%s 0x%llx", name,
289 start + (unsigned long long)offset);
290}
291
292#endif /* CONFIG_PCI */
293
294static int ata_lookup_timeout_table(u8 cmd)
295{
296 int i;
297
298 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
299 const u8 *cur;
300
301 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
302 if (*cur == cmd)
303 return i;
304 }
305
306 return -1;
307}
308
309/**
310 * ata_internal_cmd_timeout - determine timeout for an internal command
311 * @dev: target device
312 * @cmd: internal command to be issued
313 *
314 * Determine timeout for internal command @cmd for @dev.
315 *
316 * LOCKING:
317 * EH context.
318 *
319 * RETURNS:
320 * Determined timeout.
321 */
322unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
323{
324 struct ata_eh_context *ehc = &dev->link->eh_context;
325 int ent = ata_lookup_timeout_table(cmd);
326 int idx;
327
328 if (ent < 0)
329 return ATA_EH_CMD_DFL_TIMEOUT;
330
331 idx = ehc->cmd_timeout_idx[dev->devno][ent];
332 return ata_eh_cmd_timeout_table[ent].timeouts[idx];
333}
334
335/**
336 * ata_internal_cmd_timed_out - notification for internal command timeout
337 * @dev: target device
338 * @cmd: internal command which timed out
339 *
340 * Notify EH that internal command @cmd for @dev timed out. This
341 * function should be called only for commands whose timeouts are
342 * determined using ata_internal_cmd_timeout().
343 *
344 * LOCKING:
345 * EH context.
346 */
347void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
348{
349 struct ata_eh_context *ehc = &dev->link->eh_context;
350 int ent = ata_lookup_timeout_table(cmd);
351 int idx;
352
353 if (ent < 0)
354 return;
355
356 idx = ehc->cmd_timeout_idx[dev->devno][ent];
357 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
358 ehc->cmd_timeout_idx[dev->devno][ent]++;
359}
360
361static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
362 unsigned int err_mask)
363{
364 struct ata_ering_entry *ent;
365
366 WARN_ON(!err_mask);
367
368 ering->cursor++;
369 ering->cursor %= ATA_ERING_SIZE;
370
371 ent = &ering->ring[ering->cursor];
372 ent->eflags = eflags;
373 ent->err_mask = err_mask;
374 ent->timestamp = get_jiffies_64();
375}
376
377static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
378{
379 struct ata_ering_entry *ent = &ering->ring[ering->cursor];
380
381 if (ent->err_mask)
382 return ent;
383 return NULL;
384}
385
386int ata_ering_map(struct ata_ering *ering,
387 int (*map_fn)(struct ata_ering_entry *, void *),
388 void *arg)
389{
390 int idx, rc = 0;
391 struct ata_ering_entry *ent;
392
393 idx = ering->cursor;
394 do {
395 ent = &ering->ring[idx];
396 if (!ent->err_mask)
397 break;
398 rc = map_fn(ent, arg);
399 if (rc)
400 break;
401 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
402 } while (idx != ering->cursor);
403
404 return rc;
405}
406
/* ata_ering_map() callback: tag one entry as belonging to an old error
 * report.  Always returns 0 so the whole ring is walked.
 */
static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
{
	ent->eflags |= ATA_EFLAG_OLD_ER;
	return 0;
}

/* Soft-clear @ering: mark every recorded entry with ATA_EFLAG_OLD_ER
 * instead of erasing it, so history is kept but flagged as stale.
 */
static void ata_ering_clear(struct ata_ering *ering)
{
	ata_ering_map(ering, ata_ering_clear_cb, NULL);
}
417
418static unsigned int ata_eh_dev_action(struct ata_device *dev)
419{
420 struct ata_eh_context *ehc = &dev->link->eh_context;
421
422 return ehc->i.action | ehc->i.dev_action[dev->devno];
423}
424
425static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
426 struct ata_eh_info *ehi, unsigned int action)
427{
428 struct ata_device *tdev;
429
430 if (!dev) {
431 ehi->action &= ~action;
432 ata_for_each_dev(tdev, link, ALL)
433 ehi->dev_action[tdev->devno] &= ~action;
434 } else {
435 /* doesn't make sense for port-wide EH actions */
436 WARN_ON(!(action & ATA_EH_PERDEV_MASK));
437
438 /* break ehi->action into ehi->dev_action */
439 if (ehi->action & action) {
440 ata_for_each_dev(tdev, link, ALL)
441 ehi->dev_action[tdev->devno] |=
442 ehi->action & action;
443 ehi->action &= ~action;
444 }
445
446 /* turn off the specified per-dev action */
447 ehi->dev_action[dev->devno] &= ~action;
448 }
449}
450
/**
 *	ata_eh_acquire - acquire EH ownership
 *	@ap: ATA port to acquire EH ownership for
 *
 *	Acquire EH ownership for @ap. This is the basic exclusion
 *	mechanism for ports sharing a host. Only one port hanging off
 *	the same host can claim the ownership of EH.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	/* must not already be owned — double-acquire indicates a bug */
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}
468
/**
 *	ata_eh_release - release EH ownership
 *	@ap: ATA port to release EH ownership for
 *
 *	Release EH ownership for @ap. The caller must have acquired
 *	EH ownership using ata_eh_acquire() previously.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	/* only the current owner may release */
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}
485
/* Prepare @ap for driver unload: restore SControl for the next driver,
 * disable all attached devices, then freeze the port permanently and
 * mark it ATA_PFLAG_UNLOADED.
 */
static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}
510
/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events: splices the failed
 *	commands off the SCSI EH queue, classifies them, and runs port
 *	recovery.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	DPRINTK("ENTER\n");

	/* grab the failed scmds under host lock so new failures queue
	 * separately */
	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* NOTE(review): if the timeout raced normal completion and there
	 * is nothing to recover (nr_timedout == 0), it is an open
	 * question why error recovery still runs here.
	 */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(!list_empty(&eh_work_q));

	DPRINTK("EXIT\n");
}
546
/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host:	scsi host containing the port
 * @ap:		ATA port within the host
 * @eh_work_q:	list of commands to process
 *
 * process the given list of commands and return those finished to the
 * ap->eh_done_q.  This function is the first part of the libata error
 * handler which processes a given list of failed commands.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		/* This must occur under the ap->lock as we don't want
		   a polled recovery to race the real interrupt handler

		   The lost_interrupt handler checks for any completed but
		   non-notified command and completes much like an IRQ handler.

		   We then fall into the error recovery code which will treat
		   this as if normal completion won the race */

		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		/* match each timed-out scmd to its qc, if any */
		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
			struct ata_queued_cmd *qc;

			ata_qc_for_each_raw(ap, qc, i) {
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs.  They belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	}
	spin_unlock_irqrestore(ap->lock, flags);

}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
641
/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host:	SCSI host containing the port
 * @ap:		the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			/* move eh_info into a fresh eh_context for this run */
			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			/* snapshot xfer mode / NCQ state for restoration */
			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* end eh (clear host_eh_scheduled) while holding
		 * ap->lock such that if exception occurs after this
		 * point but before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		ap->ops->end_eh(ap);

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		/* legacy EH path: a qc must be active for eng_timeout */
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);
765
/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	/* sleep on eh_wait_q until neither pending nor in-progress */
	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);
800
801static int ata_eh_nr_in_flight(struct ata_port *ap)
802{
803 struct ata_queued_cmd *qc;
804 unsigned int tag;
805 int nr = 0;
806
807 /* count only non-internal commands */
808 ata_qc_for_each(ap, qc, tag) {
809 if (qc)
810 nr++;
811 }
812
813 return nr;
814}
815
/* Fast drain timer callback.  Fires ATA_EH_FASTDRAIN_INTERVAL after EH
 * was scheduled with in-flight commands.  If none completed since the
 * last check, all in-flight qcs are timed out and the port is frozen;
 * otherwise the timer is re-armed to give the rest another interval.
 */
void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		struct ata_queued_cmd *qc;
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		ata_qc_for_each(ap, qc, tag) {
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}
854
855/**
856 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
857 * @ap: target ATA port
858 * @fastdrain: activate fast drain
859 *
860 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
861 * is non-zero and EH wasn't pending before. Fast drain ensures
862 * that EH kicks in in timely manner.
863 *
864 * LOCKING:
865 * spin_lock_irqsave(host lock)
866 */
867static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
868{
869 int cnt;
870
871 /* already scheduled? */
872 if (ap->pflags & ATA_PFLAG_EH_PENDING)
873 return;
874
875 ap->pflags |= ATA_PFLAG_EH_PENDING;
876
877 if (!fastdrain)
878 return;
879
880 /* do we have in-flight qcs? */
881 cnt = ata_eh_nr_in_flight(ap);
882 if (!cnt)
883 return;
884
885 /* activate fast drain */
886 ap->fastdrain_cnt = cnt;
887 ap->fastdrain_timer.expires =
888 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
889 add_timer(&ap->fastdrain_timer);
890}
891
/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	/* mark failed and set pending BEFORE aborting the request */
	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(qc->scsicmd->request);
}
918
/**
 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 * @ap: ATA port to schedule EH for
 *
 *	LOCKING: inherited from ata_port_schedule_eh
 *	spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	/* port is still being set up — don't run EH yet */
	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	/* pending flag must be set before waking the SCSI EH thread */
	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);
939
940/**
941 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
942 * @ap: ATA port to end EH for
943 *
944 * In the libata object model there is a 1:1 mapping of ata_port to
945 * shost, so host fields can be directly manipulated under ap->lock, in
946 * the libsas case we need to hold a lock at the ha->level to coordinate
947 * these events.
948 *
949 * LOCKING:
950 * spin_lock_irqsave(host lock)
951 */
952void ata_std_end_eh(struct ata_port *ap)
953{
954 struct Scsi_Host *host = ap->scsi_host;
955
956 host->host_eh_scheduled = 0;
957}
958EXPORT_SYMBOL(ata_std_end_eh);
959

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.  Dispatches through the port's
 *	->sched_eh operation (ata_std_sched_eh for non-libsas ports).
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}
976
977static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
978{
979 struct ata_queued_cmd *qc;
980 int tag, nr_aborted = 0;
981
982 WARN_ON(!ap->ops->error_handler);
983
984 /* we're gonna abort all commands, no need for fast drain */
985 ata_eh_set_pending(ap, 0);
986
987 /* include internal tag in iteration */
988 ata_qc_for_each_with_internal(ap, qc, tag) {
989 if (qc && (!link || qc->dev->link == link)) {
990 qc->flags |= ATA_QCFLAG_FAILED;
991 ata_qc_complete(qc);
992 nr_aborted++;
993 }
994 }
995
996 if (!nr_aborted)
997 ata_port_schedule_eh(ap);
998
999 return nr_aborted;
1000}
1001
/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's active on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}
1018
/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	/* NULL link means "all links" to ata_do_link_abort() */
	return ata_do_link_abort(ap, NULL);
}
1035
/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  Frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	/* hardware freeze first, then the software flag */
	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}
1065
1066/**
1067 * ata_port_freeze - abort & freeze port
1068 * @ap: ATA port to freeze
1069 *
1070 * Abort and freeze @ap. The freeze operation must be called
1071 * first, because some hardware requires special operations
1072 * before the taskfile registers are accessible.
1073 *
1074 * LOCKING:
1075 * spin_lock_irqsave(host lock)
1076 *
1077 * RETURNS:
1078 * Number of aborted commands.
1079 */
1080int ata_port_freeze(struct ata_port *ap)
1081{
1082 int nr_aborted;
1083
1084 WARN_ON(!ap->ops->error_handler);
1085
1086 __ata_port_freeze(ap);
1087 nr_aborted = ata_port_abort(ap);
1088
1089 return nr_aborted;
1090}
1091
/**
 *	sata_async_notification - SATA async notification handler
 *	@ap: ATA port where async notification is received
 *
 *	Handler to be called when async notification via SDB FIS is
 *	received.  This function schedules EH if necessary.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	/* ports without AN support have nothing to do */
	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	/* read and acknowledge the notification bits */
	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached.  Check whether ATAPI
			 * AN is configured.  If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used.  The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}
1164
1165/**
1166 * ata_eh_freeze_port - EH helper to freeze port
1167 * @ap: ATA port to freeze
1168 *
1169 * Freeze @ap.
1170 *
1171 * LOCKING:
1172 * None.
1173 */
1174void ata_eh_freeze_port(struct ata_port *ap)
1175{
1176 unsigned long flags;
1177
1178 if (!ap->ops->error_handler)
1179 return;
1180
1181 spin_lock_irqsave(ap->lock, flags);
1182 __ata_port_freeze(ap);
1183 spin_unlock_irqrestore(ap->lock, flags);
1184}
1185
1186/**
 * ata_eh_thaw_port - EH helper to thaw port
1188 * @ap: ATA port to thaw
1189 *
1190 * Thaw frozen port @ap.
1191 *
1192 * LOCKING:
1193 * None.
1194 */
1195void ata_eh_thaw_port(struct ata_port *ap)
1196{
1197 unsigned long flags;
1198
1199 if (!ap->ops->error_handler)
1200 return;
1201
1202 spin_lock_irqsave(ap->lock, flags);
1203
1204 ap->pflags &= ~ATA_PFLAG_FROZEN;
1205
1206 if (ap->ops->thaw)
1207 ap->ops->thaw(ap);
1208
1209 spin_unlock_irqrestore(ap->lock, flags);
1210
1211 DPRINTK("ata%u port thawed\n", ap->print_id);
1212}
1213
/* Dummy ->scsidone callback for commands completed from EH.  Such
 * commands are handed back to the SCSI midlayer through
 * scsi_eh_finish_cmd() instead (see __ata_eh_qc_complete()), so the
 * normal completion callback must do nothing.
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}
1218
/* Complete @qc from EH context: neuter the scsi completion callback,
 * finish the qc under ap->lock, then return the scsi command to the
 * midlayer via the port's EH done queue.
 */
static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	/* the normal completion path must not fire from EH */
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	/* the qc's tag should have been released by completion */
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}
1233
1234/**
1235 * ata_eh_qc_complete - Complete an active ATA command from EH
1236 * @qc: Command to complete
1237 *
1238 * Indicate to the mid and upper layers that an ATA command has
1239 * completed. To be used from EH.
1240 */
1241void ata_eh_qc_complete(struct ata_queued_cmd *qc)
1242{
1243 struct scsi_cmnd *scmd = qc->scsicmd;
1244 scmd->retries = scmd->allowed;
1245 __ata_eh_qc_complete(qc);
1246}
1247
1248/**
1249 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
1250 * @qc: Command to retry
1251 *
1252 * Indicate to the mid and upper layers that an ATA command
1253 * should be retried. To be used from EH.
1254 *
1255 * SCSI midlayer limits the number of retries to scmd->allowed.
1256 * scmd->allowed is incremented for commands which get retried
1257 * due to unrelated failures (qc->err_mask is zero).
1258 */
1259void ata_eh_qc_retry(struct ata_queued_cmd *qc)
1260{
1261 struct scsi_cmnd *scmd = qc->scsicmd;
1262 if (!qc->err_mask)
1263 scmd->allowed++;
1264 __ata_eh_qc_complete(qc);
1265}
1266
1267/**
1268 * ata_dev_disable - disable ATA device
1269 * @dev: ATA device to disable
1270 *
1271 * Disable @dev.
1272 *
1273 * Locking:
1274 * EH context.
1275 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_warn(dev, "disabled\n");
	ata_acpi_on_disable(dev);
	/* clamp transfer mode down to PIO0 quietly */
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	/* NOTE(review): bumping class appears to move it to the matching
	 * disabled/unsupported variant so ata_dev_enabled() returns false
	 * while remembering the original class -- relies on the ATA_DEV_*
	 * enum layout; confirm against libata.h
	 */
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures. Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}
1292
1293/**
1294 * ata_eh_detach_dev - detach ATA device
1295 * @dev: ATA device to detach
1296 *
1297 * Detach @dev.
1298 *
1299 * LOCKING:
1300 * None.
1301 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	/* take the device out of service before tearing down state */
	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	/* the detach request is being serviced now */
	dev->flags &= ~ATA_DFLAG_DETACH;

	/* if the scsi device could be offlined, hand the final removal
	 * to the SCSI hotplug task
	 */
	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}
1328
1329/**
1330 * ata_eh_about_to_do - about to perform eh_action
1331 * @link: target ATA link
1332 * @dev: target ATA dev for per-dev action (can be NULL)
1333 * @action: action about to be performed
1334 *
1335 * Called just before performing EH actions to clear related bits
1336 * in @link->eh_info such that eh actions are not unnecessarily
1337 * repeated.
1338 *
1339 * LOCKING:
1340 * None.
1341 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	/* drop the pending bits so the same action isn't re-scheduled */
	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED. Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}
1362
1363/**
1364 * ata_eh_done - EH action complete
1365 * @link: ATA link for which EH actions are complete
1366 * @dev: target ATA dev for per-dev action (can be NULL)
1367 * @action: action just completed
1368 *
1369 * Called right after performing EH actions to clear related bits
1370 * in @link->eh_context.
1371 *
1372 * LOCKING:
1373 * None.
1374 */
1375void ata_eh_done(struct ata_link *link, struct ata_device *dev,
1376 unsigned int action)
1377{
1378 struct ata_eh_context *ehc = &link->eh_context;
1379
1380 ata_eh_clear_action(link, dev, &ehc->i, action);
1381}
1382
1383/**
1384 * ata_err_string - convert err_mask to descriptive string
1385 * @err_mask: error mask to convert to string
1386 *
1387 * Convert @err_mask to descriptive string. Errors are
1388 * prioritized according to severity and only the most severe
1389 * error is reported.
1390 *
1391 * LOCKING:
1392 * None.
1393 *
1394 * RETURNS:
1395 * Descriptive string for @err_mask
1396 */
1397static const char *ata_err_string(unsigned int err_mask)
1398{
1399 if (err_mask & AC_ERR_HOST_BUS)
1400 return "host bus error";
1401 if (err_mask & AC_ERR_ATA_BUS)
1402 return "ATA bus error";
1403 if (err_mask & AC_ERR_TIMEOUT)
1404 return "timeout";
1405 if (err_mask & AC_ERR_HSM)
1406 return "HSM violation";
1407 if (err_mask & AC_ERR_SYSTEM)
1408 return "internal error";
1409 if (err_mask & AC_ERR_MEDIA)
1410 return "media error";
1411 if (err_mask & AC_ERR_INVALID)
1412 return "invalid argument";
1413 if (err_mask & AC_ERR_DEV)
1414 return "device error";
1415 if (err_mask & AC_ERR_NCQ)
1416 return "NCQ error";
1417 if (err_mask & AC_ERR_NODEV_HINT)
1418 return "Polling detection error";
1419 return "unknown error";
1420}
1421
1422/**
1423 * ata_eh_read_log_10h - Read log page 10h for NCQ error details
1424 * @dev: Device to read log page 10h from
1425 * @tag: Resulting tag of the failed command
1426 * @tf: Resulting taskfile registers of the failed command
1427 *
1428 * Read log page 10h to obtain NCQ error details and clear error
1429 * condition.
1430 *
1431 * LOCKING:
1432 * Kernel thread context (may sleep).
1433 *
1434 * RETURNS:
1435 * 0 on success, -errno otherwise.
1436 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	/* fetch one sector of the SATA NCQ error log (page 10h) */
	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
	if (err_mask)
		return -EIO;

	/* the page should sum to zero; warn but continue on mismatch */
	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
			     csum);

	/* top bit of byte 0 set: no valid queued error to report */
	if (buf[0] & 0x80)
		return -ENOENT;

	/* failing command's tag is in the low 5 bits of byte 0 */
	*tag = buf[0] & 0x1f;

	/* decode the error taskfile from its fixed byte offsets
	 * (byte 11 is reserved and skipped)
	 */
	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];
	/* with NCQ autosense, bytes 14-16 carry sense key/asc/ascq */
	if (ata_id_has_ncq_autosense(dev->id))
		tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];

	return 0;
}
1477
1478/**
1479 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY
1480 * @dev: target ATAPI device
1481 * @r_sense_key: out parameter for sense_key
1482 *
1483 * Perform ATAPI TEST_UNIT_READY.
1484 *
1485 * LOCKING:
1486 * EH context (may sleep).
1487 *
1488 * RETURNS:
1489 * 0 on success, AC_ERR_* mask on failure.
1490 */
1491unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
1492{
1493 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
1494 struct ata_taskfile tf;
1495 unsigned int err_mask;
1496
1497 ata_tf_init(dev, &tf);
1498
1499 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1500 tf.command = ATA_CMD_PACKET;
1501 tf.protocol = ATAPI_PROT_NODATA;
1502
1503 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
1504 if (err_mask == AC_ERR_DEV)
1505 *r_sense_key = tf.feature >> 4;
1506 return err_mask;
1507}
1508
1509/**
1510 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
 * @qc: qc to perform REQUEST_SENSE_DATA_EXT to
1512 * @cmd: scsi command for which the sense code should be set
1513 *
1514 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
1515 * SENSE. This function is an EH helper.
1516 *
1517 * LOCKING:
1518 * Kernel thread context (may sleep).
1519 */
static void ata_eh_request_sense(struct ata_queued_cmd *qc,
				 struct scsi_cmnd *cmd)
{
	struct ata_device *dev = qc->dev;
	struct ata_taskfile tf;
	unsigned int err_mask;

	/* a frozen port cannot issue internal commands */
	if (qc->ap->pflags & ATA_PFLAG_FROZEN) {
		ata_dev_warn(dev, "sense data available but port frozen\n");
		return;
	}

	/* nothing to do without a scsi command or if sense is set */
	if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID)
		return;

	if (!ata_id_sense_reporting_enabled(dev->id)) {
		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
		return;
	}

	DPRINTK("ATA request sense\n");

	/* issue REQUEST SENSE DATA EXT as a non-data command */
	ata_tf_init(dev, &tf);
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	tf.command = ATA_CMD_REQ_SENSE_DATA;
	tf.protocol = ATA_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* Ignore err_mask; ATA_ERR might be set */
	if (tf.command & ATA_SENSE) {
		/* sense key / asc / ascq come back in lbah/lbam/lbal */
		ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	} else {
		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
			     tf.command, err_mask);
	}
}
1558
1559/**
1560 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
1561 * @dev: device to perform REQUEST_SENSE to
1562 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
1563 * @dfl_sense_key: default sense key to use
1564 *
1565 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
1566 * SENSE. This function is EH helper.
1567 *
1568 * LOCKING:
1569 * Kernel thread context (may sleep).
1570 *
1571 * RETURNS:
1572 * 0 on success, AC_ERR_* mask on failure
1573 */
unsigned int atapi_eh_request_sense(struct ata_device *dev,
				    u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;	/* fixed-format sense data, current error */
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		/* PIO byte count limit goes in lbam (low) / lbah (high) */
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}
1611
1612/**
1613 * ata_eh_analyze_serror - analyze SError for a failed port
1614 * @link: ATA link to analyze SError for
1615 *
1616 * Analyze SError if available and further determine cause of
1617 * failure.
1618 *
1619 * LOCKING:
1620 * None.
1621 */
1622static void ata_eh_analyze_serror(struct ata_link *link)
1623{
1624 struct ata_eh_context *ehc = &link->eh_context;
1625 u32 serror = ehc->i.serror;
1626 unsigned int err_mask = 0, action = 0;
1627 u32 hotplug_mask;
1628
1629 if (serror & (SERR_PERSISTENT | SERR_DATA)) {
1630 err_mask |= AC_ERR_ATA_BUS;
1631 action |= ATA_EH_RESET;
1632 }
1633 if (serror & SERR_PROTOCOL) {
1634 err_mask |= AC_ERR_HSM;
1635 action |= ATA_EH_RESET;
1636 }
1637 if (serror & SERR_INTERNAL) {
1638 err_mask |= AC_ERR_SYSTEM;
1639 action |= ATA_EH_RESET;
1640 }
1641
1642 /* Determine whether a hotplug event has occurred. Both
1643 * SError.N/X are considered hotplug events for enabled or
1644 * host links. For disabled PMP links, only N bit is
1645 * considered as X bit is left at 1 for link plugging.
1646 */
1647 if (link->lpm_policy > ATA_LPM_MAX_POWER)
1648 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */
1649 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
1650 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
1651 else
1652 hotplug_mask = SERR_PHYRDY_CHG;
1653
1654 if (serror & hotplug_mask)
1655 ata_ehi_hotplugged(&ehc->i);
1656
1657 ehc->i.err_mask |= err_mask;
1658 ehc->i.action |= action;
1659}
1660
1661/**
1662 * ata_eh_analyze_ncq_error - analyze NCQ error
1663 * @link: ATA link to analyze NCQ error for
1664 *
1665 * Read log page 10h, determine the offending qc and acquire
1666 * error status TF. For NCQ device errors, all LLDDs have to do
1667 * is setting AC_ERR_DEV in ehi->err_mask. This function takes
1668 * care of the rest.
1669 *
1670 * LOCKING:
1671 * Kernel thread context (may sleep).
1672 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* a failed qc already carrying an err_mask means the
		 * LLDD did its own analysis; don't second-guess it
		 */
		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	memset(&tf, 0, sizeof(tf));
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
			     rc);
		return;
	}

	/* the reported tag must belong to an outstanding NCQ command */
	if (!(link->sactive & (1 << tag))) {
		ata_link_err(link, "log page 10h reported inactive tag %d\n",
			     tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	/* NCQ autosense: auxiliary carries sense key / asc / ascq */
	if ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary) {
		char sense_key, asc, ascq;

		sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
		asc = (qc->result_tf.auxiliary >> 8) & 0xff;
		ascq = qc->result_tf.auxiliary & 0xff;
		ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
		ata_scsi_set_sense_information(dev, qc->scsicmd,
					       &qc->result_tf);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	}

	/* the device error is now attributed to the qc, not the link */
	ehc->i.err_mask &= ~AC_ERR_DEV;
}
1733
1734/**
1735 * ata_eh_analyze_tf - analyze taskfile of a failed qc
1736 * @qc: qc to analyze
1737 * @tf: Taskfile registers to analyze
1738 *
1739 * Analyze taskfile of @qc and further determine cause of
1740 * failure. This function also requests ATAPI sense data if
1741 * available.
1742 *
1743 * LOCKING:
1744 * Kernel thread context (may sleep).
1745 *
1746 * RETURNS:
1747 * Determined recovery action
1748 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	/* status isn't the expected "DRDY set, BSY/DRQ clear": the
	 * host state machine is confused, only a reset will help
	 */
	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF)) {
		qc->err_mask |= AC_ERR_DEV;
		/*
		 * Sense data reporting does not work if the
		 * device fault bit is set.
		 */
		if (stat & ATA_DF)
			stat &= ~ATA_SENSE;
	} else {
		/* no device error reported; nothing further to analyze */
		return 0;
	}

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
	case ATA_DEV_ZAC:
		/* fetch sense if offered, then classify by error bits */
		if (stat & ATA_SENSE)
			ata_eh_request_sense(qc, qc->scsicmd);
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & (ATA_UNC | ATA_AMNF))
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		/* frozen ports can't issue the internal REQUEST SENSE */
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp)
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			else
				qc->err_mask |= tmp;
		}
	}

	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
		int ret = scsi_check_sense(qc->scsicmd);
		/*
		 * SUCCESS here means that the sense code could be
		 * evaluated and should be passed to the upper layers
		 * for correct evaluation.
		 * FAILED means the sense code could not be interpreted
		 * and the device would need to be reset.
		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
		 * command would need to be retried.
		 */
		if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
			qc->flags |= ATA_QCFLAG_RETRY;
			qc->err_mask |= AC_ERR_OTHER;
		} else if (ret != SUCCESS) {
			qc->err_mask |= AC_ERR_HSM;
		}
	}
	/* HSM violations, timeouts and bus errors all require a reset */
	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}
1820
1821static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
1822 int *xfer_ok)
1823{
1824 int base = 0;
1825
1826 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
1827 *xfer_ok = 1;
1828
1829 if (!*xfer_ok)
1830 base = ATA_ECAT_DUBIOUS_NONE;
1831
1832 if (err_mask & AC_ERR_ATA_BUS)
1833 return base + ATA_ECAT_ATA_BUS;
1834
1835 if (err_mask & AC_ERR_TIMEOUT)
1836 return base + ATA_ECAT_TOUT_HSM;
1837
1838 if (eflags & ATA_EFLAG_IS_IO) {
1839 if (err_mask & AC_ERR_HSM)
1840 return base + ATA_ECAT_TOUT_HSM;
1841 if ((err_mask &
1842 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1843 return base + ATA_ECAT_UNK_DEV;
1844 }
1845
1846 return 0;
1847}
1848
/* accumulator passed to speed_down_verdict_cb() via ata_ering_map() */
struct speed_down_verdict_arg {
	u64 since;		/* ignore entries with timestamp older than this */
	int xfer_ok;		/* sticky: set once a verified transfer is seen */
	int nr_errors[ATA_ECAT_NR];	/* error count per ATA_ECAT_* category */
};
1854
1855static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1856{
1857 struct speed_down_verdict_arg *arg = void_arg;
1858 int cat;
1859
1860 if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
1861 return -1;
1862
1863 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
1864 &arg->xfer_ok);
1865 arg->nr_errors[cat]++;
1866
1867 return 0;
1868}
1869
1870/**
1871 * ata_eh_speed_down_verdict - Determine speed down verdict
1872 * @dev: Device of interest
1873 *
1874 * This function examines error ring of @dev and determines
1875 * whether NCQ needs to be turned off, transfer speed should be
1876 * stepped down, or falling back to PIO is necessary.
1877 *
1878 * ECAT_ATA_BUS : ATA_BUS error for any command
1879 *
1880 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
1881 * IO commands
1882 *
1883 * ECAT_UNK_DEV : Unknown DEV error for IO commands
1884 *
1885 * ECAT_DUBIOUS_* : Identical to above three but occurred while
1886 * data transfer hasn't been verified.
1887 *
1888 * Verdicts are
1889 *
1890 * NCQ_OFF : Turn off NCQ.
1891 *
1892 * SPEED_DOWN : Speed down transfer speed but don't fall back
1893 * to PIO.
1894 *
1895 * FALLBACK_TO_PIO : Fall back to PIO.
1896 *
1897 * Even if multiple verdicts are returned, only one action is
1898 * taken per error. An action triggered by non-DUBIOUS errors
1899 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
1900 * This is to expedite speed down decisions right after device is
1901 * initially configured.
1902 *
1903 * The following are speed down rules. #1 and #2 deal with
1904 * DUBIOUS errors.
1905 *
1906 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
1907 * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
1908 *
1909 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
1910 * occurred during last 5 mins, NCQ_OFF.
1911 *
 * 3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 * occurred during last 5 mins, FALLBACK_TO_PIO
1914 *
1915 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
1916 * during last 10 mins, NCQ_OFF.
1917 *
1918 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
1919 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
1920 *
1921 * LOCKING:
1922 * Inherited from caller.
1923 *
1924 * RETURNS:
1925 * OR of ATA_EH_SPDN_* flags.
1926 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	/* rule #1: dubious ATA_BUS / TOUT_HSM errors */
	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	/* rule #2: dubious TOUT_HSM / UNK_DEV errors */
	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	/* rule #3: too many errors of any hard kind in 5 mins */
	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	/* rule #4: TOUT_HSM / UNK_DEV errors over the longer window */
	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	/* rule #5: ATA_BUS / TOUT_HSM or repeated UNK_DEV errors */
	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}
1969
1970/**
1971 * ata_eh_speed_down - record error and speed down if necessary
1972 * @dev: Failed device
1973 * @eflags: mask of ATA_EFLAG_* flags
1974 * @err_mask: err_mask of the error
1975 *
1976 * Record error and examine error history to determine whether
1977 * adjusting transmission speed is necessary. It also sets
1978 * transmission limits appropriately if such adjustment is
1979 * necessary.
1980 *
1981 * LOCKING:
1982 * Kernel thread context (may sleep).
1983 *
1984 * RETURNS:
1985 * Determined recovery action.
1986 */
static unsigned int ata_eh_speed_down(struct ata_device *dev,
				      unsigned int eflags, unsigned int err_mask)
{
	struct ata_link *link = ata_dev_phys_link(dev);
	int xfer_ok = 0;
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, eflags, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ?  (only if NCQ is on and not already forced off) */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(link, 0) == 0) {
			action |= ATA_EH_RESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			/* spdn_cnt indexes the two-step escalation
			 * tables above; pick by current transfer mode
			 */
			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_RESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO? Slowing down to PIO is meaningless for
	 * SATA ATA devices. Consider it only for PATA and SATAPI.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_RESET;
			goto done;
		}
	}

	return 0;
 done:
	/* device has been slowed down, blow error history */
	if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
		ata_ering_clear(&dev->ering);
	return action;
}
2062
2063/**
2064 * ata_eh_worth_retry - analyze error and decide whether to retry
2065 * @qc: qc to possibly retry
2066 *
2067 * Look at the cause of the error and decide if a retry
2068 * might be useful or not. We don't want to retry media errors
2069 * because the drive itself has probably already taken 10-30 seconds
2070 * doing its own internal retries before reporting the failure.
2071 */
2072static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
2073{
2074 if (qc->err_mask & AC_ERR_MEDIA)
2075 return 0; /* don't retry media errors */
2076 if (qc->flags & ATA_QCFLAG_IO)
2077 return 1; /* otherwise retry anything from fs stack */
2078 if (qc->err_mask & AC_ERR_INVALID)
2079 return 0; /* don't retry these */
2080 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */
2081}
2082
2083/**
2084 * ata_eh_quiet - check if we need to be quiet about a command error
2085 * @qc: qc to check
2086 *
 * Look at the qc flags and its scsi command request flags to determine
2088 * if we need to be quiet about the command failure.
2089 */
2090static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
2091{
2092 if (qc->scsicmd &&
2093 qc->scsicmd->request->rq_flags & RQF_QUIET)
2094 qc->flags |= ATA_QCFLAG_QUIET;
2095 return qc->flags & ATA_QCFLAG_QUIET;
2096}
2097
2098/**
2099 * ata_eh_link_autopsy - analyze error and determine recovery action
2100 * @link: host link to perform autopsy on
2101 *
2102 * Analyze why @link failed and determine which recovery actions
2103 * are needed. This function also sets more detailed AC_ERR_*
2104 * values and fills sense data for ATAPI CHECK SENSE.
2105 *
2106 * LOCKING:
2107 * Kernel thread context (may sleep).
2108 */
static void ata_eh_link_autopsy(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_queued_cmd *qc;
	struct ata_device *dev;
	unsigned int all_err_mask = 0, eflags = 0;
	int tag, nr_failed = 0, nr_quiet = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(link, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(link);
	} else if (rc != -EOPNOTSUPP) {
		/* SError read failed, force reset and probing */
		ehc->i.probe_mask |= ATA_ALL_DEVICES;
		ehc->i.action |= ATA_EH_RESET;
		ehc->i.err_mask |= AC_ERR_OTHER;
	}

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(link);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	/* per-qc analysis: only failed qcs on this physical link */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link)
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/*
		 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
		 * layers will determine whether the command is worth retrying
		 * based on the sense data and device class/type. Otherwise,
		 * determine directly if the command is worth retrying using its
		 * error mask and flags.
		 */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
		else if (ata_eh_worth_retry(qc))
			qc->flags |= ATA_QCFLAG_RETRY;

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			eflags |= ATA_EFLAG_IS_IO;
		trace_ata_eh_link_autopsy_qc(qc);

		/* Count quiet errors */
		if (ata_eh_quiet(qc))
			nr_quiet++;
		nr_failed++;
	}

	/* If all failed commands requested silence, then be quiet */
	if (nr_quiet == nr_failed)
		ehc->i.flags |= ATA_EHI_QUIET;

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_RESET;
	else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
		 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* If we have offending qcs and the associated failed device,
	 * perform per-dev EH action only on the offending device.
	 */
	if (ehc->i.dev) {
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	/* propagate timeout to host link */
	if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
		ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;

	/* record error and consider speeding down */
	dev = ehc->i.dev;
	if (!dev && ((ata_link_max_devices(link) == 1 &&
		      ata_dev_enabled(link->device))))
		dev = link->device;

	if (dev) {
		if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
			eflags |= ATA_EFLAG_DUBIOUS_XFER;
		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
		trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
	}
	DPRINTK("EXIT\n");
}
2230
2231/**
2232 * ata_eh_autopsy - analyze error and determine recovery action
2233 * @ap: host port to perform autopsy on
2234 *
2235 * Analyze all links of @ap and determine why they failed and
2236 * which recovery actions are needed.
2237 *
2238 * LOCKING:
2239 * Kernel thread context (may sleep).
2240 */
void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_link *link;

	/* autopsy fan-out / device links first (EDGE excludes host link
	 * when a PMP is attached)
	 */
	ata_for_each_link(link, ap, EDGE)
		ata_eh_link_autopsy(link);

	/* Handle the frigging slave link.  Autopsy is done similarly
	 * but actions and flags are transferred over to the master
	 * link and handled from there.
	 */
	if (ap->slave_link) {
		struct ata_eh_context *mehc = &ap->link.eh_context;
		struct ata_eh_context *sehc = &ap->slave_link->eh_context;

		/* transfer control flags from master to slave */
		sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;

		/* perform autopsy on the slave link */
		ata_eh_link_autopsy(ap->slave_link);

		/* transfer actions from slave to master and clear slave */
		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
		mehc->i.action		|= sehc->i.action;
		/* slave link always maps to devno 1 on the master */
		mehc->i.dev_action[1]	|= sehc->i.dev_action[1];
		mehc->i.flags		|= sehc->i.flags;
		ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
	}

	/* Autopsy of fanout ports can affect host link autopsy.
	 * Perform host link autopsy last.
	 */
	if (sata_pmp_attached(ap))
		ata_eh_link_autopsy(&ap->link);
}
2276
2277/**
2278 * ata_get_cmd_descript - get description for ATA command
2279 * @command: ATA command code to get description for
2280 *
2281 * Return a textual description of the given command, or NULL if the
2282 * command is not known.
2283 *
2284 * LOCKING:
2285 * None
2286 */
2287const char *ata_get_cmd_descript(u8 command)
2288{
2289#ifdef CONFIG_ATA_VERBOSE_ERROR
2290 static const struct
2291 {
2292 u8 command;
2293 const char *text;
2294 } cmd_descr[] = {
2295 { ATA_CMD_DEV_RESET, "DEVICE RESET" },
2296 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" },
2297 { ATA_CMD_STANDBY, "STANDBY" },
2298 { ATA_CMD_IDLE, "IDLE" },
2299 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" },
2300 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" },
2301 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" },
2302 { ATA_CMD_NOP, "NOP" },
2303 { ATA_CMD_FLUSH, "FLUSH CACHE" },
2304 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" },
2305 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" },
2306 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" },
2307 { ATA_CMD_SERVICE, "SERVICE" },
2308 { ATA_CMD_READ, "READ DMA" },
2309 { ATA_CMD_READ_EXT, "READ DMA EXT" },
2310 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" },
2311 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" },
2312 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" },
2313 { ATA_CMD_WRITE, "WRITE DMA" },
2314 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" },
2315 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" },
2316 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" },
2317 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" },
2318 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" },
2319 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
2320 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" },
2321 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" },
2322 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" },
2323 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" },
2324 { ATA_CMD_PIO_READ, "READ SECTOR(S)" },
2325 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" },
2326 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" },
2327 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" },
2328 { ATA_CMD_READ_MULTI, "READ MULTIPLE" },
2329 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" },
2330 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" },
2331 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" },
2332 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" },
2333 { ATA_CMD_SET_FEATURES, "SET FEATURES" },
2334 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" },
2335 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" },
2336 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" },
2337 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" },
2338 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" },
2339 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" },
2340 { ATA_CMD_SLEEP, "SLEEP" },
2341 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" },
2342 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" },
2343 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" },
2344 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" },
2345 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" },
2346 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" },
2347 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" },
2348 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" },
2349 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" },
2350 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" },
2351 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" },
2352 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" },
2353 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" },
2354 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" },
2355 { ATA_CMD_PMP_READ, "READ BUFFER" },
2356 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" },
2357 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" },
2358 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" },
2359 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" },
2360 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" },
2361 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" },
2362 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" },
2363 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" },
2364 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" },
2365 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" },
2366 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" },
2367 { ATA_CMD_SMART, "SMART" },
2368 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" },
2369 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" },
2370 { ATA_CMD_DSM, "DATA SET MANAGEMENT" },
2371 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" },
2372 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" },
2373 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" },
2374 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" },
2375 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" },
2376 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" },
2377 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" },
2378 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" },
2379 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" },
2380 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" },
2381 { ATA_CMD_READ_LONG, "READ LONG (with retries)" },
2382 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" },
2383 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" },
2384 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" },
2385 { ATA_CMD_RESTORE, "RECALIBRATE" },
2386 { 0, NULL } /* terminate list */
2387 };
2388
2389 unsigned int i;
2390 for (i = 0; cmd_descr[i].text; i++)
2391 if (cmd_descr[i].command == command)
2392 return cmd_descr[i].text;
2393#endif
2394
2395 return NULL;
2396}
2397EXPORT_SYMBOL_GPL(ata_get_cmd_descript);
2398
2399/**
2400 * ata_eh_link_report - report error handling to user
2401 * @link: ATA link EH is going on
2402 *
2403 * Report EH to user.
2404 *
2405 * LOCKING:
2406 * None.
2407 */
static void ata_eh_link_report(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_queued_cmd *qc;
	const char *frozen, *desc;
	char tries_buf[6] = "";
	int tag, nr_failed = 0;

	/* EH explicitly asked for silence, print nothing */
	if (ehc->i.flags & ATA_EHI_QUIET)
		return;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	/* count failed qcs on this link that are worth reporting;
	 * quiet DEV-only failures and sense-valid successes are skipped
	 */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link ||
		    ((qc->flags & ATA_QCFLAG_QUIET) &&
		     qc->err_mask == AC_ERR_DEV))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	/* nothing to report */
	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	/* note remaining EH tries if we already burned some */
	if (ap->eh_tries < ATA_EH_MAX_TRIES)
		snprintf(tries_buf, sizeof(tries_buf), " t%d",
			 ap->eh_tries);

	/* exception summary, attributed to the offending device if known */
	if (ehc->i.dev) {
		ata_dev_err(ehc->i.dev, "exception Emask 0x%x "
			    "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			    ehc->i.err_mask, link->sactive, ehc->i.serror,
			    ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_dev_err(ehc->i.dev, "%s\n", desc);
	} else {
		ata_link_err(link, "exception Emask 0x%x "
			     "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			     ehc->i.err_mask, link->sactive, ehc->i.serror,
			     ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_link_err(link, "%s\n", desc);
	}

#ifdef CONFIG_ATA_VERBOSE_ERROR
	/* decode SError bits into symbolic names */
	if (ehc->i.serror)
		ata_link_err(link,
		  "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
		  ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
		  ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
		  ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
		  ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
		  ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
		  ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
		  ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
		  ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
		  ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
		  ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
		  ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
		  ehc->i.serror & SERR_CRC ? "BadCRC " : "",
		  ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
		  ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
		  ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
		  ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
		  ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
#endif

	/* per-command detail for each failed qc on this link */
	ata_qc_for_each_raw(ap, qc, tag) {
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
		char data_buf[20] = "";
		char cdb_buf[70] = "";

		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
			continue;

		/* describe protocol, transfer size and direction */
		if (qc->dma_dir != DMA_NONE) {
			static const char *dma_str[] = {
				[DMA_BIDIRECTIONAL]	= "bidi",
				[DMA_TO_DEVICE]		= "out",
				[DMA_FROM_DEVICE]	= "in",
			};
			const char *prot_str = NULL;

			switch (qc->tf.protocol) {
			case ATA_PROT_UNKNOWN:
				prot_str = "unknown";
				break;
			case ATA_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATA_PROT_PIO:
				prot_str = "pio";
				break;
			case ATA_PROT_DMA:
				prot_str = "dma";
				break;
			case ATA_PROT_NCQ:
				prot_str = "ncq dma";
				break;
			case ATA_PROT_NCQ_NODATA:
				prot_str = "ncq nodata";
				break;
			case ATAPI_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATAPI_PROT_PIO:
				prot_str = "pio";
				break;
			case ATAPI_PROT_DMA:
				prot_str = "dma";
				break;
			}
			snprintf(data_buf, sizeof(data_buf), " %s %u %s",
				 prot_str, qc->nbytes, dma_str[qc->dma_dir]);
		}

		if (ata_is_atapi(qc->tf.protocol)) {
			/* prefer the SCSI command's CDB if one is attached */
			const u8 *cdb = qc->cdb;
			size_t cdb_len = qc->dev->cdb_len;

			if (qc->scsicmd) {
				cdb = qc->scsicmd->cmnd;
				cdb_len = qc->scsicmd->cmd_len;
			}
			__scsi_format_command(cdb_buf, sizeof(cdb_buf),
					      cdb, cdb_len);
		} else {
			/* non-ATAPI: print the command's symbolic name */
			const char *descr = ata_get_cmd_descript(cmd->command);
			if (descr)
				ata_dev_err(qc->dev, "failed command: %s\n",
					    descr);
		}

		/* dump issued taskfile and result taskfile side by side */
		ata_dev_err(qc->dev,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d%s\n         %s"
			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)%s\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, data_buf, cdb_buf,
			res->command, res->feature, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask),
			qc->err_mask & AC_ERR_NCQ ? " <F>" : "");

#ifdef CONFIG_ATA_VERBOSE_ERROR
		/* decode status register bits from the result taskfile */
		if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
				    ATA_SENSE | ATA_ERR)) {
			if (res->command & ATA_BUSY)
				ata_dev_err(qc->dev, "status: { Busy }\n");
			else
				ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n",
				  res->command & ATA_DRDY ? "DRDY " : "",
				  res->command & ATA_DF ? "DF " : "",
				  res->command & ATA_DRQ ? "DRQ " : "",
				  res->command & ATA_SENSE ? "SENSE " : "",
				  res->command & ATA_ERR ? "ERR " : "");
		}

		/* decode error register bits (not meaningful for PACKET) */
		if (cmd->command != ATA_CMD_PACKET &&
		    (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF |
				     ATA_IDNF | ATA_ABORTED)))
			ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n",
			  res->feature & ATA_ICRC ? "ICRC " : "",
			  res->feature & ATA_UNC ? "UNC " : "",
			  res->feature & ATA_AMNF ? "AMNF " : "",
			  res->feature & ATA_IDNF ? "IDNF " : "",
			  res->feature & ATA_ABORTED ? "ABRT " : "");
#endif
	}
}
2596
2597/**
2598 * ata_eh_report - report error handling to user
2599 * @ap: ATA port to report EH about
2600 *
2601 * Report EH to user.
2602 *
2603 * LOCKING:
2604 * None.
2605 */
2606void ata_eh_report(struct ata_port *ap)
2607{
2608 struct ata_link *link;
2609
2610 ata_for_each_link(link, ap, HOST_FIRST)
2611 ata_eh_link_report(link);
2612}
2613
2614static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
2615 unsigned int *classes, unsigned long deadline,
2616 bool clear_classes)
2617{
2618 struct ata_device *dev;
2619
2620 if (clear_classes)
2621 ata_for_each_dev(dev, link, ALL)
2622 classes[dev->devno] = ATA_DEV_UNKNOWN;
2623
2624 return reset(link, classes, deadline);
2625}
2626
2627static int ata_eh_followup_srst_needed(struct ata_link *link, int rc)
2628{
2629 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
2630 return 0;
2631 if (rc == -EAGAIN)
2632 return 1;
2633 if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
2634 return 1;
2635 return 0;
2636}
2637
/* Reset @link (and its slave link, if any), retrying with escalating
 * timeouts from ata_eh_reset_timeouts[].  Returns 0 on success or a
 * negative errno; on success classification results are in
 * link->eh_context.classes[].
 */
int ata_eh_reset(struct ata_link *link, int classify,
		 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
		 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_port *ap = link->ap;
	struct ata_link *slave = ap->slave_link;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL;
	unsigned int *classes = ehc->classes;
	unsigned int lflags = link->flags;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	int max_tries = 0, try = 0;
	struct ata_link *failed_link;
	struct ata_device *dev;
	unsigned long deadline, now;
	ata_reset_fn_t reset;
	unsigned long flags;
	u32 sstatus;
	int nr_unknown, rc;

	/*
	 * Prepare to reset
	 */
	/* count entries in the ULONG_MAX-terminated timeout table */
	while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
		max_tries++;
	if (link->flags & ATA_LFLAG_RST_ONCE)
		max_tries = 1;
	if (link->flags & ATA_LFLAG_NO_HRST)
		hardreset = NULL;
	if (link->flags & ATA_LFLAG_NO_SRST)
		softreset = NULL;

	/* make sure each reset attempt is at least COOL_DOWN apart */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		now = jiffies;
		WARN_ON(time_after(ehc->last_reset, now));
		deadline = ata_deadline(ehc->last_reset,
					ATA_EH_RESET_COOL_DOWN);
		if (time_before(now, deadline))
			schedule_timeout_uninterruptible(deadline - now);
	}

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags |= ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);

	ata_for_each_dev(dev, link, ALL) {
		/* If we issue an SRST then an ATA drive (not ATAPI)
		 * may change configuration and be in PIO0 timing. If
		 * we do a hard reset (or are coming from power on)
		 * this is true for ATA or ATAPI. Until we've set a
		 * suitable controller mode we should not touch the
		 * bus as we may be talking too fast.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->dma_mode = 0xff;

		/* If the controller has a pio mode setup function
		 * then use it to set the chipset to rights. Don't
		 * touch the DMA setup as that will be dealt with when
		 * configuring devices.
		 */
		if (ap->ops->set_piomode)
			ap->ops->set_piomode(ap, dev);
	}

	/* prefer hardreset */
	reset = NULL;
	ehc->i.action &= ~ATA_EH_RESET;
	if (hardreset) {
		reset = hardreset;
		ehc->i.action |= ATA_EH_HARDRESET;
	} else if (softreset) {
		reset = softreset;
		ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (prereset) {
		unsigned long deadline = ata_deadline(jiffies,
						      ATA_EH_PRERESET_TIMEOUT);

		if (slave) {
			sehc->i.action &= ~ATA_EH_RESET;
			sehc->i.action |= ehc->i.action;
		}

		rc = prereset(link, deadline);

		/* If present, do prereset on slave link too. Reset
		 * is skipped iff both master and slave links report
		 * -ENOENT or clear ATA_EH_RESET.
		 */
		if (slave && (rc == 0 || rc == -ENOENT)) {
			int tmp;

			tmp = prereset(slave, deadline);
			if (tmp != -ENOENT)
				rc = tmp;

			ehc->i.action |= sehc->i.action;
		}

		if (rc) {
			if (rc == -ENOENT) {
				ata_link_dbg(link, "port disabled--ignoring\n");
				ehc->i.action &= ~ATA_EH_RESET;

				ata_for_each_dev(dev, link, ALL)
					classes[dev->devno] = ATA_DEV_NONE;

				rc = 0;
			} else
				ata_link_err(link,
					     "prereset failed (errno=%d)\n",
					     rc);
			goto out;
		}

		/* prereset() might have cleared ATA_EH_RESET. If so,
		 * bang classes, thaw and return.
		 */
		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
			ata_for_each_dev(dev, link, ALL)
				classes[dev->devno] = ATA_DEV_NONE;
			if ((ap->pflags & ATA_PFLAG_FROZEN) &&
			    ata_is_host_link(link))
				ata_eh_thaw_port(ap);
			rc = 0;
			goto out;
		}
	}

 retry:
	/*
	 * Perform reset
	 */
	if (ata_is_host_link(link))
		ata_eh_freeze_port(ap);

	/* pick this attempt's timeout and bump the attempt counter */
	deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);

	if (reset) {
		if (verbose)
			ata_link_info(link, "%s resetting link\n",
				      reset == softreset ? "soft" : "hard");

		/* mark that this EH session started with reset */
		ehc->last_reset = jiffies;
		if (reset == hardreset)
			ehc->i.flags |= ATA_EHI_DID_HARDRESET;
		else
			ehc->i.flags |= ATA_EHI_DID_SOFTRESET;

		rc = ata_do_reset(link, reset, classes, deadline, true);
		if (rc && rc != -EAGAIN) {
			failed_link = link;
			goto fail;
		}

		/* hardreset slave link if existent */
		if (slave && reset == hardreset) {
			int tmp;

			if (verbose)
				ata_link_info(slave, "hard resetting link\n");

			ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
			tmp = ata_do_reset(slave, reset, classes, deadline,
					   false);
			switch (tmp) {
			case -EAGAIN:
				rc = -EAGAIN;
				/* fallthrough - -EAGAIN is not a failure */
			case 0:
				break;
			default:
				failed_link = slave;
				rc = tmp;
				goto fail;
			}
		}

		/* perform follow-up SRST if necessary */
		if (reset == hardreset &&
		    ata_eh_followup_srst_needed(link, rc)) {
			reset = softreset;

			if (!reset) {
				ata_link_err(link,
					     "follow-up softreset required but no softreset available\n");
				failed_link = link;
				rc = -EINVAL;
				goto fail;
			}

			ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
			rc = ata_do_reset(link, reset, classes, deadline, true);
			if (rc) {
				failed_link = link;
				goto fail;
			}
		}
	} else {
		if (verbose)
			ata_link_info(link,
				      "no reset method available, skipping reset\n");
		if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
			lflags |= ATA_LFLAG_ASSUME_ATA;
	}

	/*
	 * Post-reset processing
	 */
	ata_for_each_dev(dev, link, ALL) {
		/* After the reset, the device state is PIO 0 and the
		 * controller state is undefined. Reset also wakes up
		 * drives from sleeping mode.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->flags &= ~ATA_DFLAG_SLEEPING;

		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			continue;

		/* apply class override */
		if (lflags & ATA_LFLAG_ASSUME_ATA)
			classes[dev->devno] = ATA_DEV_ATA;
		else if (lflags & ATA_LFLAG_ASSUME_SEMB)
			classes[dev->devno] = ATA_DEV_SEMB_UNSUP;
	}

	/* record current link speed */
	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
		link->sata_spd = (sstatus >> 4) & 0xf;
	if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
		slave->sata_spd = (sstatus >> 4) & 0xf;

	/* thaw the port */
	if (ata_is_host_link(link))
		ata_eh_thaw_port(ap);

	/* postreset() should clear hardware SError. Although SError
	 * is cleared during link resume, clearing SError here is
	 * necessary as some PHYs raise hotplug events after SRST.
	 * This introduces race condition where hotplug occurs between
	 * reset and here. This race is mediated by cross checking
	 * link onlineness and classification result later.
	 */
	if (postreset) {
		postreset(link, classes);
		if (slave)
			postreset(slave, classes);
	}

	/*
	 * Some controllers can't be frozen very well and may set spurious
	 * error conditions during reset. Clear accumulated error
	 * information and re-thaw the port if frozen. As reset is the
	 * final recovery action and we cross check link onlineness against
	 * device classification later, no hotplug event is lost by this.
	 */
	spin_lock_irqsave(link->ap->lock, flags);
	memset(&link->eh_info, 0, sizeof(link->eh_info));
	if (slave)
		memset(&slave->eh_info, 0, sizeof(link->eh_info));
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	spin_unlock_irqrestore(link->ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_eh_thaw_port(ap);

	/*
	 * Make sure onlineness and classification result correspond.
	 * Hotplug could have happened during reset and some
	 * controllers fail to wait while a drive is spinning up after
	 * being hotplugged causing misdetection. By cross checking
	 * link on/offlineness and classification result, those
	 * conditions can be reliably detected and retried.
	 */
	nr_unknown = 0;
	ata_for_each_dev(dev, link, ALL) {
		if (ata_phys_link_online(ata_dev_phys_link(dev))) {
			if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
				ata_dev_dbg(dev, "link online but device misclassified\n");
				classes[dev->devno] = ATA_DEV_NONE;
				nr_unknown++;
			}
		} else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
			if (ata_class_enabled(classes[dev->devno]))
				ata_dev_dbg(dev,
					    "link offline, clearing class %d to NONE\n",
					    classes[dev->devno]);
			classes[dev->devno] = ATA_DEV_NONE;
		} else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
			ata_dev_dbg(dev,
				    "link status unknown, clearing UNKNOWN to NONE\n");
			classes[dev->devno] = ATA_DEV_NONE;
		}
	}

	if (classify && nr_unknown) {
		if (try < max_tries) {
			ata_link_warn(link,
				      "link online but %d devices misclassified, retrying\n",
				      nr_unknown);
			failed_link = link;
			rc = -EAGAIN;
			goto fail;
		}
		ata_link_warn(link,
			      "link online but %d devices misclassified, "
			      "device detection might fail\n", nr_unknown);
	}

	/* reset successful, schedule revalidation */
	ata_eh_done(link, NULL, ATA_EH_RESET);
	if (slave)
		ata_eh_done(slave, NULL, ATA_EH_RESET);
	ehc->last_reset = jiffies;	/* update to completion time */
	ehc->i.action |= ATA_EH_REVALIDATE;
	link->lpm_policy = ATA_LPM_UNKNOWN;	/* reset LPM state */

	rc = 0;
 out:
	/* clear hotplug flag */
	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
	if (slave)
		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	return rc;

 fail:
	/* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
	if (!ata_is_host_link(link) &&
	    sata_scr_read(link, SCR_STATUS, &sstatus))
		rc = -ERESTART;

	if (try >= max_tries) {
		/*
		 * Thaw host port even if reset failed, so that the port
		 * can be retried on the next phy event. This risks
		 * repeated EH runs but seems to be a better tradeoff than
		 * shutting down a port after a botched hotplug attempt.
		 */
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		goto out;
	}

	/* wait out the remainder of this attempt's timeout before retrying */
	now = jiffies;
	if (time_before(now, deadline)) {
		unsigned long delta = deadline - now;

		ata_link_warn(failed_link,
			"reset failed (errno=%d), retrying in %u secs\n",
			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));

		ata_eh_release(ap);
		while (delta)
			delta = schedule_timeout_uninterruptible(delta);
		ata_eh_acquire(ap);
	}

	/*
	 * While disks spinup behind PMP, some controllers fail sending SRST.
	 * They need to be reset - as well as the PMP - before retrying.
	 */
	if (rc == -ERESTART) {
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		goto out;
	}

	/* lower link speed on the penultimate try, or on protocol error */
	if (try == max_tries - 1) {
		sata_down_spd_limit(link, 0);
		if (slave)
			sata_down_spd_limit(slave, 0);
	} else if (rc == -EPIPE)
		sata_down_spd_limit(failed_link, 0);

	if (hardreset)
		reset = hardreset;
	goto retry;
}
3027
3028static inline void ata_eh_pull_park_action(struct ata_port *ap)
3029{
3030 struct ata_link *link;
3031 struct ata_device *dev;
3032 unsigned long flags;
3033
3034 /*
3035 * This function can be thought of as an extended version of
3036 * ata_eh_about_to_do() specially crafted to accommodate the
3037 * requirements of ATA_EH_PARK handling. Since the EH thread
3038 * does not leave the do {} while () loop in ata_eh_recover as
3039 * long as the timeout for a park request to *one* device on
3040 * the port has not expired, and since we still want to pick
3041 * up park requests to other devices on the same port or
3042 * timeout updates for the same device, we have to pull
3043 * ATA_EH_PARK actions from eh_info into eh_context.i
3044 * ourselves at the beginning of each pass over the loop.
3045 *
3046 * Additionally, all write accesses to &ap->park_req_pending
3047 * through reinit_completion() (see below) or complete_all()
3048 * (see ata_scsi_park_store()) are protected by the host lock.
3049 * As a result we have that park_req_pending.done is zero on
3050 * exit from this function, i.e. when ATA_EH_PARK actions for
3051 * *all* devices on port ap have been pulled into the
3052 * respective eh_context structs. If, and only if,
3053 * park_req_pending.done is non-zero by the time we reach
3054 * wait_for_completion_timeout(), another ATA_EH_PARK action
3055 * has been scheduled for at least one of the devices on port
3056 * ap and we have to cycle over the do {} while () loop in
3057 * ata_eh_recover() again.
3058 */
3059
3060 spin_lock_irqsave(ap->lock, flags);
3061 reinit_completion(&ap->park_req_pending);
3062 ata_for_each_link(link, ap, EDGE) {
3063 ata_for_each_dev(dev, link, ALL) {
3064 struct ata_eh_info *ehi = &link->eh_info;
3065
3066 link->eh_context.i.dev_action[dev->devno] |=
3067 ehi->dev_action[dev->devno] & ATA_EH_PARK;
3068 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
3069 }
3070 }
3071 spin_unlock_irqrestore(ap->lock, flags);
3072}
3073
3074static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
3075{
3076 struct ata_eh_context *ehc = &dev->link->eh_context;
3077 struct ata_taskfile tf;
3078 unsigned int err_mask;
3079
3080 ata_tf_init(dev, &tf);
3081 if (park) {
3082 ehc->unloaded_mask |= 1 << dev->devno;
3083 tf.command = ATA_CMD_IDLEIMMEDIATE;
3084 tf.feature = 0x44;
3085 tf.lbal = 0x4c;
3086 tf.lbam = 0x4e;
3087 tf.lbah = 0x55;
3088 } else {
3089 ehc->unloaded_mask &= ~(1 << dev->devno);
3090 tf.command = ATA_CMD_CHK_POWER;
3091 }
3092
3093 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
3094 tf.protocol = ATA_PROT_NODATA;
3095 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3096 if (park && (err_mask || tf.lbal != 0xc4)) {
3097 ata_dev_err(dev, "head unload failed!\n");
3098 ehc->unloaded_mask &= ~(1 << dev->devno);
3099 }
3100}
3101
/* Revalidate enabled devices and attach newly found ones on @link.
 * Returns 0 on success; on failure returns a negative errno and stores
 * the offending device in @r_failed_dev.
 */
static int ata_eh_revalidate_and_attach(struct ata_link *link,
					struct ata_device **r_failed_dev)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	unsigned int new_mask = 0;	/* bitmask of freshly identified devnos */
	unsigned long flags;
	int rc = 0;

	DPRINTK("ENTER\n");

	/* For PATA drive side cable detection to work, IDENTIFY must
	 * be done backwards such that PDIAG- is released by the slave
	 * device before the master device is identified.
	 */
	ata_for_each_dev(dev, link, ALL_REVERSE) {
		unsigned int action = ata_eh_dev_action(dev);
		unsigned int readid_flags = 0;

		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
			WARN_ON(dev->class == ATA_DEV_PMP);

			/* can't revalidate a device whose link is down */
			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
				rc = -EIO;
				goto err;
			}

			ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
						readid_flags);
			if (rc)
				goto err;

			ata_eh_done(link, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			schedule_work(&(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			/* Temporarily set dev->class, it will be
			 * permanently set once all configurations are
			 * complete. This is necessary because new
			 * device configuration is done in two
			 * separate loops.
			 */
			dev->class = ehc->classes[dev->devno];

			if (dev->class == ATA_DEV_PMP)
				rc = sata_pmp_attach(dev);
			else
				rc = ata_dev_read_id(dev, &dev->class,
						     readid_flags, dev->id);

			/* read_id might have changed class, store and reset */
			ehc->classes[dev->devno] = dev->class;
			dev->class = ATA_DEV_UNKNOWN;

			switch (rc) {
			case 0:
				/* clear error info accumulated during probe */
				ata_ering_clear(&dev->ering);
				new_mask |= 1 << dev->devno;
				break;
			case -ENOENT:
				/* IDENTIFY was issued to non-existent
				 * device. No need to reset. Just
				 * thaw and ignore the device.
				 */
				ata_eh_thaw_port(ap);
				break;
			default:
				goto err;
			}
		}
	}

	/* PDIAG- should have been released, ask cable type if post-reset */
	if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
		if (ap->ops->cable_detect)
			ap->cbl = ap->ops->cable_detect(ap);
		ata_force_cbl(ap);
	}

	/* Configure new devices forward such that user doesn't see
	 * device detection messages backwards.
	 */
	ata_for_each_dev(dev, link, ALL) {
		if (!(new_mask & (1 << dev->devno)))
			continue;

		/* commit the class that was read in the first loop */
		dev->class = ehc->classes[dev->devno];

		/* PMPs were fully attached by sata_pmp_attach() above */
		if (dev->class == ATA_DEV_PMP)
			continue;

		ehc->i.flags |= ATA_EHI_PRINTINFO;
		rc = ata_dev_configure(dev);
		ehc->i.flags &= ~ATA_EHI_PRINTINFO;
		if (rc) {
			dev->class = ATA_DEV_UNKNOWN;
			goto err;
		}

		/* tell SCSI layer a new device appeared */
		spin_lock_irqsave(ap->lock, flags);
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
		spin_unlock_irqrestore(ap->lock, flags);

		/* new device discovered, configure xfermode */
		ehc->i.flags |= ATA_EHI_SETMODE;
	}

	return 0;

 err:
	*r_failed_dev = dev;
	DPRINTK("EXIT rc=%d\n", rc);
	return rc;
}
3230
3231/**
3232 * ata_set_mode - Program timings and issue SET FEATURES - XFER
3233 * @link: link on which timings will be programmed
3234 * @r_failed_dev: out parameter for failed device
3235 *
3236 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If
3237 * ata_set_mode() fails, pointer to the failing device is
3238 * returned in @r_failed_dev.
3239 *
3240 * LOCKING:
3241 * PCI/etc. bus probe sem.
3242 *
3243 * RETURNS:
3244 * 0 on success, negative errno otherwise
3245 */
3246int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
3247{
3248 struct ata_port *ap = link->ap;
3249 struct ata_device *dev;
3250 int rc;
3251
3252 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */
3253 ata_for_each_dev(dev, link, ENABLED) {
3254 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
3255 struct ata_ering_entry *ent;
3256
3257 ent = ata_ering_top(&dev->ering);
3258 if (ent)
3259 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
3260 }
3261 }
3262
3263 /* has private set_mode? */
3264 if (ap->ops->set_mode)
3265 rc = ap->ops->set_mode(link, r_failed_dev);
3266 else
3267 rc = ata_do_set_mode(link, r_failed_dev);
3268
3269 /* if transfer mode has changed, set DUBIOUS_XFER on device */
3270 ata_for_each_dev(dev, link, ENABLED) {
3271 struct ata_eh_context *ehc = &link->eh_context;
3272 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
3273 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
3274
3275 if (dev->xfer_mode != saved_xfer_mode ||
3276 ata_ncq_enabled(dev) != saved_ncq)
3277 dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
3278 }
3279
3280 return rc;
3281}
3282
3283/**
3284 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
3285 * @dev: ATAPI device to clear UA for
3286 *
3287 * Resets and other operations can make an ATAPI device raise
3288 * UNIT ATTENTION which causes the next operation to fail. This
3289 * function clears UA.
3290 *
3291 * LOCKING:
3292 * EH context (may sleep).
3293 *
3294 * RETURNS:
3295 * 0 on success, -errno on failure.
3296 */
3297static int atapi_eh_clear_ua(struct ata_device *dev)
3298{
3299 int i;
3300
3301 for (i = 0; i < ATA_EH_UA_TRIES; i++) {
3302 u8 *sense_buffer = dev->link->ap->sector_buf;
3303 u8 sense_key = 0;
3304 unsigned int err_mask;
3305
3306 err_mask = atapi_eh_tur(dev, &sense_key);
3307 if (err_mask != 0 && err_mask != AC_ERR_DEV) {
3308 ata_dev_warn(dev,
3309 "TEST_UNIT_READY failed (err_mask=0x%x)\n",
3310 err_mask);
3311 return -EIO;
3312 }
3313
3314 if (!err_mask || sense_key != UNIT_ATTENTION)
3315 return 0;
3316
3317 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
3318 if (err_mask) {
3319 ata_dev_warn(dev, "failed to clear "
3320 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
3321 return -EIO;
3322 }
3323 }
3324
3325 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n",
3326 ATA_EH_UA_TRIES);
3327
3328 return 0;
3329}
3330
3331/**
3332 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary
3333 * @dev: ATA device which may need FLUSH retry
3334 *
3335 * If @dev failed FLUSH, it needs to be reported upper layer
3336 * immediately as it means that @dev failed to remap and already
3337 * lost at least a sector and further FLUSH retrials won't make
3338 * any difference to the lost sector. However, if FLUSH failed
3339 * for other reasons, for example transmission error, FLUSH needs
3340 * to be retried.
3341 *
3342 * This function determines whether FLUSH failure retry is
3343 * necessary and performs it if so.
3344 *
3345 * RETURNS:
3346 * 0 if EH can continue, -errno if EH needs to be repeated.
3347 */
3348static int ata_eh_maybe_retry_flush(struct ata_device *dev)
3349{
3350 struct ata_link *link = dev->link;
3351 struct ata_port *ap = link->ap;
3352 struct ata_queued_cmd *qc;
3353 struct ata_taskfile tf;
3354 unsigned int err_mask;
3355 int rc = 0;
3356
3357 /* did flush fail for this device? */
3358 if (!ata_tag_valid(link->active_tag))
3359 return 0;
3360
3361 qc = __ata_qc_from_tag(ap, link->active_tag);
3362 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
3363 qc->tf.command != ATA_CMD_FLUSH))
3364 return 0;
3365
3366 /* if the device failed it, it should be reported to upper layers */
3367 if (qc->err_mask & AC_ERR_DEV)
3368 return 0;
3369
3370 /* flush failed for some other reason, give it another shot */
3371 ata_tf_init(dev, &tf);
3372
3373 tf.command = qc->tf.command;
3374 tf.flags |= ATA_TFLAG_DEVICE;
3375 tf.protocol = ATA_PROT_NODATA;
3376
3377 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n",
3378 tf.command, qc->err_mask);
3379
3380 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3381 if (!err_mask) {
3382 /*
3383 * FLUSH is complete but there's no way to
3384 * successfully complete a failed command from EH.
3385 * Making sure retry is allowed at least once and
3386 * retrying it should do the trick - whatever was in
3387 * the cache is already on the platter and this won't
3388 * cause infinite loop.
3389 */
3390 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
3391 } else {
3392 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n",
3393 err_mask);
3394 rc = -EIO;
3395
3396 /* if device failed it, report it to upper layers */
3397 if (err_mask & AC_ERR_DEV) {
3398 qc->err_mask |= AC_ERR_DEV;
3399 qc->result_tf = tf;
3400 if (!(ap->pflags & ATA_PFLAG_FROZEN))
3401 rc = 0;
3402 }
3403 }
3404 return rc;
3405}
3406
3407/**
3408 * ata_eh_set_lpm - configure SATA interface power management
3409 * @link: link to configure power management
3410 * @policy: the link power management policy
3411 * @r_failed_dev: out parameter for failed device
3412 *
3413 * Enable SATA Interface power management. This will enable
3414 * Device Interface Power Management (DIPM) for min_power and
3415 * medium_power_with_dipm policies, and then call driver specific
3416 * callbacks for enabling Host Initiated Power management.
3417 *
3418 * LOCKING:
3419 * EH context.
3420 *
3421 * RETURNS:
3422 * 0 on success, -errno on failure.
3423 */
3424static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
3425 struct ata_device **r_failed_dev)
3426{
3427 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
3428 struct ata_eh_context *ehc = &link->eh_context;
3429 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
3430 enum ata_lpm_policy old_policy = link->lpm_policy;
3431 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
3432 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
3433 unsigned int err_mask;
3434 int rc;
3435
3436 /* if the link or host doesn't do LPM, noop */
3437 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
3438 return 0;
3439
3440 /*
3441 * DIPM is enabled only for MIN_POWER as some devices
3442 * misbehave when the host NACKs transition to SLUMBER. Order
3443 * device and link configurations such that the host always
3444 * allows DIPM requests.
3445 */
3446 ata_for_each_dev(dev, link, ENABLED) {
3447 bool hipm = ata_id_has_hipm(dev->id);
3448 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm;
3449
3450 /* find the first enabled and LPM enabled devices */
3451 if (!link_dev)
3452 link_dev = dev;
3453
3454 if (!lpm_dev && (hipm || dipm))
3455 lpm_dev = dev;
3456
3457 hints &= ~ATA_LPM_EMPTY;
3458 if (!hipm)
3459 hints &= ~ATA_LPM_HIPM;
3460
3461 /* disable DIPM before changing link config */
3462 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) {
3463 err_mask = ata_dev_set_feature(dev,
3464 SETFEATURES_SATA_DISABLE, SATA_DIPM);
3465 if (err_mask && err_mask != AC_ERR_DEV) {
3466 ata_dev_warn(dev,
3467 "failed to disable DIPM, Emask 0x%x\n",
3468 err_mask);
3469 rc = -EIO;
3470 goto fail;
3471 }
3472 }
3473 }
3474
3475 if (ap) {
3476 rc = ap->ops->set_lpm(link, policy, hints);
3477 if (!rc && ap->slave_link)
3478 rc = ap->ops->set_lpm(ap->slave_link, policy, hints);
3479 } else
3480 rc = sata_pmp_set_lpm(link, policy, hints);
3481
3482 /*
3483 * Attribute link config failure to the first (LPM) enabled
3484 * device on the link.
3485 */
3486 if (rc) {
3487 if (rc == -EOPNOTSUPP) {
3488 link->flags |= ATA_LFLAG_NO_LPM;
3489 return 0;
3490 }
3491 dev = lpm_dev ? lpm_dev : link_dev;
3492 goto fail;
3493 }
3494
3495 /*
3496 * Low level driver acked the transition. Issue DIPM command
3497 * with the new policy set.
3498 */
3499 link->lpm_policy = policy;
3500 if (ap && ap->slave_link)
3501 ap->slave_link->lpm_policy = policy;
3502
3503 /* host config updated, enable DIPM if transitioning to MIN_POWER */
3504 ata_for_each_dev(dev, link, ENABLED) {
3505 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm &&
3506 ata_id_has_dipm(dev->id)) {
3507 err_mask = ata_dev_set_feature(dev,
3508 SETFEATURES_SATA_ENABLE, SATA_DIPM);
3509 if (err_mask && err_mask != AC_ERR_DEV) {
3510 ata_dev_warn(dev,
3511 "failed to enable DIPM, Emask 0x%x\n",
3512 err_mask);
3513 rc = -EIO;
3514 goto fail;
3515 }
3516 }
3517 }
3518
3519 link->last_lpm_change = jiffies;
3520 link->flags |= ATA_LFLAG_CHANGED;
3521
3522 return 0;
3523
3524fail:
3525 /* restore the old policy */
3526 link->lpm_policy = old_policy;
3527 if (ap && ap->slave_link)
3528 ap->slave_link->lpm_policy = old_policy;
3529
3530 /* if no device or only one more chance is left, disable LPM */
3531 if (!dev || ehc->tries[dev->devno] <= 2) {
3532 ata_link_warn(link, "disabling LPM on the link\n");
3533 link->flags |= ATA_LFLAG_NO_LPM;
3534 }
3535 if (r_failed_dev)
3536 *r_failed_dev = dev;
3537 return rc;
3538}
3539
3540int ata_link_nr_enabled(struct ata_link *link)
3541{
3542 struct ata_device *dev;
3543 int cnt = 0;
3544
3545 ata_for_each_dev(dev, link, ENABLED)
3546 cnt++;
3547 return cnt;
3548}
3549
3550static int ata_link_nr_vacant(struct ata_link *link)
3551{
3552 struct ata_device *dev;
3553 int cnt = 0;
3554
3555 ata_for_each_dev(dev, link, ALL)
3556 if (dev->class == ATA_DEV_UNKNOWN)
3557 cnt++;
3558 return cnt;
3559}
3560
3561static int ata_eh_skip_recovery(struct ata_link *link)
3562{
3563 struct ata_port *ap = link->ap;
3564 struct ata_eh_context *ehc = &link->eh_context;
3565 struct ata_device *dev;
3566
3567 /* skip disabled links */
3568 if (link->flags & ATA_LFLAG_DISABLED)
3569 return 1;
3570
3571 /* skip if explicitly requested */
3572 if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
3573 return 1;
3574
3575 /* thaw frozen port and recover failed devices */
3576 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
3577 return 0;
3578
3579 /* reset at least once if reset is requested */
3580 if ((ehc->i.action & ATA_EH_RESET) &&
3581 !(ehc->i.flags & ATA_EHI_DID_RESET))
3582 return 0;
3583
3584 /* skip if class codes for all vacant slots are ATA_DEV_NONE */
3585 ata_for_each_dev(dev, link, ALL) {
3586 if (dev->class == ATA_DEV_UNKNOWN &&
3587 ehc->classes[dev->devno] != ATA_DEV_NONE)
3588 return 0;
3589 }
3590
3591 return 1;
3592}
3593
3594static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
3595{
3596 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
3597 u64 now = get_jiffies_64();
3598 int *trials = void_arg;
3599
3600 if ((ent->eflags & ATA_EFLAG_OLD_ER) ||
3601 (ent->timestamp < now - min(now, interval)))
3602 return -1;
3603
3604 (*trials)++;
3605 return 0;
3606}
3607
/* Schedule probing for @dev if it is requested and hasn't been done yet
 * in this EH session.  Returns 1 if a probe was scheduled, 0 otherwise.
 */
static int ata_eh_schedule_probe(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_link *link = ata_dev_phys_link(dev);
	int trials = 0;

	/* noop unless probing is requested and not yet attempted */
	if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
	    (ehc->did_probe_mask & (1 << dev->devno)))
		return 0;

	/* start from a clean device state and request a reset */
	ata_eh_detach_dev(dev);
	ata_dev_init(dev);
	ehc->did_probe_mask |= (1 << dev->devno);
	ehc->i.action |= ATA_EH_RESET;
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	/* the link maybe in a deep sleep, wake it up */
	if (link->lpm_policy > ATA_LPM_MAX_POWER) {
		if (ata_is_host_link(link))
			link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER,
					       ATA_LPM_EMPTY);
		else
			sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
					 ATA_LPM_EMPTY);
	}

	/* Record and count probe trials on the ering.  The specific
	 * error mask used is irrelevant.  Because a successful device
	 * detection clears the ering, this count accumulates only if
	 * there are consecutive failed probes.
	 *
	 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS
	 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is
	 * forced to 1.5Gbps.
	 *
	 * This is to work around cases where failed link speed
	 * negotiation results in device misdetection leading to
	 * infinite DEVXCHG or PHRDY CHG events.
	 */
	ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
	ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);

	if (trials > ATA_EH_PROBE_TRIALS)
		sata_down_spd_limit(link, 1);

	return 1;
}
3656
/* Handle a recovery failure @err on @dev: consume a retry, apply
 * error-specific handling (probe scheduling, speed down), and either
 * disable the device when it is out of tries (returns 1) or schedule
 * another reset (returns 0).
 */
static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	/* -EAGAIN from EH routine indicates retry without prejudice.
	 * The requester is responsible for ensuring forward progress.
	 */
	if (err != -EAGAIN)
		ehc->tries[dev->devno]--;

	switch (err) {
	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* fall through */
	case -EINVAL:
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		/* fall through */
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
			 */
			sata_down_spd_limit(ata_dev_phys_link(dev), 0);
			if (dev->pio_mode > XFER_PIO_0)
				ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
		}
	}

	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
		/* disable device if it has used up all its chances */
		ata_dev_disable(dev);

		/* detach if offline */
		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			ata_eh_detach_dev(dev);

		/* schedule probe if necessary */
		if (ata_eh_schedule_probe(dev)) {
			/* fresh probe - restore tries and timeout history */
			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
			memset(ehc->cmd_timeout_idx[dev->devno], 0,
			       sizeof(ehc->cmd_timeout_idx[dev->devno]));
		}

		return 1;
	} else {
		ehc->i.action |= ATA_EH_RESET;
		return 0;
	}
}
3708
3709/**
3710 * ata_eh_recover - recover host port after error
3711 * @ap: host port to recover
3712 * @prereset: prereset method (can be NULL)
3713 * @softreset: softreset method (can be NULL)
3714 * @hardreset: hardreset method (can be NULL)
3715 * @postreset: postreset method (can be NULL)
3716 * @r_failed_link: out parameter for failed link
3717 *
3718 * This is the alpha and omega, eum and yang, heart and soul of
3719 * libata exception handling. On entry, actions required to
3720 * recover each link and hotplug requests are recorded in the
3721 * link's eh_context. This function executes all the operations
3722 * with appropriate retrials and fallbacks to resurrect failed
3723 * devices, detach goners and greet newcomers.
3724 *
3725 * LOCKING:
3726 * Kernel thread context (may sleep).
3727 *
3728 * RETURNS:
3729 * 0 on success, -errno on failure.
3730 */
3731int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
3732 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
3733 ata_postreset_fn_t postreset,
3734 struct ata_link **r_failed_link)
3735{
3736 struct ata_link *link;
3737 struct ata_device *dev;
3738 int rc, nr_fails;
3739 unsigned long flags, deadline;
3740
3741 DPRINTK("ENTER\n");
3742
3743 /* prep for recovery */
3744 ata_for_each_link(link, ap, EDGE) {
3745 struct ata_eh_context *ehc = &link->eh_context;
3746
3747 /* re-enable link? */
3748 if (ehc->i.action & ATA_EH_ENABLE_LINK) {
3749 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
3750 spin_lock_irqsave(ap->lock, flags);
3751 link->flags &= ~ATA_LFLAG_DISABLED;
3752 spin_unlock_irqrestore(ap->lock, flags);
3753 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
3754 }
3755
3756 ata_for_each_dev(dev, link, ALL) {
3757 if (link->flags & ATA_LFLAG_NO_RETRY)
3758 ehc->tries[dev->devno] = 1;
3759 else
3760 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
3761
3762 /* collect port action mask recorded in dev actions */
3763 ehc->i.action |= ehc->i.dev_action[dev->devno] &
3764 ~ATA_EH_PERDEV_MASK;
3765 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;
3766
3767 /* process hotplug request */
3768 if (dev->flags & ATA_DFLAG_DETACH)
3769 ata_eh_detach_dev(dev);
3770
3771 /* schedule probe if necessary */
3772 if (!ata_dev_enabled(dev))
3773 ata_eh_schedule_probe(dev);
3774 }
3775 }
3776
3777 retry:
3778 rc = 0;
3779
3780 /* if UNLOADING, finish immediately */
3781 if (ap->pflags & ATA_PFLAG_UNLOADING)
3782 goto out;
3783
3784 /* prep for EH */
3785 ata_for_each_link(link, ap, EDGE) {
3786 struct ata_eh_context *ehc = &link->eh_context;
3787
3788 /* skip EH if possible. */
3789 if (ata_eh_skip_recovery(link))
3790 ehc->i.action = 0;
3791
3792 ata_for_each_dev(dev, link, ALL)
3793 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
3794 }
3795
3796 /* reset */
3797 ata_for_each_link(link, ap, EDGE) {
3798 struct ata_eh_context *ehc = &link->eh_context;
3799
3800 if (!(ehc->i.action & ATA_EH_RESET))
3801 continue;
3802
3803 rc = ata_eh_reset(link, ata_link_nr_vacant(link),
3804 prereset, softreset, hardreset, postreset);
3805 if (rc) {
3806 ata_link_err(link, "reset failed, giving up\n");
3807 goto out;
3808 }
3809 }
3810
3811 do {
3812 unsigned long now;
3813
3814 /*
3815 * clears ATA_EH_PARK in eh_info and resets
3816 * ap->park_req_pending
3817 */
3818 ata_eh_pull_park_action(ap);
3819
3820 deadline = jiffies;
3821 ata_for_each_link(link, ap, EDGE) {
3822 ata_for_each_dev(dev, link, ALL) {
3823 struct ata_eh_context *ehc = &link->eh_context;
3824 unsigned long tmp;
3825
3826 if (dev->class != ATA_DEV_ATA &&
3827 dev->class != ATA_DEV_ZAC)
3828 continue;
3829 if (!(ehc->i.dev_action[dev->devno] &
3830 ATA_EH_PARK))
3831 continue;
3832 tmp = dev->unpark_deadline;
3833 if (time_before(deadline, tmp))
3834 deadline = tmp;
3835 else if (time_before_eq(tmp, jiffies))
3836 continue;
3837 if (ehc->unloaded_mask & (1 << dev->devno))
3838 continue;
3839
3840 ata_eh_park_issue_cmd(dev, 1);
3841 }
3842 }
3843
3844 now = jiffies;
3845 if (time_before_eq(deadline, now))
3846 break;
3847
3848 ata_eh_release(ap);
3849 deadline = wait_for_completion_timeout(&ap->park_req_pending,
3850 deadline - now);
3851 ata_eh_acquire(ap);
3852 } while (deadline);
3853 ata_for_each_link(link, ap, EDGE) {
3854 ata_for_each_dev(dev, link, ALL) {
3855 if (!(link->eh_context.unloaded_mask &
3856 (1 << dev->devno)))
3857 continue;
3858
3859 ata_eh_park_issue_cmd(dev, 0);
3860 ata_eh_done(link, dev, ATA_EH_PARK);
3861 }
3862 }
3863
3864 /* the rest */
3865 nr_fails = 0;
3866 ata_for_each_link(link, ap, PMP_FIRST) {
3867 struct ata_eh_context *ehc = &link->eh_context;
3868
3869 if (sata_pmp_attached(ap) && ata_is_host_link(link))
3870 goto config_lpm;
3871
3872 /* revalidate existing devices and attach new ones */
3873 rc = ata_eh_revalidate_and_attach(link, &dev);
3874 if (rc)
3875 goto rest_fail;
3876
3877 /* if PMP got attached, return, pmp EH will take care of it */
3878 if (link->device->class == ATA_DEV_PMP) {
3879 ehc->i.action = 0;
3880 return 0;
3881 }
3882
3883 /* configure transfer mode if necessary */
3884 if (ehc->i.flags & ATA_EHI_SETMODE) {
3885 rc = ata_set_mode(link, &dev);
3886 if (rc)
3887 goto rest_fail;
3888 ehc->i.flags &= ~ATA_EHI_SETMODE;
3889 }
3890
3891 /* If reset has been issued, clear UA to avoid
3892 * disrupting the current users of the device.
3893 */
3894 if (ehc->i.flags & ATA_EHI_DID_RESET) {
3895 ata_for_each_dev(dev, link, ALL) {
3896 if (dev->class != ATA_DEV_ATAPI)
3897 continue;
3898 rc = atapi_eh_clear_ua(dev);
3899 if (rc)
3900 goto rest_fail;
3901 if (zpodd_dev_enabled(dev))
3902 zpodd_post_poweron(dev);
3903 }
3904 }
3905
3906 /* retry flush if necessary */
3907 ata_for_each_dev(dev, link, ALL) {
3908 if (dev->class != ATA_DEV_ATA &&
3909 dev->class != ATA_DEV_ZAC)
3910 continue;
3911 rc = ata_eh_maybe_retry_flush(dev);
3912 if (rc)
3913 goto rest_fail;
3914 }
3915
3916 config_lpm:
3917 /* configure link power saving */
3918 if (link->lpm_policy != ap->target_lpm_policy) {
3919 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev);
3920 if (rc)
3921 goto rest_fail;
3922 }
3923
3924 /* this link is okay now */
3925 ehc->i.flags = 0;
3926 continue;
3927
3928 rest_fail:
3929 nr_fails++;
3930 if (dev)
3931 ata_eh_handle_dev_fail(dev, rc);
3932
3933 if (ap->pflags & ATA_PFLAG_FROZEN) {
3934 /* PMP reset requires working host port.
3935 * Can't retry if it's frozen.
3936 */
3937 if (sata_pmp_attached(ap))
3938 goto out;
3939 break;
3940 }
3941 }
3942
3943 if (nr_fails)
3944 goto retry;
3945
3946 out:
3947 if (rc && r_failed_link)
3948 *r_failed_link = link;
3949
3950 DPRINTK("EXIT, rc=%d\n", rc);
3951 return rc;
3952}
3953
3954/**
3955 * ata_eh_finish - finish up EH
3956 * @ap: host port to finish EH for
3957 *
3958 * Recovery is complete. Clean up EH states and retry or finish
3959 * failed qcs.
3960 *
3961 * LOCKING:
3962 * None.
3963 */
3964void ata_eh_finish(struct ata_port *ap)
3965{
3966 struct ata_queued_cmd *qc;
3967 int tag;
3968
3969 /* retry or finish qcs */
3970 ata_qc_for_each_raw(ap, qc, tag) {
3971 if (!(qc->flags & ATA_QCFLAG_FAILED))
3972 continue;
3973
3974 if (qc->err_mask) {
3975 /* FIXME: Once EH migration is complete,
3976 * generate sense data in this function,
3977 * considering both err_mask and tf.
3978 */
3979 if (qc->flags & ATA_QCFLAG_RETRY)
3980 ata_eh_qc_retry(qc);
3981 else
3982 ata_eh_qc_complete(qc);
3983 } else {
3984 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
3985 ata_eh_qc_complete(qc);
3986 } else {
3987 /* feed zero TF to sense generation */
3988 memset(&qc->result_tf, 0, sizeof(qc->result_tf));
3989 ata_eh_qc_retry(qc);
3990 }
3991 }
3992 }
3993
3994 /* make sure nr_active_links is zero after EH */
3995 WARN_ON(ap->nr_active_links);
3996 ap->nr_active_links = 0;
3997}
3998
3999/**
4000 * ata_do_eh - do standard error handling
4001 * @ap: host port to handle error for
4002 *
4003 * @prereset: prereset method (can be NULL)
4004 * @softreset: softreset method (can be NULL)
4005 * @hardreset: hardreset method (can be NULL)
4006 * @postreset: postreset method (can be NULL)
4007 *
4008 * Perform standard error handling sequence.
4009 *
4010 * LOCKING:
4011 * Kernel thread context (may sleep).
4012 */
4013void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
4014 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
4015 ata_postreset_fn_t postreset)
4016{
4017 struct ata_device *dev;
4018 int rc;
4019
4020 ata_eh_autopsy(ap);
4021 ata_eh_report(ap);
4022
4023 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
4024 NULL);
4025 if (rc) {
4026 ata_for_each_dev(dev, &ap->link, ALL)
4027 ata_dev_disable(dev);
4028 }
4029
4030 ata_eh_finish(ap);
4031}
4032
4033/**
4034 * ata_std_error_handler - standard error handler
4035 * @ap: host port to handle error for
4036 *
4037 * Standard error handler
4038 *
4039 * LOCKING:
4040 * Kernel thread context (may sleep).
4041 */
4042void ata_std_error_handler(struct ata_port *ap)
4043{
4044 struct ata_port_operations *ops = ap->ops;
4045 ata_reset_fn_t hardreset = ops->hardreset;
4046
4047 /* ignore built-in hardreset if SCR access is not available */
4048 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
4049 hardreset = NULL;
4050
4051 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
4052}
4053
4054#ifdef CONFIG_PM
4055/**
4056 * ata_eh_handle_port_suspend - perform port suspend operation
4057 * @ap: port to suspend
4058 *
4059 * Suspend @ap.
4060 *
4061 * LOCKING:
4062 * Kernel thread context (may sleep).
4063 */
4064static void ata_eh_handle_port_suspend(struct ata_port *ap)
4065{
4066 unsigned long flags;
4067 int rc = 0;
4068 struct ata_device *dev;
4069
4070 /* are we suspending? */
4071 spin_lock_irqsave(ap->lock, flags);
4072 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
4073 ap->pm_mesg.event & PM_EVENT_RESUME) {
4074 spin_unlock_irqrestore(ap->lock, flags);
4075 return;
4076 }
4077 spin_unlock_irqrestore(ap->lock, flags);
4078
4079 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
4080
4081 /*
4082 * If we have a ZPODD attached, check its zero
4083 * power ready status before the port is frozen.
4084 * Only needed for runtime suspend.
4085 */
4086 if (PMSG_IS_AUTO(ap->pm_mesg)) {
4087 ata_for_each_dev(dev, &ap->link, ENABLED) {
4088 if (zpodd_dev_enabled(dev))
4089 zpodd_on_suspend(dev);
4090 }
4091 }
4092
4093 /* tell ACPI we're suspending */
4094 rc = ata_acpi_on_suspend(ap);
4095 if (rc)
4096 goto out;
4097
4098 /* suspend */
4099 ata_eh_freeze_port(ap);
4100
4101 if (ap->ops->port_suspend)
4102 rc = ap->ops->port_suspend(ap, ap->pm_mesg);
4103
4104 ata_acpi_set_state(ap, ap->pm_mesg);
4105 out:
4106 /* update the flags */
4107 spin_lock_irqsave(ap->lock, flags);
4108
4109 ap->pflags &= ~ATA_PFLAG_PM_PENDING;
4110 if (rc == 0)
4111 ap->pflags |= ATA_PFLAG_SUSPENDED;
4112 else if (ap->pflags & ATA_PFLAG_FROZEN)
4113 ata_port_schedule_eh(ap);
4114
4115 spin_unlock_irqrestore(ap->lock, flags);
4116
4117 return;
4118}
4119
4120/**
4121 * ata_eh_handle_port_resume - perform port resume operation
4122 * @ap: port to resume
4123 *
4124 * Resume @ap.
4125 *
4126 * LOCKING:
4127 * Kernel thread context (may sleep).
4128 */
4129static void ata_eh_handle_port_resume(struct ata_port *ap)
4130{
4131 struct ata_link *link;
4132 struct ata_device *dev;
4133 unsigned long flags;
4134
4135 /* are we resuming? */
4136 spin_lock_irqsave(ap->lock, flags);
4137 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
4138 !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
4139 spin_unlock_irqrestore(ap->lock, flags);
4140 return;
4141 }
4142 spin_unlock_irqrestore(ap->lock, flags);
4143
4144 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));
4145
4146 /*
4147 * Error timestamps are in jiffies which doesn't run while
4148 * suspended and PHY events during resume isn't too uncommon.
4149 * When the two are combined, it can lead to unnecessary speed
4150 * downs if the machine is suspended and resumed repeatedly.
4151 * Clear error history.
4152 */
4153 ata_for_each_link(link, ap, HOST_FIRST)
4154 ata_for_each_dev(dev, link, ALL)
4155 ata_ering_clear(&dev->ering);
4156
4157 ata_acpi_set_state(ap, ap->pm_mesg);
4158
4159 if (ap->ops->port_resume)
4160 ap->ops->port_resume(ap);
4161
4162 /* tell ACPI that we're resuming */
4163 ata_acpi_on_resume(ap);
4164
4165 /* update the flags */
4166 spin_lock_irqsave(ap->lock, flags);
4167 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
4168 spin_unlock_irqrestore(ap->lock, flags);
4169}
4170#endif /* CONFIG_PM */