Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

USB: OHCI handles more ZFMicro quirks

The ZF Micro OHCI controller exhibits unexpected behavior that seems to be
related to high load. Under certain conditions, the controller will
complete a TD, remove it from the endpoint's queue, and fail to add it to
the donelist. This causes the endpoint to appear to stop responding. Worse,
if the device is removed while in that state, OHCI will hang while waiting
for the orphaned TD to complete. The situation is not recoverable without
rebooting.

This fix enhances the scope of the existing OHCI_QUIRK_ZFMICRO flag:

1. A watchdog routine periodically scans the OHCI structures to check
for orphaned TDs. When one is found, the TD is taken back from the
controller and completed normally.

2. If a device is removed while the endpoint is hung but before the
watchdog catches the situation, any outstanding TDs are taken back
from the controller in the 'sanitize' phase.

The ohci-hcd driver used to print "IRQ INTR_SF lossage" in this situation;
this patch changes the message to the universally accurate "ED unlink
timeout". Other instances of this message presumably have different root
causes.

Both this Compaq (ZF Micro) quirk and a NEC quirk are now properly compiled
out for non-PCI builds of this driver.

Signed-off-by: Mike Nuss <mike@terascala.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

authored by

Mike Nuss and committed by
Greg Kroah-Hartman
89a0fd18 e8fa0ce6

+253 -75
+139 -27
drivers/usb/host/ohci-hcd.c
··· 81 81 static int ohci_init (struct ohci_hcd *ohci); 82 82 static void ohci_stop (struct usb_hcd *hcd); 83 83 static int ohci_restart (struct ohci_hcd *ohci); 84 - static void ohci_quirk_nec_worker (struct work_struct *work); 85 84 86 85 #include "ohci-hub.c" 87 86 #include "ohci-dbg.c" ··· 313 314 if (!HC_IS_RUNNING (hcd->state)) { 314 315 sanitize: 315 316 ed->state = ED_IDLE; 317 + if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT) 318 + ohci->eds_scheduled--; 316 319 finish_unlinks (ohci, 0); 317 320 } 318 321 ··· 322 321 case ED_UNLINK: /* wait for hw to finish? */ 323 322 /* major IRQ delivery trouble loses INTR_SF too... */ 324 323 if (limit-- == 0) { 325 - ohci_warn (ohci, "IRQ INTR_SF lossage\n"); 324 + ohci_warn(ohci, "ED unlink timeout\n"); 325 + if (quirk_zfmicro(ohci)) { 326 + ohci_warn(ohci, "Attempting ZF TD recovery\n"); 327 + ohci->ed_to_check = ed; 328 + ohci->zf_delay = 2; 329 + } 326 330 goto sanitize; 327 331 } 328 332 spin_unlock_irqrestore (&ohci->lock, flags); ··· 383 377 ohci_usb_reset (ohci); 384 378 /* flush the writes */ 385 379 (void) ohci_readl (ohci, &ohci->regs->control); 380 + } 381 + 382 + static int check_ed(struct ohci_hcd *ohci, struct ed *ed) 383 + { 384 + return (hc32_to_cpu(ohci, ed->hwINFO) & ED_IN) != 0 385 + && (hc32_to_cpu(ohci, ed->hwHeadP) & TD_MASK) 386 + == (hc32_to_cpu(ohci, ed->hwTailP) & TD_MASK) 387 + && !list_empty(&ed->td_list); 388 + } 389 + 390 + /* ZF Micro watchdog timer callback. The ZF Micro chipset sometimes completes 391 + * an interrupt TD but neglects to add it to the donelist. On systems with 392 + * this chipset, we need to periodically check the state of the queues to look 393 + * for such "lost" TDs. 
394 + */ 395 + static void unlink_watchdog_func(unsigned long _ohci) 396 + { 397 + long flags; 398 + unsigned max; 399 + unsigned seen_count = 0; 400 + unsigned i; 401 + struct ed **seen = NULL; 402 + struct ohci_hcd *ohci = (struct ohci_hcd *) _ohci; 403 + 404 + spin_lock_irqsave(&ohci->lock, flags); 405 + max = ohci->eds_scheduled; 406 + if (!max) 407 + goto done; 408 + 409 + if (ohci->ed_to_check) 410 + goto out; 411 + 412 + seen = kcalloc(max, sizeof *seen, GFP_ATOMIC); 413 + if (!seen) 414 + goto out; 415 + 416 + for (i = 0; i < NUM_INTS; i++) { 417 + struct ed *ed = ohci->periodic[i]; 418 + 419 + while (ed) { 420 + unsigned temp; 421 + 422 + /* scan this branch of the periodic schedule tree */ 423 + for (temp = 0; temp < seen_count; temp++) { 424 + if (seen[temp] == ed) { 425 + /* we've checked it and what's after */ 426 + ed = NULL; 427 + break; 428 + } 429 + } 430 + if (!ed) 431 + break; 432 + seen[seen_count++] = ed; 433 + if (!check_ed(ohci, ed)) { 434 + ed = ed->ed_next; 435 + continue; 436 + } 437 + 438 + /* HC's TD list is empty, but HCD sees at least one 439 + * TD that's not been sent through the donelist. 440 + */ 441 + ohci->ed_to_check = ed; 442 + ohci->zf_delay = 2; 443 + 444 + /* The HC may wait until the next frame to report the 445 + * TD as done through the donelist and INTR_WDH. (We 446 + * just *assume* it's not a multi-TD interrupt URB; 447 + * those could defer the IRQ more than one frame, using 448 + * DI...) Check again after the next INTR_SF. 
449 + */ 450 + ohci_writel(ohci, OHCI_INTR_SF, 451 + &ohci->regs->intrstatus); 452 + ohci_writel(ohci, OHCI_INTR_SF, 453 + &ohci->regs->intrenable); 454 + 455 + /* flush those writes */ 456 + (void) ohci_readl(ohci, &ohci->regs->control); 457 + 458 + goto out; 459 + } 460 + } 461 + out: 462 + kfree(seen); 463 + if (ohci->eds_scheduled) 464 + mod_timer(&ohci->unlink_watchdog, round_jiffies_relative(HZ)); 465 + done: 466 + spin_unlock_irqrestore(&ohci->lock, flags); 386 467 } 387 468 388 469 /*-------------------------------------------------------------------------* ··· 709 616 mdelay ((temp >> 23) & 0x1fe); 710 617 hcd->state = HC_STATE_RUNNING; 711 618 619 + if (quirk_zfmicro(ohci)) { 620 + /* Create timer to watch for bad queue state on ZF Micro */ 621 + setup_timer(&ohci->unlink_watchdog, unlink_watchdog_func, 622 + (unsigned long) ohci); 623 + 624 + ohci->eds_scheduled = 0; 625 + ohci->ed_to_check = NULL; 626 + } 627 + 712 628 ohci_dump (ohci, 1); 713 629 714 630 return 0; ··· 731 629 { 732 630 struct ohci_hcd *ohci = hcd_to_ohci (hcd); 733 631 struct ohci_regs __iomem *regs = ohci->regs; 734 - int ints; 632 + int ints; 735 633 736 634 /* we can eliminate a (slow) ohci_readl() 737 - if _only_ WDH caused this irq */ 635 + * if _only_ WDH caused this irq 636 + */ 738 637 if ((ohci->hcca->done_head != 0) 739 638 && ! (hc32_to_cpup (ohci, &ohci->hcca->done_head) 740 639 & 0x01)) { ··· 754 651 755 652 if (ints & OHCI_INTR_UE) { 756 653 // e.g. due to PCI Master/Target Abort 757 - if (ohci->flags & OHCI_QUIRK_NEC) { 654 + if (quirk_nec(ohci)) { 758 655 /* Workaround for a silicon bug in some NEC chips used 759 656 * in Apple's PowerBooks. Adapted from Darwin code. 
760 657 */ ··· 816 713 ohci_writel (ohci, OHCI_INTR_WDH, &regs->intrenable); 817 714 } 818 715 716 + if (quirk_zfmicro(ohci) && (ints & OHCI_INTR_SF)) { 717 + spin_lock(&ohci->lock); 718 + if (ohci->ed_to_check) { 719 + struct ed *ed = ohci->ed_to_check; 720 + 721 + if (check_ed(ohci, ed)) { 722 + /* HC thinks the TD list is empty; HCD knows 723 + * at least one TD is outstanding 724 + */ 725 + if (--ohci->zf_delay == 0) { 726 + struct td *td = list_entry( 727 + ed->td_list.next, 728 + struct td, td_list); 729 + ohci_warn(ohci, 730 + "Reclaiming orphan TD %p\n", 731 + td); 732 + takeback_td(ohci, td); 733 + ohci->ed_to_check = NULL; 734 + } 735 + } else 736 + ohci->ed_to_check = NULL; 737 + } 738 + spin_unlock(&ohci->lock); 739 + } 740 + 819 741 /* could track INTR_SO to reduce available PCI/... bandwidth */ 820 742 821 743 /* handle any pending URB/ED unlinks, leaving INTR_SF enabled ··· 849 721 spin_lock (&ohci->lock); 850 722 if (ohci->ed_rm_list) 851 723 finish_unlinks (ohci, ohci_frame_no(ohci)); 852 - if ((ints & OHCI_INTR_SF) != 0 && !ohci->ed_rm_list 724 + if ((ints & OHCI_INTR_SF) != 0 725 + && !ohci->ed_rm_list 726 + && !ohci->ed_to_check 853 727 && HC_IS_RUNNING(hcd->state)) 854 728 ohci_writel (ohci, OHCI_INTR_SF, &regs->intrdisable); 855 729 spin_unlock (&ohci->lock); ··· 880 750 ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable); 881 751 free_irq(hcd->irq, hcd); 882 752 hcd->irq = -1; 753 + 754 + if (quirk_zfmicro(ohci)) 755 + del_timer(&ohci->unlink_watchdog); 883 756 884 757 remove_debug_files (ohci); 885 758 ohci_mem_cleanup (ohci); ··· 957 824 } 958 825 ohci_dbg(ohci, "restart complete\n"); 959 826 return 0; 960 - } 961 - 962 - /*-------------------------------------------------------------------------*/ 963 - 964 - /* NEC workaround */ 965 - static void ohci_quirk_nec_worker(struct work_struct *work) 966 - { 967 - struct ohci_hcd *ohci = container_of(work, struct ohci_hcd, nec_work); 968 - int status; 969 - 970 - status = 
ohci_init(ohci); 971 - if (status != 0) { 972 - ohci_err(ohci, "Restarting NEC controller failed " 973 - "in ohci_init, %d\n", status); 974 - return; 975 - } 976 - 977 - status = ohci_restart(ohci); 978 - if (status != 0) 979 - ohci_err(ohci, "Restarting NEC controller failed " 980 - "in ohci_restart, %d\n", status); 981 827 } 982 828 983 829 /*-------------------------------------------------------------------------*/
-1
drivers/usb/host/ohci-mem.c
··· 28 28 ohci->next_statechange = jiffies; 29 29 spin_lock_init (&ohci->lock); 30 30 INIT_LIST_HEAD (&ohci->pending); 31 - INIT_WORK (&ohci->nec_work, ohci_quirk_nec_worker); 32 31 } 33 32 34 33 /*-------------------------------------------------------------------------*/
+21 -1
drivers/usb/host/ohci-pci.c
··· 84 84 struct ohci_hcd *ohci = hcd_to_ohci (hcd); 85 85 86 86 ohci->flags |= OHCI_QUIRK_ZFMICRO; 87 - ohci_dbg (ohci, "enabled Compaq ZFMicro chipset quirk\n"); 87 + ohci_dbg(ohci, "enabled Compaq ZFMicro chipset quirks\n"); 88 88 89 89 return 0; 90 90 } ··· 113 113 114 114 /* Check for NEC chip and apply quirk for allegedly lost interrupts. 115 115 */ 116 + 117 + static void ohci_quirk_nec_worker(struct work_struct *work) 118 + { 119 + struct ohci_hcd *ohci = container_of(work, struct ohci_hcd, nec_work); 120 + int status; 121 + 122 + status = ohci_init(ohci); 123 + if (status != 0) { 124 + ohci_err(ohci, "Restarting NEC controller failed in %s, %d\n", 125 + "ohci_init", status); 126 + return; 127 + } 128 + 129 + status = ohci_restart(ohci); 130 + if (status != 0) 131 + ohci_err(ohci, "Restarting NEC controller failed in %s, %d\n", 132 + "ohci_restart", status); 133 + } 134 + 116 135 static int ohci_quirk_nec(struct usb_hcd *hcd) 117 136 { 118 137 struct ohci_hcd *ohci = hcd_to_ohci (hcd); 119 138 120 139 ohci->flags |= OHCI_QUIRK_NEC; 140 + INIT_WORK(&ohci->nec_work, ohci_quirk_nec_worker); 121 141 ohci_dbg (ohci, "enabled NEC chipset lost interrupt quirk\n"); 122 142 123 143 return 0;
+67 -46
drivers/usb/host/ohci-q.c
··· 179 179 ed->ed_prev = NULL; 180 180 ed->ed_next = NULL; 181 181 ed->hwNextED = 0; 182 + if (quirk_zfmicro(ohci) 183 + && (ed->type == PIPE_INTERRUPT) 184 + && !(ohci->eds_scheduled++)) 185 + mod_timer(&ohci->unlink_watchdog, round_jiffies_relative(HZ)); 182 186 wmb (); 183 187 184 188 /* we care about rm_list when setting CLE/BLE in case the HC was at ··· 944 940 TD_MASK; 945 941 946 942 /* INTR_WDH may need to clean up first */ 947 - if (td->td_dma != head) 948 - goto skip_ed; 943 + if (td->td_dma != head) { 944 + if (ed == ohci->ed_to_check) 945 + ohci->ed_to_check = NULL; 946 + else 947 + goto skip_ed; 948 + } 949 949 } 950 950 } 951 951 ··· 1006 998 1007 999 /* ED's now officially unlinked, hc doesn't see */ 1008 1000 ed->state = ED_IDLE; 1001 + if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT) 1002 + ohci->eds_scheduled--; 1009 1003 ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H); 1010 1004 ed->hwNextED = 0; 1011 1005 wmb (); ··· 1031 1021 1032 1022 if (ohci->ed_controltail) { 1033 1023 command |= OHCI_CLF; 1034 - if (ohci->flags & OHCI_QUIRK_ZFMICRO) 1024 + if (quirk_zfmicro(ohci)) 1035 1025 mdelay(1); 1036 1026 if (!(ohci->hc_control & OHCI_CTRL_CLE)) { 1037 1027 control |= OHCI_CTRL_CLE; ··· 1041 1031 } 1042 1032 if (ohci->ed_bulktail) { 1043 1033 command |= OHCI_BLF; 1044 - if (ohci->flags & OHCI_QUIRK_ZFMICRO) 1034 + if (quirk_zfmicro(ohci)) 1045 1035 mdelay(1); 1046 1036 if (!(ohci->hc_control & OHCI_CTRL_BLE)) { 1047 1037 control |= OHCI_CTRL_BLE; ··· 1053 1043 /* CLE/BLE to enable, CLF/BLF to (maybe) kickstart */ 1054 1044 if (control) { 1055 1045 ohci->hc_control |= control; 1056 - if (ohci->flags & OHCI_QUIRK_ZFMICRO) 1046 + if (quirk_zfmicro(ohci)) 1057 1047 mdelay(1); 1058 1048 ohci_writel (ohci, ohci->hc_control, 1059 1049 &ohci->regs->control); 1060 1050 } 1061 1051 if (command) { 1062 - if (ohci->flags & OHCI_QUIRK_ZFMICRO) 1052 + if (quirk_zfmicro(ohci)) 1063 1053 mdelay(1); 1064 1054 ohci_writel (ohci, command, &ohci->regs->cmdstatus); 1065 
1055 } ··· 1071 1061 /*-------------------------------------------------------------------------*/ 1072 1062 1073 1063 /* 1064 + * Used to take back a TD from the host controller. This would normally be 1065 + * called from within dl_done_list, however it may be called directly if the 1066 + * HC no longer sees the TD and it has not appeared on the donelist (after 1067 + * two frames). This bug has been observed on ZF Micro systems. 1068 + */ 1069 + static void takeback_td(struct ohci_hcd *ohci, struct td *td) 1070 + { 1071 + struct urb *urb = td->urb; 1072 + urb_priv_t *urb_priv = urb->hcpriv; 1073 + struct ed *ed = td->ed; 1074 + 1075 + /* update URB's length and status from TD */ 1076 + td_done(ohci, urb, td); 1077 + urb_priv->td_cnt++; 1078 + 1079 + /* If all this urb's TDs are done, call complete() */ 1080 + if (urb_priv->td_cnt == urb_priv->length) 1081 + finish_urb(ohci, urb); 1082 + 1083 + /* clean schedule: unlink EDs that are no longer busy */ 1084 + if (list_empty(&ed->td_list)) { 1085 + if (ed->state == ED_OPER) 1086 + start_ed_unlink(ohci, ed); 1087 + 1088 + /* ... reenabling halted EDs only after fault cleanup */ 1089 + } else if ((ed->hwINFO & cpu_to_hc32(ohci, ED_SKIP | ED_DEQUEUE)) 1090 + == cpu_to_hc32(ohci, ED_SKIP)) { 1091 + td = list_entry(ed->td_list.next, struct td, td_list); 1092 + if (!(td->hwINFO & cpu_to_hc32(ohci, TD_DONE))) { 1093 + ed->hwINFO &= ~cpu_to_hc32(ohci, ED_SKIP); 1094 + /* ... hc may need waking-up */ 1095 + switch (ed->type) { 1096 + case PIPE_CONTROL: 1097 + ohci_writel(ohci, OHCI_CLF, 1098 + &ohci->regs->cmdstatus); 1099 + break; 1100 + case PIPE_BULK: 1101 + ohci_writel(ohci, OHCI_BLF, 1102 + &ohci->regs->cmdstatus); 1103 + break; 1104 + } 1105 + } 1106 + } 1107 + } 1108 + 1109 + /* 1074 1110 * Process normal completions (error or success) and clean the schedules. 1075 1111 * 1076 1112 * This is the main path for handing urbs back to drivers. 
The only other 1077 - * path is finish_unlinks(), which unlinks URBs using ed_rm_list, instead of 1078 - * scanning the (re-reversed) donelist as this does. 1113 + * normal path is finish_unlinks(), which unlinks URBs using ed_rm_list, 1114 + * instead of scanning the (re-reversed) donelist as this does. There's 1115 + * an abnormal path too, handling a quirk in some Compaq silicon: URBs 1116 + * with TDs that appear to be orphaned are directly reclaimed. 1079 1117 */ 1080 1118 static void 1081 1119 dl_done_list (struct ohci_hcd *ohci) ··· 1132 1074 1133 1075 while (td) { 1134 1076 struct td *td_next = td->next_dl_td; 1135 - struct urb *urb = td->urb; 1136 - urb_priv_t *urb_priv = urb->hcpriv; 1137 - struct ed *ed = td->ed; 1138 - 1139 - /* update URB's length and status from TD */ 1140 - td_done (ohci, urb, td); 1141 - urb_priv->td_cnt++; 1142 - 1143 - /* If all this urb's TDs are done, call complete() */ 1144 - if (urb_priv->td_cnt == urb_priv->length) 1145 - finish_urb (ohci, urb); 1146 - 1147 - /* clean schedule: unlink EDs that are no longer busy */ 1148 - if (list_empty (&ed->td_list)) { 1149 - if (ed->state == ED_OPER) 1150 - start_ed_unlink (ohci, ed); 1151 - 1152 - /* ... reenabling halted EDs only after fault cleanup */ 1153 - } else if ((ed->hwINFO & cpu_to_hc32 (ohci, 1154 - ED_SKIP | ED_DEQUEUE)) 1155 - == cpu_to_hc32 (ohci, ED_SKIP)) { 1156 - td = list_entry (ed->td_list.next, struct td, td_list); 1157 - if (!(td->hwINFO & cpu_to_hc32 (ohci, TD_DONE))) { 1158 - ed->hwINFO &= ~cpu_to_hc32 (ohci, ED_SKIP); 1159 - /* ... hc may need waking-up */ 1160 - switch (ed->type) { 1161 - case PIPE_CONTROL: 1162 - ohci_writel (ohci, OHCI_CLF, 1163 - &ohci->regs->cmdstatus); 1164 - break; 1165 - case PIPE_BULK: 1166 - ohci_writel (ohci, OHCI_BLF, 1167 - &ohci->regs->cmdstatus); 1168 - break; 1169 - } 1170 - } 1171 - } 1172 - 1077 + takeback_td(ohci, td); 1173 1078 td = td_next; 1174 1079 } 1175 1080 }
+26
drivers/usb/host/ohci.h
··· 401 401 // there are also chip quirks/bugs in init logic 402 402 403 403 struct work_struct nec_work; /* Worker for NEC quirk */ 404 + 405 + /* Needed for ZF Micro quirk */ 406 + struct timer_list unlink_watchdog; 407 + unsigned eds_scheduled; 408 + struct ed *ed_to_check; 409 + unsigned zf_delay; 404 410 }; 411 + 412 + #ifdef CONFIG_PCI 413 + static inline int quirk_nec(struct ohci_hcd *ohci) 414 + { 415 + return ohci->flags & OHCI_QUIRK_NEC; 416 + } 417 + static inline int quirk_zfmicro(struct ohci_hcd *ohci) 418 + { 419 + return ohci->flags & OHCI_QUIRK_ZFMICRO; 420 + } 421 + #else 422 + static inline int quirk_nec(struct ohci_hcd *ohci) 423 + { 424 + return 0; 425 + } 426 + static inline int quirk_zfmicro(struct ohci_hcd *ohci) 427 + { 428 + return 0; 429 + } 430 + #endif 405 431 406 432 /* convert between an hcd pointer and the corresponding ohci_hcd */ 407 433 static inline struct ohci_hcd *hcd_to_ohci (struct usb_hcd *hcd)