[PATCH] sbp2: fix deadlocks and delays on device removal/rmmod

Fixes for deadlocks of the ieee1394 and scsi subsystems and long delays in
futile error recovery attempts when SBP-2 devices are removed or drivers are
unloaded.

- Complete commands quickly with DID_NO_CONNECT if the 1394 node is gone or if
the 1394 low-level driver was unloaded.
- Skip unnecessary work in the eh_abort_handler and eh_device_reset_handler if
the node or 1394 low-level driver is gone.
- Let the SCSI high-level layer shut down gracefully when sbp2 is being unloaded
or detached from the 1394 unit. A call to scsi_remove_device is added for this
purpose, which requires us to store a scsi_device pointer.
- The scsi_device pointer is obtained from the slave_alloc hook and cleared by
the slave_destroy hook. This avoids use of the pointer after the scsi device
was deleted, e.g. by the user via scsi_mod's sysfs interface.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Jody McIntyre <scjody@steamballoon.com>
Cc: Ben Collins <bcollins@debian.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by Jody McIntyre and committed by Linus Torvalds abd559b1 105d7b38

+58 -38
+58 -38
drivers/ieee1394/sbp2.c
··· 596 596 spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags); 597 597 } 598 598 599 + /* 600 + * Is scsi_id valid? Is the 1394 node still present? 601 + */ 602 + static inline int sbp2util_node_is_available(struct scsi_id_instance_data *scsi_id) 603 + { 604 + return scsi_id && scsi_id->ne && !scsi_id->ne->in_limbo; 605 + } 606 + 599 607 600 608 601 609 /********************************************* ··· 639 631 { 640 632 struct unit_directory *ud; 641 633 struct scsi_id_instance_data *scsi_id; 634 + struct scsi_device *sdev; 642 635 643 636 SBP2_DEBUG("sbp2_remove"); 644 637 645 638 ud = container_of(dev, struct unit_directory, device); 646 639 scsi_id = ud->device.driver_data; 640 + if (!scsi_id) 641 + return 0; 642 + 643 + /* Trigger shutdown functions in scsi's highlevel. */ 644 + if (scsi_id->scsi_host) 645 + scsi_unblock_requests(scsi_id->scsi_host); 646 + sdev = scsi_id->sdev; 647 + if (sdev) { 648 + scsi_id->sdev = NULL; 649 + scsi_remove_device(sdev); 650 + } 647 651 648 652 sbp2_logout_device(scsi_id); 649 653 sbp2_remove_device(scsi_id); ··· 2493 2473 struct scsi_id_instance_data *scsi_id = 2494 2474 (struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0]; 2495 2475 struct sbp2scsi_host_info *hi; 2476 + int result = DID_NO_CONNECT << 16; 2496 2477 2497 2478 SBP2_DEBUG("sbp2scsi_queuecommand"); 2498 2479 2499 - /* 2500 - * If scsi_id is null, it means there is no device in this slot, 2501 - * so we should return selection timeout. 
2502 - */ 2503 - if (!scsi_id) { 2504 - SCpnt->result = DID_NO_CONNECT << 16; 2505 - done (SCpnt); 2506 - return 0; 2507 - } 2480 + if (!sbp2util_node_is_available(scsi_id)) 2481 + goto done; 2508 2482 2509 2483 hi = scsi_id->hi; 2510 2484 2511 2485 if (!hi) { 2512 2486 SBP2_ERR("sbp2scsi_host_info is NULL - this is bad!"); 2513 - SCpnt->result = DID_NO_CONNECT << 16; 2514 - done (SCpnt); 2515 - return(0); 2487 + goto done; 2516 2488 } 2517 2489 2518 2490 /* 2519 2491 * Until we handle multiple luns, just return selection time-out 2520 2492 * to any IO directed at non-zero LUNs 2521 2493 */ 2522 - if (SCpnt->device->lun) { 2523 - SCpnt->result = DID_NO_CONNECT << 16; 2524 - done (SCpnt); 2525 - return(0); 2526 - } 2494 + if (SCpnt->device->lun) 2495 + goto done; 2527 2496 2528 2497 /* 2529 2498 * Check for request sense command, and handle it here ··· 2523 2514 memcpy(SCpnt->request_buffer, SCpnt->sense_buffer, SCpnt->request_bufflen); 2524 2515 memset(SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer)); 2525 2516 sbp2scsi_complete_command(scsi_id, SBP2_SCSI_STATUS_GOOD, SCpnt, done); 2526 - return(0); 2517 + return 0; 2527 2518 } 2528 2519 2529 2520 /* ··· 2531 2522 */ 2532 2523 if (!hpsb_node_entry_valid(scsi_id->ne)) { 2533 2524 SBP2_ERR("Bus reset in progress - rejecting command"); 2534 - SCpnt->result = DID_BUS_BUSY << 16; 2535 - done (SCpnt); 2536 - return(0); 2525 + result = DID_BUS_BUSY << 16; 2526 + goto done; 2537 2527 } 2538 2528 2539 2529 /* ··· 2543 2535 sbp2scsi_complete_command(scsi_id, SBP2_SCSI_STATUS_SELECTION_TIMEOUT, 2544 2536 SCpnt, done); 2545 2537 } 2538 + return 0; 2546 2539 2547 - return(0); 2540 + done: 2541 + SCpnt->result = result; 2542 + done(SCpnt); 2543 + return 0; 2548 2544 } 2549 2545 2550 2546 /* ··· 2695 2683 } 2696 2684 2697 2685 2698 - static int sbp2scsi_slave_configure (struct scsi_device *sdev) 2686 + static int sbp2scsi_slave_alloc(struct scsi_device *sdev) 2687 + { 2688 + ((struct scsi_id_instance_data 
*)sdev->host->hostdata[0])->sdev = sdev; 2689 + return 0; 2690 + } 2691 + 2692 + 2693 + static int sbp2scsi_slave_configure(struct scsi_device *sdev) 2699 2694 { 2700 2695 blk_queue_dma_alignment(sdev->request_queue, (512 - 1)); 2701 - 2702 2696 return 0; 2697 + } 2698 + 2699 + 2700 + static void sbp2scsi_slave_destroy(struct scsi_device *sdev) 2701 + { 2702 + ((struct scsi_id_instance_data *)sdev->host->hostdata[0])->sdev = NULL; 2703 + return; 2703 2704 } 2704 2705 2705 2706 ··· 2730 2705 SBP2_ERR("aborting sbp2 command"); 2731 2706 scsi_print_command(SCpnt); 2732 2707 2733 - if (scsi_id) { 2708 + if (sbp2util_node_is_available(scsi_id)) { 2734 2709 2735 2710 /* 2736 2711 * Right now, just return any matching command structures ··· 2767 2742 /* 2768 2743 * Called by scsi stack when something has really gone wrong. 2769 2744 */ 2770 - static int __sbp2scsi_reset(struct scsi_cmnd *SCpnt) 2745 + static int sbp2scsi_reset(struct scsi_cmnd *SCpnt) 2771 2746 { 2772 2747 struct scsi_id_instance_data *scsi_id = 2773 2748 (struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0]; 2749 + unsigned long flags; 2774 2750 2775 2751 SBP2_ERR("reset requested"); 2776 2752 2777 - if (scsi_id) { 2753 + spin_lock_irqsave(SCpnt->device->host->host_lock, flags); 2754 + 2755 + if (sbp2util_node_is_available(scsi_id)) { 2778 2756 SBP2_ERR("Generating sbp2 fetch agent reset"); 2779 2757 sbp2_agent_reset(scsi_id, 0); 2780 2758 } 2781 2759 2782 - return(SUCCESS); 2783 - } 2784 - 2785 - static int sbp2scsi_reset(struct scsi_cmnd *SCpnt) 2786 - { 2787 - unsigned long flags; 2788 - int rc; 2789 - 2790 - spin_lock_irqsave(SCpnt->device->host->host_lock, flags); 2791 - rc = __sbp2scsi_reset(SCpnt); 2792 2760 spin_unlock_irqrestore(SCpnt->device->host->host_lock, flags); 2793 2761 2794 - return rc; 2762 + return SUCCESS; 2795 2763 } 2796 2764 2797 2765 static const char *sbp2scsi_info (struct Scsi_Host *host) ··· 2835 2817 .eh_device_reset_handler = sbp2scsi_reset, 2836 2818 
.eh_bus_reset_handler = sbp2scsi_reset, 2837 2819 .eh_host_reset_handler = sbp2scsi_reset, 2820 + .slave_alloc = sbp2scsi_slave_alloc, 2838 2821 .slave_configure = sbp2scsi_slave_configure, 2822 + .slave_destroy = sbp2scsi_slave_destroy, 2839 2823 .this_id = -1, 2840 2824 .sg_tablesize = SG_ALL, 2841 2825 .use_clustering = ENABLE_CLUSTERING,