[SCSI] megaraid_{mm,mbox}: fix a bug in reset handler

When an abort fails, the driver's reset handler is called. In the reset
handler, the driver calls the 'scsi_done()' callback for the same SCSI command
packet (struct scsi_cmnd) multiple times if there are multiple SCSI command
packets in the pend_list. Moreover, if there is an entry in the pend_list with
an IOCTL packet associated, the driver returns it to the wrong free_list so
that, in turn, the driver could end up with 'NULL pointer dereference..' during
I/O command building with an incorrect resource.

Also, the patch contains several minor/cosmetic changes besides this.

Signed-off-by: Seokmann Ju <seokmann.ju@lsil.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

authored by Ju, Seokmann and committed by James Bottomley c005fb4f 509e5e5d

+71 -20
+25
Documentation/scsi/ChangeLog.megaraid
··· 1 + Release Date : Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com> 2 + Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module) 3 + Older Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module) 4 + 5 + 1. Fixed a bug in megaraid_reset_handler(). 6 + Customer reported "Unable to handle kernel NULL pointer dereference 7 + at virtual address 00000000" when system goes to reset condition 8 + for some reason. It happened randomly. 9 + Root Cause: in the megaraid_reset_handler(), there is possibility not 10 + returning pending packets in the pend_list if there are multiple 11 + pending packets. 12 + Fix: Made the change in the driver so that it will return all packets 13 + in the pend_list. 14 + 15 + 2. Added change request. 16 + As found in the following URL, rmb() only didn't help the 17 + problem. I had to increase the loop counter to 0xFFFFFF. (6 F's) 18 + http://marc.theaimsgroup.com/?l=linux-scsi&m=110971060502497&w=2 19 + 20 + I attached a patch for your reference, too. 21 + Could you check and get this fix in your driver? 22 + 23 + Best Regards, 24 + Jun'ichi Nomura 25 + 1 26 Release Date : Fri Nov 11 12:27:22 EST 2005 - Seokmann Ju <sju@lsil.com> 2 27 Current Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module) 3 28 Older Version : 2.20.4.6 (scsi module), 2.20.2.6 (cmm module)
+41 -18
drivers/scsi/megaraid/megaraid_mbox.c
··· 10 10 * 2 of the License, or (at your option) any later version. 11 11 * 12 12 * FILE : megaraid_mbox.c 13 - * Version : v2.20.4.7 (Nov 14 2005) 13 + * Version : v2.20.4.8 (Apr 11 2006) 14 14 * 15 15 * Authors: 16 16 * Atul Mukker <Atul.Mukker@lsil.com> ··· 2278 2278 unsigned long flags; 2279 2279 uint8_t c; 2280 2280 int status; 2281 + uioc_t *kioc; 2281 2282 2282 2283 2283 2284 if (!adapter) return; ··· 2320 2319 2321 2320 // remove from local clist 2322 2321 list_del_init(&scb->list); 2322 + 2323 + kioc = (uioc_t *)scb->gp; 2324 + kioc->status = 0; 2323 2325 2324 2326 megaraid_mbox_mm_done(adapter, scb); 2325 2327 ··· 2640 2636 int recovery_window; 2641 2637 int recovering; 2642 2638 int i; 2639 + uioc_t *kioc; 2643 2640 2644 2641 adapter = SCP2ADAPTER(scp); 2645 2642 raid_dev = ADAP2RAIDDEV(adapter); ··· 2660 2655 // Also, reset all the commands currently owned by the driver 2661 2656 spin_lock_irqsave(PENDING_LIST_LOCK(adapter), flags); 2662 2657 list_for_each_entry_safe(scb, tmp, &adapter->pend_list, list) { 2663 - 2664 2658 list_del_init(&scb->list); // from pending list 2665 2659 2666 - con_log(CL_ANN, (KERN_WARNING 2667 - "megaraid: %ld:%d[%d:%d], reset from pending list\n", 2668 - scp->serial_number, scb->sno, 2669 - scb->dev_channel, scb->dev_target)); 2660 + if (scb->sno >= MBOX_MAX_SCSI_CMDS) { 2661 + con_log(CL_ANN, (KERN_WARNING 2662 + "megaraid: IOCTL packet with %d[%d:%d] being reset\n", 2663 + scb->sno, scb->dev_channel, scb->dev_target)); 2670 2664 2671 - scp->result = (DID_RESET << 16); 2672 - scp->scsi_done(scp); 2665 + scb->status = -1; 2673 2666 2674 - megaraid_dealloc_scb(adapter, scb); 2667 + kioc = (uioc_t *)scb->gp; 2668 + kioc->status = -EFAULT; 2669 + 2670 + megaraid_mbox_mm_done(adapter, scb); 2671 + } else { 2672 + if (scb->scp == scp) { // Found command 2673 + con_log(CL_ANN, (KERN_WARNING 2674 + "megaraid: %ld:%d[%d:%d], reset from pending list\n", 2675 + scp->serial_number, scb->sno, 2676 + scb->dev_channel, scb->dev_target)); 
2677 + } else { 2678 + con_log(CL_ANN, (KERN_WARNING 2679 + "megaraid: IO packet with %d[%d:%d] being reset\n", 2680 + scb->sno, scb->dev_channel, scb->dev_target)); 2681 + } 2682 + 2683 + scb->scp->result = (DID_RESET << 16); 2684 + scb->scp->scsi_done(scb->scp); 2685 + 2686 + megaraid_dealloc_scb(adapter, scb); 2687 + } 2675 2688 } 2676 2689 spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter), flags); 2677 2690 2678 2691 if (adapter->outstanding_cmds) { 2679 2692 con_log(CL_ANN, (KERN_NOTICE 2680 2693 "megaraid: %d outstanding commands. Max wait %d sec\n", 2681 - adapter->outstanding_cmds, MBOX_RESET_WAIT)); 2694 + adapter->outstanding_cmds, 2695 + (MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT))); 2682 2696 } 2683 2697 2684 2698 recovery_window = MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT; 2685 2699 2686 2700 recovering = adapter->outstanding_cmds; 2687 2701 2688 - for (i = 0; i < recovery_window && adapter->outstanding_cmds; i++) { 2702 + for (i = 0; i < recovery_window; i++) { 2689 2703 2690 2704 megaraid_ack_sequence(adapter); 2691 2705 ··· 2713 2689 con_log(CL_ANN, ( 2714 2690 "megaraid mbox: Wait for %d commands to complete:%d\n", 2715 2691 adapter->outstanding_cmds, 2716 - MBOX_RESET_WAIT - i)); 2692 + (MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT) - i)); 2717 2693 } 2718 2694 2719 2695 // bailout if no recovery happended in reset time 2720 - if ((i == MBOX_RESET_WAIT) && 2721 - (recovering == adapter->outstanding_cmds)) { 2696 + if (adapter->outstanding_cmds == 0) { 2722 2697 break; 2723 2698 } 2724 2699 ··· 2941 2918 wmb(); 2942 2919 WRINDOOR(raid_dev, raid_dev->mbox_dma | 0x1); 2943 2920 2944 - for (i = 0; i < 0xFFFFF; i++) { 2921 + for (i = 0; i < MBOX_SYNC_WAIT_CNT; i++) { 2945 2922 if (mbox->numstatus != 0xFF) break; 2946 2923 rmb(); 2924 + udelay(MBOX_SYNC_DELAY_200); 2947 2925 } 2948 2926 2949 - if (i == 0xFFFFF) { 2927 + if (i == MBOX_SYNC_WAIT_CNT) { 2950 2928 // We may need to re-calibrate the counter 2951 2929 con_log(CL_ANN, (KERN_CRIT 2952 2930 "megaraid: fast 
sync command timed out\n")); ··· 3499 3475 adp.drvr_data = (unsigned long)adapter; 3500 3476 adp.pdev = adapter->pdev; 3501 3477 adp.issue_uioc = megaraid_mbox_mm_handler; 3502 - adp.timeout = 300; 3478 + adp.timeout = MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT; 3503 3479 adp.max_kioc = MBOX_MAX_USER_CMDS; 3504 3480 3505 3481 if ((rval = mraid_mm_register_adp(&adp)) != 0) { ··· 3726 3702 unsigned long flags; 3727 3703 3728 3704 kioc = (uioc_t *)scb->gp; 3729 - kioc->status = 0; 3730 3705 mbox64 = (mbox64_t *)(unsigned long)kioc->cmdbuf; 3731 3706 mbox64->mbox32.status = scb->status; 3732 3707 raw_mbox = (uint8_t *)&mbox64->mbox32;
+5 -2
drivers/scsi/megaraid/megaraid_mbox.h
··· 21 21 #include "megaraid_ioctl.h" 22 22 23 23 24 - #define MEGARAID_VERSION "2.20.4.7" 25 - #define MEGARAID_EXT_VERSION "(Release Date: Mon Nov 14 12:27:22 EST 2005)" 24 + #define MEGARAID_VERSION "2.20.4.8" 25 + #define MEGARAID_EXT_VERSION "(Release Date: Mon Apr 11 12:27:22 EST 2006)" 26 26 27 27 28 28 /* ··· 100 100 #define MBOX_BUSY_WAIT 10 // max usec to wait for busy mailbox 101 101 #define MBOX_RESET_WAIT 180 // wait these many seconds in reset 102 102 #define MBOX_RESET_EXT_WAIT 120 // extended wait reset 103 + #define MBOX_SYNC_WAIT_CNT 0xFFFF // wait loop index for synchronous mode 104 + 105 + #define MBOX_SYNC_DELAY_200 200 // 200 micro-seconds 103 106 104 107 /* 105 108 * maximum transfer that can happen through the firmware commands issued