cciss: make rebuild_lun_table behave better

This patch makes rebuild_lun_table smart enough to not rip a logical
volume out from under the OS. Without this fix, if a customer is running
hpacucli to monitor their storage, the driver will blindly remove and re-add
the disks whenever the utility calls the CCISS_REGNEWD ioctl. Unfortunately,
both hpacucli and ACUXE call the ioctl repeatedly. Customers have reported
IO coming to a standstill. Calling the ioctl is the problem; this patch is
the fix.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Mike Miller <mike.miller@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

authored by Mike Miller and committed by Jens Axboe a72da29b f7108f91

+216 -122
+214 -122
drivers/block/cciss.c
··· 1330 1330 spin_unlock_irqrestore(&h->lock, flags); 1331 1331 } 1332 1332 1333 + /* This function gets the serial number of a logical drive via 1334 + * inquiry page 0x83. Serial no. is 16 bytes. If the serial 1335 + * number cannot be had, for whatever reason, 16 bytes of 0xff 1336 + * are returned instead. 1337 + */ 1338 + static void cciss_get_serial_no(int ctlr, int logvol, int withirq, 1339 + unsigned char *serial_no, int buflen) 1340 + { 1341 + #define PAGE_83_INQ_BYTES 64 1342 + int rc; 1343 + unsigned char *buf; 1344 + 1345 + if (buflen > 16) 1346 + buflen = 16; 1347 + memset(serial_no, 0xff, buflen); 1348 + buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL); 1349 + if (!buf) 1350 + return; 1351 + memset(serial_no, 0, buflen); 1352 + if (withirq) 1353 + rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf, 1354 + PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD); 1355 + else 1356 + rc = sendcmd(CISS_INQUIRY, ctlr, buf, 1357 + PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD); 1358 + if (rc == IO_OK) 1359 + memcpy(serial_no, &buf[8], buflen); 1360 + kfree(buf); 1361 + return; 1362 + } 1363 + 1333 1364 /* This function will check the usage_count of the drive to be updated/added. 1334 - * If the usage_count is zero then the drive information will be updated and 1335 - * the disk will be re-registered with the kernel. If not then it will be 1336 - * left alone for the next reboot. The exception to this is disk 0 which 1337 - * will always be left registered with the kernel since it is also the 1338 - * controller node. Any changes to disk 0 will show up on the next 1339 - * reboot. 1365 + * If the usage_count is zero and it is a heretofore unknown drive, or, 1366 + * the drive's capacity, geometry, or serial number has changed, 1367 + * then the drive information will be updated and the disk will be 1368 + * re-registered with the kernel. If these conditions don't hold, 1369 + * then it will be left alone for the next reboot. 
The exception to this 1370 + * is disk 0 which will always be left registered with the kernel since it 1371 + * is also the controller node. Any changes to disk 0 will show up on 1372 + * the next reboot. 1340 1373 */ 1341 1374 static void cciss_update_drive_info(int ctlr, int drv_index) 1342 1375 { ··· 1380 1347 sector_t total_size; 1381 1348 unsigned long flags = 0; 1382 1349 int ret = 0; 1350 + drive_info_struct *drvinfo; 1351 + 1352 + /* Get information about the disk and modify the driver structure */ 1353 + inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL); 1354 + drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL); 1355 + if (inq_buff == NULL || drvinfo == NULL) 1356 + goto mem_msg; 1357 + 1358 + /* testing to see if 16-byte CDBs are already being used */ 1359 + if (h->cciss_read == CCISS_READ_16) { 1360 + cciss_read_capacity_16(h->ctlr, drv_index, 1, 1361 + &total_size, &block_size); 1362 + 1363 + } else { 1364 + cciss_read_capacity(ctlr, drv_index, 1, 1365 + &total_size, &block_size); 1366 + 1367 + /* if read_capacity returns all F's this volume is >2TB */ 1368 + /* in size so we switch to 16-byte CDB's for all */ 1369 + /* read/write ops */ 1370 + if (total_size == 0xFFFFFFFFULL) { 1371 + cciss_read_capacity_16(ctlr, drv_index, 1, 1372 + &total_size, &block_size); 1373 + h->cciss_read = CCISS_READ_16; 1374 + h->cciss_write = CCISS_WRITE_16; 1375 + } else { 1376 + h->cciss_read = CCISS_READ_10; 1377 + h->cciss_write = CCISS_WRITE_10; 1378 + } 1379 + } 1380 + 1381 + cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size, 1382 + inq_buff, drvinfo); 1383 + drvinfo->block_size = block_size; 1384 + drvinfo->nr_blocks = total_size + 1; 1385 + 1386 + cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no, 1387 + sizeof(drvinfo->serial_no)); 1388 + 1389 + /* Is it the same disk we already know, and nothing's changed? 
*/ 1390 + if (h->drv[drv_index].raid_level != -1 && 1391 + ((memcmp(drvinfo->serial_no, 1392 + h->drv[drv_index].serial_no, 16) == 0) && 1393 + drvinfo->block_size == h->drv[drv_index].block_size && 1394 + drvinfo->nr_blocks == h->drv[drv_index].nr_blocks && 1395 + drvinfo->heads == h->drv[drv_index].heads && 1396 + drvinfo->sectors == h->drv[drv_index].sectors && 1397 + drvinfo->cylinders == h->drv[drv_index].cylinders)) { 1398 + /* The disk is unchanged, nothing to update */ 1399 + goto freeret; 1400 + } 1401 + 1402 + /* Not the same disk, or something's changed, so we need to */ 1403 + /* deregister it, and re-register it, if it's not in use. */ 1383 1404 1384 1405 /* if the disk already exists then deregister it before proceeding */ 1385 - if (h->drv[drv_index].raid_level != -1) { 1406 + /* (unless it's the first disk (for the controller node). */ 1407 + if (h->drv[drv_index].raid_level != -1 && drv_index != 0) { 1408 + printk(KERN_WARNING "disk %d has changed.\n", drv_index); 1386 1409 spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); 1387 1410 h->drv[drv_index].busy_configuring = 1; 1388 1411 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); ··· 1453 1364 1454 1365 /* If the disk is in use return */ 1455 1366 if (ret) 1456 - return; 1367 + goto freeret; 1457 1368 1458 - /* Get information about the disk and modify the driver structure */ 1459 - inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL); 1460 - if (inq_buff == NULL) 1461 - goto mem_msg; 1462 - 1463 - /* testing to see if 16-byte CDBs are already being used */ 1464 - if (h->cciss_read == CCISS_READ_16) { 1465 - cciss_read_capacity_16(h->ctlr, drv_index, 1, 1466 - &total_size, &block_size); 1467 - goto geo_inq; 1468 - } 1469 - 1470 - cciss_read_capacity(ctlr, drv_index, 1, 1471 - &total_size, &block_size); 1472 - 1473 - /* if read_capacity returns all F's this volume is >2TB in size */ 1474 - /* so we switch to 16-byte CDB's for all read/write ops */ 1475 - if (total_size == 0xFFFFFFFFULL) { 1476 
- cciss_read_capacity_16(ctlr, drv_index, 1, 1477 - &total_size, &block_size); 1478 - h->cciss_read = CCISS_READ_16; 1479 - h->cciss_write = CCISS_WRITE_16; 1480 - } else { 1481 - h->cciss_read = CCISS_READ_10; 1482 - h->cciss_write = CCISS_WRITE_10; 1483 - } 1484 - geo_inq: 1485 - cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size, 1486 - inq_buff, &h->drv[drv_index]); 1369 + /* Save the new information from cciss_geometry_inquiry */ 1370 + /* and serial number inquiry. */ 1371 + h->drv[drv_index].block_size = drvinfo->block_size; 1372 + h->drv[drv_index].nr_blocks = drvinfo->nr_blocks; 1373 + h->drv[drv_index].heads = drvinfo->heads; 1374 + h->drv[drv_index].sectors = drvinfo->sectors; 1375 + h->drv[drv_index].cylinders = drvinfo->cylinders; 1376 + h->drv[drv_index].raid_level = drvinfo->raid_level; 1377 + memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16); 1487 1378 1488 1379 ++h->num_luns; 1489 1380 disk = h->gendisk[drv_index]; 1490 1381 set_capacity(disk, h->drv[drv_index].nr_blocks); 1491 1382 1492 - /* if it's the controller it's already added */ 1383 + /* if it's the controller (if drv_index == 0) it's already added */ 1493 1384 if (drv_index) { 1494 1385 disk->queue = blk_init_queue(do_cciss_request, &h->lock); 1495 1386 sprintf(disk->disk_name, "cciss/c%dd%d", ctlr, drv_index); ··· 1506 1437 1507 1438 freeret: 1508 1439 kfree(inq_buff); 1440 + kfree(drvinfo); 1509 1441 return; 1510 1442 mem_msg: 1511 1443 printk(KERN_ERR "cciss: out of memory\n"); ··· 1548 1478 int ctlr = h->ctlr; 1549 1479 int num_luns; 1550 1480 ReportLunData_struct *ld_buff = NULL; 1551 - drive_info_struct *drv = NULL; 1552 1481 int return_code; 1553 1482 int listlength = 0; 1554 1483 int i; ··· 1563 1494 return -EBUSY; 1564 1495 } 1565 1496 h->busy_configuring = 1; 1497 + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); 1566 1498 1567 - /* if del_disk is NULL then we are being called to add a new disk 1568 - * and update the logical drive table. 
If it is not NULL then 1569 - * we will check if the disk is in use or not. 1570 - */ 1571 - if (del_disk != NULL) { 1572 - drv = get_drv(del_disk); 1573 - drv->busy_configuring = 1; 1574 - spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); 1575 - return_code = deregister_disk(del_disk, drv, 1); 1576 - drv->busy_configuring = 0; 1577 - h->busy_configuring = 0; 1578 - return return_code; 1579 - } else { 1580 - spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); 1581 - if (!capable(CAP_SYS_RAWIO)) 1582 - return -EPERM; 1499 + if (!capable(CAP_SYS_RAWIO)) 1500 + return -EPERM; 1583 1501 1584 - ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL); 1585 - if (ld_buff == NULL) 1586 - goto mem_msg; 1502 + ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL); 1503 + if (ld_buff == NULL) 1504 + goto mem_msg; 1587 1505 1588 - return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff, 1589 - sizeof(ReportLunData_struct), 0, 1590 - 0, 0, TYPE_CMD); 1506 + return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff, 1507 + sizeof(ReportLunData_struct), 0, 1508 + 0, 0, TYPE_CMD); 1591 1509 1592 - if (return_code == IO_OK) { 1593 - listlength = 1594 - be32_to_cpu(*(__be32 *) ld_buff->LUNListLength); 1595 - } else { /* reading number of logical volumes failed */ 1596 - printk(KERN_WARNING "cciss: report logical volume" 1597 - " command failed\n"); 1598 - listlength = 0; 1599 - goto freeret; 1600 - } 1510 + if (return_code == IO_OK) 1511 + listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength); 1512 + else { /* reading number of logical volumes failed */ 1513 + printk(KERN_WARNING "cciss: report logical volume" 1514 + " command failed\n"); 1515 + listlength = 0; 1516 + goto freeret; 1517 + } 1601 1518 1602 - num_luns = listlength / 8; /* 8 bytes per entry */ 1603 - if (num_luns > CISS_MAX_LUN) { 1604 - num_luns = CISS_MAX_LUN; 1605 - printk(KERN_WARNING "cciss: more luns configured" 1606 - " on controller than can be handled by" 1607 - " this driver.\n"); 
1608 - } 1519 + num_luns = listlength / 8; /* 8 bytes per entry */ 1520 + if (num_luns > CISS_MAX_LUN) { 1521 + num_luns = CISS_MAX_LUN; 1522 + printk(KERN_WARNING "cciss: more luns configured" 1523 + " on controller than can be handled by" 1524 + " this driver.\n"); 1525 + } 1609 1526 1610 - /* Compare controller drive array to drivers drive array. 1611 - * Check for updates in the drive information and any new drives 1612 - * on the controller. 1613 - */ 1614 - for (i = 0; i < num_luns; i++) { 1615 - int j; 1616 - 1617 - drv_found = 0; 1618 - 1619 - lunid = (0xff & 1620 - (unsigned int)(ld_buff->LUN[i][3])) << 24; 1621 - lunid |= (0xff & 1622 - (unsigned int)(ld_buff->LUN[i][2])) << 16; 1623 - lunid |= (0xff & 1624 - (unsigned int)(ld_buff->LUN[i][1])) << 8; 1625 - lunid |= 0xff & (unsigned int)(ld_buff->LUN[i][0]); 1626 - 1627 - /* Find if the LUN is already in the drive array 1628 - * of the controller. If so then update its info 1629 - * if not is use. If it does not exist then find 1630 - * the first free index and add it. 1631 - */ 1632 - for (j = 0; j <= h->highest_lun; j++) { 1633 - if (h->drv[j].LunID == lunid) { 1634 - drv_index = j; 1635 - drv_found = 1; 1636 - } 1527 + /* Compare controller drive array to driver's drive array */ 1528 + /* to see if any drives are missing on the controller due */ 1529 + /* to action of Array Config Utility (user deletes drive) */ 1530 + /* and deregister logical drives which have disappeared. */ 1531 + for (i = 0; i <= h->highest_lun; i++) { 1532 + int j; 1533 + drv_found = 0; 1534 + for (j = 0; j < num_luns; j++) { 1535 + memcpy(&lunid, &ld_buff->LUN[j][0], 4); 1536 + lunid = le32_to_cpu(lunid); 1537 + if (h->drv[i].LunID == lunid) { 1538 + drv_found = 1; 1539 + break; 1637 1540 } 1541 + } 1542 + if (!drv_found) { 1543 + /* Deregister it from the OS, it's gone. 
*/ 1544 + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); 1545 + h->drv[i].busy_configuring = 1; 1546 + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); 1547 + return_code = deregister_disk(h->gendisk[i], 1548 + &h->drv[i], 1); 1549 + h->drv[i].busy_configuring = 0; 1550 + } 1551 + } 1638 1552 1639 - /* check if the drive was found already in the array */ 1640 - if (!drv_found) { 1641 - drv_index = cciss_find_free_drive_index(ctlr); 1642 - if (drv_index == -1) 1643 - goto freeret; 1553 + /* Compare controller drive array to driver's drive array. 1554 + * Check for updates in the drive information and any new drives 1555 + * on the controller due to ACU adding logical drives, or changing 1556 + * a logical drive's size, etc. Reregister any new/changed drives 1557 + */ 1558 + for (i = 0; i < num_luns; i++) { 1559 + int j; 1644 1560 1645 - /*Check if the gendisk needs to be allocated */ 1561 + drv_found = 0; 1562 + 1563 + memcpy(&lunid, &ld_buff->LUN[i][0], 4); 1564 + lunid = le32_to_cpu(lunid); 1565 + 1566 + /* Find if the LUN is already in the drive array 1567 + * of the driver. If so then update its info 1568 + * if not in use. If it does not exist then find 1569 + * the first free index and add it. 
1570 + */ 1571 + for (j = 0; j <= h->highest_lun; j++) { 1572 + if (h->drv[j].raid_level != -1 && 1573 + h->drv[j].LunID == lunid) { 1574 + drv_index = j; 1575 + drv_found = 1; 1576 + break; 1577 + } 1578 + } 1579 + 1580 + /* check if the drive was found already in the array */ 1581 + if (!drv_found) { 1582 + drv_index = cciss_find_free_drive_index(ctlr); 1583 + if (drv_index == -1) 1584 + goto freeret; 1585 + /*Check if the gendisk needs to be allocated */ 1586 + if (!h->gendisk[drv_index]) { 1587 + h->gendisk[drv_index] = 1588 + alloc_disk(1 << NWD_SHIFT); 1646 1589 if (!h->gendisk[drv_index]){ 1647 - h->gendisk[drv_index] = alloc_disk(1 << NWD_SHIFT); 1648 - if (!h->gendisk[drv_index]){ 1649 - printk(KERN_ERR "cciss: could not allocate new disk %d\n", drv_index); 1650 - goto mem_msg; 1651 - } 1590 + printk(KERN_ERR "cciss: could not " 1591 + "allocate new disk %d\n", 1592 + drv_index); 1593 + goto mem_msg; 1652 1594 } 1653 1595 } 1654 1596 h->drv[drv_index].LunID = lunid; 1655 - cciss_update_drive_info(ctlr, drv_index); 1656 - } /* end for */ 1657 - } /* end else */ 1597 + 1598 + /* Don't need to mark this busy because nobody 1599 + * else knows about this disk yet to contend 1600 + * for access to it. 1601 + */ 1602 + h->drv[drv_index].busy_configuring = 0; 1603 + wmb(); 1604 + 1605 + } 1606 + cciss_update_drive_info(ctlr, drv_index); 1607 + } /* end for */ 1658 1608 1659 1609 freeret: 1660 1610 kfree(ld_buff); ··· 1685 1597 return -1; 1686 1598 mem_msg: 1687 1599 printk(KERN_ERR "cciss: out of memory\n"); 1600 + h->busy_configuring = 0; 1688 1601 goto freeret; 1689 1602 } 1690 1603 ··· 1741 1652 * other than disk 0 we will call put_disk. We do not 1742 1653 * do this for disk 0 as we need it to be able to 1743 1654 * configure the controller. 1744 - */ 1655 + */ 1745 1656 if (clear_all){ 1746 1657 /* This isn't pretty, but we need to find the 1747 1658 * disk in our array and NULL our the pointer. 
1748 1659 * This is so that we will call alloc_disk if 1749 1660 * this index is used again later. 1750 - */ 1661 + */ 1751 1662 for (i=0; i < CISS_MAX_LUN; i++){ 1752 - if(h->gendisk[i] == disk){ 1663 + if (h->gendisk[i] == disk) { 1753 1664 h->gendisk[i] = NULL; 1754 1665 break; 1755 1666 } ··· 1777 1688 if (drv == h->drv + h->highest_lun) { 1778 1689 /* if so, find the new hightest lun */ 1779 1690 int i, newhighest = -1; 1780 - for (i = 0; i < h->highest_lun; i++) { 1691 + for (i = 0; i <= h->highest_lun; i++) { 1781 1692 /* if the disk has size > 0, it is available */ 1782 1693 if (h->drv[i].heads) 1783 1694 newhighest = i; ··· 3407 3318 cciss_geometry_inquiry(cntl_num, i, 0, total_size, 3408 3319 block_size, inq_buff, 3409 3320 &hba[cntl_num]->drv[i]); 3321 + cciss_get_serial_no(cntl_num, i, 0, 3322 + hba[cntl_num]->drv[i].serial_no, 3323 + sizeof(hba[cntl_num]->drv[i].serial_no)); 3410 3324 } else { 3411 3325 /* initialize raid_level to indicate a free space */ 3412 3326 hba[cntl_num]->drv[i].raid_level = -1;
+2
drivers/block/cciss.h
··· 39 39 *to prevent it from being opened or it's queue 40 40 *from being started. 41 41 */ 42 + __u8 serial_no[16]; /* from inquiry page 0x83, */ 43 + /* not necc. null terminated. */ 42 44 } drive_info_struct; 43 45 44 46 #ifdef CONFIG_CISS_SCSI_TAPE