Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mmc: core: Fix some driver hangs when dealing with broken devices

There are infinite loops in the mmc code that can be caused by bad
hardware. The code will loop forever if the device never leaves the
program state (R1_STATE_PRG) or never becomes ready for data
(R1_READY_FOR_DATA).

A long timeout is added to prevent the code from looping forever.
The timeout will occur if the device never comes back from program
state or the device never becomes ready for data.

It's not clear whether the timeout will do more than log a pr_err()
and then start a fresh hang all over again. We may need to extend
this patch later to perform some kind of reset of the device (is
that possible?) or rejection of new I/O to the device.

Signed-off-by: Trey Ramsay <tramsay@linux.vnet.ibm.com>
Signed-off-by: Chris Ball <cjb@laptop.org>

Authored by Trey Ramsay and committed by Chris Ball
8fee476b e95baf13

+43 -1
+15
drivers/mmc/card/block.c
··· 57 57 #define INAND_CMD38_ARG_SECERASE 0x80 58 58 #define INAND_CMD38_ARG_SECTRIM1 0x81 59 59 #define INAND_CMD38_ARG_SECTRIM2 0x88 60 + #define MMC_BLK_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ 60 61 61 62 static DEFINE_MUTEX(block_mutex); 62 63 ··· 1035 1034 */ 1036 1035 if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) { 1037 1036 u32 status; 1037 + unsigned long timeout; 1038 + 1039 + timeout = jiffies + msecs_to_jiffies(MMC_BLK_TIMEOUT_MS); 1038 1040 do { 1039 1041 int err = get_card_status(card, &status, 5); 1040 1042 if (err) { 1041 1043 pr_err("%s: error %d requesting status\n", 1042 1044 req->rq_disk->disk_name, err); 1045 + return MMC_BLK_CMD_ERR; 1046 + } 1047 + 1048 + /* Timeout if the device never becomes ready for data 1049 + * and never leaves the program state. 1050 + */ 1051 + if (time_after(jiffies, timeout)) { 1052 + pr_err("%s: Card stuck in programming state!"\ 1053 + " %s %s\n", mmc_hostname(card->host), 1054 + req->rq_disk->disk_name, __func__); 1055 + 1043 1056 return MMC_BLK_CMD_ERR; 1044 1057 } 1045 1058 /*
+17 -1
drivers/mmc/core/core.c
··· 42 42 #include "sd_ops.h" 43 43 #include "sdio_ops.h" 44 44 45 + /* If the device is not responding */ 46 + #define MMC_CORE_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ 47 + 45 48 /* 46 49 * Background operations can take a long time, depending on the housekeeping 47 50 * operations the card has to perform. ··· 1634 1631 { 1635 1632 struct mmc_command cmd = {0}; 1636 1633 unsigned int qty = 0; 1634 + unsigned long timeout; 1637 1635 int err; 1638 1636 1639 1637 /* ··· 1712 1708 if (mmc_host_is_spi(card->host)) 1713 1709 goto out; 1714 1710 1711 + timeout = jiffies + msecs_to_jiffies(MMC_CORE_TIMEOUT_MS); 1715 1712 do { 1716 1713 memset(&cmd, 0, sizeof(struct mmc_command)); 1717 1714 cmd.opcode = MMC_SEND_STATUS; ··· 1726 1721 err = -EIO; 1727 1722 goto out; 1728 1723 } 1724 + 1725 + /* Timeout if the device never becomes ready for data and 1726 + * never leaves the program state. 1727 + */ 1728 + if (time_after(jiffies, timeout)) { 1729 + pr_err("%s: Card stuck in programming state! %s\n", 1730 + mmc_hostname(card->host), __func__); 1731 + err = -EIO; 1732 + goto out; 1733 + } 1734 + 1729 1735 } while (!(cmd.resp[0] & R1_READY_FOR_DATA) || 1730 - R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG); 1736 + (R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG)); 1731 1737 out: 1732 1738 return err; 1733 1739 }
+11
drivers/mmc/core/mmc_ops.c
··· 21 21 #include "core.h" 22 22 #include "mmc_ops.h" 23 23 24 + #define MMC_OPS_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ 25 + 24 26 static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card) 25 27 { 26 28 int err; ··· 411 409 { 412 410 int err; 413 411 struct mmc_command cmd = {0}; 412 + unsigned long timeout; 414 413 u32 status; 415 414 416 415 BUG_ON(!card); ··· 440 437 return 0; 441 438 442 439 /* Must check status to be sure of no errors */ 440 + timeout = jiffies + msecs_to_jiffies(MMC_OPS_TIMEOUT_MS); 443 441 do { 444 442 err = mmc_send_status(card, &status); 445 443 if (err) ··· 449 445 break; 450 446 if (mmc_host_is_spi(card->host)) 451 447 break; 448 + 449 + /* Timeout if the device never leaves the program state. */ 450 + if (time_after(jiffies, timeout)) { 451 + pr_err("%s: Card stuck in programming state! %s\n", 452 + mmc_hostname(card->host), __func__); 453 + return -ETIMEDOUT; 454 + } 452 455 } while (R1_CURRENT_STATE(status) == R1_STATE_PRG); 453 456 454 457 if (mmc_host_is_spi(card->host)) {