Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[MTD] NAND Signal that a bitflip was corrected by ECC

Return -EUCLEAN on read when a bitflip was detected and corrected, so that
clients can react and, if necessary, copy the affected block to a spare one.
Make all in-kernel users aware of the change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

+46 -24
+5 -2
drivers/mtd/inftlcore.c
··· 355 355 ret = mtd->read(mtd, (inftl->EraseSize * BlockMap[block]) + 356 356 (block * SECTORSIZE), SECTORSIZE, &retlen, 357 357 movebuf); 358 - if (ret < 0) { 358 + if (ret < 0 && ret != -EUCLEAN) { 359 359 ret = mtd->read(mtd, 360 360 (inftl->EraseSize * BlockMap[block]) + 361 361 (block * SECTORSIZE), SECTORSIZE, ··· 922 922 } else { 923 923 size_t retlen; 924 924 loff_t ptr = (thisEUN * inftl->EraseSize) + blockofs; 925 - if (mtd->read(mtd, ptr, SECTORSIZE, &retlen, buffer)) 925 + int ret = mtd->read(mtd, ptr, SECTORSIZE, &retlen, buffer); 926 + 927 + /* Handle corrected bit flips gracefully */ 928 + if (ret < 0 && ret != -EUCLEAN) 926 929 return -EIO; 927 930 } 928 931 return 0;
+4 -1
drivers/mtd/mtdchar.c
··· 199 199 /* Nand returns -EBADMSG on ecc errors, but it returns 200 200 * the data. For our userspace tools it is important 201 201 * to dump areas with ecc errors ! 202 + * For kernel internal usage it also might return -EUCLEAN 203 + * to signal the caller that a bitflip has occured and has 204 + * been corrected by the ECC algorithm. 202 205 * Userspace software which accesses NAND this way 203 206 * must be aware of the fact that it deals with NAND 204 207 */ 205 - if (!ret || (ret == -EBADMSG)) { 208 + if (!ret || (ret == -EUCLEAN) || (ret == -EBADMSG)) { 206 209 *ppos += retlen; 207 210 if (copy_to_user(buf, kbuf, retlen)) { 208 211 kfree(kbuf);
+12 -3
drivers/mtd/mtdconcat.c
··· 56 56 size_t * retlen, u_char * buf) 57 57 { 58 58 struct mtd_concat *concat = CONCAT(mtd); 59 - int err = -EINVAL; 59 + int ret = 0, err = -EINVAL; 60 60 int i; 61 61 62 62 *retlen = 0; ··· 80 80 81 81 err = subdev->read(subdev, from, size, &retsize, buf); 82 82 83 - if (err) 83 + if (err && (err != -EBADMSG) && (err != -EUCLEAN)) 84 84 break; 85 + 86 + /* Save information about bitflips! */ 87 + if (err) { 88 + if (err == -EBADMSG) 89 + ret = err; 90 + else if (!ret) 91 + ret = err; 92 + err = 0; 93 + } 85 94 86 95 *retlen += retsize; 87 96 len -= size; ··· 101 92 buf += size; 102 93 from = 0; 103 94 } 104 - return err; 95 + return err ? err : ret; 105 96 } 106 97 107 98 static int
+4 -1
drivers/mtd/nand/nand_base.c
··· 1035 1035 if (ret) 1036 1036 return ret; 1037 1037 1038 - return mtd->ecc_stats.failed - stats.failed ? -EBADMSG : 0; 1038 + if (mtd->ecc_stats.failed - stats.failed) 1039 + return -EBADMSG; 1040 + 1041 + return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0; 1039 1042 } 1040 1043 1041 1044 /**
+4 -2
drivers/mtd/nftlcore.c
··· 422 422 423 423 ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block]) + (block * 512), 424 424 512, &retlen, movebuf); 425 - if (ret < 0) { 425 + if (ret < 0 && ret != -EUCLEAN) { 426 426 ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block]) 427 427 + (block * 512), 512, &retlen, 428 428 movebuf); ··· 768 768 } else { 769 769 loff_t ptr = (lastgoodEUN * nftl->EraseSize) + blockofs; 770 770 size_t retlen; 771 - if (mtd->read(mtd, ptr, 512, &retlen, buffer)) 771 + int res = mtd->read(mtd, ptr, 512, &retlen, buffer); 772 + 773 + if (res < 0 && res != -EUCLEAN) 772 774 return -EIO; 773 775 } 774 776 return 0;
+17 -15
fs/jffs2/wbuf.c
··· 296 296 /* Do the read... */ 297 297 ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf); 298 298 299 - if (ret == -EBADMSG && retlen == c->wbuf_ofs - start) { 300 - /* ECC recovered */ 299 + /* ECC recovered ? */ 300 + if ((ret == -EUCLEAN || ret == -EBADMSG) && 301 + (retlen == c->wbuf_ofs - start)) 301 302 ret = 0; 302 - } 303 + 303 304 if (ret || retlen != c->wbuf_ofs - start) { 304 305 printk(KERN_CRIT "Old data are already lost in wbuf recovery. Data loss ensues.\n"); 305 306 ··· 909 908 down_read(&c->wbuf_sem); 910 909 ret = c->mtd->read(c->mtd, ofs, len, retlen, buf); 911 910 912 - if ( (ret == -EBADMSG) && (*retlen == len) ) { 913 - printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n", 914 - len, ofs); 911 + if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) { 912 + if (ret == -EBADMSG) 913 + printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx)" 914 + " returned ECC error\n", len, ofs); 915 915 /* 916 - * We have the raw data without ECC correction in the buffer, maybe 917 - * we are lucky and all data or parts are correct. We check the node. 918 - * If data are corrupted node check will sort it out. 919 - * We keep this block, it will fail on write or erase and the we 920 - * mark it bad. Or should we do that now? But we should give him a chance. 921 - * Maybe we had a system crash or power loss before the ecc write or 922 - * a erase was completed. 916 + * We have the raw data without ECC correction in the buffer, 917 + * maybe we are lucky and all data or parts are correct. We 918 + * check the node. If data are corrupted node check will sort 919 + * it out. We keep this block, it will fail on write or erase 920 + * and the we mark it bad. Or should we do that now? But we 921 + * should give him a chance. Maybe we had a system crash or 922 + * power loss before the ecc write or a erase was completed. 923 923 * So we return success. 
:) 924 924 */ 925 - ret = 0; 925 + ret = 0; 926 926 } 927 927 928 928 /* if no writebuffer available or write buffer empty, return */ ··· 945 943 orbf = (c->wbuf_ofs - ofs); /* offset in read buffer */ 946 944 if (orbf > len) /* is write beyond write buffer ? */ 947 945 goto exit; 948 - lwbf = len - orbf; /* number of bytes to copy */ 946 + lwbf = len - orbf; /* number of bytes to copy */ 949 947 if (lwbf > c->wbuf_len) 950 948 lwbf = c->wbuf_len; 951 949 }