
mtd: Add mtdswap block driver

Add a driver that allows an MTD device to be used as a block device for
swapping. The block device is volatile: the mapping of swapped pages is
not stored on flash.
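
For example (a hypothetical sequence; the driver names its disk
mtdswap<N> after the MTD partition number, and loading with header=1
makes the driver supply the swap signature itself, so no mkswap step is
needed), the device can be enabled from user space with swapon(2):

#include <stdio.h>
#include <sys/swap.h>

/* Assumes the module was loaded with partitions=5 header=1; the
 * /dev/mtdswap5 node name is an assumption based on those parameters. */
int main(void)
{
	if (swapon("/dev/mtdswap5", 0) < 0) {
		perror("swapon");
		return 1;
	}
	return 0;
}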

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

authored by Jarkko Lavinen, committed by David Woodhouse · a3215902 115ee88c

1612 insertions(+)

drivers/mtd/Kconfig (+18 lines)
···
	  To use, add console=ttyMTDx to the kernel command line,
	  where x is the MTD device number to use.

+config MTD_SWAP
+	tristate "Swap on MTD device support"
+	depends on MTD && SWAP
+	select MTD_BLKDEVS
+	help
+	  Provides a volatile block device driver on top of an MTD
+	  partition, suitable for swapping. The mapping of written blocks
+	  is not saved. The driver provides wear leveling by storing the
+	  erase counter in the OOB area.
+
+config MTD_SWAP_STRICT
+	bool "Strict erase error handling"
+	depends on MTD_SWAP
+	help
+	  Enables strict handling of erase errors, marking erase blocks
+	  bad right after the first failed operation. In non-strict mode
+	  the erase operation is retried.
+
 source "drivers/mtd/chips/Kconfig"

 source "drivers/mtd/maps/Kconfig"

drivers/mtd/Makefile (+1 line)
···
 obj-$(CONFIG_SSFDC)	+= ssfdc.o
 obj-$(CONFIG_SM_FTL)	+= sm_ftl.o
 obj-$(CONFIG_MTD_OOPS)	+= mtdoops.o
+obj-$(CONFIG_MTD_SWAP)	+= mtdswap.o

 nftl-objs	:= nftlcore.o nftlmount.o
 inftl-objs	:= inftlcore.o inftlmount.o

drivers/mtd/mtdswap.c (+1593 lines, new file)
/*
 * Swap block device support for MTDs
 * Turns an MTD device into a swap device with block wear leveling
 *
 * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
 *
 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
 *
 * Based on Richard Purdie's earlier implementation in 2007. Background
 * support and lock-less operation written by Adrian Hunter.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/blktrans.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/swap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/math64.h>

#define MTDSWAP_PREFIX "mtdswap"

/*
 * The number of free eraseblocks at which GC should stop.
 */
#define CLEAN_BLOCK_THRESHOLD	20

/*
 * Number of free eraseblocks below which GC can also collect low frag
 * blocks.
 */
#define LOW_FRAG_GC_TRESHOLD	5

/*
 * Wear level cost amortization. We want to do wear leveling in the
 * background without disturbing GC too much. This is done by defining
 * a maximum GC frequency: a frequency value of 6 means 1/6 of the GC
 * passes will pick an erase block based on the biggest wear difference
 * rather than the biggest dirtiness.
 *
 * The lower freq2 should be chosen so that the maximum erase difference
 * keeps decreasing even if a malicious application deliberately tries
 * to make erase differences large.
 */
#define MAX_ERASE_DIFF		4000
#define COLLECT_NONDIRTY_BASE	MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1	6
#define COLLECT_NONDIRTY_FREQ2	4

#define PAGE_UNDEF		UINT_MAX
#define BLOCK_UNDEF		UINT_MAX
#define BLOCK_ERROR		(UINT_MAX - 1)
#define BLOCK_MAX		(UINT_MAX - 2)

#define EBLOCK_BAD		(1 << 0)
#define EBLOCK_NOMAGIC		(1 << 1)
#define EBLOCK_BITFLIP		(1 << 2)
#define EBLOCK_FAILED		(1 << 3)
#define EBLOCK_READERR		(1 << 4)
#define EBLOCK_IDX_SHIFT	5

struct swap_eb {
	struct rb_node rb;
	struct rb_root *root;

	unsigned int flags;
	unsigned int active_count;
	unsigned int erase_count;
	unsigned int pad;		/* speeds up pointer decrement */
};

#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
				rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
				rb)->erase_count)

struct mtdswap_tree {
	struct rb_root root;
	unsigned int count;
};

enum {
	MTDSWAP_CLEAN,
	MTDSWAP_USED,
	MTDSWAP_LOWFRAG,
	MTDSWAP_HIFRAG,
	MTDSWAP_DIRTY,
	MTDSWAP_BITFLIP,
	MTDSWAP_FAILING,
	MTDSWAP_TREE_CNT,
};

struct mtdswap_dev {
	struct mtd_blktrans_dev *mbd_dev;
	struct mtd_info *mtd;
	struct device *dev;

	unsigned int *page_data;
	unsigned int *revmap;

	unsigned int eblks;
	unsigned int spare_eblks;
	unsigned int pages_per_eblk;
	unsigned int max_erase_count;
	struct swap_eb *eb_data;

	struct mtdswap_tree trees[MTDSWAP_TREE_CNT];

	unsigned long long sect_read_count;
	unsigned long long sect_write_count;
	unsigned long long mtd_write_count;
	unsigned long long mtd_read_count;
	unsigned long long discard_count;
	unsigned long long discard_page_count;

	unsigned int curr_write_pos;
	struct swap_eb *curr_write;

	char *page_buf;
	char *oob_buf;

	struct dentry *debugfs_root;
};

struct mtdswap_oobdata {
	__le16 magic;
	__le32 count;
} __attribute__((packed));

#define MTDSWAP_MAGIC_CLEAN	0x2095
#define MTDSWAP_MAGIC_DIRTY	(MTDSWAP_MAGIC_CLEAN + 1)
#define MTDSWAP_TYPE_CLEAN	0
#define MTDSWAP_TYPE_DIRTY	1
#define MTDSWAP_OOBSIZE		sizeof(struct mtdswap_oobdata)
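
The two markers above are the driver's only persistent metadata: a clean
marker plus erase count in the OOB of an erase block's first page, and a
dirty magic in the OOB of its second page. As a reference, a minimal
user-space sketch of decoding a dumped first-page OOB area (the constants
mirror the defines above; the little-endian host assumption and the
sample bytes are mine):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical decoder; assumes a little-endian host so the raw copy
 * matches the on-flash byte order of the packed marker. */
struct oobdata {
	uint16_t magic;
	uint32_t count;
} __attribute__((packed));

static void decode(const uint8_t *oob)
{
	struct oobdata d;

	memcpy(&d, oob, sizeof(d));
	if (d.magic == 0x2095)		/* MTDSWAP_MAGIC_CLEAN */
		printf("clean, erased %u times\n", (unsigned)d.count);
	else if (d.magic == 0x2096)	/* MTDSWAP_MAGIC_DIRTY */
		printf("dirty\n");
	else
		printf("no magic (interrupted erase or foreign data)\n");
}

int main(void)
{
	/* Example bytes: clean marker, erase count 3. */
	const uint8_t oob[6] = { 0x95, 0x20, 0x03, 0x00, 0x00, 0x00 };

	decode(oob);
	return 0;
}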
#define MTDSWAP_ERASE_RETRIES	3	/* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES	3

#ifdef CONFIG_MTD_SWAP_STRICT
#define MTDSWAP_STRICT		1
#else
#define MTDSWAP_STRICT		0
#endif

enum {
	MTDSWAP_SCANNED_CLEAN,
	MTDSWAP_SCANNED_DIRTY,
	MTDSWAP_SCANNED_BITFLIP,
	MTDSWAP_SCANNED_BAD,
};

/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
 */
#define MIN_SPARE_EBLOCKS	2
#define MIN_ERASE_BLOCKS	(MIN_SPARE_EBLOCKS + 1)

#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)

#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)

static char partitions[128] = "";
module_param_string(partitions, partitions, sizeof(partitions), 0444);
MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap, "
		"e.g. partitions=\"1,3,5\"");

static unsigned int spare_eblocks = 10;
module_param(spare_eblocks, uint, 0444);
MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
		"garbage collection (default 10%)");

static bool header; /* false */
module_param(header, bool, 0444);
MODULE_PARM_DESC(header,
		"Include builtin swap header (default 0, without header)");

static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);

static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
{
	return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
}

static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
{
	unsigned int oldidx;
	struct mtdswap_tree *tp;

	if (eb->root) {
		tp = container_of(eb->root, struct mtdswap_tree, root);
		oldidx = tp - &d->trees[0];

		d->trees[oldidx].count--;
		rb_erase(&eb->rb, eb->root);
	}
}

static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
{
	struct rb_node **p, *parent = NULL;
	struct swap_eb *cur;

	p = &root->rb_node;
	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct swap_eb, rb);
		if (eb->erase_count > cur->erase_count)
			p = &(*p)->rb_right;
		else
			p = &(*p)->rb_left;
	}

	rb_link_node(&eb->rb, parent, p);
	rb_insert_color(&eb->rb, root);
}

static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
{
	struct rb_root *root;

	if (eb->root == &d->trees[idx].root)
		return;

	mtdswap_eb_detach(d, eb);
	root = &d->trees[idx].root;
	__mtdswap_rb_add(root, eb);
	eb->root = root;
	d->trees[idx].count++;
}

static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
{
	struct rb_node *p;
	unsigned int i;

	p = rb_first(root);
	i = 0;
	while (i < idx && p) {
		p = rb_next(p);
		i++;
	}

	return p;
}

static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
{
	int ret;
	loff_t offset;

	d->spare_eblks--;
	eb->flags |= EBLOCK_BAD;
	mtdswap_eb_detach(d, eb);
	eb->root = NULL;

	/* badblocks not supported */
	if (!d->mtd->block_markbad)
		return 1;

	offset = mtdswap_eb_offset(d, eb);
	dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
	ret = d->mtd->block_markbad(d->mtd, offset);

	if (ret) {
		dev_warn(d->dev, "Mark block bad failed for block at %08llx "
			"error %d\n", offset, ret);
		return ret;
	}

	return 1;
}

static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
{
	unsigned int marked = eb->flags & EBLOCK_FAILED;
	struct swap_eb *curr_write = d->curr_write;

	eb->flags |= EBLOCK_FAILED;
	if (curr_write == eb) {
		d->curr_write = NULL;

		if (!marked && d->curr_write_pos != 0) {
			mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
			return 0;
		}
	}

	return mtdswap_handle_badblock(d, eb);
}

static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
			struct mtd_oob_ops *ops)
{
	int ret = d->mtd->read_oob(d->mtd, from, ops);

	if (ret == -EUCLEAN)
		return ret;

	if (ret) {
		dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
			ret, from);
		return ret;
	}

	if (ops->oobretlen < ops->ooblen) {
		dev_warn(d->dev, "Read OOB return short read (%zd bytes not "
			"%d) for block at %08llx\n",
			ops->oobretlen, ops->ooblen, from);
		return -EIO;
	}

	return 0;
}

static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
{
	struct mtdswap_oobdata *data, *data2;
	int ret;
	loff_t offset;
	struct mtd_oob_ops ops;

	offset = mtdswap_eb_offset(d, eb);

	/* Check first if the block is bad. */
	if (d->mtd->block_isbad && d->mtd->block_isbad(d->mtd, offset))
		return MTDSWAP_SCANNED_BAD;

	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
	ops.oobbuf = d->oob_buf;
	ops.ooboffs = 0;
	ops.datbuf = NULL;
	ops.mode = MTD_OOB_AUTO;

	ret = mtdswap_read_oob(d, offset, &ops);

	if (ret && ret != -EUCLEAN)
		return ret;

	data = (struct mtdswap_oobdata *)d->oob_buf;
	data2 = (struct mtdswap_oobdata *)
		(d->oob_buf + d->mtd->ecclayout->oobavail);

	if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
		eb->erase_count = le32_to_cpu(data->count);
		if (ret == -EUCLEAN)
			ret = MTDSWAP_SCANNED_BITFLIP;
		else {
			if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
				ret = MTDSWAP_SCANNED_DIRTY;
			else
				ret = MTDSWAP_SCANNED_CLEAN;
		}
	} else {
		eb->flags |= EBLOCK_NOMAGIC;
		ret = MTDSWAP_SCANNED_DIRTY;
	}

	return ret;
}

static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
			u16 marker)
{
	struct mtdswap_oobdata n;
	int ret;
	loff_t offset;
	struct mtd_oob_ops ops;

	ops.ooboffs = 0;
	ops.oobbuf = (uint8_t *)&n;
	ops.mode = MTD_OOB_AUTO;
	ops.datbuf = NULL;

	if (marker == MTDSWAP_TYPE_CLEAN) {
		n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
		n.count = cpu_to_le32(eb->erase_count);
		ops.ooblen = MTDSWAP_OOBSIZE;
		offset = mtdswap_eb_offset(d, eb);
	} else {
		n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
		ops.ooblen = sizeof(n.magic);
		offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
	}

	ret = d->mtd->write_oob(d->mtd, offset, &ops);

	if (ret) {
		dev_warn(d->dev, "Write OOB failed for block at %08llx "
			"error %d\n", offset, ret);
		if (ret == -EIO || ret == -EBADMSG)
			mtdswap_handle_write_error(d, eb);
		return ret;
	}

	if (ops.oobretlen != ops.ooblen) {
		dev_warn(d->dev, "Short OOB write for block at %08llx: "
			"%zd not %d\n",
			offset, ops.oobretlen, ops.ooblen);
		return ret;
	}

	return 0;
}

/*
 * Are there any erase blocks without the MAGIC_CLEAN header, presumably
 * because power was cut off after erase but before the header write? We
 * need to guesstimate the erase count.
 */
static void mtdswap_check_counts(struct mtdswap_dev *d)
{
	struct rb_root hist_root = RB_ROOT;
	struct rb_node *medrb;
	struct swap_eb *eb;
	unsigned int i, cnt, median;

	cnt = 0;
	for (i = 0; i < d->eblks; i++) {
		eb = d->eb_data + i;

		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
			continue;

		__mtdswap_rb_add(&hist_root, eb);
		cnt++;
	}

	if (cnt == 0)
		return;

	medrb = mtdswap_rb_index(&hist_root, cnt / 2);
	median = rb_entry(medrb, struct swap_eb, rb)->erase_count;

	d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);

	for (i = 0; i < d->eblks; i++) {
		eb = d->eb_data + i;

		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
			eb->erase_count = median;

		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
			continue;

		rb_erase(&eb->rb, &hist_root);
	}
}

static void mtdswap_scan_eblks(struct mtdswap_dev *d)
{
	int status;
	unsigned int i, idx;
	struct swap_eb *eb;

	for (i = 0; i < d->eblks; i++) {
		eb = d->eb_data + i;

		status = mtdswap_read_markers(d, eb);
		if (status < 0)
			eb->flags |= EBLOCK_READERR;
		else if (status == MTDSWAP_SCANNED_BAD) {
			eb->flags |= EBLOCK_BAD;
			continue;
		}

		switch (status) {
		case MTDSWAP_SCANNED_CLEAN:
			idx = MTDSWAP_CLEAN;
			break;
		case MTDSWAP_SCANNED_DIRTY:
		case MTDSWAP_SCANNED_BITFLIP:
			idx = MTDSWAP_DIRTY;
			break;
		default:
			idx = MTDSWAP_FAILING;
		}

		eb->flags |= (idx << EBLOCK_IDX_SHIFT);
	}

	mtdswap_check_counts(d);

	for (i = 0; i < d->eblks; i++) {
		eb = d->eb_data + i;

		if (eb->flags & EBLOCK_BAD)
			continue;

		idx = eb->flags >> EBLOCK_IDX_SHIFT;
		mtdswap_rb_add(d, eb, idx);
	}
}

/*
 * Place the eblk into the tree corresponding to the number of active
 * blocks it contains.
 */
static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
{
	unsigned int weight = eb->active_count;
	unsigned int maxweight = d->pages_per_eblk;

	if (eb == d->curr_write)
		return;

	if (eb->flags & EBLOCK_BITFLIP)
		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
	else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
	else if (weight == maxweight)
		mtdswap_rb_add(d, eb, MTDSWAP_USED);
	else if (weight == 0)
		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
	else if (weight > (maxweight/2))
		mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
	else
		mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
}

static void mtdswap_erase_callback(struct erase_info *done)
{
	wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv;
	wake_up(wait_q);
}

static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
{
	struct mtd_info *mtd = d->mtd;
	struct erase_info erase;
	wait_queue_head_t wq;
	unsigned int retries = 0;
	int ret;

	eb->erase_count++;
	if (eb->erase_count > d->max_erase_count)
		d->max_erase_count = eb->erase_count;

retry:
	init_waitqueue_head(&wq);
	memset(&erase, 0, sizeof(struct erase_info));

	erase.mtd = mtd;
	erase.callback = mtdswap_erase_callback;
	erase.addr = mtdswap_eb_offset(d, eb);
	erase.len = mtd->erasesize;
	erase.priv = (u_long)&wq;

	ret = mtd->erase(mtd, &erase);
	if (ret) {
		if (retries++ < MTDSWAP_ERASE_RETRIES && !MTDSWAP_STRICT) {
			dev_warn(d->dev,
				"erase of erase block %#llx on %s failed",
				erase.addr, mtd->name);
			yield();
			goto retry;
		}

		dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
			erase.addr, mtd->name);

		mtdswap_handle_badblock(d, eb);
		return -EIO;
	}

	ret = wait_event_interruptible(wq, erase.state == MTD_ERASE_DONE ||
				       erase.state == MTD_ERASE_FAILED);
	if (ret) {
		dev_err(d->dev, "Interrupted erase block %#llx erasure on %s",
			erase.addr, mtd->name);
		return -EINTR;
	}

	if (erase.state == MTD_ERASE_FAILED) {
		if (retries++ < MTDSWAP_ERASE_RETRIES) {
			dev_warn(d->dev,
				"erase of erase block %#llx on %s failed",
				erase.addr, mtd->name);
			yield();
			goto retry;
		}

		mtdswap_handle_badblock(d, eb);
		return -EIO;
	}

	return 0;
}

static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
			unsigned int *block)
{
	int ret;
	struct swap_eb *old_eb = d->curr_write;
	struct rb_root *clean_root;
	struct swap_eb *eb;

	if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
		do {
			if (TREE_EMPTY(d, CLEAN))
				return -ENOSPC;

			clean_root = TREE_ROOT(d, CLEAN);
			eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
			rb_erase(&eb->rb, clean_root);
			eb->root = NULL;
			TREE_COUNT(d, CLEAN)--;

			ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
		} while (ret == -EIO || ret == -EBADMSG);

		if (ret)
			return ret;

		d->curr_write_pos = 0;
		d->curr_write = eb;
		if (old_eb)
			mtdswap_store_eb(d, old_eb);
	}

	*block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
		d->curr_write_pos;

	d->curr_write->active_count++;
	d->revmap[*block] = page;
	d->curr_write_pos++;

	return 0;
}
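
Block numbers produced here are linear across the whole device: block =
eb_index * pages_per_eblk + write position, and a block's flash offset is
block << PAGE_SHIFT. A small worked example of that arithmetic, assuming
a 128 KiB erase size and 4 KiB pages (both assumed geometry, not fixed by
the driver):

#include <stdio.h>

/* 128 KiB erase blocks and 4 KiB pages give 32 swap pages per block. */
int main(void)
{
	unsigned int pages_per_eblk = (128 * 1024) / 4096;	/* 32 */
	unsigned int eb_index = 3, write_pos = 4;

	/* Forward: erase block index + position -> global block number. */
	unsigned int block = eb_index * pages_per_eblk + write_pos; /* 100 */

	/* Reverse, as used throughout the driver. */
	printf("block %u -> eb %u, flash offset 0x%x\n",
	       block, block / pages_per_eblk, block * 4096);	/* 0x64000 */
	return 0;
}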
static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
{
	return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
		d->pages_per_eblk - d->curr_write_pos;
}

static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
{
	return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
}

static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
			unsigned int page, unsigned int *bp, int gc_context)
{
	struct mtd_info *mtd = d->mtd;
	struct swap_eb *eb;
	size_t retlen;
	loff_t writepos;
	int ret;

retry:
	if (!gc_context)
		while (!mtdswap_enough_free_pages(d))
			if (mtdswap_gc(d, 0) > 0)
				return -ENOSPC;

	ret = mtdswap_map_free_block(d, page, bp);
	eb = d->eb_data + (*bp / d->pages_per_eblk);

	if (ret == -EIO || ret == -EBADMSG) {
		d->curr_write = NULL;
		eb->active_count--;
		d->revmap[*bp] = PAGE_UNDEF;
		goto retry;
	}

	if (ret < 0)
		return ret;

	writepos = (loff_t)*bp << PAGE_SHIFT;
	ret = mtd->write(mtd, writepos, PAGE_SIZE, &retlen, buf);
	if (ret == -EIO || ret == -EBADMSG) {
		d->curr_write_pos--;
		eb->active_count--;
		d->revmap[*bp] = PAGE_UNDEF;
		mtdswap_handle_write_error(d, eb);
		goto retry;
	}

	if (ret < 0) {
		dev_err(d->dev, "Write to MTD device failed: %d (%d written)",
			ret, retlen);
		goto err;
	}

	if (retlen != PAGE_SIZE) {
		dev_err(d->dev, "Short write to MTD device: %d written",
			retlen);
		ret = -EIO;
		goto err;
	}

	return ret;

err:
	d->curr_write_pos--;
	eb->active_count--;
	d->revmap[*bp] = PAGE_UNDEF;

	return ret;
}

static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
		unsigned int *newblock)
{
	struct mtd_info *mtd = d->mtd;
	struct swap_eb *eb, *oldeb;
	int ret;
	size_t retlen;
	unsigned int page, retries;
	loff_t readpos;

	page = d->revmap[oldblock];
	readpos = (loff_t) oldblock << PAGE_SHIFT;
	retries = 0;

retry:
	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);

	if (ret < 0 && ret != -EUCLEAN) {
		oldeb = d->eb_data + oldblock / d->pages_per_eblk;
		oldeb->flags |= EBLOCK_READERR;

		dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
			oldblock);
		retries++;
		if (retries < MTDSWAP_IO_RETRIES)
			goto retry;

		goto read_error;
	}

	if (retlen != PAGE_SIZE) {
		dev_err(d->dev, "Short read: %d (block %u)\n", retlen,
			oldblock);
		ret = -EIO;
		goto read_error;
	}

	ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
	if (ret < 0) {
		d->page_data[page] = BLOCK_ERROR;
		dev_err(d->dev, "Write error: %d\n", ret);
		return ret;
	}

	eb = d->eb_data + *newblock / d->pages_per_eblk;
	d->page_data[page] = *newblock;
	d->revmap[oldblock] = PAGE_UNDEF;
	eb = d->eb_data + oldblock / d->pages_per_eblk;
	eb->active_count--;

	return 0;

read_error:
	d->page_data[page] = BLOCK_ERROR;
	d->revmap[oldblock] = PAGE_UNDEF;
	return ret;
}
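
mtdswap_write_block() and mtdswap_move_block() above maintain a two-way
mapping: page_data[] takes a swap page to its current flash block, and
revmap[] takes a flash block back to its page. A hypothetical debug
helper (an editor's sketch, not part of this patch) asserting that
invariant might look like:

/* Sketch only: every mapped swap page and its flash block must
 * reference each other; BLOCK_UNDEF/BLOCK_ERROR pages are skipped. */
static void mtdswap_check_revmap(struct mtdswap_dev *d, unsigned int pages)
{
	unsigned int page, block;

	for (page = 0; page < pages; page++) {
		block = d->page_data[page];
		if (block <= BLOCK_MAX)
			BUG_ON(d->revmap[block] != page);
	}
}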
static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
{
	unsigned int i, block, eblk_base, newblock;
	int ret, errcode;

	errcode = 0;
	eblk_base = (eb - d->eb_data) * d->pages_per_eblk;

	for (i = 0; i < d->pages_per_eblk; i++) {
		if (d->spare_eblks < MIN_SPARE_EBLOCKS)
			return -ENOSPC;

		block = eblk_base + i;
		if (d->revmap[block] == PAGE_UNDEF)
			continue;

		ret = mtdswap_move_block(d, block, &newblock);
		if (ret < 0 && !errcode)
			errcode = ret;
	}

	return errcode;
}

static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
{
	int idx, stopat;

	if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_TRESHOLD)
		stopat = MTDSWAP_LOWFRAG;
	else
		stopat = MTDSWAP_HIFRAG;

	for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
		if (d->trees[idx].root.rb_node != NULL)
			return idx;

	return -1;
}

static int mtdswap_wlfreq(unsigned int maxdiff)
{
	unsigned int h, x, y, dist, base;

	/*
	 * Calculate a linear ramp down from f1 to f2 when maxdiff goes from
	 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar
	 * to a triangle with height f1 - f2 and width COLLECT_NONDIRTY_BASE.
	 */

	dist = maxdiff - MAX_ERASE_DIFF;
	if (dist > COLLECT_NONDIRTY_BASE)
		dist = COLLECT_NONDIRTY_BASE;

	/*
	 * Modelling the slope as a right-angled triangle with base
	 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
	 * equal to the ratio h/base.
	 */
	h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
	base = COLLECT_NONDIRTY_BASE;

	x = dist - base;
	y = (x * h + base / 2) / base;

	return COLLECT_NONDIRTY_FREQ2 + y;
}

static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
{
	static unsigned int pick_cnt;
	unsigned int i, idx, wear, max;
	struct rb_root *root;

	max = 0;
	for (i = 0; i <= MTDSWAP_DIRTY; i++) {
		root = &d->trees[i].root;
		if (root->rb_node == NULL)
			continue;

		wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
		if (wear > max) {
			max = wear;
			idx = i;
		}
	}

	if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
		pick_cnt = 0;
		return idx;
	}

	pick_cnt++;
	return -1;
}

static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
				unsigned int background)
{
	int idx;

	if (TREE_NONEMPTY(d, FAILING) &&
	    (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
		return MTDSWAP_FAILING;

	idx = mtdswap_choose_wl_tree(d);
	if (idx >= MTDSWAP_CLEAN)
		return idx;

	return __mtdswap_choose_gc_tree(d);
}

static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
					unsigned int background)
{
	struct rb_root *rp = NULL;
	struct swap_eb *eb = NULL;
	int idx;

	if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
	    TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
		return NULL;

	idx = mtdswap_choose_gc_tree(d, background);
	if (idx < 0)
		return NULL;

	rp = &d->trees[idx].root;
	eb = rb_entry(rb_first(rp), struct swap_eb, rb);

	rb_erase(&eb->rb, rp);
	eb->root = NULL;
	d->trees[idx].count--;
	return eb;
}

static unsigned int mtdswap_test_patt(unsigned int i)
{
	return i % 2 ? 0x55555555 : 0xAAAAAAAA;
}

static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
					struct swap_eb *eb)
{
	struct mtd_info *mtd = d->mtd;
	unsigned int test, i, j, patt, mtd_pages;
	loff_t base, pos;
	unsigned int *p1 = (unsigned int *)d->page_buf;
	unsigned char *p2 = (unsigned char *)d->oob_buf;
	struct mtd_oob_ops ops;
	int ret;

	ops.mode = MTD_OOB_AUTO;
	ops.len = mtd->writesize;
	ops.ooblen = mtd->ecclayout->oobavail;
	ops.ooboffs = 0;
	ops.datbuf = d->page_buf;
	ops.oobbuf = d->oob_buf;
	base = mtdswap_eb_offset(d, eb);
	mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;

	for (test = 0; test < 2; test++) {
		pos = base;
		for (i = 0; i < mtd_pages; i++) {
			patt = mtdswap_test_patt(test + i);
			memset(d->page_buf, patt, mtd->writesize);
			memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
			ret = mtd->write_oob(mtd, pos, &ops);
			if (ret)
				goto error;

			pos += mtd->writesize;
		}

		pos = base;
		for (i = 0; i < mtd_pages; i++) {
			ret = mtd->read_oob(mtd, pos, &ops);
			if (ret)
				goto error;

			patt = mtdswap_test_patt(test + i);
			for (j = 0; j < mtd->writesize/sizeof(int); j++)
				if (p1[j] != patt)
					goto error;

			for (j = 0; j < mtd->ecclayout->oobavail; j++)
				if (p2[j] != (unsigned char)patt)
					goto error;

			pos += mtd->writesize;
		}

		ret = mtdswap_erase_block(d, eb);
		if (ret)
			goto error;
	}

	eb->flags &= ~EBLOCK_READERR;
	return 1;

error:
	mtdswap_handle_badblock(d, eb);
	return 0;
}

static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
{
	struct swap_eb *eb;
	int ret;

	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
		return 1;

	eb = mtdswap_pick_gc_eblk(d, background);
	if (!eb)
		return 1;

	ret = mtdswap_gc_eblock(d, eb);
	if (ret == -ENOSPC)
		return 1;

	if (eb->flags & EBLOCK_FAILED) {
		mtdswap_handle_badblock(d, eb);
		return 0;
	}

	eb->flags &= ~EBLOCK_BITFLIP;
	ret = mtdswap_erase_block(d, eb);
	if ((eb->flags & EBLOCK_READERR) &&
	    (ret || !mtdswap_eblk_passes(d, eb)))
		return 0;

	if (ret == 0)
		ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);

	if (ret == 0)
		mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
	else if (ret != -EIO && ret != -EBADMSG)
		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);

	return 0;
}

static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
	int ret;

	while (1) {
		ret = mtdswap_gc(d, 1);
		if (ret || mtd_blktrans_cease_background(dev))
			return;
	}
}

static void mtdswap_cleanup(struct mtdswap_dev *d)
{
	vfree(d->eb_data);
	vfree(d->revmap);
	vfree(d->page_data);
	kfree(d->oob_buf);
	kfree(d->page_buf);
}

static int mtdswap_flush(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

	if (d->mtd->sync)
		d->mtd->sync(d->mtd);
	return 0;
}

static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
{
	loff_t offset;
	unsigned int badcnt;

	badcnt = 0;

	if (mtd->block_isbad)
		for (offset = 0; offset < size; offset += mtd->erasesize)
			if (mtd->block_isbad(mtd, offset))
				badcnt++;

	return badcnt;
}

static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
			unsigned long page, char *buf)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
	unsigned int newblock, mapped;
	struct swap_eb *eb;
	int ret;

	d->sect_write_count++;

	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
		return -ENOSPC;

	if (header) {
		/* Ignore writes to the header page */
		if (unlikely(page == 0))
			return 0;

		page--;
	}

	mapped = d->page_data[page];
	if (mapped <= BLOCK_MAX) {
		eb = d->eb_data + (mapped / d->pages_per_eblk);
		eb->active_count--;
		mtdswap_store_eb(d, eb);
		d->page_data[page] = BLOCK_UNDEF;
		d->revmap[mapped] = PAGE_UNDEF;
	}

	ret = mtdswap_write_block(d, buf, page, &newblock, 0);
	d->mtd_write_count++;

	if (ret < 0)
		return ret;

	eb = d->eb_data + (newblock / d->pages_per_eblk);
	d->page_data[page] = newblock;

	return 0;
}

/* Provide a dummy swap header for the kernel */
static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
{
	union swap_header *hd = (union swap_header *)(buf);

	memset(buf, 0, PAGE_SIZE - 10);

	hd->info.version = 1;
	hd->info.last_page = d->mbd_dev->size - 1;
	hd->info.nr_badpages = 0;

	memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);

	return 0;
}

static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
			unsigned long page, char *buf)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
	struct mtd_info *mtd = d->mtd;
	unsigned int realblock, retries;
	loff_t readpos;
	struct swap_eb *eb;
	size_t retlen;
	int ret;

	d->sect_read_count++;

	if (header) {
		if (unlikely(page == 0))
			return mtdswap_auto_header(d, buf);

		page--;
	}

	realblock = d->page_data[page];
	if (realblock > BLOCK_MAX) {
		memset(buf, 0x0, PAGE_SIZE);
		if (realblock == BLOCK_UNDEF)
			return 0;
		else
			return -EIO;
	}

	eb = d->eb_data + (realblock / d->pages_per_eblk);
	BUG_ON(d->revmap[realblock] == PAGE_UNDEF);

	readpos = (loff_t)realblock << PAGE_SHIFT;
	retries = 0;

retry:
	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, buf);

	d->mtd_read_count++;
	if (ret == -EUCLEAN) {
		eb->flags |= EBLOCK_BITFLIP;
		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
		ret = 0;
	}

	if (ret < 0) {
		dev_err(d->dev, "Read error %d\n", ret);
		eb->flags |= EBLOCK_READERR;
		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
		retries++;
		if (retries < MTDSWAP_IO_RETRIES)
			goto retry;

		return ret;
	}

	if (retlen != PAGE_SIZE) {
		dev_err(d->dev, "Short read %d\n", retlen);
		return -EIO;
	}

	return 0;
}

static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
			unsigned nr_pages)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
	unsigned long page;
	struct swap_eb *eb;
	unsigned int mapped;

	d->discard_count++;

	for (page = first; page < first + nr_pages; page++) {
		mapped = d->page_data[page];
		if (mapped <= BLOCK_MAX) {
			eb = d->eb_data + (mapped / d->pages_per_eblk);
			eb->active_count--;
			mtdswap_store_eb(d, eb);
			d->page_data[page] = BLOCK_UNDEF;
			d->revmap[mapped] = PAGE_UNDEF;
			d->discard_page_count++;
		} else if (mapped == BLOCK_ERROR) {
			d->page_data[page] = BLOCK_UNDEF;
			d->discard_page_count++;
		}
	}

	return 0;
}

static int mtdswap_show(struct seq_file *s, void *data)
{
	struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
	unsigned long sum;
	unsigned int count[MTDSWAP_TREE_CNT];
	unsigned int min[MTDSWAP_TREE_CNT];
	unsigned int max[MTDSWAP_TREE_CNT];
	unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
	uint64_t use_size;
	char *name[] = {"clean", "used", "low", "high", "dirty", "bitflip",
			"failing"};

	mutex_lock(&d->mbd_dev->lock);

	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
		struct rb_root *root = &d->trees[i].root;

		if (root->rb_node) {
			count[i] = d->trees[i].count;
			min[i] = rb_entry(rb_first(root), struct swap_eb,
					rb)->erase_count;
			max[i] = rb_entry(rb_last(root), struct swap_eb,
					rb)->erase_count;
		} else
			count[i] = 0;
	}

	if (d->curr_write) {
		cw = 1;
		cwp = d->curr_write_pos;
		cwecount = d->curr_write->erase_count;
	}

	sum = 0;
	for (i = 0; i < d->eblks; i++)
		sum += d->eb_data[i].erase_count;

	use_size = (uint64_t)d->eblks * d->mtd->erasesize;
	bb_cnt = mtdswap_badblocks(d->mtd, use_size);

	mapped = 0;
	pages = d->mbd_dev->size;
	for (i = 0; i < pages; i++)
		if (d->page_data[i] != BLOCK_UNDEF)
			mapped++;

	mutex_unlock(&d->mbd_dev->lock);

	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
		if (!count[i])
			continue;

		if (min[i] != max[i])
			seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
				"max %d times\n",
				name[i], count[i], min[i], max[i]);
		else
			seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
				"times\n", name[i], count[i], min[i]);
	}

	if (bb_cnt)
		seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);

	if (cw)
		seq_printf(s, "current erase block: %u pages used, %u free, "
			"erased %u times\n",
			cwp, d->pages_per_eblk - cwp, cwecount);

	seq_printf(s, "total erasures: %lu\n", sum);

	seq_printf(s, "\n");

	seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
	seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
	seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
	seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
	seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
	seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);

	seq_printf(s, "\n");
	seq_printf(s, "total pages: %u\n", pages);
	seq_printf(s, "pages mapped: %u\n", mapped);

	return 0;
}

static int mtdswap_open(struct inode *inode, struct file *file)
{
	return single_open(file, mtdswap_show, inode->i_private);
}

static const struct file_operations mtdswap_fops = {
	.open		= mtdswap_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int mtdswap_add_debugfs(struct mtdswap_dev *d)
{
	struct gendisk *gd = d->mbd_dev->disk;
	struct device *dev = disk_to_dev(gd);

	struct dentry *root;
	struct dentry *dent;

	root = debugfs_create_dir(gd->disk_name, NULL);
	if (IS_ERR(root))
		return 0;

	if (!root) {
		dev_err(dev, "failed to initialize debugfs\n");
		return -1;
	}

	d->debugfs_root = root;

	dent = debugfs_create_file("stats", S_IRUSR, root, d,
				&mtdswap_fops);
	if (!dent) {
		dev_err(d->dev, "debugfs_create_file failed\n");
		debugfs_remove_recursive(root);
		d->debugfs_root = NULL;
		return -1;
	}

	return 0;
}

static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
			unsigned int spare_cnt)
{
	struct mtd_info *mtd = d->mbd_dev->mtd;
	unsigned int i, eblk_bytes, pages, blocks;
	int ret = -ENOMEM;

	d->mtd = mtd;
	d->eblks = eblocks;
	d->spare_eblks = spare_cnt;
	d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;

	pages = d->mbd_dev->size;
	blocks = eblocks * d->pages_per_eblk;

	for (i = 0; i < MTDSWAP_TREE_CNT; i++)
		d->trees[i].root = RB_ROOT;

	d->page_data = vmalloc(sizeof(int)*pages);
	if (!d->page_data)
		goto page_data_fail;

	d->revmap = vmalloc(sizeof(int)*blocks);
	if (!d->revmap)
		goto revmap_fail;

	eblk_bytes = sizeof(struct swap_eb)*d->eblks;
	d->eb_data = vmalloc(eblk_bytes);
	if (!d->eb_data)
		goto eb_data_fail;

	memset(d->eb_data, 0, eblk_bytes);
	for (i = 0; i < pages; i++)
		d->page_data[i] = BLOCK_UNDEF;

	for (i = 0; i < blocks; i++)
		d->revmap[i] = PAGE_UNDEF;

	d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!d->page_buf)
		goto page_buf_fail;

	d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL);
	if (!d->oob_buf)
		goto oob_buf_fail;

	mtdswap_scan_eblks(d);

	return 0;

oob_buf_fail:
	kfree(d->page_buf);
page_buf_fail:
	vfree(d->eb_data);
eb_data_fail:
	vfree(d->revmap);
revmap_fail:
	vfree(d->page_data);
page_data_fail:
	printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
	return ret;
}
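
mtdswap_add_mtd() below turns the partition geometry and the
spare_eblocks percentage into the exported device size. A worked example
of that arithmetic, assuming a 256 MiB partition with 128 KiB erase
blocks, no bad blocks, and the default 10% spare (all assumed values):

#include <stdio.h>

/* Mirrors the sizing math in mtdswap_add_mtd() for assumed geometry. */
int main(void)
{
	unsigned long long use_size = 256ULL * 1024 * 1024;
	unsigned int erasesize = 128 * 1024;
	unsigned int eblocks = use_size / erasesize;	/* 2048 */
	unsigned int bad_blocks = 0;
	unsigned int eavailable = eblocks - bad_blocks;
	unsigned int spare_cnt = eavailable * 10 / 100;	/* 204 */
	unsigned long long swap_size =
		(unsigned long long)(eavailable - spare_cnt) * erasesize;

	printf("%u spare blocks, %llu KB of swap\n",
	       spare_cnt, swap_size / 1024);	/* 204 spare, 236032 KB */
	return 0;
}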
static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
	struct mtdswap_dev *d;
	struct mtd_blktrans_dev *mbd_dev;
	char *parts;
	char *this_opt;
	unsigned long part;
	unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
	uint64_t swap_size, use_size, size_limit;
	struct nand_ecclayout *oinfo;
	int ret;

	parts = &partitions[0];
	if (!*parts)
		return;

	while ((this_opt = strsep(&parts, ",")) != NULL) {
		if (strict_strtoul(this_opt, 0, &part) < 0)
			return;

		if (mtd->index == part)
			break;
	}

	if (mtd->index != part)
		return;

	if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
		printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
			"%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
		return;
	}

	if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
		printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
			" %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
		return;
	}

	oinfo = mtd->ecclayout;
	if (!mtd->oobsize || !oinfo || oinfo->oobavail < MTDSWAP_OOBSIZE) {
		printk(KERN_ERR "%s: Not enough free bytes in OOB, "
			"%d available, %u needed.\n",
			MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE);
		return;
	}

	if (spare_eblocks > 100)
		spare_eblocks = 100;

	use_size = mtd->size;
	size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;

	if (mtd->size > size_limit) {
		printk(KERN_WARNING "%s: Device too large. Limiting size to "
			"%llu bytes\n", MTDSWAP_PREFIX, size_limit);
		use_size = size_limit;
	}

	eblocks = mtd_div_by_eb(use_size, mtd);
	use_size = eblocks * mtd->erasesize;
	bad_blocks = mtdswap_badblocks(mtd, use_size);
	eavailable = eblocks - bad_blocks;

	if (eavailable < MIN_ERASE_BLOCKS) {
		printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
			"%d needed\n", MTDSWAP_PREFIX, eavailable,
			MIN_ERASE_BLOCKS);
		return;
	}

	spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);

	if (spare_cnt < MIN_SPARE_EBLOCKS)
		spare_cnt = MIN_SPARE_EBLOCKS;

	if (spare_cnt > eavailable - 1)
		spare_cnt = eavailable - 1;

	swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
		(header ? PAGE_SIZE : 0);

	printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
		"%u spare, %u bad blocks\n",
		MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);

	d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
	if (!d)
		return;

	mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
	if (!mbd_dev) {
		kfree(d);
		return;
	}

	d->mbd_dev = mbd_dev;
	mbd_dev->priv = d;

	mbd_dev->mtd = mtd;
	mbd_dev->devnum = mtd->index;
	mbd_dev->size = swap_size >> PAGE_SHIFT;
	mbd_dev->tr = tr;

	if (!(mtd->flags & MTD_WRITEABLE))
		mbd_dev->readonly = 1;

	if (mtdswap_init(d, eblocks, spare_cnt) < 0)
		goto init_failed;

	if (add_mtd_blktrans_dev(mbd_dev) < 0)
		goto cleanup;

	d->dev = disk_to_dev(mbd_dev->disk);

	ret = mtdswap_add_debugfs(d);
	if (ret < 0)
		goto debugfs_failed;

	return;

debugfs_failed:
	del_mtd_blktrans_dev(mbd_dev);

cleanup:
	mtdswap_cleanup(d);

init_failed:
	kfree(mbd_dev);
	kfree(d);
}

static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

	debugfs_remove_recursive(d->debugfs_root);
	del_mtd_blktrans_dev(dev);
	mtdswap_cleanup(d);
	kfree(d);
}

static struct mtd_blktrans_ops mtdswap_ops = {
	.name		= "mtdswap",
	.major		= 0,
	.part_bits	= 0,
	.blksize	= PAGE_SIZE,
	.flush		= mtdswap_flush,
	.readsect	= mtdswap_readsect,
	.writesect	= mtdswap_writesect,
	.discard	= mtdswap_discard,
	.background	= mtdswap_background,
	.add_mtd	= mtdswap_add_mtd,
	.remove_dev	= mtdswap_remove_dev,
	.owner		= THIS_MODULE,
};

static int __init mtdswap_modinit(void)
{
	return register_mtd_blktrans(&mtdswap_ops);
}

static void __exit mtdswap_modexit(void)
{
	deregister_mtd_blktrans(&mtdswap_ops);
}

module_init(mtdswap_modinit);
module_exit(mtdswap_modexit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
		"swap space");