Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.15-rc1 2137 lines 52 kB view raw
1/* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15#define KMSG_COMPONENT "zram" 16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18#include <linux/module.h> 19#include <linux/kernel.h> 20#include <linux/bio.h> 21#include <linux/bitops.h> 22#include <linux/blkdev.h> 23#include <linux/buffer_head.h> 24#include <linux/device.h> 25#include <linux/genhd.h> 26#include <linux/highmem.h> 27#include <linux/slab.h> 28#include <linux/backing-dev.h> 29#include <linux/string.h> 30#include <linux/vmalloc.h> 31#include <linux/err.h> 32#include <linux/idr.h> 33#include <linux/sysfs.h> 34#include <linux/debugfs.h> 35#include <linux/cpuhotplug.h> 36#include <linux/part_stat.h> 37 38#include "zram_drv.h" 39 40static DEFINE_IDR(zram_index_idr); 41/* idr index must be protected */ 42static DEFINE_MUTEX(zram_index_mutex); 43 44static int zram_major; 45static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; 46 47/* Module params (documentation at end) */ 48static unsigned int num_devices = 1; 49/* 50 * Pages that compress to sizes equals or greater than this are stored 51 * uncompressed in memory. 
52 */ 53static size_t huge_class_size; 54 55static const struct block_device_operations zram_devops; 56static const struct block_device_operations zram_wb_devops; 57 58static void zram_free_page(struct zram *zram, size_t index); 59static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 60 u32 index, int offset, struct bio *bio); 61 62 63static int zram_slot_trylock(struct zram *zram, u32 index) 64{ 65 return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); 66} 67 68static void zram_slot_lock(struct zram *zram, u32 index) 69{ 70 bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); 71} 72 73static void zram_slot_unlock(struct zram *zram, u32 index) 74{ 75 bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); 76} 77 78static inline bool init_done(struct zram *zram) 79{ 80 return zram->disksize; 81} 82 83static inline struct zram *dev_to_zram(struct device *dev) 84{ 85 return (struct zram *)dev_to_disk(dev)->private_data; 86} 87 88static unsigned long zram_get_handle(struct zram *zram, u32 index) 89{ 90 return zram->table[index].handle; 91} 92 93static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) 94{ 95 zram->table[index].handle = handle; 96} 97 98/* flag operations require table entry bit_spin_lock() being held */ 99static bool zram_test_flag(struct zram *zram, u32 index, 100 enum zram_pageflags flag) 101{ 102 return zram->table[index].flags & BIT(flag); 103} 104 105static void zram_set_flag(struct zram *zram, u32 index, 106 enum zram_pageflags flag) 107{ 108 zram->table[index].flags |= BIT(flag); 109} 110 111static void zram_clear_flag(struct zram *zram, u32 index, 112 enum zram_pageflags flag) 113{ 114 zram->table[index].flags &= ~BIT(flag); 115} 116 117static inline void zram_set_element(struct zram *zram, u32 index, 118 unsigned long element) 119{ 120 zram->table[index].element = element; 121} 122 123static unsigned long zram_get_element(struct zram *zram, u32 index) 124{ 125 return zram->table[index].element; 126} 
127 128static size_t zram_get_obj_size(struct zram *zram, u32 index) 129{ 130 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 131} 132 133static void zram_set_obj_size(struct zram *zram, 134 u32 index, size_t size) 135{ 136 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; 137 138 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; 139} 140 141static inline bool zram_allocated(struct zram *zram, u32 index) 142{ 143 return zram_get_obj_size(zram, index) || 144 zram_test_flag(zram, index, ZRAM_SAME) || 145 zram_test_flag(zram, index, ZRAM_WB); 146} 147 148#if PAGE_SIZE != 4096 149static inline bool is_partial_io(struct bio_vec *bvec) 150{ 151 return bvec->bv_len != PAGE_SIZE; 152} 153#else 154static inline bool is_partial_io(struct bio_vec *bvec) 155{ 156 return false; 157} 158#endif 159 160/* 161 * Check if request is within bounds and aligned on zram logical blocks. 162 */ 163static inline bool valid_io_request(struct zram *zram, 164 sector_t start, unsigned int size) 165{ 166 u64 end, bound; 167 168 /* unaligned request */ 169 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 170 return false; 171 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 172 return false; 173 174 end = start + (size >> SECTOR_SHIFT); 175 bound = zram->disksize >> SECTOR_SHIFT; 176 /* out of range range */ 177 if (unlikely(start >= bound || end > bound || start > end)) 178 return false; 179 180 /* I/O request is valid */ 181 return true; 182} 183 184static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 185{ 186 *index += (*offset + bvec->bv_len) / PAGE_SIZE; 187 *offset = (*offset + bvec->bv_len) % PAGE_SIZE; 188} 189 190static inline void update_used_max(struct zram *zram, 191 const unsigned long pages) 192{ 193 unsigned long old_max, cur_max; 194 195 old_max = atomic_long_read(&zram->stats.max_used_pages); 196 197 do { 198 cur_max = old_max; 199 if (pages > cur_max) 200 old_max = atomic_long_cmpxchg( 201 
&zram->stats.max_used_pages, cur_max, pages); 202 } while (old_max != cur_max); 203} 204 205static inline void zram_fill_page(void *ptr, unsigned long len, 206 unsigned long value) 207{ 208 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 209 memset_l(ptr, value, len / sizeof(unsigned long)); 210} 211 212static bool page_same_filled(void *ptr, unsigned long *element) 213{ 214 unsigned long *page; 215 unsigned long val; 216 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 217 218 page = (unsigned long *)ptr; 219 val = page[0]; 220 221 if (val != page[last_pos]) 222 return false; 223 224 for (pos = 1; pos < last_pos; pos++) { 225 if (val != page[pos]) 226 return false; 227 } 228 229 *element = val; 230 231 return true; 232} 233 234static ssize_t initstate_show(struct device *dev, 235 struct device_attribute *attr, char *buf) 236{ 237 u32 val; 238 struct zram *zram = dev_to_zram(dev); 239 240 down_read(&zram->init_lock); 241 val = init_done(zram); 242 up_read(&zram->init_lock); 243 244 return scnprintf(buf, PAGE_SIZE, "%u\n", val); 245} 246 247static ssize_t disksize_show(struct device *dev, 248 struct device_attribute *attr, char *buf) 249{ 250 struct zram *zram = dev_to_zram(dev); 251 252 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); 253} 254 255static ssize_t mem_limit_store(struct device *dev, 256 struct device_attribute *attr, const char *buf, size_t len) 257{ 258 u64 limit; 259 char *tmp; 260 struct zram *zram = dev_to_zram(dev); 261 262 limit = memparse(buf, &tmp); 263 if (buf == tmp) /* no chars parsed, invalid input */ 264 return -EINVAL; 265 266 down_write(&zram->init_lock); 267 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 268 up_write(&zram->init_lock); 269 270 return len; 271} 272 273static ssize_t mem_used_max_store(struct device *dev, 274 struct device_attribute *attr, const char *buf, size_t len) 275{ 276 int err; 277 unsigned long val; 278 struct zram *zram = dev_to_zram(dev); 279 280 err = kstrtoul(buf, 10, 
&val); 281 if (err || val != 0) 282 return -EINVAL; 283 284 down_read(&zram->init_lock); 285 if (init_done(zram)) { 286 atomic_long_set(&zram->stats.max_used_pages, 287 zs_get_total_pages(zram->mem_pool)); 288 } 289 up_read(&zram->init_lock); 290 291 return len; 292} 293 294static ssize_t idle_store(struct device *dev, 295 struct device_attribute *attr, const char *buf, size_t len) 296{ 297 struct zram *zram = dev_to_zram(dev); 298 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 299 int index; 300 301 if (!sysfs_streq(buf, "all")) 302 return -EINVAL; 303 304 down_read(&zram->init_lock); 305 if (!init_done(zram)) { 306 up_read(&zram->init_lock); 307 return -EINVAL; 308 } 309 310 for (index = 0; index < nr_pages; index++) { 311 /* 312 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. 313 * See the comment in writeback_store. 314 */ 315 zram_slot_lock(zram, index); 316 if (zram_allocated(zram, index) && 317 !zram_test_flag(zram, index, ZRAM_UNDER_WB)) 318 zram_set_flag(zram, index, ZRAM_IDLE); 319 zram_slot_unlock(zram, index); 320 } 321 322 up_read(&zram->init_lock); 323 324 return len; 325} 326 327#ifdef CONFIG_ZRAM_WRITEBACK 328static ssize_t writeback_limit_enable_store(struct device *dev, 329 struct device_attribute *attr, const char *buf, size_t len) 330{ 331 struct zram *zram = dev_to_zram(dev); 332 u64 val; 333 ssize_t ret = -EINVAL; 334 335 if (kstrtoull(buf, 10, &val)) 336 return ret; 337 338 down_read(&zram->init_lock); 339 spin_lock(&zram->wb_limit_lock); 340 zram->wb_limit_enable = val; 341 spin_unlock(&zram->wb_limit_lock); 342 up_read(&zram->init_lock); 343 ret = len; 344 345 return ret; 346} 347 348static ssize_t writeback_limit_enable_show(struct device *dev, 349 struct device_attribute *attr, char *buf) 350{ 351 bool val; 352 struct zram *zram = dev_to_zram(dev); 353 354 down_read(&zram->init_lock); 355 spin_lock(&zram->wb_limit_lock); 356 val = zram->wb_limit_enable; 357 spin_unlock(&zram->wb_limit_lock); 358 
up_read(&zram->init_lock); 359 360 return scnprintf(buf, PAGE_SIZE, "%d\n", val); 361} 362 363static ssize_t writeback_limit_store(struct device *dev, 364 struct device_attribute *attr, const char *buf, size_t len) 365{ 366 struct zram *zram = dev_to_zram(dev); 367 u64 val; 368 ssize_t ret = -EINVAL; 369 370 if (kstrtoull(buf, 10, &val)) 371 return ret; 372 373 down_read(&zram->init_lock); 374 spin_lock(&zram->wb_limit_lock); 375 zram->bd_wb_limit = val; 376 spin_unlock(&zram->wb_limit_lock); 377 up_read(&zram->init_lock); 378 ret = len; 379 380 return ret; 381} 382 383static ssize_t writeback_limit_show(struct device *dev, 384 struct device_attribute *attr, char *buf) 385{ 386 u64 val; 387 struct zram *zram = dev_to_zram(dev); 388 389 down_read(&zram->init_lock); 390 spin_lock(&zram->wb_limit_lock); 391 val = zram->bd_wb_limit; 392 spin_unlock(&zram->wb_limit_lock); 393 up_read(&zram->init_lock); 394 395 return scnprintf(buf, PAGE_SIZE, "%llu\n", val); 396} 397 398static void reset_bdev(struct zram *zram) 399{ 400 struct block_device *bdev; 401 402 if (!zram->backing_dev) 403 return; 404 405 bdev = zram->bdev; 406 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 407 /* hope filp_close flush all of IO */ 408 filp_close(zram->backing_dev, NULL); 409 zram->backing_dev = NULL; 410 zram->bdev = NULL; 411 zram->disk->fops = &zram_devops; 412 kvfree(zram->bitmap); 413 zram->bitmap = NULL; 414} 415 416static ssize_t backing_dev_show(struct device *dev, 417 struct device_attribute *attr, char *buf) 418{ 419 struct file *file; 420 struct zram *zram = dev_to_zram(dev); 421 char *p; 422 ssize_t ret; 423 424 down_read(&zram->init_lock); 425 file = zram->backing_dev; 426 if (!file) { 427 memcpy(buf, "none\n", 5); 428 up_read(&zram->init_lock); 429 return 5; 430 } 431 432 p = file_path(file, buf, PAGE_SIZE - 1); 433 if (IS_ERR(p)) { 434 ret = PTR_ERR(p); 435 goto out; 436 } 437 438 ret = strlen(p); 439 memmove(buf, p, ret); 440 buf[ret++] = '\n'; 441out: 442 
up_read(&zram->init_lock); 443 return ret; 444} 445 446static ssize_t backing_dev_store(struct device *dev, 447 struct device_attribute *attr, const char *buf, size_t len) 448{ 449 char *file_name; 450 size_t sz; 451 struct file *backing_dev = NULL; 452 struct inode *inode; 453 struct address_space *mapping; 454 unsigned int bitmap_sz; 455 unsigned long nr_pages, *bitmap = NULL; 456 struct block_device *bdev = NULL; 457 int err; 458 struct zram *zram = dev_to_zram(dev); 459 460 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 461 if (!file_name) 462 return -ENOMEM; 463 464 down_write(&zram->init_lock); 465 if (init_done(zram)) { 466 pr_info("Can't setup backing device for initialized device\n"); 467 err = -EBUSY; 468 goto out; 469 } 470 471 strlcpy(file_name, buf, PATH_MAX); 472 /* ignore trailing newline */ 473 sz = strlen(file_name); 474 if (sz > 0 && file_name[sz - 1] == '\n') 475 file_name[sz - 1] = 0x00; 476 477 backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); 478 if (IS_ERR(backing_dev)) { 479 err = PTR_ERR(backing_dev); 480 backing_dev = NULL; 481 goto out; 482 } 483 484 mapping = backing_dev->f_mapping; 485 inode = mapping->host; 486 487 /* Support only block device in this moment */ 488 if (!S_ISBLK(inode->i_mode)) { 489 err = -ENOTBLK; 490 goto out; 491 } 492 493 bdev = blkdev_get_by_dev(inode->i_rdev, 494 FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); 495 if (IS_ERR(bdev)) { 496 err = PTR_ERR(bdev); 497 bdev = NULL; 498 goto out; 499 } 500 501 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 502 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 503 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 504 if (!bitmap) { 505 err = -ENOMEM; 506 goto out; 507 } 508 509 reset_bdev(zram); 510 511 zram->bdev = bdev; 512 zram->backing_dev = backing_dev; 513 zram->bitmap = bitmap; 514 zram->nr_pages = nr_pages; 515 /* 516 * With writeback feature, zram does asynchronous IO so it's no longer 517 * synchronous device so let's remove synchronous io flag. 
Othewise, 518 * upper layer(e.g., swap) could wait IO completion rather than 519 * (submit and return), which will cause system sluggish. 520 * Furthermore, when the IO function returns(e.g., swap_readpage), 521 * upper layer expects IO was done so it could deallocate the page 522 * freely but in fact, IO is going on so finally could cause 523 * use-after-free when the IO is really done. 524 */ 525 zram->disk->fops = &zram_wb_devops; 526 up_write(&zram->init_lock); 527 528 pr_info("setup backing device %s\n", file_name); 529 kfree(file_name); 530 531 return len; 532out: 533 kvfree(bitmap); 534 535 if (bdev) 536 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 537 538 if (backing_dev) 539 filp_close(backing_dev, NULL); 540 541 up_write(&zram->init_lock); 542 543 kfree(file_name); 544 545 return err; 546} 547 548static unsigned long alloc_block_bdev(struct zram *zram) 549{ 550 unsigned long blk_idx = 1; 551retry: 552 /* skip 0 bit to confuse zram.handle = 0 */ 553 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); 554 if (blk_idx == zram->nr_pages) 555 return 0; 556 557 if (test_and_set_bit(blk_idx, zram->bitmap)) 558 goto retry; 559 560 atomic64_inc(&zram->stats.bd_count); 561 return blk_idx; 562} 563 564static void free_block_bdev(struct zram *zram, unsigned long blk_idx) 565{ 566 int was_set; 567 568 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 569 WARN_ON_ONCE(!was_set); 570 atomic64_dec(&zram->stats.bd_count); 571} 572 573static void zram_page_end_io(struct bio *bio) 574{ 575 struct page *page = bio_first_page_all(bio); 576 577 page_endio(page, op_is_write(bio_op(bio)), 578 blk_status_to_errno(bio->bi_status)); 579 bio_put(bio); 580} 581 582/* 583 * Returns 1 if the submission is successful. 
584 */ 585static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, 586 unsigned long entry, struct bio *parent) 587{ 588 struct bio *bio; 589 590 bio = bio_alloc(GFP_ATOMIC, 1); 591 if (!bio) 592 return -ENOMEM; 593 594 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 595 bio_set_dev(bio, zram->bdev); 596 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { 597 bio_put(bio); 598 return -EIO; 599 } 600 601 if (!parent) { 602 bio->bi_opf = REQ_OP_READ; 603 bio->bi_end_io = zram_page_end_io; 604 } else { 605 bio->bi_opf = parent->bi_opf; 606 bio_chain(bio, parent); 607 } 608 609 submit_bio(bio); 610 return 1; 611} 612 613#define PAGE_WB_SIG "page_index=" 614 615#define PAGE_WRITEBACK 0 616#define HUGE_WRITEBACK 1 617#define IDLE_WRITEBACK 2 618 619 620static ssize_t writeback_store(struct device *dev, 621 struct device_attribute *attr, const char *buf, size_t len) 622{ 623 struct zram *zram = dev_to_zram(dev); 624 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 625 unsigned long index = 0; 626 struct bio bio; 627 struct bio_vec bio_vec; 628 struct page *page; 629 ssize_t ret = len; 630 int mode, err; 631 unsigned long blk_idx = 0; 632 633 if (sysfs_streq(buf, "idle")) 634 mode = IDLE_WRITEBACK; 635 else if (sysfs_streq(buf, "huge")) 636 mode = HUGE_WRITEBACK; 637 else { 638 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) 639 return -EINVAL; 640 641 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || 642 index >= nr_pages) 643 return -EINVAL; 644 645 nr_pages = 1; 646 mode = PAGE_WRITEBACK; 647 } 648 649 down_read(&zram->init_lock); 650 if (!init_done(zram)) { 651 ret = -EINVAL; 652 goto release_init_lock; 653 } 654 655 if (!zram->backing_dev) { 656 ret = -ENODEV; 657 goto release_init_lock; 658 } 659 660 page = alloc_page(GFP_KERNEL); 661 if (!page) { 662 ret = -ENOMEM; 663 goto release_init_lock; 664 } 665 666 for (; nr_pages != 0; index++, nr_pages--) { 667 struct bio_vec bvec; 668 669 bvec.bv_page = page; 
670 bvec.bv_len = PAGE_SIZE; 671 bvec.bv_offset = 0; 672 673 spin_lock(&zram->wb_limit_lock); 674 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 675 spin_unlock(&zram->wb_limit_lock); 676 ret = -EIO; 677 break; 678 } 679 spin_unlock(&zram->wb_limit_lock); 680 681 if (!blk_idx) { 682 blk_idx = alloc_block_bdev(zram); 683 if (!blk_idx) { 684 ret = -ENOSPC; 685 break; 686 } 687 } 688 689 zram_slot_lock(zram, index); 690 if (!zram_allocated(zram, index)) 691 goto next; 692 693 if (zram_test_flag(zram, index, ZRAM_WB) || 694 zram_test_flag(zram, index, ZRAM_SAME) || 695 zram_test_flag(zram, index, ZRAM_UNDER_WB)) 696 goto next; 697 698 if (mode == IDLE_WRITEBACK && 699 !zram_test_flag(zram, index, ZRAM_IDLE)) 700 goto next; 701 if (mode == HUGE_WRITEBACK && 702 !zram_test_flag(zram, index, ZRAM_HUGE)) 703 goto next; 704 /* 705 * Clearing ZRAM_UNDER_WB is duty of caller. 706 * IOW, zram_free_page never clear it. 707 */ 708 zram_set_flag(zram, index, ZRAM_UNDER_WB); 709 /* Need for hugepage writeback racing */ 710 zram_set_flag(zram, index, ZRAM_IDLE); 711 zram_slot_unlock(zram, index); 712 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { 713 zram_slot_lock(zram, index); 714 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 715 zram_clear_flag(zram, index, ZRAM_IDLE); 716 zram_slot_unlock(zram, index); 717 continue; 718 } 719 720 bio_init(&bio, &bio_vec, 1); 721 bio_set_dev(&bio, zram->bdev); 722 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 723 bio.bi_opf = REQ_OP_WRITE | REQ_SYNC; 724 725 bio_add_page(&bio, bvec.bv_page, bvec.bv_len, 726 bvec.bv_offset); 727 /* 728 * XXX: A single page IO would be inefficient for write 729 * but it would be not bad as starter. 730 */ 731 err = submit_bio_wait(&bio); 732 if (err) { 733 zram_slot_lock(zram, index); 734 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 735 zram_clear_flag(zram, index, ZRAM_IDLE); 736 zram_slot_unlock(zram, index); 737 /* 738 * Return last IO error unless every IO were 739 * not suceeded. 
740 */ 741 ret = err; 742 continue; 743 } 744 745 atomic64_inc(&zram->stats.bd_writes); 746 /* 747 * We released zram_slot_lock so need to check if the slot was 748 * changed. If there is freeing for the slot, we can catch it 749 * easily by zram_allocated. 750 * A subtle case is the slot is freed/reallocated/marked as 751 * ZRAM_IDLE again. To close the race, idle_store doesn't 752 * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB. 753 * Thus, we could close the race by checking ZRAM_IDLE bit. 754 */ 755 zram_slot_lock(zram, index); 756 if (!zram_allocated(zram, index) || 757 !zram_test_flag(zram, index, ZRAM_IDLE)) { 758 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 759 zram_clear_flag(zram, index, ZRAM_IDLE); 760 goto next; 761 } 762 763 zram_free_page(zram, index); 764 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 765 zram_set_flag(zram, index, ZRAM_WB); 766 zram_set_element(zram, index, blk_idx); 767 blk_idx = 0; 768 atomic64_inc(&zram->stats.pages_stored); 769 spin_lock(&zram->wb_limit_lock); 770 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 771 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 772 spin_unlock(&zram->wb_limit_lock); 773next: 774 zram_slot_unlock(zram, index); 775 } 776 777 if (blk_idx) 778 free_block_bdev(zram, blk_idx); 779 __free_page(page); 780release_init_lock: 781 up_read(&zram->init_lock); 782 783 return ret; 784} 785 786struct zram_work { 787 struct work_struct work; 788 struct zram *zram; 789 unsigned long entry; 790 struct bio *bio; 791 struct bio_vec bvec; 792}; 793 794#if PAGE_SIZE != 4096 795static void zram_sync_read(struct work_struct *work) 796{ 797 struct zram_work *zw = container_of(work, struct zram_work, work); 798 struct zram *zram = zw->zram; 799 unsigned long entry = zw->entry; 800 struct bio *bio = zw->bio; 801 802 read_from_bdev_async(zram, &zw->bvec, entry, bio); 803} 804 805/* 806 * Block layer want one ->submit_bio to be active at a time, so if we use 807 * chained IO with parent IO in same context, it's a 
deadlock. To avoid that, 808 * use a worker thread context. 809 */ 810static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 811 unsigned long entry, struct bio *bio) 812{ 813 struct zram_work work; 814 815 work.bvec = *bvec; 816 work.zram = zram; 817 work.entry = entry; 818 work.bio = bio; 819 820 INIT_WORK_ONSTACK(&work.work, zram_sync_read); 821 queue_work(system_unbound_wq, &work.work); 822 flush_work(&work.work); 823 destroy_work_on_stack(&work.work); 824 825 return 1; 826} 827#else 828static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 829 unsigned long entry, struct bio *bio) 830{ 831 WARN_ON(1); 832 return -EIO; 833} 834#endif 835 836static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 837 unsigned long entry, struct bio *parent, bool sync) 838{ 839 atomic64_inc(&zram->stats.bd_reads); 840 if (sync) 841 return read_from_bdev_sync(zram, bvec, entry, parent); 842 else 843 return read_from_bdev_async(zram, bvec, entry, parent); 844} 845#else 846static inline void reset_bdev(struct zram *zram) {}; 847static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 848 unsigned long entry, struct bio *parent, bool sync) 849{ 850 return -EIO; 851} 852 853static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; 854#endif 855 856#ifdef CONFIG_ZRAM_MEMORY_TRACKING 857 858static struct dentry *zram_debugfs_root; 859 860static void zram_debugfs_create(void) 861{ 862 zram_debugfs_root = debugfs_create_dir("zram", NULL); 863} 864 865static void zram_debugfs_destroy(void) 866{ 867 debugfs_remove_recursive(zram_debugfs_root); 868} 869 870static void zram_accessed(struct zram *zram, u32 index) 871{ 872 zram_clear_flag(zram, index, ZRAM_IDLE); 873 zram->table[index].ac_time = ktime_get_boottime(); 874} 875 876static ssize_t read_block_state(struct file *file, char __user *buf, 877 size_t count, loff_t *ppos) 878{ 879 char *kbuf; 880 ssize_t index, written = 0; 881 struct zram *zram = file->private_data; 
882 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 883 struct timespec64 ts; 884 885 kbuf = kvmalloc(count, GFP_KERNEL); 886 if (!kbuf) 887 return -ENOMEM; 888 889 down_read(&zram->init_lock); 890 if (!init_done(zram)) { 891 up_read(&zram->init_lock); 892 kvfree(kbuf); 893 return -EINVAL; 894 } 895 896 for (index = *ppos; index < nr_pages; index++) { 897 int copied; 898 899 zram_slot_lock(zram, index); 900 if (!zram_allocated(zram, index)) 901 goto next; 902 903 ts = ktime_to_timespec64(zram->table[index].ac_time); 904 copied = snprintf(kbuf + written, count, 905 "%12zd %12lld.%06lu %c%c%c%c\n", 906 index, (s64)ts.tv_sec, 907 ts.tv_nsec / NSEC_PER_USEC, 908 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', 909 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', 910 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 911 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); 912 913 if (count < copied) { 914 zram_slot_unlock(zram, index); 915 break; 916 } 917 written += copied; 918 count -= copied; 919next: 920 zram_slot_unlock(zram, index); 921 *ppos += 1; 922 } 923 924 up_read(&zram->init_lock); 925 if (copy_to_user(buf, kbuf, written)) 926 written = -EFAULT; 927 kvfree(kbuf); 928 929 return written; 930} 931 932static const struct file_operations proc_zram_block_state_op = { 933 .open = simple_open, 934 .read = read_block_state, 935 .llseek = default_llseek, 936}; 937 938static void zram_debugfs_register(struct zram *zram) 939{ 940 if (!zram_debugfs_root) 941 return; 942 943 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 944 zram_debugfs_root); 945 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 946 zram, &proc_zram_block_state_op); 947} 948 949static void zram_debugfs_unregister(struct zram *zram) 950{ 951 debugfs_remove_recursive(zram->debugfs_dir); 952} 953#else 954static void zram_debugfs_create(void) {}; 955static void zram_debugfs_destroy(void) {}; 956static void zram_accessed(struct zram *zram, u32 index) 957{ 958 
zram_clear_flag(zram, index, ZRAM_IDLE); 959}; 960static void zram_debugfs_register(struct zram *zram) {}; 961static void zram_debugfs_unregister(struct zram *zram) {}; 962#endif 963 964/* 965 * We switched to per-cpu streams and this attr is not needed anymore. 966 * However, we will keep it around for some time, because: 967 * a) we may revert per-cpu streams in the future 968 * b) it's visible to user space and we need to follow our 2 years 969 * retirement rule; but we already have a number of 'soon to be 970 * altered' attrs, so max_comp_streams need to wait for the next 971 * layoff cycle. 972 */ 973static ssize_t max_comp_streams_show(struct device *dev, 974 struct device_attribute *attr, char *buf) 975{ 976 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); 977} 978 979static ssize_t max_comp_streams_store(struct device *dev, 980 struct device_attribute *attr, const char *buf, size_t len) 981{ 982 return len; 983} 984 985static ssize_t comp_algorithm_show(struct device *dev, 986 struct device_attribute *attr, char *buf) 987{ 988 size_t sz; 989 struct zram *zram = dev_to_zram(dev); 990 991 down_read(&zram->init_lock); 992 sz = zcomp_available_show(zram->compressor, buf); 993 up_read(&zram->init_lock); 994 995 return sz; 996} 997 998static ssize_t comp_algorithm_store(struct device *dev, 999 struct device_attribute *attr, const char *buf, size_t len) 1000{ 1001 struct zram *zram = dev_to_zram(dev); 1002 char compressor[ARRAY_SIZE(zram->compressor)]; 1003 size_t sz; 1004 1005 strlcpy(compressor, buf, sizeof(compressor)); 1006 /* ignore trailing newline */ 1007 sz = strlen(compressor); 1008 if (sz > 0 && compressor[sz - 1] == '\n') 1009 compressor[sz - 1] = 0x00; 1010 1011 if (!zcomp_available_algorithm(compressor)) 1012 return -EINVAL; 1013 1014 down_write(&zram->init_lock); 1015 if (init_done(zram)) { 1016 up_write(&zram->init_lock); 1017 pr_info("Can't change algorithm for initialized device\n"); 1018 return -EBUSY; 1019 } 1020 1021 
strcpy(zram->compressor, compressor); 1022 up_write(&zram->init_lock); 1023 return len; 1024} 1025 1026static ssize_t compact_store(struct device *dev, 1027 struct device_attribute *attr, const char *buf, size_t len) 1028{ 1029 struct zram *zram = dev_to_zram(dev); 1030 1031 down_read(&zram->init_lock); 1032 if (!init_done(zram)) { 1033 up_read(&zram->init_lock); 1034 return -EINVAL; 1035 } 1036 1037 zs_compact(zram->mem_pool); 1038 up_read(&zram->init_lock); 1039 1040 return len; 1041} 1042 1043static ssize_t io_stat_show(struct device *dev, 1044 struct device_attribute *attr, char *buf) 1045{ 1046 struct zram *zram = dev_to_zram(dev); 1047 ssize_t ret; 1048 1049 down_read(&zram->init_lock); 1050 ret = scnprintf(buf, PAGE_SIZE, 1051 "%8llu %8llu %8llu %8llu\n", 1052 (u64)atomic64_read(&zram->stats.failed_reads), 1053 (u64)atomic64_read(&zram->stats.failed_writes), 1054 (u64)atomic64_read(&zram->stats.invalid_io), 1055 (u64)atomic64_read(&zram->stats.notify_free)); 1056 up_read(&zram->init_lock); 1057 1058 return ret; 1059} 1060 1061static ssize_t mm_stat_show(struct device *dev, 1062 struct device_attribute *attr, char *buf) 1063{ 1064 struct zram *zram = dev_to_zram(dev); 1065 struct zs_pool_stats pool_stats; 1066 u64 orig_size, mem_used = 0; 1067 long max_used; 1068 ssize_t ret; 1069 1070 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 1071 1072 down_read(&zram->init_lock); 1073 if (init_done(zram)) { 1074 mem_used = zs_get_total_pages(zram->mem_pool); 1075 zs_pool_stats(zram->mem_pool, &pool_stats); 1076 } 1077 1078 orig_size = atomic64_read(&zram->stats.pages_stored); 1079 max_used = atomic_long_read(&zram->stats.max_used_pages); 1080 1081 ret = scnprintf(buf, PAGE_SIZE, 1082 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", 1083 orig_size << PAGE_SHIFT, 1084 (u64)atomic64_read(&zram->stats.compr_data_size), 1085 mem_used << PAGE_SHIFT, 1086 zram->limit_pages << PAGE_SHIFT, 1087 max_used << PAGE_SHIFT, 1088 
(u64)atomic64_read(&zram->stats.same_pages), 1089 atomic_long_read(&pool_stats.pages_compacted), 1090 (u64)atomic64_read(&zram->stats.huge_pages), 1091 (u64)atomic64_read(&zram->stats.huge_pages_since)); 1092 up_read(&zram->init_lock); 1093 1094 return ret; 1095} 1096 1097#ifdef CONFIG_ZRAM_WRITEBACK 1098#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 1099static ssize_t bd_stat_show(struct device *dev, 1100 struct device_attribute *attr, char *buf) 1101{ 1102 struct zram *zram = dev_to_zram(dev); 1103 ssize_t ret; 1104 1105 down_read(&zram->init_lock); 1106 ret = scnprintf(buf, PAGE_SIZE, 1107 "%8llu %8llu %8llu\n", 1108 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 1109 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 1110 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 1111 up_read(&zram->init_lock); 1112 1113 return ret; 1114} 1115#endif 1116 1117static ssize_t debug_stat_show(struct device *dev, 1118 struct device_attribute *attr, char *buf) 1119{ 1120 int version = 1; 1121 struct zram *zram = dev_to_zram(dev); 1122 ssize_t ret; 1123 1124 down_read(&zram->init_lock); 1125 ret = scnprintf(buf, PAGE_SIZE, 1126 "version: %d\n%8llu %8llu\n", 1127 version, 1128 (u64)atomic64_read(&zram->stats.writestall), 1129 (u64)atomic64_read(&zram->stats.miss_free)); 1130 up_read(&zram->init_lock); 1131 1132 return ret; 1133} 1134 1135static DEVICE_ATTR_RO(io_stat); 1136static DEVICE_ATTR_RO(mm_stat); 1137#ifdef CONFIG_ZRAM_WRITEBACK 1138static DEVICE_ATTR_RO(bd_stat); 1139#endif 1140static DEVICE_ATTR_RO(debug_stat); 1141 1142static void zram_meta_free(struct zram *zram, u64 disksize) 1143{ 1144 size_t num_pages = disksize >> PAGE_SHIFT; 1145 size_t index; 1146 1147 /* Free all pages that are still in this zram device */ 1148 for (index = 0; index < num_pages; index++) 1149 zram_free_page(zram, index); 1150 1151 zs_destroy_pool(zram->mem_pool); 1152 vfree(zram->table); 1153} 1154 1155static bool zram_meta_alloc(struct zram *zram, u64 disksize) 1156{ 1157 size_t 
num_pages; 1158 1159 num_pages = disksize >> PAGE_SHIFT; 1160 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); 1161 if (!zram->table) 1162 return false; 1163 1164 zram->mem_pool = zs_create_pool(zram->disk->disk_name); 1165 if (!zram->mem_pool) { 1166 vfree(zram->table); 1167 return false; 1168 } 1169 1170 if (!huge_class_size) 1171 huge_class_size = zs_huge_class_size(zram->mem_pool); 1172 return true; 1173} 1174 1175/* 1176 * To protect concurrent access to the same index entry, 1177 * caller should hold this table index entry's bit_spinlock to 1178 * indicate this index entry is accessing. 1179 */ 1180static void zram_free_page(struct zram *zram, size_t index) 1181{ 1182 unsigned long handle; 1183 1184#ifdef CONFIG_ZRAM_MEMORY_TRACKING 1185 zram->table[index].ac_time = 0; 1186#endif 1187 if (zram_test_flag(zram, index, ZRAM_IDLE)) 1188 zram_clear_flag(zram, index, ZRAM_IDLE); 1189 1190 if (zram_test_flag(zram, index, ZRAM_HUGE)) { 1191 zram_clear_flag(zram, index, ZRAM_HUGE); 1192 atomic64_dec(&zram->stats.huge_pages); 1193 } 1194 1195 if (zram_test_flag(zram, index, ZRAM_WB)) { 1196 zram_clear_flag(zram, index, ZRAM_WB); 1197 free_block_bdev(zram, zram_get_element(zram, index)); 1198 goto out; 1199 } 1200 1201 /* 1202 * No memory is allocated for same element filled pages. 1203 * Simply clear same page flag. 
1204 */ 1205 if (zram_test_flag(zram, index, ZRAM_SAME)) { 1206 zram_clear_flag(zram, index, ZRAM_SAME); 1207 atomic64_dec(&zram->stats.same_pages); 1208 goto out; 1209 } 1210 1211 handle = zram_get_handle(zram, index); 1212 if (!handle) 1213 return; 1214 1215 zs_free(zram->mem_pool, handle); 1216 1217 atomic64_sub(zram_get_obj_size(zram, index), 1218 &zram->stats.compr_data_size); 1219out: 1220 atomic64_dec(&zram->stats.pages_stored); 1221 zram_set_handle(zram, index, 0); 1222 zram_set_obj_size(zram, index, 0); 1223 WARN_ON_ONCE(zram->table[index].flags & 1224 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); 1225} 1226 1227static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, 1228 struct bio *bio, bool partial_io) 1229{ 1230 struct zcomp_strm *zstrm; 1231 unsigned long handle; 1232 unsigned int size; 1233 void *src, *dst; 1234 int ret; 1235 1236 zram_slot_lock(zram, index); 1237 if (zram_test_flag(zram, index, ZRAM_WB)) { 1238 struct bio_vec bvec; 1239 1240 zram_slot_unlock(zram, index); 1241 1242 bvec.bv_page = page; 1243 bvec.bv_len = PAGE_SIZE; 1244 bvec.bv_offset = 0; 1245 return read_from_bdev(zram, &bvec, 1246 zram_get_element(zram, index), 1247 bio, partial_io); 1248 } 1249 1250 handle = zram_get_handle(zram, index); 1251 if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { 1252 unsigned long value; 1253 void *mem; 1254 1255 value = handle ? 
zram_get_element(zram, index) : 0; 1256 mem = kmap_atomic(page); 1257 zram_fill_page(mem, PAGE_SIZE, value); 1258 kunmap_atomic(mem); 1259 zram_slot_unlock(zram, index); 1260 return 0; 1261 } 1262 1263 size = zram_get_obj_size(zram, index); 1264 1265 if (size != PAGE_SIZE) 1266 zstrm = zcomp_stream_get(zram->comp); 1267 1268 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); 1269 if (size == PAGE_SIZE) { 1270 dst = kmap_atomic(page); 1271 memcpy(dst, src, PAGE_SIZE); 1272 kunmap_atomic(dst); 1273 ret = 0; 1274 } else { 1275 dst = kmap_atomic(page); 1276 ret = zcomp_decompress(zstrm, src, size, dst); 1277 kunmap_atomic(dst); 1278 zcomp_stream_put(zram->comp); 1279 } 1280 zs_unmap_object(zram->mem_pool, handle); 1281 zram_slot_unlock(zram, index); 1282 1283 /* Should NEVER happen. Return bio error if it does. */ 1284 if (WARN_ON(ret)) 1285 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 1286 1287 return ret; 1288} 1289 1290static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1291 u32 index, int offset, struct bio *bio) 1292{ 1293 int ret; 1294 struct page *page; 1295 1296 page = bvec->bv_page; 1297 if (is_partial_io(bvec)) { 1298 /* Use a temporary buffer to decompress the page */ 1299 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 1300 if (!page) 1301 return -ENOMEM; 1302 } 1303 1304 ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); 1305 if (unlikely(ret)) 1306 goto out; 1307 1308 if (is_partial_io(bvec)) { 1309 void *dst = kmap_atomic(bvec->bv_page); 1310 void *src = kmap_atomic(page); 1311 1312 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); 1313 kunmap_atomic(src); 1314 kunmap_atomic(dst); 1315 } 1316out: 1317 if (is_partial_io(bvec)) 1318 __free_page(page); 1319 1320 return ret; 1321} 1322 1323static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1324 u32 index, struct bio *bio) 1325{ 1326 int ret = 0; 1327 unsigned long alloced_pages; 1328 unsigned long handle = 0; 1329 unsigned int 
comp_len = 0; 1330 void *src, *dst, *mem; 1331 struct zcomp_strm *zstrm; 1332 struct page *page = bvec->bv_page; 1333 unsigned long element = 0; 1334 enum zram_pageflags flags = 0; 1335 1336 mem = kmap_atomic(page); 1337 if (page_same_filled(mem, &element)) { 1338 kunmap_atomic(mem); 1339 /* Free memory associated with this sector now. */ 1340 flags = ZRAM_SAME; 1341 atomic64_inc(&zram->stats.same_pages); 1342 goto out; 1343 } 1344 kunmap_atomic(mem); 1345 1346compress_again: 1347 zstrm = zcomp_stream_get(zram->comp); 1348 src = kmap_atomic(page); 1349 ret = zcomp_compress(zstrm, src, &comp_len); 1350 kunmap_atomic(src); 1351 1352 if (unlikely(ret)) { 1353 zcomp_stream_put(zram->comp); 1354 pr_err("Compression failed! err=%d\n", ret); 1355 zs_free(zram->mem_pool, handle); 1356 return ret; 1357 } 1358 1359 if (comp_len >= huge_class_size) 1360 comp_len = PAGE_SIZE; 1361 /* 1362 * handle allocation has 2 paths: 1363 * a) fast path is executed with preemption disabled (for 1364 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, 1365 * since we can't sleep; 1366 * b) slow path enables preemption and attempts to allocate 1367 * the page with __GFP_DIRECT_RECLAIM bit set. we have to 1368 * put per-cpu compression stream and, thus, to re-do 1369 * the compression once handle is allocated. 1370 * 1371 * if we have a 'non-null' handle here then we are coming 1372 * from the slow path and handle has already been allocated. 
1373 */ 1374 if (!handle) 1375 handle = zs_malloc(zram->mem_pool, comp_len, 1376 __GFP_KSWAPD_RECLAIM | 1377 __GFP_NOWARN | 1378 __GFP_HIGHMEM | 1379 __GFP_MOVABLE); 1380 if (!handle) { 1381 zcomp_stream_put(zram->comp); 1382 atomic64_inc(&zram->stats.writestall); 1383 handle = zs_malloc(zram->mem_pool, comp_len, 1384 GFP_NOIO | __GFP_HIGHMEM | 1385 __GFP_MOVABLE); 1386 if (handle) 1387 goto compress_again; 1388 return -ENOMEM; 1389 } 1390 1391 alloced_pages = zs_get_total_pages(zram->mem_pool); 1392 update_used_max(zram, alloced_pages); 1393 1394 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 1395 zcomp_stream_put(zram->comp); 1396 zs_free(zram->mem_pool, handle); 1397 return -ENOMEM; 1398 } 1399 1400 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); 1401 1402 src = zstrm->buffer; 1403 if (comp_len == PAGE_SIZE) 1404 src = kmap_atomic(page); 1405 memcpy(dst, src, comp_len); 1406 if (comp_len == PAGE_SIZE) 1407 kunmap_atomic(src); 1408 1409 zcomp_stream_put(zram->comp); 1410 zs_unmap_object(zram->mem_pool, handle); 1411 atomic64_add(comp_len, &zram->stats.compr_data_size); 1412out: 1413 /* 1414 * Free memory associated with this sector 1415 * before overwriting unused sectors. 
1416 */ 1417 zram_slot_lock(zram, index); 1418 zram_free_page(zram, index); 1419 1420 if (comp_len == PAGE_SIZE) { 1421 zram_set_flag(zram, index, ZRAM_HUGE); 1422 atomic64_inc(&zram->stats.huge_pages); 1423 atomic64_inc(&zram->stats.huge_pages_since); 1424 } 1425 1426 if (flags) { 1427 zram_set_flag(zram, index, flags); 1428 zram_set_element(zram, index, element); 1429 } else { 1430 zram_set_handle(zram, index, handle); 1431 zram_set_obj_size(zram, index, comp_len); 1432 } 1433 zram_slot_unlock(zram, index); 1434 1435 /* Update stats */ 1436 atomic64_inc(&zram->stats.pages_stored); 1437 return ret; 1438} 1439 1440static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1441 u32 index, int offset, struct bio *bio) 1442{ 1443 int ret; 1444 struct page *page = NULL; 1445 void *src; 1446 struct bio_vec vec; 1447 1448 vec = *bvec; 1449 if (is_partial_io(bvec)) { 1450 void *dst; 1451 /* 1452 * This is a partial IO. We need to read the full page 1453 * before to write the changes. 1454 */ 1455 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 1456 if (!page) 1457 return -ENOMEM; 1458 1459 ret = __zram_bvec_read(zram, page, index, bio, true); 1460 if (ret) 1461 goto out; 1462 1463 src = kmap_atomic(bvec->bv_page); 1464 dst = kmap_atomic(page); 1465 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); 1466 kunmap_atomic(dst); 1467 kunmap_atomic(src); 1468 1469 vec.bv_page = page; 1470 vec.bv_len = PAGE_SIZE; 1471 vec.bv_offset = 0; 1472 } 1473 1474 ret = __zram_bvec_write(zram, &vec, index, bio); 1475out: 1476 if (is_partial_io(bvec)) 1477 __free_page(page); 1478 return ret; 1479} 1480 1481/* 1482 * zram_bio_discard - handler on discard request 1483 * @index: physical block index in PAGE_SIZE units 1484 * @offset: byte offset within physical block 1485 */ 1486static void zram_bio_discard(struct zram *zram, u32 index, 1487 int offset, struct bio *bio) 1488{ 1489 size_t n = bio->bi_iter.bi_size; 1490 1491 /* 1492 * zram manages data in physical block size units. 
Because logical block 1493 * size isn't identical with physical block size on some arch, we 1494 * could get a discard request pointing to a specific offset within a 1495 * certain physical block. Although we can handle this request by 1496 * reading that physiclal block and decompressing and partially zeroing 1497 * and re-compressing and then re-storing it, this isn't reasonable 1498 * because our intent with a discard request is to save memory. So 1499 * skipping this logical block is appropriate here. 1500 */ 1501 if (offset) { 1502 if (n <= (PAGE_SIZE - offset)) 1503 return; 1504 1505 n -= (PAGE_SIZE - offset); 1506 index++; 1507 } 1508 1509 while (n >= PAGE_SIZE) { 1510 zram_slot_lock(zram, index); 1511 zram_free_page(zram, index); 1512 zram_slot_unlock(zram, index); 1513 atomic64_inc(&zram->stats.notify_free); 1514 index++; 1515 n -= PAGE_SIZE; 1516 } 1517} 1518 1519/* 1520 * Returns errno if it has some problem. Otherwise return 0 or 1. 1521 * Returns 0 if IO request was done synchronously 1522 * Returns 1 if IO request was successfully submitted. 
1523 */ 1524static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 1525 int offset, unsigned int op, struct bio *bio) 1526{ 1527 int ret; 1528 1529 if (!op_is_write(op)) { 1530 atomic64_inc(&zram->stats.num_reads); 1531 ret = zram_bvec_read(zram, bvec, index, offset, bio); 1532 flush_dcache_page(bvec->bv_page); 1533 } else { 1534 atomic64_inc(&zram->stats.num_writes); 1535 ret = zram_bvec_write(zram, bvec, index, offset, bio); 1536 } 1537 1538 zram_slot_lock(zram, index); 1539 zram_accessed(zram, index); 1540 zram_slot_unlock(zram, index); 1541 1542 if (unlikely(ret < 0)) { 1543 if (!op_is_write(op)) 1544 atomic64_inc(&zram->stats.failed_reads); 1545 else 1546 atomic64_inc(&zram->stats.failed_writes); 1547 } 1548 1549 return ret; 1550} 1551 1552static void __zram_make_request(struct zram *zram, struct bio *bio) 1553{ 1554 int offset; 1555 u32 index; 1556 struct bio_vec bvec; 1557 struct bvec_iter iter; 1558 unsigned long start_time; 1559 1560 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 1561 offset = (bio->bi_iter.bi_sector & 1562 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 1563 1564 switch (bio_op(bio)) { 1565 case REQ_OP_DISCARD: 1566 case REQ_OP_WRITE_ZEROES: 1567 zram_bio_discard(zram, index, offset, bio); 1568 bio_endio(bio); 1569 return; 1570 default: 1571 break; 1572 } 1573 1574 start_time = bio_start_io_acct(bio); 1575 bio_for_each_segment(bvec, bio, iter) { 1576 struct bio_vec bv = bvec; 1577 unsigned int unwritten = bvec.bv_len; 1578 1579 do { 1580 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, 1581 unwritten); 1582 if (zram_bvec_rw(zram, &bv, index, offset, 1583 bio_op(bio), bio) < 0) { 1584 bio->bi_status = BLK_STS_IOERR; 1585 break; 1586 } 1587 1588 bv.bv_offset += bv.bv_len; 1589 unwritten -= bv.bv_len; 1590 1591 update_position(&index, &offset, &bv); 1592 } while (unwritten); 1593 } 1594 bio_end_io_acct(bio, start_time); 1595 bio_endio(bio); 1596} 1597 1598/* 1599 * Handler function for all zram I/O requests. 
*/
static blk_qc_t zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	/* Reject requests that fail valid_io_request() and count them. */
	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}

/*
 * swap_slot_free_notify handler: free slot @index of the zram device
 * behind @bdev. Only a trylock is attempted; if the slot lock is busy the
 * free is skipped and counted in miss_free.
 */
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}

/*
 * rw_page handler: perform a single-page read or write at @sector.
 *
 * Returns -ENOTSUPP for transparent huge pages, -EINVAL for a request
 * rejected by valid_io_request(), a negative errno from zram_bvec_rw(),
 * or 0 otherwise.
 */
static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;
	unsigned long start_time;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
	disk_end_io_acct(bdev->bd_disk, op, start_time);
out:
	/*
	 * If the I/O fails, just return the error (i.e. non-zero) without
	 * calling page_endio.
	 * That makes the callers of rw_page (e.g. swap_readpage,
	 * __swap_writepage) resubmit the I/O as a bio request, and
	 * bio->bi_end_io then handles the error
	 * (e.g. SetPageError, set_page_dirty and extra work).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		/* Completed synchronously: finish the page here. */
		page_endio(page, op_is_write(op), 0);
		break;
	case 1:
		/* Submitted successfully: report success to the caller. */
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}

/*
 * Return @zram to the uninitialized state: clear the memory limit and, if
 * the device was initialized, zero its disksize and capacity, free all
 * slot metadata, clear the statistics, destroy the compressor and call
 * reset_bdev().
 */
static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	/* Keep what the teardown below needs before disksize is zeroed. */
	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O operation under all of CPU are done so let's free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}

/*
 * sysfs "disksize" store handler: initialize the device with the size
 * parsed from @buf (rounded up with PAGE_ALIGN).
 *
 * Returns @len on success, -EINVAL if the parsed size is 0, -EBUSY if the
 * device is already initialized, -ENOMEM if metadata allocation fails, or
 * the zcomp_create() error.
 */
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

/*
 * sysfs "reset" store handler: reset the device when a non-zero number is
 * written.
 *
 * Returns @len on success, the kstrtou16() error for unparsable input,
 * -EINVAL if the value is 0, or -EBUSY if the device is open or already
 * claimed.
 */
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = zram->disk->part0;

	mutex_lock(&bdev->bd_disk->open_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_disk->open_mutex);
		return -EBUSY;
	}

	/* From now on, nobody can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_disk->open_mutex);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);

	/* Reset finished: allow the device to be opened again. */
	mutex_lock(&bdev->bd_disk->open_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_disk->open_mutex);

	return len;
}

/*
 * Block device open handler. Returns -EBUSY while the device is claimed
 * for reset or removal, 0 otherwise. Warns if the disk's open_mutex is not
 * held on entry.
 */
static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

/* Block device operations including the rw_page handler. */
static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

/* Same operations as zram_devops, but without the rw_page handler. */
static const struct block_device_operations zram_wb_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif

/* Per-device sysfs attributes, passed to device_add_disk() in zram_add(). */
static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static const struct attribute_group *zram_disk_attr_groups[] = {
	&zram_disk_attr_group,
	NULL,
};

/*
 * Allocate and initialize new zram device. the function returns
 * '>= 0' device_id upon success, and negative value otherwise.
1889 */ 1890static int zram_add(void) 1891{ 1892 struct zram *zram; 1893 int ret, device_id; 1894 1895 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 1896 if (!zram) 1897 return -ENOMEM; 1898 1899 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 1900 if (ret < 0) 1901 goto out_free_dev; 1902 device_id = ret; 1903 1904 init_rwsem(&zram->init_lock); 1905#ifdef CONFIG_ZRAM_WRITEBACK 1906 spin_lock_init(&zram->wb_limit_lock); 1907#endif 1908 1909 /* gendisk structure */ 1910 zram->disk = blk_alloc_disk(NUMA_NO_NODE); 1911 if (!zram->disk) { 1912 pr_err("Error allocating disk structure for device %d\n", 1913 device_id); 1914 ret = -ENOMEM; 1915 goto out_free_idr; 1916 } 1917 1918 zram->disk->major = zram_major; 1919 zram->disk->first_minor = device_id; 1920 zram->disk->minors = 1; 1921 zram->disk->fops = &zram_devops; 1922 zram->disk->private_data = zram; 1923 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 1924 1925 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ 1926 set_capacity(zram->disk, 0); 1927 /* zram devices sort of resembles non-rotational disks */ 1928 blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); 1929 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); 1930 1931 /* 1932 * To ensure that we always get PAGE_SIZE aligned 1933 * and n*PAGE_SIZED sized I/O requests. 1934 */ 1935 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); 1936 blk_queue_logical_block_size(zram->disk->queue, 1937 ZRAM_LOGICAL_BLOCK_SIZE); 1938 blk_queue_io_min(zram->disk->queue, PAGE_SIZE); 1939 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 1940 zram->disk->queue->limits.discard_granularity = PAGE_SIZE; 1941 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); 1942 blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); 1943 1944 /* 1945 * zram_bio_discard() will clear all logical blocks if logical block 1946 * size is identical with physical block size(PAGE_SIZE). 
But if it is 1947 * different, we will skip discarding some parts of logical blocks in 1948 * the part of the request range which isn't aligned to physical block 1949 * size. So we can't ensure that all discarded logical blocks are 1950 * zeroed. 1951 */ 1952 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) 1953 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); 1954 1955 blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); 1956 device_add_disk(NULL, zram->disk, zram_disk_attr_groups); 1957 1958 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); 1959 1960 zram_debugfs_register(zram); 1961 pr_info("Added device: %s\n", zram->disk->disk_name); 1962 return device_id; 1963 1964out_free_idr: 1965 idr_remove(&zram_index_idr, device_id); 1966out_free_dev: 1967 kfree(zram); 1968 return ret; 1969} 1970 1971static int zram_remove(struct zram *zram) 1972{ 1973 struct block_device *bdev = zram->disk->part0; 1974 1975 mutex_lock(&bdev->bd_disk->open_mutex); 1976 if (bdev->bd_openers || zram->claim) { 1977 mutex_unlock(&bdev->bd_disk->open_mutex); 1978 return -EBUSY; 1979 } 1980 1981 zram->claim = true; 1982 mutex_unlock(&bdev->bd_disk->open_mutex); 1983 1984 zram_debugfs_unregister(zram); 1985 1986 /* Make sure all the pending I/O are finished */ 1987 fsync_bdev(bdev); 1988 zram_reset_device(zram); 1989 1990 pr_info("Removed device: %s\n", zram->disk->disk_name); 1991 1992 del_gendisk(zram->disk); 1993 blk_cleanup_disk(zram->disk); 1994 kfree(zram); 1995 return 0; 1996} 1997 1998/* zram-control sysfs attributes */ 1999 2000/* 2001 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 2002 * sense that reading from this file does alter the state of your system -- it 2003 * creates a new un-initialized zram device and returns back this device's 2004 * device_id (or an error code if it fails to create a new device). 
2005 */ 2006static ssize_t hot_add_show(struct class *class, 2007 struct class_attribute *attr, 2008 char *buf) 2009{ 2010 int ret; 2011 2012 mutex_lock(&zram_index_mutex); 2013 ret = zram_add(); 2014 mutex_unlock(&zram_index_mutex); 2015 2016 if (ret < 0) 2017 return ret; 2018 return scnprintf(buf, PAGE_SIZE, "%d\n", ret); 2019} 2020static struct class_attribute class_attr_hot_add = 2021 __ATTR(hot_add, 0400, hot_add_show, NULL); 2022 2023static ssize_t hot_remove_store(struct class *class, 2024 struct class_attribute *attr, 2025 const char *buf, 2026 size_t count) 2027{ 2028 struct zram *zram; 2029 int ret, dev_id; 2030 2031 /* dev_id is gendisk->first_minor, which is `int' */ 2032 ret = kstrtoint(buf, 10, &dev_id); 2033 if (ret) 2034 return ret; 2035 if (dev_id < 0) 2036 return -EINVAL; 2037 2038 mutex_lock(&zram_index_mutex); 2039 2040 zram = idr_find(&zram_index_idr, dev_id); 2041 if (zram) { 2042 ret = zram_remove(zram); 2043 if (!ret) 2044 idr_remove(&zram_index_idr, dev_id); 2045 } else { 2046 ret = -ENODEV; 2047 } 2048 2049 mutex_unlock(&zram_index_mutex); 2050 return ret ? 
ret : count; 2051} 2052static CLASS_ATTR_WO(hot_remove); 2053 2054static struct attribute *zram_control_class_attrs[] = { 2055 &class_attr_hot_add.attr, 2056 &class_attr_hot_remove.attr, 2057 NULL, 2058}; 2059ATTRIBUTE_GROUPS(zram_control_class); 2060 2061static struct class zram_control_class = { 2062 .name = "zram-control", 2063 .owner = THIS_MODULE, 2064 .class_groups = zram_control_class_groups, 2065}; 2066 2067static int zram_remove_cb(int id, void *ptr, void *data) 2068{ 2069 zram_remove(ptr); 2070 return 0; 2071} 2072 2073static void destroy_devices(void) 2074{ 2075 class_unregister(&zram_control_class); 2076 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 2077 zram_debugfs_destroy(); 2078 idr_destroy(&zram_index_idr); 2079 unregister_blkdev(zram_major, "zram"); 2080 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2081} 2082 2083static int __init zram_init(void) 2084{ 2085 int ret; 2086 2087 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 2088 zcomp_cpu_up_prepare, zcomp_cpu_dead); 2089 if (ret < 0) 2090 return ret; 2091 2092 ret = class_register(&zram_control_class); 2093 if (ret) { 2094 pr_err("Unable to register zram-control class\n"); 2095 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2096 return ret; 2097 } 2098 2099 zram_debugfs_create(); 2100 zram_major = register_blkdev(0, "zram"); 2101 if (zram_major <= 0) { 2102 pr_err("Unable to get major number\n"); 2103 class_unregister(&zram_control_class); 2104 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2105 return -EBUSY; 2106 } 2107 2108 while (num_devices != 0) { 2109 mutex_lock(&zram_index_mutex); 2110 ret = zram_add(); 2111 mutex_unlock(&zram_index_mutex); 2112 if (ret < 0) 2113 goto out_error; 2114 num_devices--; 2115 } 2116 2117 return 0; 2118 2119out_error: 2120 destroy_devices(); 2121 return ret; 2122} 2123 2124static void __exit zram_exit(void) 2125{ 2126 destroy_devices(); 2127} 2128 2129module_init(zram_init); 2130module_exit(zram_exit); 2131 
2132module_param(num_devices, uint, 0); 2133MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 2134 2135MODULE_LICENSE("Dual BSD/GPL"); 2136MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 2137MODULE_DESCRIPTION("Compressed RAM Block Device");