Merge branch 'akpm' (patches from Andrew Morton)

Merge misc fixes from Andrew Morton:
"A few hotfixes and various leftovers which were awaiting other merges.

Mainly movement of zram into mm/"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (25 commits)
memcg: fix mutex not unlocked on memcg_create_kmem_cache fail path
Documentation/filesystems/vfs.txt: update file_operations documentation
mm, oom: base root bonus on current usage
mm: don't lose the SOFT_DIRTY flag on mprotect
mm/slub.c: fix page->_count corruption (again)
mm/mempolicy.c: fix mempolicy printing in numa_maps
zram: remove zram->lock in read path and change it with mutex
zram: remove workqueue for freeing removed pending slot
zram: introduce zram->tb_lock
zram: use atomic operation for stat
zram: remove unnecessary free
zram: delay pending free request in read path
zram: fix race between reset and flushing pending work
zsmalloc: add maintainers
zram: add zram maintainers
zsmalloc: add copyright
zram: add copyright
zram: remove old private project comment
zram: promote zram from staging
zsmalloc: move it under mm
...

+154 -231
+2 -2
Documentation/filesystems/proc.txt
··· 1386 1386 For example, if a task is using all allowed memory, its badness score will be 1387 1387 1000. If it is using half of its allowed memory, its score will be 500. 1388 1388 1389 - There is an additional factor included in the badness score: root 1390 - processes are given 3% extra memory over other tasks. 1389 + There is an additional factor included in the badness score: the current memory 1390 + and swap usage is discounted by 3% for root processes. 1391 1391 1392 1392 The amount of "allowed" memory depends on the context in which the oom killer 1393 1393 was called. If it is due to the memory assigned to the allocating task's cpuset
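
To make the new rule concrete, here is a small hedged sketch (hypothetical helper and numbers, mirroring the mm/oom_kill.c change later in this series) of how the badness score and the 3% root discount scale with usage:

/* Hedged sketch: hypothetical helper, mirroring the mm/oom_kill.c change
 * later in this series.  Numbers follow the proc.txt text above. */
static long example_badness(long usage, long allowed, int is_root)
{
	long points = usage * 1000 / allowed;	/* 0..1000 */

	if (is_root)			/* discount 3% of current usage */
		points -= (points * 3) / 100;

	return points;
}

/*
 * usage == allowed    -> 1000 (root: 970)
 * usage == allowed/2  ->  500 (root: 485)
 * The old rule subtracted a flat 30 points (3% of the allowed total) for
 * root, regardless of how much the task was actually using.
 */
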
+2 -10
Documentation/filesystems/vfs.txt
··· 782 782 ---------------------- 783 783 784 784 This describes how the VFS can manipulate an open file. As of kernel 785 - 3.5, the following members are defined: 785 + 3.12, the following members are defined: 786 786 787 787 struct file_operations { 788 788 struct module *owner; ··· 803 803 int (*aio_fsync) (struct kiocb *, int datasync); 804 804 int (*fasync) (int, struct file *, int); 805 805 int (*lock) (struct file *, int, struct file_lock *); 806 - ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); 807 - ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); 808 - ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *); 809 806 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); 810 807 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 811 808 int (*check_flags)(int); ··· 811 814 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); 812 815 int (*setlease)(struct file *, long arg, struct file_lock **); 813 816 long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len); 817 + int (*show_fdinfo)(struct seq_file *m, struct file *f); 814 818 }; 815 819 816 820 Again, all methods are called without any locks being held, unless ··· 861 863 862 864 lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW 863 865 commands 864 - 865 - readv: called by the readv(2) system call 866 - 867 - writev: called by the writev(2) system call 868 - 869 - sendfile: called by the sendfile(2) system call 870 866 871 867 get_unmapped_area: called by the mmap(2) system call 872 868
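
As an illustration of the newly documented ->show_fdinfo() hook (example names are mine, not from the patch), a driver can emit extra per-descriptor state into /proc/<pid>/fdinfo/<fd> along these lines:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static int example_show_fdinfo(struct seq_file *m, struct file *f)
{
	/* One "key:\tvalue" line per item is the usual fdinfo convention */
	seq_printf(m, "example-flags:\t0x%x\n", f->f_flags);
	return 0;
}

static const struct file_operations example_fops = {
	.owner		= THIS_MODULE,
	.show_fdinfo	= example_show_fdinfo,
};
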
+16
MAINTAINERS
··· 9740 9740 S: Odd Fixes 9741 9741 F: drivers/media/pci/zoran/ 9742 9742 9743 + ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER 9744 + M: Minchan Kim <minchan@kernel.org> 9745 + M: Nitin Gupta <ngupta@vflare.org> 9746 + L: linux-kernel@vger.kernel.org 9747 + S: Maintained 9748 + F: drivers/block/zram/ 9749 + F: Documentation/blockdev/zram.txt 9750 + 9743 9751 ZS DECSTATION Z85C30 SERIAL DRIVER 9744 9752 M: "Maciej W. Rozycki" <macro@linux-mips.org> 9745 9753 S: Maintained 9746 9754 F: drivers/tty/serial/zs.* 9755 + 9756 + ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR 9757 + M: Minchan Kim <minchan@kernel.org> 9758 + M: Nitin Gupta <ngupta@vflare.org> 9759 + L: linux-mm@kvack.org 9760 + S: Maintained 9761 + F: mm/zsmalloc.c 9762 + F: include/linux/zsmalloc.h 9747 9763 9748 9764 ZSWAP COMPRESSED SWAP CACHING 9749 9765 M: Seth Jennings <sjenning@linux.vnet.ibm.com>
+2 -1
arch/x86/include/asm/pgtable_types.h
··· 121 121 122 122 /* Set of bits not changed in pte_modify */ 123 123 #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ 124 - _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) 124 + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ 125 + _PAGE_SOFT_DIRTY) 125 126 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) 126 127 127 128 #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
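
For background on why the mask matters (a simplified sketch of the existing x86 helper, not code from this patch): mprotect() rebuilds PTEs through pte_modify(), which keeps only the bits listed in _PAGE_CHG_MASK, so a flag missing from the mask — as _PAGE_SOFT_DIRTY was — gets silently cleared:

/* Simplified sketch of the existing x86 pte_modify() (not part of this patch) */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	pteval_t val = pte_val(pte);

	/* Keep only the bits in _PAGE_CHG_MASK... */
	val &= _PAGE_CHG_MASK;
	/* ...then apply the new protection bits. */
	val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;

	return __pte(val);
}
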
+2
drivers/block/Kconfig
··· 108 108 109 109 source "drivers/block/mtip32xx/Kconfig" 110 110 111 + source "drivers/block/zram/Kconfig" 112 + 111 113 config BLK_CPQ_DA 112 114 tristate "Compaq SMART2 support" 113 115 depends on PCI && VIRT_TO_BUS && 0
+1
drivers/block/Makefile
··· 42 42 43 43 obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ 44 44 obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o 45 + obj-$(CONFIG_ZRAM) += zram/ 45 46 46 47 nvme-y := nvme-core.o nvme-scsi.o 47 48 skd-y := skd_main.o
+1
drivers/net/phy/mdio_bus.c
··· 150 150 err = device_register(&bus->dev); 151 151 if (err) { 152 152 pr_err("mii_bus %s failed to register\n", bus->id); 153 + put_device(&bus->dev); 153 154 return -EINVAL; 154 155 } 155 156
-4
drivers/staging/Kconfig
··· 76 76 77 77 source "drivers/staging/iio/Kconfig" 78 78 79 - source "drivers/staging/zsmalloc/Kconfig" 80 - 81 - source "drivers/staging/zram/Kconfig" 82 - 83 79 source "drivers/staging/wlags49_h2/Kconfig" 84 80 85 81 source "drivers/staging/wlags49_h25/Kconfig"
-2
drivers/staging/Makefile
··· 32 32 obj-$(CONFIG_VME_BUS) += vme/ 33 33 obj-$(CONFIG_DX_SEP) += sep/ 34 34 obj-$(CONFIG_IIO) += iio/ 35 - obj-$(CONFIG_ZRAM) += zram/ 36 - obj-$(CONFIG_ZSMALLOC) += zsmalloc/ 37 35 obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/ 38 36 obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/ 39 37 obj-$(CONFIG_FB_SM7XX) += sm7xxfb/
-1
drivers/staging/zram/Kconfig drivers/block/zram/Kconfig
··· 14 14 disks and maybe many more. 15 15 16 16 See zram.txt for more information. 17 - Project home: <https://compcache.googlecode.com/> 18 17 19 18 config ZRAM_DEBUG 20 19 bool "Compressed RAM block device debug support"
drivers/staging/zram/Makefile drivers/block/zram/Makefile
-6
drivers/staging/zram/zram.txt Documentation/blockdev/zram.txt
··· 1 1 zram: Compressed RAM based block devices 2 2 ---------------------------------------- 3 3 4 - Project home: http://compcache.googlecode.com/ 5 - 6 4 * Introduction 7 5 8 6 The zram module creates RAM based block devices named /dev/zram<id> ··· 66 68 This frees all the memory allocated for the given device and 67 69 resets the disksize to zero. You must set the disksize again 68 70 before reusing the device. 69 - 70 - Please report any problems at: 71 - - Mailing list: linux-mm-cc at laptop dot org 72 - - Issue tracker: http://code.google.com/p/compcache/issues/list 73 71 74 72 Nitin Gupta 75 73 ngupta@vflare.org
+46 -82
drivers/staging/zram/zram_drv.c drivers/block/zram/zram_drv.c
··· 2 2 * Compressed RAM block device 3 3 * 4 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 + * 2012, 2013 Minchan Kim 5 6 * 6 7 * This code is released using a dual license strategy: BSD/GPL 7 8 * You can choose the licence that better fits your requirements. ··· 10 9 * Released under the terms of 3-clause BSD License 11 10 * Released under the terms of GNU General Public License Version 2.0 12 11 * 13 - * Project home: http://compcache.googlecode.com 14 12 */ 15 13 16 14 #define KMSG_COMPONENT "zram" ··· 104 104 { 105 105 struct zram *zram = dev_to_zram(dev); 106 106 107 - return sprintf(buf, "%u\n", zram->stats.pages_zero); 107 + return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); 108 108 } 109 109 110 110 static ssize_t orig_data_size_show(struct device *dev, ··· 113 113 struct zram *zram = dev_to_zram(dev); 114 114 115 115 return sprintf(buf, "%llu\n", 116 - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); 116 + (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); 117 117 } 118 118 119 119 static ssize_t compr_data_size_show(struct device *dev, ··· 140 140 return sprintf(buf, "%llu\n", val); 141 141 } 142 142 143 + /* flag operations needs meta->tb_lock */ 143 144 static int zram_test_flag(struct zram_meta *meta, u32 index, 144 145 enum zram_pageflags flag) 145 146 { ··· 229 228 goto free_table; 230 229 } 231 230 231 + rwlock_init(&meta->tb_lock); 232 + mutex_init(&meta->buffer_lock); 232 233 return meta; 233 234 234 235 free_table: ··· 283 280 flush_dcache_page(page); 284 281 } 285 282 283 + /* NOTE: caller should hold meta->tb_lock with write-side */ 286 284 static void zram_free_page(struct zram *zram, size_t index) 287 285 { 288 286 struct zram_meta *meta = zram->meta; ··· 297 293 */ 298 294 if (zram_test_flag(meta, index, ZRAM_ZERO)) { 299 295 zram_clear_flag(meta, index, ZRAM_ZERO); 300 - zram->stats.pages_zero--; 296 + atomic_dec(&zram->stats.pages_zero); 301 297 } 302 298 return; 303 299 } 304 300 305 301 if (unlikely(size > max_zpage_size)) 306 - zram->stats.bad_compress--; 302 + atomic_dec(&zram->stats.bad_compress); 307 303 308 304 zs_free(meta->mem_pool, handle); 309 305 310 306 if (size <= PAGE_SIZE / 2) 311 - zram->stats.good_compress--; 307 + atomic_dec(&zram->stats.good_compress); 312 308 313 309 atomic64_sub(meta->table[index].size, &zram->stats.compr_size); 314 - zram->stats.pages_stored--; 310 + atomic_dec(&zram->stats.pages_stored); 315 311 316 312 meta->table[index].handle = 0; 317 313 meta->table[index].size = 0; ··· 323 319 size_t clen = PAGE_SIZE; 324 320 unsigned char *cmem; 325 321 struct zram_meta *meta = zram->meta; 326 - unsigned long handle = meta->table[index].handle; 322 + unsigned long handle; 323 + u16 size; 324 + 325 + read_lock(&meta->tb_lock); 326 + handle = meta->table[index].handle; 327 + size = meta->table[index].size; 327 328 328 329 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { 330 + read_unlock(&meta->tb_lock); 329 331 clear_page(mem); 330 332 return 0; 331 333 } 332 334 333 335 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); 334 - if (meta->table[index].size == PAGE_SIZE) 336 + if (size == PAGE_SIZE) 335 337 copy_page(mem, cmem); 336 338 else 337 - ret = lzo1x_decompress_safe(cmem, meta->table[index].size, 338 - mem, &clen); 339 + ret = lzo1x_decompress_safe(cmem, size, mem, &clen); 339 340 zs_unmap_object(meta->mem_pool, handle); 341 + read_unlock(&meta->tb_lock); 340 342 341 343 /* Should NEVER happen. Return bio error if it does. 
*/ 342 344 if (unlikely(ret != LZO_E_OK)) { ··· 363 353 struct zram_meta *meta = zram->meta; 364 354 page = bvec->bv_page; 365 355 356 + read_lock(&meta->tb_lock); 366 357 if (unlikely(!meta->table[index].handle) || 367 358 zram_test_flag(meta, index, ZRAM_ZERO)) { 359 + read_unlock(&meta->tb_lock); 368 360 handle_zero_page(bvec); 369 361 return 0; 370 362 } 363 + read_unlock(&meta->tb_lock); 371 364 372 365 if (is_partial_io(bvec)) 373 366 /* Use a temporary buffer to decompress the page */ ··· 413 400 struct page *page; 414 401 unsigned char *user_mem, *cmem, *src, *uncmem = NULL; 415 402 struct zram_meta *meta = zram->meta; 403 + bool locked = false; 416 404 417 405 page = bvec->bv_page; 418 406 src = meta->compress_buffer; ··· 433 419 goto out; 434 420 } 435 421 422 + mutex_lock(&meta->buffer_lock); 423 + locked = true; 436 424 user_mem = kmap_atomic(page); 437 425 438 426 if (is_partial_io(bvec)) { ··· 449 433 if (page_zero_filled(uncmem)) { 450 434 kunmap_atomic(user_mem); 451 435 /* Free memory associated with this sector now. */ 436 + write_lock(&zram->meta->tb_lock); 452 437 zram_free_page(zram, index); 453 - 454 - zram->stats.pages_zero++; 455 438 zram_set_flag(meta, index, ZRAM_ZERO); 439 + write_unlock(&zram->meta->tb_lock); 440 + 441 + atomic_inc(&zram->stats.pages_zero); 456 442 ret = 0; 457 443 goto out; 458 444 } 459 445 460 - /* 461 - * zram_slot_free_notify could miss free so that let's 462 - * double check. 463 - */ 464 - if (unlikely(meta->table[index].handle || 465 - zram_test_flag(meta, index, ZRAM_ZERO))) 466 - zram_free_page(zram, index); 467 - 468 446 ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, 469 447 meta->compress_workmem); 470 - 471 448 if (!is_partial_io(bvec)) { 472 449 kunmap_atomic(user_mem); 473 450 user_mem = NULL; ··· 473 464 } 474 465 475 466 if (unlikely(clen > max_zpage_size)) { 476 - zram->stats.bad_compress++; 467 + atomic_inc(&zram->stats.bad_compress); 477 468 clen = PAGE_SIZE; 478 469 src = NULL; 479 470 if (is_partial_io(bvec)) ··· 503 494 * Free memory associated with this sector 504 495 * before overwriting unused sectors. 
505 496 */ 497 + write_lock(&zram->meta->tb_lock); 506 498 zram_free_page(zram, index); 507 499 508 500 meta->table[index].handle = handle; 509 501 meta->table[index].size = clen; 502 + write_unlock(&zram->meta->tb_lock); 510 503 511 504 /* Update stats */ 512 505 atomic64_add(clen, &zram->stats.compr_size); 513 - zram->stats.pages_stored++; 506 + atomic_inc(&zram->stats.pages_stored); 514 507 if (clen <= PAGE_SIZE / 2) 515 - zram->stats.good_compress++; 508 + atomic_inc(&zram->stats.good_compress); 516 509 517 510 out: 511 + if (locked) 512 + mutex_unlock(&meta->buffer_lock); 518 513 if (is_partial_io(bvec)) 519 514 kfree(uncmem); 520 515 ··· 527 514 return ret; 528 515 } 529 516 530 - static void handle_pending_slot_free(struct zram *zram) 531 - { 532 - struct zram_slot_free *free_rq; 533 - 534 - spin_lock(&zram->slot_free_lock); 535 - while (zram->slot_free_rq) { 536 - free_rq = zram->slot_free_rq; 537 - zram->slot_free_rq = free_rq->next; 538 - zram_free_page(zram, free_rq->index); 539 - kfree(free_rq); 540 - } 541 - spin_unlock(&zram->slot_free_lock); 542 - } 543 - 544 517 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 545 518 int offset, struct bio *bio, int rw) 546 519 { 547 520 int ret; 548 521 549 - if (rw == READ) { 550 - down_read(&zram->lock); 551 - handle_pending_slot_free(zram); 522 + if (rw == READ) 552 523 ret = zram_bvec_read(zram, bvec, index, offset, bio); 553 - up_read(&zram->lock); 554 - } else { 555 - down_write(&zram->lock); 556 - handle_pending_slot_free(zram); 524 + else 557 525 ret = zram_bvec_write(zram, bvec, index, offset); 558 - up_write(&zram->lock); 559 - } 560 526 561 527 return ret; 562 528 } ··· 544 552 { 545 553 size_t index; 546 554 struct zram_meta *meta; 547 - 548 - flush_work(&zram->free_work); 549 555 550 556 down_write(&zram->init_lock); 551 557 if (!zram->init_done) { ··· 752 762 bio_io_error(bio); 753 763 } 754 764 755 - static void zram_slot_free(struct work_struct *work) 756 - { 757 - struct zram *zram; 758 - 759 - zram = container_of(work, struct zram, free_work); 760 - down_write(&zram->lock); 761 - handle_pending_slot_free(zram); 762 - up_write(&zram->lock); 763 - } 764 - 765 - static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) 766 - { 767 - spin_lock(&zram->slot_free_lock); 768 - free_rq->next = zram->slot_free_rq; 769 - zram->slot_free_rq = free_rq; 770 - spin_unlock(&zram->slot_free_lock); 771 - } 772 - 773 765 static void zram_slot_free_notify(struct block_device *bdev, 774 766 unsigned long index) 775 767 { 776 768 struct zram *zram; 777 - struct zram_slot_free *free_rq; 769 + struct zram_meta *meta; 778 770 779 771 zram = bdev->bd_disk->private_data; 772 + meta = zram->meta; 773 + 774 + write_lock(&meta->tb_lock); 775 + zram_free_page(zram, index); 776 + write_unlock(&meta->tb_lock); 780 777 atomic64_inc(&zram->stats.notify_free); 781 - 782 - free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); 783 - if (!free_rq) 784 - return; 785 - 786 - free_rq->index = index; 787 - add_slot_free(zram, free_rq); 788 - schedule_work(&zram->free_work); 789 778 } 790 779 791 780 static const struct block_device_operations zram_devops = { ··· 808 839 { 809 840 int ret = -ENOMEM; 810 841 811 - init_rwsem(&zram->lock); 812 842 init_rwsem(&zram->init_lock); 813 - 814 - INIT_WORK(&zram->free_work, zram_slot_free); 815 - spin_lock_init(&zram->slot_free_lock); 816 - zram->slot_free_rq = NULL; 817 843 818 844 zram->queue = blk_alloc_queue(GFP_KERNEL); 819 845 if (!zram->queue) {
+8 -24
drivers/staging/zram/zram_drv.h drivers/block/zram/zram_drv.h
··· 2 2 * Compressed RAM block device 3 3 * 4 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 + * 2012, 2013 Minchan Kim 5 6 * 6 7 * This code is released using a dual license strategy: BSD/GPL 7 8 * You can choose the licence that better fits your requirements. ··· 10 9 * Released under the terms of 3-clause BSD License 11 10 * Released under the terms of GNU General Public License Version 2.0 12 11 * 13 - * Project home: http://compcache.googlecode.com 14 12 */ 15 13 16 14 #ifndef _ZRAM_DRV_H_ ··· 17 17 18 18 #include <linux/spinlock.h> 19 19 #include <linux/mutex.h> 20 - 21 - #include "../zsmalloc/zsmalloc.h" 20 + #include <linux/zsmalloc.h> 22 21 23 22 /* 24 23 * Some arbitrary value. This is just to catch ··· 68 69 u8 flags; 69 70 } __aligned(4); 70 71 71 - /* 72 - * All 64bit fields should only be manipulated by 64bit atomic accessors. 73 - * All modifications to 32bit counter should be protected by zram->lock. 74 - */ 75 72 struct zram_stats { 76 73 atomic64_t compr_size; /* compressed size of pages stored */ 77 74 atomic64_t num_reads; /* failed + successful */ ··· 76 81 atomic64_t failed_writes; /* can happen when memory is too low */ 77 82 atomic64_t invalid_io; /* non-page-aligned I/O requests */ 78 83 atomic64_t notify_free; /* no. of swap slot free notifications */ 79 - u32 pages_zero; /* no. of zero filled pages */ 80 - u32 pages_stored; /* no. of pages currently stored */ 81 - u32 good_compress; /* % of pages with compression ratio<=50% */ 82 - u32 bad_compress; /* % of pages with compression ratio>=75% */ 84 + atomic_t pages_zero; /* no. of zero filled pages */ 85 + atomic_t pages_stored; /* no. of pages currently stored */ 86 + atomic_t good_compress; /* % of pages with compression ratio<=50% */ 87 + atomic_t bad_compress; /* % of pages with compression ratio>=75% */ 83 88 }; 84 89 85 90 struct zram_meta { 91 + rwlock_t tb_lock; /* protect table */ 86 92 void *compress_workmem; 87 93 void *compress_buffer; 88 94 struct table *table; 89 95 struct zs_pool *mem_pool; 90 - }; 91 - 92 - struct zram_slot_free { 93 - unsigned long index; 94 - struct zram_slot_free *next; 96 + struct mutex buffer_lock; /* protect compress buffers */ 95 97 }; 96 98 97 99 struct zram { 98 100 struct zram_meta *meta; 99 - struct rw_semaphore lock; /* protect compression buffers, table, 100 - * 32bit stat counters against concurrent 101 - * notifications, reads and writes */ 102 - 103 - struct work_struct free_work; /* handle pending free request */ 104 - struct zram_slot_free *slot_free_rq; /* list head of free request */ 105 - 106 101 struct request_queue *queue; 107 102 struct gendisk *disk; 108 103 int init_done; ··· 103 118 * we can store in a disk. 104 119 */ 105 120 u64 disksize; /* bytes */ 106 - spinlock_t slot_free_lock; 107 121 108 122 struct zram_stats stats; 109 123 };
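
To make the resulting zram locking easier to follow, here is a summary annotation of the two diffs above (my wording, not a comment from the patch):

/*
 * zram locking after this series (summary):
 *
 *  meta->tb_lock (rwlock)    - protects meta->table[] entries and their
 *                              flags; the read path takes it shared, the
 *                              write/free paths take it exclusive (every
 *                              zram_free_page() caller holds it for write).
 *  meta->buffer_lock (mutex) - serializes writers' use of the shared
 *                              compress_buffer/compress_workmem scratch
 *                              buffers.
 *  zram->init_lock (rwsem)   - unchanged; still orders init/reset against
 *                              in-flight I/O.
 *
 * The old zram->lock rwsem and the slot_free_rq/workqueue machinery are
 * gone: zram_slot_free_notify() now frees the slot synchronously under
 * tb_lock, which closes the race between reset and pending free work.
 */
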
-24
drivers/staging/zsmalloc/Kconfig
··· 1 - config ZSMALLOC 2 - bool "Memory allocator for compressed pages" 3 - depends on MMU 4 - default n 5 - help 6 - zsmalloc is a slab-based memory allocator designed to store 7 - compressed RAM pages. zsmalloc uses virtual memory mapping 8 - in order to reduce fragmentation. However, this results in a 9 - non-standard allocator interface where a handle, not a pointer, is 10 - returned by an alloc(). This handle must be mapped in order to 11 - access the allocated space. 12 - 13 - config PGTABLE_MAPPING 14 - bool "Use page table mapping to access object in zsmalloc" 15 - depends on ZSMALLOC 16 - help 17 - By default, zsmalloc uses a copy-based object mapping method to 18 - access allocations that span two pages. However, if a particular 19 - architecture (ex, ARM) performs VM mapping faster than copying, 20 - then you should select this. This causes zsmalloc to use page table 21 - mapping rather than copying for object mapping. 22 - 23 - You can check speed with zsmalloc benchmark[1]. 24 - [1] https://github.com/spartacus06/zsmalloc
-3
drivers/staging/zsmalloc/Makefile
··· 1 - zsmalloc-y := zsmalloc-main.o 2 - 3 - obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
+2 -2
drivers/staging/zsmalloc/zsmalloc-main.c mm/zsmalloc.c
··· 2 2 * zsmalloc memory allocator 3 3 * 4 4 * Copyright (C) 2011 Nitin Gupta 5 + * Copyright (C) 2012, 2013 Minchan Kim 5 6 * 6 7 * This code is released using a dual license strategy: BSD/GPL 7 8 * You can choose the license that better fits your requirements. ··· 91 90 #include <linux/hardirq.h> 92 91 #include <linux/spinlock.h> 93 92 #include <linux/types.h> 94 - 95 - #include "zsmalloc.h" 93 + #include <linux/zsmalloc.h> 96 94 97 95 /* 98 96 * This must be power of 2 and greater than of equal to sizeof(link_free).
+1
drivers/staging/zsmalloc/zsmalloc.h include/linux/zsmalloc.h
··· 2 2 * zsmalloc memory allocator 3 3 * 4 4 * Copyright (C) 2011 Nitin Gupta 5 + * Copyright (C) 2012, 2013 Minchan Kim 5 6 * 6 7 * This code is released using a dual license strategy: BSD/GPL 7 8 * You can choose the license that better fits your requirements.
+1 -1
drivers/video/backlight/lcd.c
··· 228 228 229 229 rc = device_register(&new_ld->dev); 230 230 if (rc) { 231 - kfree(new_ld); 231 + put_device(&new_ld->dev); 232 232 return ERR_PTR(rc); 233 233 } 234 234
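
Both the lcd.c change above and the earlier mdio_bus.c fix apply the same driver-core rule (my restatement of the device_register() documentation, not code from the patch): once the embedded struct device has been initialized/registered, error paths must drop it with put_device(), which runs the release() callback, rather than kfree() the containing object:

#include <linux/device.h>
#include <linux/slab.h>

struct foo {				/* hypothetical driver object */
	struct device dev;
};

static void foo_release(struct device *dev)
{
	kfree(container_of(dev, struct foo, dev));
}

static int foo_add(struct foo *foo)	/* foo allocated with kzalloc() */
{
	int err;

	/* device_register() == device_initialize() + device_add() */
	device_initialize(&foo->dev);
	foo->dev.release = foo_release;

	err = device_add(&foo->dev);
	if (err)
		/* Not kfree(foo): drop the reference taken above and let
		 * foo_release() free the object. */
		put_device(&foo->dev);

	return err;
}
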
+1 -4
include/linux/blkdev.h
··· 95 95 * as well! 96 96 */ 97 97 struct request { 98 - union { 99 - struct list_head queuelist; 100 - struct llist_node ll_list; 101 - }; 98 + struct list_head queuelist; 102 99 union { 103 100 struct call_single_data csd; 104 101 struct work_struct mq_flush_data;
+2 -2
include/linux/bootmem.h
··· 264 264 { 265 265 if (!align) 266 266 align = SMP_CACHE_BYTES; 267 - return __alloc_bootmem_low(size, align, BOOTMEM_LOW_LIMIT); 267 + return __alloc_bootmem_low(size, align, 0); 268 268 } 269 269 270 270 static inline void * __init memblock_virt_alloc_low_nopanic( ··· 272 272 { 273 273 if (!align) 274 274 align = SMP_CACHE_BYTES; 275 - return __alloc_bootmem_low_nopanic(size, align, BOOTMEM_LOW_LIMIT); 275 + return __alloc_bootmem_low_nopanic(size, align, 0); 276 276 } 277 277 278 278 static inline void * __init memblock_virt_alloc_from_nopanic(
+5 -1
include/linux/smp.h
··· 11 11 #include <linux/list.h> 12 12 #include <linux/cpumask.h> 13 13 #include <linux/init.h> 14 + #include <linux/llist.h> 14 15 15 16 extern void cpu_idle(void); 16 17 17 18 typedef void (*smp_call_func_t)(void *info); 18 19 struct call_single_data { 19 - struct list_head list; 20 + union { 21 + struct list_head list; 22 + struct llist_node llist; 23 + }; 20 24 smp_call_func_t func; 21 25 void *info; 22 26 u16 flags;
+14 -54
kernel/smp.c
··· 23 23 struct call_function_data { 24 24 struct call_single_data __percpu *csd; 25 25 cpumask_var_t cpumask; 26 - cpumask_var_t cpumask_ipi; 27 26 }; 28 27 29 28 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); 30 29 31 - struct call_single_queue { 32 - struct list_head list; 33 - raw_spinlock_t lock; 34 - }; 35 - 36 - static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue); 30 + static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); 37 31 38 32 static int 39 33 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) ··· 41 47 if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, 42 48 cpu_to_node(cpu))) 43 49 return notifier_from_errno(-ENOMEM); 44 - if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, 45 - cpu_to_node(cpu))) { 46 - free_cpumask_var(cfd->cpumask); 47 - return notifier_from_errno(-ENOMEM); 48 - } 49 50 cfd->csd = alloc_percpu(struct call_single_data); 50 51 if (!cfd->csd) { 51 - free_cpumask_var(cfd->cpumask_ipi); 52 52 free_cpumask_var(cfd->cpumask); 53 53 return notifier_from_errno(-ENOMEM); 54 54 } ··· 55 67 case CPU_DEAD: 56 68 case CPU_DEAD_FROZEN: 57 69 free_cpumask_var(cfd->cpumask); 58 - free_cpumask_var(cfd->cpumask_ipi); 59 70 free_percpu(cfd->csd); 60 71 break; 61 72 #endif ··· 72 85 void *cpu = (void *)(long)smp_processor_id(); 73 86 int i; 74 87 75 - for_each_possible_cpu(i) { 76 - struct call_single_queue *q = &per_cpu(call_single_queue, i); 77 - 78 - raw_spin_lock_init(&q->lock); 79 - INIT_LIST_HEAD(&q->list); 80 - } 88 + for_each_possible_cpu(i) 89 + init_llist_head(&per_cpu(call_single_queue, i)); 81 90 82 91 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); 83 92 register_cpu_notifier(&hotplug_cfd_notifier); ··· 124 141 */ 125 142 static void generic_exec_single(int cpu, struct call_single_data *csd, int wait) 126 143 { 127 - struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); 128 - unsigned long flags; 129 - int ipi; 130 - 131 144 if (wait) 132 145 csd->flags |= CSD_FLAG_WAIT; 133 - 134 - raw_spin_lock_irqsave(&dst->lock, flags); 135 - ipi = list_empty(&dst->list); 136 - list_add_tail(&csd->list, &dst->list); 137 - raw_spin_unlock_irqrestore(&dst->lock, flags); 138 146 139 147 /* 140 148 * The list addition should be visible before sending the IPI ··· 138 164 * locking and barrier primitives. Generic code isn't really 139 165 * equipped to do the right thing... 140 166 */ 141 - if (ipi) 167 + if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) 142 168 arch_send_call_function_single_ipi(cpu); 143 169 144 170 if (wait) ··· 151 177 */ 152 178 void generic_smp_call_function_single_interrupt(void) 153 179 { 154 - struct call_single_queue *q = &__get_cpu_var(call_single_queue); 155 - LIST_HEAD(list); 180 + struct llist_node *entry, *next; 156 181 157 182 /* 158 183 * Shouldn't receive this interrupt on a cpu that is not yet online. 
159 184 */ 160 185 WARN_ON_ONCE(!cpu_online(smp_processor_id())); 161 186 162 - raw_spin_lock(&q->lock); 163 - list_replace_init(&q->list, &list); 164 - raw_spin_unlock(&q->lock); 187 + entry = llist_del_all(&__get_cpu_var(call_single_queue)); 188 + entry = llist_reverse_order(entry); 165 189 166 - while (!list_empty(&list)) { 190 + while (entry) { 167 191 struct call_single_data *csd; 168 192 169 - csd = list_entry(list.next, struct call_single_data, list); 170 - list_del(&csd->list); 193 + next = entry->next; 171 194 195 + csd = llist_entry(entry, struct call_single_data, llist); 172 196 csd->func(csd->info); 173 - 174 197 csd_unlock(csd); 198 + 199 + entry = next; 175 200 } 176 201 } 177 202 ··· 375 402 if (unlikely(!cpumask_weight(cfd->cpumask))) 376 403 return; 377 404 378 - /* 379 - * After we put an entry into the list, cfd->cpumask may be cleared 380 - * again when another CPU sends another IPI for a SMP function call, so 381 - * cfd->cpumask will be zero. 382 - */ 383 - cpumask_copy(cfd->cpumask_ipi, cfd->cpumask); 384 - 385 405 for_each_cpu(cpu, cfd->cpumask) { 386 406 struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); 387 - struct call_single_queue *dst = 388 - &per_cpu(call_single_queue, cpu); 389 - unsigned long flags; 390 407 391 408 csd_lock(csd); 392 409 csd->func = func; 393 410 csd->info = info; 394 - 395 - raw_spin_lock_irqsave(&dst->lock, flags); 396 - list_add_tail(&csd->list, &dst->list); 397 - raw_spin_unlock_irqrestore(&dst->lock, flags); 411 + llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)); 398 412 } 399 413 400 414 /* Send a message to all CPUs in the map */ 401 - arch_send_call_function_ipi_mask(cfd->cpumask_ipi); 415 + arch_send_call_function_ipi_mask(cfd->cpumask); 402 416 403 417 if (wait) { 404 418 for_each_cpu(cpu, cfd->cpumask) {
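
The heart of the kernel/smp.c conversion is the lock-free llist pattern. A minimal sketch of that pattern (hypothetical names; the llist calls are the ones used in the diff above):

#include <linux/llist.h>
#include <linux/percpu.h>
#include <linux/smp.h>

struct remote_work {
	struct llist_node llist;
	void (*func)(void *info);
	void *info;
};

/* Zero-initialized per-cpu heads are valid empty llists (first == NULL). */
static DEFINE_PER_CPU(struct llist_head, remote_work_queue);

/* Producer: llist_add() is a lock-free cmpxchg push and returns true only
 * when the list was empty, so just the first entry has to raise the IPI. */
static void queue_remote_work(int cpu, struct remote_work *work)
{
	if (llist_add(&work->llist, &per_cpu(remote_work_queue, cpu)))
		arch_send_call_function_single_ipi(cpu);
}

/* Consumer (IPI handler): detach the whole list in one atomic xchg,
 * restore FIFO order, then run the callbacks without holding any lock. */
static void run_remote_work(void)
{
	struct llist_node *entry, *next;

	entry = llist_del_all(this_cpu_ptr(&remote_work_queue));
	entry = llist_reverse_order(entry);

	while (entry) {
		struct remote_work *work;

		next = entry->next;
		work = llist_entry(entry, struct remote_work, llist);
		work->func(work->info);
		entry = next;
	}
}
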
+25
mm/Kconfig
··· 552 552 it can be cleared by hands. 553 553 554 554 See Documentation/vm/soft-dirty.txt for more details. 555 + 556 + config ZSMALLOC 557 + bool "Memory allocator for compressed pages" 558 + depends on MMU 559 + default n 560 + help 561 + zsmalloc is a slab-based memory allocator designed to store 562 + compressed RAM pages. zsmalloc uses virtual memory mapping 563 + in order to reduce fragmentation. However, this results in a 564 + non-standard allocator interface where a handle, not a pointer, is 565 + returned by an alloc(). This handle must be mapped in order to 566 + access the allocated space. 567 + 568 + config PGTABLE_MAPPING 569 + bool "Use page table mapping to access object in zsmalloc" 570 + depends on ZSMALLOC 571 + help 572 + By default, zsmalloc uses a copy-based object mapping method to 573 + access allocations that span two pages. However, if a particular 574 + architecture (ex, ARM) performs VM mapping faster than copying, 575 + then you should select this. This causes zsmalloc to use page table 576 + mapping rather than copying for object mapping. 577 + 578 + You can check speed with zsmalloc benchmark[1]. 579 + [1] https://github.com/spartacus06/zsmalloc
+1
mm/Makefile
··· 60 60 obj-$(CONFIG_CLEANCACHE) += cleancache.o 61 61 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o 62 62 obj-$(CONFIG_ZBUD) += zbud.o 63 + obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
+3 -4
mm/memcontrol.c
··· 3400 3400 static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, 3401 3401 struct kmem_cache *s) 3402 3402 { 3403 - struct kmem_cache *new; 3403 + struct kmem_cache *new = NULL; 3404 3404 static char *tmp_name = NULL; 3405 3405 static DEFINE_MUTEX(mutex); /* protects tmp_name */ 3406 3406 ··· 3416 3416 if (!tmp_name) { 3417 3417 tmp_name = kmalloc(PATH_MAX, GFP_KERNEL); 3418 3418 if (!tmp_name) 3419 - return NULL; 3419 + goto out; 3420 3420 } 3421 3421 3422 3422 rcu_read_lock(); ··· 3426 3426 3427 3427 new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align, 3428 3428 (s->flags & ~SLAB_PANIC), s->ctor, s); 3429 - 3430 3429 if (new) 3431 3430 new->allocflags |= __GFP_KMEMCG; 3432 3431 else 3433 3432 new = s; 3434 - 3433 + out: 3435 3434 mutex_unlock(&mutex); 3436 3435 return new; 3437 3436 }
+1 -1
mm/mempolicy.c
··· 2930 2930 unsigned short mode = MPOL_DEFAULT; 2931 2931 unsigned short flags = 0; 2932 2932 2933 - if (pol && pol != &default_policy) { 2933 + if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) { 2934 2934 mode = pol->mode; 2935 2935 flags = pol->flags; 2936 2936 }
+1 -1
mm/oom_kill.c
··· 178 178 * implementation used by LSMs. 179 179 */ 180 180 if (has_capability_noaudit(p, CAP_SYS_ADMIN)) 181 - adj -= 30; 181 + points -= (points * 3) / 100; 182 182 183 183 /* Normalize to oom_score_adj units */ 184 184 adj *= totalpages / 1000;
+17 -2
mm/slub.c
··· 355 355 __bit_spin_unlock(PG_locked, &page->flags); 356 356 } 357 357 358 + static inline void set_page_slub_counters(struct page *page, unsigned long counters_new) 359 + { 360 + struct page tmp; 361 + tmp.counters = counters_new; 362 + /* 363 + * page->counters can cover frozen/inuse/objects as well 364 + * as page->_count. If we assign to ->counters directly 365 + * we run the risk of losing updates to page->_count, so 366 + * be careful and only assign to the fields we need. 367 + */ 368 + page->frozen = tmp.frozen; 369 + page->inuse = tmp.inuse; 370 + page->objects = tmp.objects; 371 + } 372 + 358 373 /* Interrupts must be disabled (for the fallback code to work right) */ 359 374 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, 360 375 void *freelist_old, unsigned long counters_old, ··· 391 376 if (page->freelist == freelist_old && 392 377 page->counters == counters_old) { 393 378 page->freelist = freelist_new; 394 - page->counters = counters_new; 379 + set_page_slub_counters(page, counters_new); 395 380 slab_unlock(page); 396 381 return 1; 397 382 } ··· 430 415 if (page->freelist == freelist_old && 431 416 page->counters == counters_old) { 432 417 page->freelist = freelist_new; 433 - page->counters = counters_new; 418 + set_page_slub_counters(page, counters_new); 434 419 slab_unlock(page); 435 420 local_irq_restore(flags); 436 421 return 1;