Merge branch 'akpm' (patchbomb from Andrew Morton)

+1 -1

Documentation/RCU/whatisRCU.txt

··· 818 818 list_add_tail_rcu 819 819 list_del_rcu 820 820 list_replace_rcu 821 - hlist_add_after_rcu 821 + hlist_add_behind_rcu 822 822 hlist_add_before_rcu 823 823 hlist_add_head_rcu 824 824 hlist_del_rcu

+6 -2

Documentation/kernel-parameters.txt

··· 1716 1716 7 (KERN_DEBUG) debug-level messages 1717 1717 1718 1718 log_buf_len=n[KMG] Sets the size of the printk ring buffer, 1719 - in bytes. n must be a power of two. The default 1720 - size is set in the kernel config file. 1719 + in bytes. n must be a power of two and greater 1720 + than the minimal size. The minimal size is defined 1721 + by LOG_BUF_SHIFT kernel config parameter. There is 1722 + also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter 1723 + that allows increasing the default size depending on 1724 + the number of CPUs. See init/Kconfig for more details. 1721 1725 1722 1726 logo.nologo [FB] Disables display of the built-in Linux logo. 1723 1727 This may be used to provide more screen space for

+53

Documentation/trace/postprocess/trace-vmscan-postprocess.pl

··· 47 47 use constant HIGH_NR_SCANNED => 22; 48 48 use constant HIGH_NR_TAKEN => 23; 49 49 use constant HIGH_NR_RECLAIMED => 24; 50 + use constant HIGH_NR_FILE_SCANNED => 25; 51 + use constant HIGH_NR_ANON_SCANNED => 26; 52 + use constant HIGH_NR_FILE_RECLAIMED => 27; 53 + use constant HIGH_NR_ANON_RECLAIMED => 28; 50 54 51 55 my %perprocesspid; 52 56 my %perprocess; ··· 60 56 61 57 my $total_wakeup_kswapd; 62 58 my ($total_direct_reclaim, $total_direct_nr_scanned); 59 + my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned); 63 60 my ($total_direct_latency, $total_kswapd_latency); 64 61 my ($total_direct_nr_reclaimed); 62 + my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed); 65 63 my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async); 66 64 my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async); 67 65 my ($total_kswapd_nr_scanned, $total_kswapd_wake); 66 + my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned); 68 67 my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async); 69 68 my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async); 70 69 my ($total_kswapd_nr_reclaimed); 70 + my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed); 71 71 72 72 # Catch sigint and exit on request 73 73 my $sigint_report = 0; ··· 382 374 } 383 375 my $isolate_mode = $1; 384 376 my $nr_scanned = $4; 377 + my $file = $6; 385 378 386 379 # To closer match vmstat scanning statistics, only count isolate_both 387 380 # and isolate_inactive as scanning. 
isolate_active is rotation ··· 391 382 # isolate_both == 3 392 383 if ($isolate_mode != 2) { 393 384 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; 385 + if ($file == 1) { 386 + $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned; 387 + } else { 388 + $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned; 389 + } 394 390 } 395 391 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") { 396 392 $details = $6; ··· 405 391 print " $regex_lru_shrink_inactive/o\n"; 406 392 next; 407 393 } 394 + 408 395 my $nr_reclaimed = $4; 396 + my $flags = $6; 397 + my $file = 0; 398 + if ($flags =~ /RECLAIM_WB_FILE/) { 399 + $file = 1; 400 + } 409 401 $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed; 402 + if ($file) { 403 + $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed; 404 + } else { 405 + $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed; 406 + } 410 407 } elsif ($tracepoint eq "mm_vmscan_writepage") { 411 408 $details = $6; 412 409 if ($details !~ /$regex_writepage/o) { ··· 518 493 $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}; 519 494 $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; 520 495 $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; 496 + $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED}; 497 + $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED}; 521 498 $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED}; 499 + $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED}; 500 + $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED}; 522 501 $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 523 502 $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 524 503 
$total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; ··· 542 513 $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}, 543 514 $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}, 544 515 $stats{$process_pid}->{HIGH_NR_SCANNED}, 516 + $stats{$process_pid}->{HIGH_NR_FILE_SCANNED}, 517 + $stats{$process_pid}->{HIGH_NR_ANON_SCANNED}, 545 518 $stats{$process_pid}->{HIGH_NR_RECLAIMED}, 519 + $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED}, 520 + $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED}, 546 521 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, 547 522 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}, 548 523 $this_reclaim_delay / 1000); ··· 585 552 586 553 $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}; 587 554 $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; 555 + $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED}; 556 + $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED}; 588 557 $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED}; 558 + $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED}; 559 + $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED}; 589 560 $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 590 561 $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 591 562 $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; ··· 600 563 $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}, 601 564 $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP}, 602 565 $stats{$process_pid}->{HIGH_NR_SCANNED}, 566 + $stats{$process_pid}->{HIGH_NR_FILE_SCANNED}, 567 + $stats{$process_pid}->{HIGH_NR_ANON_SCANNED}, 603 568 
$stats{$process_pid}->{HIGH_NR_RECLAIMED}, 569 + $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED}, 570 + $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED}, 604 571 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, 605 572 $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}); 606 573 ··· 635 594 print "\nSummary\n"; 636 595 print "Direct reclaims: $total_direct_reclaim\n"; 637 596 print "Direct reclaim pages scanned: $total_direct_nr_scanned\n"; 597 + print "Direct reclaim file pages scanned: $total_direct_nr_file_scanned\n"; 598 + print "Direct reclaim anon pages scanned: $total_direct_nr_anon_scanned\n"; 638 599 print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n"; 600 + print "Direct reclaim file pages reclaimed: $total_direct_nr_file_reclaimed\n"; 601 + print "Direct reclaim anon pages reclaimed: $total_direct_nr_anon_reclaimed\n"; 639 602 print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n"; 640 603 print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n"; 641 604 print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n"; ··· 649 604 print "\n"; 650 605 print "Kswapd wakeups: $total_kswapd_wake\n"; 651 606 print "Kswapd pages scanned: $total_kswapd_nr_scanned\n"; 607 + print "Kswapd file pages scanned: $total_kswapd_nr_file_scanned\n"; 608 + print "Kswapd anon pages scanned: $total_kswapd_nr_anon_scanned\n"; 652 609 print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n"; 610 + print "Kswapd file pages reclaimed: $total_kswapd_nr_file_reclaimed\n"; 611 + print "Kswapd anon pages reclaimed: $total_kswapd_nr_anon_reclaimed\n"; 653 612 print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n"; 654 613 print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n"; 655 614 print "Kswapd reclaim write file async I/O: 
$total_kswapd_writepage_file_async\n"; ··· 678 629 $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; 679 630 $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP}; 680 631 $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED}; 632 + $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED}; 633 + $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED}; 681 634 $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED}; 635 + $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED}; 636 + $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED}; 682 637 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; 683 638 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; 684 639 $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};

+19

Makefile

··· 621 621 KBUILD_CFLAGS += -O2 622 622 endif 623 623 624 + # Tell gcc to never replace conditional load with a non-conditional one 625 + KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) 626 + 624 627 ifdef CONFIG_READABLE_ASM 625 628 # Disable optimizations that make assembler listings hard to read. 626 629 # reorder blocks reorders the control in the function ··· 639 636 endif 640 637 641 638 # Handle stack protector mode. 639 + # 640 + # Since kbuild can potentially perform two passes (first with the old 641 + # .config values and then with updated .config values), we cannot error out 642 + # if a desired compiler option is unsupported. If we were to error, kbuild 643 + # could never get to the second pass and actually notice that we changed 644 + # the option to something that was supported. 645 + # 646 + # Additionally, we don't want to fall back and/or silently change which compiler 647 + # flags will be used, since that leads to producing kernels with different 648 + # security feature characteristics depending on the compiler used. ("But I 649 + # selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!") 650 + # 651 + # The middle ground is to warn here so that the failed option is obvious, but 652 + # to let the build fail with bad compiler flags so that we can't produce a 653 + # kernel when there is a CONFIG and compiler mismatch. 654 + # 642 655 ifdef CONFIG_CC_STACKPROTECTOR_REGULAR 643 656 stackp-flag := -fstack-protector 644 657 ifeq ($(call cc-option, $(stackp-flag)),)

+1

arch/arm/mm/dma-mapping.c

··· 26 26 #include <linux/io.h> 27 27 #include <linux/vmalloc.h> 28 28 #include <linux/sizes.h> 29 + #include <linux/cma.h> 29 30 30 31 #include <asm/memory.h> 31 32 #include <asm/highmem.h>

+2 -1

arch/ia64/mm/init.c

··· 631 631 632 632 pgdat = NODE_DATA(nid); 633 633 634 - zone = pgdat->node_zones + ZONE_NORMAL; 634 + zone = pgdat->node_zones + 635 + zone_for_memory(nid, start, size, ZONE_NORMAL); 635 636 ret = __add_pages(nid, zone, start_pfn, nr_pages); 636 637 637 638 if (ret)

-1

arch/powerpc/kvm/Makefile

··· 90 90 book3s_hv_rm_mmu.o \ 91 91 book3s_hv_ras.o \ 92 92 book3s_hv_builtin.o \ 93 - book3s_hv_cma.o \ 94 93 $(kvm-book3s_64-builtin-xics-objs-y) 95 94 endif 96 95

+1 -3

arch/powerpc/kvm/book3s_64_mmu_hv.c

··· 37 37 #include <asm/ppc-opcode.h> 38 38 #include <asm/cputable.h> 39 39 40 - #include "book3s_hv_cma.h" 41 - 42 40 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 43 41 #define MAX_LPID_970 63 44 42 ··· 62 64 } 63 65 64 66 kvm->arch.hpt_cma_alloc = 0; 65 - VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); 66 67 page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); 67 68 if (page) { 68 69 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 70 + memset((void *)hpt, 0, (1 << order)); 69 71 kvm->arch.hpt_cma_alloc = 1; 70 72 } 71 73

+13 -6

arch/powerpc/kvm/book3s_hv_builtin.c

··· 16 16 #include <linux/init.h> 17 17 #include <linux/memblock.h> 18 18 #include <linux/sizes.h> 19 + #include <linux/cma.h> 19 20 20 21 #include <asm/cputable.h> 21 22 #include <asm/kvm_ppc.h> 22 23 #include <asm/kvm_book3s.h> 23 24 24 - #include "book3s_hv_cma.h" 25 + #define KVM_CMA_CHUNK_ORDER 18 26 + 25 27 /* 26 28 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) 27 29 * should be power of 2. ··· 44 42 */ 45 43 unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ 46 44 EXPORT_SYMBOL_GPL(kvm_rma_pages); 45 + 46 + static struct cma *kvm_cma; 47 47 48 48 /* Work out RMLS (real mode limit selector) field value for a given RMA size. 49 49 Assumes POWER7 or PPC970. */ ··· 101 97 ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); 102 98 if (!ri) 103 99 return NULL; 104 - page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); 100 + page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages)); 105 101 if (!page) 106 102 goto err_out; 107 103 atomic_set(&ri->use_count, 1); ··· 116 112 void kvm_release_rma(struct kvm_rma_info *ri) 117 113 { 118 114 if (atomic_dec_and_test(&ri->use_count)) { 119 - kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); 115 + cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); 120 116 kfree(ri); 121 117 } 122 118 } ··· 135 131 { 136 132 unsigned long align_pages = HPT_ALIGN_PAGES; 137 133 134 + VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); 135 + 138 136 /* Old CPUs require HPT aligned on a multiple of its size */ 139 137 if (!cpu_has_feature(CPU_FTR_ARCH_206)) 140 138 align_pages = nr_pages; 141 - return kvm_alloc_cma(nr_pages, align_pages); 139 + return cma_alloc(kvm_cma, nr_pages, get_order(align_pages)); 142 140 } 143 141 EXPORT_SYMBOL_GPL(kvm_alloc_hpt); 144 142 145 143 void kvm_release_hpt(struct page *page, unsigned long nr_pages) 146 144 { 147 - kvm_release_cma(page, nr_pages); 145 + cma_release(kvm_cma, page, nr_pages); 148 146 } 149 147 
EXPORT_SYMBOL_GPL(kvm_release_hpt); 150 148 ··· 185 179 align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; 186 180 187 181 align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); 188 - kvm_cma_declare_contiguous(selected_size, align_size); 182 + cma_declare_contiguous(0, selected_size, 0, align_size, 183 + KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); 189 184 } 190 185 } 191 186

-240

arch/powerpc/kvm/book3s_hv_cma.c

··· 1 - /* 2 - * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA 3 - * for DMA mapping framework 4 - * 5 - * Copyright IBM Corporation, 2013 6 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 7 - * 8 - * This program is free software; you can redistribute it and/or 9 - * modify it under the terms of the GNU General Public License as 10 - * published by the Free Software Foundation; either version 2 of the 11 - * License or (at your optional) any later version of the license. 12 - * 13 - */ 14 - #define pr_fmt(fmt) "kvm_cma: " fmt 15 - 16 - #ifdef CONFIG_CMA_DEBUG 17 - #ifndef DEBUG 18 - # define DEBUG 19 - #endif 20 - #endif 21 - 22 - #include <linux/memblock.h> 23 - #include <linux/mutex.h> 24 - #include <linux/sizes.h> 25 - #include <linux/slab.h> 26 - 27 - #include "book3s_hv_cma.h" 28 - 29 - struct kvm_cma { 30 - unsigned long base_pfn; 31 - unsigned long count; 32 - unsigned long *bitmap; 33 - }; 34 - 35 - static DEFINE_MUTEX(kvm_cma_mutex); 36 - static struct kvm_cma kvm_cma_area; 37 - 38 - /** 39 - * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling 40 - * for kvm hash pagetable 41 - * @size: Size of the reserved memory. 42 - * @alignment: Alignment for the contiguous memory area 43 - * 44 - * This function reserves memory for kvm cma area. It should be 45 - * called by arch code when early allocator (memblock or bootmem) 46 - * is still activate. 47 - */ 48 - long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment) 49 - { 50 - long base_pfn; 51 - phys_addr_t addr; 52 - struct kvm_cma *cma = &kvm_cma_area; 53 - 54 - pr_debug("%s(size %lx)\n", __func__, (unsigned long)size); 55 - 56 - if (!size) 57 - return -EINVAL; 58 - /* 59 - * Sanitise input arguments. 60 - * We should be pageblock aligned for CMA. 
61 - */ 62 - alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); 63 - size = ALIGN(size, alignment); 64 - /* 65 - * Reserve memory 66 - * Use __memblock_alloc_base() since 67 - * memblock_alloc_base() panic()s. 68 - */ 69 - addr = __memblock_alloc_base(size, alignment, 0); 70 - if (!addr) { 71 - base_pfn = -ENOMEM; 72 - goto err; 73 - } else 74 - base_pfn = PFN_DOWN(addr); 75 - 76 - /* 77 - * Each reserved area must be initialised later, when more kernel 78 - * subsystems (like slab allocator) are available. 79 - */ 80 - cma->base_pfn = base_pfn; 81 - cma->count = size >> PAGE_SHIFT; 82 - pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); 83 - return 0; 84 - err: 85 - pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); 86 - return base_pfn; 87 - } 88 - 89 - /** 90 - * kvm_alloc_cma() - allocate pages from contiguous area 91 - * @nr_pages: Requested number of pages. 92 - * @align_pages: Requested alignment in number of pages 93 - * 94 - * This function allocates memory buffer for hash pagetable. 95 - */ 96 - struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) 97 - { 98 - int ret; 99 - struct page *page = NULL; 100 - struct kvm_cma *cma = &kvm_cma_area; 101 - unsigned long chunk_count, nr_chunk; 102 - unsigned long mask, pfn, pageno, start = 0; 103 - 104 - 105 - if (!cma || !cma->count) 106 - return NULL; 107 - 108 - pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, 109 - (void *)cma, nr_pages, align_pages); 110 - 111 - if (!nr_pages) 112 - return NULL; 113 - /* 114 - * align mask with chunk size. 
The bit tracks pages in chunk size 115 - */ 116 - VM_BUG_ON(!is_power_of_2(align_pages)); 117 - mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; 118 - BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); 119 - 120 - chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); 121 - nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); 122 - 123 - mutex_lock(&kvm_cma_mutex); 124 - for (;;) { 125 - pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, 126 - start, nr_chunk, mask); 127 - if (pageno >= chunk_count) 128 - break; 129 - 130 - pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); 131 - ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); 132 - if (ret == 0) { 133 - bitmap_set(cma->bitmap, pageno, nr_chunk); 134 - page = pfn_to_page(pfn); 135 - memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); 136 - break; 137 - } else if (ret != -EBUSY) { 138 - break; 139 - } 140 - pr_debug("%s(): memory range at %p is busy, retrying\n", 141 - __func__, pfn_to_page(pfn)); 142 - /* try again with a bit different memory target */ 143 - start = pageno + mask + 1; 144 - } 145 - mutex_unlock(&kvm_cma_mutex); 146 - pr_debug("%s(): returned %p\n", __func__, page); 147 - return page; 148 - } 149 - 150 - /** 151 - * kvm_release_cma() - release allocated pages for hash pagetable 152 - * @pages: Allocated pages. 153 - * @nr_pages: Number of allocated pages. 154 - * 155 - * This function releases memory allocated by kvm_alloc_cma(). 156 - * It returns false when provided pages do not belong to contiguous area and 157 - * true otherwise. 
158 - */ 159 - bool kvm_release_cma(struct page *pages, unsigned long nr_pages) 160 - { 161 - unsigned long pfn; 162 - unsigned long nr_chunk; 163 - struct kvm_cma *cma = &kvm_cma_area; 164 - 165 - if (!cma || !pages) 166 - return false; 167 - 168 - pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages); 169 - 170 - pfn = page_to_pfn(pages); 171 - 172 - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) 173 - return false; 174 - 175 - VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count); 176 - nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); 177 - 178 - mutex_lock(&kvm_cma_mutex); 179 - bitmap_clear(cma->bitmap, 180 - (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT), 181 - nr_chunk); 182 - free_contig_range(pfn, nr_pages); 183 - mutex_unlock(&kvm_cma_mutex); 184 - 185 - return true; 186 - } 187 - 188 - static int __init kvm_cma_activate_area(unsigned long base_pfn, 189 - unsigned long count) 190 - { 191 - unsigned long pfn = base_pfn; 192 - unsigned i = count >> pageblock_order; 193 - struct zone *zone; 194 - 195 - WARN_ON_ONCE(!pfn_valid(pfn)); 196 - zone = page_zone(pfn_to_page(pfn)); 197 - do { 198 - unsigned j; 199 - base_pfn = pfn; 200 - for (j = pageblock_nr_pages; j; --j, pfn++) { 201 - WARN_ON_ONCE(!pfn_valid(pfn)); 202 - /* 203 - * alloc_contig_range requires the pfn range 204 - * specified to be in the same zone. Make this 205 - * simple by forcing the entire CMA resv range 206 - * to be in the same zone. 
207 - */ 208 - if (page_zone(pfn_to_page(pfn)) != zone) 209 - return -EINVAL; 210 - } 211 - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); 212 - } while (--i); 213 - return 0; 214 - } 215 - 216 - static int __init kvm_cma_init_reserved_areas(void) 217 - { 218 - int bitmap_size, ret; 219 - unsigned long chunk_count; 220 - struct kvm_cma *cma = &kvm_cma_area; 221 - 222 - pr_debug("%s()\n", __func__); 223 - if (!cma->count) 224 - return 0; 225 - chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); 226 - bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long); 227 - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); 228 - if (!cma->bitmap) 229 - return -ENOMEM; 230 - 231 - ret = kvm_cma_activate_area(cma->base_pfn, cma->count); 232 - if (ret) 233 - goto error; 234 - return 0; 235 - 236 - error: 237 - kfree(cma->bitmap); 238 - return ret; 239 - } 240 - core_initcall(kvm_cma_init_reserved_areas);

-27

arch/powerpc/kvm/book3s_hv_cma.h

··· 1 - /* 2 - * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA 3 - * for DMA mapping framework 4 - * 5 - * Copyright IBM Corporation, 2013 6 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 7 - * 8 - * This program is free software; you can redistribute it and/or 9 - * modify it under the terms of the GNU General Public License as 10 - * published by the Free Software Foundation; either version 2 of the 11 - * License or (at your optional) any later version of the license. 12 - * 13 - */ 14 - 15 - #ifndef __POWERPC_KVM_CMA_ALLOC_H__ 16 - #define __POWERPC_KVM_CMA_ALLOC_H__ 17 - /* 18 - * Both RMA and Hash page allocation will be multiple of 256K. 19 - */ 20 - #define KVM_CMA_CHUNK_ORDER 18 21 - 22 - extern struct page *kvm_alloc_cma(unsigned long nr_pages, 23 - unsigned long align_pages); 24 - extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); 25 - extern long kvm_cma_declare_contiguous(phys_addr_t size, 26 - phys_addr_t alignment) __init; 27 - #endif

+2 -1

arch/powerpc/mm/mem.c

··· 128 128 return -EINVAL; 129 129 130 130 /* this should work for most non-highmem platforms */ 131 - zone = pgdata->node_zones; 131 + zone = pgdata->node_zones + 132 + zone_for_memory(nid, start, size, 0); 132 133 133 134 return __add_pages(nid, zone, start_pfn, nr_pages); 134 135 }

-11

arch/score/include/uapi/asm/ptrace.h

··· 4 4 #define PTRACE_GETREGS 12 5 5 #define PTRACE_SETREGS 13 6 6 7 - #define PC 32 8 - #define CONDITION 33 9 - #define ECR 34 10 - #define EMA 35 11 - #define CEH 36 12 - #define CEL 37 13 - #define COUNTER 38 14 - #define LDCR 39 15 - #define STCR 40 16 - #define PSR 41 17 - 18 7 #define SINGLESTEP16_INSN 0x7006 19 8 #define SINGLESTEP32_INSN 0x840C8000 20 9 #define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */

+2 -3

arch/sh/drivers/dma/Kconfig

··· 12 12 default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \ 13 13 CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \ 14 14 CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \ 15 - CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7764 || \ 16 - CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ 17 - CPU_SUBTYPE_SH7760 15 + CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \ 16 + CPU_SUBTYPE_SH7785 || CPU_SUBTYPE_SH7760 18 17 19 18 config SH_DMA_API 20 19 depends on SH_DMA

+11

arch/sh/include/asm/io_noioport.h

··· 34 34 BUG(); 35 35 } 36 36 37 + static inline void __iomem *ioport_map(unsigned long port, unsigned int size) 38 + { 39 + BUG(); 40 + return NULL; 41 + } 42 + 43 + static inline void ioport_unmap(void __iomem *addr) 44 + { 45 + BUG(); 46 + } 47 + 37 48 #define inb_p(addr) inb(addr) 38 49 #define inw_p(addr) inw(addr) 39 50 #define inl_p(addr) inl(addr)

-1

arch/sh/include/cpu-sh4/cpu/dma-register.h

··· 32 32 #define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */ 33 33 #elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \ 34 34 defined(CONFIG_CPU_SUBTYPE_SH7763) || \ 35 - defined(CONFIG_CPU_SUBTYPE_SH7764) || \ 36 35 defined(CONFIG_CPU_SUBTYPE_SH7780) || \ 37 36 defined(CONFIG_CPU_SUBTYPE_SH7785) 38 37 #define CHCR_TS_LOW_MASK 0x00000018

+1 -2

arch/sh/include/cpu-sh4a/cpu/dma.h

··· 14 14 #define DMTE4_IRQ evt2irq(0xb80) 15 15 #define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/ 16 16 #define SH_DMAC_BASE0 0xFE008020 17 - #elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \ 18 - defined(CONFIG_CPU_SUBTYPE_SH7764) 17 + #elif defined(CONFIG_CPU_SUBTYPE_SH7763) 19 18 #define DMTE0_IRQ evt2irq(0x640) 20 19 #define DMTE4_IRQ evt2irq(0x780) 21 20 #define DMAE0_IRQ evt2irq(0x6c0)

+3 -1

arch/sh/kernel/cpu/sh4a/clock-sh7724.c

··· 307 307 CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]), 308 308 CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]), 309 309 310 - CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[HWBLK_CMT]), 310 + CLKDEV_ICK_ID("fck", "sh-cmt-32.0", &mstp_clks[HWBLK_CMT]), 311 311 CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]), 312 312 CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]), 313 313 ··· 332 332 CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]), 333 333 CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]), 334 334 CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]), 335 + CLKDEV_CON_ID("usb1", &mstp_clks[HWBLK_USB1]), 336 + CLKDEV_CON_ID("usb0", &mstp_clks[HWBLK_USB0]), 335 337 CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]), 336 338 CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]), 337 339 CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]),

+1 -3

arch/sh/kernel/time.c

··· 80 80 return -ENODEV; 81 81 82 82 pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); 83 - if (IS_ERR(pdev)) 84 - return PTR_ERR(pdev); 85 83 86 - return 0; 84 + return PTR_ERR_OR_ZERO(pdev); 87 85 } 88 86 module_init(rtc_generic_init); 89 87

+1 -3

arch/sh/mm/asids-debugfs.c

··· 67 67 NULL, &asids_debugfs_fops); 68 68 if (!asids_dentry) 69 69 return -ENOMEM; 70 - if (IS_ERR(asids_dentry)) 71 - return PTR_ERR(asids_dentry); 72 70 73 - return 0; 71 + return PTR_ERR_OR_ZERO(asids_dentry); 74 72 } 75 73 module_init(asids_debugfs_init); 76 74

+3 -2

arch/sh/mm/init.c

··· 495 495 pgdat = NODE_DATA(nid); 496 496 497 497 /* We only have ZONE_NORMAL, so this is easy.. */ 498 - ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL, 499 - start_pfn, nr_pages); 498 + ret = __add_pages(nid, pgdat->node_zones + 499 + zone_for_memory(nid, start, size, ZONE_NORMAL), 500 + start_pfn, nr_pages); 500 501 if (unlikely(ret)) 501 502 printk("%s: Failed, __add_pages() == %d\n", __func__, ret); 502 503

+1 -1

arch/tile/kernel/module.c

··· 58 58 area->nr_pages = npages; 59 59 area->pages = pages; 60 60 61 - if (map_vm_area(area, prot_rwx, &pages)) { 61 + if (map_vm_area(area, prot_rwx, pages)) { 62 62 vunmap(area->addr); 63 63 goto error; 64 64 }

+2 -1

arch/x86/mm/fault.c

··· 1218 1218 /* 1219 1219 * If for any reason at all we couldn't handle the fault, 1220 1220 * make sure we exit gracefully rather than endlessly redo 1221 - * the fault: 1221 + * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if 1222 + * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. 1222 1223 */ 1223 1224 fault = handle_mm_fault(mm, vma, address, flags); 1224 1225

+2 -1

arch/x86/mm/init_32.c

··· 825 825 int arch_add_memory(int nid, u64 start, u64 size) 826 826 { 827 827 struct pglist_data *pgdata = NODE_DATA(nid); 828 - struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; 828 + struct zone *zone = pgdata->node_zones + 829 + zone_for_memory(nid, start, size, ZONE_HIGHMEM); 829 830 unsigned long start_pfn = start >> PAGE_SHIFT; 830 831 unsigned long nr_pages = size >> PAGE_SHIFT; 831 832

+2 -1

arch/x86/mm/init_64.c

··· 691 691 int arch_add_memory(int nid, u64 start, u64 size) 692 692 { 693 693 struct pglist_data *pgdat = NODE_DATA(nid); 694 - struct zone *zone = pgdat->node_zones + ZONE_NORMAL; 694 + struct zone *zone = pgdat->node_zones + 695 + zone_for_memory(nid, start, size, ZONE_NORMAL); 695 696 unsigned long start_pfn = start >> PAGE_SHIFT; 696 697 unsigned long nr_pages = size >> PAGE_SHIFT; 697 698 int ret;

+1

drivers/ata/Kconfig

··· 16 16 depends on BLOCK 17 17 depends on !(M32R || M68K || S390) || BROKEN 18 18 select SCSI 19 + select GLOB 19 20 ---help--- 20 21 If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or 21 22 any other ATA device under Linux, say Y and make sure that you know

+3 -69

drivers/ata/libata-core.c

··· 59 59 #include <linux/async.h> 60 60 #include <linux/log2.h> 61 61 #include <linux/slab.h> 62 + #include <linux/glob.h> 62 63 #include <scsi/scsi.h> 63 64 #include <scsi/scsi_cmnd.h> 64 65 #include <scsi/scsi_host.h> ··· 4251 4250 { } 4252 4251 }; 4253 4252 4254 - /** 4255 - * glob_match - match a text string against a glob-style pattern 4256 - * @text: the string to be examined 4257 - * @pattern: the glob-style pattern to be matched against 4258 - * 4259 - * Either/both of text and pattern can be empty strings. 4260 - * 4261 - * Match text against a glob-style pattern, with wildcards and simple sets: 4262 - * 4263 - * ? matches any single character. 4264 - * * matches any run of characters. 4265 - * [xyz] matches a single character from the set: x, y, or z. 4266 - * [a-d] matches a single character from the range: a, b, c, or d. 4267 - * [a-d0-9] matches a single character from either range. 4268 - * 4269 - * The special characters ?, [, -, or *, can be matched using a set, eg. [*] 4270 - * Behaviour with malformed patterns is undefined, though generally reasonable. 4271 - * 4272 - * Sample patterns: "SD1?", "SD1[0-5]", "*R0", "SD*1?[012]*xx" 4273 - * 4274 - * This function uses one level of recursion per '*' in pattern. 4275 - * Since it calls _nothing_ else, and has _no_ explicit local variables, 4276 - * this will not cause stack problems for any reasonable use here. 4277 - * 4278 - * RETURNS: 4279 - * 0 on match, 1 otherwise. 4280 - */ 4281 - static int glob_match (const char *text, const char *pattern) 4282 - { 4283 - do { 4284 - /* Match single character or a '?' 
wildcard */ 4285 - if (*text == *pattern || *pattern == '?') { 4286 - if (!*pattern++) 4287 - return 0; /* End of both strings: match */ 4288 - } else { 4289 - /* Match single char against a '[' bracketed ']' pattern set */ 4290 - if (!*text || *pattern != '[') 4291 - break; /* Not a pattern set */ 4292 - while (*++pattern && *pattern != ']' && *text != *pattern) { 4293 - if (*pattern == '-' && *(pattern - 1) != '[') 4294 - if (*text > *(pattern - 1) && *text < *(pattern + 1)) { 4295 - ++pattern; 4296 - break; 4297 - } 4298 - } 4299 - if (!*pattern || *pattern == ']') 4300 - return 1; /* No match */ 4301 - while (*pattern && *pattern++ != ']'); 4302 - } 4303 - } while (*++text && *pattern); 4304 - 4305 - /* Match any run of chars against a '*' wildcard */ 4306 - if (*pattern == '*') { 4307 - if (!*++pattern) 4308 - return 0; /* Match: avoid recursion at end of pattern */ 4309 - /* Loop to handle additional pattern chars after the wildcard */ 4310 - while (*text) { 4311 - if (glob_match(text, pattern) == 0) 4312 - return 0; /* Remainder matched */ 4313 - ++text; /* Absorb (match) this char and try again */ 4314 - } 4315 - } 4316 - if (!*text && !*pattern) 4317 - return 0; /* End of both strings: match */ 4318 - return 1; /* No match */ 4319 - } 4320 - 4321 4253 static unsigned long ata_dev_blacklisted(const struct ata_device *dev) 4322 4254 { 4323 4255 unsigned char model_num[ATA_ID_PROD_LEN + 1]; ··· 4261 4327 ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev)); 4262 4328 4263 4329 while (ad->model_num) { 4264 - if (!glob_match(model_num, ad->model_num)) { 4330 + if (glob_match(model_num, ad->model_num)) { 4265 4331 if (ad->model_rev == NULL) 4266 4332 return ad->horkage; 4267 - if (!glob_match(model_rev, ad->model_rev)) 4333 + if (glob_match(model_rev, ad->model_rev)) 4268 4334 return ad->horkage; 4269 4335 } 4270 4336 ad++;

-10

drivers/base/Kconfig

··· 289 289 290 290 If unsure, leave the default value "8". 291 291 292 - config CMA_AREAS 293 - int "Maximum count of the CMA device-private areas" 294 - default 7 295 - help 296 - CMA allows to create CMA areas for particular devices. This parameter 297 - sets the maximum number of such device private CMA areas in the 298 - system. 299 - 300 - If unsure, leave the default value "7". 301 - 302 292 endif 303 293 304 294 endmenu

+12 -208

drivers/base/dma-contiguous.c

··· 24 24 25 25 #include <linux/memblock.h> 26 26 #include <linux/err.h> 27 - #include <linux/mm.h> 28 - #include <linux/mutex.h> 29 - #include <linux/page-isolation.h> 30 27 #include <linux/sizes.h> 31 - #include <linux/slab.h> 32 - #include <linux/swap.h> 33 - #include <linux/mm_types.h> 34 28 #include <linux/dma-contiguous.h> 35 - 36 - struct cma { 37 - unsigned long base_pfn; 38 - unsigned long count; 39 - unsigned long *bitmap; 40 - struct mutex lock; 41 - }; 42 - 43 - struct cma *dma_contiguous_default_area; 29 + #include <linux/cma.h> 44 30 45 31 #ifdef CONFIG_CMA_SIZE_MBYTES 46 32 #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES 47 33 #else 48 34 #define CMA_SIZE_MBYTES 0 49 35 #endif 36 + 37 + struct cma *dma_contiguous_default_area; 50 38 51 39 /* 52 40 * Default global CMA area size can be defined in kernel's .config. ··· 142 154 } 143 155 } 144 156 145 - static DEFINE_MUTEX(cma_mutex); 146 - 147 - static int __init cma_activate_area(struct cma *cma) 148 - { 149 - int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long); 150 - unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; 151 - unsigned i = cma->count >> pageblock_order; 152 - struct zone *zone; 153 - 154 - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); 155 - 156 - if (!cma->bitmap) 157 - return -ENOMEM; 158 - 159 - WARN_ON_ONCE(!pfn_valid(pfn)); 160 - zone = page_zone(pfn_to_page(pfn)); 161 - 162 - do { 163 - unsigned j; 164 - base_pfn = pfn; 165 - for (j = pageblock_nr_pages; j; --j, pfn++) { 166 - WARN_ON_ONCE(!pfn_valid(pfn)); 167 - /* 168 - * alloc_contig_range requires the pfn range 169 - * specified to be in the same zone. Make this 170 - * simple by forcing the entire CMA resv range 171 - * to be in the same zone. 
172 - */ 173 - if (page_zone(pfn_to_page(pfn)) != zone) 174 - goto err; 175 - } 176 - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); 177 - } while (--i); 178 - 179 - mutex_init(&cma->lock); 180 - return 0; 181 - 182 - err: 183 - kfree(cma->bitmap); 184 - return -EINVAL; 185 - } 186 - 187 - static struct cma cma_areas[MAX_CMA_AREAS]; 188 - static unsigned cma_area_count; 189 - 190 - static int __init cma_init_reserved_areas(void) 191 - { 192 - int i; 193 - 194 - for (i = 0; i < cma_area_count; i++) { 195 - int ret = cma_activate_area(&cma_areas[i]); 196 - if (ret) 197 - return ret; 198 - } 199 - 200 - return 0; 201 - } 202 - core_initcall(cma_init_reserved_areas); 203 - 204 157 /** 205 158 * dma_contiguous_reserve_area() - reserve custom contiguous area 206 159 * @size: Size of the reserved area (in bytes), ··· 163 234 phys_addr_t limit, struct cma **res_cma, 164 235 bool fixed) 165 236 { 166 - struct cma *cma = &cma_areas[cma_area_count]; 167 - phys_addr_t alignment; 168 - int ret = 0; 237 + int ret; 169 238 170 - pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, 171 - (unsigned long)size, (unsigned long)base, 172 - (unsigned long)limit); 173 - 174 - /* Sanity checks */ 175 - if (cma_area_count == ARRAY_SIZE(cma_areas)) { 176 - pr_err("Not enough slots for CMA reserved regions!\n"); 177 - return -ENOSPC; 178 - } 179 - 180 - if (!size) 181 - return -EINVAL; 182 - 183 - /* Sanitise input arguments */ 184 - alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); 185 - base = ALIGN(base, alignment); 186 - size = ALIGN(size, alignment); 187 - limit &= ~(alignment - 1); 188 - 189 - /* Reserve memory */ 190 - if (base && fixed) { 191 - if (memblock_is_region_reserved(base, size) || 192 - memblock_reserve(base, size) < 0) { 193 - ret = -EBUSY; 194 - goto err; 195 - } 196 - } else { 197 - phys_addr_t addr = memblock_alloc_range(size, alignment, base, 198 - limit); 199 - if (!addr) { 200 - ret = -ENOMEM; 201 - goto err; 202 - } else { 203 - base = 
addr; 204 - } 205 - } 206 - 207 - /* 208 - * Each reserved area must be initialised later, when more kernel 209 - * subsystems (like slab allocator) are available. 210 - */ 211 - cma->base_pfn = PFN_DOWN(base); 212 - cma->count = size >> PAGE_SHIFT; 213 - *res_cma = cma; 214 - cma_area_count++; 215 - 216 - pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, 217 - (unsigned long)base); 239 + ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma); 240 + if (ret) 241 + return ret; 218 242 219 243 /* Architecture specific contiguous memory fixup. */ 220 - dma_contiguous_early_fixup(base, size); 221 - return 0; 222 - err: 223 - pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); 224 - return ret; 225 - } 244 + dma_contiguous_early_fixup(cma_get_base(*res_cma), 245 + cma_get_size(*res_cma)); 226 246 227 - static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count) 228 - { 229 - mutex_lock(&cma->lock); 230 - bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count); 231 - mutex_unlock(&cma->lock); 247 + return 0; 232 248 } 233 249 234 250 /** ··· 190 316 struct page *dma_alloc_from_contiguous(struct device *dev, int count, 191 317 unsigned int align) 192 318 { 193 - unsigned long mask, pfn, pageno, start = 0; 194 - struct cma *cma = dev_get_cma_area(dev); 195 - struct page *page = NULL; 196 - int ret; 197 - 198 - if (!cma || !cma->count) 199 - return NULL; 200 - 201 319 if (align > CONFIG_CMA_ALIGNMENT) 202 320 align = CONFIG_CMA_ALIGNMENT; 203 321 204 - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, 205 - count, align); 206 - 207 - if (!count) 208 - return NULL; 209 - 210 - mask = (1 << align) - 1; 211 - 212 - 213 - for (;;) { 214 - mutex_lock(&cma->lock); 215 - pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, 216 - start, count, mask); 217 - if (pageno >= cma->count) { 218 - mutex_unlock(&cma->lock); 219 - break; 220 - } 221 - bitmap_set(cma->bitmap, pageno, count); 222 
- /* 223 - * It's safe to drop the lock here. We've marked this region for 224 - * our exclusive use. If the migration fails we will take the 225 - * lock again and unmark it. 226 - */ 227 - mutex_unlock(&cma->lock); 228 - 229 - pfn = cma->base_pfn + pageno; 230 - mutex_lock(&cma_mutex); 231 - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); 232 - mutex_unlock(&cma_mutex); 233 - if (ret == 0) { 234 - page = pfn_to_page(pfn); 235 - break; 236 - } else if (ret != -EBUSY) { 237 - clear_cma_bitmap(cma, pfn, count); 238 - break; 239 - } 240 - clear_cma_bitmap(cma, pfn, count); 241 - pr_debug("%s(): memory range at %p is busy, retrying\n", 242 - __func__, pfn_to_page(pfn)); 243 - /* try again with a bit different memory target */ 244 - start = pageno + mask + 1; 245 - } 246 - 247 - pr_debug("%s(): returned %p\n", __func__, page); 248 - return page; 322 + return cma_alloc(dev_get_cma_area(dev), count, align); 249 323 } 250 324 251 325 /** ··· 209 387 bool dma_release_from_contiguous(struct device *dev, struct page *pages, 210 388 int count) 211 389 { 212 - struct cma *cma = dev_get_cma_area(dev); 213 - unsigned long pfn; 214 - 215 - if (!cma || !pages) 216 - return false; 217 - 218 - pr_debug("%s(page %p)\n", __func__, (void *)pages); 219 - 220 - pfn = page_to_pfn(pages); 221 - 222 - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) 223 - return false; 224 - 225 - VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); 226 - 227 - free_contig_range(pfn, count); 228 - clear_cma_bitmap(cma, pfn, count); 229 - 230 - return true; 390 + return cma_release(dev_get_cma_area(dev), pages, count); 231 391 }

+16 -14

drivers/base/memory.c

··· 284 284 * attribute and need to set the online_type. 285 285 */ 286 286 if (mem->online_type < 0) 287 - mem->online_type = ONLINE_KEEP; 287 + mem->online_type = MMOP_ONLINE_KEEP; 288 288 289 289 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 290 290 ··· 315 315 if (ret) 316 316 return ret; 317 317 318 - if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) 319 - online_type = ONLINE_KERNEL; 320 - else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) 321 - online_type = ONLINE_MOVABLE; 322 - else if (!strncmp(buf, "online", min_t(int, count, 6))) 323 - online_type = ONLINE_KEEP; 324 - else if (!strncmp(buf, "offline", min_t(int, count, 7))) 325 - online_type = -1; 318 + if (sysfs_streq(buf, "online_kernel")) 319 + online_type = MMOP_ONLINE_KERNEL; 320 + else if (sysfs_streq(buf, "online_movable")) 321 + online_type = MMOP_ONLINE_MOVABLE; 322 + else if (sysfs_streq(buf, "online")) 323 + online_type = MMOP_ONLINE_KEEP; 324 + else if (sysfs_streq(buf, "offline")) 325 + online_type = MMOP_OFFLINE; 326 326 else { 327 327 ret = -EINVAL; 328 328 goto err; 329 329 } 330 330 331 331 switch (online_type) { 332 - case ONLINE_KERNEL: 333 - case ONLINE_MOVABLE: 334 - case ONLINE_KEEP: 332 + case MMOP_ONLINE_KERNEL: 333 + case MMOP_ONLINE_MOVABLE: 334 + case MMOP_ONLINE_KEEP: 335 335 /* 336 336 * mem->online_type is not protected so there can be a 337 337 * race here. However, when racing online, the first ··· 342 342 mem->online_type = online_type; 343 343 ret = device_online(&mem->dev); 344 344 break; 345 - case -1: 345 + case MMOP_OFFLINE: 346 346 ret = device_offline(&mem->dev); 347 347 break; 348 348 default: ··· 406 406 int i, ret; 407 407 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 408 408 409 - phys_addr = simple_strtoull(buf, NULL, 0); 409 + ret = kstrtoull(buf, 0, &phys_addr); 410 + if (ret) 411 + return ret; 410 412 411 413 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 412 414 return -EINVAL;

+1 -1

drivers/base/node.c

··· 126 126 nid, K(node_page_state(nid, NR_FILE_PAGES)), 127 127 nid, K(node_page_state(nid, NR_FILE_MAPPED)), 128 128 nid, K(node_page_state(nid, NR_ANON_PAGES)), 129 - nid, K(node_page_state(nid, NR_SHMEM)), 129 + nid, K(i.sharedram), 130 130 nid, node_page_state(nid, NR_KERNEL_STACK) * 131 131 THREAD_SIZE / 1024, 132 132 nid, K(node_page_state(nid, NR_PAGETABLE)),

+43 -28

drivers/block/zram/zram_drv.c

··· 183 183 static int zram_test_flag(struct zram_meta *meta, u32 index, 184 184 enum zram_pageflags flag) 185 185 { 186 - return meta->table[index].flags & BIT(flag); 186 + return meta->table[index].value & BIT(flag); 187 187 } 188 188 189 189 static void zram_set_flag(struct zram_meta *meta, u32 index, 190 190 enum zram_pageflags flag) 191 191 { 192 - meta->table[index].flags |= BIT(flag); 192 + meta->table[index].value |= BIT(flag); 193 193 } 194 194 195 195 static void zram_clear_flag(struct zram_meta *meta, u32 index, 196 196 enum zram_pageflags flag) 197 197 { 198 - meta->table[index].flags &= ~BIT(flag); 198 + meta->table[index].value &= ~BIT(flag); 199 + } 200 + 201 + static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) 202 + { 203 + return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); 204 + } 205 + 206 + static void zram_set_obj_size(struct zram_meta *meta, 207 + u32 index, size_t size) 208 + { 209 + unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; 210 + 211 + meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; 199 212 } 200 213 201 214 static inline int is_partial_io(struct bio_vec *bvec) ··· 268 255 goto free_table; 269 256 } 270 257 271 - rwlock_init(&meta->tb_lock); 272 258 return meta; 273 259 274 260 free_table: ··· 316 304 flush_dcache_page(page); 317 305 } 318 306 319 - /* NOTE: caller should hold meta->tb_lock with write-side */ 307 + 308 + /* 309 + * To protect concurrent access to the same index entry, 310 + * caller should hold this table index entry's bit_spinlock to 311 + * indicate this index entry is accessing. 
312 + */ 320 313 static void zram_free_page(struct zram *zram, size_t index) 321 314 { 322 315 struct zram_meta *meta = zram->meta; ··· 341 324 342 325 zs_free(meta->mem_pool, handle); 343 326 344 - atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); 327 + atomic64_sub(zram_get_obj_size(meta, index), 328 + &zram->stats.compr_data_size); 345 329 atomic64_dec(&zram->stats.pages_stored); 346 330 347 331 meta->table[index].handle = 0; 348 - meta->table[index].size = 0; 332 + zram_set_obj_size(meta, index, 0); 349 333 } 350 334 351 335 static int zram_decompress_page(struct zram *zram, char *mem, u32 index) ··· 355 337 unsigned char *cmem; 356 338 struct zram_meta *meta = zram->meta; 357 339 unsigned long handle; 358 - u16 size; 340 + size_t size; 359 341 360 - read_lock(&meta->tb_lock); 342 + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 361 343 handle = meta->table[index].handle; 362 - size = meta->table[index].size; 344 + size = zram_get_obj_size(meta, index); 363 345 364 346 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { 365 - read_unlock(&meta->tb_lock); 347 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 366 348 clear_page(mem); 367 349 return 0; 368 350 } ··· 373 355 else 374 356 ret = zcomp_decompress(zram->comp, cmem, size, mem); 375 357 zs_unmap_object(meta->mem_pool, handle); 376 - read_unlock(&meta->tb_lock); 358 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 377 359 378 360 /* Should NEVER happen. Return bio error if it does. 
*/ 379 361 if (unlikely(ret)) { ··· 394 376 struct zram_meta *meta = zram->meta; 395 377 page = bvec->bv_page; 396 378 397 - read_lock(&meta->tb_lock); 379 + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 398 380 if (unlikely(!meta->table[index].handle) || 399 381 zram_test_flag(meta, index, ZRAM_ZERO)) { 400 - read_unlock(&meta->tb_lock); 382 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 401 383 handle_zero_page(bvec); 402 384 return 0; 403 385 } 404 - read_unlock(&meta->tb_lock); 386 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 405 387 406 388 if (is_partial_io(bvec)) 407 389 /* Use a temporary buffer to decompress the page */ ··· 479 461 if (page_zero_filled(uncmem)) { 480 462 kunmap_atomic(user_mem); 481 463 /* Free memory associated with this sector now. */ 482 - write_lock(&zram->meta->tb_lock); 464 + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 483 465 zram_free_page(zram, index); 484 466 zram_set_flag(meta, index, ZRAM_ZERO); 485 - write_unlock(&zram->meta->tb_lock); 467 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 486 468 487 469 atomic64_inc(&zram->stats.zero_pages); 488 470 ret = 0; ··· 532 514 * Free memory associated with this sector 533 515 * before overwriting unused sectors. 534 516 */ 535 - write_lock(&zram->meta->tb_lock); 517 + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 536 518 zram_free_page(zram, index); 537 519 538 520 meta->table[index].handle = handle; 539 - meta->table[index].size = clen; 540 - write_unlock(&zram->meta->tb_lock); 521 + zram_set_obj_size(meta, index, clen); 522 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 541 523 542 524 /* Update stats */ 543 525 atomic64_add(clen, &zram->stats.compr_data_size); ··· 578 560 int offset, struct bio *bio) 579 561 { 580 562 size_t n = bio->bi_iter.bi_size; 563 + struct zram_meta *meta = zram->meta; 581 564 582 565 /* 583 566 * zram manages data in physical block size units. 
Because logical block ··· 599 580 } 600 581 601 582 while (n >= PAGE_SIZE) { 602 - /* 603 - * Discard request can be large so the lock hold times could be 604 - * lengthy. So take the lock once per page. 605 - */ 606 - write_lock(&zram->meta->tb_lock); 583 + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 607 584 zram_free_page(zram, index); 608 - write_unlock(&zram->meta->tb_lock); 585 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 609 586 index++; 610 587 n -= PAGE_SIZE; 611 588 } ··· 836 821 zram = bdev->bd_disk->private_data; 837 822 meta = zram->meta; 838 823 839 - write_lock(&meta->tb_lock); 824 + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 840 825 zram_free_page(zram, index); 841 - write_unlock(&meta->tb_lock); 826 + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 842 827 atomic64_inc(&zram->stats.notify_free); 843 828 } 844 829

+20 -9

drivers/block/zram/zram_drv.h

··· 43 43 /*-- End of configurable params */ 44 44 45 45 #define SECTOR_SHIFT 9 46 - #define SECTOR_SIZE (1 << SECTOR_SHIFT) 47 46 #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 48 47 #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) 49 48 #define ZRAM_LOGICAL_BLOCK_SHIFT 12 ··· 50 51 #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ 51 52 (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) 52 53 53 - /* Flags for zram pages (table[page_no].flags) */ 54 + 55 + /* 56 + * The lower ZRAM_FLAG_SHIFT bits of table.value is for 57 + * object size (excluding header), the higher bits is for 58 + * zram_pageflags. 59 + * 60 + * zram is mainly used for memory efficiency so we want to keep memory 61 + * footprint small so we can squeeze size and flags into a field. 62 + * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header), 63 + * the higher bits is for zram_pageflags. 64 + */ 65 + #define ZRAM_FLAG_SHIFT 24 66 + 67 + /* Flags for zram pages (table[page_no].value) */ 54 68 enum zram_pageflags { 55 69 /* Page consists entirely of zeros */ 56 - ZRAM_ZERO, 70 + ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1, 71 + ZRAM_ACCESS, /* page is now accessed */ 57 72 58 73 __NR_ZRAM_PAGEFLAGS, 59 74 }; ··· 75 62 /*-- Data structures */ 76 63 77 64 /* Allocated for each disk page */ 78 - struct table { 65 + struct zram_table_entry { 79 66 unsigned long handle; 80 - u16 size; /* object size (excluding header) */ 81 - u8 flags; 82 - } __aligned(4); 67 + unsigned long value; 68 + }; 83 69 84 70 struct zram_stats { 85 71 atomic64_t compr_data_size; /* compressed size of pages stored */ ··· 93 81 }; 94 82 95 83 struct zram_meta { 96 - rwlock_t tb_lock; /* protect table */ 97 - struct table *table; 84 + struct zram_table_entry *table; 98 85 struct zs_pool *mem_pool; 99 86 }; 100 87

+5 -1

drivers/firmware/memmap.c

··· 286 286 { 287 287 struct firmware_map_entry *entry; 288 288 289 - entry = firmware_map_find_entry_bootmem(start, end, type); 289 + entry = firmware_map_find_entry(start, end - 1, type); 290 + if (entry) 291 + return 0; 292 + 293 + entry = firmware_map_find_entry_bootmem(start, end - 1, type); 290 294 if (!entry) { 291 295 entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); 292 296 if (!entry)

+1 -1

drivers/gpu/drm/drm_hashtab.c

··· 125 125 parent = &entry->head; 126 126 } 127 127 if (parent) { 128 - hlist_add_after_rcu(parent, &item->head); 128 + hlist_add_behind_rcu(&item->head, parent); 129 129 } else { 130 130 hlist_add_head_rcu(&item->head, h_list); 131 131 }

+1 -1

drivers/hwmon/asus_atk0110.c

··· 688 688 DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm, 689 689 atk_debugfs_gitm_get, 690 690 NULL, 691 - "0x%08llx\n") 691 + "0x%08llx\n"); 692 692 693 693 static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj) 694 694 {

+2 -5

drivers/lguest/core.c

··· 42 42 static __init int map_switcher(void) 43 43 { 44 44 int i, err; 45 - struct page **pagep; 46 45 47 46 /* 48 47 * Map the Switcher in to high memory. ··· 109 110 * This code actually sets up the pages we've allocated to appear at 110 111 * switcher_addr. map_vm_area() takes the vma we allocated above, the 111 112 * kind of pages we're mapping (kernel pages), and a pointer to our 112 - * array of struct pages. It increments that pointer, but we don't 113 - * care. 113 + * array of struct pages. 114 114 */ 115 - pagep = lg_switcher_pages; 116 - err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); 115 + err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages); 117 116 if (err) { 118 117 printk("lguest: map_vm_area failed: %i\n", err); 119 118 goto free_vma;

+1 -1

drivers/net/ethernet/intel/i40e/i40e_ethtool.c

··· 1948 1948 1949 1949 /* add filter to the list */ 1950 1950 if (parent) 1951 - hlist_add_after(&parent->fdir_node, &input->fdir_node); 1951 + hlist_add_behind(&input->fdir_node, &parent->fdir_node); 1952 1952 else 1953 1953 hlist_add_head(&input->fdir_node, 1954 1954 &pf->fdir_filter_list);

+1 -1

drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c

··· 2517 2517 2518 2518 /* add filter to the list */ 2519 2519 if (parent) 2520 - hlist_add_after(&parent->fdir_node, &input->fdir_node); 2520 + hlist_add_behind(&input->fdir_node, &parent->fdir_node); 2521 2521 else 2522 2522 hlist_add_head(&input->fdir_node, 2523 2523 &adapter->fdir_filter_list);

+1 -3

drivers/staging/android/binder.c

··· 585 585 586 586 for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { 587 587 int ret; 588 - struct page **page_array_ptr; 589 588 590 589 page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; 591 590 ··· 597 598 } 598 599 tmp_area.addr = page_addr; 599 600 tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */; 600 - page_array_ptr = page; 601 - ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr); 601 + ret = map_vm_area(&tmp_area, PAGE_KERNEL, page); 602 602 if (ret) { 603 603 pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", 604 604 proc->pid, page_addr);

+2 -2

drivers/staging/lustre/lustre/libcfs/hash.c

··· 351 351 cfs_hash_dhead_t, dh_head); 352 352 353 353 if (dh->dh_tail != NULL) /* not empty */ 354 - hlist_add_after(dh->dh_tail, hnode); 354 + hlist_add_behind(hnode, dh->dh_tail); 355 355 else /* empty list */ 356 356 hlist_add_head(hnode, &dh->dh_head); 357 357 dh->dh_tail = hnode; ··· 406 406 cfs_hash_dhead_dep_t, dd_head); 407 407 408 408 if (dh->dd_tail != NULL) /* not empty */ 409 - hlist_add_after(dh->dd_tail, hnode); 409 + hlist_add_behind(hnode, dh->dd_tail); 410 410 else /* empty list */ 411 411 hlist_add_head(hnode, &dh->dd_head); 412 412 dh->dd_tail = hnode;

+1 -1

drivers/tty/sysrq.c

··· 355 355 356 356 static void moom_callback(struct work_struct *ignored) 357 357 { 358 - out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL, 358 + out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL, 359 359 0, NULL, true); 360 360 } 361 361

+2 -2

fs/fscache/main.c

··· 67 67 return ret; 68 68 } 69 69 70 - struct ctl_table fscache_sysctls[] = { 70 + static struct ctl_table fscache_sysctls[] = { 71 71 { 72 72 .procname = "object_max_active", 73 73 .data = &fscache_object_max_active, ··· 87 87 {} 88 88 }; 89 89 90 - struct ctl_table fscache_sysctls_root[] = { 90 + static struct ctl_table fscache_sysctls_root[] = { 91 91 { 92 92 .procname = "fscache", 93 93 .mode = 0555,

+7 -8

fs/logfs/readwrite.c

··· 1019 1019 /** 1020 1020 * logfs_is_valid_block - check whether this block is still valid 1021 1021 * 1022 - * @sb - superblock 1023 - * @ofs - block physical offset 1024 - * @ino - block inode number 1025 - * @bix - block index 1026 - * @level - block level 1022 + * @sb: superblock 1023 + * @ofs: block physical offset 1024 + * @ino: block inode number 1025 + * @bix: block index 1026 + * @gc_level: block level 1027 1027 * 1028 1028 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will 1029 1029 * become invalid once the journal is written. ··· 2226 2226 * 2227 2227 * @inode: parent inode (ifile or directory) 2228 2228 * @buf: object to write (inode or dentry) 2229 - * @n: object size 2230 - * @_pos: object number (file position in blocks/objects) 2229 + * @count: object size 2230 + * @bix: block index 2231 2231 * @flags: write flags 2232 - * @lock: 0 if write lock is already taken, 1 otherwise 2233 2232 * @shadow_tree: shadow below this inode 2234 2233 * 2235 2234 * FIXME: All caller of this put a 200-300 byte variable on the stack,

+1 -1

fs/namespace.c

··· 798 798 list_splice(&head, n->list.prev); 799 799 800 800 if (shadows) 801 - hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); 801 + hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); 802 802 else 803 803 hlist_add_head_rcu(&mnt->mnt_hash, 804 804 m_hash(&parent->mnt, mnt->mnt_mountpoint));

+9 -2

fs/notify/fanotify/fanotify.c

··· 70 70 wait_event(group->fanotify_data.access_waitq, event->response || 71 71 atomic_read(&group->fanotify_data.bypass_perm)); 72 72 73 - if (!event->response) /* bypass_perm set */ 73 + if (!event->response) { /* bypass_perm set */ 74 + /* 75 + * Event was canceled because group is being destroyed. Remove 76 + * it from group's event list because we are responsible for 77 + * freeing the permission event. 78 + */ 79 + fsnotify_remove_event(group, &event->fae.fse); 74 80 return 0; 81 + } 75 82 76 83 /* userspace responded, convert to something usable */ 77 84 switch (event->response) { ··· 217 210 return -ENOMEM; 218 211 219 212 fsn_event = &event->fse; 220 - ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); 213 + ret = fsnotify_add_event(group, fsn_event, fanotify_merge); 221 214 if (ret) { 222 215 /* Permission events shouldn't be merged */ 223 216 BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);

+13 -1

fs/notify/fanotify/fanotify_user.c

··· 66 66 67 67 /* held the notification_mutex the whole time, so this is the 68 68 * same event we peeked above */ 69 - return fsnotify_remove_notify_event(group); 69 + return fsnotify_remove_first_event(group); 70 70 } 71 71 72 72 static int create_fd(struct fsnotify_group *group, ··· 359 359 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 360 360 struct fanotify_perm_event_info *event, *next; 361 361 362 + /* 363 + * There may be still new events arriving in the notification queue 364 + * but since userspace cannot use fanotify fd anymore, no event can 365 + * enter or leave access_list by now. 366 + */ 362 367 spin_lock(&group->fanotify_data.access_lock); 363 368 364 369 atomic_inc(&group->fanotify_data.bypass_perm); ··· 378 373 } 379 374 spin_unlock(&group->fanotify_data.access_lock); 380 375 376 + /* 377 + * Since bypass_perm is set, newly queued events will not wait for 378 + * access response. Wake up the already sleeping ones now. 379 + * synchronize_srcu() in fsnotify_destroy_group() will wait for all 380 + * processes sleeping in fanotify_handle_event() waiting for access 381 + * response and thus also for all permission events to be freed. 382 + */ 381 383 wake_up(&group->fanotify_data.access_waitq); 382 384 #endif 383 385

+1 -1

fs/notify/inode_mark.c

··· 232 232 233 233 BUG_ON(last == NULL); 234 234 /* mark should be the last entry. last is the current last entry */ 235 - hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); 235 + hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list); 236 236 out: 237 237 fsnotify_recalc_inode_mask_locked(inode); 238 238 spin_unlock(&inode->i_lock);

+1 -1

fs/notify/inotify/inotify_fsnotify.c

··· 108 108 if (len) 109 109 strcpy(event->name, file_name); 110 110 111 - ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge); 111 + ret = fsnotify_add_event(group, fsn_event, inotify_merge); 112 112 if (ret) { 113 113 /* Our event wasn't used in the end. Free it. */ 114 114 fsnotify_destroy_event(group, fsn_event);

+2 -2

fs/notify/inotify/inotify_user.c

··· 149 149 if (fsnotify_notify_queue_is_empty(group)) 150 150 return NULL; 151 151 152 - event = fsnotify_peek_notify_event(group); 152 + event = fsnotify_peek_first_event(group); 153 153 154 154 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 155 155 ··· 159 159 160 160 /* held the notification_mutex the whole time, so this is the 161 161 * same event we peeked above */ 162 - fsnotify_remove_notify_event(group); 162 + fsnotify_remove_first_event(group); 163 163 164 164 return event; 165 165 }

+27 -10

fs/notify/notification.c

··· 73 73 /* Overflow events are per-group and we don't want to free them */ 74 74 if (!event || event->mask == FS_Q_OVERFLOW) 75 75 return; 76 - 76 + /* If the event is still queued, we have a problem... */ 77 + WARN_ON(!list_empty(&event->list)); 77 78 group->ops->free_event(event); 78 79 } 79 80 ··· 84 83 * added to the queue, 1 if the event was merged with some other queued event, 85 84 * 2 if the queue of events has overflown. 86 85 */ 87 - int fsnotify_add_notify_event(struct fsnotify_group *group, 88 - struct fsnotify_event *event, 89 - int (*merge)(struct list_head *, 90 - struct fsnotify_event *)) 86 + int fsnotify_add_event(struct fsnotify_group *group, 87 + struct fsnotify_event *event, 88 + int (*merge)(struct list_head *, 89 + struct fsnotify_event *)) 91 90 { 92 91 int ret = 0; 93 92 struct list_head *list = &group->notification_list; ··· 126 125 } 127 126 128 127 /* 128 + * Remove @event from group's notification queue. It is the responsibility of 129 + * the caller to destroy the event. 130 + */ 131 + void fsnotify_remove_event(struct fsnotify_group *group, 132 + struct fsnotify_event *event) 133 + { 134 + mutex_lock(&group->notification_mutex); 135 + if (!list_empty(&event->list)) { 136 + list_del_init(&event->list); 137 + group->q_len--; 138 + } 139 + mutex_unlock(&group->notification_mutex); 140 + } 141 + 142 + /* 129 143 * Remove and return the first event from the notification list. 
It is the 130 144 * responsibility of the caller to destroy the obtained event 131 145 */ 132 - struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) 146 + struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group) 133 147 { 134 148 struct fsnotify_event *event; 135 149 ··· 156 140 struct fsnotify_event, list); 157 141 /* 158 142 * We need to init list head for the case of overflow event so that 159 - * check in fsnotify_add_notify_events() works 143 + * check in fsnotify_add_event() works 160 144 */ 161 145 list_del_init(&event->list); 162 146 group->q_len--; ··· 165 149 } 166 150 167 151 /* 168 - * This will not remove the event, that must be done with fsnotify_remove_notify_event() 152 + * This will not remove the event, that must be done with 153 + * fsnotify_remove_first_event() 169 154 */ 170 - struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) 155 + struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group) 171 156 { 172 157 BUG_ON(!mutex_is_locked(&group->notification_mutex)); 173 158 ··· 186 169 187 170 mutex_lock(&group->notification_mutex); 188 171 while (!fsnotify_notify_queue_is_empty(group)) { 189 - event = fsnotify_remove_notify_event(group); 172 + event = fsnotify_remove_first_event(group); 190 173 fsnotify_destroy_event(group, event); 191 174 } 192 175 mutex_unlock(&group->notification_mutex);

+1 -1

fs/notify/vfsmount_mark.c

··· 191 191 192 192 BUG_ON(last == NULL); 193 193 /* mark should be the last entry. last is the current last entry */ 194 - hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); 194 + hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list); 195 195 out: 196 196 fsnotify_recalc_vfsmount_mask_locked(mnt); 197 197 spin_unlock(&mnt->mnt_root->d_lock);

-3

fs/ntfs/file.c

··· 74 74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute 75 75 * @ni: ntfs inode of the attribute to extend 76 76 * @new_init_size: requested new initialized size in bytes 77 - * @cached_page: store any allocated but unused page here 78 - * @lru_pvec: lru-buffering pagevec of the caller 79 77 * 80 78 * Extend the initialized size of an attribute described by the ntfs inode @ni 81 79 * to @new_init_size bytes. This involves zeroing any non-sparse space between ··· 393 395 * @nr_pages: number of page cache pages to obtain 394 396 * @pages: array of pages in which to return the obtained page cache pages 395 397 * @cached_page: allocated but as yet unused page 396 - * @lru_pvec: lru-buffering pagevec of caller 397 398 * 398 399 * Obtain @nr_pages locked page cache pages from the mapping @mapping and 399 400 * starting at index @index.

+12 -3

fs/ocfs2/alloc.c

··· 4961 4961 4962 4962 el = path_leaf_el(path); 4963 4963 split_index = ocfs2_search_extent_list(el, cpos); 4964 + if (split_index == -1) { 4965 + ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 4966 + "Owner %llu has an extent at cpos %u " 4967 + "which can no longer be found.\n", 4968 + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), 4969 + cpos); 4970 + ret = -EROFS; 4971 + goto out; 4972 + } 4964 4973 goto leftright; 4965 4974 } 4966 4975 out: ··· 5144 5135 el = path_leaf_el(left_path); 5145 5136 5146 5137 index = ocfs2_search_extent_list(el, cpos); 5147 - if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5138 + if (index == -1) { 5148 5139 ocfs2_error(sb, 5149 5140 "Owner %llu has an extent at cpos %u which can no " 5150 5141 "longer be found.\n", ··· 5500 5491 5501 5492 el = path_leaf_el(path); 5502 5493 index = ocfs2_search_extent_list(el, cpos); 5503 - if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5494 + if (index == -1) { 5504 5495 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5505 5496 "Owner %llu has an extent at cpos %u which can no " 5506 5497 "longer be found.\n", ··· 5566 5557 5567 5558 el = path_leaf_el(path); 5568 5559 index = ocfs2_search_extent_list(el, cpos); 5569 - if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5560 + if (index == -1) { 5570 5561 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5571 5562 "Owner %llu: split at cpos %u lost record.", 5572 5563 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),

+2 -3

fs/ocfs2/dlm/dlmdomain.c

··· 1923 1923 goto bail; 1924 1924 } 1925 1925 1926 - if (total_backoff > 1927 - msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) { 1926 + if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) { 1928 1927 status = -ERESTARTSYS; 1929 1928 mlog(ML_NOTICE, "Timed out joining dlm domain " 1930 1929 "%s after %u msecs\n", dlm->name, 1931 - jiffies_to_msecs(total_backoff)); 1930 + total_backoff); 1932 1931 goto bail; 1933 1932 } 1934 1933

+4

fs/ocfs2/dlm/dlmmaster.c

··· 2405 2405 if (res->state & DLM_LOCK_RES_MIGRATING) 2406 2406 return 0; 2407 2407 2408 + /* delay migration when the lockres is in RECOVERING state */ 2409 + if (res->state & DLM_LOCK_RES_RECOVERING) 2410 + return 0; 2411 + 2408 2412 if (res->owner != dlm->node_num) 2409 2413 return 0; 2410 2414

+1 -1

fs/ocfs2/move_extents.c

··· 98 98 el = path_leaf_el(path); 99 99 100 100 index = ocfs2_search_extent_list(el, cpos); 101 - if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 101 + if (index == -1) { 102 102 ocfs2_error(inode->i_sb, 103 103 "Inode %llu has an extent at cpos %u which can no " 104 104 "longer be found.\n",

+1 -1

fs/ocfs2/refcounttree.c

··· 3109 3109 el = path_leaf_el(path); 3110 3110 3111 3111 index = ocfs2_search_extent_list(el, cpos); 3112 - if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 3112 + if (index == -1) { 3113 3113 ocfs2_error(sb, 3114 3114 "Inode %llu has an extent at cpos %u which can no " 3115 3115 "longer be found.\n",

+1 -1

fs/ocfs2/slot_map.c

··· 382 382 383 383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks); 384 384 385 - si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, 385 + si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *), 386 386 GFP_KERNEL); 387 387 if (!si->si_bh) { 388 388 status = -ENOMEM;

+1 -1

fs/proc/meminfo.c

··· 168 168 K(global_page_state(NR_WRITEBACK)), 169 169 K(global_page_state(NR_ANON_PAGES)), 170 170 K(global_page_state(NR_FILE_MAPPED)), 171 - K(global_page_state(NR_SHMEM)), 171 + K(i.sharedram), 172 172 K(global_page_state(NR_SLAB_RECLAIMABLE) + 173 173 global_page_state(NR_SLAB_UNRECLAIMABLE)), 174 174 K(global_page_state(NR_SLAB_RECLAIMABLE)),

+21 -6

fs/proc/task_mmu.c

··· 925 925 struct mm_walk *walk) 926 926 { 927 927 struct pagemapread *pm = walk->private; 928 - unsigned long addr; 928 + unsigned long addr = start; 929 929 int err = 0; 930 - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); 931 930 932 - for (addr = start; addr < end; addr += PAGE_SIZE) { 933 - err = add_to_pagemap(addr, &pme, pm); 934 - if (err) 935 - break; 931 + while (addr < end) { 932 + struct vm_area_struct *vma = find_vma(walk->mm, addr); 933 + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); 934 + unsigned long vm_end; 935 + 936 + if (!vma) { 937 + vm_end = end; 938 + } else { 939 + vm_end = min(end, vma->vm_end); 940 + if (vma->vm_flags & VM_SOFTDIRTY) 941 + pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY); 942 + } 943 + 944 + for (; addr < vm_end; addr += PAGE_SIZE) { 945 + err = add_to_pagemap(addr, &pme, pm); 946 + if (err) 947 + goto out; 948 + } 936 949 } 950 + 951 + out: 937 952 return err; 938 953 } 939 954

+1 -1

fs/squashfs/file_direct.c

··· 44 44 45 45 pages = end_index - start_index + 1; 46 46 47 - page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); 47 + page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); 48 48 if (page == NULL) 49 49 return res; 50 50

+3 -2

fs/squashfs/super.c

··· 27 27 * the filesystem. 28 28 */ 29 29 30 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 31 + 30 32 #include <linux/fs.h> 31 33 #include <linux/vfs.h> 32 34 #include <linux/slab.h> ··· 450 448 return err; 451 449 } 452 450 453 - printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) " 454 - "Phillip Lougher\n"); 451 + pr_info("version 4.0 (2009/01/31) Phillip Lougher\n"); 455 452 456 453 return 0; 457 454 }

+31 -31

include/linux/bitmap.h

··· 88 88 * lib/bitmap.c provides these functions: 89 89 */ 90 90 91 - extern int __bitmap_empty(const unsigned long *bitmap, int bits); 92 - extern int __bitmap_full(const unsigned long *bitmap, int bits); 91 + extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); 92 + extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); 93 93 extern int __bitmap_equal(const unsigned long *bitmap1, 94 - const unsigned long *bitmap2, int bits); 94 + const unsigned long *bitmap2, unsigned int nbits); 95 95 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, 96 - int bits); 96 + unsigned int nbits); 97 97 extern void __bitmap_shift_right(unsigned long *dst, 98 98 const unsigned long *src, int shift, int bits); 99 99 extern void __bitmap_shift_left(unsigned long *dst, 100 100 const unsigned long *src, int shift, int bits); 101 101 extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, 102 - const unsigned long *bitmap2, int bits); 102 + const unsigned long *bitmap2, unsigned int nbits); 103 103 extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, 104 - const unsigned long *bitmap2, int bits); 104 + const unsigned long *bitmap2, unsigned int nbits); 105 105 extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, 106 - const unsigned long *bitmap2, int bits); 106 + const unsigned long *bitmap2, unsigned int nbits); 107 107 extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 108 - const unsigned long *bitmap2, int bits); 108 + const unsigned long *bitmap2, unsigned int nbits); 109 109 extern int __bitmap_intersects(const unsigned long *bitmap1, 110 - const unsigned long *bitmap2, int bits); 110 + const unsigned long *bitmap2, unsigned int nbits); 111 111 extern int __bitmap_subset(const unsigned long *bitmap1, 112 - const unsigned long *bitmap2, int bits); 113 - extern int __bitmap_weight(const unsigned long *bitmap, int bits); 112 + 
const unsigned long *bitmap2, unsigned int nbits); 113 + extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); 114 114 115 - extern void bitmap_set(unsigned long *map, int i, int len); 116 - extern void bitmap_clear(unsigned long *map, int start, int nr); 115 + extern void bitmap_set(unsigned long *map, unsigned int start, int len); 116 + extern void bitmap_clear(unsigned long *map, unsigned int start, int len); 117 117 extern unsigned long bitmap_find_next_zero_area(unsigned long *map, 118 118 unsigned long size, 119 119 unsigned long start, ··· 140 140 const unsigned long *relmap, int bits); 141 141 extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, 142 142 int sz, int bits); 143 - extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); 144 - extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); 145 - extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); 143 + extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); 144 + extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); 145 + extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); 146 146 extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); 147 147 extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); 148 148 ··· 188 188 } 189 189 190 190 static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, 191 - const unsigned long *src2, int nbits) 191 + const unsigned long *src2, unsigned int nbits) 192 192 { 193 193 if (small_const_nbits(nbits)) 194 - return (*dst = *src1 & *src2) != 0; 194 + return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; 195 195 return __bitmap_and(dst, src1, src2, nbits); 196 196 } 197 197 198 198 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, 199 - const unsigned long *src2, int 
nbits) 199 + const unsigned long *src2, unsigned int nbits) 200 200 { 201 201 if (small_const_nbits(nbits)) 202 202 *dst = *src1 | *src2; ··· 205 205 } 206 206 207 207 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, 208 - const unsigned long *src2, int nbits) 208 + const unsigned long *src2, unsigned int nbits) 209 209 { 210 210 if (small_const_nbits(nbits)) 211 211 *dst = *src1 ^ *src2; ··· 214 214 } 215 215 216 216 static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, 217 - const unsigned long *src2, int nbits) 217 + const unsigned long *src2, unsigned int nbits) 218 218 { 219 219 if (small_const_nbits(nbits)) 220 - return (*dst = *src1 & ~(*src2)) != 0; 220 + return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; 221 221 return __bitmap_andnot(dst, src1, src2, nbits); 222 222 } 223 223 224 224 static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, 225 - int nbits) 225 + unsigned int nbits) 226 226 { 227 227 if (small_const_nbits(nbits)) 228 - *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); 228 + *dst = ~(*src); 229 229 else 230 230 __bitmap_complement(dst, src, nbits); 231 231 } 232 232 233 233 static inline int bitmap_equal(const unsigned long *src1, 234 - const unsigned long *src2, int nbits) 234 + const unsigned long *src2, unsigned int nbits) 235 235 { 236 236 if (small_const_nbits(nbits)) 237 237 return ! 
((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); ··· 240 240 } 241 241 242 242 static inline int bitmap_intersects(const unsigned long *src1, 243 - const unsigned long *src2, int nbits) 243 + const unsigned long *src2, unsigned int nbits) 244 244 { 245 245 if (small_const_nbits(nbits)) 246 246 return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; ··· 249 249 } 250 250 251 251 static inline int bitmap_subset(const unsigned long *src1, 252 - const unsigned long *src2, int nbits) 252 + const unsigned long *src2, unsigned int nbits) 253 253 { 254 254 if (small_const_nbits(nbits)) 255 255 return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); ··· 257 257 return __bitmap_subset(src1, src2, nbits); 258 258 } 259 259 260 - static inline int bitmap_empty(const unsigned long *src, int nbits) 260 + static inline int bitmap_empty(const unsigned long *src, unsigned nbits) 261 261 { 262 262 if (small_const_nbits(nbits)) 263 263 return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); ··· 265 265 return __bitmap_empty(src, nbits); 266 266 } 267 267 268 - static inline int bitmap_full(const unsigned long *src, int nbits) 268 + static inline int bitmap_full(const unsigned long *src, unsigned int nbits) 269 269 { 270 270 if (small_const_nbits(nbits)) 271 271 return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); ··· 273 273 return __bitmap_full(src, nbits); 274 274 } 275 275 276 - static inline int bitmap_weight(const unsigned long *src, int nbits) 276 + static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) 277 277 { 278 278 if (small_const_nbits(nbits)) 279 279 return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); ··· 284 284 const unsigned long *src, int n, int nbits) 285 285 { 286 286 if (small_const_nbits(nbits)) 287 - *dst = *src >> n; 287 + *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n; 288 288 else 289 289 __bitmap_shift_right(dst, src, n, nbits); 290 290 }

+1 -1

include/linux/byteorder/generic.h

··· 2 2 #define _LINUX_BYTEORDER_GENERIC_H 3 3 4 4 /* 5 - * linux/byteorder_generic.h 5 + * linux/byteorder/generic.h 6 6 * Generic Byte-reordering support 7 7 * 8 8 * The "... p" macros, like le64_to_cpup, can be used with pointers

+27

include/linux/cma.h

··· 1 + #ifndef __CMA_H__ 2 + #define __CMA_H__ 3 + 4 + /* 5 + * There is always at least global CMA area and a few optional 6 + * areas configured in kernel .config. 7 + */ 8 + #ifdef CONFIG_CMA_AREAS 9 + #define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) 10 + 11 + #else 12 + #define MAX_CMA_AREAS (0) 13 + 14 + #endif 15 + 16 + struct cma; 17 + 18 + extern phys_addr_t cma_get_base(struct cma *cma); 19 + extern unsigned long cma_get_size(struct cma *cma); 20 + 21 + extern int __init cma_declare_contiguous(phys_addr_t size, 22 + phys_addr_t base, phys_addr_t limit, 23 + phys_addr_t alignment, unsigned int order_per_bit, 24 + bool fixed, struct cma **res_cma); 25 + extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); 26 + extern bool cma_release(struct cma *cma, struct page *pages, int count); 27 + #endif

+2 -9

include/linux/dma-contiguous.h

··· 53 53 54 54 #ifdef __KERNEL__ 55 55 56 + #include <linux/device.h> 57 + 56 58 struct cma; 57 59 struct page; 58 - struct device; 59 60 60 61 #ifdef CONFIG_DMA_CMA 61 - 62 - /* 63 - * There is always at least global CMA area and a few optional device 64 - * private areas configured in kernel .config. 65 - */ 66 - #define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) 67 62 68 63 extern struct cma *dma_contiguous_default_area; 69 64 ··· 117 122 int count); 118 123 119 124 #else 120 - 121 - #define MAX_CMA_AREAS (0) 122 125 123 126 static inline struct cma *dev_get_cma_area(struct device *dev) 124 127 {

+1 -1

include/linux/fs.h

··· 2688 2688 .read = simple_attr_read, \ 2689 2689 .write = simple_attr_write, \ 2690 2690 .llseek = generic_file_llseek, \ 2691 - }; 2691 + } 2692 2692 2693 2693 static inline __printf(1, 2) 2694 2694 void __simple_attr_check_format(const char *fmt, ...)

+8 -6

include/linux/fsnotify_backend.h

··· 322 322 extern void fsnotify_destroy_event(struct fsnotify_group *group, 323 323 struct fsnotify_event *event); 324 324 /* attach the event to the group notification queue */ 325 - extern int fsnotify_add_notify_event(struct fsnotify_group *group, 326 - struct fsnotify_event *event, 327 - int (*merge)(struct list_head *, 328 - struct fsnotify_event *)); 325 + extern int fsnotify_add_event(struct fsnotify_group *group, 326 + struct fsnotify_event *event, 327 + int (*merge)(struct list_head *, 328 + struct fsnotify_event *)); 329 + /* Remove passed event from groups notification queue */ 330 + extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); 329 331 /* true if the group notification queue is empty */ 330 332 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); 331 333 /* return, but do not dequeue the first event on the notification queue */ 332 - extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); 334 + extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); 333 335 /* return AND dequeue the first event on the notification queue */ 334 - extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); 336 + extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group); 335 337 336 338 /* functions used to manipulate the marks attached to inodes */ 337 339

+1 -1

include/linux/gfp.h

··· 360 360 void *alloc_pages_exact(size_t size, gfp_t gfp_mask); 361 361 void free_pages_exact(void *virt, size_t size); 362 362 /* This is different from alloc_pages_exact_node !!! */ 363 - void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); 363 + void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); 364 364 365 365 #define __get_free_page(gfp_mask) \ 366 366 __get_free_pages((gfp_mask), 0)

+9

include/linux/glob.h

··· 1 + #ifndef _LINUX_GLOB_H 2 + #define _LINUX_GLOB_H 3 + 4 + #include <linux/types.h> /* For bool */ 5 + #include <linux/compiler.h> /* For __pure */ 6 + 7 + bool __pure glob_match(char const *pat, char const *str); 8 + 9 + #endif /* _LINUX_GLOB_H */

+1 -1

include/linux/highmem.h

··· 93 93 94 94 #ifdef CONFIG_DEBUG_HIGHMEM 95 95 WARN_ON_ONCE(in_irq() && !irqs_disabled()); 96 - BUG_ON(idx > KM_TYPE_NR); 96 + BUG_ON(idx >= KM_TYPE_NR); 97 97 #endif 98 98 return idx; 99 99 }

-4

include/linux/huge_mm.h

··· 93 93 #endif /* CONFIG_DEBUG_VM */ 94 94 95 95 extern unsigned long transparent_hugepage_flags; 96 - extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, 97 - pmd_t *dst_pmd, pmd_t *src_pmd, 98 - struct vm_area_struct *vma, 99 - unsigned long addr, unsigned long end); 100 96 extern int split_huge_page_to_list(struct page *page, struct list_head *list); 101 97 static inline int split_huge_page(struct page *page) 102 98 {

-1

include/linux/hugetlb.h

··· 87 87 #endif 88 88 89 89 extern unsigned long hugepages_treat_as_movable; 90 - extern const unsigned long hugetlb_zero, hugetlb_infinity; 91 90 extern int sysctl_hugetlb_shm_group; 92 91 extern struct list_head huge_boot_pages; 93 92

-5

include/linux/kernel.h

··· 493 493 return buf; 494 494 } 495 495 496 - static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) 497 - { 498 - return hex_byte_pack(buf, byte); 499 - } 500 - 501 496 extern int hex_to_bin(char ch); 502 497 extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); 503 498

+1 -1

include/linux/klist.h

··· 44 44 45 45 extern void klist_add_tail(struct klist_node *n, struct klist *k); 46 46 extern void klist_add_head(struct klist_node *n, struct klist *k); 47 - extern void klist_add_after(struct klist_node *n, struct klist_node *pos); 47 + extern void klist_add_behind(struct klist_node *n, struct klist_node *pos); 48 48 extern void klist_add_before(struct klist_node *n, struct klist_node *pos); 49 49 50 50 extern void klist_del(struct klist_node *n);

+7 -7

include/linux/list.h

··· 654 654 *(n->pprev) = n; 655 655 } 656 656 657 - static inline void hlist_add_after(struct hlist_node *n, 658 - struct hlist_node *next) 657 + static inline void hlist_add_behind(struct hlist_node *n, 658 + struct hlist_node *prev) 659 659 { 660 - next->next = n->next; 661 - n->next = next; 662 - next->pprev = &n->next; 660 + n->next = prev->next; 661 + prev->next = n; 662 + n->pprev = &prev->next; 663 663 664 - if(next->next) 665 - next->next->pprev = &next->next; 664 + if (n->next) 665 + n->next->pprev = &n->next; 666 666 } 667 667 668 668 /* after that we'll appear to be on some hlist and hlist_del will work */

+2 -2

include/linux/memblock.h

··· 249 249 /* 250 250 * Set the allocation direction to bottom-up or top-down. 251 251 */ 252 - static inline void memblock_set_bottom_up(bool enable) 252 + static inline void __init memblock_set_bottom_up(bool enable) 253 253 { 254 254 memblock.bottom_up = enable; 255 255 } ··· 264 264 return memblock.bottom_up; 265 265 } 266 266 #else 267 - static inline void memblock_set_bottom_up(bool enable) {} 267 + static inline void __init memblock_set_bottom_up(bool enable) {} 268 268 static inline bool memblock_bottom_up(void) { return false; } 269 269 #endif 270 270

+6 -4

include/linux/memory_hotplug.h

··· 26 26 MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, 27 27 }; 28 28 29 - /* Types for control the zone type of onlined memory */ 29 + /* Types for control the zone type of onlined and offlined memory */ 30 30 enum { 31 - ONLINE_KEEP, 32 - ONLINE_KERNEL, 33 - ONLINE_MOVABLE, 31 + MMOP_OFFLINE = -1, 32 + MMOP_ONLINE_KEEP, 33 + MMOP_ONLINE_KERNEL, 34 + MMOP_ONLINE_MOVABLE, 34 35 }; 35 36 36 37 /* ··· 259 258 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, 260 259 void *arg, int (*func)(struct memory_block *, void *)); 261 260 extern int add_memory(int nid, u64 start, u64 size); 261 + extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default); 262 262 extern int arch_add_memory(int nid, u64 start, u64 size); 263 263 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); 264 264 extern bool is_memblock_offlined(struct memory_block *mem);

+2

include/linux/mmdebug.h

··· 20 20 } while (0) 21 21 #define VM_WARN_ON(cond) WARN_ON(cond) 22 22 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) 23 + #define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) 23 24 #else 24 25 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) 25 26 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) 26 27 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) 27 28 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) 29 + #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) 28 30 #endif 29 31 30 32 #ifdef CONFIG_DEBUG_VIRTUAL

+6

include/linux/mmu_notifier.h

··· 170 170 struct mm_struct *mm); 171 171 extern void mmu_notifier_unregister(struct mmu_notifier *mn, 172 172 struct mm_struct *mm); 173 + extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, 174 + struct mm_struct *mm); 173 175 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); 174 176 extern void __mmu_notifier_release(struct mm_struct *mm); 175 177 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, ··· 289 287 mmu_notifier_change_pte(___mm, ___address, ___pte); \ 290 288 set_pte_at(___mm, ___address, __ptep, ___pte); \ 291 289 }) 290 + 291 + extern void mmu_notifier_call_srcu(struct rcu_head *rcu, 292 + void (*func)(struct rcu_head *rcu)); 293 + extern void mmu_notifier_synchronize(void); 292 294 293 295 #else /* CONFIG_MMU_NOTIFIER */ 294 296

+118 -105

include/linux/mmzone.h

··· 143 143 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ 144 144 NR_DIRTIED, /* page dirtyings since bootup */ 145 145 NR_WRITTEN, /* page writings since bootup */ 146 + NR_PAGES_SCANNED, /* pages scanned since last reclaim */ 146 147 #ifdef CONFIG_NUMA 147 148 NUMA_HIT, /* allocated in intended node */ 148 149 NUMA_MISS, /* allocated in non intended node */ ··· 325 324 #ifndef __GENERATING_BOUNDS_H 326 325 327 326 struct zone { 328 - /* Fields commonly accessed by the page allocator */ 327 + /* Read-mostly fields */ 329 328 330 329 /* zone watermarks, access with *_wmark_pages(zone) macros */ 331 330 unsigned long watermark[NR_WMARK]; 332 - 333 - /* 334 - * When free pages are below this point, additional steps are taken 335 - * when reading the number of free pages to avoid per-cpu counter 336 - * drift allowing watermarks to be breached 337 - */ 338 - unsigned long percpu_drift_mark; 339 331 340 332 /* 341 333 * We don't know if the memory that we're going to allocate will be freeable ··· 338 344 * on the higher zones). This array is recalculated at runtime if the 339 345 * sysctl_lowmem_reserve_ratio sysctl changes. 340 346 */ 341 - unsigned long lowmem_reserve[MAX_NR_ZONES]; 347 + long lowmem_reserve[MAX_NR_ZONES]; 348 + 349 + #ifdef CONFIG_NUMA 350 + int node; 351 + #endif 352 + 353 + /* 354 + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on 355 + * this zone's LRU. Maintained by the pageout code. 356 + */ 357 + unsigned int inactive_ratio; 358 + 359 + struct pglist_data *zone_pgdat; 360 + struct per_cpu_pageset __percpu *pageset; 342 361 343 362 /* 344 363 * This is a per-zone reserve of pages that should not be 345 364 * considered dirtyable memory. 346 365 */ 347 366 unsigned long dirty_balance_reserve; 348 - 349 - #ifdef CONFIG_NUMA 350 - int node; 351 - /* 352 - * zone reclaim becomes active if more unmapped pages exist. 
353 - */ 354 - unsigned long min_unmapped_pages; 355 - unsigned long min_slab_pages; 356 - #endif 357 - struct per_cpu_pageset __percpu *pageset; 358 - /* 359 - * free areas of different sizes 360 - */ 361 - spinlock_t lock; 362 - #if defined CONFIG_COMPACTION || defined CONFIG_CMA 363 - /* Set to true when the PG_migrate_skip bits should be cleared */ 364 - bool compact_blockskip_flush; 365 - 366 - /* pfn where compaction free scanner should start */ 367 - unsigned long compact_cached_free_pfn; 368 - /* pfn where async and sync compaction migration scanner should start */ 369 - unsigned long compact_cached_migrate_pfn[2]; 370 - #endif 371 - #ifdef CONFIG_MEMORY_HOTPLUG 372 - /* see spanned/present_pages for more description */ 373 - seqlock_t span_seqlock; 374 - #endif 375 - struct free_area free_area[MAX_ORDER]; 376 367 377 368 #ifndef CONFIG_SPARSEMEM 378 369 /* ··· 367 388 unsigned long *pageblock_flags; 368 389 #endif /* CONFIG_SPARSEMEM */ 369 390 370 - #ifdef CONFIG_COMPACTION 391 + #ifdef CONFIG_NUMA 371 392 /* 372 - * On compaction failure, 1<<compact_defer_shift compactions 373 - * are skipped before trying again. The number attempted since 374 - * last failure is tracked with compact_considered. 393 + * zone reclaim becomes active if more unmapped pages exist. 
375 394 */ 376 - unsigned int compact_considered; 377 - unsigned int compact_defer_shift; 378 - int compact_order_failed; 379 - #endif 395 + unsigned long min_unmapped_pages; 396 + unsigned long min_slab_pages; 397 + #endif /* CONFIG_NUMA */ 380 398 381 - ZONE_PADDING(_pad1_) 382 - 383 - /* Fields commonly accessed by the page reclaim scanner */ 384 - spinlock_t lru_lock; 385 - struct lruvec lruvec; 386 - 387 - /* Evictions & activations on the inactive file list */ 388 - atomic_long_t inactive_age; 389 - 390 - unsigned long pages_scanned; /* since last reclaim */ 391 - unsigned long flags; /* zone flags, see below */ 392 - 393 - /* Zone statistics */ 394 - atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; 395 - 396 - /* 397 - * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on 398 - * this zone's LRU. Maintained by the pageout code. 399 - */ 400 - unsigned int inactive_ratio; 401 - 402 - 403 - ZONE_PADDING(_pad2_) 404 - /* Rarely used or read-mostly fields */ 405 - 406 - /* 407 - * wait_table -- the array holding the hash table 408 - * wait_table_hash_nr_entries -- the size of the hash table array 409 - * wait_table_bits -- wait_table_size == (1 << wait_table_bits) 410 - * 411 - * The purpose of all these is to keep track of the people 412 - * waiting for a page to become available and make them 413 - * runnable again when possible. The trouble is that this 414 - * consumes a lot of space, especially when so few things 415 - * wait on pages at a given time. So instead of using 416 - * per-page waitqueues, we use a waitqueue hash table. 417 - * 418 - * The bucket discipline is to sleep on the same queue when 419 - * colliding and wake all in that wait queue when removing. 420 - * When something wakes, it must check to be sure its page is 421 - * truly available, a la thundering herd. The cost of a 422 - * collision is great, but given the expected load of the 423 - * table, they should be so rare as to be outweighed by the 424 - * benefits from the saved space. 
425 - * 426 - * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the 427 - * primary users of these fields, and in mm/page_alloc.c 428 - * free_area_init_core() performs the initialization of them. 429 - */ 430 - wait_queue_head_t * wait_table; 431 - unsigned long wait_table_hash_nr_entries; 432 - unsigned long wait_table_bits; 433 - 434 - /* 435 - * Discontig memory support fields. 436 - */ 437 - struct pglist_data *zone_pgdat; 438 399 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ 439 400 unsigned long zone_start_pfn; 440 401 ··· 419 500 * adjust_managed_page_count() should be used instead of directly 420 501 * touching zone->managed_pages and totalram_pages. 421 502 */ 503 + unsigned long managed_pages; 422 504 unsigned long spanned_pages; 423 505 unsigned long present_pages; 424 - unsigned long managed_pages; 506 + 507 + const char *name; 425 508 426 509 /* 427 510 * Number of MIGRATE_RESEVE page block. To maintain for just ··· 431 510 */ 432 511 int nr_migrate_reserve_block; 433 512 513 + #ifdef CONFIG_MEMORY_HOTPLUG 514 + /* see spanned/present_pages for more description */ 515 + seqlock_t span_seqlock; 516 + #endif 517 + 434 518 /* 435 - * rarely used fields: 519 + * wait_table -- the array holding the hash table 520 + * wait_table_hash_nr_entries -- the size of the hash table array 521 + * wait_table_bits -- wait_table_size == (1 << wait_table_bits) 522 + * 523 + * The purpose of all these is to keep track of the people 524 + * waiting for a page to become available and make them 525 + * runnable again when possible. The trouble is that this 526 + * consumes a lot of space, especially when so few things 527 + * wait on pages at a given time. So instead of using 528 + * per-page waitqueues, we use a waitqueue hash table. 529 + * 530 + * The bucket discipline is to sleep on the same queue when 531 + * colliding and wake all in that wait queue when removing. 
532 + * When something wakes, it must check to be sure its page is 533 + * truly available, a la thundering herd. The cost of a 534 + * collision is great, but given the expected load of the 535 + * table, they should be so rare as to be outweighed by the 536 + * benefits from the saved space. 537 + * 538 + * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the 539 + * primary users of these fields, and in mm/page_alloc.c 540 + * free_area_init_core() performs the initialization of them. 436 541 */ 437 - const char *name; 542 + wait_queue_head_t *wait_table; 543 + unsigned long wait_table_hash_nr_entries; 544 + unsigned long wait_table_bits; 545 + 546 + ZONE_PADDING(_pad1_) 547 + 548 + /* Write-intensive fields used from the page allocator */ 549 + spinlock_t lock; 550 + 551 + /* free areas of different sizes */ 552 + struct free_area free_area[MAX_ORDER]; 553 + 554 + /* zone flags, see below */ 555 + unsigned long flags; 556 + 557 + ZONE_PADDING(_pad2_) 558 + 559 + /* Write-intensive fields used by page reclaim */ 560 + 561 + /* Fields commonly accessed by the page reclaim scanner */ 562 + spinlock_t lru_lock; 563 + struct lruvec lruvec; 564 + 565 + /* Evictions & activations on the inactive file list */ 566 + atomic_long_t inactive_age; 567 + 568 + /* 569 + * When free pages are below this point, additional steps are taken 570 + * when reading the number of free pages to avoid per-cpu counter 571 + * drift allowing watermarks to be breached 572 + */ 573 + unsigned long percpu_drift_mark; 574 + 575 + #if defined CONFIG_COMPACTION || defined CONFIG_CMA 576 + /* pfn where compaction free scanner should start */ 577 + unsigned long compact_cached_free_pfn; 578 + /* pfn where async and sync compaction migration scanner should start */ 579 + unsigned long compact_cached_migrate_pfn[2]; 580 + #endif 581 + 582 + #ifdef CONFIG_COMPACTION 583 + /* 584 + * On compaction failure, 1<<compact_defer_shift compactions 585 + * are skipped before trying again. 
The number attempted since 586 + * last failure is tracked with compact_considered. 587 + */ 588 + unsigned int compact_considered; 589 + unsigned int compact_defer_shift; 590 + int compact_order_failed; 591 + #endif 592 + 593 + #if defined CONFIG_COMPACTION || defined CONFIG_CMA 594 + /* Set to true when the PG_migrate_skip bits should be cleared */ 595 + bool compact_blockskip_flush; 596 + #endif 597 + 598 + ZONE_PADDING(_pad3_) 599 + /* Zone statistics */ 600 + atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; 438 601 } ____cacheline_internodealigned_in_smp; 439 602 440 603 typedef enum { ··· 534 529 ZONE_WRITEBACK, /* reclaim scanning has recently found 535 530 * many pages under writeback 536 531 */ 532 + ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ 537 533 } zone_flags_t; 538 534 539 535 static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) ··· 570 564 static inline int zone_is_reclaim_locked(const struct zone *zone) 571 565 { 572 566 return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); 567 + } 568 + 569 + static inline int zone_is_fair_depleted(const struct zone *zone) 570 + { 571 + return test_bit(ZONE_FAIR_DEPLETED, &zone->flags); 573 572 } 574 573 575 574 static inline int zone_is_oom_locked(const struct zone *zone) ··· 883 872 { 884 873 #if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) 885 874 return movable_zone == ZONE_HIGHMEM; 875 + #elif defined(CONFIG_HIGHMEM) 876 + return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; 886 877 #else 887 878 return 0; 888 879 #endif

+10 -1

include/linux/nodemask.h

··· 430 430 for_each_node_mask((__node), node_states[__state]) 431 431 432 432 #define first_online_node first_node(node_states[N_ONLINE]) 433 - #define next_online_node(nid) next_node((nid), node_states[N_ONLINE]) 433 + #define first_memory_node first_node(node_states[N_MEMORY]) 434 + static inline int next_online_node(int nid) 435 + { 436 + return next_node(nid, node_states[N_ONLINE]); 437 + } 438 + static inline int next_memory_node(int nid) 439 + { 440 + return next_node(nid, node_states[N_MEMORY]); 441 + } 434 442 435 443 extern int nr_node_ids; 436 444 extern int nr_online_nodes; ··· 479 471 for ( (node) = 0; (node) == 0; (node) = 1) 480 472 481 473 #define first_online_node 0 474 + #define first_memory_node 0 482 475 #define next_online_node(nid) (MAX_NUMNODES) 483 476 #define nr_node_ids 1 484 477 #define nr_online_nodes 1

+2 -2

include/linux/oom.h

··· 55 55 struct mem_cgroup *memcg, nodemask_t *nodemask, 56 56 const char *message); 57 57 58 - extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 59 - extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 58 + extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); 59 + extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); 60 60 61 61 extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, 62 62 int order, const nodemask_t *nodemask);

+14 -7

include/linux/page-flags.h

··· 171 171 #define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ 172 172 __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) 173 173 174 - #define PAGEFLAG_FALSE(uname) \ 175 - static inline int Page##uname(const struct page *page) \ 176 - { return 0; } 177 - 178 174 #define TESTSCFLAG(uname, lname) \ 179 175 TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) 176 + 177 + #define TESTPAGEFLAG_FALSE(uname) \ 178 + static inline int Page##uname(const struct page *page) { return 0; } 180 179 181 180 #define SETPAGEFLAG_NOOP(uname) \ 182 181 static inline void SetPage##uname(struct page *page) { } ··· 186 187 #define __CLEARPAGEFLAG_NOOP(uname) \ 187 188 static inline void __ClearPage##uname(struct page *page) { } 188 189 190 + #define TESTSETFLAG_FALSE(uname) \ 191 + static inline int TestSetPage##uname(struct page *page) { return 0; } 192 + 189 193 #define TESTCLEARFLAG_FALSE(uname) \ 190 194 static inline int TestClearPage##uname(struct page *page) { return 0; } 191 195 192 196 #define __TESTCLEARFLAG_FALSE(uname) \ 193 197 static inline int __TestClearPage##uname(struct page *page) { return 0; } 198 + 199 + #define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ 200 + SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) 201 + 202 + #define TESTSCFLAG_FALSE(uname) \ 203 + TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) 194 204 195 205 struct page; /* forward declaration */ 196 206 ··· 256 248 PAGEFLAG(SwapCache, swapcache) 257 249 #else 258 250 PAGEFLAG_FALSE(SwapCache) 259 - SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache) 260 251 #endif 261 252 262 253 PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) ··· 265 258 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) 266 259 TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) 267 260 #else 268 - PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) 269 - TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) 261 + PAGEFLAG_FALSE(Mlocked) 
__CLEARPAGEFLAG_NOOP(Mlocked) 262 + TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) 270 263 #endif 271 264 272 265 #ifdef CONFIG_ARCH_USES_PG_UNCACHED

+3

include/linux/pagemap.h

··· 484 484 /* 485 485 * lock_page_or_retry - Lock the page, unless this would block and the 486 486 * caller indicated that it can handle a retry. 487 + * 488 + * Return value and mmap_sem implications depend on flags; see 489 + * __lock_page_or_retry(). 487 490 */ 488 491 static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, 489 492 unsigned int flags)

+1 -1

include/linux/printk.h

··· 31 31 } 32 32 33 33 /* printk's without a loglevel use this.. */ 34 - #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL 34 + #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT 35 35 36 36 /* We show everything that is MORE important than this.. */ 37 37 #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */

+4 -4

include/linux/rculist.h

··· 432 432 } 433 433 434 434 /** 435 - * hlist_add_after_rcu 436 - * @prev: the existing element to add the new element after. 435 + * hlist_add_behind_rcu 437 436 * @n: the new element to add to the hash list. 437 + * @prev: the existing element to add the new element after. 438 438 * 439 439 * Description: 440 440 * Adds the specified element to the specified hlist ··· 449 449 * hlist_for_each_entry_rcu(), used to prevent memory-consistency 450 450 * problems on Alpha CPUs. 451 451 */ 452 - static inline void hlist_add_after_rcu(struct hlist_node *prev, 453 - struct hlist_node *n) 452 + static inline void hlist_add_behind_rcu(struct hlist_node *n, 453 + struct hlist_node *prev) 454 454 { 455 455 n->next = prev->next; 456 456 n->pprev = &prev->next;

-1

include/linux/swap.h

··· 311 311 struct lruvec *lruvec, struct list_head *head); 312 312 extern void activate_page(struct page *); 313 313 extern void mark_page_accessed(struct page *); 314 - extern void init_page_accessed(struct page *page); 315 314 extern void lru_add_drain(void); 316 315 extern void lru_add_drain_cpu(int cpu); 317 316 extern void lru_add_drain_all(void);

+1 -1

include/linux/vmalloc.h

··· 113 113 extern struct vm_struct *find_vm_area(const void *addr); 114 114 115 115 extern int map_vm_area(struct vm_struct *area, pgprot_t prot, 116 - struct page ***pages); 116 + struct page **pages); 117 117 #ifdef CONFIG_MMU 118 118 extern int map_kernel_range_noflush(unsigned long start, unsigned long size, 119 119 pgprot_t prot, struct page **pages);

+1 -1

include/linux/zbud.h

··· 11 11 12 12 struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops); 13 13 void zbud_destroy_pool(struct zbud_pool *pool); 14 - int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, 14 + int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, 15 15 unsigned long *handle); 16 16 void zbud_free(struct zbud_pool *pool, unsigned long handle); 17 17 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);

-118

include/linux/zlib.h

··· 493 493 method). msg is set to null if there is no error message. deflateInit2 does 494 494 not perform any compression: this will be done by deflate(). 495 495 */ 496 - 497 - #if 0 498 - extern int zlib_deflateSetDictionary (z_streamp strm, 499 - const Byte *dictionary, 500 - uInt dictLength); 501 - #endif 502 - /* 503 - Initializes the compression dictionary from the given byte sequence 504 - without producing any compressed output. This function must be called 505 - immediately after deflateInit, deflateInit2 or deflateReset, before any 506 - call of deflate. The compressor and decompressor must use exactly the same 507 - dictionary (see inflateSetDictionary). 508 - 509 - The dictionary should consist of strings (byte sequences) that are likely 510 - to be encountered later in the data to be compressed, with the most commonly 511 - used strings preferably put towards the end of the dictionary. Using a 512 - dictionary is most useful when the data to be compressed is short and can be 513 - predicted with good accuracy; the data can then be compressed better than 514 - with the default empty dictionary. 515 - 516 - Depending on the size of the compression data structures selected by 517 - deflateInit or deflateInit2, a part of the dictionary may in effect be 518 - discarded, for example if the dictionary is larger than the window size in 519 - deflate or deflate2. Thus the strings most likely to be useful should be 520 - put at the end of the dictionary, not at the front. 521 - 522 - Upon return of this function, strm->adler is set to the Adler32 value 523 - of the dictionary; the decompressor may later use this value to determine 524 - which dictionary has been used by the compressor. (The Adler32 value 525 - applies to the whole dictionary even if only a subset of the dictionary is 526 - actually used by the compressor.) 
527 - 528 - deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a 529 - parameter is invalid (such as NULL dictionary) or the stream state is 530 - inconsistent (for example if deflate has already been called for this stream 531 - or if the compression method is bsort). deflateSetDictionary does not 532 - perform any compression: this will be done by deflate(). 533 - */ 534 - 535 - #if 0 536 - extern int zlib_deflateCopy (z_streamp dest, z_streamp source); 537 - #endif 538 - 539 - /* 540 - Sets the destination stream as a complete copy of the source stream. 541 - 542 - This function can be useful when several compression strategies will be 543 - tried, for example when there are several ways of pre-processing the input 544 - data with a filter. The streams that will be discarded should then be freed 545 - by calling deflateEnd. Note that deflateCopy duplicates the internal 546 - compression state which can be quite large, so this strategy is slow and 547 - can consume lots of memory. 548 - 549 - deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not 550 - enough memory, Z_STREAM_ERROR if the source stream state was inconsistent 551 - (such as zalloc being NULL). msg is left unchanged in both source and 552 - destination. 553 - */ 554 496 555 497 extern int zlib_deflateReset (z_streamp strm); 556 498 /* ··· 509 567 { 510 568 return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; 511 569 } 512 - 513 - #if 0 514 - extern int zlib_deflateParams (z_streamp strm, int level, int strategy); 515 - #endif 516 - /* 517 - Dynamically update the compression level and compression strategy. The 518 - interpretation of level and strategy is as in deflateInit2. This can be 519 - used to switch between compression and straight copy of the input data, or 520 - to switch to a different kind of input data requiring a different 521 - strategy. 
If the compression level is changed, the input available so far 522 - is compressed with the old level (and may be flushed); the new level will 523 - take effect only at the next call of deflate(). 524 - 525 - Before the call of deflateParams, the stream state must be set as for 526 - a call of deflate(), since the currently available input may have to 527 - be compressed and flushed. In particular, strm->avail_out must be non-zero. 528 - 529 - deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source 530 - stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR 531 - if strm->avail_out was zero. 532 - */ 533 570 534 571 /* 535 572 extern int inflateInit2 (z_streamp strm, int windowBits); ··· 550 629 any decompression apart from reading the zlib header if present: this will 551 630 be done by inflate(). (So next_in and avail_in may be modified, but next_out 552 631 and avail_out are unchanged.) 553 - */ 554 - 555 - extern int zlib_inflateSetDictionary (z_streamp strm, 556 - const Byte *dictionary, 557 - uInt dictLength); 558 - /* 559 - Initializes the decompression dictionary from the given uncompressed byte 560 - sequence. This function must be called immediately after a call of inflate, 561 - if that call returned Z_NEED_DICT. The dictionary chosen by the compressor 562 - can be determined from the adler32 value returned by that call of inflate. 563 - The compressor and decompressor must use exactly the same dictionary (see 564 - deflateSetDictionary). For raw inflate, this function can be called 565 - immediately after inflateInit2() or inflateReset() and before any call of 566 - inflate() to set the dictionary. The application must insure that the 567 - dictionary that was used for compression is provided. 
568 - 569 - inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a 570 - parameter is invalid (such as NULL dictionary) or the stream state is 571 - inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the 572 - expected one (incorrect adler32 value). inflateSetDictionary does not 573 - perform any decompression: this will be done by subsequent calls of 574 - inflate(). 575 - */ 576 - 577 - #if 0 578 - extern int zlib_inflateSync (z_streamp strm); 579 - #endif 580 - /* 581 - Skips invalid compressed data until a full flush point (see above the 582 - description of deflate with Z_FULL_FLUSH) can be found, or until all 583 - available input is skipped. No output is provided. 584 - 585 - inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR 586 - if no more input was provided, Z_DATA_ERROR if no flush point has been found, 587 - or Z_STREAM_ERROR if the stream structure was inconsistent. In the success 588 - case, the application may save the current current value of total_in which 589 - indicates where valid compressed data was found. In the error case, the 590 - application may repeatedly call inflateSync, providing more input each time, 591 - until success or end of the input data. 592 632 */ 593 633 594 634 extern int zlib_inflateReset (z_streamp strm);

+106

include/linux/zpool.h

··· 1 + /* 2 + * zpool memory storage api 3 + * 4 + * Copyright (C) 2014 Dan Streetman 5 + * 6 + * This is a common frontend for the zbud and zsmalloc memory 7 + * storage pool implementations. Typically, this is used to 8 + * store compressed memory. 9 + */ 10 + 11 + #ifndef _ZPOOL_H_ 12 + #define _ZPOOL_H_ 13 + 14 + struct zpool; 15 + 16 + struct zpool_ops { 17 + int (*evict)(struct zpool *pool, unsigned long handle); 18 + }; 19 + 20 + /* 21 + * Control how a handle is mapped. It will be ignored if the 22 + * implementation does not support it. Its use is optional. 23 + * Note that this does not refer to memory protection, it 24 + * refers to how the memory will be copied in/out if copying 25 + * is necessary during mapping; read-write is the safest as 26 + * it copies the existing memory in on map, and copies the 27 + * changed memory back out on unmap. Write-only does not copy 28 + * in the memory and should only be used for initialization. 29 + * If in doubt, use ZPOOL_MM_DEFAULT which is read-write. 
30 + */ 31 + enum zpool_mapmode { 32 + ZPOOL_MM_RW, /* normal read-write mapping */ 33 + ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */ 34 + ZPOOL_MM_WO, /* write-only (no copy-in at map time) */ 35 + 36 + ZPOOL_MM_DEFAULT = ZPOOL_MM_RW 37 + }; 38 + 39 + struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops); 40 + 41 + char *zpool_get_type(struct zpool *pool); 42 + 43 + void zpool_destroy_pool(struct zpool *pool); 44 + 45 + int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, 46 + unsigned long *handle); 47 + 48 + void zpool_free(struct zpool *pool, unsigned long handle); 49 + 50 + int zpool_shrink(struct zpool *pool, unsigned int pages, 51 + unsigned int *reclaimed); 52 + 53 + void *zpool_map_handle(struct zpool *pool, unsigned long handle, 54 + enum zpool_mapmode mm); 55 + 56 + void zpool_unmap_handle(struct zpool *pool, unsigned long handle); 57 + 58 + u64 zpool_get_total_size(struct zpool *pool); 59 + 60 + 61 + /** 62 + * struct zpool_driver - driver implementation for zpool 63 + * @type: name of the driver. 64 + * @list: entry in the list of zpool drivers. 65 + * @create: create a new pool. 66 + * @destroy: destroy a pool. 67 + * @malloc: allocate mem from a pool. 68 + * @free: free mem from a pool. 69 + * @shrink: shrink the pool. 70 + * @map: map a handle. 71 + * @unmap: unmap a handle. 72 + * @total_size: get total size of a pool. 73 + * 74 + * This is created by a zpool implementation and registered 75 + * with zpool. 
76 + */ 77 + struct zpool_driver { 78 + char *type; 79 + struct module *owner; 80 + atomic_t refcount; 81 + struct list_head list; 82 + 83 + void *(*create)(gfp_t gfp, struct zpool_ops *ops); 84 + void (*destroy)(void *pool); 85 + 86 + int (*malloc)(void *pool, size_t size, gfp_t gfp, 87 + unsigned long *handle); 88 + void (*free)(void *pool, unsigned long handle); 89 + 90 + int (*shrink)(void *pool, unsigned int pages, 91 + unsigned int *reclaimed); 92 + 93 + void *(*map)(void *pool, unsigned long handle, 94 + enum zpool_mapmode mm); 95 + void (*unmap)(void *pool, unsigned long handle); 96 + 97 + u64 (*total_size)(void *pool); 98 + }; 99 + 100 + void zpool_register_driver(struct zpool_driver *driver); 101 + 102 + int zpool_unregister_driver(struct zpool_driver *driver); 103 + 104 + int zpool_evict(void *pool, unsigned long handle); 105 + 106 + #endif

+1

include/trace/events/migrate.h

··· 17 17 {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \ 18 18 {MR_SYSCALL, "syscall_or_cpuset"}, \ 19 19 {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \ 20 + {MR_NUMA_MISPLACED, "numa_misplaced"}, \ 20 21 {MR_CMA, "cma"} 21 22 22 23 TRACE_EVENT(mm_migrate_pages,

+7 -9

include/trace/events/pagemap.h

··· 28 28 29 29 TP_PROTO( 30 30 struct page *page, 31 - unsigned long pfn, 32 - int lru, 33 - unsigned long flags 31 + int lru 34 32 ), 35 33 36 - TP_ARGS(page, pfn, lru, flags), 34 + TP_ARGS(page, lru), 37 35 38 36 TP_STRUCT__entry( 39 37 __field(struct page *, page ) ··· 42 44 43 45 TP_fast_assign( 44 46 __entry->page = page; 45 - __entry->pfn = pfn; 47 + __entry->pfn = page_to_pfn(page); 46 48 __entry->lru = lru; 47 - __entry->flags = flags; 49 + __entry->flags = trace_pagemap_flags(page); 48 50 ), 49 51 50 52 /* Flag format is based on page-types.c formatting for pagemap */ ··· 62 64 63 65 TRACE_EVENT(mm_lru_activate, 64 66 65 - TP_PROTO(struct page *page, unsigned long pfn), 67 + TP_PROTO(struct page *page), 66 68 67 - TP_ARGS(page, pfn), 69 + TP_ARGS(page), 68 70 69 71 TP_STRUCT__entry( 70 72 __field(struct page *, page ) ··· 73 75 74 76 TP_fast_assign( 75 77 __entry->page = page; 76 - __entry->pfn = pfn; 78 + __entry->pfn = page_to_pfn(page); 77 79 ), 78 80 79 81 /* Flag format is based on page-types.c formatting for pagemap */

+42 -4

init/Kconfig

··· 807 807 range 12 21 808 808 default 17 809 809 help 810 - Select kernel log buffer size as a power of 2. 810 + Select the minimal kernel log buffer size as a power of 2. 811 + The final size is affected by LOG_CPU_MAX_BUF_SHIFT config 812 + parameter, see below. Any higher size also might be forced 813 + by "log_buf_len" boot parameter. 814 + 811 815 Examples: 812 - 17 => 128 KB 816 + 17 => 128 KB 813 817 16 => 64 KB 814 - 15 => 32 KB 815 - 14 => 16 KB 818 + 15 => 32 KB 819 + 14 => 16 KB 816 820 13 => 8 KB 817 821 12 => 4 KB 822 + 823 + config LOG_CPU_MAX_BUF_SHIFT 824 + int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" 825 + range 0 21 826 + default 12 if !BASE_SMALL 827 + default 0 if BASE_SMALL 828 + help 829 + This option allows to increase the default ring buffer size 830 + according to the number of CPUs. The value defines the contribution 831 + of each CPU as a power of 2. The used space is typically only few 832 + lines however it might be much more when problems are reported, 833 + e.g. backtraces. 834 + 835 + The increased size means that a new buffer has to be allocated and 836 + the original static one is unused. It makes sense only on systems 837 + with more CPUs. Therefore this value is used only when the sum of 838 + contributions is greater than the half of the default kernel ring 839 + buffer as defined by LOG_BUF_SHIFT. The default values are set 840 + so that more than 64 CPUs are needed to trigger the allocation. 841 + 842 + Also this option is ignored when "log_buf_len" kernel parameter is 843 + used as it forces an exact (power of two) size of the ring buffer. 844 + 845 + The number of possible CPUs is used for this computation ignoring 846 + hotplugging making the computation optimal for the worst case 847 + scenario while allowing a simple algorithm to be used from bootup. 
848 + 849 + Example shift values and their meaning: 850 + 17 => 128 KB for each CPU 851 + 16 => 64 KB for each CPU 852 + 15 => 32 KB for each CPU 853 + 14 => 16 KB for each CPU 854 + 13 => 8 KB for each CPU 855 + 12 => 4 KB for each CPU 818 856 819 857 # 820 858 # Architectures with an unreliable sched_clock() should select this:

+2 -2

kernel/auditfilter.c

··· 106 106 if (unlikely(!entry)) 107 107 return NULL; 108 108 109 - fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL); 109 + fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL); 110 110 if (unlikely(!fields)) { 111 111 kfree(entry); 112 112 return NULL; ··· 160 160 161 161 int __init audit_register_class(int class, unsigned *list) 162 162 { 163 - __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL); 163 + __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL); 164 164 if (!p) 165 165 return -ENOMEM; 166 166 while (*list != ~0U) {

+1

kernel/exit.c

··· 455 455 task_unlock(tsk); 456 456 mm_update_next_owner(mm); 457 457 mmput(mm); 458 + clear_thread_flag(TIF_MEMDIE); 458 459 } 459 460 460 461 /*

+101 -56

kernel/printk/printk.c

··· 45 45 #include <linux/poll.h> 46 46 #include <linux/irq_work.h> 47 47 #include <linux/utsname.h> 48 + #include <linux/ctype.h> 48 49 49 50 #include <asm/uaccess.h> 50 51 ··· 57 56 58 57 int console_printk[4] = { 59 58 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ 60 - DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ 59 + MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ 61 60 CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ 62 61 CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ 63 62 }; ··· 114 113 * This is used for debugging the mess that is the VT code by 115 114 * keeping track if we have the console semaphore held. It's 116 115 * definitely not the perfect debug tool (we don't know if _WE_ 117 - * hold it are racing, but it helps tracking those weird code 118 - * path in the console code where we end up in places I want 119 - * locked without the console sempahore held 116 + * hold it and are racing, but it helps tracking those weird code 117 + * paths in the console code where we end up in places I want 118 + * locked without the console sempahore held). 120 119 */ 121 120 static int console_locked, console_suspended; 122 121 ··· 147 146 * the overall length of the record. 148 147 * 149 148 * The heads to the first and last entry in the buffer, as well as the 150 - * sequence numbers of these both entries are maintained when messages 151 - * are stored.. 149 + * sequence numbers of these entries are maintained when messages are 150 + * stored. 152 151 * 153 152 * If the heads indicate available messages, the length in the header 154 153 * tells the start next message. 
A length == 0 for the next message ··· 258 257 static u32 clear_idx; 259 258 260 259 #define PREFIX_MAX 32 261 - #define LOG_LINE_MAX 1024 - PREFIX_MAX 260 + #define LOG_LINE_MAX (1024 - PREFIX_MAX) 262 261 263 262 /* record buffer */ 264 263 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ··· 267 266 #define LOG_ALIGN __alignof__(struct printk_log) 268 267 #endif 269 268 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 269 + #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) 270 270 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 271 271 static char *log_buf = __log_buf; 272 272 static u32 log_buf_len = __LOG_BUF_LEN; ··· 346 344 while (log_first_seq < log_next_seq) { 347 345 if (logbuf_has_space(msg_size, false)) 348 346 return 0; 349 - /* drop old messages until we have enough continuous space */ 347 + /* drop old messages until we have enough contiguous space */ 350 348 log_first_idx = log_next(log_first_idx); 351 349 log_first_seq++; 352 350 } ··· 455 453 return msg->text_len; 456 454 } 457 455 458 - #ifdef CONFIG_SECURITY_DMESG_RESTRICT 459 - int dmesg_restrict = 1; 460 - #else 461 - int dmesg_restrict; 462 - #endif 456 + int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); 463 457 464 458 static int syslog_action_restricted(int type) 465 459 { ··· 826 828 /* requested log_buf_len from kernel cmdline */ 827 829 static unsigned long __initdata new_log_buf_len; 828 830 831 + /* we practice scaling the ring buffer by powers of 2 */ 832 + static void __init log_buf_len_update(unsigned size) 833 + { 834 + if (size) 835 + size = roundup_pow_of_two(size); 836 + if (size > log_buf_len) 837 + new_log_buf_len = size; 838 + } 839 + 829 840 /* save requested log_buf_len since it's too early to process it */ 830 841 static int __init log_buf_len_setup(char *str) 831 842 { 832 843 unsigned size = memparse(str, &str); 833 844 834 - if (size) 835 - size = roundup_pow_of_two(size); 836 - if (size > log_buf_len) 837 - new_log_buf_len 
= size; 845 + log_buf_len_update(size); 838 846 839 847 return 0; 840 848 } 841 849 early_param("log_buf_len", log_buf_len_setup); 850 + 851 + static void __init log_buf_add_cpu(void) 852 + { 853 + unsigned int cpu_extra; 854 + 855 + /* 856 + * archs should set up cpu_possible_bits properly with 857 + * set_cpu_possible() after setup_arch() but just in 858 + * case lets ensure this is valid. 859 + */ 860 + if (num_possible_cpus() == 1) 861 + return; 862 + 863 + cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; 864 + 865 + /* by default this will only continue through for large > 64 CPUs */ 866 + if (cpu_extra <= __LOG_BUF_LEN / 2) 867 + return; 868 + 869 + pr_info("log_buf_len individual max cpu contribution: %d bytes\n", 870 + __LOG_CPU_MAX_BUF_LEN); 871 + pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", 872 + cpu_extra); 873 + pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); 874 + 875 + log_buf_len_update(cpu_extra + __LOG_BUF_LEN); 876 + } 842 877 843 878 void __init setup_log_buf(int early) 844 879 { ··· 879 848 char *new_log_buf; 880 849 int free; 881 850 851 + if (log_buf != __log_buf) 852 + return; 853 + 854 + if (!early && !new_log_buf_len) 855 + log_buf_add_cpu(); 856 + 882 857 if (!new_log_buf_len) 883 858 return; 884 859 885 860 if (early) { 886 861 new_log_buf = 887 - memblock_virt_alloc(new_log_buf_len, PAGE_SIZE); 862 + memblock_virt_alloc(new_log_buf_len, LOG_ALIGN); 888 863 } else { 889 - new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0); 864 + new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 865 + LOG_ALIGN); 890 866 } 891 867 892 868 if (unlikely(!new_log_buf)) { ··· 910 872 memcpy(log_buf, __log_buf, __LOG_BUF_LEN); 911 873 raw_spin_unlock_irqrestore(&logbuf_lock, flags); 912 874 913 - pr_info("log_buf_len: %d\n", log_buf_len); 875 + pr_info("log_buf_len: %d bytes\n", log_buf_len); 914 876 pr_info("early log buf free: %d(%d%%)\n", 915 877 free, (free * 100) / __LOG_BUF_LEN); 916 878 } 
··· 919 881 920 882 static int __init ignore_loglevel_setup(char *str) 921 883 { 922 - ignore_loglevel = 1; 884 + ignore_loglevel = true; 923 885 pr_info("debug: ignoring loglevel setting.\n"); 924 886 925 887 return 0; ··· 985 947 } 986 948 #endif 987 949 988 - #if defined(CONFIG_PRINTK_TIME) 989 - static bool printk_time = 1; 990 - #else 991 - static bool printk_time; 992 - #endif 950 + static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); 993 951 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 994 952 995 953 static size_t print_time(u64 ts, char *buf) ··· 1344 1310 * for pending data, not the size; return the count of 1345 1311 * records, not the length. 1346 1312 */ 1347 - error = log_next_idx - syslog_idx; 1313 + error = log_next_seq - syslog_seq; 1348 1314 } else { 1349 1315 u64 seq = syslog_seq; 1350 1316 u32 idx = syslog_idx; ··· 1450 1416 /* 1451 1417 * Can we actually use the console at this time on this cpu? 1452 1418 * 1453 - * Console drivers may assume that per-cpu resources have 1454 - * been allocated. So unless they're explicitly marked as 1455 - * being able to cope (CON_ANYTIME) don't call them until 1456 - * this CPU is officially up. 1419 + * Console drivers may assume that per-cpu resources have been allocated. So 1420 + * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't 1421 + * call them until this CPU is officially up. 1457 1422 */ 1458 1423 static inline int can_use_console(unsigned int cpu) 1459 1424 { ··· 1465 1432 * console_lock held, and 'console_locked' set) if it 1466 1433 * is successful, false otherwise. 
1467 1434 */ 1468 - static int console_trylock_for_printk(unsigned int cpu) 1435 + static int console_trylock_for_printk(void) 1469 1436 { 1437 + unsigned int cpu = smp_processor_id(); 1438 + 1470 1439 if (!console_trylock()) 1471 1440 return 0; 1472 1441 /* ··· 1511 1476 struct task_struct *owner; /* task of first print*/ 1512 1477 u64 ts_nsec; /* time of first print */ 1513 1478 u8 level; /* log level of first message */ 1514 - u8 facility; /* log level of first message */ 1479 + u8 facility; /* log facility of first message */ 1515 1480 enum log_flags flags; /* prefix, newline flags */ 1516 1481 bool flushed:1; /* buffer sealed and committed */ 1517 1482 } cont; ··· 1643 1608 */ 1644 1609 if (!oops_in_progress && !lockdep_recursing(current)) { 1645 1610 recursion_bug = 1; 1646 - goto out_restore_irqs; 1611 + local_irq_restore(flags); 1612 + return 0; 1647 1613 } 1648 1614 zap_locks(); 1649 1615 } ··· 1752 1716 1753 1717 logbuf_cpu = UINT_MAX; 1754 1718 raw_spin_unlock(&logbuf_lock); 1719 + lockdep_on(); 1720 + local_irq_restore(flags); 1755 1721 1756 1722 /* If called from the scheduler, we can not call up(). */ 1757 1723 if (!in_sched) { 1724 + lockdep_off(); 1725 + /* 1726 + * Disable preemption to avoid being preempted while holding 1727 + * console_sem which would prevent anyone from printing to 1728 + * console 1729 + */ 1730 + preempt_disable(); 1731 + 1758 1732 /* 1759 1733 * Try to acquire and then immediately release the console 1760 1734 * semaphore. The release will print out buffers and wake up 1761 1735 * /dev/kmsg and syslog() users. 
1762 1736 */ 1763 - if (console_trylock_for_printk(this_cpu)) 1737 + if (console_trylock_for_printk()) 1764 1738 console_unlock(); 1739 + preempt_enable(); 1740 + lockdep_on(); 1765 1741 } 1766 1742 1767 - lockdep_on(); 1768 - out_restore_irqs: 1769 - local_irq_restore(flags); 1770 1743 return printed_len; 1771 1744 } 1772 1745 EXPORT_SYMBOL(vprintk_emit); ··· 1847 1802 1848 1803 #define LOG_LINE_MAX 0 1849 1804 #define PREFIX_MAX 0 1850 - #define LOG_LINE_MAX 0 1805 + 1851 1806 static u64 syslog_seq; 1852 1807 static u32 syslog_idx; 1853 1808 static u64 console_seq; ··· 1926 1881 return 0; 1927 1882 } 1928 1883 /* 1929 - * Set up a list of consoles. Called from init/main.c 1884 + * Set up a console. Called via do_early_param() in init/main.c 1885 + * for each "console=" parameter in the boot command line. 1930 1886 */ 1931 1887 static int __init console_setup(char *str) 1932 1888 { 1933 - char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ 1889 + char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ 1934 1890 char *s, *options, *brl_options = NULL; 1935 1891 int idx; 1936 1892 ··· 1948 1902 strncpy(buf, str, sizeof(buf) - 1); 1949 1903 } 1950 1904 buf[sizeof(buf) - 1] = 0; 1951 - if ((options = strchr(str, ',')) != NULL) 1905 + options = strchr(str, ','); 1906 + if (options) 1952 1907 *(options++) = 0; 1953 1908 #ifdef __sparc__ 1954 1909 if (!strcmp(str, "ttya")) ··· 1958 1911 strcpy(buf, "ttyS1"); 1959 1912 #endif 1960 1913 for (s = buf; *s; s++) 1961 - if ((*s >= '0' && *s <= '9') || *s == ',') 1914 + if (isdigit(*s) || *s == ',') 1962 1915 break; 1963 1916 idx = simple_strtoul(s, NULL, 10); 1964 1917 *s = 0; ··· 1997 1950 i++, c++) 1998 1951 if (strcmp(c->name, name) == 0 && c->index == idx) { 1999 1952 strlcpy(c->name, name_new, sizeof(c->name)); 2000 - c->name[sizeof(c->name) - 1] = 0; 2001 1953 c->options = options; 2002 1954 c->index = idx_new; 2003 1955 return i; ··· 2005 1959 return -1; 2006 1960 } 2007 1961 2008 - bool 
console_suspend_enabled = 1; 1962 + bool console_suspend_enabled = true; 2009 1963 EXPORT_SYMBOL(console_suspend_enabled); 2010 1964 2011 1965 static int __init console_suspend_disable(char *str) 2012 1966 { 2013 - console_suspend_enabled = 0; 1967 + console_suspend_enabled = false; 2014 1968 return 1; 2015 1969 } 2016 1970 __setup("no_console_suspend", console_suspend_disable); ··· 2091 2045 /** 2092 2046 * console_trylock - try to lock the console system for exclusive use. 2093 2047 * 2094 - * Tried to acquire a lock which guarantees that the caller has 2095 - * exclusive access to the console system and the console_drivers list. 2048 + * Try to acquire a lock which guarantees that the caller has exclusive 2049 + * access to the console system and the console_drivers list. 2096 2050 * 2097 2051 * returns 1 on success, and 0 on failure to acquire the lock. 2098 2052 */ ··· 2664 2618 bool printk_timed_ratelimit(unsigned long *caller_jiffies, 2665 2619 unsigned int interval_msecs) 2666 2620 { 2667 - if (*caller_jiffies == 0 2668 - || !time_in_range(jiffies, *caller_jiffies, 2669 - *caller_jiffies 2670 - + msecs_to_jiffies(interval_msecs))) { 2671 - *caller_jiffies = jiffies; 2672 - return true; 2673 - } 2674 - return false; 2621 + unsigned long elapsed = jiffies - *caller_jiffies; 2622 + 2623 + if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) 2624 + return false; 2625 + 2626 + *caller_jiffies = jiffies; 2627 + return true; 2675 2628 } 2676 2629 EXPORT_SYMBOL(printk_timed_ratelimit); 2677 2630

+1 -1

kernel/smp.c

··· 670 670 if (cond_func(cpu, info)) { 671 671 ret = smp_call_function_single(cpu, func, 672 672 info, wait); 673 - WARN_ON_ONCE(!ret); 673 + WARN_ON_ONCE(ret); 674 674 } 675 675 preempt_enable(); 676 676 }

+3 -6

kernel/sysctl.c

··· 1240 1240 .maxlen = sizeof(unsigned long), 1241 1241 .mode = 0644, 1242 1242 .proc_handler = hugetlb_sysctl_handler, 1243 - .extra1 = (void *)&hugetlb_zero, 1244 - .extra2 = (void *)&hugetlb_infinity, 1243 + .extra1 = &zero, 1245 1244 }, 1246 1245 #ifdef CONFIG_NUMA 1247 1246 { ··· 1249 1250 .maxlen = sizeof(unsigned long), 1250 1251 .mode = 0644, 1251 1252 .proc_handler = &hugetlb_mempolicy_sysctl_handler, 1252 - .extra1 = (void *)&hugetlb_zero, 1253 - .extra2 = (void *)&hugetlb_infinity, 1253 + .extra1 = &zero, 1254 1254 }, 1255 1255 #endif 1256 1256 { ··· 1272 1274 .maxlen = sizeof(unsigned long), 1273 1275 .mode = 0644, 1274 1276 .proc_handler = hugetlb_overcommit_handler, 1275 - .extra1 = (void *)&hugetlb_zero, 1276 - .extra2 = (void *)&hugetlb_infinity, 1277 + .extra1 = &zero, 1277 1278 }, 1278 1279 #endif 1279 1280 {

+6 -4

kernel/watchdog.c

··· 260 260 return; 261 261 262 262 if (hardlockup_panic) 263 - panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); 263 + panic("Watchdog detected hard LOCKUP on cpu %d", 264 + this_cpu); 264 265 else 265 - WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); 266 + WARN(1, "Watchdog detected hard LOCKUP on cpu %d", 267 + this_cpu); 266 268 267 269 __this_cpu_write(hard_watchdog_warn, true); 268 270 return; ··· 347 345 } 348 346 } 349 347 350 - printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 348 + pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 351 349 smp_processor_id(), duration, 352 350 current->comm, task_pid_nr(current)); 353 351 print_modules(); ··· 486 484 if (PTR_ERR(event) == -EOPNOTSUPP) 487 485 pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); 488 486 else if (PTR_ERR(event) == -ENOENT) 489 - pr_warning("disabled (cpu%i): hardware events not enabled\n", 487 + pr_warn("disabled (cpu%i): hardware events not enabled\n", 490 488 cpu); 491 489 else 492 490 pr_err("disabled (cpu%i): unable to create perf event: %ld\n",

+33

lib/Kconfig

··· 396 396 config DQL 397 397 bool 398 398 399 + config GLOB 400 + bool 401 + # This actually supports modular compilation, but the module overhead 402 + # is ridiculous for the amount of code involved. Until an out-of-tree 403 + # driver asks for it, we'll just link it directly into the kernel 404 + # when required. Since we're ignoring out-of-tree users, there's also 405 + # no need to bother prompting for a manual decision: 406 + # prompt "glob_match() function" 407 + help 408 + This option provides a glob_match function for performing 409 + simple text pattern matching. It originated in the ATA code 410 + to blacklist particular drive models, but other device drivers 411 + may need similar functionality. 412 + 413 + All drivers in the Linux kernel tree that require this function 414 + should automatically select this option. Say N unless you 415 + are compiling an out-of-tree driver which tells you that it 416 + depends on this. 417 + 418 + config GLOB_SELFTEST 419 + bool "glob self-test on init" 420 + default n 421 + depends on GLOB 422 + help 423 + This option enables a simple self-test of the glob_match 424 + function on startup. It is primarily useful for people 425 + working on the code to ensure they haven't introduced any 426 + regressions. 427 + 428 + It only adds a little bit of code and slows kernel boot (or 429 + module load) by a small amount, so you're welcome to play with 430 + it, but you probably don't need it. 431 + 399 432 # 400 433 # Netlink attribute parsing support is select'ed if needed 401 434 #

+1 -1

lib/Kconfig.debug

··· 15 15 The behavior is also controlled by the kernel command line 16 16 parameter printk.time=1. See Documentation/kernel-parameters.txt 17 17 18 - config DEFAULT_MESSAGE_LOGLEVEL 18 + config MESSAGE_LOGLEVEL_DEFAULT 19 19 int "Default message log level (1-7)" 20 20 range 1 7 21 21 default "4"

+2

lib/Makefile

··· 137 137 138 138 obj-$(CONFIG_DQL) += dynamic_queue_limits.o 139 139 140 + obj-$(CONFIG_GLOB) += glob.o 141 + 140 142 obj-$(CONFIG_MPILIB) += mpi/ 141 143 obj-$(CONFIG_SIGNATURE) += digsig.o 142 144

+56 -55

lib/bitmap.c

··· 40 40 * for the best explanations of this ordering. 41 41 */ 42 42 43 - int __bitmap_empty(const unsigned long *bitmap, int bits) 43 + int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) 44 44 { 45 - int k, lim = bits/BITS_PER_LONG; 45 + unsigned int k, lim = bits/BITS_PER_LONG; 46 46 for (k = 0; k < lim; ++k) 47 47 if (bitmap[k]) 48 48 return 0; ··· 55 55 } 56 56 EXPORT_SYMBOL(__bitmap_empty); 57 57 58 - int __bitmap_full(const unsigned long *bitmap, int bits) 58 + int __bitmap_full(const unsigned long *bitmap, unsigned int bits) 59 59 { 60 - int k, lim = bits/BITS_PER_LONG; 60 + unsigned int k, lim = bits/BITS_PER_LONG; 61 61 for (k = 0; k < lim; ++k) 62 62 if (~bitmap[k]) 63 63 return 0; ··· 71 71 EXPORT_SYMBOL(__bitmap_full); 72 72 73 73 int __bitmap_equal(const unsigned long *bitmap1, 74 - const unsigned long *bitmap2, int bits) 74 + const unsigned long *bitmap2, unsigned int bits) 75 75 { 76 - int k, lim = bits/BITS_PER_LONG; 76 + unsigned int k, lim = bits/BITS_PER_LONG; 77 77 for (k = 0; k < lim; ++k) 78 78 if (bitmap1[k] != bitmap2[k]) 79 79 return 0; ··· 86 86 } 87 87 EXPORT_SYMBOL(__bitmap_equal); 88 88 89 - void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) 89 + void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) 90 90 { 91 - int k, lim = bits/BITS_PER_LONG; 91 + unsigned int k, lim = bits/BITS_PER_LONG; 92 92 for (k = 0; k < lim; ++k) 93 93 dst[k] = ~src[k]; 94 94 95 95 if (bits % BITS_PER_LONG) 96 - dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); 96 + dst[k] = ~src[k]; 97 97 } 98 98 EXPORT_SYMBOL(__bitmap_complement); 99 99 ··· 182 182 EXPORT_SYMBOL(__bitmap_shift_left); 183 183 184 184 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, 185 - const unsigned long *bitmap2, int bits) 185 + const unsigned long *bitmap2, unsigned int bits) 186 186 { 187 - int k; 188 - int nr = BITS_TO_LONGS(bits); 187 + unsigned int k; 188 + unsigned int lim = 
bits/BITS_PER_LONG; 189 189 unsigned long result = 0; 190 190 191 - for (k = 0; k < nr; k++) 191 + for (k = 0; k < lim; k++) 192 192 result |= (dst[k] = bitmap1[k] & bitmap2[k]); 193 + if (bits % BITS_PER_LONG) 194 + result |= (dst[k] = bitmap1[k] & bitmap2[k] & 195 + BITMAP_LAST_WORD_MASK(bits)); 193 196 return result != 0; 194 197 } 195 198 EXPORT_SYMBOL(__bitmap_and); 196 199 197 200 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, 198 - const unsigned long *bitmap2, int bits) 201 + const unsigned long *bitmap2, unsigned int bits) 199 202 { 200 - int k; 201 - int nr = BITS_TO_LONGS(bits); 203 + unsigned int k; 204 + unsigned int nr = BITS_TO_LONGS(bits); 202 205 203 206 for (k = 0; k < nr; k++) 204 207 dst[k] = bitmap1[k] | bitmap2[k]; ··· 209 206 EXPORT_SYMBOL(__bitmap_or); 210 207 211 208 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, 212 - const unsigned long *bitmap2, int bits) 209 + const unsigned long *bitmap2, unsigned int bits) 213 210 { 214 - int k; 215 - int nr = BITS_TO_LONGS(bits); 211 + unsigned int k; 212 + unsigned int nr = BITS_TO_LONGS(bits); 216 213 217 214 for (k = 0; k < nr; k++) 218 215 dst[k] = bitmap1[k] ^ bitmap2[k]; ··· 220 217 EXPORT_SYMBOL(__bitmap_xor); 221 218 222 219 int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 223 - const unsigned long *bitmap2, int bits) 220 + const unsigned long *bitmap2, unsigned int bits) 224 221 { 225 - int k; 226 - int nr = BITS_TO_LONGS(bits); 222 + unsigned int k; 223 + unsigned int lim = bits/BITS_PER_LONG; 227 224 unsigned long result = 0; 228 225 229 - for (k = 0; k < nr; k++) 226 + for (k = 0; k < lim; k++) 230 227 result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); 228 + if (bits % BITS_PER_LONG) 229 + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & 230 + BITMAP_LAST_WORD_MASK(bits)); 231 231 return result != 0; 232 232 } 233 233 EXPORT_SYMBOL(__bitmap_andnot); 234 234 235 235 int __bitmap_intersects(const unsigned long *bitmap1, 236 - const 
unsigned long *bitmap2, int bits) 236 + const unsigned long *bitmap2, unsigned int bits) 237 237 { 238 - int k, lim = bits/BITS_PER_LONG; 238 + unsigned int k, lim = bits/BITS_PER_LONG; 239 239 for (k = 0; k < lim; ++k) 240 240 if (bitmap1[k] & bitmap2[k]) 241 241 return 1; ··· 251 245 EXPORT_SYMBOL(__bitmap_intersects); 252 246 253 247 int __bitmap_subset(const unsigned long *bitmap1, 254 - const unsigned long *bitmap2, int bits) 248 + const unsigned long *bitmap2, unsigned int bits) 255 249 { 256 - int k, lim = bits/BITS_PER_LONG; 250 + unsigned int k, lim = bits/BITS_PER_LONG; 257 251 for (k = 0; k < lim; ++k) 258 252 if (bitmap1[k] & ~bitmap2[k]) 259 253 return 0; ··· 265 259 } 266 260 EXPORT_SYMBOL(__bitmap_subset); 267 261 268 - int __bitmap_weight(const unsigned long *bitmap, int bits) 262 + int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) 269 263 { 270 - int k, w = 0, lim = bits/BITS_PER_LONG; 264 + unsigned int k, lim = bits/BITS_PER_LONG; 265 + int w = 0; 271 266 272 267 for (k = 0; k < lim; k++) 273 268 w += hweight_long(bitmap[k]); ··· 280 273 } 281 274 EXPORT_SYMBOL(__bitmap_weight); 282 275 283 - void bitmap_set(unsigned long *map, int start, int nr) 276 + void bitmap_set(unsigned long *map, unsigned int start, int len) 284 277 { 285 278 unsigned long *p = map + BIT_WORD(start); 286 - const int size = start + nr; 279 + const unsigned int size = start + len; 287 280 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); 288 281 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); 289 282 290 - while (nr - bits_to_set >= 0) { 283 + while (len - bits_to_set >= 0) { 291 284 *p |= mask_to_set; 292 - nr -= bits_to_set; 285 + len -= bits_to_set; 293 286 bits_to_set = BITS_PER_LONG; 294 287 mask_to_set = ~0UL; 295 288 p++; 296 289 } 297 - if (nr) { 290 + if (len) { 298 291 mask_to_set &= BITMAP_LAST_WORD_MASK(size); 299 292 *p |= mask_to_set; 300 293 } 301 294 } 302 295 EXPORT_SYMBOL(bitmap_set); 303 296 304 - void 
bitmap_clear(unsigned long *map, int start, int nr) 297 + void bitmap_clear(unsigned long *map, unsigned int start, int len) 305 298 { 306 299 unsigned long *p = map + BIT_WORD(start); 307 - const int size = start + nr; 300 + const unsigned int size = start + len; 308 301 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); 309 302 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); 310 303 311 - while (nr - bits_to_clear >= 0) { 304 + while (len - bits_to_clear >= 0) { 312 305 *p &= ~mask_to_clear; 313 - nr -= bits_to_clear; 306 + len -= bits_to_clear; 314 307 bits_to_clear = BITS_PER_LONG; 315 308 mask_to_clear = ~0UL; 316 309 p++; 317 310 } 318 - if (nr) { 311 + if (len) { 319 312 mask_to_clear &= BITMAP_LAST_WORD_MASK(size); 320 313 *p &= ~mask_to_clear; 321 314 } ··· 671 664 672 665 int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) 673 666 { 674 - char *nl = strchr(bp, '\n'); 675 - int len; 676 - 677 - if (nl) 678 - len = nl - bp; 679 - else 680 - len = strlen(bp); 667 + char *nl = strchrnul(bp, '\n'); 668 + int len = nl - bp; 681 669 682 670 return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); 683 671 } ··· 718 716 * 719 717 * If for example, just bits 4 through 7 are set in @buf, then @pos 720 718 * values 4 through 7 will get mapped to 0 through 3, respectively, 721 - * and other @pos values will get mapped to 0. When @pos value 7 719 + * and other @pos values will get mapped to -1. When @pos value 7 722 720 * gets mapped to (returns) @ord value 3 in this example, that means 723 721 * that bit 7 is the 3rd (starting with 0th) set bit in @buf. 
724 722 * ··· 1048 1046 REG_OP_RELEASE, /* clear all bits in region */ 1049 1047 }; 1050 1048 1051 - static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) 1049 + static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op) 1052 1050 { 1053 1051 int nbits_reg; /* number of bits in region */ 1054 1052 int index; /* index first long of region in bitmap */ ··· 1114 1112 * Return the bit offset in bitmap of the allocated region, 1115 1113 * or -errno on failure. 1116 1114 */ 1117 - int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) 1115 + int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) 1118 1116 { 1119 - int pos, end; /* scans bitmap by regions of size order */ 1117 + unsigned int pos, end; /* scans bitmap by regions of size order */ 1120 1118 1121 - for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { 1119 + for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) { 1122 1120 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) 1123 1121 continue; 1124 1122 __reg_op(bitmap, pos, order, REG_OP_ALLOC); ··· 1139 1137 * 1140 1138 * No return value. 1141 1139 */ 1142 - void bitmap_release_region(unsigned long *bitmap, int pos, int order) 1140 + void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) 1143 1141 { 1144 1142 __reg_op(bitmap, pos, order, REG_OP_RELEASE); 1145 1143 } ··· 1156 1154 * Return 0 on success, or %-EBUSY if specified region wasn't 1157 1155 * free (not all bits were zero). 
1158 1156 */ 1159 - int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) 1157 + int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) 1160 1158 { 1161 1159 if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) 1162 1160 return -EBUSY; 1163 - __reg_op(bitmap, pos, order, REG_OP_ALLOC); 1164 - return 0; 1161 + return __reg_op(bitmap, pos, order, REG_OP_ALLOC); 1165 1162 } 1166 1163 EXPORT_SYMBOL(bitmap_allocate_region); 1167 1164

+10 -5

lib/cmdline.c

··· 121 121 * @retptr: (output) Optional pointer to next char after parse completes 122 122 * 123 123 * Parses a string into a number. The number stored at @ptr is 124 - * potentially suffixed with %K (for kilobytes, or 1024 bytes), 125 - * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or 126 - * 1073741824). If the number is suffixed with K, M, or G, then 127 - * the return value is the number multiplied by one kilobyte, one 128 - * megabyte, or one gigabyte, respectively. 124 + * potentially suffixed with K, M, G, T, P, E. 129 125 */ 130 126 131 127 unsigned long long memparse(const char *ptr, char **retptr) ··· 131 135 unsigned long long ret = simple_strtoull(ptr, &endptr, 0); 132 136 133 137 switch (*endptr) { 138 + case 'E': 139 + case 'e': 140 + ret <<= 10; 141 + case 'P': 142 + case 'p': 143 + ret <<= 10; 144 + case 'T': 145 + case 't': 146 + ret <<= 10; 134 147 case 'G': 135 148 case 'g': 136 149 ret <<= 10;

+287

lib/glob.c

··· 1 + #include <linux/module.h> 2 + #include <linux/glob.h> 3 + 4 + /* 5 + * The only reason this code can be compiled as a module is because the 6 + * ATA code that depends on it can be as well. In practice, they're 7 + * both usually compiled in and the module overhead goes away. 8 + */ 9 + MODULE_DESCRIPTION("glob(7) matching"); 10 + MODULE_LICENSE("Dual MIT/GPL"); 11 + 12 + /** 13 + * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) 14 + * @pat: Shell-style pattern to match, e.g. "*.[ch]". 15 + * @str: String to match. The pattern must match the entire string. 16 + * 17 + * Perform shell-style glob matching, returning true (1) if the match 18 + * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). 19 + * 20 + * Pattern metacharacters are ?, *, [ and \. 21 + * (And, inside character classes, !, - and ].) 22 + * 23 + * This is a small and simple implementation intended for device blacklists 24 + * where a string is matched against a number of patterns. Thus, it 25 + * does not preprocess the patterns. It is non-recursive, and run-time 26 + * is at most quadratic: strlen(@str)*strlen(@pat). 27 + * 28 + * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); 29 + * it takes 6 passes over the pattern before matching the string. 30 + * 31 + * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT 32 + * treat / or leading . specially; it isn't actually used for pathnames. 33 + * 34 + * Note that according to glob(7) (and unlike bash), character classes 35 + * are complemented by a leading !; this does not support the regex-style 36 + * [^a-z] syntax. 37 + * 38 + * An opening bracket without a matching close is matched literally. 39 + */ 40 + bool __pure glob_match(char const *pat, char const *str) 41 + { 42 + /* 43 + * Backtrack to previous * on mismatch and retry starting one 44 + * character later in the string.
Because * matches all characters 45 + * (no exception for /), it can be easily proved that there's 46 + * never a need to backtrack multiple levels. 47 + */ 48 + char const *back_pat = NULL, *back_str = back_str; 49 + 50 + /* 51 + * Loop over each token (character or class) in pat, matching 52 + * it against the remaining unmatched tail of str. Return false 53 + * on mismatch, or true after matching the trailing nul bytes. 54 + */ 55 + for (;;) { 56 + unsigned char c = *str++; 57 + unsigned char d = *pat++; 58 + 59 + switch (d) { 60 + case '?': /* Wildcard: anything but nul */ 61 + if (c == '\0') 62 + return false; 63 + break; 64 + case '*': /* Any-length wildcard */ 65 + if (*pat == '\0') /* Optimize trailing * case */ 66 + return true; 67 + back_pat = pat; 68 + back_str = --str; /* Allow zero-length match */ 69 + break; 70 + case '[': { /* Character class */ 71 + bool match = false, inverted = (*pat == '!'); 72 + char const *class = pat + inverted; 73 + unsigned char a = *class++; 74 + 75 + /* 76 + * Iterate over each span in the character class. 77 + * A span is either a single character a, or a 78 + * range a-b. The first span may begin with ']'. 79 + */ 80 + do { 81 + unsigned char b = a; 82 + 83 + if (a == '\0') /* Malformed */ 84 + goto literal; 85 + 86 + if (class[0] == '-' && class[1] != ']') { 87 + b = class[1]; 88 + 89 + if (b == '\0') 90 + goto literal; 91 + 92 + class += 2; 93 + /* Any special action if a > b? */ 94 + } 95 + match |= (a <= c && c <= b); 96 + } while ((a = *class++) != ']'); 97 + 98 + if (match == inverted) 99 + goto backtrack; 100 + pat = class; 101 + } 102 + break; 103 + case '\\': 104 + d = *pat++; 105 + /*FALLTHROUGH*/ 106 + default: /* Literal character */ 107 + literal: 108 + if (c == d) { 109 + if (d == '\0') 110 + return true; 111 + break; 112 + } 113 + backtrack: 114 + if (c == '\0' || !back_pat) 115 + return false; /* No point continuing */ 116 + /* Try again from last *, one character later in str. 
*/ 117 + pat = back_pat; 118 + str = ++back_str; 119 + break; 120 + } 121 + } 122 + } 123 + EXPORT_SYMBOL(glob_match); 124 + 125 + 126 + #ifdef CONFIG_GLOB_SELFTEST 127 + 128 + #include <linux/printk.h> 129 + #include <linux/moduleparam.h> 130 + 131 + /* Boot with "glob.verbose=1" to show successful tests, too */ 132 + static bool verbose = false; 133 + module_param(verbose, bool, 0); 134 + 135 + struct glob_test { 136 + char const *pat, *str; 137 + bool expected; 138 + }; 139 + 140 + static bool __pure __init test(char const *pat, char const *str, bool expected) 141 + { 142 + bool match = glob_match(pat, str); 143 + bool success = match == expected; 144 + 145 + /* Can't get string literals into a particular section, so... */ 146 + static char const msg_error[] __initconst = 147 + KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; 148 + static char const msg_ok[] __initconst = 149 + KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; 150 + static char const mismatch[] __initconst = "mismatch"; 151 + char const *message; 152 + 153 + if (!success) 154 + message = msg_error; 155 + else if (verbose) 156 + message = msg_ok; 157 + else 158 + return success; 159 + 160 + printk(message, pat, str, mismatch + 3*match); 161 + return success; 162 + } 163 + 164 + /* 165 + * The tests are all jammed together in one array to make it simpler 166 + * to place that array in the .init.rodata section. The obvious 167 + * "array of structures containing char *" has no way to force the 168 + * pointed-to strings to be in a particular section. 169 + * 170 + * Anyway, a test consists of: 171 + * 1. Expected glob_match result: '1' or '0'. 172 + * 2. Pattern to match: null-terminated string 173 + * 3. String to match against: null-terminated string 174 + * 175 + * The list of tests is terminated with a final '\0' instead of 176 + * a glob_match result character. 
177 + */ 178 + static char const glob_tests[] __initconst = 179 + /* Some basic tests */ 180 + "1" "a\0" "a\0" 181 + "0" "a\0" "b\0" 182 + "0" "a\0" "aa\0" 183 + "0" "a\0" "\0" 184 + "1" "\0" "\0" 185 + "0" "\0" "a\0" 186 + /* Simple character class tests */ 187 + "1" "[a]\0" "a\0" 188 + "0" "[a]\0" "b\0" 189 + "0" "[!a]\0" "a\0" 190 + "1" "[!a]\0" "b\0" 191 + "1" "[ab]\0" "a\0" 192 + "1" "[ab]\0" "b\0" 193 + "0" "[ab]\0" "c\0" 194 + "1" "[!ab]\0" "c\0" 195 + "1" "[a-c]\0" "b\0" 196 + "0" "[a-c]\0" "d\0" 197 + /* Corner cases in character class parsing */ 198 + "1" "[a-c-e-g]\0" "-\0" 199 + "0" "[a-c-e-g]\0" "d\0" 200 + "1" "[a-c-e-g]\0" "f\0" 201 + "1" "[]a-ceg-ik[]\0" "a\0" 202 + "1" "[]a-ceg-ik[]\0" "]\0" 203 + "1" "[]a-ceg-ik[]\0" "[\0" 204 + "1" "[]a-ceg-ik[]\0" "h\0" 205 + "0" "[]a-ceg-ik[]\0" "f\0" 206 + "0" "[!]a-ceg-ik[]\0" "h\0" 207 + "0" "[!]a-ceg-ik[]\0" "]\0" 208 + "1" "[!]a-ceg-ik[]\0" "f\0" 209 + /* Simple wild cards */ 210 + "1" "?\0" "a\0" 211 + "0" "?\0" "aa\0" 212 + "0" "??\0" "a\0" 213 + "1" "?x?\0" "axb\0" 214 + "0" "?x?\0" "abx\0" 215 + "0" "?x?\0" "xab\0" 216 + /* Asterisk wild cards (backtracking) */ 217 + "0" "*??\0" "a\0" 218 + "1" "*??\0" "ab\0" 219 + "1" "*??\0" "abc\0" 220 + "1" "*??\0" "abcd\0" 221 + "0" "??*\0" "a\0" 222 + "1" "??*\0" "ab\0" 223 + "1" "??*\0" "abc\0" 224 + "1" "??*\0" "abcd\0" 225 + "0" "?*?\0" "a\0" 226 + "1" "?*?\0" "ab\0" 227 + "1" "?*?\0" "abc\0" 228 + "1" "?*?\0" "abcd\0" 229 + "1" "*b\0" "b\0" 230 + "1" "*b\0" "ab\0" 231 + "0" "*b\0" "ba\0" 232 + "1" "*b\0" "bb\0" 233 + "1" "*b\0" "abb\0" 234 + "1" "*b\0" "bab\0" 235 + "1" "*bc\0" "abbc\0" 236 + "1" "*bc\0" "bc\0" 237 + "1" "*bc\0" "bbc\0" 238 + "1" "*bc\0" "bcbc\0" 239 + /* Multiple asterisks (complex backtracking) */ 240 + "1" "*ac*\0" "abacadaeafag\0" 241 + "1" "*ac*ae*ag*\0" "abacadaeafag\0" 242 + "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" 243 + "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" 244 + "1" "*abcd*\0" "abcabcabcabcdefg\0" 245 + "1" "*ab*cd*\0" 
"abcabcabcabcdefg\0" 246 + "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" 247 + "0" "*abcd*\0" "abcabcabcabcefg\0" 248 + "0" "*ab*cd*\0" "abcabcabcabcefg\0"; 249 + 250 + static int __init glob_init(void) 251 + { 252 + unsigned successes = 0; 253 + unsigned n = 0; 254 + char const *p = glob_tests; 255 + static char const message[] __initconst = 256 + KERN_INFO "glob: %u self-tests passed, %u failed\n"; 257 + 258 + /* 259 + * Tests are jammed together in a string. The first byte is '1' 260 + * or '0' to indicate the expected outcome, or '\0' to indicate the 261 + * end of the tests. Then come two null-terminated strings: the 262 + * pattern and the string to match it against. 263 + */ 264 + while (*p) { 265 + bool expected = *p++ & 1; 266 + char const *pat = p; 267 + 268 + p += strlen(p) + 1; 269 + successes += test(pat, p, expected); 270 + p += strlen(p) + 1; 271 + n++; 272 + } 273 + 274 + n -= successes; 275 + printk(message, successes, n); 276 + 277 + /* What's the errno for "kernel bug detected"? Guess... */ 278 + return n ? -ECANCELED : 0; 279 + } 280 + 281 + /* We need a dummy exit function to allow unload */ 282 + static void __exit glob_fini(void) { } 283 + 284 + module_init(glob_init); 285 + module_exit(glob_fini); 286 + 287 + #endif /* CONFIG_GLOB_SELFTEST */

+3 -3

lib/klist.c

··· 140 140 EXPORT_SYMBOL_GPL(klist_add_tail); 141 141 142 142 /** 143 - * klist_add_after - Init a klist_node and add it after an existing node 143 + * klist_add_behind - Init a klist_node and add it after an existing node 144 144 * @n: node we're adding. 145 145 * @pos: node to put @n after 146 146 */ 147 - void klist_add_after(struct klist_node *n, struct klist_node *pos) 147 + void klist_add_behind(struct klist_node *n, struct klist_node *pos) 148 148 { 149 149 struct klist *k = knode_klist(pos); 150 150 ··· 153 153 list_add(&n->n_node, &pos->n_node); 154 154 spin_unlock(&k->k_lock); 155 155 } 156 - EXPORT_SYMBOL_GPL(klist_add_after); 156 + EXPORT_SYMBOL_GPL(klist_add_behind); 157 157 158 158 /** 159 159 * klist_add_before - Init a klist_node and add it before an existing node

+35 -36

lib/list_sort.c

··· 1 + 2 + #define pr_fmt(fmt) "list_sort_test: " fmt 3 + 1 4 #include <linux/kernel.h> 2 5 #include <linux/module.h> 3 6 #include <linux/list_sort.h> ··· 50 47 struct list_head *a, struct list_head *b) 51 48 { 52 49 struct list_head *tail = head; 50 + u8 count = 0; 53 51 54 52 while (a && b) { 55 53 /* if equal, take 'a' -- important for sort stability */ ··· 74 70 * element comparison is needed, so the client's cmp() 75 71 * routine can invoke cond_resched() periodically. 76 72 */ 77 - (*cmp)(priv, tail->next, tail->next); 73 + if (unlikely(!(++count))) 74 + (*cmp)(priv, tail->next, tail->next); 78 75 79 76 tail->next->prev = tail; 80 77 tail = tail->next; ··· 128 123 } 129 124 if (lev > max_lev) { 130 125 if (unlikely(lev >= ARRAY_SIZE(part)-1)) { 131 - printk_once(KERN_DEBUG "list passed to" 132 - " list_sort() too long for" 133 - " efficiency\n"); 126 + printk_once(KERN_DEBUG "list too long for efficiency\n"); 134 127 lev--; 135 128 } 136 129 max_lev = lev; ··· 171 168 static int __init check(struct debug_el *ela, struct debug_el *elb) 172 169 { 173 170 if (ela->serial >= TEST_LIST_LEN) { 174 - printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", 175 - ela->serial); 171 + pr_err("error: incorrect serial %d\n", ela->serial); 176 172 return -EINVAL; 177 173 } 178 174 if (elb->serial >= TEST_LIST_LEN) { 179 - printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", 180 - elb->serial); 175 + pr_err("error: incorrect serial %d\n", elb->serial); 181 176 return -EINVAL; 182 177 } 183 178 if (elts[ela->serial] != ela || elts[elb->serial] != elb) { 184 - printk(KERN_ERR "list_sort_test: error: phantom element\n"); 179 + pr_err("error: phantom element\n"); 185 180 return -EINVAL; 186 181 } 187 182 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { 188 - printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", 189 - ela->poison1, ela->poison2); 183 + pr_err("error: bad poison: %#x/%#x\n", 184 + ela->poison1, ela->poison2); 190 
185 return -EINVAL; 191 186 } 192 187 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { 193 - printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", 194 - elb->poison1, elb->poison2); 188 + pr_err("error: bad poison: %#x/%#x\n", 189 + elb->poison1, elb->poison2); 195 190 return -EINVAL; 196 191 } 197 192 return 0; ··· 208 207 209 208 static int __init list_sort_test(void) 210 209 { 211 - int i, count = 1, err = -EINVAL; 210 + int i, count = 1, err = -ENOMEM; 212 211 struct debug_el *el; 213 - struct list_head *cur, *tmp; 212 + struct list_head *cur; 214 213 LIST_HEAD(head); 215 214 216 - printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n"); 215 + pr_debug("start testing list_sort()\n"); 217 216 218 - elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL); 217 + elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); 219 218 if (!elts) { 220 - printk(KERN_ERR "list_sort_test: error: cannot allocate " 221 - "memory\n"); 222 - goto exit; 219 + pr_err("error: cannot allocate memory\n"); 220 + return err; 223 221 } 224 222 225 223 for (i = 0; i < TEST_LIST_LEN; i++) { 226 224 el = kmalloc(sizeof(*el), GFP_KERNEL); 227 225 if (!el) { 228 - printk(KERN_ERR "list_sort_test: error: cannot " 229 - "allocate memory\n"); 226 + pr_err("error: cannot allocate memory\n"); 230 227 goto exit; 231 228 } 232 229 /* force some equivalencies */ ··· 238 239 239 240 list_sort(NULL, &head, cmp); 240 241 242 + err = -EINVAL; 241 243 for (cur = head.next; cur->next != &head; cur = cur->next) { 242 244 struct debug_el *el1; 243 245 int cmp_result; 244 246 245 247 if (cur->next->prev != cur) { 246 - printk(KERN_ERR "list_sort_test: error: list is " 247 - "corrupted\n"); 248 + pr_err("error: list is corrupted\n"); 248 249 goto exit; 249 250 } 250 251 251 252 cmp_result = cmp(NULL, cur, cur->next); 252 253 if (cmp_result > 0) { 253 - printk(KERN_ERR "list_sort_test: error: list is not " 254 - "sorted\n"); 254 + pr_err("error: list is not sorted\n"); 
255 255 goto exit; 256 256 } 257 257 258 258 el = container_of(cur, struct debug_el, list); 259 259 el1 = container_of(cur->next, struct debug_el, list); 260 260 if (cmp_result == 0 && el->serial >= el1->serial) { 261 - printk(KERN_ERR "list_sort_test: error: order of " 262 - "equivalent elements not preserved\n"); 261 + pr_err("error: order of equivalent elements not " 262 + "preserved\n"); 263 263 goto exit; 264 264 } 265 265 266 266 if (check(el, el1)) { 267 - printk(KERN_ERR "list_sort_test: error: element check " 268 - "failed\n"); 267 + pr_err("error: element check failed\n"); 269 268 goto exit; 270 269 } 271 270 count++; 272 271 } 272 + if (head.prev != cur) { 273 + pr_err("error: list is corrupted\n"); 274 + goto exit; 275 + } 276 + 273 277 274 278 if (count != TEST_LIST_LEN) { 275 - printk(KERN_ERR "list_sort_test: error: bad list length %d", 276 - count); 279 + pr_err("error: bad list length %d", count); 277 280 goto exit; 278 281 } 279 282 280 283 err = 0; 281 284 exit: 285 + for (i = 0; i < TEST_LIST_LEN; i++) 286 + kfree(elts[i]); 282 287 kfree(elts); 283 - list_for_each_safe(cur, tmp, &head) { 284 - list_del(cur); 285 - kfree(container_of(cur, struct debug_el, list)); 286 - } 287 288 return err; 288 289 } 289 290 module_init(list_sort_test);

+9 -6

lib/string_helpers.c

··· 25 25 int string_get_size(u64 size, const enum string_size_units units, 26 26 char *buf, int len) 27 27 { 28 - static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", 29 - "EB", "ZB", "YB", NULL}; 30 - static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", 31 - "EiB", "ZiB", "YiB", NULL }; 32 - static const char **units_str[] = { 33 - [STRING_UNITS_10] = units_10, 28 + static const char *const units_10[] = { 29 + "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL 30 + }; 31 + static const char *const units_2[] = { 32 + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", 33 + NULL 34 + }; 35 + static const char *const *const units_str[] = { 36 + [STRING_UNITS_10] = units_10, 34 37 [STRING_UNITS_2] = units_2, 35 38 }; 36 39 static const unsigned int divisor[] = {

+1 -1

lib/test-kstrtox.c

··· 3 3 #include <linux/module.h> 4 4 5 5 #define for_each_test(i, test) \ 6 - for (i = 0; i < sizeof(test) / sizeof(test[0]); i++) 6 + for (i = 0; i < ARRAY_SIZE(test); i++) 7 7 8 8 struct test_fail { 9 9 const char *str;

-143

lib/zlib_deflate/deflate.c

··· 250 250 } 251 251 252 252 /* ========================================================================= */ 253 - #if 0 254 - int zlib_deflateSetDictionary( 255 - z_streamp strm, 256 - const Byte *dictionary, 257 - uInt dictLength 258 - ) 259 - { 260 - deflate_state *s; 261 - uInt length = dictLength; 262 - uInt n; 263 - IPos hash_head = 0; 264 - 265 - if (strm == NULL || strm->state == NULL || dictionary == NULL) 266 - return Z_STREAM_ERROR; 267 - 268 - s = (deflate_state *) strm->state; 269 - if (s->status != INIT_STATE) return Z_STREAM_ERROR; 270 - 271 - strm->adler = zlib_adler32(strm->adler, dictionary, dictLength); 272 - 273 - if (length < MIN_MATCH) return Z_OK; 274 - if (length > MAX_DIST(s)) { 275 - length = MAX_DIST(s); 276 - #ifndef USE_DICT_HEAD 277 - dictionary += dictLength - length; /* use the tail of the dictionary */ 278 - #endif 279 - } 280 - memcpy((char *)s->window, dictionary, length); 281 - s->strstart = length; 282 - s->block_start = (long)length; 283 - 284 - /* Insert all strings in the hash table (except for the last two bytes). 285 - * s->lookahead stays null, so s->ins_h will be recomputed at the next 286 - * call of fill_window. 
287 - */ 288 - s->ins_h = s->window[0]; 289 - UPDATE_HASH(s, s->ins_h, s->window[1]); 290 - for (n = 0; n <= length - MIN_MATCH; n++) { 291 - INSERT_STRING(s, n, hash_head); 292 - } 293 - if (hash_head) hash_head = 0; /* to make compiler happy */ 294 - return Z_OK; 295 - } 296 - #endif /* 0 */ 297 - 298 - /* ========================================================================= */ 299 253 int zlib_deflateReset( 300 254 z_streamp strm 301 255 ) ··· 279 325 280 326 return Z_OK; 281 327 } 282 - 283 - /* ========================================================================= */ 284 - #if 0 285 - int zlib_deflateParams( 286 - z_streamp strm, 287 - int level, 288 - int strategy 289 - ) 290 - { 291 - deflate_state *s; 292 - compress_func func; 293 - int err = Z_OK; 294 - 295 - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; 296 - s = (deflate_state *) strm->state; 297 - 298 - if (level == Z_DEFAULT_COMPRESSION) { 299 - level = 6; 300 - } 301 - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { 302 - return Z_STREAM_ERROR; 303 - } 304 - func = configuration_table[s->level].func; 305 - 306 - if (func != configuration_table[level].func && strm->total_in != 0) { 307 - /* Flush the last buffer: */ 308 - err = zlib_deflate(strm, Z_PARTIAL_FLUSH); 309 - } 310 - if (s->level != level) { 311 - s->level = level; 312 - s->max_lazy_match = configuration_table[level].max_lazy; 313 - s->good_match = configuration_table[level].good_length; 314 - s->nice_match = configuration_table[level].nice_length; 315 - s->max_chain_length = configuration_table[level].max_chain; 316 - } 317 - s->strategy = strategy; 318 - return err; 319 - } 320 - #endif /* 0 */ 321 328 322 329 /* ========================================================================= 323 330 * Put a short in the pending buffer. The 16-bit value is put in MSB order. ··· 482 567 483 568 return status == BUSY_STATE ? 
Z_DATA_ERROR : Z_OK; 484 569 } 485 - 486 - /* ========================================================================= 487 - * Copy the source state to the destination state. 488 - */ 489 - #if 0 490 - int zlib_deflateCopy ( 491 - z_streamp dest, 492 - z_streamp source 493 - ) 494 - { 495 - #ifdef MAXSEG_64K 496 - return Z_STREAM_ERROR; 497 - #else 498 - deflate_state *ds; 499 - deflate_state *ss; 500 - ush *overlay; 501 - deflate_workspace *mem; 502 - 503 - 504 - if (source == NULL || dest == NULL || source->state == NULL) { 505 - return Z_STREAM_ERROR; 506 - } 507 - 508 - ss = (deflate_state *) source->state; 509 - 510 - *dest = *source; 511 - 512 - mem = (deflate_workspace *) dest->workspace; 513 - 514 - ds = &(mem->deflate_memory); 515 - 516 - dest->state = (struct internal_state *) ds; 517 - *ds = *ss; 518 - ds->strm = dest; 519 - 520 - ds->window = (Byte *) mem->window_memory; 521 - ds->prev = (Pos *) mem->prev_memory; 522 - ds->head = (Pos *) mem->head_memory; 523 - overlay = (ush *) mem->overlay_memory; 524 - ds->pending_buf = (uch *) overlay; 525 - 526 - memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); 527 - memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); 528 - memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); 529 - memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); 530 - 531 - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); 532 - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); 533 - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; 534 - 535 - ds->l_desc.dyn_tree = ds->dyn_ltree; 536 - ds->d_desc.dyn_tree = ds->dyn_dtree; 537 - ds->bl_desc.dyn_tree = ds->bl_tree; 538 - 539 - return Z_OK; 540 - #endif 541 - } 542 - #endif /* 0 */ 543 570 544 571 /* =========================================================================== 545 572 * Read a new buffer from the current input stream, update the adler32

-132

lib/zlib_inflate/inflate.c

··· 45 45 return Z_OK; 46 46 } 47 47 48 - #if 0 49 - int zlib_inflatePrime(z_streamp strm, int bits, int value) 50 - { 51 - struct inflate_state *state; 52 - 53 - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; 54 - state = (struct inflate_state *)strm->state; 55 - if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; 56 - value &= (1L << bits) - 1; 57 - state->hold += value << state->bits; 58 - state->bits += bits; 59 - return Z_OK; 60 - } 61 - #endif 62 - 63 48 int zlib_inflateInit2(z_streamp strm, int windowBits) 64 49 { 65 50 struct inflate_state *state; ··· 745 760 return Z_STREAM_ERROR; 746 761 return Z_OK; 747 762 } 748 - 749 - #if 0 750 - int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary, 751 - uInt dictLength) 752 - { 753 - struct inflate_state *state; 754 - unsigned long id; 755 - 756 - /* check state */ 757 - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; 758 - state = (struct inflate_state *)strm->state; 759 - if (state->wrap != 0 && state->mode != DICT) 760 - return Z_STREAM_ERROR; 761 - 762 - /* check for correct dictionary id */ 763 - if (state->mode == DICT) { 764 - id = zlib_adler32(0L, NULL, 0); 765 - id = zlib_adler32(id, dictionary, dictLength); 766 - if (id != state->check) 767 - return Z_DATA_ERROR; 768 - } 769 - 770 - /* copy dictionary to window */ 771 - zlib_updatewindow(strm, strm->avail_out); 772 - 773 - if (dictLength > state->wsize) { 774 - memcpy(state->window, dictionary + dictLength - state->wsize, 775 - state->wsize); 776 - state->whave = state->wsize; 777 - } 778 - else { 779 - memcpy(state->window + state->wsize - dictLength, dictionary, 780 - dictLength); 781 - state->whave = dictLength; 782 - } 783 - state->havedict = 1; 784 - return Z_OK; 785 - } 786 - #endif 787 - 788 - #if 0 789 - /* 790 - Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found 791 - or when out of input. 
When called, *have is the number of pattern bytes 792 - found in order so far, in 0..3. On return *have is updated to the new 793 - state. If on return *have equals four, then the pattern was found and the 794 - return value is how many bytes were read including the last byte of the 795 - pattern. If *have is less than four, then the pattern has not been found 796 - yet and the return value is len. In the latter case, zlib_syncsearch() can be 797 - called again with more data and the *have state. *have is initialized to 798 - zero for the first call. 799 - */ 800 - static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf, 801 - unsigned len) 802 - { 803 - unsigned got; 804 - unsigned next; 805 - 806 - got = *have; 807 - next = 0; 808 - while (next < len && got < 4) { 809 - if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) 810 - got++; 811 - else if (buf[next]) 812 - got = 0; 813 - else 814 - got = 4 - got; 815 - next++; 816 - } 817 - *have = got; 818 - return next; 819 - } 820 - #endif 821 - 822 - #if 0 823 - int zlib_inflateSync(z_streamp strm) 824 - { 825 - unsigned len; /* number of bytes to look at or looked at */ 826 - unsigned long in, out; /* temporary to save total_in and total_out */ 827 - unsigned char buf[4]; /* to restore bit buffer to byte string */ 828 - struct inflate_state *state; 829 - 830 - /* check parameters */ 831 - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; 832 - state = (struct inflate_state *)strm->state; 833 - if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; 834 - 835 - /* if first time, start search in bit buffer */ 836 - if (state->mode != SYNC) { 837 - state->mode = SYNC; 838 - state->hold <<= state->bits & 7; 839 - state->bits -= state->bits & 7; 840 - len = 0; 841 - while (state->bits >= 8) { 842 - buf[len++] = (unsigned char)(state->hold); 843 - state->hold >>= 8; 844 - state->bits -= 8; 845 - } 846 - state->have = 0; 847 - zlib_syncsearch(&(state->have), buf, len); 848 - } 849 - 850 - /* 
search available input */ 851 - len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in); 852 - strm->avail_in -= len; 853 - strm->next_in += len; 854 - strm->total_in += len; 855 - 856 - /* return no joy or set up to restart inflate() on a new block */ 857 - if (state->have != 4) return Z_DATA_ERROR; 858 - in = strm->total_in; out = strm->total_out; 859 - zlib_inflateReset(strm); 860 - strm->total_in = in; strm->total_out = out; 861 - state->mode = TYPE; 862 - return Z_OK; 863 - } 864 - #endif 865 763 866 764 /* 867 765 * This subroutine adds the data at next_in/avail_in to the output history

+36 -18

mm/Kconfig

··· 508 508 processing calls such as dma_alloc_from_contiguous(). 509 509 This option does not affect warning and error messages. 510 510 511 - config ZBUD 512 - tristate 513 - default n 511 + config CMA_AREAS 512 + int "Maximum count of the CMA areas" 513 + depends on CMA 514 + default 7 514 515 help 515 - A special purpose allocator for storing compressed pages. 516 - It is designed to store up to two compressed pages per physical 517 - page. While this design limits storage density, it has simple and 518 - deterministic reclaim properties that make it preferable to a higher 519 - density approach when reclaim will be used. 516 + CMA allows to create CMA areas for particular purpose, mainly, 517 + used as device private area. This parameter sets the maximum 518 + number of CMA area in the system. 519 + 520 + If unsure, leave the default value "7". 521 + 522 + config MEM_SOFT_DIRTY 523 + bool "Track memory changes" 524 + depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS 525 + select PROC_PAGE_MONITOR 526 + help 527 + This option enables memory changes tracking by introducing a 528 + soft-dirty bit on pte-s. This bit it set when someone writes 529 + into a page just as regular dirty bit, but unlike the latter 530 + it can be cleared by hands. 531 + 532 + See Documentation/vm/soft-dirty.txt for more details. 520 533 521 534 config ZSWAP 522 535 bool "Compressed cache for swap pages (EXPERIMENTAL)" 523 536 depends on FRONTSWAP && CRYPTO=y 524 537 select CRYPTO_LZO 525 - select ZBUD 538 + select ZPOOL 526 539 default n 527 540 help 528 541 A lightweight compressed cache for swap pages. It takes ··· 551 538 they have not be fully explored on the large set of potential 552 539 configurations and workloads that exist. 
553 540 554 - config MEM_SOFT_DIRTY 555 - bool "Track memory changes" 556 - depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS 557 - select PROC_PAGE_MONITOR 541 + config ZPOOL 542 + tristate "Common API for compressed memory storage" 543 + default n 558 544 help 559 - This option enables memory changes tracking by introducing a 560 - soft-dirty bit on pte-s. This bit it set when someone writes 561 - into a page just as regular dirty bit, but unlike the latter 562 - it can be cleared by hands. 545 + Compressed memory storage API. This allows using either zbud or 546 + zsmalloc. 563 547 564 - See Documentation/vm/soft-dirty.txt for more details. 548 + config ZBUD 549 + tristate "Low density storage for compressed pages" 550 + default n 551 + help 552 + A special purpose allocator for storing compressed pages. 553 + It is designed to store up to two compressed pages per physical 554 + page. While this design limits storage density, it has simple and 555 + deterministic reclaim properties that make it preferable to a higher 556 + density approach when reclaim will be used. 565 557 566 558 config ZSMALLOC 567 559 tristate "Memory allocator for compressed pages"

+2

mm/Makefile

··· 59 59 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o 60 60 obj-$(CONFIG_CLEANCACHE) += cleancache.o 61 61 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o 62 + obj-$(CONFIG_ZPOOL) += zpool.o 62 63 obj-$(CONFIG_ZBUD) += zbud.o 63 64 obj-$(CONFIG_ZSMALLOC) += zsmalloc.o 64 65 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o 66 + obj-$(CONFIG_CMA) += cma.o

+335

mm/cma.c

··· 1 + /* 2 + * Contiguous Memory Allocator 3 + * 4 + * Copyright (c) 2010-2011 by Samsung Electronics. 5 + * Copyright IBM Corporation, 2013 6 + * Copyright LG Electronics Inc., 2014 7 + * Written by: 8 + * Marek Szyprowski <m.szyprowski@samsung.com> 9 + * Michal Nazarewicz <mina86@mina86.com> 10 + * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 11 + * Joonsoo Kim <iamjoonsoo.kim@lge.com> 12 + * 13 + * This program is free software; you can redistribute it and/or 14 + * modify it under the terms of the GNU General Public License as 15 + * published by the Free Software Foundation; either version 2 of the 16 + * License or (at your optional) any later version of the license. 17 + */ 18 + 19 + #define pr_fmt(fmt) "cma: " fmt 20 + 21 + #ifdef CONFIG_CMA_DEBUG 22 + #ifndef DEBUG 23 + # define DEBUG 24 + #endif 25 + #endif 26 + 27 + #include <linux/memblock.h> 28 + #include <linux/err.h> 29 + #include <linux/mm.h> 30 + #include <linux/mutex.h> 31 + #include <linux/sizes.h> 32 + #include <linux/slab.h> 33 + #include <linux/log2.h> 34 + #include <linux/cma.h> 35 + 36 + struct cma { 37 + unsigned long base_pfn; 38 + unsigned long count; 39 + unsigned long *bitmap; 40 + unsigned int order_per_bit; /* Order of pages represented by one bit */ 41 + struct mutex lock; 42 + }; 43 + 44 + static struct cma cma_areas[MAX_CMA_AREAS]; 45 + static unsigned cma_area_count; 46 + static DEFINE_MUTEX(cma_mutex); 47 + 48 + phys_addr_t cma_get_base(struct cma *cma) 49 + { 50 + return PFN_PHYS(cma->base_pfn); 51 + } 52 + 53 + unsigned long cma_get_size(struct cma *cma) 54 + { 55 + return cma->count << PAGE_SHIFT; 56 + } 57 + 58 + static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) 59 + { 60 + return (1UL << (align_order >> cma->order_per_bit)) - 1; 61 + } 62 + 63 + static unsigned long cma_bitmap_maxno(struct cma *cma) 64 + { 65 + return cma->count >> cma->order_per_bit; 66 + } 67 + 68 + static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, 69 + 
unsigned long pages) 70 + { 71 + return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; 72 + } 73 + 74 + static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) 75 + { 76 + unsigned long bitmap_no, bitmap_count; 77 + 78 + bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; 79 + bitmap_count = cma_bitmap_pages_to_bits(cma, count); 80 + 81 + mutex_lock(&cma->lock); 82 + bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); 83 + mutex_unlock(&cma->lock); 84 + } 85 + 86 + static int __init cma_activate_area(struct cma *cma) 87 + { 88 + int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); 89 + unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; 90 + unsigned i = cma->count >> pageblock_order; 91 + struct zone *zone; 92 + 93 + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); 94 + 95 + if (!cma->bitmap) 96 + return -ENOMEM; 97 + 98 + WARN_ON_ONCE(!pfn_valid(pfn)); 99 + zone = page_zone(pfn_to_page(pfn)); 100 + 101 + do { 102 + unsigned j; 103 + 104 + base_pfn = pfn; 105 + for (j = pageblock_nr_pages; j; --j, pfn++) { 106 + WARN_ON_ONCE(!pfn_valid(pfn)); 107 + /* 108 + * alloc_contig_range requires the pfn range 109 + * specified to be in the same zone. Make this 110 + * simple by forcing the entire CMA resv range 111 + * to be in the same zone. 
112 + */ 113 + if (page_zone(pfn_to_page(pfn)) != zone) 114 + goto err; 115 + } 116 + init_cma_reserved_pageblock(pfn_to_page(base_pfn)); 117 + } while (--i); 118 + 119 + mutex_init(&cma->lock); 120 + return 0; 121 + 122 + err: 123 + kfree(cma->bitmap); 124 + return -EINVAL; 125 + } 126 + 127 + static int __init cma_init_reserved_areas(void) 128 + { 129 + int i; 130 + 131 + for (i = 0; i < cma_area_count; i++) { 132 + int ret = cma_activate_area(&cma_areas[i]); 133 + 134 + if (ret) 135 + return ret; 136 + } 137 + 138 + return 0; 139 + } 140 + core_initcall(cma_init_reserved_areas); 141 + 142 + /** 143 + * cma_declare_contiguous() - reserve custom contiguous area 144 + * @base: Base address of the reserved area optional, use 0 for any 145 + * @size: Size of the reserved area (in bytes), 146 + * @limit: End address of the reserved memory (optional, 0 for any). 147 + * @alignment: Alignment for the CMA area, should be power of 2 or zero 148 + * @order_per_bit: Order of pages represented by one bit on bitmap. 149 + * @fixed: hint about where to place the reserved area 150 + * @res_cma: Pointer to store the created cma region. 151 + * 152 + * This function reserves memory from early allocator. It should be 153 + * called by arch specific code once the early allocator (memblock or bootmem) 154 + * has been activated and all other subsystems have already allocated/reserved 155 + * memory. This function allows to create custom reserved areas. 156 + * 157 + * If @fixed is true, reserve contiguous area at exactly @base. If false, 158 + * reserve in range from @base to @limit. 
159 + */ 160 + int __init cma_declare_contiguous(phys_addr_t base, 161 + phys_addr_t size, phys_addr_t limit, 162 + phys_addr_t alignment, unsigned int order_per_bit, 163 + bool fixed, struct cma **res_cma) 164 + { 165 + struct cma *cma; 166 + int ret = 0; 167 + 168 + pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", 169 + __func__, (unsigned long)size, (unsigned long)base, 170 + (unsigned long)limit, (unsigned long)alignment); 171 + 172 + if (cma_area_count == ARRAY_SIZE(cma_areas)) { 173 + pr_err("Not enough slots for CMA reserved regions!\n"); 174 + return -ENOSPC; 175 + } 176 + 177 + if (!size) 178 + return -EINVAL; 179 + 180 + if (alignment && !is_power_of_2(alignment)) 181 + return -EINVAL; 182 + 183 + /* 184 + * Sanitise input arguments. 185 + * Pages both ends in CMA area could be merged into adjacent unmovable 186 + * migratetype page by page allocator's buddy algorithm. In the case, 187 + * you couldn't get a contiguous memory, which is not what we want. 188 + */ 189 + alignment = max(alignment, 190 + (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); 191 + base = ALIGN(base, alignment); 192 + size = ALIGN(size, alignment); 193 + limit &= ~(alignment - 1); 194 + 195 + /* size should be aligned with order_per_bit */ 196 + if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) 197 + return -EINVAL; 198 + 199 + /* Reserve memory */ 200 + if (base && fixed) { 201 + if (memblock_is_region_reserved(base, size) || 202 + memblock_reserve(base, size) < 0) { 203 + ret = -EBUSY; 204 + goto err; 205 + } 206 + } else { 207 + phys_addr_t addr = memblock_alloc_range(size, alignment, base, 208 + limit); 209 + if (!addr) { 210 + ret = -ENOMEM; 211 + goto err; 212 + } else { 213 + base = addr; 214 + } 215 + } 216 + 217 + /* 218 + * Each reserved area must be initialised later, when more kernel 219 + * subsystems (like slab allocator) are available. 
220 + */ 221 + cma = &cma_areas[cma_area_count]; 222 + cma->base_pfn = PFN_DOWN(base); 223 + cma->count = size >> PAGE_SHIFT; 224 + cma->order_per_bit = order_per_bit; 225 + *res_cma = cma; 226 + cma_area_count++; 227 + 228 + pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, 229 + (unsigned long)base); 230 + return 0; 231 + 232 + err: 233 + pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); 234 + return ret; 235 + } 236 + 237 + /** 238 + * cma_alloc() - allocate pages from contiguous area 239 + * @cma: Contiguous memory region for which the allocation is performed. 240 + * @count: Requested number of pages. 241 + * @align: Requested alignment of pages (in PAGE_SIZE order). 242 + * 243 + * This function allocates part of contiguous memory on specific 244 + * contiguous memory area. 245 + */ 246 + struct page *cma_alloc(struct cma *cma, int count, unsigned int align) 247 + { 248 + unsigned long mask, pfn, start = 0; 249 + unsigned long bitmap_maxno, bitmap_no, bitmap_count; 250 + struct page *page = NULL; 251 + int ret; 252 + 253 + if (!cma || !cma->count) 254 + return NULL; 255 + 256 + pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, 257 + count, align); 258 + 259 + if (!count) 260 + return NULL; 261 + 262 + mask = cma_bitmap_aligned_mask(cma, align); 263 + bitmap_maxno = cma_bitmap_maxno(cma); 264 + bitmap_count = cma_bitmap_pages_to_bits(cma, count); 265 + 266 + for (;;) { 267 + mutex_lock(&cma->lock); 268 + bitmap_no = bitmap_find_next_zero_area(cma->bitmap, 269 + bitmap_maxno, start, bitmap_count, mask); 270 + if (bitmap_no >= bitmap_maxno) { 271 + mutex_unlock(&cma->lock); 272 + break; 273 + } 274 + bitmap_set(cma->bitmap, bitmap_no, bitmap_count); 275 + /* 276 + * It's safe to drop the lock here. We've marked this region for 277 + * our exclusive use. If the migration fails we will take the 278 + * lock again and unmark it. 
279 + */ 280 + mutex_unlock(&cma->lock); 281 + 282 + pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); 283 + mutex_lock(&cma_mutex); 284 + ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); 285 + mutex_unlock(&cma_mutex); 286 + if (ret == 0) { 287 + page = pfn_to_page(pfn); 288 + break; 289 + } 290 + 291 + cma_clear_bitmap(cma, pfn, count); 292 + if (ret != -EBUSY) 293 + break; 294 + 295 + pr_debug("%s(): memory range at %p is busy, retrying\n", 296 + __func__, pfn_to_page(pfn)); 297 + /* try again with a bit different memory target */ 298 + start = bitmap_no + mask + 1; 299 + } 300 + 301 + pr_debug("%s(): returned %p\n", __func__, page); 302 + return page; 303 + } 304 + 305 + /** 306 + * cma_release() - release allocated pages 307 + * @cma: Contiguous memory region for which the allocation is performed. 308 + * @pages: Allocated pages. 309 + * @count: Number of allocated pages. 310 + * 311 + * This function releases memory allocated by alloc_cma(). 312 + * It returns false when provided pages do not belong to contiguous area and 313 + * true otherwise. 314 + */ 315 + bool cma_release(struct cma *cma, struct page *pages, int count) 316 + { 317 + unsigned long pfn; 318 + 319 + if (!cma || !pages) 320 + return false; 321 + 322 + pr_debug("%s(page %p)\n", __func__, (void *)pages); 323 + 324 + pfn = page_to_pfn(pages); 325 + 326 + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) 327 + return false; 328 + 329 + VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); 330 + 331 + free_contig_range(pfn, count); 332 + cma_clear_bitmap(cma, pfn, count); 333 + 334 + return true; 335 + }

+25 -2

mm/filemap.c

··· 808 808 } 809 809 EXPORT_SYMBOL_GPL(__lock_page_killable); 810 810 811 + /* 812 + * Return values: 813 + * 1 - page is locked; mmap_sem is still held. 814 + * 0 - page is not locked. 815 + * mmap_sem has been released (up_read()), unless flags had both 816 + * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in 817 + * which case mmap_sem is still held. 818 + * 819 + * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 820 + * with the page locked and the mmap_sem unperturbed. 821 + */ 811 822 int __lock_page_or_retry(struct page *page, struct mm_struct *mm, 812 823 unsigned int flags) 813 824 { ··· 1102 1091 if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK))) 1103 1092 fgp_flags |= FGP_LOCK; 1104 1093 1105 - /* Init accessed so avoit atomic mark_page_accessed later */ 1094 + /* Init accessed so avoid atomic mark_page_accessed later */ 1106 1095 if (fgp_flags & FGP_ACCESSED) 1107 - init_page_accessed(page); 1096 + __SetPageReferenced(page); 1108 1097 1109 1098 err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); 1110 1099 if (unlikely(err)) { ··· 1838 1827 * The goto's are kind of ugly, but this streamlines the normal case of having 1839 1828 * it in the page cache, and handles the special cases reasonably without 1840 1829 * having a lot of duplicated code. 1830 + * 1831 + * vma->vm_mm->mmap_sem must be held on entry. 1832 + * 1833 + * If our return value has VM_FAULT_RETRY set, it's because 1834 + * lock_page_or_retry() returned 0. 1835 + * The mmap_sem has usually been released in this case. 1836 + * See __lock_page_or_retry() for the exception. 1837 + * 1838 + * If our return value does not have VM_FAULT_RETRY set, the mmap_sem 1839 + * has not been released. 1840 + * 1841 + * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. 1841 1842 */ 1842 1843 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1843 1844 {

+15 -3

mm/gup.c

··· 258 258 return ret; 259 259 } 260 260 261 + /* 262 + * mmap_sem must be held on entry. If @nonblocking != NULL and 263 + * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released. 264 + * If it is, *@nonblocking will be set to 0 and -EBUSY returned. 265 + */ 261 266 static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, 262 267 unsigned long address, unsigned int *flags, int *nonblocking) 263 268 { ··· 378 373 * with a put_page() call when it is finished with. vmas will only 379 374 * remain valid while mmap_sem is held. 380 375 * 381 - * Must be called with mmap_sem held for read or write. 376 + * Must be called with mmap_sem held. It may be released. See below. 382 377 * 383 378 * __get_user_pages walks a process's page tables and takes a reference to 384 379 * each struct page that each user address corresponds to at a given ··· 401 396 * 402 397 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO 403 398 * or mmap_sem contention, and if waiting is needed to pin all pages, 404 - * *@nonblocking will be set to 0. 399 + * *@nonblocking will be set to 0. Further, if @gup_flags does not 400 + * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in 401 + * this case. 402 + * 403 + * A caller using such a combination of @nonblocking and @gup_flags 404 + * must therefore hold the mmap_sem for reading only, and recognize 405 + * when it's been released. Otherwise, it must be held for either 406 + * reading or writing and will not be released. 405 407 * 406 408 * In most cases, get_user_pages or get_user_pages_fast should be used 407 409 * instead of __get_user_pages. __get_user_pages should be used only if ··· 540 528 * such architectures, gup() will not be enough to make a subsequent access 541 529 * succeed. 542 530 * 543 - * This should be called with the mm_sem held for read. 531 + * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault(). 
544 532 */ 545 533 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, 546 534 unsigned long address, unsigned int fault_flags)

+75 -11

mm/highmem.c

··· 44 44 */ 45 45 #ifdef CONFIG_HIGHMEM 46 46 47 + /* 48 + * Architecture with aliasing data cache may define the following family of 49 + * helper functions in its asm/highmem.h to control cache color of virtual 50 + * addresses where physical memory pages are mapped by kmap. 51 + */ 52 + #ifndef get_pkmap_color 53 + 54 + /* 55 + * Determine color of virtual address where the page should be mapped. 56 + */ 57 + static inline unsigned int get_pkmap_color(struct page *page) 58 + { 59 + return 0; 60 + } 61 + #define get_pkmap_color get_pkmap_color 62 + 63 + /* 64 + * Get next index for mapping inside PKMAP region for page with given color. 65 + */ 66 + static inline unsigned int get_next_pkmap_nr(unsigned int color) 67 + { 68 + static unsigned int last_pkmap_nr; 69 + 70 + last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; 71 + return last_pkmap_nr; 72 + } 73 + 74 + /* 75 + * Determine if page index inside PKMAP region (pkmap_nr) of given color 76 + * has wrapped around PKMAP region end. When this happens an attempt to 77 + * flush all unused PKMAP slots is made. 78 + */ 79 + static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color) 80 + { 81 + return pkmap_nr == 0; 82 + } 83 + 84 + /* 85 + * Get the number of PKMAP entries of the given color. If no free slot is 86 + * found after checking that many entries, kmap will sleep waiting for 87 + * someone to call kunmap and free PKMAP slot. 88 + */ 89 + static inline int get_pkmap_entries_count(unsigned int color) 90 + { 91 + return LAST_PKMAP; 92 + } 93 + 94 + /* 95 + * Get head of a wait queue for PKMAP entries of the given color. 96 + * Wait queues for different mapping colors should be independent to avoid 97 + * unnecessary wakeups caused by freeing of slots of other colors. 
98 + */ 99 + static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) 100 + { 101 + static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); 102 + 103 + return &pkmap_map_wait; 104 + } 105 + #endif 106 + 47 107 unsigned long totalhigh_pages __read_mostly; 48 108 EXPORT_SYMBOL(totalhigh_pages); 49 109 ··· 128 68 } 129 69 130 70 static int pkmap_count[LAST_PKMAP]; 131 - static unsigned int last_pkmap_nr; 132 71 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); 133 72 134 73 pte_t * pkmap_page_table; 135 - 136 - static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); 137 74 138 75 /* 139 76 * Most architectures have no use for kmap_high_get(), so let's abstract ··· 218 161 { 219 162 unsigned long vaddr; 220 163 int count; 164 + unsigned int last_pkmap_nr; 165 + unsigned int color = get_pkmap_color(page); 221 166 222 167 start: 223 - count = LAST_PKMAP; 168 + count = get_pkmap_entries_count(color); 224 169 /* Find an empty entry */ 225 170 for (;;) { 226 - last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; 227 - if (!last_pkmap_nr) { 171 + last_pkmap_nr = get_next_pkmap_nr(color); 172 + if (no_more_pkmaps(last_pkmap_nr, color)) { 228 173 flush_all_zero_pkmaps(); 229 - count = LAST_PKMAP; 174 + count = get_pkmap_entries_count(color); 230 175 } 231 176 if (!pkmap_count[last_pkmap_nr]) 232 177 break; /* Found a usable entry */ ··· 240 181 */ 241 182 { 242 183 DECLARE_WAITQUEUE(wait, current); 184 + wait_queue_head_t *pkmap_map_wait = 185 + get_pkmap_wait_queue_head(color); 243 186 244 187 __set_current_state(TASK_UNINTERRUPTIBLE); 245 - add_wait_queue(&pkmap_map_wait, &wait); 188 + add_wait_queue(pkmap_map_wait, &wait); 246 189 unlock_kmap(); 247 190 schedule(); 248 - remove_wait_queue(&pkmap_map_wait, &wait); 191 + remove_wait_queue(pkmap_map_wait, &wait); 249 192 lock_kmap(); 250 193 251 194 /* Somebody else might have mapped it while we slept */ ··· 335 274 unsigned long nr; 336 275 unsigned long flags; 337 276 int need_wakeup; 277 + unsigned int 
color = get_pkmap_color(page); 278 + wait_queue_head_t *pkmap_map_wait; 338 279 339 280 lock_kmap_any(flags); 340 281 vaddr = (unsigned long)page_address(page); ··· 362 299 * no need for the wait-queue-head's lock. Simply 363 300 * test if the queue is empty. 364 301 */ 365 - need_wakeup = waitqueue_active(&pkmap_map_wait); 302 + pkmap_map_wait = get_pkmap_wait_queue_head(color); 303 + need_wakeup = waitqueue_active(pkmap_map_wait); 366 304 } 367 305 unlock_kmap_any(flags); 368 306 369 307 /* do wake-up, if needed, race-free outside of the spin lock */ 370 308 if (need_wakeup) 371 - wake_up(&pkmap_map_wait); 309 + wake_up(pkmap_map_wait); 372 310 } 373 311 374 312 EXPORT_SYMBOL(kunmap_high);

+32 -6

mm/huge_memory.c

··· 827 827 count_vm_event(THP_FAULT_FALLBACK); 828 828 return VM_FAULT_FALLBACK; 829 829 } 830 - if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) { 830 + if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) { 831 831 put_page(page); 832 832 count_vm_event(THP_FAULT_FALLBACK); 833 833 return VM_FAULT_FALLBACK; ··· 1132 1132 goto out; 1133 1133 } 1134 1134 1135 - if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) { 1135 + if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) { 1136 1136 put_page(new_page); 1137 1137 if (page) { 1138 1138 split_huge_page(page); ··· 1681 1681 &page_tail->_count); 1682 1682 1683 1683 /* after clearing PageTail the gup refcount can be released */ 1684 - smp_mb(); 1684 + smp_mb__after_atomic(); 1685 1685 1686 1686 /* 1687 1687 * retain hwpoison flag of the poisoned tail page: ··· 1775 1775 if (pmd) { 1776 1776 pgtable = pgtable_trans_huge_withdraw(mm, pmd); 1777 1777 pmd_populate(mm, &_pmd, pgtable); 1778 + if (pmd_write(*pmd)) 1779 + BUG_ON(page_mapcount(page) != 1); 1778 1780 1779 1781 haddr = address; 1780 1782 for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { ··· 1786 1784 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 1787 1785 if (!pmd_write(*pmd)) 1788 1786 entry = pte_wrprotect(entry); 1789 - else 1790 - BUG_ON(page_mapcount(page) != 1); 1791 1787 if (!pmd_young(*pmd)) 1792 1788 entry = pte_mkold(entry); 1793 1789 if (pmd_numa(*pmd)) ··· 2233 2233 2234 2234 static int khugepaged_node_load[MAX_NUMNODES]; 2235 2235 2236 + static bool khugepaged_scan_abort(int nid) 2237 + { 2238 + int i; 2239 + 2240 + /* 2241 + * If zone_reclaim_mode is disabled, then no extra effort is made to 2242 + * allocate memory locally. 
2243 + */ 2244 + if (!zone_reclaim_mode) 2245 + return false; 2246 + 2247 + /* If there is a count for this node already, it must be acceptable */ 2248 + if (khugepaged_node_load[nid]) 2249 + return false; 2250 + 2251 + for (i = 0; i < MAX_NUMNODES; i++) { 2252 + if (!khugepaged_node_load[i]) 2253 + continue; 2254 + if (node_distance(nid, i) > RECLAIM_DISTANCE) 2255 + return true; 2256 + } 2257 + return false; 2258 + } 2259 + 2236 2260 #ifdef CONFIG_NUMA 2237 2261 static int khugepaged_find_target_node(void) 2238 2262 { ··· 2423 2399 if (!new_page) 2424 2400 return; 2425 2401 2426 - if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) 2402 + if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) 2427 2403 return; 2428 2404 2429 2405 /* ··· 2569 2545 * hit record. 2570 2546 */ 2571 2547 node = page_to_nid(page); 2548 + if (khugepaged_scan_abort(node)) 2549 + goto out_unmap; 2572 2550 khugepaged_node_load[node]++; 2573 2551 VM_BUG_ON_PAGE(PageCompound(page), page); 2574 2552 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))

+59 -70

mm/hugetlb.c

··· 35 35 #include <linux/node.h> 36 36 #include "internal.h" 37 37 38 - const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; 39 38 unsigned long hugepages_treat_as_movable; 40 39 41 40 int hugetlb_max_hstate __read_mostly; ··· 1088 1089 unsigned long pfn; 1089 1090 struct hstate *h; 1090 1091 1092 + if (!hugepages_supported()) 1093 + return; 1094 + 1091 1095 /* Set scan step to minimum hugepage size */ 1092 1096 for_each_hstate(h) 1093 1097 if (order > huge_page_order(h)) ··· 1736 1734 return sprintf(buf, "%lu\n", nr_huge_pages); 1737 1735 } 1738 1736 1739 - static ssize_t nr_hugepages_store_common(bool obey_mempolicy, 1740 - struct kobject *kobj, struct kobj_attribute *attr, 1741 - const char *buf, size_t len) 1737 + static ssize_t __nr_hugepages_store_common(bool obey_mempolicy, 1738 + struct hstate *h, int nid, 1739 + unsigned long count, size_t len) 1742 1740 { 1743 1741 int err; 1744 - int nid; 1745 - unsigned long count; 1746 - struct hstate *h; 1747 1742 NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY); 1748 1743 1749 - err = kstrtoul(buf, 10, &count); 1750 - if (err) 1751 - goto out; 1752 - 1753 - h = kobj_to_hstate(kobj, &nid); 1754 1744 if (hstate_is_gigantic(h) && !gigantic_page_supported()) { 1755 1745 err = -EINVAL; 1756 1746 goto out; ··· 1778 1784 return err; 1779 1785 } 1780 1786 1787 + static ssize_t nr_hugepages_store_common(bool obey_mempolicy, 1788 + struct kobject *kobj, const char *buf, 1789 + size_t len) 1790 + { 1791 + struct hstate *h; 1792 + unsigned long count; 1793 + int nid; 1794 + int err; 1795 + 1796 + err = kstrtoul(buf, 10, &count); 1797 + if (err) 1798 + return err; 1799 + 1800 + h = kobj_to_hstate(kobj, &nid); 1801 + return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len); 1802 + } 1803 + 1781 1804 static ssize_t nr_hugepages_show(struct kobject *kobj, 1782 1805 struct kobj_attribute *attr, char *buf) 1783 1806 { ··· 1804 1793 static ssize_t nr_hugepages_store(struct kobject *kobj, 
1805 1794 struct kobj_attribute *attr, const char *buf, size_t len) 1806 1795 { 1807 - return nr_hugepages_store_common(false, kobj, attr, buf, len); 1796 + return nr_hugepages_store_common(false, kobj, buf, len); 1808 1797 } 1809 1798 HSTATE_ATTR(nr_hugepages); 1810 1799 ··· 1823 1812 static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, 1824 1813 struct kobj_attribute *attr, const char *buf, size_t len) 1825 1814 { 1826 - return nr_hugepages_store_common(true, kobj, attr, buf, len); 1815 + return nr_hugepages_store_common(true, kobj, buf, len); 1827 1816 } 1828 1817 HSTATE_ATTR(nr_hugepages_mempolicy); 1829 1818 #endif ··· 2259 2248 void __user *buffer, size_t *length, loff_t *ppos) 2260 2249 { 2261 2250 struct hstate *h = &default_hstate; 2262 - unsigned long tmp; 2251 + unsigned long tmp = h->max_huge_pages; 2263 2252 int ret; 2264 2253 2265 2254 if (!hugepages_supported()) 2266 2255 return -ENOTSUPP; 2267 - 2268 - tmp = h->max_huge_pages; 2269 - 2270 - if (write && hstate_is_gigantic(h) && !gigantic_page_supported()) 2271 - return -EINVAL; 2272 2256 2273 2257 table->data = &tmp; 2274 2258 table->maxlen = sizeof(unsigned long); ··· 2271 2265 if (ret) 2272 2266 goto out; 2273 2267 2274 - if (write) { 2275 - NODEMASK_ALLOC(nodemask_t, nodes_allowed, 2276 - GFP_KERNEL | __GFP_NORETRY); 2277 - if (!(obey_mempolicy && 2278 - init_nodemask_of_mempolicy(nodes_allowed))) { 2279 - NODEMASK_FREE(nodes_allowed); 2280 - nodes_allowed = &node_states[N_MEMORY]; 2281 - } 2282 - h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed); 2283 - 2284 - if (nodes_allowed != &node_states[N_MEMORY]) 2285 - NODEMASK_FREE(nodes_allowed); 2286 - } 2268 + if (write) 2269 + ret = __nr_hugepages_store_common(obey_mempolicy, h, 2270 + NUMA_NO_NODE, tmp, *length); 2287 2271 out: 2288 2272 return ret; 2289 2273 } ··· 2750 2754 * from other VMAs and let the children be SIGKILLed if they are faulting the 2751 2755 * same region. 
2752 2756 */ 2753 - static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, 2754 - struct page *page, unsigned long address) 2757 + static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, 2758 + struct page *page, unsigned long address) 2755 2759 { 2756 2760 struct hstate *h = hstate_vma(vma); 2757 2761 struct vm_area_struct *iter_vma; ··· 2790 2794 address + huge_page_size(h), page); 2791 2795 } 2792 2796 mutex_unlock(&mapping->i_mmap_mutex); 2793 - 2794 - return 1; 2795 2797 } 2796 2798 2797 2799 /* ··· 2804 2810 { 2805 2811 struct hstate *h = hstate_vma(vma); 2806 2812 struct page *old_page, *new_page; 2807 - int outside_reserve = 0; 2813 + int ret = 0, outside_reserve = 0; 2808 2814 unsigned long mmun_start; /* For mmu_notifiers */ 2809 2815 unsigned long mmun_end; /* For mmu_notifiers */ 2810 2816 ··· 2834 2840 2835 2841 page_cache_get(old_page); 2836 2842 2837 - /* Drop page table lock as buddy allocator may be called */ 2843 + /* 2844 + * Drop page table lock as buddy allocator may be called. It will 2845 + * be acquired again before returning to the caller, as expected. 2846 + */ 2838 2847 spin_unlock(ptl); 2839 2848 new_page = alloc_huge_page(vma, address, outside_reserve); 2840 2849 2841 2850 if (IS_ERR(new_page)) { 2842 - long err = PTR_ERR(new_page); 2843 - page_cache_release(old_page); 2844 - 2845 2851 /* 2846 2852 * If a process owning a MAP_PRIVATE mapping fails to COW, 2847 2853 * it is due to references held by a child and an insufficient ··· 2850 2856 * may get SIGKILLed if it later faults. 
2851 2857 */ 2852 2858 if (outside_reserve) { 2859 + page_cache_release(old_page); 2853 2860 BUG_ON(huge_pte_none(pte)); 2854 - if (unmap_ref_private(mm, vma, old_page, address)) { 2855 - BUG_ON(huge_pte_none(pte)); 2856 - spin_lock(ptl); 2857 - ptep = huge_pte_offset(mm, address & huge_page_mask(h)); 2858 - if (likely(ptep && 2859 - pte_same(huge_ptep_get(ptep), pte))) 2860 - goto retry_avoidcopy; 2861 - /* 2862 - * race occurs while re-acquiring page table 2863 - * lock, and our job is done. 2864 - */ 2865 - return 0; 2866 - } 2867 - WARN_ON_ONCE(1); 2861 + unmap_ref_private(mm, vma, old_page, address); 2862 + BUG_ON(huge_pte_none(pte)); 2863 + spin_lock(ptl); 2864 + ptep = huge_pte_offset(mm, address & huge_page_mask(h)); 2865 + if (likely(ptep && 2866 + pte_same(huge_ptep_get(ptep), pte))) 2867 + goto retry_avoidcopy; 2868 + /* 2869 + * race occurs while re-acquiring page table 2870 + * lock, and our job is done. 2871 + */ 2872 + return 0; 2868 2873 } 2869 2874 2870 - /* Caller expects lock to be held */ 2871 - spin_lock(ptl); 2872 - if (err == -ENOMEM) 2873 - return VM_FAULT_OOM; 2874 - else 2875 - return VM_FAULT_SIGBUS; 2875 + ret = (PTR_ERR(new_page) == -ENOMEM) ? 2876 + VM_FAULT_OOM : VM_FAULT_SIGBUS; 2877 + goto out_release_old; 2876 2878 } 2877 2879 2878 2880 /* ··· 2876 2886 * anon_vma prepared. 
2877 2887 */ 2878 2888 if (unlikely(anon_vma_prepare(vma))) { 2879 - page_cache_release(new_page); 2880 - page_cache_release(old_page); 2881 - /* Caller expects lock to be held */ 2882 - spin_lock(ptl); 2883 - return VM_FAULT_OOM; 2889 + ret = VM_FAULT_OOM; 2890 + goto out_release_all; 2884 2891 } 2885 2892 2886 2893 copy_user_huge_page(new_page, old_page, address, vma, ··· 2887 2900 mmun_start = address & huge_page_mask(h); 2888 2901 mmun_end = mmun_start + huge_page_size(h); 2889 2902 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); 2903 + 2890 2904 /* 2891 2905 * Retake the page table lock to check for racing updates 2892 2906 * before the page tables are altered ··· 2908 2920 } 2909 2921 spin_unlock(ptl); 2910 2922 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 2923 + out_release_all: 2911 2924 page_cache_release(new_page); 2925 + out_release_old: 2912 2926 page_cache_release(old_page); 2913 2927 2914 - /* Caller expects lock to be held */ 2915 - spin_lock(ptl); 2916 - return 0; 2928 + spin_lock(ptl); /* Caller expects lock to be held */ 2929 + return ret; 2917 2930 } 2918 2931 2919 2932 /* Return the pagecache page at a given address within a VMA */

+1 -2

mm/hwpoison-inject.c

··· 72 72 73 73 static void pfn_inject_exit(void) 74 74 { 75 - if (hwpoison_dir) 76 - debugfs_remove_recursive(hwpoison_dir); 75 + debugfs_remove_recursive(hwpoison_dir); 77 76 } 78 77 79 78 static int pfn_inject_init(void)

+1 -1

mm/internal.h

··· 247 247 static inline struct page *mem_map_offset(struct page *base, int offset) 248 248 { 249 249 if (unlikely(offset >= MAX_ORDER_NR_PAGES)) 250 - return pfn_to_page(page_to_pfn(base) + offset); 250 + return nth_page(base, offset); 251 251 return base + offset; 252 252 } 253 253

-3

mm/madvise.c

··· 292 292 /* 293 293 * Application wants to free up the pages and associated backing store. 294 294 * This is effectively punching a hole into the middle of a file. 295 - * 296 - * NOTE: Currently, only shmfs/tmpfs is supported for this operation. 297 - * Other filesystems return -ENOSYS. 298 295 */ 299 296 static long madvise_remove(struct vm_area_struct *vma, 300 297 struct vm_area_struct **prev,

+146 -286

mm/memcontrol.c

··· 2551 2551 return NOTIFY_OK; 2552 2552 } 2553 2553 2554 - 2555 - /* See mem_cgroup_try_charge() for details */ 2556 - enum { 2557 - CHARGE_OK, /* success */ 2558 - CHARGE_RETRY, /* need to retry but retry is not bad */ 2559 - CHARGE_NOMEM, /* we can't do more. return -ENOMEM */ 2560 - CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */ 2561 - }; 2562 - 2563 - static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, 2564 - unsigned int nr_pages, unsigned int min_pages, 2565 - bool invoke_oom) 2566 - { 2567 - unsigned long csize = nr_pages * PAGE_SIZE; 2568 - struct mem_cgroup *mem_over_limit; 2569 - struct res_counter *fail_res; 2570 - unsigned long flags = 0; 2571 - int ret; 2572 - 2573 - ret = res_counter_charge(&memcg->res, csize, &fail_res); 2574 - 2575 - if (likely(!ret)) { 2576 - if (!do_swap_account) 2577 - return CHARGE_OK; 2578 - ret = res_counter_charge(&memcg->memsw, csize, &fail_res); 2579 - if (likely(!ret)) 2580 - return CHARGE_OK; 2581 - 2582 - res_counter_uncharge(&memcg->res, csize); 2583 - mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); 2584 - flags |= MEM_CGROUP_RECLAIM_NOSWAP; 2585 - } else 2586 - mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); 2587 - /* 2588 - * Never reclaim on behalf of optional batching, retry with a 2589 - * single page instead. 2590 - */ 2591 - if (nr_pages > min_pages) 2592 - return CHARGE_RETRY; 2593 - 2594 - if (!(gfp_mask & __GFP_WAIT)) 2595 - return CHARGE_WOULDBLOCK; 2596 - 2597 - if (gfp_mask & __GFP_NORETRY) 2598 - return CHARGE_NOMEM; 2599 - 2600 - ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags); 2601 - if (mem_cgroup_margin(mem_over_limit) >= nr_pages) 2602 - return CHARGE_RETRY; 2603 - /* 2604 - * Even though the limit is exceeded at this point, reclaim 2605 - * may have been able to free some pages. Retry the charge 2606 - * before killing the task. 
2607 - * 2608 - * Only for regular pages, though: huge pages are rather 2609 - * unlikely to succeed so close to the limit, and we fall back 2610 - * to regular pages anyway in case of failure. 2611 - */ 2612 - if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret) 2613 - return CHARGE_RETRY; 2614 - 2615 - /* 2616 - * At task move, charge accounts can be doubly counted. So, it's 2617 - * better to wait until the end of task_move if something is going on. 2618 - */ 2619 - if (mem_cgroup_wait_acct_move(mem_over_limit)) 2620 - return CHARGE_RETRY; 2621 - 2622 - if (invoke_oom) 2623 - mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize)); 2624 - 2625 - return CHARGE_NOMEM; 2626 - } 2627 - 2628 2554 /** 2629 2555 * mem_cgroup_try_charge - try charging a memcg 2630 2556 * @memcg: memcg to charge 2631 2557 * @nr_pages: number of pages to charge 2632 - * @oom: trigger OOM if reclaim fails 2633 2558 * 2634 2559 * Returns 0 if @memcg was charged successfully, -EINTR if the charge 2635 2560 * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed. 
2636 2561 */ 2637 2562 static int mem_cgroup_try_charge(struct mem_cgroup *memcg, 2638 2563 gfp_t gfp_mask, 2639 - unsigned int nr_pages, 2640 - bool oom) 2564 + unsigned int nr_pages) 2641 2565 { 2642 2566 unsigned int batch = max(CHARGE_BATCH, nr_pages); 2643 - int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; 2644 - int ret; 2567 + int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 2568 + struct mem_cgroup *mem_over_limit; 2569 + struct res_counter *fail_res; 2570 + unsigned long nr_reclaimed; 2571 + unsigned long flags = 0; 2572 + unsigned long long size; 2573 + int ret = 0; 2645 2574 2646 - if (mem_cgroup_is_root(memcg)) 2575 + retry: 2576 + if (consume_stock(memcg, nr_pages)) 2647 2577 goto done; 2578 + 2579 + size = batch * PAGE_SIZE; 2580 + if (!res_counter_charge(&memcg->res, size, &fail_res)) { 2581 + if (!do_swap_account) 2582 + goto done_restock; 2583 + if (!res_counter_charge(&memcg->memsw, size, &fail_res)) 2584 + goto done_restock; 2585 + res_counter_uncharge(&memcg->res, size); 2586 + mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); 2587 + flags |= MEM_CGROUP_RECLAIM_NOSWAP; 2588 + } else 2589 + mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); 2590 + 2591 + if (batch > nr_pages) { 2592 + batch = nr_pages; 2593 + goto retry; 2594 + } 2595 + 2648 2596 /* 2649 2597 * Unlike in global OOM situations, memcg is not in a physical 2650 2598 * memory shortage. Allow dying and OOM-killed tasks to ··· 2607 2659 if (unlikely(task_in_memcg_oom(current))) 2608 2660 goto nomem; 2609 2661 2662 + if (!(gfp_mask & __GFP_WAIT)) 2663 + goto nomem; 2664 + 2665 + nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags); 2666 + 2667 + if (mem_cgroup_margin(mem_over_limit) >= nr_pages) 2668 + goto retry; 2669 + 2670 + if (gfp_mask & __GFP_NORETRY) 2671 + goto nomem; 2672 + /* 2673 + * Even though the limit is exceeded at this point, reclaim 2674 + * may have been able to free some pages. Retry the charge 2675 + * before killing the task. 
2676 + * 2677 + * Only for regular pages, though: huge pages are rather 2678 + * unlikely to succeed so close to the limit, and we fall back 2679 + * to regular pages anyway in case of failure. 2680 + */ 2681 + if (nr_reclaimed && nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER)) 2682 + goto retry; 2683 + /* 2684 + * At task move, charge accounts can be doubly counted. So, it's 2685 + * better to wait until the end of task_move if something is going on. 2686 + */ 2687 + if (mem_cgroup_wait_acct_move(mem_over_limit)) 2688 + goto retry; 2689 + 2690 + if (nr_retries--) 2691 + goto retry; 2692 + 2610 2693 if (gfp_mask & __GFP_NOFAIL) 2611 - oom = false; 2612 - again: 2613 - if (consume_stock(memcg, nr_pages)) 2614 - goto done; 2694 + goto bypass; 2615 2695 2616 - do { 2617 - bool invoke_oom = oom && !nr_oom_retries; 2696 + if (fatal_signal_pending(current)) 2697 + goto bypass; 2618 2698 2619 - /* If killed, bypass charge */ 2620 - if (fatal_signal_pending(current)) 2621 - goto bypass; 2622 - 2623 - ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, 2624 - nr_pages, invoke_oom); 2625 - switch (ret) { 2626 - case CHARGE_OK: 2627 - break; 2628 - case CHARGE_RETRY: /* not in OOM situation but retry */ 2629 - batch = nr_pages; 2630 - goto again; 2631 - case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ 2632 - goto nomem; 2633 - case CHARGE_NOMEM: /* OOM routine works */ 2634 - if (!oom || invoke_oom) 2635 - goto nomem; 2636 - nr_oom_retries--; 2637 - break; 2638 - } 2639 - } while (ret != CHARGE_OK); 2640 - 2641 - if (batch > nr_pages) 2642 - refill_stock(memcg, batch - nr_pages); 2643 - done: 2644 - return 0; 2699 + mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages)); 2645 2700 nomem: 2646 2701 if (!(gfp_mask & __GFP_NOFAIL)) 2647 2702 return -ENOMEM; 2648 2703 bypass: 2649 - return -EINTR; 2704 + memcg = root_mem_cgroup; 2705 + ret = -EINTR; 2706 + goto retry; 2707 + 2708 + done_restock: 2709 + if (batch > nr_pages) 2710 + refill_stock(memcg, batch - nr_pages); 2711 + done: 
2712 + return ret; 2650 2713 } 2651 2714 2652 2715 /** ··· 2671 2712 */ 2672 2713 static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, 2673 2714 gfp_t gfp_mask, 2674 - unsigned int nr_pages, 2675 - bool oom) 2715 + unsigned int nr_pages) 2676 2716 2677 2717 { 2678 2718 struct mem_cgroup *memcg; 2679 2719 int ret; 2680 2720 2681 2721 memcg = get_mem_cgroup_from_mm(mm); 2682 - ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom); 2722 + ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages); 2683 2723 css_put(&memcg->css); 2684 2724 if (ret == -EINTR) 2685 2725 memcg = root_mem_cgroup; ··· 2696 2738 static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, 2697 2739 unsigned int nr_pages) 2698 2740 { 2699 - if (!mem_cgroup_is_root(memcg)) { 2700 - unsigned long bytes = nr_pages * PAGE_SIZE; 2741 + unsigned long bytes = nr_pages * PAGE_SIZE; 2701 2742 2702 - res_counter_uncharge(&memcg->res, bytes); 2703 - if (do_swap_account) 2704 - res_counter_uncharge(&memcg->memsw, bytes); 2705 - } 2743 + res_counter_uncharge(&memcg->res, bytes); 2744 + if (do_swap_account) 2745 + res_counter_uncharge(&memcg->memsw, bytes); 2706 2746 } 2707 2747 2708 2748 /* ··· 2711 2755 unsigned int nr_pages) 2712 2756 { 2713 2757 unsigned long bytes = nr_pages * PAGE_SIZE; 2714 - 2715 - if (mem_cgroup_is_root(memcg)) 2716 - return; 2717 2758 2718 2759 res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); 2719 2760 if (do_swap_account) ··· 2795 2842 } 2796 2843 2797 2844 pc->mem_cgroup = memcg; 2798 - /* 2799 - * We access a page_cgroup asynchronously without lock_page_cgroup(). 2800 - * Especially when a page_cgroup is taken from a page, pc->mem_cgroup 2801 - * is accessed after testing USED bit. To make pc->mem_cgroup visible 2802 - * before USED bit, we need memory barrier here. 2803 - * See mem_cgroup_add_lru_list(), etc. 
2804 - */ 2805 - smp_wmb(); 2806 2845 SetPageCgroupUsed(pc); 2807 2846 2808 2847 if (lrucare) { ··· 2882 2937 if (ret) 2883 2938 return ret; 2884 2939 2885 - ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT, 2886 - oom_gfp_allowed(gfp)); 2940 + ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT); 2887 2941 if (ret == -EINTR) { 2888 2942 /* 2889 2943 * mem_cgroup_try_charge() chosed to bypass to root due to ··· 3407 3463 memcg_uncharge_kmem(memcg, PAGE_SIZE << order); 3408 3464 return; 3409 3465 } 3410 - 3466 + /* 3467 + * The page is freshly allocated and not visible to any 3468 + * outside callers yet. Set up pc non-atomically. 3469 + */ 3411 3470 pc = lookup_page_cgroup(page); 3412 - lock_page_cgroup(pc); 3413 3471 pc->mem_cgroup = memcg; 3414 - SetPageCgroupUsed(pc); 3415 - unlock_page_cgroup(pc); 3472 + pc->flags = PCG_USED; 3416 3473 } 3417 3474 3418 3475 void __memcg_kmem_uncharge_pages(struct page *page, int order) ··· 3423 3478 3424 3479 3425 3480 pc = lookup_page_cgroup(page); 3426 - /* 3427 - * Fast unlocked return. Theoretically might have changed, have to 3428 - * check again after locking. 
3429 - */ 3430 3481 if (!PageCgroupUsed(pc)) 3431 3482 return; 3432 3483 3433 - lock_page_cgroup(pc); 3434 - if (PageCgroupUsed(pc)) { 3435 - memcg = pc->mem_cgroup; 3436 - ClearPageCgroupUsed(pc); 3437 - } 3438 - unlock_page_cgroup(pc); 3484 + memcg = pc->mem_cgroup; 3485 + pc->flags = 0; 3439 3486 3440 3487 /* 3441 3488 * We trust that only if there is a memcg associated with the page, it ··· 3468 3531 for (i = 1; i < HPAGE_PMD_NR; i++) { 3469 3532 pc = head_pc + i; 3470 3533 pc->mem_cgroup = memcg; 3471 - smp_wmb();/* see __commit_charge() */ 3472 3534 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; 3473 3535 } 3474 3536 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], ··· 3623 3687 { 3624 3688 unsigned int nr_pages = 1; 3625 3689 struct mem_cgroup *memcg; 3626 - bool oom = true; 3627 3690 3628 3691 if (mem_cgroup_disabled()) 3629 3692 return 0; ··· 3634 3699 if (PageTransHuge(page)) { 3635 3700 nr_pages <<= compound_order(page); 3636 3701 VM_BUG_ON_PAGE(!PageTransHuge(page), page); 3637 - /* 3638 - * Never OOM-kill a process for a huge page. The 3639 - * fault handler will fall back to regular pages. 
3640 - */ 3641 - oom = false; 3642 3702 } 3643 3703 3644 - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom); 3704 + memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages); 3645 3705 if (!memcg) 3646 3706 return -ENOMEM; 3647 3707 __mem_cgroup_commit_charge(memcg, page, nr_pages, ··· 3673 3743 memcg = try_get_mem_cgroup_from_page(page); 3674 3744 if (!memcg) 3675 3745 memcg = get_mem_cgroup_from_mm(mm); 3676 - ret = mem_cgroup_try_charge(memcg, mask, 1, true); 3746 + ret = mem_cgroup_try_charge(memcg, mask, 1); 3677 3747 css_put(&memcg->css); 3678 3748 if (ret == -EINTR) 3679 3749 memcg = root_mem_cgroup; ··· 3700 3770 if (!PageSwapCache(page)) { 3701 3771 struct mem_cgroup *memcg; 3702 3772 3703 - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); 3773 + memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); 3704 3774 if (!memcg) 3705 3775 return -ENOMEM; 3706 3776 *memcgp = memcg; ··· 3769 3839 return 0; 3770 3840 } 3771 3841 3772 - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); 3842 + memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); 3773 3843 if (!memcg) 3774 3844 return -ENOMEM; 3775 3845 __mem_cgroup_commit_charge(memcg, page, 1, type, false); ··· 3923 3993 * replacement page, so leave it alone when phasing out the 3924 3994 * page that is unused after the migration. 3925 3995 */ 3926 - if (!end_migration && !mem_cgroup_is_root(memcg)) 3996 + if (!end_migration) 3927 3997 mem_cgroup_do_uncharge(memcg, nr_pages, ctype); 3928 3998 3929 3999 return memcg; ··· 4056 4126 * We uncharge this because swap is freed. This memcg can 4057 4127 * be obsolete one. We avoid calling css_tryget_online(). 
4058 4128 */ 4059 - if (!mem_cgroup_is_root(memcg)) 4060 - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 4129 + res_counter_uncharge(&memcg->memsw, PAGE_SIZE); 4061 4130 mem_cgroup_swap_statistics(memcg, false); 4062 4131 css_put(&memcg->css); 4063 4132 } ··· 4746 4817 return retval; 4747 4818 } 4748 4819 4749 - 4750 - static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg, 4751 - enum mem_cgroup_stat_index idx) 4752 - { 4753 - struct mem_cgroup *iter; 4754 - long val = 0; 4755 - 4756 - /* Per-cpu values can be negative, use a signed accumulator */ 4757 - for_each_mem_cgroup_tree(iter, memcg) 4758 - val += mem_cgroup_read_stat(iter, idx); 4759 - 4760 - if (val < 0) /* race ? */ 4761 - val = 0; 4762 - return val; 4763 - } 4764 - 4765 - static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) 4766 - { 4767 - u64 val; 4768 - 4769 - if (!mem_cgroup_is_root(memcg)) { 4770 - if (!swap) 4771 - return res_counter_read_u64(&memcg->res, RES_USAGE); 4772 - else 4773 - return res_counter_read_u64(&memcg->memsw, RES_USAGE); 4774 - } 4775 - 4776 - /* 4777 - * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS 4778 - * as well as in MEM_CGROUP_STAT_RSS_HUGE. 
4779 - */ 4780 - val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); 4781 - val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS); 4782 - 4783 - if (swap) 4784 - val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP); 4785 - 4786 - return val << PAGE_SHIFT; 4787 - } 4788 - 4789 4820 static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, 4790 - struct cftype *cft) 4821 + struct cftype *cft) 4791 4822 { 4792 4823 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4793 - u64 val; 4794 - int name; 4795 - enum res_type type; 4796 - 4797 - type = MEMFILE_TYPE(cft->private); 4798 - name = MEMFILE_ATTR(cft->private); 4824 + enum res_type type = MEMFILE_TYPE(cft->private); 4825 + int name = MEMFILE_ATTR(cft->private); 4799 4826 4800 4827 switch (type) { 4801 4828 case _MEM: 4802 - if (name == RES_USAGE) 4803 - val = mem_cgroup_usage(memcg, false); 4804 - else 4805 - val = res_counter_read_u64(&memcg->res, name); 4806 - break; 4829 + return res_counter_read_u64(&memcg->res, name); 4807 4830 case _MEMSWAP: 4808 - if (name == RES_USAGE) 4809 - val = mem_cgroup_usage(memcg, true); 4810 - else 4811 - val = res_counter_read_u64(&memcg->memsw, name); 4812 - break; 4831 + return res_counter_read_u64(&memcg->memsw, name); 4813 4832 case _KMEM: 4814 - val = res_counter_read_u64(&memcg->kmem, name); 4833 + return res_counter_read_u64(&memcg->kmem, name); 4815 4834 break; 4816 4835 default: 4817 4836 BUG(); 4818 4837 } 4819 - 4820 - return val; 4821 4838 } 4822 4839 4823 4840 #ifdef CONFIG_MEMCG_KMEM ··· 5225 5350 if (!t) 5226 5351 goto unlock; 5227 5352 5228 - usage = mem_cgroup_usage(memcg, swap); 5353 + if (!swap) 5354 + usage = res_counter_read_u64(&memcg->res, RES_USAGE); 5355 + else 5356 + usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); 5229 5357 5230 5358 /* 5231 5359 * current_threshold points to threshold just below or equal to usage. 
··· 5324 5446 5325 5447 mutex_lock(&memcg->thresholds_lock); 5326 5448 5327 - if (type == _MEM) 5449 + if (type == _MEM) { 5328 5450 thresholds = &memcg->thresholds; 5329 - else if (type == _MEMSWAP) 5451 + usage = res_counter_read_u64(&memcg->res, RES_USAGE); 5452 + } else if (type == _MEMSWAP) { 5330 5453 thresholds = &memcg->memsw_thresholds; 5331 - else 5454 + usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); 5455 + } else 5332 5456 BUG(); 5333 - 5334 - usage = mem_cgroup_usage(memcg, type == _MEMSWAP); 5335 5457 5336 5458 /* Check if a threshold crossed before adding a new one */ 5337 5459 if (thresholds->primary) ··· 5412 5534 int i, j, size; 5413 5535 5414 5536 mutex_lock(&memcg->thresholds_lock); 5415 - if (type == _MEM) 5537 + 5538 + if (type == _MEM) { 5416 5539 thresholds = &memcg->thresholds; 5417 - else if (type == _MEMSWAP) 5540 + usage = res_counter_read_u64(&memcg->res, RES_USAGE); 5541 + } else if (type == _MEMSWAP) { 5418 5542 thresholds = &memcg->memsw_thresholds; 5419 - else 5543 + usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); 5544 + } else 5420 5545 BUG(); 5421 5546 5422 5547 if (!thresholds->primary) 5423 5548 goto unlock; 5424 - 5425 - usage = mem_cgroup_usage(memcg, type == _MEMSWAP); 5426 5549 5427 5550 /* Check if a threshold crossed before removing */ 5428 5551 __mem_cgroup_threshold(memcg, type == _MEMSWAP); ··· 6178 6299 * core guarantees its existence. 
6179 6300 */ 6180 6301 } else { 6181 - res_counter_init(&memcg->res, NULL); 6182 - res_counter_init(&memcg->memsw, NULL); 6183 - res_counter_init(&memcg->kmem, NULL); 6302 + res_counter_init(&memcg->res, &root_mem_cgroup->res); 6303 + res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw); 6304 + res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem); 6184 6305 /* 6185 6306 * Deeper hierachy with use_hierarchy == false doesn't make 6186 6307 * much sense so let cgroup subsystem know about this ··· 6314 6435 6315 6436 #ifdef CONFIG_MMU 6316 6437 /* Handlers for move charge at task migration. */ 6317 - #define PRECHARGE_COUNT_AT_ONCE 256 6318 6438 static int mem_cgroup_do_precharge(unsigned long count) 6319 6439 { 6320 - int ret = 0; 6321 - int batch_count = PRECHARGE_COUNT_AT_ONCE; 6322 - struct mem_cgroup *memcg = mc.to; 6440 + int ret; 6323 6441 6324 - if (mem_cgroup_is_root(memcg)) { 6325 - mc.precharge += count; 6326 - /* we don't need css_get for root */ 6327 - return ret; 6328 - } 6329 - /* try to charge at once */ 6330 - if (count > 1) { 6331 - struct res_counter *dummy; 6332 - /* 6333 - * "memcg" cannot be under rmdir() because we've already checked 6334 - * by cgroup_lock_live_cgroup() that it is not removed and we 6335 - * are still under the same cgroup_mutex. So we can postpone 6336 - * css_get(). 
6337 - */ 6338 - if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy)) 6339 - goto one_by_one; 6340 - if (do_swap_account && res_counter_charge(&memcg->memsw, 6341 - PAGE_SIZE * count, &dummy)) { 6342 - res_counter_uncharge(&memcg->res, PAGE_SIZE * count); 6343 - goto one_by_one; 6344 - } 6442 + /* Try a single bulk charge without reclaim first */ 6443 + ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); 6444 + if (!ret) { 6345 6445 mc.precharge += count; 6346 6446 return ret; 6347 6447 } 6348 - one_by_one: 6349 - /* fall back to one by one charge */ 6448 + if (ret == -EINTR) { 6449 + __mem_cgroup_cancel_charge(root_mem_cgroup, count); 6450 + return ret; 6451 + } 6452 + 6453 + /* Try charges one by one with reclaim */ 6350 6454 while (count--) { 6351 - if (signal_pending(current)) { 6352 - ret = -EINTR; 6353 - break; 6354 - } 6355 - if (!batch_count--) { 6356 - batch_count = PRECHARGE_COUNT_AT_ONCE; 6357 - cond_resched(); 6358 - } 6359 - ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false); 6455 + ret = mem_cgroup_try_charge(mc.to, 6456 + GFP_KERNEL & ~__GFP_NORETRY, 1); 6457 + /* 6458 + * In case of failure, any residual charges against 6459 + * mc.to will be dropped by mem_cgroup_clear_mc() 6460 + * later on. However, cancel any charges that are 6461 + * bypassed to root right away or they'll be lost. 
6462 + */ 6463 + if (ret == -EINTR) 6464 + __mem_cgroup_cancel_charge(root_mem_cgroup, 1); 6360 6465 if (ret) 6361 - /* mem_cgroup_clear_mc() will do uncharge later */ 6362 6466 return ret; 6363 6467 mc.precharge++; 6468 + cond_resched(); 6364 6469 } 6365 - return ret; 6470 + return 0; 6366 6471 } 6367 6472 6368 6473 /** ··· 6623 6760 /* we must fixup refcnts and charges */ 6624 6761 if (mc.moved_swap) { 6625 6762 /* uncharge swap account from the old cgroup */ 6626 - if (!mem_cgroup_is_root(mc.from)) 6627 - res_counter_uncharge(&mc.from->memsw, 6628 - PAGE_SIZE * mc.moved_swap); 6763 + res_counter_uncharge(&mc.from->memsw, 6764 + PAGE_SIZE * mc.moved_swap); 6629 6765 6630 6766 for (i = 0; i < mc.moved_swap; i++) 6631 6767 css_put(&mc.from->css); 6632 6768 6633 - if (!mem_cgroup_is_root(mc.to)) { 6634 - /* 6635 - * we charged both to->res and to->memsw, so we should 6636 - * uncharge to->res. 6637 - */ 6638 - res_counter_uncharge(&mc.to->res, 6639 - PAGE_SIZE * mc.moved_swap); 6640 - } 6769 + /* 6770 + * we charged both to->res and to->memsw, so we should 6771 + * uncharge to->res. 6772 + */ 6773 + res_counter_uncharge(&mc.to->res, 6774 + PAGE_SIZE * mc.moved_swap); 6641 6775 /* we've already done css_get(mc.to) */ 6642 6776 mc.moved_swap = 0; 6643 6777 }

+10

mm/memory-failure.c

··· 1173 1173 lock_page(hpage); 1174 1174 1175 1175 /* 1176 + * The page could have changed compound pages during the locking. 1177 + * If this happens just bail out. 1178 + */ 1179 + if (compound_head(p) != hpage) { 1180 + action_result(pfn, "different compound page after locking", IGNORED); 1181 + res = -EBUSY; 1182 + goto out; 1183 + } 1184 + 1185 + /* 1176 1186 * We use page flags to determine what action should be taken, but 1177 1187 * the flags can be modified by the error containment action. One 1178 1188 * example is an mlocked page, where PG_mlocked is cleared by

+47 -23

mm/memory.c

··· 884 884 return 0; 885 885 } 886 886 887 - int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, 887 + static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, 888 888 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, 889 889 unsigned long addr, unsigned long end) 890 890 { ··· 2399 2399 /* 2400 2400 * We enter with non-exclusive mmap_sem (to exclude vma changes, 2401 2401 * but allow concurrent faults), and pte mapped but not yet locked. 2402 - * We return with mmap_sem still held, but pte unmapped and unlocked. 2402 + * We return with pte unmapped and unlocked. 2403 + * 2404 + * We return with the mmap_sem locked or unlocked in the same cases 2405 + * as does filemap_fault(). 2403 2406 */ 2404 2407 static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, 2405 2408 unsigned long address, pte_t *page_table, pmd_t *pmd, ··· 2691 2688 return VM_FAULT_OOM; 2692 2689 } 2693 2690 2691 + /* 2692 + * The mmap_sem must have been held on entry, and may have been 2693 + * released depending on flags and vma->vm_ops->fault() return value. 2694 + * See filemap_fault() and __lock_page_or_retry(). 
2695 + */ 2694 2696 static int __do_fault(struct vm_area_struct *vma, unsigned long address, 2695 2697 pgoff_t pgoff, unsigned int flags, struct page **page) 2696 2698 { ··· 2752 2744 if (write) 2753 2745 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2754 2746 else if (pte_file(*pte) && pte_file_soft_dirty(*pte)) 2755 - pte_mksoft_dirty(entry); 2747 + entry = pte_mksoft_dirty(entry); 2756 2748 if (anon) { 2757 2749 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); 2758 2750 page_add_new_anon_rmap(page, vma, address); ··· 2766 2758 update_mmu_cache(vma, address, pte); 2767 2759 } 2768 2760 2769 - static unsigned long fault_around_bytes = rounddown_pow_of_two(65536); 2770 - 2771 - static inline unsigned long fault_around_pages(void) 2772 - { 2773 - return fault_around_bytes >> PAGE_SHIFT; 2774 - } 2775 - 2776 - static inline unsigned long fault_around_mask(void) 2777 - { 2778 - return ~(fault_around_bytes - 1) & PAGE_MASK; 2779 - } 2761 + static unsigned long fault_around_bytes __read_mostly = 2762 + rounddown_pow_of_two(65536); 2780 2763 2781 2764 #ifdef CONFIG_DEBUG_FS 2782 2765 static int fault_around_bytes_get(void *data, u64 *val) ··· 2833 2834 static void do_fault_around(struct vm_area_struct *vma, unsigned long address, 2834 2835 pte_t *pte, pgoff_t pgoff, unsigned int flags) 2835 2836 { 2836 - unsigned long start_addr; 2837 + unsigned long start_addr, nr_pages, mask; 2837 2838 pgoff_t max_pgoff; 2838 2839 struct vm_fault vmf; 2839 2840 int off; 2840 2841 2841 - start_addr = max(address & fault_around_mask(), vma->vm_start); 2842 + nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT; 2843 + mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; 2844 + 2845 + start_addr = max(address & mask, vma->vm_start); 2842 2846 off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); 2843 2847 pte -= off; 2844 2848 pgoff -= off; ··· 2853 2851 max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + 2854 2852 PTRS_PER_PTE - 1; 2855 2853 max_pgoff = 
min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1, 2856 - pgoff + fault_around_pages() - 1); 2854 + pgoff + nr_pages - 1); 2857 2855 2858 2856 /* Check if it makes any sense to call ->map_pages */ 2859 2857 while (!pte_none(*pte)) { ··· 2888 2886 * something). 2889 2887 */ 2890 2888 if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && 2891 - fault_around_pages() > 1) { 2889 + fault_around_bytes >> PAGE_SHIFT > 1) { 2892 2890 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 2893 2891 do_fault_around(vma, address, pte, pgoff, flags); 2894 2892 if (!pte_same(*pte, orig_pte)) ··· 3018 3016 return ret; 3019 3017 } 3020 3018 3019 + /* 3020 + * We enter with non-exclusive mmap_sem (to exclude vma changes, 3021 + * but allow concurrent faults). 3022 + * The mmap_sem may have been released depending on flags and our 3023 + * return value. See filemap_fault() and __lock_page_or_retry(). 3024 + */ 3021 3025 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3022 3026 unsigned long address, pte_t *page_table, pmd_t *pmd, 3023 3027 unsigned int flags, pte_t orig_pte) ··· 3048 3040 * 3049 3041 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3050 3042 * but allow concurrent faults), and pte mapped but not yet locked. 3051 - * We return with mmap_sem still held, but pte unmapped and unlocked. 3043 + * We return with pte unmapped and unlocked. 3044 + * The mmap_sem may have been released depending on flags and our 3045 + * return value. See filemap_fault() and __lock_page_or_retry(). 3052 3046 */ 3053 3047 static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3054 3048 unsigned long address, pte_t *page_table, pmd_t *pmd, ··· 3182 3172 * 3183 3173 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3184 3174 * but allow concurrent faults), and pte mapped but not yet locked. 3185 - * We return with mmap_sem still held, but pte unmapped and unlocked. 
3175 + * We return with pte unmapped and unlocked. 3176 + * 3177 + * The mmap_sem may have been released depending on flags and our 3178 + * return value. See filemap_fault() and __lock_page_or_retry(). 3186 3179 */ 3187 3180 static int handle_pte_fault(struct mm_struct *mm, 3188 3181 struct vm_area_struct *vma, unsigned long address, ··· 3194 3181 pte_t entry; 3195 3182 spinlock_t *ptl; 3196 3183 3197 - entry = *pte; 3184 + entry = ACCESS_ONCE(*pte); 3198 3185 if (!pte_present(entry)) { 3199 3186 if (pte_none(entry)) { 3200 3187 if (vma->vm_ops) { ··· 3245 3232 3246 3233 /* 3247 3234 * By the time we get here, we already hold the mm semaphore 3235 + * 3236 + * The mmap_sem may have been released depending on flags and our 3237 + * return value. See filemap_fault() and __lock_page_or_retry(). 3248 3238 */ 3249 3239 static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3250 3240 unsigned long address, unsigned int flags) ··· 3329 3313 return handle_pte_fault(mm, vma, address, pte, pmd, flags); 3330 3314 } 3331 3315 3316 + /* 3317 + * By the time we get here, we already hold the mm semaphore 3318 + * 3319 + * The mmap_sem may have been released depending on flags and our 3320 + * return value. See filemap_fault() and __lock_page_or_retry(). 3321 + */ 3332 3322 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3333 3323 unsigned long address, unsigned int flags) 3334 3324 { ··· 3613 3591 ret = get_user_pages(tsk, mm, addr, 1, 3614 3592 write, 1, &page, &vma); 3615 3593 if (ret <= 0) { 3594 + #ifndef CONFIG_HAVE_IOREMAP_PROT 3595 + break; 3596 + #else 3616 3597 /* 3617 3598 * Check if this is a VM_IO | VM_PFNMAP VMA, which 3618 3599 * we can access using slightly different code. 
3619 3600 */ 3620 - #ifdef CONFIG_HAVE_IOREMAP_PROT 3621 3601 vma = find_vma(mm, addr); 3622 3602 if (!vma || vma->vm_start > addr) 3623 3603 break; ··· 3627 3603 ret = vma->vm_ops->access(vma, addr, buf, 3628 3604 len, write); 3629 3605 if (ret <= 0) 3630 - #endif 3631 3606 break; 3632 3607 bytes = ret; 3608 + #endif 3633 3609 } else { 3634 3610 bytes = len; 3635 3611 offset = addr & (PAGE_SIZE-1);

+38 -7

mm/memory_hotplug.c

··· 284 284 } 285 285 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ 286 286 287 - static void grow_zone_span(struct zone *zone, unsigned long start_pfn, 288 - unsigned long end_pfn) 287 + static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn, 288 + unsigned long end_pfn) 289 289 { 290 290 unsigned long old_zone_end_pfn; 291 291 ··· 427 427 return -1; 428 428 } 429 429 430 - static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, 431 - unsigned long end_pfn) 430 + static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, 431 + unsigned long end_pfn) 432 432 { 433 433 unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat); 434 434 ··· 977 977 zone = page_zone(pfn_to_page(pfn)); 978 978 979 979 ret = -EINVAL; 980 - if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) && 980 + if ((zone_idx(zone) > ZONE_NORMAL || 981 + online_type == MMOP_ONLINE_MOVABLE) && 981 982 !can_online_high_movable(zone)) 982 983 goto out; 983 984 984 - if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) { 985 + if (online_type == MMOP_ONLINE_KERNEL && 986 + zone_idx(zone) == ZONE_MOVABLE) { 985 987 if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) 986 988 goto out; 987 989 } 988 - if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) { 990 + if (online_type == MMOP_ONLINE_MOVABLE && 991 + zone_idx(zone) == ZONE_MOVABLE - 1) { 989 992 if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) 990 993 goto out; 991 994 } ··· 1157 1154 } 1158 1155 1159 1156 return 0; 1157 + } 1158 + 1159 + /* 1160 + * If movable zone has already been set up, newly added memory should be checked. 1161 + * If its address is higher than movable zone, it should be added as movable. 1162 + * Without this check, movable zone may overlap with other zones. 
1163 + */ 1164 + static int should_add_memory_movable(int nid, u64 start, u64 size) 1165 + { 1166 + unsigned long start_pfn = start >> PAGE_SHIFT; 1167 + pg_data_t *pgdat = NODE_DATA(nid); 1168 + struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE; 1169 + 1170 + if (zone_is_empty(movable_zone)) 1171 + return 0; 1172 + 1173 + if (movable_zone->zone_start_pfn <= start_pfn) 1174 + return 1; 1175 + 1176 + return 0; 1177 + } 1178 + 1179 + int zone_for_memory(int nid, u64 start, u64 size, int zone_default) 1180 + { 1181 + if (should_add_memory_movable(nid, start, size)) 1182 + return ZONE_MOVABLE; 1183 + 1184 + return zone_default; 1160 1185 } 1161 1186 1162 1187 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */

+8 -1

mm/mlock.c

··· 210 210 * @vma: target vma 211 211 * @start: start address 212 212 * @end: end address 213 + * @nonblocking: 213 214 * 214 215 * This takes care of making the pages present too. 215 216 * 216 217 * return 0 on success, negative error code on error. 217 218 * 218 - * vma->vm_mm->mmap_sem must be held for at least read. 219 + * vma->vm_mm->mmap_sem must be held. 220 + * 221 + * If @nonblocking is NULL, it may be held for read or write and will 222 + * be unperturbed. 223 + * 224 + * If @nonblocking is non-NULL, it must be held for read only and may be 225 + * released. If it's released, *@nonblocking will be set to 0. 219 226 */ 220 227 long __mlock_vma_pages_range(struct vm_area_struct *vma, 221 228 unsigned long start, unsigned long end, int *nonblocking)

+5

mm/mmap.c

··· 31 31 #include <linux/mempolicy.h> 32 32 #include <linux/rmap.h> 33 33 #include <linux/mmu_notifier.h> 34 + #include <linux/mmdebug.h> 34 35 #include <linux/perf_event.h> 35 36 #include <linux/audit.h> 36 37 #include <linux/khugepaged.h> ··· 134 133 int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) 135 134 { 136 135 unsigned long free, allowed, reserve; 136 + 137 + VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) < 138 + -(s64)vm_committed_as_batch * num_online_cpus(), 139 + "memory commitment underflow"); 137 140 138 141 vm_acct_memory(pages); 139 142

+39 -1

mm/mmu_notifier.c

··· 23 23 static struct srcu_struct srcu; 24 24 25 25 /* 26 + * This function allows mmu_notifier::release callback to delay a call to 27 + * a function that will free appropriate resources. The function must be 28 + * quick and must not block. 29 + */ 30 + void mmu_notifier_call_srcu(struct rcu_head *rcu, 31 + void (*func)(struct rcu_head *rcu)) 32 + { 33 + call_srcu(&srcu, rcu, func); 34 + } 35 + EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu); 36 + 37 + void mmu_notifier_synchronize(void) 38 + { 39 + /* Wait for any running method to finish. */ 40 + srcu_barrier(&srcu); 41 + } 42 + EXPORT_SYMBOL_GPL(mmu_notifier_synchronize); 43 + 44 + /* 26 45 * This function can't run concurrently against mmu_notifier_register 27 46 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap 28 47 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers ··· 72 53 */ 73 54 if (mn->ops->release) 74 55 mn->ops->release(mn, mm); 75 - srcu_read_unlock(&srcu, id); 76 56 77 57 spin_lock(&mm->mmu_notifier_mm->lock); 78 58 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { ··· 87 69 hlist_del_init_rcu(&mn->hlist); 88 70 } 89 71 spin_unlock(&mm->mmu_notifier_mm->lock); 72 + srcu_read_unlock(&srcu, id); 90 73 91 74 /* 92 75 * synchronize_srcu here prevents mmu_notifier_release from returning to ··· 343 324 mmdrop(mm); 344 325 } 345 326 EXPORT_SYMBOL_GPL(mmu_notifier_unregister); 327 + 328 + /* 329 + * Same as mmu_notifier_unregister but no callback and no srcu synchronization. 330 + */ 331 + void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, 332 + struct mm_struct *mm) 333 + { 334 + spin_lock(&mm->mmu_notifier_mm->lock); 335 + /* 336 + * Can not use list_del_rcu() since __mmu_notifier_release 337 + * can delete it before we hold the lock. 
338 + */ 339 + hlist_del_init_rcu(&mn->hlist); 340 + spin_unlock(&mm->mmu_notifier_mm->lock); 341 + 342 + BUG_ON(atomic_read(&mm->mm_count) <= 0); 343 + mmdrop(mm); 344 + } 345 + EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release); 346 346 347 347 static int __init mmu_notifier_init(void) 348 348 {

+14 -20

mm/oom_kill.c

··· 258 258 unsigned long totalpages, const nodemask_t *nodemask, 259 259 bool force_kill) 260 260 { 261 - if (task->exit_state) 262 - return OOM_SCAN_CONTINUE; 263 261 if (oom_unkillable_task(task, NULL, nodemask)) 264 262 return OOM_SCAN_CONTINUE; 265 263 ··· 557 559 * if a parallel OOM killing is already taking place that includes a zone in 558 560 * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. 559 561 */ 560 - int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) 562 + bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask) 561 563 { 562 564 struct zoneref *z; 563 565 struct zone *zone; 564 - int ret = 1; 566 + bool ret = true; 565 567 566 568 spin_lock(&zone_scan_lock); 567 - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 569 + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) 568 570 if (zone_is_oom_locked(zone)) { 569 - ret = 0; 571 + ret = false; 570 572 goto out; 571 573 } 572 - } 573 574 574 - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 575 - /* 576 - * Lock each zone in the zonelist under zone_scan_lock so a 577 - * parallel invocation of try_set_zonelist_oom() doesn't succeed 578 - * when it shouldn't. 579 - */ 575 + /* 576 + * Lock each zone in the zonelist under zone_scan_lock so a parallel 577 + * call to oom_zonelist_trylock() doesn't succeed when it shouldn't. 578 + */ 579 + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) 580 580 zone_set_flag(zone, ZONE_OOM_LOCKED); 581 - } 582 581 583 582 out: 584 583 spin_unlock(&zone_scan_lock); ··· 587 592 * allocation attempts with zonelists containing them may now recall the OOM 588 593 * killer, if necessary. 
589 594 */ 590 - void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) 595 + void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask) 591 596 { 592 597 struct zoneref *z; 593 598 struct zone *zone; 594 599 595 600 spin_lock(&zone_scan_lock); 596 - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { 601 + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) 597 602 zone_clear_flag(zone, ZONE_OOM_LOCKED); 598 - } 599 603 spin_unlock(&zone_scan_lock); 600 604 } 601 605 ··· 688 694 if (mem_cgroup_oom_synchronize(true)) 689 695 return; 690 696 691 - zonelist = node_zonelist(first_online_node, GFP_KERNEL); 692 - if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { 697 + zonelist = node_zonelist(first_memory_node, GFP_KERNEL); 698 + if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) { 693 699 out_of_memory(NULL, 0, 0, NULL, false); 694 - clear_zonelist_oom(zonelist, GFP_KERNEL); 700 + oom_zonelist_unlock(zonelist, GFP_KERNEL); 695 701 } 696 702 }

+1 -4

mm/page-writeback.c

··· 261 261 */ 262 262 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) 263 263 { 264 + const unsigned long available_memory = global_dirtyable_memory(); 264 265 unsigned long background; 265 266 unsigned long dirty; 266 - unsigned long uninitialized_var(available_memory); 267 267 struct task_struct *tsk; 268 - 269 - if (!vm_dirty_bytes || !dirty_background_bytes) 270 - available_memory = global_dirtyable_memory(); 271 268 272 269 if (vm_dirty_bytes) 273 270 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);

+83 -76

mm/page_alloc.c

··· 680 680 int migratetype = 0; 681 681 int batch_free = 0; 682 682 int to_free = count; 683 + unsigned long nr_scanned; 683 684 684 685 spin_lock(&zone->lock); 685 - zone->pages_scanned = 0; 686 + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); 687 + if (nr_scanned) 688 + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); 686 689 687 690 while (to_free) { 688 691 struct page *page; ··· 734 731 unsigned int order, 735 732 int migratetype) 736 733 { 734 + unsigned long nr_scanned; 737 735 spin_lock(&zone->lock); 738 - zone->pages_scanned = 0; 736 + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); 737 + if (nr_scanned) 738 + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); 739 739 740 740 __free_one_page(page, pfn, zone, order, migratetype); 741 741 if (unlikely(!is_migrate_isolate(migratetype))) ··· 1263 1257 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) 1264 1258 { 1265 1259 unsigned long flags; 1266 - int to_drain; 1267 - unsigned long batch; 1260 + int to_drain, batch; 1268 1261 1269 1262 local_irq_save(flags); 1270 1263 batch = ACCESS_ONCE(pcp->batch); 1271 - if (pcp->count >= batch) 1272 - to_drain = batch; 1273 - else 1274 - to_drain = pcp->count; 1264 + to_drain = min(pcp->count, batch); 1275 1265 if (to_drain > 0) { 1276 1266 free_pcppages_bulk(zone, to_drain, pcp); 1277 1267 pcp->count -= to_drain; ··· 1612 1610 } 1613 1611 1614 1612 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); 1613 + if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 && 1614 + !zone_is_fair_depleted(zone)) 1615 + zone_set_flag(zone, ZONE_FAIR_DEPLETED); 1615 1616 1616 1617 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1617 1618 zone_statistics(preferred_zone, zone, gfp_flags); ··· 1717 1712 { 1718 1713 /* free_pages my go negative - that's OK */ 1719 1714 long min = mark; 1720 - long lowmem_reserve = z->lowmem_reserve[classzone_idx]; 1721 1715 int o; 1722 1716 long free_cma = 0; 1723 1717 ··· 1731 1727 free_cma = 
zone_page_state(z, NR_FREE_CMA_PAGES); 1732 1728 #endif 1733 1729 1734 - if (free_pages - free_cma <= min + lowmem_reserve) 1730 + if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) 1735 1731 return false; 1736 1732 for (o = 0; o < order; o++) { 1737 1733 /* At the next order, this order's pages become unavailable */ ··· 1926 1922 1927 1923 #endif /* CONFIG_NUMA */ 1928 1924 1925 + static void reset_alloc_batches(struct zone *preferred_zone) 1926 + { 1927 + struct zone *zone = preferred_zone->zone_pgdat->node_zones; 1928 + 1929 + do { 1930 + mod_zone_page_state(zone, NR_ALLOC_BATCH, 1931 + high_wmark_pages(zone) - low_wmark_pages(zone) - 1932 + atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); 1933 + zone_clear_flag(zone, ZONE_FAIR_DEPLETED); 1934 + } while (zone++ != preferred_zone); 1935 + } 1936 + 1929 1937 /* 1930 1938 * get_page_from_freelist goes through the zonelist trying to allocate 1931 1939 * a page. ··· 1955 1939 int did_zlc_setup = 0; /* just call zlc_setup() one time */ 1956 1940 bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && 1957 1941 (gfp_mask & __GFP_WRITE); 1942 + int nr_fair_skipped = 0; 1943 + bool zonelist_rescan; 1958 1944 1959 1945 zonelist_scan: 1946 + zonelist_rescan = false; 1947 + 1960 1948 /* 1961 1949 * Scan zonelist, looking for a zone with enough free. 1962 1950 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. 
··· 1984 1964 */ 1985 1965 if (alloc_flags & ALLOC_FAIR) { 1986 1966 if (!zone_local(preferred_zone, zone)) 1967 + break; 1968 + if (zone_is_fair_depleted(zone)) { 1969 + nr_fair_skipped++; 1987 1970 continue; 1988 - if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) 1989 - continue; 1971 + } 1990 1972 } 1991 1973 /* 1992 1974 * When allocating a page cache page for writing, we ··· 2094 2072 zlc_mark_zone_full(zonelist, z); 2095 2073 } 2096 2074 2097 - if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { 2098 - /* Disable zlc cache for second zonelist scan */ 2099 - zlc_active = 0; 2100 - goto zonelist_scan; 2101 - } 2102 - 2103 - if (page) 2075 + if (page) { 2104 2076 /* 2105 2077 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was 2106 2078 * necessary to allocate the page. The expectation is ··· 2103 2087 * for !PFMEMALLOC purposes. 2104 2088 */ 2105 2089 page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); 2090 + return page; 2091 + } 2106 2092 2107 - return page; 2093 + /* 2094 + * The first pass makes sure allocations are spread fairly within the 2095 + * local node. However, the local node might have free pages left 2096 + * after the fairness batches are exhausted, and remote zones haven't 2097 + * even been considered yet. Try once more without fairness, and 2098 + * include remote zones now, before entering the slowpath and waking 2099 + * kswapd: prefer spilling to a remote zone over swapping locally. 
2100 + */ 2101 + if (alloc_flags & ALLOC_FAIR) { 2102 + alloc_flags &= ~ALLOC_FAIR; 2103 + if (nr_fair_skipped) { 2104 + zonelist_rescan = true; 2105 + reset_alloc_batches(preferred_zone); 2106 + } 2107 + if (nr_online_nodes > 1) 2108 + zonelist_rescan = true; 2109 + } 2110 + 2111 + if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) { 2112 + /* Disable zlc cache for second zonelist scan */ 2113 + zlc_active = 0; 2114 + zonelist_rescan = true; 2115 + } 2116 + 2117 + if (zonelist_rescan) 2118 + goto zonelist_scan; 2119 + 2120 + return NULL; 2108 2121 } 2109 2122 2110 2123 /* ··· 2246 2201 { 2247 2202 struct page *page; 2248 2203 2249 - /* Acquire the OOM killer lock for the zones in zonelist */ 2250 - if (!try_set_zonelist_oom(zonelist, gfp_mask)) { 2204 + /* Acquire the per-zone oom lock for each zone */ 2205 + if (!oom_zonelist_trylock(zonelist, gfp_mask)) { 2251 2206 schedule_timeout_uninterruptible(1); 2252 2207 return NULL; 2253 2208 } ··· 2285 2240 out_of_memory(zonelist, gfp_mask, order, nodemask, false); 2286 2241 2287 2242 out: 2288 - clear_zonelist_oom(zonelist, gfp_mask); 2243 + oom_zonelist_unlock(zonelist, gfp_mask); 2289 2244 return page; 2290 2245 } 2291 2246 ··· 2452 2407 } while (!page && (gfp_mask & __GFP_NOFAIL)); 2453 2408 2454 2409 return page; 2455 - } 2456 - 2457 - static void reset_alloc_batches(struct zonelist *zonelist, 2458 - enum zone_type high_zoneidx, 2459 - struct zone *preferred_zone) 2460 - { 2461 - struct zoneref *z; 2462 - struct zone *zone; 2463 - 2464 - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 2465 - /* 2466 - * Only reset the batches of zones that were actually 2467 - * considered in the fairness pass, we don't want to 2468 - * trash fairness information for zones that are not 2469 - * actually part of this zonelist's round-robin cycle. 
2470 - */ 2471 - if (!zone_local(preferred_zone, zone)) 2472 - continue; 2473 - mod_zone_page_state(zone, NR_ALLOC_BATCH, 2474 - high_wmark_pages(zone) - low_wmark_pages(zone) - 2475 - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); 2476 - } 2477 2410 } 2478 2411 2479 2412 static void wake_all_kswapds(unsigned int order, ··· 2639 2616 goto got_pg; 2640 2617 2641 2618 /* 2642 - * It can become very expensive to allocate transparent hugepages at 2643 - * fault, so use asynchronous memory compaction for THP unless it is 2644 - * khugepaged trying to collapse. 2645 - */ 2646 - if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD)) 2647 - migration_mode = MIGRATE_SYNC_LIGHT; 2648 - 2649 - /* 2650 2619 * If compaction is deferred for high-order allocations, it is because 2651 2620 * sync compaction recently failed. In this is the case and the caller 2652 2621 * requested a movable allocation that does not heavily disrupt the ··· 2647 2632 if ((deferred_compaction || contended_compaction) && 2648 2633 (gfp_mask & __GFP_NO_KSWAPD)) 2649 2634 goto nopage; 2635 + 2636 + /* 2637 + * It can become very expensive to allocate transparent hugepages at 2638 + * fault, so use asynchronous memory compaction for THP unless it is 2639 + * khugepaged trying to collapse. 
2640 + */ 2641 + if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE || 2642 + (current->flags & PF_KTHREAD)) 2643 + migration_mode = MIGRATE_SYNC_LIGHT; 2650 2644 2651 2645 /* Try direct reclaim and then allocating */ 2652 2646 page = __alloc_pages_direct_reclaim(gfp_mask, order, ··· 2790 2766 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) 2791 2767 alloc_flags |= ALLOC_CMA; 2792 2768 #endif 2793 - retry: 2794 2769 /* First allocation attempt */ 2795 2770 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, 2796 2771 zonelist, high_zoneidx, alloc_flags, 2797 2772 preferred_zone, classzone_idx, migratetype); 2798 2773 if (unlikely(!page)) { 2799 - /* 2800 - * The first pass makes sure allocations are spread 2801 - * fairly within the local node. However, the local 2802 - * node might have free pages left after the fairness 2803 - * batches are exhausted, and remote zones haven't 2804 - * even been considered yet. Try once more without 2805 - * fairness, and include remote zones now, before 2806 - * entering the slowpath and waking kswapd: prefer 2807 - * spilling to a remote zone over swapping locally. 2808 - */ 2809 - if (alloc_flags & ALLOC_FAIR) { 2810 - reset_alloc_batches(zonelist, high_zoneidx, 2811 - preferred_zone); 2812 - alloc_flags &= ~ALLOC_FAIR; 2813 - goto retry; 2814 - } 2815 2774 /* 2816 2775 * Runtime PM, block IO and its error handling path 2817 2776 * can deadlock because I/O on the device might not ··· 2969 2962 * Note this is not alloc_pages_exact_node() which allocates on a specific node, 2970 2963 * but is not exact. 
2971 2964 */ 2972 - void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) 2965 + void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) 2973 2966 { 2974 2967 unsigned order = get_order(size); 2975 2968 struct page *p = alloc_pages_node(nid, gfp_mask, order); ··· 2977 2970 return NULL; 2978 2971 return make_alloc_exact((unsigned long)page_address(p), order, size); 2979 2972 } 2980 - EXPORT_SYMBOL(alloc_pages_exact_nid); 2981 2973 2982 2974 /** 2983 2975 * free_pages_exact - release memory allocated via alloc_pages_exact() ··· 3058 3052 void si_meminfo(struct sysinfo *val) 3059 3053 { 3060 3054 val->totalram = totalram_pages; 3061 - val->sharedram = 0; 3055 + val->sharedram = global_page_state(NR_SHMEM); 3062 3056 val->freeram = global_page_state(NR_FREE_PAGES); 3063 3057 val->bufferram = nr_blockdev_pages(); 3064 3058 val->totalhigh = totalhigh_pages; ··· 3078 3072 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) 3079 3073 managed_pages += pgdat->node_zones[zone_type].managed_pages; 3080 3074 val->totalram = managed_pages; 3075 + val->sharedram = node_page_state(nid, NR_SHMEM); 3081 3076 val->freeram = node_page_state(nid, NR_FREE_PAGES); 3082 3077 #ifdef CONFIG_HIGHMEM 3083 3078 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages; ··· 3260 3253 K(zone_page_state(zone, NR_BOUNCE)), 3261 3254 K(zone_page_state(zone, NR_FREE_CMA_PAGES)), 3262 3255 K(zone_page_state(zone, NR_WRITEBACK_TEMP)), 3263 - zone->pages_scanned, 3256 + K(zone_page_state(zone, NR_PAGES_SCANNED)), 3264 3257 (!zone_reclaimable(zone) ? 
"yes" : "no") 3265 3258 ); 3266 3259 printk("lowmem_reserve[]:"); 3267 3260 for (i = 0; i < MAX_NR_ZONES; i++) 3268 - printk(" %lu", zone->lowmem_reserve[i]); 3261 + printk(" %ld", zone->lowmem_reserve[i]); 3269 3262 printk("\n"); 3270 3263 } 3271 3264 ··· 5586 5579 for_each_online_pgdat(pgdat) { 5587 5580 for (i = 0; i < MAX_NR_ZONES; i++) { 5588 5581 struct zone *zone = pgdat->node_zones + i; 5589 - unsigned long max = 0; 5582 + long max = 0; 5590 5583 5591 5584 /* Find valid and maximum lowmem_reserve in the zone */ 5592 5585 for (j = i; j < MAX_NR_ZONES; j++) {

+1 -2

mm/readahead.c

··· 326 326 * - thrashing threshold in memory tight systems 327 327 */ 328 328 static pgoff_t count_history_pages(struct address_space *mapping, 329 - struct file_ra_state *ra, 330 329 pgoff_t offset, unsigned long max) 331 330 { 332 331 pgoff_t head; ··· 348 349 { 349 350 pgoff_t size; 350 351 351 - size = count_history_pages(mapping, ra, offset, max); 352 + size = count_history_pages(mapping, offset, max); 352 353 353 354 /* 354 355 * not enough history pages:

+28 -11

mm/shmem.c

··· 149 149 vm_unacct_memory(VM_ACCT(size)); 150 150 } 151 151 152 + static inline int shmem_reacct_size(unsigned long flags, 153 + loff_t oldsize, loff_t newsize) 154 + { 155 + if (!(flags & VM_NORESERVE)) { 156 + if (VM_ACCT(newsize) > VM_ACCT(oldsize)) 157 + return security_vm_enough_memory_mm(current->mm, 158 + VM_ACCT(newsize) - VM_ACCT(oldsize)); 159 + else if (VM_ACCT(newsize) < VM_ACCT(oldsize)) 160 + vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize)); 161 + } 162 + return 0; 163 + } 164 + 152 165 /* 153 166 * ... whereas tmpfs objects are accounted incrementally as 154 167 * pages are allocated, in order to allow huge sparse files. ··· 293 280 */ 294 281 static int shmem_add_to_page_cache(struct page *page, 295 282 struct address_space *mapping, 296 - pgoff_t index, gfp_t gfp, void *expected) 283 + pgoff_t index, void *expected) 297 284 { 298 285 int error; 299 286 ··· 562 549 loff_t newsize = attr->ia_size; 563 550 564 551 if (newsize != oldsize) { 552 + error = shmem_reacct_size(SHMEM_I(inode)->flags, 553 + oldsize, newsize); 554 + if (error) 555 + return error; 565 556 i_size_write(inode, newsize); 566 557 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 567 558 } ··· 666 649 */ 667 650 if (!error) 668 651 error = shmem_add_to_page_cache(*pagep, mapping, index, 669 - GFP_NOWAIT, radswap); 652 + radswap); 670 653 if (error != -ENOMEM) { 671 654 /* 672 655 * Truncation and eviction use free_swap_and_cache(), which ··· 1112 1095 gfp & GFP_RECLAIM_MASK); 1113 1096 if (!error) { 1114 1097 error = shmem_add_to_page_cache(page, mapping, index, 1115 - gfp, swp_to_radix_entry(swap)); 1098 + swp_to_radix_entry(swap)); 1116 1099 /* 1117 1100 * We already confirmed swap under page lock, and make 1118 1101 * no memory allocation here, so usually no possibility ··· 1166 1149 __SetPageSwapBacked(page); 1167 1150 __set_page_locked(page); 1168 1151 if (sgp == SGP_WRITE) 1169 - init_page_accessed(page); 1152 + __SetPageReferenced(page); 1170 1153 1171 1154 error = 
mem_cgroup_charge_file(page, current->mm, 1172 1155 gfp & GFP_RECLAIM_MASK); ··· 1175 1158 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); 1176 1159 if (!error) { 1177 1160 error = shmem_add_to_page_cache(page, mapping, index, 1178 - gfp, NULL); 1161 + NULL); 1179 1162 radix_tree_preload_end(); 1180 1163 } 1181 1164 if (error) { ··· 2949 2932 this.len = strlen(name); 2950 2933 this.hash = 0; /* will go */ 2951 2934 sb = shm_mnt->mnt_sb; 2935 + path.mnt = mntget(shm_mnt); 2952 2936 path.dentry = d_alloc_pseudo(sb, &this); 2953 2937 if (!path.dentry) 2954 2938 goto put_memory; 2955 2939 d_set_d_op(path.dentry, &anon_ops); 2956 - path.mnt = mntget(shm_mnt); 2957 2940 2958 2941 res = ERR_PTR(-ENOSPC); 2959 2942 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); 2960 2943 if (!inode) 2961 - goto put_dentry; 2944 + goto put_memory; 2962 2945 2963 2946 inode->i_flags |= i_flags; 2964 2947 d_instantiate(path.dentry, inode); ··· 2966 2949 clear_nlink(inode); /* It is unlinked */ 2967 2950 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); 2968 2951 if (IS_ERR(res)) 2969 - goto put_dentry; 2952 + goto put_path; 2970 2953 2971 2954 res = alloc_file(&path, FMODE_WRITE | FMODE_READ, 2972 2955 &shmem_file_operations); 2973 2956 if (IS_ERR(res)) 2974 - goto put_dentry; 2957 + goto put_path; 2975 2958 2976 2959 return res; 2977 2960 2978 - put_dentry: 2979 - path_put(&path); 2980 2961 put_memory: 2981 2962 shmem_unacct_size(flags, size); 2963 + put_path: 2964 + path_put(&path); 2982 2965 return res; 2983 2966 } 2984 2967

+198 -316

mm/slab.c

··· 191 191 unsigned int limit; 192 192 unsigned int batchcount; 193 193 unsigned int touched; 194 - spinlock_t lock; 195 194 void *entry[]; /* 196 195 * Must have this definition in here for the proper 197 196 * alignment of array_cache. Also simplifies accessing ··· 200 201 * entries belonging to slabs marked pfmemalloc will 201 202 * have the lower bits set SLAB_OBJ_PFMEMALLOC 202 203 */ 204 + }; 205 + 206 + struct alien_cache { 207 + spinlock_t lock; 208 + struct array_cache ac; 203 209 }; 204 210 205 211 #define SLAB_OBJ_PFMEMALLOC 1 ··· 246 242 static int drain_freelist(struct kmem_cache *cache, 247 243 struct kmem_cache_node *n, int tofree); 248 244 static void free_block(struct kmem_cache *cachep, void **objpp, int len, 249 - int node); 245 + int node, struct list_head *list); 246 + static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list); 250 247 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); 251 248 static void cache_reap(struct work_struct *unused); 252 249 ··· 272 267 #define MAKE_LIST(cachep, listp, slab, nodeid) \ 273 268 do { \ 274 269 INIT_LIST_HEAD(listp); \ 275 - list_splice(&(cachep->node[nodeid]->slab), listp); \ 270 + list_splice(&get_node(cachep, nodeid)->slab, listp); \ 276 271 } while (0) 277 272 278 273 #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ ··· 470 465 .name = "kmem_cache", 471 466 }; 472 467 473 - #define BAD_ALIEN_MAGIC 0x01020304ul 474 - 475 - #ifdef CONFIG_LOCKDEP 476 - 477 - /* 478 - * Slab sometimes uses the kmalloc slabs to store the slab headers 479 - * for other slabs "off slab". 480 - * The locking for this is tricky in that it nests within the locks 481 - * of all other slabs in a few places; to deal with this special 482 - * locking we put on-slab caches into a separate lock-class. 483 - * 484 - * We set lock class for alien array caches which are up during init. 
485 - * The lock annotation will be lost if all cpus of a node goes down and 486 - * then comes back up during hotplug 487 - */ 488 - static struct lock_class_key on_slab_l3_key; 489 - static struct lock_class_key on_slab_alc_key; 490 - 491 - static struct lock_class_key debugobj_l3_key; 492 - static struct lock_class_key debugobj_alc_key; 493 - 494 - static void slab_set_lock_classes(struct kmem_cache *cachep, 495 - struct lock_class_key *l3_key, struct lock_class_key *alc_key, 496 - int q) 497 - { 498 - struct array_cache **alc; 499 - struct kmem_cache_node *n; 500 - int r; 501 - 502 - n = cachep->node[q]; 503 - if (!n) 504 - return; 505 - 506 - lockdep_set_class(&n->list_lock, l3_key); 507 - alc = n->alien; 508 - /* 509 - * FIXME: This check for BAD_ALIEN_MAGIC 510 - * should go away when common slab code is taught to 511 - * work even without alien caches. 512 - * Currently, non NUMA code returns BAD_ALIEN_MAGIC 513 - * for alloc_alien_cache, 514 - */ 515 - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) 516 - return; 517 - for_each_node(r) { 518 - if (alc[r]) 519 - lockdep_set_class(&alc[r]->lock, alc_key); 520 - } 521 - } 522 - 523 - static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) 524 - { 525 - slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node); 526 - } 527 - 528 - static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) 529 - { 530 - int node; 531 - 532 - for_each_online_node(node) 533 - slab_set_debugobj_lock_classes_node(cachep, node); 534 - } 535 - 536 - static void init_node_lock_keys(int q) 537 - { 538 - int i; 539 - 540 - if (slab_state < UP) 541 - return; 542 - 543 - for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) { 544 - struct kmem_cache_node *n; 545 - struct kmem_cache *cache = kmalloc_caches[i]; 546 - 547 - if (!cache) 548 - continue; 549 - 550 - n = cache->node[q]; 551 - if (!n || OFF_SLAB(cache)) 552 - continue; 553 - 554 - slab_set_lock_classes(cache, &on_slab_l3_key, 555 - 
&on_slab_alc_key, q); 556 - } 557 - } 558 - 559 - static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) 560 - { 561 - if (!cachep->node[q]) 562 - return; 563 - 564 - slab_set_lock_classes(cachep, &on_slab_l3_key, 565 - &on_slab_alc_key, q); 566 - } 567 - 568 - static inline void on_slab_lock_classes(struct kmem_cache *cachep) 569 - { 570 - int node; 571 - 572 - VM_BUG_ON(OFF_SLAB(cachep)); 573 - for_each_node(node) 574 - on_slab_lock_classes_node(cachep, node); 575 - } 576 - 577 - static inline void init_lock_keys(void) 578 - { 579 - int node; 580 - 581 - for_each_node(node) 582 - init_node_lock_keys(node); 583 - } 584 - #else 585 - static void init_node_lock_keys(int q) 586 - { 587 - } 588 - 589 - static inline void init_lock_keys(void) 590 - { 591 - } 592 - 593 - static inline void on_slab_lock_classes(struct kmem_cache *cachep) 594 - { 595 - } 596 - 597 - static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node) 598 - { 599 - } 600 - 601 - static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) 602 - { 603 - } 604 - 605 - static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) 606 - { 607 - } 608 - #endif 609 - 610 468 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); 611 469 612 470 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) ··· 660 792 } 661 793 } 662 794 663 - static struct array_cache *alloc_arraycache(int node, int entries, 664 - int batchcount, gfp_t gfp) 795 + static void init_arraycache(struct array_cache *ac, int limit, int batch) 665 796 { 666 - int memsize = sizeof(void *) * entries + sizeof(struct array_cache); 667 - struct array_cache *nc = NULL; 668 - 669 - nc = kmalloc_node(memsize, gfp, node); 670 797 /* 671 798 * The array_cache structures contain pointers to free object. 672 799 * However, when such objects are allocated or transferred to another ··· 669 806 * valid references during a kmemleak scan. 
Therefore, kmemleak must 670 807 * not scan such objects. 671 808 */ 672 - kmemleak_no_scan(nc); 673 - if (nc) { 674 - nc->avail = 0; 675 - nc->limit = entries; 676 - nc->batchcount = batchcount; 677 - nc->touched = 0; 678 - spin_lock_init(&nc->lock); 809 + kmemleak_no_scan(ac); 810 + if (ac) { 811 + ac->avail = 0; 812 + ac->limit = limit; 813 + ac->batchcount = batch; 814 + ac->touched = 0; 679 815 } 680 - return nc; 816 + } 817 + 818 + static struct array_cache *alloc_arraycache(int node, int entries, 819 + int batchcount, gfp_t gfp) 820 + { 821 + size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache); 822 + struct array_cache *ac = NULL; 823 + 824 + ac = kmalloc_node(memsize, gfp, node); 825 + init_arraycache(ac, entries, batchcount); 826 + return ac; 681 827 } 682 828 683 829 static inline bool is_slab_pfmemalloc(struct page *page) ··· 698 826 static void recheck_pfmemalloc_active(struct kmem_cache *cachep, 699 827 struct array_cache *ac) 700 828 { 701 - struct kmem_cache_node *n = cachep->node[numa_mem_id()]; 829 + struct kmem_cache_node *n = get_node(cachep, numa_mem_id()); 702 830 struct page *page; 703 831 unsigned long flags; 704 832 ··· 753 881 * If there are empty slabs on the slabs_free list and we are 754 882 * being forced to refill the cache, mark this one !pfmemalloc. 
755 883 */ 756 - n = cachep->node[numa_mem_id()]; 884 + n = get_node(cachep, numa_mem_id()); 757 885 if (!list_empty(&n->slabs_free) && force_refill) { 758 886 struct page *page = virt_to_head_page(objp); 759 887 ClearPageSlabPfmemalloc(page); ··· 833 961 #define drain_alien_cache(cachep, alien) do { } while (0) 834 962 #define reap_alien(cachep, n) do { } while (0) 835 963 836 - static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 964 + static inline struct alien_cache **alloc_alien_cache(int node, 965 + int limit, gfp_t gfp) 837 966 { 838 - return (struct array_cache **)BAD_ALIEN_MAGIC; 967 + return NULL; 839 968 } 840 969 841 - static inline void free_alien_cache(struct array_cache **ac_ptr) 970 + static inline void free_alien_cache(struct alien_cache **ac_ptr) 842 971 { 843 972 } 844 973 ··· 865 992 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); 866 993 static void *alternate_node_alloc(struct kmem_cache *, gfp_t); 867 994 868 - static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 995 + static struct alien_cache *__alloc_alien_cache(int node, int entries, 996 + int batch, gfp_t gfp) 869 997 { 870 - struct array_cache **ac_ptr; 871 - int memsize = sizeof(void *) * nr_node_ids; 998 + size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache); 999 + struct alien_cache *alc = NULL; 1000 + 1001 + alc = kmalloc_node(memsize, gfp, node); 1002 + init_arraycache(&alc->ac, entries, batch); 1003 + spin_lock_init(&alc->lock); 1004 + return alc; 1005 + } 1006 + 1007 + static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 1008 + { 1009 + struct alien_cache **alc_ptr; 1010 + size_t memsize = sizeof(void *) * nr_node_ids; 872 1011 int i; 873 1012 874 1013 if (limit > 1) 875 1014 limit = 12; 876 - ac_ptr = kzalloc_node(memsize, gfp, node); 877 - if (ac_ptr) { 878 - for_each_node(i) { 879 - if (i == node || !node_online(i)) 880 - continue; 881 - ac_ptr[i] = 
alloc_arraycache(node, limit, 0xbaadf00d, gfp); 882 - if (!ac_ptr[i]) { 883 - for (i--; i >= 0; i--) 884 - kfree(ac_ptr[i]); 885 - kfree(ac_ptr); 886 - return NULL; 887 - } 1015 + alc_ptr = kzalloc_node(memsize, gfp, node); 1016 + if (!alc_ptr) 1017 + return NULL; 1018 + 1019 + for_each_node(i) { 1020 + if (i == node || !node_online(i)) 1021 + continue; 1022 + alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp); 1023 + if (!alc_ptr[i]) { 1024 + for (i--; i >= 0; i--) 1025 + kfree(alc_ptr[i]); 1026 + kfree(alc_ptr); 1027 + return NULL; 888 1028 } 889 1029 } 890 - return ac_ptr; 1030 + return alc_ptr; 891 1031 } 892 1032 893 - static void free_alien_cache(struct array_cache **ac_ptr) 1033 + static void free_alien_cache(struct alien_cache **alc_ptr) 894 1034 { 895 1035 int i; 896 1036 897 - if (!ac_ptr) 1037 + if (!alc_ptr) 898 1038 return; 899 1039 for_each_node(i) 900 - kfree(ac_ptr[i]); 901 - kfree(ac_ptr); 1040 + kfree(alc_ptr[i]); 1041 + kfree(alc_ptr); 902 1042 } 903 1043 904 1044 static void __drain_alien_cache(struct kmem_cache *cachep, 905 - struct array_cache *ac, int node) 1045 + struct array_cache *ac, int node, 1046 + struct list_head *list) 906 1047 { 907 - struct kmem_cache_node *n = cachep->node[node]; 1048 + struct kmem_cache_node *n = get_node(cachep, node); 908 1049 909 1050 if (ac->avail) { 910 1051 spin_lock(&n->list_lock); ··· 930 1043 if (n->shared) 931 1044 transfer_objects(n->shared, ac, ac->limit); 932 1045 933 - free_block(cachep, ac->entry, ac->avail, node); 1046 + free_block(cachep, ac->entry, ac->avail, node, list); 934 1047 ac->avail = 0; 935 1048 spin_unlock(&n->list_lock); 936 1049 } ··· 944 1057 int node = __this_cpu_read(slab_reap_node); 945 1058 946 1059 if (n->alien) { 947 - struct array_cache *ac = n->alien[node]; 1060 + struct alien_cache *alc = n->alien[node]; 1061 + struct array_cache *ac; 948 1062 949 - if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { 950 - __drain_alien_cache(cachep, ac, node); 951 - 
spin_unlock_irq(&ac->lock); 1063 + if (alc) { 1064 + ac = &alc->ac; 1065 + if (ac->avail && spin_trylock_irq(&alc->lock)) { 1066 + LIST_HEAD(list); 1067 + 1068 + __drain_alien_cache(cachep, ac, node, &list); 1069 + spin_unlock_irq(&alc->lock); 1070 + slabs_destroy(cachep, &list); 1071 + } 952 1072 } 953 1073 } 954 1074 } 955 1075 956 1076 static void drain_alien_cache(struct kmem_cache *cachep, 957 - struct array_cache **alien) 1077 + struct alien_cache **alien) 958 1078 { 959 1079 int i = 0; 1080 + struct alien_cache *alc; 960 1081 struct array_cache *ac; 961 1082 unsigned long flags; 962 1083 963 1084 for_each_online_node(i) { 964 - ac = alien[i]; 965 - if (ac) { 966 - spin_lock_irqsave(&ac->lock, flags); 967 - __drain_alien_cache(cachep, ac, i); 968 - spin_unlock_irqrestore(&ac->lock, flags); 1085 + alc = alien[i]; 1086 + if (alc) { 1087 + LIST_HEAD(list); 1088 + 1089 + ac = &alc->ac; 1090 + spin_lock_irqsave(&alc->lock, flags); 1091 + __drain_alien_cache(cachep, ac, i, &list); 1092 + spin_unlock_irqrestore(&alc->lock, flags); 1093 + slabs_destroy(cachep, &list); 969 1094 } 970 1095 } 971 1096 } ··· 986 1087 { 987 1088 int nodeid = page_to_nid(virt_to_page(objp)); 988 1089 struct kmem_cache_node *n; 989 - struct array_cache *alien = NULL; 1090 + struct alien_cache *alien = NULL; 1091 + struct array_cache *ac; 990 1092 int node; 1093 + LIST_HEAD(list); 991 1094 992 1095 node = numa_mem_id(); 993 1096 ··· 1000 1099 if (likely(nodeid == node)) 1001 1100 return 0; 1002 1101 1003 - n = cachep->node[node]; 1102 + n = get_node(cachep, node); 1004 1103 STATS_INC_NODEFREES(cachep); 1005 1104 if (n->alien && n->alien[nodeid]) { 1006 1105 alien = n->alien[nodeid]; 1106 + ac = &alien->ac; 1007 1107 spin_lock(&alien->lock); 1008 - if (unlikely(alien->avail == alien->limit)) { 1108 + if (unlikely(ac->avail == ac->limit)) { 1009 1109 STATS_INC_ACOVERFLOW(cachep); 1010 - __drain_alien_cache(cachep, alien, nodeid); 1110 + __drain_alien_cache(cachep, ac, nodeid, &list); 1011 1111 
} 1012 - ac_put_obj(cachep, alien, objp); 1112 + ac_put_obj(cachep, ac, objp); 1013 1113 spin_unlock(&alien->lock); 1114 + slabs_destroy(cachep, &list); 1014 1115 } else { 1015 - spin_lock(&(cachep->node[nodeid])->list_lock); 1016 - free_block(cachep, &objp, 1, nodeid); 1017 - spin_unlock(&(cachep->node[nodeid])->list_lock); 1116 + n = get_node(cachep, nodeid); 1117 + spin_lock(&n->list_lock); 1118 + free_block(cachep, &objp, 1, nodeid, &list); 1119 + spin_unlock(&n->list_lock); 1120 + slabs_destroy(cachep, &list); 1018 1121 } 1019 1122 return 1; 1020 1123 } ··· 1037 1132 { 1038 1133 struct kmem_cache *cachep; 1039 1134 struct kmem_cache_node *n; 1040 - const int memsize = sizeof(struct kmem_cache_node); 1135 + const size_t memsize = sizeof(struct kmem_cache_node); 1041 1136 1042 1137 list_for_each_entry(cachep, &slab_caches, list) { 1043 1138 /* ··· 1045 1140 * begin anything. Make sure some other cpu on this 1046 1141 * node has not already allocated this 1047 1142 */ 1048 - if (!cachep->node[node]) { 1143 + n = get_node(cachep, node); 1144 + if (!n) { 1049 1145 n = kmalloc_node(memsize, GFP_KERNEL, node); 1050 1146 if (!n) 1051 1147 return -ENOMEM; ··· 1062 1156 cachep->node[node] = n; 1063 1157 } 1064 1158 1065 - spin_lock_irq(&cachep->node[node]->list_lock); 1066 - cachep->node[node]->free_limit = 1159 + spin_lock_irq(&n->list_lock); 1160 + n->free_limit = 1067 1161 (1 + nr_cpus_node(node)) * 1068 1162 cachep->batchcount + cachep->num; 1069 - spin_unlock_irq(&cachep->node[node]->list_lock); 1163 + spin_unlock_irq(&n->list_lock); 1070 1164 } 1071 1165 return 0; 1072 1166 } ··· 1087 1181 list_for_each_entry(cachep, &slab_caches, list) { 1088 1182 struct array_cache *nc; 1089 1183 struct array_cache *shared; 1090 - struct array_cache **alien; 1184 + struct alien_cache **alien; 1185 + LIST_HEAD(list); 1091 1186 1092 1187 /* cpu is dead; no one can alloc from it. 
*/ 1093 1188 nc = cachep->array[cpu]; 1094 1189 cachep->array[cpu] = NULL; 1095 - n = cachep->node[node]; 1190 + n = get_node(cachep, node); 1096 1191 1097 1192 if (!n) 1098 1193 goto free_array_cache; ··· 1103 1196 /* Free limit for this kmem_cache_node */ 1104 1197 n->free_limit -= cachep->batchcount; 1105 1198 if (nc) 1106 - free_block(cachep, nc->entry, nc->avail, node); 1199 + free_block(cachep, nc->entry, nc->avail, node, &list); 1107 1200 1108 1201 if (!cpumask_empty(mask)) { 1109 1202 spin_unlock_irq(&n->list_lock); ··· 1113 1206 shared = n->shared; 1114 1207 if (shared) { 1115 1208 free_block(cachep, shared->entry, 1116 - shared->avail, node); 1209 + shared->avail, node, &list); 1117 1210 n->shared = NULL; 1118 1211 } 1119 1212 ··· 1128 1221 free_alien_cache(alien); 1129 1222 } 1130 1223 free_array_cache: 1224 + slabs_destroy(cachep, &list); 1131 1225 kfree(nc); 1132 1226 } 1133 1227 /* ··· 1137 1229 * shrink each nodelist to its limit. 1138 1230 */ 1139 1231 list_for_each_entry(cachep, &slab_caches, list) { 1140 - n = cachep->node[node]; 1232 + n = get_node(cachep, node); 1141 1233 if (!n) 1142 1234 continue; 1143 1235 drain_freelist(cachep, n, slabs_tofree(cachep, n)); ··· 1168 1260 list_for_each_entry(cachep, &slab_caches, list) { 1169 1261 struct array_cache *nc; 1170 1262 struct array_cache *shared = NULL; 1171 - struct array_cache **alien = NULL; 1263 + struct alien_cache **alien = NULL; 1172 1264 1173 1265 nc = alloc_arraycache(node, cachep->limit, 1174 1266 cachep->batchcount, GFP_KERNEL); ··· 1192 1284 } 1193 1285 } 1194 1286 cachep->array[cpu] = nc; 1195 - n = cachep->node[node]; 1287 + n = get_node(cachep, node); 1196 1288 BUG_ON(!n); 1197 1289 1198 1290 spin_lock_irq(&n->list_lock); ··· 1213 1305 spin_unlock_irq(&n->list_lock); 1214 1306 kfree(shared); 1215 1307 free_alien_cache(alien); 1216 - if (cachep->flags & SLAB_DEBUG_OBJECTS) 1217 - slab_set_debugobj_lock_classes_node(cachep, node); 1218 - else if (!OFF_SLAB(cachep) && 1219 - 
!(cachep->flags & SLAB_DESTROY_BY_RCU)) 1220 - on_slab_lock_classes_node(cachep, node); 1221 1308 } 1222 - init_node_lock_keys(node); 1223 1309 1224 1310 return 0; 1225 1311 bad: ··· 1297 1395 list_for_each_entry(cachep, &slab_caches, list) { 1298 1396 struct kmem_cache_node *n; 1299 1397 1300 - n = cachep->node[node]; 1398 + n = get_node(cachep, node); 1301 1399 if (!n) 1302 1400 continue; 1303 1401 ··· 1477 1575 1478 1576 memcpy(ptr, cpu_cache_get(kmem_cache), 1479 1577 sizeof(struct arraycache_init)); 1480 - /* 1481 - * Do not assume that spinlocks can be initialized via memcpy: 1482 - */ 1483 - spin_lock_init(&ptr->lock); 1484 1578 1485 1579 kmem_cache->array[smp_processor_id()] = ptr; 1486 1580 ··· 1486 1588 != &initarray_generic.cache); 1487 1589 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]), 1488 1590 sizeof(struct arraycache_init)); 1489 - /* 1490 - * Do not assume that spinlocks can be initialized via memcpy: 1491 - */ 1492 - spin_lock_init(&ptr->lock); 1493 1591 1494 1592 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr; 1495 1593 } ··· 1521 1627 if (enable_cpucache(cachep, GFP_NOWAIT)) 1522 1628 BUG(); 1523 1629 mutex_unlock(&slab_mutex); 1524 - 1525 - /* Annotate slab for lockdep -- annotate the malloc caches */ 1526 - init_lock_keys(); 1527 1630 1528 1631 /* Done! */ 1529 1632 slab_state = FULL; ··· 1581 1690 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", 1582 1691 cachep->name, cachep->size, cachep->gfporder); 1583 1692 1584 - for_each_online_node(node) { 1693 + for_each_kmem_cache_node(cachep, node, n) { 1585 1694 unsigned long active_objs = 0, num_objs = 0, free_objects = 0; 1586 1695 unsigned long active_slabs = 0, num_slabs = 0; 1587 - 1588 - n = cachep->node[node]; 1589 - if (!n) 1590 - continue; 1591 1696 1592 1697 spin_lock_irqsave(&n->list_lock, flags); 1593 1698 list_for_each_entry(page, &n->slabs_full, lru) { ··· 1611 1724 } 1612 1725 1613 1726 /* 1614 - * Interface to system's page allocator. 
No need to hold the cache-lock. 1727 + * Interface to system's page allocator. No need to hold the 1728 + * kmem_cache_node ->list_lock. 1615 1729 * 1616 1730 * If we requested dmaable memory, we will get it. Even if we 1617 1731 * did not request dmaable memory, we might get it, but that ··· 1914 2026 * @cachep: cache pointer being destroyed 1915 2027 * @page: page pointer being destroyed 1916 2028 * 1917 - * Destroy all the objs in a slab, and release the mem back to the system. 1918 - * Before calling the slab must have been unlinked from the cache. The 1919 - * cache-lock is not held/needed. 2029 + * Destroy all the objs in a slab page, and release the mem back to the system. 2030 + * Before calling the slab page must have been unlinked from the cache. The 2031 + * kmem_cache_node ->list_lock is not held/needed. 1920 2032 */ 1921 2033 static void slab_destroy(struct kmem_cache *cachep, struct page *page) 1922 2034 { ··· 1946 2058 */ 1947 2059 if (OFF_SLAB(cachep)) 1948 2060 kmem_cache_free(cachep->freelist_cache, freelist); 2061 + } 2062 + 2063 + static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) 2064 + { 2065 + struct page *page, *n; 2066 + 2067 + list_for_each_entry_safe(page, n, list, lru) { 2068 + list_del(&page->lru); 2069 + slab_destroy(cachep, page); 2070 + } 1949 2071 } 1950 2072 1951 2073 /** ··· 2303 2405 return err; 2304 2406 } 2305 2407 2306 - if (flags & SLAB_DEBUG_OBJECTS) { 2307 - /* 2308 - * Would deadlock through slab_destroy()->call_rcu()-> 2309 - * debug_object_activate()->kmem_cache_alloc(). 
2310 - */ 2311 - WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU); 2312 - 2313 - slab_set_debugobj_lock_classes(cachep); 2314 - } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU)) 2315 - on_slab_lock_classes(cachep); 2316 - 2317 2408 return 0; 2318 2409 } 2319 2410 ··· 2321 2434 { 2322 2435 #ifdef CONFIG_SMP 2323 2436 check_irq_off(); 2324 - assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock); 2437 + assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); 2325 2438 #endif 2326 2439 } 2327 2440 ··· 2329 2442 { 2330 2443 #ifdef CONFIG_SMP 2331 2444 check_irq_off(); 2332 - assert_spin_locked(&cachep->node[node]->list_lock); 2445 + assert_spin_locked(&get_node(cachep, node)->list_lock); 2333 2446 #endif 2334 2447 } 2335 2448 ··· 2349 2462 struct kmem_cache *cachep = arg; 2350 2463 struct array_cache *ac; 2351 2464 int node = numa_mem_id(); 2465 + struct kmem_cache_node *n; 2466 + LIST_HEAD(list); 2352 2467 2353 2468 check_irq_off(); 2354 2469 ac = cpu_cache_get(cachep); 2355 - spin_lock(&cachep->node[node]->list_lock); 2356 - free_block(cachep, ac->entry, ac->avail, node); 2357 - spin_unlock(&cachep->node[node]->list_lock); 2470 + n = get_node(cachep, node); 2471 + spin_lock(&n->list_lock); 2472 + free_block(cachep, ac->entry, ac->avail, node, &list); 2473 + spin_unlock(&n->list_lock); 2474 + slabs_destroy(cachep, &list); 2358 2475 ac->avail = 0; 2359 2476 } 2360 2477 ··· 2369 2478 2370 2479 on_each_cpu(do_drain, cachep, 1); 2371 2480 check_irq_on(); 2372 - for_each_online_node(node) { 2373 - n = cachep->node[node]; 2374 - if (n && n->alien) 2481 + for_each_kmem_cache_node(cachep, node, n) 2482 + if (n->alien) 2375 2483 drain_alien_cache(cachep, n->alien); 2376 - } 2377 2484 2378 - for_each_online_node(node) { 2379 - n = cachep->node[node]; 2380 - if (n) 2381 - drain_array(cachep, n, n->shared, 1, node); 2382 - } 2485 + for_each_kmem_cache_node(cachep, node, n) 2486 + drain_array(cachep, n, n->shared, 1, node); 2383 2487 } 2384 2488 2385 2489 /* 
··· 2420 2534 2421 2535 int __kmem_cache_shrink(struct kmem_cache *cachep) 2422 2536 { 2423 - int ret = 0, i = 0; 2537 + int ret = 0; 2538 + int node; 2424 2539 struct kmem_cache_node *n; 2425 2540 2426 2541 drain_cpu_caches(cachep); 2427 2542 2428 2543 check_irq_on(); 2429 - for_each_online_node(i) { 2430 - n = cachep->node[i]; 2431 - if (!n) 2432 - continue; 2433 - 2544 + for_each_kmem_cache_node(cachep, node, n) { 2434 2545 drain_freelist(cachep, n, slabs_tofree(cachep, n)); 2435 2546 2436 2547 ret += !list_empty(&n->slabs_full) || ··· 2449 2566 kfree(cachep->array[i]); 2450 2567 2451 2568 /* NUMA: free the node structures */ 2452 - for_each_online_node(i) { 2453 - n = cachep->node[i]; 2454 - if (n) { 2455 - kfree(n->shared); 2456 - free_alien_cache(n->alien); 2457 - kfree(n); 2458 - } 2569 + for_each_kmem_cache_node(cachep, i, n) { 2570 + kfree(n->shared); 2571 + free_alien_cache(n->alien); 2572 + kfree(n); 2573 + cachep->node[i] = NULL; 2459 2574 } 2460 2575 return 0; 2461 2576 } ··· 2632 2751 2633 2752 /* Take the node list lock to change the colour_next on this node */ 2634 2753 check_irq_off(); 2635 - n = cachep->node[nodeid]; 2754 + n = get_node(cachep, nodeid); 2636 2755 spin_lock(&n->list_lock); 2637 2756 2638 2757 /* Get colour for the slab, and cal the next value. 
*/ ··· 2801 2920 */ 2802 2921 batchcount = BATCHREFILL_LIMIT; 2803 2922 } 2804 - n = cachep->node[node]; 2923 + n = get_node(cachep, node); 2805 2924 2806 2925 BUG_ON(ac->avail > 0 || !n); 2807 2926 spin_lock(&n->list_lock); ··· 2941 3060 2942 3061 static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) 2943 3062 { 2944 - if (cachep == kmem_cache) 3063 + if (unlikely(cachep == kmem_cache)) 2945 3064 return false; 2946 3065 2947 3066 return should_failslab(cachep->object_size, flags, cachep->flags); ··· 3050 3169 nid = zone_to_nid(zone); 3051 3170 3052 3171 if (cpuset_zone_allowed_hardwall(zone, flags) && 3053 - cache->node[nid] && 3054 - cache->node[nid]->free_objects) { 3172 + get_node(cache, nid) && 3173 + get_node(cache, nid)->free_objects) { 3055 3174 obj = ____cache_alloc_node(cache, 3056 3175 flags | GFP_THISNODE, nid); 3057 3176 if (obj) ··· 3114 3233 int x; 3115 3234 3116 3235 VM_BUG_ON(nodeid > num_online_nodes()); 3117 - n = cachep->node[nodeid]; 3236 + n = get_node(cachep, nodeid); 3118 3237 BUG_ON(!n); 3119 3238 3120 3239 retry: ··· 3185 3304 if (nodeid == NUMA_NO_NODE) 3186 3305 nodeid = slab_node; 3187 3306 3188 - if (unlikely(!cachep->node[nodeid])) { 3307 + if (unlikely(!get_node(cachep, nodeid))) { 3189 3308 /* Node not bootstrapped yet */ 3190 3309 ptr = fallback_alloc(cachep, flags); 3191 3310 goto out; ··· 3286 3405 3287 3406 /* 3288 3407 * Caller needs to acquire correct kmem_cache_node's list_lock 3408 + * @list: List of detached free slabs should be freed by caller 3289 3409 */ 3290 - static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, 3291 - int node) 3410 + static void free_block(struct kmem_cache *cachep, void **objpp, 3411 + int nr_objects, int node, struct list_head *list) 3292 3412 { 3293 3413 int i; 3294 - struct kmem_cache_node *n; 3414 + struct kmem_cache_node *n = get_node(cachep, node); 3295 3415 3296 3416 for (i = 0; i < nr_objects; i++) { 3297 3417 void *objp; ··· 3302 3420 objp = 
objpp[i]; 3303 3421 3304 3422 page = virt_to_head_page(objp); 3305 - n = cachep->node[node]; 3306 3423 list_del(&page->lru); 3307 3424 check_spinlock_acquired_node(cachep, node); 3308 3425 slab_put_obj(cachep, page, objp, node); ··· 3312 3431 if (page->active == 0) { 3313 3432 if (n->free_objects > n->free_limit) { 3314 3433 n->free_objects -= cachep->num; 3315 - /* No need to drop any previously held 3316 - * lock here, even if we have a off-slab slab 3317 - * descriptor it is guaranteed to come from 3318 - * a different cache, refer to comments before 3319 - * alloc_slabmgmt. 3320 - */ 3321 - slab_destroy(cachep, page); 3434 + list_add_tail(&page->lru, list); 3322 3435 } else { 3323 3436 list_add(&page->lru, &n->slabs_free); 3324 3437 } ··· 3331 3456 int batchcount; 3332 3457 struct kmem_cache_node *n; 3333 3458 int node = numa_mem_id(); 3459 + LIST_HEAD(list); 3334 3460 3335 3461 batchcount = ac->batchcount; 3336 3462 #if DEBUG 3337 3463 BUG_ON(!batchcount || batchcount > ac->avail); 3338 3464 #endif 3339 3465 check_irq_off(); 3340 - n = cachep->node[node]; 3466 + n = get_node(cachep, node); 3341 3467 spin_lock(&n->list_lock); 3342 3468 if (n->shared) { 3343 3469 struct array_cache *shared_array = n->shared; ··· 3353 3477 } 3354 3478 } 3355 3479 3356 - free_block(cachep, ac->entry, batchcount, node); 3480 + free_block(cachep, ac->entry, batchcount, node, &list); 3357 3481 free_done: 3358 3482 #if STATS 3359 3483 { ··· 3374 3498 } 3375 3499 #endif 3376 3500 spin_unlock(&n->list_lock); 3501 + slabs_destroy(cachep, &list); 3377 3502 ac->avail -= batchcount; 3378 3503 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); 3379 3504 } ··· 3631 3754 int node; 3632 3755 struct kmem_cache_node *n; 3633 3756 struct array_cache *new_shared; 3634 - struct array_cache **new_alien = NULL; 3757 + struct alien_cache **new_alien = NULL; 3635 3758 3636 3759 for_each_online_node(node) { 3637 3760 ··· 3652 3775 } 3653 3776 } 3654 3777 3655 - n = 
cachep->node[node]; 3778 + n = get_node(cachep, node); 3656 3779 if (n) { 3657 3780 struct array_cache *shared = n->shared; 3781 + LIST_HEAD(list); 3658 3782 3659 3783 spin_lock_irq(&n->list_lock); 3660 3784 3661 3785 if (shared) 3662 3786 free_block(cachep, shared->entry, 3663 - shared->avail, node); 3787 + shared->avail, node, &list); 3664 3788 3665 3789 n->shared = new_shared; 3666 3790 if (!n->alien) { ··· 3671 3793 n->free_limit = (1 + nr_cpus_node(node)) * 3672 3794 cachep->batchcount + cachep->num; 3673 3795 spin_unlock_irq(&n->list_lock); 3796 + slabs_destroy(cachep, &list); 3674 3797 kfree(shared); 3675 3798 free_alien_cache(new_alien); 3676 3799 continue; ··· 3699 3820 /* Cache is not active yet. Roll back what we did */ 3700 3821 node--; 3701 3822 while (node >= 0) { 3702 - if (cachep->node[node]) { 3703 - n = cachep->node[node]; 3704 - 3823 + n = get_node(cachep, node); 3824 + if (n) { 3705 3825 kfree(n->shared); 3706 3826 free_alien_cache(n->alien); 3707 3827 kfree(n); ··· 3761 3883 cachep->shared = shared; 3762 3884 3763 3885 for_each_online_cpu(i) { 3886 + LIST_HEAD(list); 3764 3887 struct array_cache *ccold = new->new[i]; 3888 + int node; 3889 + struct kmem_cache_node *n; 3890 + 3765 3891 if (!ccold) 3766 3892 continue; 3767 - spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); 3768 - free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); 3769 - spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); 3893 + 3894 + node = cpu_to_mem(i); 3895 + n = get_node(cachep, node); 3896 + spin_lock_irq(&n->list_lock); 3897 + free_block(cachep, ccold->entry, ccold->avail, node, &list); 3898 + spin_unlock_irq(&n->list_lock); 3899 + slabs_destroy(cachep, &list); 3770 3900 kfree(ccold); 3771 3901 } 3772 3902 kfree(new); ··· 3882 3996 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, 3883 3997 struct array_cache *ac, int force, int node) 3884 3998 { 3999 + LIST_HEAD(list); 3885 4000 int tofree; 3886 4001 3887 4002 if (!ac || 
!ac->avail) ··· 3895 4008 tofree = force ? ac->avail : (ac->limit + 4) / 5; 3896 4009 if (tofree > ac->avail) 3897 4010 tofree = (ac->avail + 1) / 2; 3898 - free_block(cachep, ac->entry, tofree, node); 4011 + free_block(cachep, ac->entry, tofree, node, &list); 3899 4012 ac->avail -= tofree; 3900 4013 memmove(ac->entry, &(ac->entry[tofree]), 3901 4014 sizeof(void *) * ac->avail); 3902 4015 } 3903 4016 spin_unlock_irq(&n->list_lock); 4017 + slabs_destroy(cachep, &list); 3904 4018 } 3905 4019 } 3906 4020 ··· 3936 4048 * have established with reasonable certainty that 3937 4049 * we can do some work if the lock was obtained. 3938 4050 */ 3939 - n = searchp->node[node]; 4051 + n = get_node(searchp, node); 3940 4052 3941 4053 reap_alien(searchp, n); 3942 4054 ··· 3988 4100 3989 4101 active_objs = 0; 3990 4102 num_slabs = 0; 3991 - for_each_online_node(node) { 3992 - n = cachep->node[node]; 3993 - if (!n) 3994 - continue; 4103 + for_each_kmem_cache_node(cachep, node, n) { 3995 4104 3996 4105 check_irq_on(); 3997 4106 spin_lock_irq(&n->list_lock); ··· 4213 4328 4214 4329 x[1] = 0; 4215 4330 4216 - for_each_online_node(node) { 4217 - n = cachep->node[node]; 4218 - if (!n) 4219 - continue; 4331 + for_each_kmem_cache_node(cachep, node, n) { 4220 4332 4221 4333 check_irq_on(); 4222 4334 spin_lock_irq(&n->list_lock);

+20 -4

mm/slab.h

··· 256 256 return cachep; 257 257 258 258 pr_err("%s: Wrong slab cache. %s but object is from %s\n", 259 - __FUNCTION__, cachep->name, s->name); 259 + __func__, cachep->name, s->name); 260 260 WARN_ON_ONCE(1); 261 261 return s; 262 262 } 263 - #endif 264 263 265 - 264 + #ifndef CONFIG_SLOB 266 265 /* 267 266 * The slab lists for all objects. 268 267 */ ··· 276 277 unsigned int free_limit; 277 278 unsigned int colour_next; /* Per-node cache coloring */ 278 279 struct array_cache *shared; /* shared per node */ 279 - struct array_cache **alien; /* on other nodes */ 280 + struct alien_cache **alien; /* on other nodes */ 280 281 unsigned long next_reap; /* updated without locking */ 281 282 int free_touched; /* updated without locking */ 282 283 #endif ··· 293 294 294 295 }; 295 296 297 + static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 298 + { 299 + return s->node[node]; 300 + } 301 + 302 + /* 303 + * Iterator over all nodes. The body will be executed for each node that has 304 + * a kmem_cache_node structure allocated (which is true for all online nodes) 305 + */ 306 + #define for_each_kmem_cache_node(__s, __node, __n) \ 307 + for (__node = 0; __n = get_node(__s, __node), __node < nr_node_ids; __node++) \ 308 + if (__n) 309 + 310 + #endif 311 + 296 312 void *slab_next(struct seq_file *m, void *p, loff_t *pos); 297 313 void slab_stop(struct seq_file *m, void *p); 314 + 315 + #endif /* MM_SLAB_H */

+101

mm/slab_common.c

··· 19 19 #include <asm/tlbflush.h> 20 20 #include <asm/page.h> 21 21 #include <linux/memcontrol.h> 22 + 23 + #define CREATE_TRACE_POINTS 22 24 #include <trace/events/kmem.h> 23 25 24 26 #include "slab.h" ··· 789 787 } 790 788 module_init(slab_proc_init); 791 789 #endif /* CONFIG_SLABINFO */ 790 + 791 + static __always_inline void *__do_krealloc(const void *p, size_t new_size, 792 + gfp_t flags) 793 + { 794 + void *ret; 795 + size_t ks = 0; 796 + 797 + if (p) 798 + ks = ksize(p); 799 + 800 + if (ks >= new_size) 801 + return (void *)p; 802 + 803 + ret = kmalloc_track_caller(new_size, flags); 804 + if (ret && p) 805 + memcpy(ret, p, ks); 806 + 807 + return ret; 808 + } 809 + 810 + /** 811 + * __krealloc - like krealloc() but don't free @p. 812 + * @p: object to reallocate memory for. 813 + * @new_size: how many bytes of memory are required. 814 + * @flags: the type of memory to allocate. 815 + * 816 + * This function is like krealloc() except it never frees the originally 817 + * allocated buffer. Use this if you don't want to free the buffer immediately 818 + * like, for example, with RCU. 819 + */ 820 + void *__krealloc(const void *p, size_t new_size, gfp_t flags) 821 + { 822 + if (unlikely(!new_size)) 823 + return ZERO_SIZE_PTR; 824 + 825 + return __do_krealloc(p, new_size, flags); 826 + 827 + } 828 + EXPORT_SYMBOL(__krealloc); 829 + 830 + /** 831 + * krealloc - reallocate memory. The contents will remain unchanged. 832 + * @p: object to reallocate memory for. 833 + * @new_size: how many bytes of memory are required. 834 + * @flags: the type of memory to allocate. 835 + * 836 + * The contents of the object pointed to are preserved up to the 837 + * lesser of the new and old sizes. If @p is %NULL, krealloc() 838 + * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a 839 + * %NULL pointer, the object pointed to is freed. 
840 + */ 841 + void *krealloc(const void *p, size_t new_size, gfp_t flags) 842 + { 843 + void *ret; 844 + 845 + if (unlikely(!new_size)) { 846 + kfree(p); 847 + return ZERO_SIZE_PTR; 848 + } 849 + 850 + ret = __do_krealloc(p, new_size, flags); 851 + if (ret && p != ret) 852 + kfree(p); 853 + 854 + return ret; 855 + } 856 + EXPORT_SYMBOL(krealloc); 857 + 858 + /** 859 + * kzfree - like kfree but zero memory 860 + * @p: object to free memory of 861 + * 862 + * The memory of the object @p points to is zeroed before freed. 863 + * If @p is %NULL, kzfree() does nothing. 864 + * 865 + * Note: this function zeroes the whole allocated buffer which can be a good 866 + * deal bigger than the requested buffer size passed to kmalloc(). So be 867 + * careful when using this function in performance sensitive code. 868 + */ 869 + void kzfree(const void *p) 870 + { 871 + size_t ks; 872 + void *mem = (void *)p; 873 + 874 + if (unlikely(ZERO_OR_NULL_PTR(mem))) 875 + return; 876 + ks = ksize(mem); 877 + memset(mem, 0, ks); 878 + kfree(mem); 879 + } 880 + EXPORT_SYMBOL(kzfree); 881 + 882 + /* Tracepoints definitions. */ 883 + EXPORT_TRACEPOINT_SYMBOL(kmalloc); 884 + EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); 885 + EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); 886 + EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); 887 + EXPORT_TRACEPOINT_SYMBOL(kfree); 888 + EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

+85 -138

mm/slub.c

··· 233 233 * Core slab cache functions 234 234 *******************************************************************/ 235 235 236 - static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 237 - { 238 - return s->node[node]; 239 - } 240 - 241 236 /* Verify that a pointer has an address that is valid within a slab page */ 242 237 static inline int check_valid_pointer(struct kmem_cache *s, 243 238 struct page *page, const void *object) ··· 282 287 #define for_each_object(__p, __s, __addr, __objects) \ 283 288 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ 284 289 __p += (__s)->size) 290 + 291 + #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ 292 + for (__p = (__addr), __idx = 1; __idx <= __objects;\ 293 + __p += (__s)->size, __idx++) 285 294 286 295 /* Determine object index from a given position */ 287 296 static inline int slab_index(void *p, struct kmem_cache *s, void *addr) ··· 381 382 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 382 383 if (s->flags & __CMPXCHG_DOUBLE) { 383 384 if (cmpxchg_double(&page->freelist, &page->counters, 384 - freelist_old, counters_old, 385 - freelist_new, counters_new)) 386 - return 1; 385 + freelist_old, counters_old, 386 + freelist_new, counters_new)) 387 + return 1; 387 388 } else 388 389 #endif 389 390 { ··· 417 418 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 418 419 if (s->flags & __CMPXCHG_DOUBLE) { 419 420 if (cmpxchg_double(&page->freelist, &page->counters, 420 - freelist_old, counters_old, 421 - freelist_new, counters_new)) 422 - return 1; 421 + freelist_old, counters_old, 422 + freelist_new, counters_new)) 423 + return 1; 423 424 } else 424 425 #endif 425 426 { ··· 944 945 } 945 946 946 947 /* 947 - * Hooks for other subsystems that check memory allocations. In a typical 948 - * production configuration these hooks all should produce no code at all. 
949 - */ 950 - static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) 951 - { 952 - kmemleak_alloc(ptr, size, 1, flags); 953 - } 954 - 955 - static inline void kfree_hook(const void *x) 956 - { 957 - kmemleak_free(x); 958 - } 959 - 960 - static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 961 - { 962 - flags &= gfp_allowed_mask; 963 - lockdep_trace_alloc(flags); 964 - might_sleep_if(flags & __GFP_WAIT); 965 - 966 - return should_failslab(s->object_size, flags, s->flags); 967 - } 968 - 969 - static inline void slab_post_alloc_hook(struct kmem_cache *s, 970 - gfp_t flags, void *object) 971 - { 972 - flags &= gfp_allowed_mask; 973 - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 974 - kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); 975 - } 976 - 977 - static inline void slab_free_hook(struct kmem_cache *s, void *x) 978 - { 979 - kmemleak_free_recursive(x, s->flags); 980 - 981 - /* 982 - * Trouble is that we may no longer disable interrupts in the fast path 983 - * So in order to make the debug calls that expect irqs to be 984 - * disabled we need to disable interrupts temporarily. 985 - */ 986 - #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) 987 - { 988 - unsigned long flags; 989 - 990 - local_irq_save(flags); 991 - kmemcheck_slab_free(s, x, s->object_size); 992 - debug_check_no_locks_freed(x, s->object_size); 993 - local_irq_restore(flags); 994 - } 995 - #endif 996 - if (!(s->flags & SLAB_DEBUG_OBJECTS)) 997 - debug_check_no_obj_freed(x, s->object_size); 998 - } 999 - 1000 - /* 1001 948 * Tracking of fully allocated slabs for debugging purposes. 1002 949 */ 1003 950 static void add_full(struct kmem_cache *s, ··· 1227 1282 static inline void dec_slabs_node(struct kmem_cache *s, int node, 1228 1283 int objects) {} 1229 1284 1285 + #endif /* CONFIG_SLUB_DEBUG */ 1286 + 1287 + /* 1288 + * Hooks for other subsystems that check memory allocations. 
In a typical 1289 + * production configuration these hooks all should produce no code at all. 1290 + */ 1230 1291 static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) 1231 1292 { 1232 1293 kmemleak_alloc(ptr, size, 1, flags); ··· 1244 1293 } 1245 1294 1246 1295 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 1247 - { return 0; } 1248 - 1249 - static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, 1250 - void *object) 1251 1296 { 1252 - kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, 1253 - flags & gfp_allowed_mask); 1297 + flags &= gfp_allowed_mask; 1298 + lockdep_trace_alloc(flags); 1299 + might_sleep_if(flags & __GFP_WAIT); 1300 + 1301 + return should_failslab(s->object_size, flags, s->flags); 1302 + } 1303 + 1304 + static inline void slab_post_alloc_hook(struct kmem_cache *s, 1305 + gfp_t flags, void *object) 1306 + { 1307 + flags &= gfp_allowed_mask; 1308 + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 1309 + kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); 1254 1310 } 1255 1311 1256 1312 static inline void slab_free_hook(struct kmem_cache *s, void *x) 1257 1313 { 1258 1314 kmemleak_free_recursive(x, s->flags); 1259 - } 1260 1315 1261 - #endif /* CONFIG_SLUB_DEBUG */ 1316 + /* 1317 + * Trouble is that we may no longer disable interrupts in the fast path 1318 + * So in order to make the debug calls that expect irqs to be 1319 + * disabled we need to disable interrupts temporarily. 
1320 + */ 1321 + #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) 1322 + { 1323 + unsigned long flags; 1324 + 1325 + local_irq_save(flags); 1326 + kmemcheck_slab_free(s, x, s->object_size); 1327 + debug_check_no_locks_freed(x, s->object_size); 1328 + local_irq_restore(flags); 1329 + } 1330 + #endif 1331 + if (!(s->flags & SLAB_DEBUG_OBJECTS)) 1332 + debug_check_no_obj_freed(x, s->object_size); 1333 + } 1262 1334 1263 1335 /* 1264 1336 * Slab allocation and freeing ··· 1383 1409 { 1384 1410 struct page *page; 1385 1411 void *start; 1386 - void *last; 1387 1412 void *p; 1388 1413 int order; 1414 + int idx; 1389 1415 1390 1416 BUG_ON(flags & GFP_SLAB_BUG_MASK); 1391 1417 ··· 1406 1432 if (unlikely(s->flags & SLAB_POISON)) 1407 1433 memset(start, POISON_INUSE, PAGE_SIZE << order); 1408 1434 1409 - last = start; 1410 - for_each_object(p, s, start, page->objects) { 1411 - setup_object(s, page, last); 1412 - set_freepointer(s, last, p); 1413 - last = p; 1435 + for_each_object_idx(p, idx, s, start, page->objects) { 1436 + setup_object(s, page, p); 1437 + if (likely(idx < page->objects)) 1438 + set_freepointer(s, p, p + s->size); 1439 + else 1440 + set_freepointer(s, p, NULL); 1414 1441 } 1415 - setup_object(s, page, last); 1416 - set_freepointer(s, last, NULL); 1417 1442 1418 1443 page->freelist = start; 1419 1444 page->inuse = page->objects; ··· 2135 2162 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL, 2136 2163 DEFAULT_RATELIMIT_BURST); 2137 2164 int node; 2165 + struct kmem_cache_node *n; 2138 2166 2139 2167 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) 2140 2168 return; ··· 2150 2176 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n", 2151 2177 s->name); 2152 2178 2153 - for_each_online_node(node) { 2154 - struct kmem_cache_node *n = get_node(s, node); 2179 + for_each_kmem_cache_node(s, node, n) { 2155 2180 unsigned long nr_slabs; 2156 2181 unsigned long nr_objs; 2157 2182 unsigned long nr_free; 
2158 - 2159 - if (!n) 2160 - continue; 2161 2183 2162 2184 nr_free = count_partial(n, count_free); 2163 2185 nr_slabs = node_nr_slabs(n); ··· 2898 2928 static void free_kmem_cache_nodes(struct kmem_cache *s) 2899 2929 { 2900 2930 int node; 2931 + struct kmem_cache_node *n; 2901 2932 2902 - for_each_node_state(node, N_NORMAL_MEMORY) { 2903 - struct kmem_cache_node *n = s->node[node]; 2904 - 2905 - if (n) 2906 - kmem_cache_free(kmem_cache_node, n); 2907 - 2933 + for_each_kmem_cache_node(s, node, n) { 2934 + kmem_cache_free(kmem_cache_node, n); 2908 2935 s->node[node] = NULL; 2909 2936 } 2910 2937 } ··· 3189 3222 static inline int kmem_cache_close(struct kmem_cache *s) 3190 3223 { 3191 3224 int node; 3225 + struct kmem_cache_node *n; 3192 3226 3193 3227 flush_all(s); 3194 3228 /* Attempt to free all objects */ 3195 - for_each_node_state(node, N_NORMAL_MEMORY) { 3196 - struct kmem_cache_node *n = get_node(s, node); 3197 - 3229 + for_each_kmem_cache_node(s, node, n) { 3198 3230 free_partial(s, n); 3199 3231 if (n->nr_partial || slabs_node(s, node)) 3200 3232 return 1; ··· 3378 3412 return -ENOMEM; 3379 3413 3380 3414 flush_all(s); 3381 - for_each_node_state(node, N_NORMAL_MEMORY) { 3382 - n = get_node(s, node); 3383 - 3415 + for_each_kmem_cache_node(s, node, n) { 3384 3416 if (!n->nr_partial) 3385 3417 continue; 3386 3418 ··· 3550 3586 { 3551 3587 int node; 3552 3588 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 3589 + struct kmem_cache_node *n; 3553 3590 3554 3591 memcpy(s, static_cache, kmem_cache->object_size); 3555 3592 ··· 3560 3595 * IPIs around. 
3561 3596 */ 3562 3597 __flush_cpu_slab(s, smp_processor_id()); 3563 - for_each_node_state(node, N_NORMAL_MEMORY) { 3564 - struct kmem_cache_node *n = get_node(s, node); 3598 + for_each_kmem_cache_node(s, node, n) { 3565 3599 struct page *p; 3566 3600 3567 - if (n) { 3568 - list_for_each_entry(p, &n->partial, lru) 3569 - p->slab_cache = s; 3601 + list_for_each_entry(p, &n->partial, lru) 3602 + p->slab_cache = s; 3570 3603 3571 3604 #ifdef CONFIG_SLUB_DEBUG 3572 - list_for_each_entry(p, &n->full, lru) 3573 - p->slab_cache = s; 3605 + list_for_each_entry(p, &n->full, lru) 3606 + p->slab_cache = s; 3574 3607 #endif 3575 - } 3576 3608 } 3577 3609 list_add(&s->list, &slab_caches); 3578 3610 return s; ··· 3922 3960 unsigned long count = 0; 3923 3961 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 3924 3962 sizeof(unsigned long), GFP_KERNEL); 3963 + struct kmem_cache_node *n; 3925 3964 3926 3965 if (!map) 3927 3966 return -ENOMEM; 3928 3967 3929 3968 flush_all(s); 3930 - for_each_node_state(node, N_NORMAL_MEMORY) { 3931 - struct kmem_cache_node *n = get_node(s, node); 3932 - 3969 + for_each_kmem_cache_node(s, node, n) 3933 3970 count += validate_slab_node(s, n, map); 3934 - } 3935 3971 kfree(map); 3936 3972 return count; 3937 3973 } ··· 4083 4123 int node; 4084 4124 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4085 4125 sizeof(unsigned long), GFP_KERNEL); 4126 + struct kmem_cache_node *n; 4086 4127 4087 4128 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 4088 4129 GFP_TEMPORARY)) { ··· 4093 4132 /* Push back cpu slabs */ 4094 4133 flush_all(s); 4095 4134 4096 - for_each_node_state(node, N_NORMAL_MEMORY) { 4097 - struct kmem_cache_node *n = get_node(s, node); 4135 + for_each_kmem_cache_node(s, node, n) { 4098 4136 unsigned long flags; 4099 4137 struct page *page; 4100 4138 ··· 4165 4205 #endif 4166 4206 4167 4207 #ifdef SLUB_RESILIENCY_TEST 4168 - static void resiliency_test(void) 4208 + static void __init 
resiliency_test(void) 4169 4209 { 4170 4210 u8 *p; 4171 4211 ··· 4292 4332 get_online_mems(); 4293 4333 #ifdef CONFIG_SLUB_DEBUG 4294 4334 if (flags & SO_ALL) { 4295 - for_each_node_state(node, N_NORMAL_MEMORY) { 4296 - struct kmem_cache_node *n = get_node(s, node); 4335 + struct kmem_cache_node *n; 4336 + 4337 + for_each_kmem_cache_node(s, node, n) { 4297 4338 4298 4339 if (flags & SO_TOTAL) 4299 4340 x = atomic_long_read(&n->total_objects); ··· 4310 4349 } else 4311 4350 #endif 4312 4351 if (flags & SO_PARTIAL) { 4313 - for_each_node_state(node, N_NORMAL_MEMORY) { 4314 - struct kmem_cache_node *n = get_node(s, node); 4352 + struct kmem_cache_node *n; 4315 4353 4354 + for_each_kmem_cache_node(s, node, n) { 4316 4355 if (flags & SO_TOTAL) 4317 4356 x = count_partial(n, count_total); 4318 4357 else if (flags & SO_OBJECTS) ··· 4325 4364 } 4326 4365 x = sprintf(buf, "%lu", total); 4327 4366 #ifdef CONFIG_NUMA 4328 - for_each_node_state(node, N_NORMAL_MEMORY) 4367 + for (node = 0; node < nr_node_ids; node++) 4329 4368 if (nodes[node]) 4330 4369 x += sprintf(buf + x, " N%d=%lu", 4331 4370 node, nodes[node]); ··· 4339 4378 static int any_slab_objects(struct kmem_cache *s) 4340 4379 { 4341 4380 int node; 4381 + struct kmem_cache_node *n; 4342 4382 4343 - for_each_online_node(node) { 4344 - struct kmem_cache_node *n = get_node(s, node); 4345 - 4346 - if (!n) 4347 - continue; 4348 - 4383 + for_each_kmem_cache_node(s, node, n) 4349 4384 if (atomic_long_read(&n->total_objects)) 4350 4385 return 1; 4351 - } 4386 + 4352 4387 return 0; 4353 4388 } 4354 4389 #endif ··· 4466 4509 4467 4510 static ssize_t aliases_show(struct kmem_cache *s, char *buf) 4468 4511 { 4469 - return sprintf(buf, "%d\n", s->refcount - 1); 4512 + return sprintf(buf, "%d\n", s->refcount < 0 ? 
0 : s->refcount - 1); 4470 4513 } 4471 4514 SLAB_ATTR_RO(aliases); 4472 4515 ··· 5128 5171 *p++ = '-'; 5129 5172 p += sprintf(p, "%07d", s->size); 5130 5173 5131 - #ifdef CONFIG_MEMCG_KMEM 5132 - if (!is_root_cache(s)) 5133 - p += sprintf(p, "-%08d", 5134 - memcg_cache_id(s->memcg_params->memcg)); 5135 - #endif 5136 - 5137 5174 BUG_ON(p > name + ID_STR_LENGTH - 1); 5138 5175 return name; 5139 5176 } ··· 5293 5342 unsigned long nr_objs = 0; 5294 5343 unsigned long nr_free = 0; 5295 5344 int node; 5345 + struct kmem_cache_node *n; 5296 5346 5297 - for_each_online_node(node) { 5298 - struct kmem_cache_node *n = get_node(s, node); 5299 - 5300 - if (!n) 5301 - continue; 5302 - 5347 + for_each_kmem_cache_node(s, node, n) { 5303 5348 nr_slabs += node_nr_slabs(n); 5304 5349 nr_objs += node_nr_objs(n); 5305 5350 nr_free += count_partial(n, count_free);

+5 -13

mm/swap.c

··· 501 501 SetPageActive(page); 502 502 lru += LRU_ACTIVE; 503 503 add_page_to_lru_list(page, lruvec, lru); 504 - trace_mm_lru_activate(page, page_to_pfn(page)); 504 + trace_mm_lru_activate(page); 505 505 506 506 __count_vm_event(PGACTIVATE); 507 507 update_page_reclaim_stat(lruvec, file, 1); ··· 589 589 * inactive,unreferenced -> inactive,referenced 590 590 * inactive,referenced -> active,unreferenced 591 591 * active,unreferenced -> active,referenced 592 + * 593 + * When a newly allocated page is not yet visible, so safe for non-atomic ops, 594 + * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). 592 595 */ 593 596 void mark_page_accessed(struct page *page) 594 597 { ··· 616 613 } 617 614 } 618 615 EXPORT_SYMBOL(mark_page_accessed); 619 - 620 - /* 621 - * Used to mark_page_accessed(page) that is not visible yet and when it is 622 - * still safe to use non-atomic ops 623 - */ 624 - void init_page_accessed(struct page *page) 625 - { 626 - if (!PageReferenced(page)) 627 - __SetPageReferenced(page); 628 - } 629 - EXPORT_SYMBOL(init_page_accessed); 630 616 631 617 static void __lru_cache_add(struct page *page) 632 618 { ··· 988 996 SetPageLRU(page); 989 997 add_page_to_lru_list(page, lruvec, lru); 990 998 update_page_reclaim_stat(lruvec, file, active); 991 - trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); 999 + trace_mm_lru_insertion(page, lru); 992 1000 } 993 1001 994 1002 /*

-102

mm/util.c

··· 16 16 17 17 #include "internal.h" 18 18 19 - #define CREATE_TRACE_POINTS 20 - #include <trace/events/kmem.h> 21 - 22 19 /** 23 20 * kstrdup - allocate space for and copy an existing string 24 21 * @s: the string to duplicate ··· 108 111 return p; 109 112 } 110 113 EXPORT_SYMBOL(memdup_user); 111 - 112 - static __always_inline void *__do_krealloc(const void *p, size_t new_size, 113 - gfp_t flags) 114 - { 115 - void *ret; 116 - size_t ks = 0; 117 - 118 - if (p) 119 - ks = ksize(p); 120 - 121 - if (ks >= new_size) 122 - return (void *)p; 123 - 124 - ret = kmalloc_track_caller(new_size, flags); 125 - if (ret && p) 126 - memcpy(ret, p, ks); 127 - 128 - return ret; 129 - } 130 - 131 - /** 132 - * __krealloc - like krealloc() but don't free @p. 133 - * @p: object to reallocate memory for. 134 - * @new_size: how many bytes of memory are required. 135 - * @flags: the type of memory to allocate. 136 - * 137 - * This function is like krealloc() except it never frees the originally 138 - * allocated buffer. Use this if you don't want to free the buffer immediately 139 - * like, for example, with RCU. 140 - */ 141 - void *__krealloc(const void *p, size_t new_size, gfp_t flags) 142 - { 143 - if (unlikely(!new_size)) 144 - return ZERO_SIZE_PTR; 145 - 146 - return __do_krealloc(p, new_size, flags); 147 - 148 - } 149 - EXPORT_SYMBOL(__krealloc); 150 - 151 - /** 152 - * krealloc - reallocate memory. The contents will remain unchanged. 153 - * @p: object to reallocate memory for. 154 - * @new_size: how many bytes of memory are required. 155 - * @flags: the type of memory to allocate. 156 - * 157 - * The contents of the object pointed to are preserved up to the 158 - * lesser of the new and old sizes. If @p is %NULL, krealloc() 159 - * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a 160 - * %NULL pointer, the object pointed to is freed. 
161 - */ 162 - void *krealloc(const void *p, size_t new_size, gfp_t flags) 163 - { 164 - void *ret; 165 - 166 - if (unlikely(!new_size)) { 167 - kfree(p); 168 - return ZERO_SIZE_PTR; 169 - } 170 - 171 - ret = __do_krealloc(p, new_size, flags); 172 - if (ret && p != ret) 173 - kfree(p); 174 - 175 - return ret; 176 - } 177 - EXPORT_SYMBOL(krealloc); 178 - 179 - /** 180 - * kzfree - like kfree but zero memory 181 - * @p: object to free memory of 182 - * 183 - * The memory of the object @p points to is zeroed before freed. 184 - * If @p is %NULL, kzfree() does nothing. 185 - * 186 - * Note: this function zeroes the whole allocated buffer which can be a good 187 - * deal bigger than the requested buffer size passed to kmalloc(). So be 188 - * careful when using this function in performance sensitive code. 189 - */ 190 - void kzfree(const void *p) 191 - { 192 - size_t ks; 193 - void *mem = (void *)p; 194 - 195 - if (unlikely(ZERO_OR_NULL_PTR(mem))) 196 - return; 197 - ks = ksize(mem); 198 - memset(mem, 0, ks); 199 - kfree(mem); 200 - } 201 - EXPORT_SYMBOL(kzfree); 202 114 203 115 /* 204 116 * strndup_user - duplicate an existing string from user space ··· 410 504 out: 411 505 return res; 412 506 } 413 - 414 - /* Tracepoints definitions. */ 415 - EXPORT_TRACEPOINT_SYMBOL(kmalloc); 416 - EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); 417 - EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); 418 - EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); 419 - EXPORT_TRACEPOINT_SYMBOL(kfree); 420 - EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

+14 -16

mm/vmalloc.c

··· 1270 1270 } 1271 1271 EXPORT_SYMBOL_GPL(unmap_kernel_range); 1272 1272 1273 - int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) 1273 + int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) 1274 1274 { 1275 1275 unsigned long addr = (unsigned long)area->addr; 1276 1276 unsigned long end = addr + get_vm_area_size(area); 1277 1277 int err; 1278 1278 1279 - err = vmap_page_range(addr, end, prot, *pages); 1280 - if (err > 0) { 1281 - *pages += err; 1282 - err = 0; 1283 - } 1279 + err = vmap_page_range(addr, end, prot, pages); 1284 1280 1285 - return err; 1281 + return err > 0 ? 0 : err; 1286 1282 } 1287 1283 EXPORT_SYMBOL_GPL(map_vm_area); 1288 1284 ··· 1544 1548 if (!area) 1545 1549 return NULL; 1546 1550 1547 - if (map_vm_area(area, prot, &pages)) { 1551 + if (map_vm_area(area, prot, pages)) { 1548 1552 vunmap(area->addr); 1549 1553 return NULL; 1550 1554 } ··· 1562 1566 const int order = 0; 1563 1567 struct page **pages; 1564 1568 unsigned int nr_pages, array_size, i; 1565 - gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; 1569 + const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; 1570 + const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; 1566 1571 1567 1572 nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; 1568 1573 array_size = (nr_pages * sizeof(struct page *)); ··· 1586 1589 1587 1590 for (i = 0; i < area->nr_pages; i++) { 1588 1591 struct page *page; 1589 - gfp_t tmp_mask = gfp_mask | __GFP_NOWARN; 1590 1592 1591 1593 if (node == NUMA_NO_NODE) 1592 - page = alloc_page(tmp_mask); 1594 + page = alloc_page(alloc_mask); 1593 1595 else 1594 - page = alloc_pages_node(node, tmp_mask, order); 1596 + page = alloc_pages_node(node, alloc_mask, order); 1595 1597 1596 1598 if (unlikely(!page)) { 1597 1599 /* Successfully allocated i pages, free them in __vunmap() */ ··· 1598 1602 goto fail; 1599 1603 } 1600 1604 area->pages[i] = page; 1605 + if (gfp_mask & __GFP_WAIT) 1606 + cond_resched(); 1601 
1607 } 1602 1608 1603 - if (map_vm_area(area, prot, &pages)) 1609 + if (map_vm_area(area, prot, pages)) 1604 1610 goto fail; 1605 1611 return area->addr; 1606 1612 ··· 2688 2690 2689 2691 prev_end = VMALLOC_START; 2690 2692 2691 - spin_lock(&vmap_area_lock); 2693 + rcu_read_lock(); 2692 2694 2693 2695 if (list_empty(&vmap_area_list)) { 2694 2696 vmi->largest_chunk = VMALLOC_TOTAL; 2695 2697 goto out; 2696 2698 } 2697 2699 2698 - list_for_each_entry(va, &vmap_area_list, list) { 2700 + list_for_each_entry_rcu(va, &vmap_area_list, list) { 2699 2701 unsigned long addr = va->va_start; 2700 2702 2701 2703 /* ··· 2722 2724 vmi->largest_chunk = VMALLOC_END - prev_end; 2723 2725 2724 2726 out: 2725 - spin_unlock(&vmap_area_lock); 2727 + rcu_read_unlock(); 2726 2728 } 2727 2729 #endif 2728 2730

+133 -145

mm/vmscan.c

··· 59 59 #include <trace/events/vmscan.h> 60 60 61 61 struct scan_control { 62 - /* Incremented by the number of inactive pages that were scanned */ 63 - unsigned long nr_scanned; 64 - 65 - /* Number of pages freed so far during a call to shrink_zones() */ 66 - unsigned long nr_reclaimed; 67 - 68 62 /* How many pages shrink_list() should reclaim */ 69 63 unsigned long nr_to_reclaim; 70 - 71 - unsigned long hibernation_mode; 72 64 73 65 /* This context's GFP mask */ 74 66 gfp_t gfp_mask; 75 67 76 - int may_writepage; 77 - 78 - /* Can mapped pages be reclaimed? */ 79 - int may_unmap; 80 - 81 - /* Can pages be swapped as part of reclaim? */ 82 - int may_swap; 83 - 68 + /* Allocation order */ 84 69 int order; 85 70 86 - /* Scan (total_size >> priority) pages at once */ 87 - int priority; 88 - 89 - /* anon vs. file LRUs scanning "ratio" */ 90 - int swappiness; 71 + /* 72 + * Nodemask of nodes allowed by the caller. If NULL, all nodes 73 + * are scanned. 74 + */ 75 + nodemask_t *nodemask; 91 76 92 77 /* 93 78 * The memory cgroup that hit its limit and as a result is the ··· 80 95 */ 81 96 struct mem_cgroup *target_mem_cgroup; 82 97 83 - /* 84 - * Nodemask of nodes allowed by the caller. If NULL, all nodes 85 - * are scanned. 86 - */ 87 - nodemask_t *nodemask; 98 + /* Scan (total_size >> priority) pages at once */ 99 + int priority; 100 + 101 + unsigned int may_writepage:1; 102 + 103 + /* Can mapped pages be reclaimed? */ 104 + unsigned int may_unmap:1; 105 + 106 + /* Can pages be swapped as part of reclaim? 
*/ 107 + unsigned int may_swap:1; 108 + 109 + unsigned int hibernation_mode:1; 110 + 111 + /* One of the zones is ready for compaction */ 112 + unsigned int compaction_ready:1; 113 + 114 + /* Incremented by the number of inactive pages that were scanned */ 115 + unsigned long nr_scanned; 116 + 117 + /* Number of pages freed so far during a call to shrink_zones() */ 118 + unsigned long nr_reclaimed; 88 119 }; 89 120 90 121 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) ··· 137 136 * From 0 .. 100. Higher means more swappy. 138 137 */ 139 138 int vm_swappiness = 60; 140 - unsigned long vm_total_pages; /* The total number of pages which the VM controls */ 139 + /* 140 + * The total number of pages which are beyond the high watermark within all 141 + * zones. 142 + */ 143 + unsigned long vm_total_pages; 141 144 142 145 static LIST_HEAD(shrinker_list); 143 146 static DECLARE_RWSEM(shrinker_rwsem); ··· 174 169 175 170 bool zone_reclaimable(struct zone *zone) 176 171 { 177 - return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; 172 + return zone_page_state(zone, NR_PAGES_SCANNED) < 173 + zone_reclaimable_pages(zone) * 6; 178 174 } 179 175 180 176 static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) ··· 1509 1503 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); 1510 1504 1511 1505 if (global_reclaim(sc)) { 1512 - zone->pages_scanned += nr_scanned; 1506 + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); 1513 1507 if (current_is_kswapd()) 1514 1508 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); 1515 1509 else ··· 1699 1693 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, 1700 1694 &nr_scanned, sc, isolate_mode, lru); 1701 1695 if (global_reclaim(sc)) 1702 - zone->pages_scanned += nr_scanned; 1696 + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); 1703 1697 1704 1698 reclaim_stat->recent_scanned[file] += nr_taken; 1705 1699 ··· 1756 1750 * Count referenced pages 
from currently used mappings as rotated, 1757 1751 * even though only some of them are actually re-activated. This 1758 1752 * helps balance scan pressure between file and anonymous pages in 1759 - * get_scan_ratio. 1753 + * get_scan_count. 1760 1754 */ 1761 1755 reclaim_stat->recent_rotated[file] += nr_rotated; 1762 1756 ··· 1871 1865 * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan 1872 1866 * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan 1873 1867 */ 1874 - static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, 1875 - unsigned long *nr) 1868 + static void get_scan_count(struct lruvec *lruvec, int swappiness, 1869 + struct scan_control *sc, unsigned long *nr) 1876 1870 { 1877 1871 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; 1878 1872 u64 fraction[2]; ··· 1915 1909 * using the memory controller's swap limit feature would be 1916 1910 * too expensive. 1917 1911 */ 1918 - if (!global_reclaim(sc) && !sc->swappiness) { 1912 + if (!global_reclaim(sc) && !swappiness) { 1919 1913 scan_balance = SCAN_FILE; 1920 1914 goto out; 1921 1915 } ··· 1925 1919 * system is close to OOM, scan both anon and file equally 1926 1920 * (unless the swappiness setting disagrees with swapping). 1927 1921 */ 1928 - if (!sc->priority && sc->swappiness) { 1922 + if (!sc->priority && swappiness) { 1929 1923 scan_balance = SCAN_EQUAL; 1930 1924 goto out; 1931 1925 } 1932 - 1933 - anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + 1934 - get_lru_size(lruvec, LRU_INACTIVE_ANON); 1935 - file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + 1936 - get_lru_size(lruvec, LRU_INACTIVE_FILE); 1937 1926 1938 1927 /* 1939 1928 * Prevent the reclaimer from falling into the cache trap: as ··· 1940 1939 * anon pages. Try to detect this based on file LRU size. 
1941 1940 */ 1942 1941 if (global_reclaim(sc)) { 1943 - unsigned long free = zone_page_state(zone, NR_FREE_PAGES); 1942 + unsigned long zonefile; 1943 + unsigned long zonefree; 1944 1944 1945 - if (unlikely(file + free <= high_wmark_pages(zone))) { 1945 + zonefree = zone_page_state(zone, NR_FREE_PAGES); 1946 + zonefile = zone_page_state(zone, NR_ACTIVE_FILE) + 1947 + zone_page_state(zone, NR_INACTIVE_FILE); 1948 + 1949 + if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) { 1946 1950 scan_balance = SCAN_ANON; 1947 1951 goto out; 1948 1952 } ··· 1968 1962 * With swappiness at 100, anonymous and file have the same priority. 1969 1963 * This scanning priority is essentially the inverse of IO cost. 1970 1964 */ 1971 - anon_prio = sc->swappiness; 1965 + anon_prio = swappiness; 1972 1966 file_prio = 200 - anon_prio; 1973 1967 1974 1968 /* ··· 1982 1976 * 1983 1977 * anon in [0], file in [1] 1984 1978 */ 1979 + 1980 + anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + 1981 + get_lru_size(lruvec, LRU_INACTIVE_ANON); 1982 + file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + 1983 + get_lru_size(lruvec, LRU_INACTIVE_FILE); 1984 + 1985 1985 spin_lock_irq(&zone->lru_lock); 1986 1986 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { 1987 1987 reclaim_stat->recent_scanned[0] /= 2; ··· 2064 2052 /* 2065 2053 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 
2066 2054 */ 2067 - static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) 2055 + static void shrink_lruvec(struct lruvec *lruvec, int swappiness, 2056 + struct scan_control *sc) 2068 2057 { 2069 2058 unsigned long nr[NR_LRU_LISTS]; 2070 2059 unsigned long targets[NR_LRU_LISTS]; ··· 2076 2063 struct blk_plug plug; 2077 2064 bool scan_adjusted; 2078 2065 2079 - get_scan_count(lruvec, sc, nr); 2066 + get_scan_count(lruvec, swappiness, sc, nr); 2080 2067 2081 2068 /* Record the original scan target for proportional adjustments later */ 2082 2069 memcpy(targets, nr, sizeof(nr)); ··· 2254 2241 } 2255 2242 } 2256 2243 2257 - static void shrink_zone(struct zone *zone, struct scan_control *sc) 2244 + static bool shrink_zone(struct zone *zone, struct scan_control *sc) 2258 2245 { 2259 2246 unsigned long nr_reclaimed, nr_scanned; 2247 + bool reclaimable = false; 2260 2248 2261 2249 do { 2262 2250 struct mem_cgroup *root = sc->target_mem_cgroup; ··· 2273 2259 memcg = mem_cgroup_iter(root, NULL, &reclaim); 2274 2260 do { 2275 2261 struct lruvec *lruvec; 2262 + int swappiness; 2276 2263 2277 2264 lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2265 + swappiness = mem_cgroup_swappiness(memcg); 2278 2266 2279 - sc->swappiness = mem_cgroup_swappiness(memcg); 2280 - shrink_lruvec(lruvec, sc); 2267 + shrink_lruvec(lruvec, swappiness, sc); 2281 2268 2282 2269 /* 2283 2270 * Direct reclaim and kswapd have to scan all memory ··· 2302 2287 sc->nr_scanned - nr_scanned, 2303 2288 sc->nr_reclaimed - nr_reclaimed); 2304 2289 2290 + if (sc->nr_reclaimed - nr_reclaimed) 2291 + reclaimable = true; 2292 + 2305 2293 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, 2306 2294 sc->nr_scanned - nr_scanned, sc)); 2295 + 2296 + return reclaimable; 2307 2297 } 2308 2298 2309 2299 /* Returns true if compaction should go ahead for a high-order request */ 2310 - static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) 2300 + static inline bool 
compaction_ready(struct zone *zone, int order) 2311 2301 { 2312 2302 unsigned long balance_gap, watermark; 2313 2303 bool watermark_ok; 2314 - 2315 - /* Do not consider compaction for orders reclaim is meant to satisfy */ 2316 - if (sc->order <= PAGE_ALLOC_COSTLY_ORDER) 2317 - return false; 2318 2304 2319 2305 /* 2320 2306 * Compaction takes time to run and there are potentially other ··· 2325 2309 */ 2326 2310 balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( 2327 2311 zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); 2328 - watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order); 2312 + watermark = high_wmark_pages(zone) + balance_gap + (2UL << order); 2329 2313 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); 2330 2314 2331 2315 /* 2332 2316 * If compaction is deferred, reclaim up to a point where 2333 2317 * compaction will have a chance of success when re-enabled 2334 2318 */ 2335 - if (compaction_deferred(zone, sc->order)) 2319 + if (compaction_deferred(zone, order)) 2336 2320 return watermark_ok; 2337 2321 2338 2322 /* If compaction is not ready to start, keep reclaiming */ 2339 - if (!compaction_suitable(zone, sc->order)) 2323 + if (!compaction_suitable(zone, order)) 2340 2324 return false; 2341 2325 2342 2326 return watermark_ok; ··· 2358 2342 * If a zone is deemed to be full of pinned pages then just give it a light 2359 2343 * scan then give up on it. 2360 2344 * 2361 - * This function returns true if a zone is being reclaimed for a costly 2362 - * high-order allocation and compaction is ready to begin. This indicates to 2363 - * the caller that it should consider retrying the allocation instead of 2364 - * further reclaim. 2345 + * Returns true if a zone was reclaimable. 
2365 2346 */ 2366 2347 static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) 2367 2348 { ··· 2367 2354 unsigned long nr_soft_reclaimed; 2368 2355 unsigned long nr_soft_scanned; 2369 2356 unsigned long lru_pages = 0; 2370 - bool aborted_reclaim = false; 2371 2357 struct reclaim_state *reclaim_state = current->reclaim_state; 2372 2358 gfp_t orig_mask; 2373 2359 struct shrink_control shrink = { 2374 2360 .gfp_mask = sc->gfp_mask, 2375 2361 }; 2376 2362 enum zone_type requested_highidx = gfp_zone(sc->gfp_mask); 2363 + bool reclaimable = false; 2377 2364 2378 2365 /* 2379 2366 * If the number of buffer_heads in the machine exceeds the maximum ··· 2404 2391 if (sc->priority != DEF_PRIORITY && 2405 2392 !zone_reclaimable(zone)) 2406 2393 continue; /* Let kswapd poll it */ 2407 - if (IS_ENABLED(CONFIG_COMPACTION)) { 2408 - /* 2409 - * If we already have plenty of memory free for 2410 - * compaction in this zone, don't free any more. 2411 - * Even though compaction is invoked for any 2412 - * non-zero order, only frequent costly order 2413 - * reclamation is disruptive enough to become a 2414 - * noticeable problem, like transparent huge 2415 - * page allocations. 2416 - */ 2417 - if ((zonelist_zone_idx(z) <= requested_highidx) 2418 - && compaction_ready(zone, sc)) { 2419 - aborted_reclaim = true; 2420 - continue; 2421 - } 2394 + 2395 + /* 2396 + * If we already have plenty of memory free for 2397 + * compaction in this zone, don't free any more. 2398 + * Even though compaction is invoked for any 2399 + * non-zero order, only frequent costly order 2400 + * reclamation is disruptive enough to become a 2401 + * noticeable problem, like transparent huge 2402 + * page allocations. 
2403 + */ 2404 + if (IS_ENABLED(CONFIG_COMPACTION) && 2405 + sc->order > PAGE_ALLOC_COSTLY_ORDER && 2406 + zonelist_zone_idx(z) <= requested_highidx && 2407 + compaction_ready(zone, sc->order)) { 2408 + sc->compaction_ready = true; 2409 + continue; 2422 2410 } 2411 + 2423 2412 /* 2424 2413 * This steals pages from memory cgroups over softlimit 2425 2414 * and returns the number of reclaimed pages and ··· 2434 2419 &nr_soft_scanned); 2435 2420 sc->nr_reclaimed += nr_soft_reclaimed; 2436 2421 sc->nr_scanned += nr_soft_scanned; 2422 + if (nr_soft_reclaimed) 2423 + reclaimable = true; 2437 2424 /* need some check for avoid more shrink_zone() */ 2438 2425 } 2439 2426 2440 - shrink_zone(zone, sc); 2427 + if (shrink_zone(zone, sc)) 2428 + reclaimable = true; 2429 + 2430 + if (global_reclaim(sc) && 2431 + !reclaimable && zone_reclaimable(zone)) 2432 + reclaimable = true; 2441 2433 } 2442 2434 2443 2435 /* ··· 2467 2445 */ 2468 2446 sc->gfp_mask = orig_mask; 2469 2447 2470 - return aborted_reclaim; 2471 - } 2472 - 2473 - /* All zones in zonelist are unreclaimable? 
*/ 2474 - static bool all_unreclaimable(struct zonelist *zonelist, 2475 - struct scan_control *sc) 2476 - { 2477 - struct zoneref *z; 2478 - struct zone *zone; 2479 - 2480 - for_each_zone_zonelist_nodemask(zone, z, zonelist, 2481 - gfp_zone(sc->gfp_mask), sc->nodemask) { 2482 - if (!populated_zone(zone)) 2483 - continue; 2484 - if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 2485 - continue; 2486 - if (zone_reclaimable(zone)) 2487 - return false; 2488 - } 2489 - 2490 - return true; 2448 + return reclaimable; 2491 2449 } 2492 2450 2493 2451 /* ··· 2491 2489 { 2492 2490 unsigned long total_scanned = 0; 2493 2491 unsigned long writeback_threshold; 2494 - bool aborted_reclaim; 2492 + bool zones_reclaimable; 2495 2493 2496 2494 delayacct_freepages_start(); 2497 2495 ··· 2502 2500 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, 2503 2501 sc->priority); 2504 2502 sc->nr_scanned = 0; 2505 - aborted_reclaim = shrink_zones(zonelist, sc); 2503 + zones_reclaimable = shrink_zones(zonelist, sc); 2506 2504 2507 2505 total_scanned += sc->nr_scanned; 2508 2506 if (sc->nr_reclaimed >= sc->nr_to_reclaim) 2509 - goto out; 2507 + break; 2508 + 2509 + if (sc->compaction_ready) 2510 + break; 2510 2511 2511 2512 /* 2512 2513 * If we're getting trouble reclaiming, start doing ··· 2531 2526 WB_REASON_TRY_TO_FREE_PAGES); 2532 2527 sc->may_writepage = 1; 2533 2528 } 2534 - } while (--sc->priority >= 0 && !aborted_reclaim); 2529 + } while (--sc->priority >= 0); 2535 2530 2536 - out: 2537 2531 delayacct_freepages_end(); 2538 2532 2539 2533 if (sc->nr_reclaimed) 2540 2534 return sc->nr_reclaimed; 2541 2535 2542 - /* 2543 - * As hibernation is going on, kswapd is freezed so that it can't mark 2544 - * the zone into all_unreclaimable. Thus bypassing all_unreclaimable 2545 - * check. 2546 - */ 2547 - if (oom_killer_disabled) 2548 - return 0; 2549 - 2550 2536 /* Aborted reclaim to try compaction? 
don't OOM, then */ 2551 - if (aborted_reclaim) 2537 + if (sc->compaction_ready) 2552 2538 return 1; 2553 2539 2554 - /* top priority shrink_zones still had more to do? don't OOM, then */ 2555 - if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc)) 2540 + /* Any of the zones still reclaimable? Don't OOM. */ 2541 + if (zones_reclaimable) 2556 2542 return 1; 2557 2543 2558 2544 return 0; ··· 2680 2684 { 2681 2685 unsigned long nr_reclaimed; 2682 2686 struct scan_control sc = { 2683 - .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 2684 - .may_writepage = !laptop_mode, 2685 2687 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2688 + .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 2689 + .order = order, 2690 + .nodemask = nodemask, 2691 + .priority = DEF_PRIORITY, 2692 + .may_writepage = !laptop_mode, 2686 2693 .may_unmap = 1, 2687 2694 .may_swap = 1, 2688 - .order = order, 2689 - .priority = DEF_PRIORITY, 2690 - .target_mem_cgroup = NULL, 2691 - .nodemask = nodemask, 2692 2695 }; 2693 2696 2694 2697 /* ··· 2717 2722 unsigned long *nr_scanned) 2718 2723 { 2719 2724 struct scan_control sc = { 2720 - .nr_scanned = 0, 2721 2725 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2726 + .target_mem_cgroup = memcg, 2722 2727 .may_writepage = !laptop_mode, 2723 2728 .may_unmap = 1, 2724 2729 .may_swap = !noswap, 2725 - .order = 0, 2726 - .priority = 0, 2727 - .swappiness = mem_cgroup_swappiness(memcg), 2728 - .target_mem_cgroup = memcg, 2729 2730 }; 2730 2731 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2732 + int swappiness = mem_cgroup_swappiness(memcg); 2731 2733 2732 2734 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2733 2735 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); ··· 2740 2748 * will pick up pages from other mem cgroup's as well. We hack 2741 2749 * the priority and make it zero. 
2742 2750 */ 2743 - shrink_lruvec(lruvec, &sc); 2751 + shrink_lruvec(lruvec, swappiness, &sc); 2744 2752 2745 2753 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2746 2754 ··· 2756 2764 unsigned long nr_reclaimed; 2757 2765 int nid; 2758 2766 struct scan_control sc = { 2767 + .nr_to_reclaim = SWAP_CLUSTER_MAX, 2768 + .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2769 + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2770 + .target_mem_cgroup = memcg, 2771 + .priority = DEF_PRIORITY, 2759 2772 .may_writepage = !laptop_mode, 2760 2773 .may_unmap = 1, 2761 2774 .may_swap = !noswap, 2762 - .nr_to_reclaim = SWAP_CLUSTER_MAX, 2763 - .order = 0, 2764 - .priority = DEF_PRIORITY, 2765 - .target_mem_cgroup = memcg, 2766 - .nodemask = NULL, /* we don't care the placement */ 2767 - .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2768 - (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2769 2775 }; 2770 2776 2771 2777 /* ··· 3021 3031 unsigned long nr_soft_scanned; 3022 3032 struct scan_control sc = { 3023 3033 .gfp_mask = GFP_KERNEL, 3034 + .order = order, 3024 3035 .priority = DEF_PRIORITY, 3036 + .may_writepage = !laptop_mode, 3025 3037 .may_unmap = 1, 3026 3038 .may_swap = 1, 3027 - .may_writepage = !laptop_mode, 3028 - .order = order, 3029 - .target_mem_cgroup = NULL, 3030 3039 }; 3031 3040 count_vm_event(PAGEOUTRUN); 3032 3041 ··· 3406 3417 { 3407 3418 struct reclaim_state reclaim_state; 3408 3419 struct scan_control sc = { 3409 - .gfp_mask = GFP_HIGHUSER_MOVABLE, 3410 - .may_swap = 1, 3411 - .may_unmap = 1, 3412 - .may_writepage = 1, 3413 3420 .nr_to_reclaim = nr_to_reclaim, 3414 - .hibernation_mode = 1, 3415 - .order = 0, 3421 + .gfp_mask = GFP_HIGHUSER_MOVABLE, 3416 3422 .priority = DEF_PRIORITY, 3423 + .may_writepage = 1, 3424 + .may_unmap = 1, 3425 + .may_swap = 1, 3426 + .hibernation_mode = 1, 3417 3427 }; 3418 3428 struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); 3419 3429 struct task_struct *p = current; ··· 3592 3604 struct task_struct *p = 
current; 3593 3605 struct reclaim_state reclaim_state; 3594 3606 struct scan_control sc = { 3595 - .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), 3596 - .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), 3597 - .may_swap = 1, 3598 3607 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), 3599 3608 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), 3600 3609 .order = order, 3601 3610 .priority = ZONE_RECLAIM_PRIORITY, 3611 + .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), 3612 + .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), 3613 + .may_swap = 1, 3602 3614 }; 3603 3615 struct shrink_control shrink = { 3604 3616 .gfp_mask = sc.gfp_mask,

+5 -4

mm/vmstat.c

··· 200 200 continue; 201 201 202 202 threshold = (*calculate_pressure)(zone); 203 - for_each_possible_cpu(cpu) 203 + for_each_online_cpu(cpu) 204 204 per_cpu_ptr(zone->pageset, cpu)->stat_threshold 205 205 = threshold; 206 206 } ··· 763 763 "nr_shmem", 764 764 "nr_dirtied", 765 765 "nr_written", 766 + "nr_pages_scanned", 766 767 767 768 #ifdef CONFIG_NUMA 768 769 "numa_hit", ··· 1068 1067 min_wmark_pages(zone), 1069 1068 low_wmark_pages(zone), 1070 1069 high_wmark_pages(zone), 1071 - zone->pages_scanned, 1070 + zone_page_state(zone, NR_PAGES_SCANNED), 1072 1071 zone->spanned_pages, 1073 1072 zone->present_pages, 1074 1073 zone->managed_pages); ··· 1078 1077 zone_page_state(zone, i)); 1079 1078 1080 1079 seq_printf(m, 1081 - "\n protection: (%lu", 1080 + "\n protection: (%ld", 1082 1081 zone->lowmem_reserve[0]); 1083 1082 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) 1084 - seq_printf(m, ", %lu", zone->lowmem_reserve[i]); 1083 + seq_printf(m, ", %ld", zone->lowmem_reserve[i]); 1085 1084 seq_printf(m, 1086 1085 ")" 1087 1086 "\n pagesets");

+96 -2

mm/zbud.c

··· 51 51 #include <linux/slab.h> 52 52 #include <linux/spinlock.h> 53 53 #include <linux/zbud.h> 54 + #include <linux/zpool.h> 54 55 55 56 /***************** 56 57 * Structures ··· 114 113 }; 115 114 116 115 /***************** 116 + * zpool 117 + ****************/ 118 + 119 + #ifdef CONFIG_ZPOOL 120 + 121 + static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle) 122 + { 123 + return zpool_evict(pool, handle); 124 + } 125 + 126 + static struct zbud_ops zbud_zpool_ops = { 127 + .evict = zbud_zpool_evict 128 + }; 129 + 130 + static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops) 131 + { 132 + return zbud_create_pool(gfp, &zbud_zpool_ops); 133 + } 134 + 135 + static void zbud_zpool_destroy(void *pool) 136 + { 137 + zbud_destroy_pool(pool); 138 + } 139 + 140 + static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp, 141 + unsigned long *handle) 142 + { 143 + return zbud_alloc(pool, size, gfp, handle); 144 + } 145 + static void zbud_zpool_free(void *pool, unsigned long handle) 146 + { 147 + zbud_free(pool, handle); 148 + } 149 + 150 + static int zbud_zpool_shrink(void *pool, unsigned int pages, 151 + unsigned int *reclaimed) 152 + { 153 + unsigned int total = 0; 154 + int ret = -EINVAL; 155 + 156 + while (total < pages) { 157 + ret = zbud_reclaim_page(pool, 8); 158 + if (ret < 0) 159 + break; 160 + total++; 161 + } 162 + 163 + if (reclaimed) 164 + *reclaimed = total; 165 + 166 + return ret; 167 + } 168 + 169 + static void *zbud_zpool_map(void *pool, unsigned long handle, 170 + enum zpool_mapmode mm) 171 + { 172 + return zbud_map(pool, handle); 173 + } 174 + static void zbud_zpool_unmap(void *pool, unsigned long handle) 175 + { 176 + zbud_unmap(pool, handle); 177 + } 178 + 179 + static u64 zbud_zpool_total_size(void *pool) 180 + { 181 + return zbud_get_pool_size(pool) * PAGE_SIZE; 182 + } 183 + 184 + static struct zpool_driver zbud_zpool_driver = { 185 + .type = "zbud", 186 + .owner = THIS_MODULE, 187 + .create = 
zbud_zpool_create, 188 + .destroy = zbud_zpool_destroy, 189 + .malloc = zbud_zpool_malloc, 190 + .free = zbud_zpool_free, 191 + .shrink = zbud_zpool_shrink, 192 + .map = zbud_zpool_map, 193 + .unmap = zbud_zpool_unmap, 194 + .total_size = zbud_zpool_total_size, 195 + }; 196 + 197 + #endif /* CONFIG_ZPOOL */ 198 + 199 + /***************** 117 200 * Helpers 118 201 *****************/ 119 202 /* Just to make the code easier to read */ ··· 207 122 }; 208 123 209 124 /* Converts an allocation size in bytes to size in zbud chunks */ 210 - static int size_to_chunks(int size) 125 + static int size_to_chunks(size_t size) 211 126 { 212 127 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 213 128 } ··· 332 247 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 333 248 * a new page. 334 249 */ 335 - int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, 250 + int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, 336 251 unsigned long *handle) 337 252 { 338 253 int chunks, i, freechunks; ··· 596 511 /* Make sure the zbud header will fit in one chunk */ 597 512 BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED); 598 513 pr_info("loaded\n"); 514 + 515 + #ifdef CONFIG_ZPOOL 516 + zpool_register_driver(&zbud_zpool_driver); 517 + #endif 518 + 599 519 return 0; 600 520 } 601 521 602 522 static void __exit exit_zbud(void) 603 523 { 524 + #ifdef CONFIG_ZPOOL 525 + zpool_unregister_driver(&zbud_zpool_driver); 526 + #endif 527 + 604 528 pr_info("unloaded\n"); 605 529 } 606 530

+364

mm/zpool.c

··· 1 + /* 2 + * zpool memory storage api 3 + * 4 + * Copyright (C) 2014 Dan Streetman 5 + * 6 + * This is a common frontend for memory storage pool implementations. 7 + * Typically, this is used to store compressed memory. 8 + */ 9 + 10 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 + 12 + #include <linux/list.h> 13 + #include <linux/types.h> 14 + #include <linux/mm.h> 15 + #include <linux/slab.h> 16 + #include <linux/spinlock.h> 17 + #include <linux/module.h> 18 + #include <linux/zpool.h> 19 + 20 + struct zpool { 21 + char *type; 22 + 23 + struct zpool_driver *driver; 24 + void *pool; 25 + struct zpool_ops *ops; 26 + 27 + struct list_head list; 28 + }; 29 + 30 + static LIST_HEAD(drivers_head); 31 + static DEFINE_SPINLOCK(drivers_lock); 32 + 33 + static LIST_HEAD(pools_head); 34 + static DEFINE_SPINLOCK(pools_lock); 35 + 36 + /** 37 + * zpool_register_driver() - register a zpool implementation. 38 + * @driver: driver to register 39 + */ 40 + void zpool_register_driver(struct zpool_driver *driver) 41 + { 42 + spin_lock(&drivers_lock); 43 + atomic_set(&driver->refcount, 0); 44 + list_add(&driver->list, &drivers_head); 45 + spin_unlock(&drivers_lock); 46 + } 47 + EXPORT_SYMBOL(zpool_register_driver); 48 + 49 + /** 50 + * zpool_unregister_driver() - unregister a zpool implementation. 51 + * @driver: driver to unregister. 52 + * 53 + * Module usage counting is used to prevent using a driver 54 + * while/after unloading, so if this is called from module 55 + * exit function, this should never fail; if called from 56 + * other than the module exit function, and this returns 57 + * failure, the driver is in use and must remain available. 
58 + */ 59 + int zpool_unregister_driver(struct zpool_driver *driver) 60 + { 61 + int ret = 0, refcount; 62 + 63 + spin_lock(&drivers_lock); 64 + refcount = atomic_read(&driver->refcount); 65 + WARN_ON(refcount < 0); 66 + if (refcount > 0) 67 + ret = -EBUSY; 68 + else 69 + list_del(&driver->list); 70 + spin_unlock(&drivers_lock); 71 + 72 + return ret; 73 + } 74 + EXPORT_SYMBOL(zpool_unregister_driver); 75 + 76 + /** 77 + * zpool_evict() - evict callback from a zpool implementation. 78 + * @pool: pool to evict from. 79 + * @handle: handle to evict. 80 + * 81 + * This can be used by zpool implementations to call the 82 + * user's evict zpool_ops struct evict callback. 83 + */ 84 + int zpool_evict(void *pool, unsigned long handle) 85 + { 86 + struct zpool *zpool; 87 + 88 + spin_lock(&pools_lock); 89 + list_for_each_entry(zpool, &pools_head, list) { 90 + if (zpool->pool == pool) { 91 + spin_unlock(&pools_lock); 92 + if (!zpool->ops || !zpool->ops->evict) 93 + return -EINVAL; 94 + return zpool->ops->evict(zpool, handle); 95 + } 96 + } 97 + spin_unlock(&pools_lock); 98 + 99 + return -ENOENT; 100 + } 101 + EXPORT_SYMBOL(zpool_evict); 102 + 103 + static struct zpool_driver *zpool_get_driver(char *type) 104 + { 105 + struct zpool_driver *driver; 106 + 107 + spin_lock(&drivers_lock); 108 + list_for_each_entry(driver, &drivers_head, list) { 109 + if (!strcmp(driver->type, type)) { 110 + bool got = try_module_get(driver->owner); 111 + 112 + if (got) 113 + atomic_inc(&driver->refcount); 114 + spin_unlock(&drivers_lock); 115 + return got ? driver : NULL; 116 + } 117 + } 118 + 119 + spin_unlock(&drivers_lock); 120 + return NULL; 121 + } 122 + 123 + static void zpool_put_driver(struct zpool_driver *driver) 124 + { 125 + atomic_dec(&driver->refcount); 126 + module_put(driver->owner); 127 + } 128 + 129 + /** 130 + * zpool_create_pool() - Create a new zpool 131 + * @type The type of the zpool to create (e.g. zbud, zsmalloc) 132 + * @gfp The GFP flags to use when allocating the pool. 
133 + * @ops The optional ops callback. 134 + * 135 + * This creates a new zpool of the specified type. The gfp flags will be 136 + * used when allocating memory, if the implementation supports it. If the 137 + * ops param is NULL, then the created zpool will not be shrinkable. 138 + * 139 + * Implementations must guarantee this to be thread-safe. 140 + * 141 + * Returns: New zpool on success, NULL on failure. 142 + */ 143 + struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops) 144 + { 145 + struct zpool_driver *driver; 146 + struct zpool *zpool; 147 + 148 + pr_info("creating pool type %s\n", type); 149 + 150 + driver = zpool_get_driver(type); 151 + 152 + if (!driver) { 153 + request_module(type); 154 + driver = zpool_get_driver(type); 155 + } 156 + 157 + if (!driver) { 158 + pr_err("no driver for type %s\n", type); 159 + return NULL; 160 + } 161 + 162 + zpool = kmalloc(sizeof(*zpool), gfp); 163 + if (!zpool) { 164 + pr_err("couldn't create zpool - out of memory\n"); 165 + zpool_put_driver(driver); 166 + return NULL; 167 + } 168 + 169 + zpool->type = driver->type; 170 + zpool->driver = driver; 171 + zpool->pool = driver->create(gfp, ops); 172 + zpool->ops = ops; 173 + 174 + if (!zpool->pool) { 175 + pr_err("couldn't create %s pool\n", type); 176 + zpool_put_driver(driver); 177 + kfree(zpool); 178 + return NULL; 179 + } 180 + 181 + pr_info("created %s pool\n", type); 182 + 183 + spin_lock(&pools_lock); 184 + list_add(&zpool->list, &pools_head); 185 + spin_unlock(&pools_lock); 186 + 187 + return zpool; 188 + } 189 + 190 + /** 191 + * zpool_destroy_pool() - Destroy a zpool 192 + * @pool The zpool to destroy. 193 + * 194 + * Implementations must guarantee this to be thread-safe, 195 + * however only when destroying different pools. The same 196 + * pool should only be destroyed once, and should not be used 197 + * after it is destroyed. 198 + * 199 + * This destroys an existing zpool. The zpool should not be in use. 
200 + */ 201 + void zpool_destroy_pool(struct zpool *zpool) 202 + { 203 + pr_info("destroying pool type %s\n", zpool->type); 204 + 205 + spin_lock(&pools_lock); 206 + list_del(&zpool->list); 207 + spin_unlock(&pools_lock); 208 + zpool->driver->destroy(zpool->pool); 209 + zpool_put_driver(zpool->driver); 210 + kfree(zpool); 211 + } 212 + 213 + /** 214 + * zpool_get_type() - Get the type of the zpool 215 + * @pool The zpool to check 216 + * 217 + * This returns the type of the pool. 218 + * 219 + * Implementations must guarantee this to be thread-safe. 220 + * 221 + * Returns: The type of zpool. 222 + */ 223 + char *zpool_get_type(struct zpool *zpool) 224 + { 225 + return zpool->type; 226 + } 227 + 228 + /** 229 + * zpool_malloc() - Allocate memory 230 + * @pool The zpool to allocate from. 231 + * @size The amount of memory to allocate. 232 + * @gfp The GFP flags to use when allocating memory. 233 + * @handle Pointer to the handle to set 234 + * 235 + * This allocates the requested amount of memory from the pool. 236 + * The gfp flags will be used when allocating memory, if the 237 + * implementation supports it. The provided @handle will be 238 + * set to the allocated object handle. 239 + * 240 + * Implementations must guarantee this to be thread-safe. 241 + * 242 + * Returns: 0 on success, negative value on error. 243 + */ 244 + int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp, 245 + unsigned long *handle) 246 + { 247 + return zpool->driver->malloc(zpool->pool, size, gfp, handle); 248 + } 249 + 250 + /** 251 + * zpool_free() - Free previously allocated memory 252 + * @pool The zpool that allocated the memory. 253 + * @handle The handle to the memory to free. 254 + * 255 + * This frees previously allocated memory. This does not guarantee 256 + * that the pool will actually free memory, only that the memory 257 + * in the pool will become available for use by the pool. 
258 + * 259 + * Implementations must guarantee this to be thread-safe, 260 + * however only when freeing different handles. The same 261 + * handle should only be freed once, and should not be used 262 + * after freeing. 263 + */ 264 + void zpool_free(struct zpool *zpool, unsigned long handle) 265 + { 266 + zpool->driver->free(zpool->pool, handle); 267 + } 268 + 269 + /** 270 + * zpool_shrink() - Shrink the pool size 271 + * @pool The zpool to shrink. 272 + * @pages The number of pages to shrink the pool. 273 + * @reclaimed The number of pages successfully evicted. 274 + * 275 + * This attempts to shrink the actual memory size of the pool 276 + * by evicting currently used handle(s). If the pool was 277 + * created with no zpool_ops, or the evict call fails for any 278 + * of the handles, this will fail. If non-NULL, the @reclaimed 279 + * parameter will be set to the number of pages reclaimed, 280 + * which may be more than the number of pages requested. 281 + * 282 + * Implementations must guarantee this to be thread-safe. 283 + * 284 + * Returns: 0 on success, negative value on error/failure. 285 + */ 286 + int zpool_shrink(struct zpool *zpool, unsigned int pages, 287 + unsigned int *reclaimed) 288 + { 289 + return zpool->driver->shrink(zpool->pool, pages, reclaimed); 290 + } 291 + 292 + /** 293 + * zpool_map_handle() - Map a previously allocated handle into memory 294 + * @pool The zpool that the handle was allocated from 295 + * @handle The handle to map 296 + * @mm How the memory should be mapped 297 + * 298 + * This maps a previously allocated handle into memory. The @mm 299 + * param indicates to the implementation how the memory will be 300 + * used, i.e. read-only, write-only, read-write. If the 301 + * implementation does not support it, the memory will be treated 302 + * as read-write. 303 + * 304 + * This may hold locks, disable interrupts, and/or preemption, 305 + * and the zpool_unmap_handle() must be called to undo those 306 + * actions. 
The code that uses the mapped handle should complete 307 + * its operatons on the mapped handle memory quickly and unmap 308 + * as soon as possible. As the implementation may use per-cpu 309 + * data, multiple handles should not be mapped concurrently on 310 + * any cpu. 311 + * 312 + * Returns: A pointer to the handle's mapped memory area. 313 + */ 314 + void *zpool_map_handle(struct zpool *zpool, unsigned long handle, 315 + enum zpool_mapmode mapmode) 316 + { 317 + return zpool->driver->map(zpool->pool, handle, mapmode); 318 + } 319 + 320 + /** 321 + * zpool_unmap_handle() - Unmap a previously mapped handle 322 + * @pool The zpool that the handle was allocated from 323 + * @handle The handle to unmap 324 + * 325 + * This unmaps a previously mapped handle. Any locks or other 326 + * actions that the implementation took in zpool_map_handle() 327 + * will be undone here. The memory area returned from 328 + * zpool_map_handle() should no longer be used after this. 329 + */ 330 + void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) 331 + { 332 + zpool->driver->unmap(zpool->pool, handle); 333 + } 334 + 335 + /** 336 + * zpool_get_total_size() - The total size of the pool 337 + * @pool The zpool to check 338 + * 339 + * This returns the total size in bytes of the pool. 340 + * 341 + * Returns: Total size of the zpool in bytes. 342 + */ 343 + u64 zpool_get_total_size(struct zpool *zpool) 344 + { 345 + return zpool->driver->total_size(zpool->pool); 346 + } 347 + 348 + static int __init init_zpool(void) 349 + { 350 + pr_info("loaded\n"); 351 + return 0; 352 + } 353 + 354 + static void __exit exit_zpool(void) 355 + { 356 + pr_info("unloaded\n"); 357 + } 358 + 359 + module_init(init_zpool); 360 + module_exit(exit_zpool); 361 + 362 + MODULE_LICENSE("GPL"); 363 + MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); 364 + MODULE_DESCRIPTION("Common API for compressed memory storage");

+85 -1

mm/zsmalloc.c

··· 92 92 #include <linux/spinlock.h> 93 93 #include <linux/types.h> 94 94 #include <linux/zsmalloc.h> 95 + #include <linux/zpool.h> 95 96 96 97 /* 97 98 * This must be power of 2 and greater than of equal to sizeof(link_free). ··· 241 240 enum zs_mapmode vm_mm; /* mapping mode */ 242 241 }; 243 242 243 + /* zpool driver */ 244 + 245 + #ifdef CONFIG_ZPOOL 246 + 247 + static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops) 248 + { 249 + return zs_create_pool(gfp); 250 + } 251 + 252 + static void zs_zpool_destroy(void *pool) 253 + { 254 + zs_destroy_pool(pool); 255 + } 256 + 257 + static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp, 258 + unsigned long *handle) 259 + { 260 + *handle = zs_malloc(pool, size); 261 + return *handle ? 0 : -1; 262 + } 263 + static void zs_zpool_free(void *pool, unsigned long handle) 264 + { 265 + zs_free(pool, handle); 266 + } 267 + 268 + static int zs_zpool_shrink(void *pool, unsigned int pages, 269 + unsigned int *reclaimed) 270 + { 271 + return -EINVAL; 272 + } 273 + 274 + static void *zs_zpool_map(void *pool, unsigned long handle, 275 + enum zpool_mapmode mm) 276 + { 277 + enum zs_mapmode zs_mm; 278 + 279 + switch (mm) { 280 + case ZPOOL_MM_RO: 281 + zs_mm = ZS_MM_RO; 282 + break; 283 + case ZPOOL_MM_WO: 284 + zs_mm = ZS_MM_WO; 285 + break; 286 + case ZPOOL_MM_RW: /* fallthru */ 287 + default: 288 + zs_mm = ZS_MM_RW; 289 + break; 290 + } 291 + 292 + return zs_map_object(pool, handle, zs_mm); 293 + } 294 + static void zs_zpool_unmap(void *pool, unsigned long handle) 295 + { 296 + zs_unmap_object(pool, handle); 297 + } 298 + 299 + static u64 zs_zpool_total_size(void *pool) 300 + { 301 + return zs_get_total_size_bytes(pool); 302 + } 303 + 304 + static struct zpool_driver zs_zpool_driver = { 305 + .type = "zsmalloc", 306 + .owner = THIS_MODULE, 307 + .create = zs_zpool_create, 308 + .destroy = zs_zpool_destroy, 309 + .malloc = zs_zpool_malloc, 310 + .free = zs_zpool_free, 311 + .shrink = zs_zpool_shrink, 312 + .map = 
zs_zpool_map, 313 + .unmap = zs_zpool_unmap, 314 + .total_size = zs_zpool_total_size, 315 + }; 316 + 317 + #endif /* CONFIG_ZPOOL */ 244 318 245 319 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ 246 320 static DEFINE_PER_CPU(struct mapping_area, zs_map_area); ··· 766 690 static inline void *__zs_map_object(struct mapping_area *area, 767 691 struct page *pages[2], int off, int size) 768 692 { 769 - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); 693 + BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); 770 694 area->vm_addr = area->vm->addr; 771 695 return area->vm_addr + off; 772 696 } ··· 890 814 { 891 815 int cpu; 892 816 817 + #ifdef CONFIG_ZPOOL 818 + zpool_unregister_driver(&zs_zpool_driver); 819 + #endif 820 + 893 821 cpu_notifier_register_begin(); 894 822 895 823 for_each_online_cpu(cpu) ··· 919 839 } 920 840 921 841 cpu_notifier_register_done(); 842 + 843 + #ifdef CONFIG_ZPOOL 844 + zpool_register_driver(&zs_zpool_driver); 845 + #endif 922 846 923 847 return 0; 924 848 fail:

+45 -30

mm/zswap.c

··· 34 34 #include <linux/swap.h> 35 35 #include <linux/crypto.h> 36 36 #include <linux/mempool.h> 37 - #include <linux/zbud.h> 37 + #include <linux/zpool.h> 38 38 39 39 #include <linux/mm_types.h> 40 40 #include <linux/page-flags.h> ··· 45 45 /********************************* 46 46 * statistics 47 47 **********************************/ 48 - /* Number of memory pages used by the compressed pool */ 49 - static u64 zswap_pool_pages; 48 + /* Total bytes used by the compressed storage */ 49 + static u64 zswap_pool_total_size; 50 50 /* The number of compressed pages currently stored in zswap */ 51 51 static atomic_t zswap_stored_pages = ATOMIC_INIT(0); 52 52 ··· 89 89 module_param_named(max_pool_percent, 90 90 zswap_max_pool_percent, uint, 0644); 91 91 92 - /* zbud_pool is shared by all of zswap backend */ 93 - static struct zbud_pool *zswap_pool; 92 + /* Compressed storage to use */ 93 + #define ZSWAP_ZPOOL_DEFAULT "zbud" 94 + static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; 95 + module_param_named(zpool, zswap_zpool_type, charp, 0444); 96 + 97 + /* zpool is shared by all of zswap backend */ 98 + static struct zpool *zswap_pool; 94 99 95 100 /********************************* 96 101 * compression functions ··· 173 168 * be held while changing the refcount. Since the lock must 174 169 * be held, there is no reason to also make refcount atomic. 175 170 * offset - the swap offset for the entry. Index into the red-black tree. 176 - * handle - zbud allocation handle that stores the compressed page data 171 + * handle - zpool allocation handle that stores the compressed page data 177 172 * length - the length in bytes of the compressed page data. Needed during 178 173 * decompression 179 174 */ ··· 289 284 } 290 285 291 286 /* 292 - * Carries out the common pattern of freeing and entry's zbud allocation, 287 + * Carries out the common pattern of freeing and entry's zpool allocation, 293 288 * freeing the entry itself, and decrementing the number of stored pages. 
294 289 */ 295 290 static void zswap_free_entry(struct zswap_entry *entry) 296 291 { 297 - zbud_free(zswap_pool, entry->handle); 292 + zpool_free(zswap_pool, entry->handle); 298 293 zswap_entry_cache_free(entry); 299 294 atomic_dec(&zswap_stored_pages); 300 - zswap_pool_pages = zbud_get_pool_size(zswap_pool); 295 + zswap_pool_total_size = zpool_get_total_size(zswap_pool); 301 296 } 302 297 303 298 /* caller must hold the tree lock */ ··· 414 409 static bool zswap_is_full(void) 415 410 { 416 411 return totalram_pages * zswap_max_pool_percent / 100 < 417 - zswap_pool_pages; 412 + DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); 418 413 } 419 414 420 415 /********************************* ··· 530 525 * the swap cache, the compressed version stored by zswap can be 531 526 * freed. 532 527 */ 533 - static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle) 528 + static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) 534 529 { 535 530 struct zswap_header *zhdr; 536 531 swp_entry_t swpentry; ··· 546 541 }; 547 542 548 543 /* extract swpentry from data */ 549 - zhdr = zbud_map(pool, handle); 544 + zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); 550 545 swpentry = zhdr->swpentry; /* here */ 551 - zbud_unmap(pool, handle); 546 + zpool_unmap_handle(pool, handle); 552 547 tree = zswap_trees[swp_type(swpentry)]; 553 548 offset = swp_offset(swpentry); 554 549 ··· 578 573 case ZSWAP_SWAPCACHE_NEW: /* page is locked */ 579 574 /* decompress */ 580 575 dlen = PAGE_SIZE; 581 - src = (u8 *)zbud_map(zswap_pool, entry->handle) + 582 - sizeof(struct zswap_header); 576 + src = (u8 *)zpool_map_handle(zswap_pool, entry->handle, 577 + ZPOOL_MM_RO) + sizeof(struct zswap_header); 583 578 dst = kmap_atomic(page); 584 579 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, 585 580 entry->length, dst, &dlen); 586 581 kunmap_atomic(dst); 587 - zbud_unmap(zswap_pool, entry->handle); 582 + zpool_unmap_handle(zswap_pool, entry->handle); 588 583 BUG_ON(ret); 
589 584 BUG_ON(dlen != PAGE_SIZE); 590 585 ··· 657 652 /* reclaim space if needed */ 658 653 if (zswap_is_full()) { 659 654 zswap_pool_limit_hit++; 660 - if (zbud_reclaim_page(zswap_pool, 8)) { 655 + if (zpool_shrink(zswap_pool, 1, NULL)) { 661 656 zswap_reject_reclaim_fail++; 662 657 ret = -ENOMEM; 663 658 goto reject; ··· 684 679 685 680 /* store */ 686 681 len = dlen + sizeof(struct zswap_header); 687 - ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN, 682 + ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN, 688 683 &handle); 689 684 if (ret == -ENOSPC) { 690 685 zswap_reject_compress_poor++; ··· 694 689 zswap_reject_alloc_fail++; 695 690 goto freepage; 696 691 } 697 - zhdr = zbud_map(zswap_pool, handle); 692 + zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW); 698 693 zhdr->swpentry = swp_entry(type, offset); 699 694 buf = (u8 *)(zhdr + 1); 700 695 memcpy(buf, dst, dlen); 701 - zbud_unmap(zswap_pool, handle); 696 + zpool_unmap_handle(zswap_pool, handle); 702 697 put_cpu_var(zswap_dstmem); 703 698 704 699 /* populate entry */ ··· 721 716 722 717 /* update stats */ 723 718 atomic_inc(&zswap_stored_pages); 724 - zswap_pool_pages = zbud_get_pool_size(zswap_pool); 719 + zswap_pool_total_size = zpool_get_total_size(zswap_pool); 725 720 726 721 return 0; 727 722 ··· 757 752 758 753 /* decompress */ 759 754 dlen = PAGE_SIZE; 760 - src = (u8 *)zbud_map(zswap_pool, entry->handle) + 761 - sizeof(struct zswap_header); 755 + src = (u8 *)zpool_map_handle(zswap_pool, entry->handle, 756 + ZPOOL_MM_RO) + sizeof(struct zswap_header); 762 757 dst = kmap_atomic(page); 763 758 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, 764 759 dst, &dlen); 765 760 kunmap_atomic(dst); 766 - zbud_unmap(zswap_pool, entry->handle); 761 + zpool_unmap_handle(zswap_pool, entry->handle); 767 762 BUG_ON(ret); 768 763 769 764 spin_lock(&tree->lock); ··· 816 811 zswap_trees[type] = NULL; 817 812 } 818 813 819 - static struct zbud_ops zswap_zbud_ops = { 
814 + static struct zpool_ops zswap_zpool_ops = { 820 815 .evict = zswap_writeback_entry 821 816 }; 822 817 ··· 874 869 zswap_debugfs_root, &zswap_written_back_pages); 875 870 debugfs_create_u64("duplicate_entry", S_IRUGO, 876 871 zswap_debugfs_root, &zswap_duplicate_entry); 877 - debugfs_create_u64("pool_pages", S_IRUGO, 878 - zswap_debugfs_root, &zswap_pool_pages); 872 + debugfs_create_u64("pool_total_size", S_IRUGO, 873 + zswap_debugfs_root, &zswap_pool_total_size); 879 874 debugfs_create_atomic_t("stored_pages", S_IRUGO, 880 875 zswap_debugfs_root, &zswap_stored_pages); 881 876 ··· 900 895 **********************************/ 901 896 static int __init init_zswap(void) 902 897 { 898 + gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; 899 + 903 900 if (!zswap_enabled) 904 901 return 0; 905 902 906 903 pr_info("loading zswap\n"); 907 904 908 - zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops); 905 + zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops); 906 + if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { 907 + pr_info("%s zpool not available\n", zswap_zpool_type); 908 + zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; 909 + zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, 910 + &zswap_zpool_ops); 911 + } 909 912 if (!zswap_pool) { 910 - pr_err("zbud pool creation failed\n"); 913 + pr_err("%s zpool not available\n", zswap_zpool_type); 914 + pr_err("zpool creation failed\n"); 911 915 goto error; 912 916 } 917 + pr_info("using %s pool\n", zswap_zpool_type); 913 918 914 919 if (zswap_entry_cache_create()) { 915 920 pr_err("entry cache creation failed\n"); ··· 943 928 compfail: 944 929 zswap_entry_cache_destory(); 945 930 cachefail: 946 - zbud_destroy_pool(zswap_pool); 931 + zpool_destroy_pool(zswap_pool); 947 932 error: 948 933 return -ENOMEM; 949 934 }

+1 -1

net/batman-adv/fragmentation.c

··· 188 188 189 189 /* Reached the end of the list, so insert after 'frag_entry_last'. */ 190 190 if (likely(frag_entry_last)) { 191 - hlist_add_after(&frag_entry_last->list, &frag_entry_new->list); 191 + hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list); 192 192 chain->size += skb->len - hdr_size; 193 193 chain->timestamp = jiffies; 194 194 ret = true;

+1 -1

net/bridge/br_multicast.c

··· 1174 1174 } 1175 1175 1176 1176 if (slot) 1177 - hlist_add_after_rcu(slot, &port->rlist); 1177 + hlist_add_behind_rcu(&port->rlist, slot); 1178 1178 else 1179 1179 hlist_add_head_rcu(&port->rlist, &br->router_list); 1180 1180 }

+1 -1

net/ipv4/fib_trie.c

··· 940 940 last = li; 941 941 } 942 942 if (last) 943 - hlist_add_after_rcu(&last->hlist, &new->hlist); 943 + hlist_add_behind_rcu(&new->hlist, &last->hlist); 944 944 else 945 945 hlist_add_before_rcu(&new->hlist, &li->hlist); 946 946 }

+1 -1

net/ipv6/addrlabel.c

··· 277 277 last = p; 278 278 } 279 279 if (last) 280 - hlist_add_after_rcu(&last->list, &newp->list); 280 + hlist_add_behind_rcu(&newp->list, &last->list); 281 281 else 282 282 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 283 283 out:

+2 -2

net/xfrm/xfrm_policy.c

··· 389 389 if (h != h0) 390 390 continue; 391 391 hlist_del(&pol->bydst); 392 - hlist_add_after(entry0, &pol->bydst); 392 + hlist_add_behind(&pol->bydst, entry0); 393 393 } 394 394 entry0 = &pol->bydst; 395 395 } ··· 654 654 break; 655 655 } 656 656 if (newpos) 657 - hlist_add_after(newpos, &policy->bydst); 657 + hlist_add_behind(&policy->bydst, newpos); 658 658 else 659 659 hlist_add_head(&policy->bydst, chain); 660 660 xfrm_pol_hold(policy);

+470 -111

scripts/checkpatch.pl

··· 309 309 our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x; 310 310 311 311 our $NonptrType; 312 + our $NonptrTypeMisordered; 312 313 our $NonptrTypeWithAttr; 313 314 our $Type; 315 + our $TypeMisordered; 314 316 our $Declare; 317 + our $DeclareMisordered; 315 318 316 319 our $NON_ASCII_UTF8 = qr{ 317 320 [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte ··· 356 353 Cc: 357 354 )}; 358 355 356 + our @typeListMisordered = ( 357 + qr{char\s+(?:un)?signed}, 358 + qr{int\s+(?:(?:un)?signed\s+)?short\s}, 359 + qr{int\s+short(?:\s+(?:un)?signed)}, 360 + qr{short\s+int(?:\s+(?:un)?signed)}, 361 + qr{(?:un)?signed\s+int\s+short}, 362 + qr{short\s+(?:un)?signed}, 363 + qr{long\s+int\s+(?:un)?signed}, 364 + qr{int\s+long\s+(?:un)?signed}, 365 + qr{long\s+(?:un)?signed\s+int}, 366 + qr{int\s+(?:un)?signed\s+long}, 367 + qr{int\s+(?:un)?signed}, 368 + qr{int\s+long\s+long\s+(?:un)?signed}, 369 + qr{long\s+long\s+int\s+(?:un)?signed}, 370 + qr{long\s+long\s+(?:un)?signed\s+int}, 371 + qr{long\s+long\s+(?:un)?signed}, 372 + qr{long\s+(?:un)?signed}, 373 + ); 374 + 359 375 our @typeList = ( 360 376 qr{void}, 361 - qr{(?:unsigned\s+)?char}, 362 - qr{(?:unsigned\s+)?short}, 363 - qr{(?:unsigned\s+)?int}, 364 - qr{(?:unsigned\s+)?long}, 365 - qr{(?:unsigned\s+)?long\s+int}, 366 - qr{(?:unsigned\s+)?long\s+long}, 367 - qr{(?:unsigned\s+)?long\s+long\s+int}, 368 - qr{unsigned}, 377 + qr{(?:(?:un)?signed\s+)?char}, 378 + qr{(?:(?:un)?signed\s+)?short\s+int}, 379 + qr{(?:(?:un)?signed\s+)?short}, 380 + qr{(?:(?:un)?signed\s+)?int}, 381 + qr{(?:(?:un)?signed\s+)?long\s+int}, 382 + qr{(?:(?:un)?signed\s+)?long\s+long\s+int}, 383 + qr{(?:(?:un)?signed\s+)?long\s+long}, 384 + qr{(?:(?:un)?signed\s+)?long}, 385 + qr{(?:un)?signed}, 369 386 qr{float}, 370 387 qr{double}, 371 388 qr{bool}, ··· 395 372 qr{${Ident}_t}, 396 373 qr{${Ident}_handler}, 397 374 qr{${Ident}_handler_fn}, 375 + @typeListMisordered, 398 376 ); 399 377 our @typeListWithAttr = ( 400 
378 @typeList, ··· 423 399 $mode_perms_search .= $entry->[0]; 424 400 } 425 401 426 - our $declaration_macros = qr{(?x: 427 - (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*$| 428 - (?:$Storage\s+)?LIST_HEAD\s*\( 429 - )}; 430 - 431 402 our $allowed_asm_includes = qr{(?x: 432 403 irq| 433 404 memory ··· 432 413 sub build_types { 433 414 my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; 434 415 my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; 416 + my $Misordered = "(?x: \n" . join("|\n ", @typeListMisordered) . "\n)"; 435 417 my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)"; 436 418 $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; 437 419 $NonptrType = qr{ ··· 441 421 (?:typeof|__typeof__)\s*\([^$]*\)| 442 422 (?:$typeTypedefs\b)| 443 423 (?:${all}\b) 424 + ) 425 + (?:\s+$Modifier|\s+const)* 426 + }x; 427 + $NonptrTypeMisordered = qr{ 428 + (?:$Modifier\s+|const\s+)* 429 + (?: 430 + (?:${Misordered}\b) 444 431 ) 445 432 (?:\s+$Modifier|\s+const)* 446 433 }x; ··· 462 435 }x; 463 436 $Type = qr{ 464 437 $NonptrType 465 - (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)? 438 + (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)? 439 + (?:\s+$Inline|\s+$Modifier)* 440 + }x; 441 + $TypeMisordered = qr{ 442 + $NonptrTypeMisordered 443 + (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)? 
466 444 (?:\s+$Inline|\s+$Modifier)* 467 445 }x; 468 446 $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type}; 447 + $DeclareMisordered = qr{(?:$Storage\s+(?:$Inline\s+)?)?$TypeMisordered}; 469 448 } 470 449 build_types(); 471 450 ··· 484 451 our $balanced_parens = qr/($(?:[^\($]++|(?-1))*\))/; 485 452 our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*}; 486 453 our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)}; 454 + 455 + our $declaration_macros = qr{(?x: 456 + (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(| 457 + (?:$Storage\s+)?LIST_HEAD\s*\(| 458 + (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\( 459 + )}; 487 460 488 461 sub deparenthesize { 489 462 my ($string) = @_; ··· 589 550 } 590 551 } 591 552 553 + sub git_commit_info { 554 + my ($commit, $id, $desc) = @_; 555 + 556 + return ($id, $desc) if ((which("git") eq "") || !(-e ".git")); 557 + 558 + my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`; 559 + $output =~ s/^\s*//gm; 560 + my @lines = split("\n", $output); 561 + 562 + if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) { 563 + # Maybe one day convert this block of bash into something that returns 564 + # all matching commit ids, but it's very slow... 565 + # 566 + # echo "checking commits $1..." 
567 + # git rev-list --remotes | grep -i "^$1" | 568 + # while read line ; do 569 + # git log --format='%H %s' -1 $line | 570 + # echo "commit $(cut -c 1-12,41-)" 571 + # done 572 + } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) { 573 + } else { 574 + $id = substr($lines[0], 0, 12); 575 + $desc = substr($lines[0], 41); 576 + } 577 + 578 + return ($id, $desc); 579 + } 580 + 592 581 $chk_signoff = 0 if ($file); 593 582 594 583 my @rawlines = (); 595 584 my @lines = (); 596 585 my @fixed = (); 586 + my @fixed_inserted = (); 587 + my @fixed_deleted = (); 588 + my $fixlinenr = -1; 589 + 597 590 my $vname; 598 591 for my $filename (@ARGV) { 599 592 my $FILE; ··· 654 583 @rawlines = (); 655 584 @lines = (); 656 585 @fixed = (); 586 + @fixed_inserted = (); 587 + @fixed_deleted = (); 588 + $fixlinenr = -1; 657 589 } 658 590 659 591 exit($exit); ··· 746 672 } 747 673 748 674 return $formatted_email; 675 + } 676 + 677 + sub which { 678 + my ($bin) = @_; 679 + 680 + foreach my $path (split(/:/, $ENV{PATH})) { 681 + if (-e "$path/$bin") { 682 + return "$path/$bin"; 683 + } 684 + } 685 + 686 + return ""; 749 687 } 750 688 751 689 sub which_conf { ··· 1569 1483 our @report; 1570 1484 } 1571 1485 1486 + sub fixup_current_range { 1487 + my ($lineRef, $offset, $length) = @_; 1488 + 1489 + if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) { 1490 + my $o = $1; 1491 + my $l = $2; 1492 + my $no = $o + $offset; 1493 + my $nl = $l + $length; 1494 + $$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/; 1495 + } 1496 + } 1497 + 1498 + sub fix_inserted_deleted_lines { 1499 + my ($linesRef, $insertedRef, $deletedRef) = @_; 1500 + 1501 + my $range_last_linenr = 0; 1502 + my $delta_offset = 0; 1503 + 1504 + my $old_linenr = 0; 1505 + my $new_linenr = 0; 1506 + 1507 + my $next_insert = 0; 1508 + my $next_delete = 0; 1509 + 1510 + my @lines = (); 1511 + 1512 + my $inserted = @{$insertedRef}[$next_insert++]; 1513 + my $deleted = 
@{$deletedRef}[$next_delete++]; 1514 + 1515 + foreach my $old_line (@{$linesRef}) { 1516 + my $save_line = 1; 1517 + my $line = $old_line; #don't modify the array 1518 + if ($line =~ /^(?:\+\+\+\|\-\-\-)\s+\S+/) { #new filename 1519 + $delta_offset = 0; 1520 + } elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) { #new hunk 1521 + $range_last_linenr = $new_linenr; 1522 + fixup_current_range(\$line, $delta_offset, 0); 1523 + } 1524 + 1525 + while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) { 1526 + $deleted = @{$deletedRef}[$next_delete++]; 1527 + $save_line = 0; 1528 + fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1); 1529 + } 1530 + 1531 + while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) { 1532 + push(@lines, ${$inserted}{'LINE'}); 1533 + $inserted = @{$insertedRef}[$next_insert++]; 1534 + $new_linenr++; 1535 + fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1); 1536 + } 1537 + 1538 + if ($save_line) { 1539 + push(@lines, $line); 1540 + $new_linenr++; 1541 + } 1542 + 1543 + $old_linenr++; 1544 + } 1545 + 1546 + return @lines; 1547 + } 1548 + 1549 + sub fix_insert_line { 1550 + my ($linenr, $line) = @_; 1551 + 1552 + my $inserted = { 1553 + LINENR => $linenr, 1554 + LINE => $line, 1555 + }; 1556 + push(@fixed_inserted, $inserted); 1557 + } 1558 + 1559 + sub fix_delete_line { 1560 + my ($linenr, $line) = @_; 1561 + 1562 + my $deleted = { 1563 + LINENR => $linenr, 1564 + LINE => $line, 1565 + }; 1566 + 1567 + push(@fixed_deleted, $deleted); 1568 + } 1569 + 1572 1570 sub ERROR { 1573 1571 my ($type, $msg) = @_; 1574 1572 ··· 1807 1637 my $signoff = 0; 1808 1638 my $is_patch = 0; 1809 1639 1810 - my $in_header_lines = 1; 1640 + my $in_header_lines = $file ? 
0 : 1; 1811 1641 my $in_commit_log = 0; #Scanning lines before patch 1812 - 1642 + my $reported_maintainer_file = 0; 1813 1643 my $non_utf8_charset = 0; 1644 + 1645 + my $last_blank_line = 0; 1814 1646 1815 1647 our @report = (); 1816 1648 our $cnt_lines = 0; ··· 1931 1759 1932 1760 $realcnt = 0; 1933 1761 $linenr = 0; 1762 + $fixlinenr = -1; 1934 1763 foreach my $line (@lines) { 1935 1764 $linenr++; 1765 + $fixlinenr++; 1936 1766 my $sline = $line; #copy of $line 1937 1767 $sline =~ s/$;/ /g; #with comments as spaces 1938 1768 ··· 2065 1891 if (WARN("BAD_SIGN_OFF", 2066 1892 "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) && 2067 1893 $fix) { 2068 - $fixed[$linenr - 1] = 1894 + $fixed[$fixlinenr] = 2069 1895 "$ucfirst_sign_off $email"; 2070 1896 } 2071 1897 } ··· 2073 1899 if (WARN("BAD_SIGN_OFF", 2074 1900 "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) && 2075 1901 $fix) { 2076 - $fixed[$linenr - 1] = 1902 + $fixed[$fixlinenr] = 2077 1903 "$ucfirst_sign_off $email"; 2078 1904 } 2079 1905 ··· 2082 1908 if (WARN("BAD_SIGN_OFF", 2083 1909 "Use a single space after $ucfirst_sign_off\n" . $herecurr) && 2084 1910 $fix) { 2085 - $fixed[$linenr - 1] = 1911 + $fixed[$fixlinenr] = 2086 1912 "$ucfirst_sign_off $email"; 2087 1913 } 2088 1914 } ··· 2130 1956 "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr); 2131 1957 } 2132 1958 1959 + # Check for improperly formed commit descriptions 1960 + if ($in_commit_log && 1961 + $line =~ /\bcommit\s+[0-9a-f]{5,}/i && 1962 + $line !~ /\b[Cc]ommit [0-9a-f]{12,16} \("/) { 1963 + $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i; 1964 + my $init_char = $1; 1965 + my $orig_commit = lc($2); 1966 + my $id = '01234567890ab'; 1967 + my $desc = 'commit description'; 1968 + ($id, $desc) = git_commit_info($orig_commit, $id, $desc); 1969 + ERROR("GIT_COMMIT_ID", 1970 + "Please use 12 to 16 chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . 
$herecurr); 1971 + } 1972 + 1973 + # Check for added, moved or deleted files 1974 + if (!$reported_maintainer_file && !$in_commit_log && 1975 + ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ || 1976 + $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ || 1977 + ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ && 1978 + (defined($1) || defined($2))))) { 1979 + $reported_maintainer_file = 1; 1980 + WARN("FILE_PATH_CHANGES", 1981 + "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr); 1982 + } 1983 + 2133 1984 # Check for wrappage within a valid hunk of the file 2134 1985 if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) { 2135 1986 ERROR("CORRUPTED_PATCH", ··· 2192 1993 # Check if it's the start of a commit log 2193 1994 # (not a header line and we haven't seen the patch filename) 2194 1995 if ($in_header_lines && $realfile =~ /^$/ && 2195 - $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) { 1996 + !($rawline =~ /^\s+\S/ || 1997 + $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) { 2196 1998 $in_header_lines = 0; 2197 1999 $in_commit_log = 1; 2198 2000 } ··· 2221 2021 if (ERROR("DOS_LINE_ENDINGS", 2222 2022 "DOS line endings\n" . $herevet) && 2223 2023 $fix) { 2224 - $fixed[$linenr - 1] =~ s/[\s\015]+$//; 2024 + $fixed[$fixlinenr] =~ s/[\s\015]+$//; 2225 2025 } 2226 2026 } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { 2227 2027 my $herevet = "$here\n" . cat_vet($rawline) . "\n"; 2228 2028 if (ERROR("TRAILING_WHITESPACE", 2229 2029 "trailing whitespace\n" . $herevet) && 2230 2030 $fix) { 2231 - $fixed[$linenr - 1] =~ s/\s+$//; 2031 + $fixed[$fixlinenr] =~ s/\s+$//; 2232 2032 } 2233 2033 2234 2034 $rpt_cleaners = 1; ··· 2249 2049 # Only applies when adding the entry originally, after that we do not have 2250 2050 # sufficient context to determine whether it is indeed long enough. 
2251 2051 if ($realfile =~ /Kconfig/ && 2252 - $line =~ /.\s*config\s+/) { 2052 + $line =~ /^\+\s*config\s+/) { 2253 2053 my $length = 0; 2254 2054 my $cnt = $realcnt; 2255 2055 my $ln = $linenr + 1; ··· 2262 2062 $is_end = $lines[$ln - 1] =~ /^\+/; 2263 2063 2264 2064 next if ($f =~ /^-/); 2065 + last if (!$file && $f =~ /^\@\@/); 2265 2066 2266 - if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) { 2067 + if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) { 2267 2068 $is_start = 1; 2268 - } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) { 2069 + } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) { 2269 2070 $length = -1; 2270 2071 } 2271 2072 ··· 2362 2161 "quoted string split across lines\n" . $hereprev); 2363 2162 } 2364 2163 2164 + # check for missing a space in a string concatination 2165 + if ($prevrawline =~ /[^\\]\w"$/ && $rawline =~ /^\+[\t ]+"\w/) { 2166 + WARN('MISSING_SPACE', 2167 + "break quoted strings at a space character\n" . $hereprev); 2168 + } 2169 + 2365 2170 # check for spaces before a quoted newline 2366 2171 if ($rawline =~ /^.*\".*\s\\n/) { 2367 2172 if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE", 2368 2173 "unnecessary whitespace before a quoted newline\n" . $herecurr) && 2369 2174 $fix) { 2370 - $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/; 2175 + $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/; 2371 2176 } 2372 2177 2373 2178 } ··· 2410 2203 if (ERROR("CODE_INDENT", 2411 2204 "code indent should use tabs where possible\n" . $herevet) && 2412 2205 $fix) { 2413 - $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2206 + $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2414 2207 } 2415 2208 } 2416 2209 ··· 2420 2213 if (WARN("SPACE_BEFORE_TAB", 2421 2214 "please, no space before tabs\n" . 
$herevet) && 2422 2215 $fix) { 2423 - while ($fixed[$linenr - 1] =~ 2216 + while ($fixed[$fixlinenr] =~ 2424 2217 s/(^\+.*) {8,8}+\t/$1\t\t/) {} 2425 - while ($fixed[$linenr - 1] =~ 2218 + while ($fixed[$fixlinenr] =~ 2426 2219 s/(^\+.*) +\t/$1\t/) {} 2427 2220 } 2428 2221 } ··· 2456 2249 if (CHK("PARENTHESIS_ALIGNMENT", 2457 2250 "Alignment should match open parenthesis\n" . $hereprev) && 2458 2251 $fix && $line =~ /^\+/) { 2459 - $fixed[$linenr - 1] =~ 2252 + $fixed[$fixlinenr] =~ 2460 2253 s/^\+[ \t]*/\+$goodtabindent/; 2461 2254 } 2462 2255 } 2463 2256 } 2464 2257 } 2465 2258 2466 - if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) { 2259 + if ($line =~ /^\+.*$\s*$Type\s*$[ \t]+(?!$Assignment|$Arithmetic|{)/) { 2467 2260 if (CHK("SPACING", 2468 - "No space is necessary after a cast\n" . $hereprev) && 2261 + "No space is necessary after a cast\n" . $herecurr) && 2469 2262 $fix) { 2470 - $fixed[$linenr - 1] =~ 2471 - s/^(\+.*\*[ \t]*\))[ \t]+/$1/; 2263 + $fixed[$fixlinenr] =~ 2264 + s/($\s*$Type\s*$)[ \t]+/$1/; 2472 2265 } 2473 2266 } 2474 2267 ··· 2498 2291 "networking block comments put the trailing */ on a separate line\n" . $herecurr); 2499 2292 } 2500 2293 2294 + # check for missing blank lines after struct/union declarations 2295 + # with exceptions for various attributes and macros 2296 + if ($prevline =~ /^[\+ ]};?\s*$/ && 2297 + $line =~ /^\+/ && 2298 + !($line =~ /^\+\s*$/ || 2299 + $line =~ /^\+\s*EXPORT_SYMBOL/ || 2300 + $line =~ /^\+\s*MODULE_/i || 2301 + $line =~ /^\+\s*\#\s*(?:end|elif|else)/ || 2302 + $line =~ /^\+[a-z_]*init/ || 2303 + $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ || 2304 + $line =~ /^\+\s*DECLARE/ || 2305 + $line =~ /^\+\s*__setup/)) { 2306 + if (CHK("LINE_SPACING", 2307 + "Please use a blank line after function/struct/union/enum declarations\n" . 
$hereprev) && 2308 + $fix) { 2309 + fix_insert_line($fixlinenr, "\+"); 2310 + } 2311 + } 2312 + 2313 + # check for multiple consecutive blank lines 2314 + if ($prevline =~ /^[\+ ]\s*$/ && 2315 + $line =~ /^\+\s*$/ && 2316 + $last_blank_line != ($linenr - 1)) { 2317 + if (CHK("LINE_SPACING", 2318 + "Please don't use multiple blank lines\n" . $hereprev) && 2319 + $fix) { 2320 + fix_delete_line($fixlinenr, $rawline); 2321 + } 2322 + 2323 + $last_blank_line = $linenr; 2324 + } 2325 + 2501 2326 # check for missing blank lines after declarations 2502 2327 if ($sline =~ /^\+\s+\S/ && #Not at char 1 2503 2328 # actual declarations 2504 2329 ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || 2330 + # function pointer declarations 2331 + $prevline =~ /^\+\s+$Declare\s*$\s*\*\s*$Ident\s*$\s*[=,;:\[$]/ || 2505 2332 # foo bar; where foo is some local typedef or #define 2506 2333 $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || 2507 2334 # known declaration macros ··· 2548 2307 $prevline =~ /(?:\{\s*|\$$/) && 2549 2308 # looks like a declaration 2550 2309 !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ || 2310 + # function pointer declarations 2311 + $sline =~ /^\+\s+$Declare\s*$\s*\*\s*$Ident\s*$\s*[=,;:\[\(]/ || 2551 2312 # foo bar; where foo is some local typedef or #define 2552 2313 $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ || 2553 2314 # known declaration macros ··· 2564 2321 $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) && 2565 2322 # indentation of previous and current line are the same 2566 2323 (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) { 2567 - WARN("SPACING", 2568 - "Missing a blank line after declarations\n" . $hereprev); 2324 + if (WARN("LINE_SPACING", 2325 + "Missing a blank line after declarations\n" . $hereprev) && 2326 + $fix) { 2327 + fix_insert_line($fixlinenr, "\+"); 2328 + } 2569 2329 } 2570 2330 2571 2331 # check for spaces at the beginning of a line. 
··· 2581 2335 if (WARN("LEADING_SPACE", 2582 2336 "please, no spaces at the start of a line\n" . $herevet) && 2583 2337 $fix) { 2584 - $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2338 + $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e; 2585 2339 } 2586 2340 } 2587 2341 2588 2342 # check we are in a valid C source file if not then ignore this hunk 2589 2343 next if ($realfile !~ /\.(h|c)$/); 2344 + 2345 + # check indentation of any line with a bare else 2346 + # if the previous line is a break or return and is indented 1 tab more... 2347 + if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) { 2348 + my $tabs = length($1) + 1; 2349 + if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) { 2350 + WARN("UNNECESSARY_ELSE", 2351 + "else is not generally useful after a break or return\n" . $hereprev); 2352 + } 2353 + } 2354 + 2355 + # check indentation of a line with a break; 2356 + # if the previous line is a goto or return and is indented the same # of tabs 2357 + if ($sline =~ /^\+([\t]+)break\s*;\s*$/) { 2358 + my $tabs = $1; 2359 + if ($prevline =~ /^\+$tabs(?:goto|return)\b/) { 2360 + WARN("UNNECESSARY_BREAK", 2361 + "break is not useful after a goto or return\n" . $hereprev); 2362 + } 2363 + } 2590 2364 2591 2365 # discourage the addition of CONFIG_EXPERIMENTAL in #if(def). 
2592 2366 if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) { ··· 2743 2477 2744 2478 # if/while/etc brace do not go on next line, unless defining a do while loop, 2745 2479 # or if that brace on the next line is for something else 2746 - if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) { 2480 + if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) { 2747 2481 my $pre_ctx = "$1$2"; 2748 2482 2749 2483 my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0); ··· 2770 2504 #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n"; 2771 2505 #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; 2772 2506 2773 - if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { 2507 + if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { 2774 2508 ERROR("OPEN_BRACE", 2775 2509 "that open brace { should be on the previous line\n" . 2776 2510 "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); ··· 2789 2523 } 2790 2524 2791 2525 # Check relative indent for conditionals and blocks. 2792 - if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { 2526 + if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { 2793 2527 ($stat, $cond, $line_nr_next, $remain_next, $off_next) = 2794 2528 ctx_statement_block($linenr, $realcnt, 0) 2795 2529 if (!defined $stat); ··· 2920 2654 # check for initialisation to aggregates open brace on the next line 2921 2655 if ($line =~ /^.\s*{/ && 2922 2656 $prevline =~ /(?:^|[^=])=\s*$/) { 2923 - ERROR("OPEN_BRACE", 2924 - "that open brace { should be on the previous line\n" . $hereprev); 2657 + if (ERROR("OPEN_BRACE", 2658 + "that open brace { should be on the previous line\n" . 
$hereprev) && 2659 + $fix && $prevline =~ /^\+/ && $line =~ /^\+/) { 2660 + fix_delete_line($fixlinenr - 1, $prevrawline); 2661 + fix_delete_line($fixlinenr, $rawline); 2662 + my $fixedline = $prevrawline; 2663 + $fixedline =~ s/\s*=\s*$/ = {/; 2664 + fix_insert_line($fixlinenr, $fixedline); 2665 + $fixedline = $line; 2666 + $fixedline =~ s/^(.\s*){\s*/$1/; 2667 + fix_insert_line($fixlinenr, $fixedline); 2668 + } 2925 2669 } 2926 2670 2927 2671 # ··· 2956 2680 if (ERROR("C99_COMMENTS", 2957 2681 "do not use C99 // comments\n" . $herecurr) && 2958 2682 $fix) { 2959 - my $line = $fixed[$linenr - 1]; 2683 + my $line = $fixed[$fixlinenr]; 2960 2684 if ($line =~ /\/\/(.*)$/) { 2961 2685 my $comment = trim($1); 2962 - $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@; 2686 + $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@; 2963 2687 } 2964 2688 } 2965 2689 } ··· 3018 2742 "do not initialise globals to 0 or NULL\n" . 3019 2743 $herecurr) && 3020 2744 $fix) { 3021 - $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; 2745 + $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; 3022 2746 } 3023 2747 } 3024 2748 # check for static initialisers. ··· 3027 2751 "do not initialise statics to 0 or NULL\n" . 3028 2752 $herecurr) && 3029 2753 $fix) { 3030 - $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/; 2754 + $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/; 3031 2755 } 2756 + } 2757 + 2758 + # check for misordered declarations of char/short/int/long with signed/unsigned 2759 + while ($sline =~ m{(\b$TypeMisordered\b)}g) { 2760 + my $tmp = trim($1); 2761 + WARN("MISORDERED_TYPE", 2762 + "type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr); 3032 2763 } 3033 2764 3034 2765 # check for static const char * arrays. 
··· 3064 2781 if (ERROR("FUNCTION_WITHOUT_ARGS", 3065 2782 "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) && 3066 2783 $fix) { 3067 - $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*$\s*$/$2 $3(void)/; 2784 + $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*$\s*$/$2 $3(void)/; 3068 2785 } 3069 2786 } 3070 2787 ··· 3073 2790 if (WARN("DEFINE_PCI_DEVICE_TABLE", 3074 2791 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) && 3075 2792 $fix) { 3076 - $fixed[$linenr - 1] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*$\s*(\w+)\s*$\s*=\s*/static const struct pci_device_id $1\[\] = /; 2793 + $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*$\s*(\w+)\s*$\s*=\s*/static const struct pci_device_id $1\[\] = /; 3077 2794 } 3078 2795 } 3079 2796 ··· 3110 2827 my $sub_from = $ident; 3111 2828 my $sub_to = $ident; 3112 2829 $sub_to =~ s/\Q$from\E/$to/; 3113 - $fixed[$linenr - 1] =~ 2830 + $fixed[$fixlinenr] =~ 3114 2831 s@\Q$sub_from\E@$sub_to@; 3115 2832 } 3116 2833 } ··· 3138 2855 my $sub_from = $match; 3139 2856 my $sub_to = $match; 3140 2857 $sub_to =~ s/\Q$from\E/$to/; 3141 - $fixed[$linenr - 1] =~ 2858 + $fixed[$fixlinenr] =~ 3142 2859 s@\Q$sub_from\E@$sub_to@; 3143 2860 } 3144 2861 } ··· 3200 2917 if (WARN("PREFER_PR_LEVEL", 3201 2918 "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) && 3202 2919 $fix) { 3203 - $fixed[$linenr - 1] =~ 2920 + $fixed[$fixlinenr] =~ 3204 2921 s/\bpr_warning\b/pr_warn/; 3205 2922 } 3206 2923 } ··· 3216 2933 3217 2934 # function brace can't be on same line, except for #defines of do while, 3218 2935 # or if closed on same line 3219 - if (($line=~/$Type\s*$Ident$.*$.*\s{/) and 2936 + if (($line=~/$Type\s*$Ident$.*$.*\s*{/) and 3220 2937 !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) { 3221 - ERROR("OPEN_BRACE", 3222 - "open brace '{' following function declarations go on the next line\n" . 
$herecurr); 2938 + if (ERROR("OPEN_BRACE", 2939 + "open brace '{' following function declarations go on the next line\n" . $herecurr) && 2940 + $fix) { 2941 + fix_delete_line($fixlinenr, $rawline); 2942 + my $fixed_line = $rawline; 2943 + $fixed_line =~ /(^..*$Type\s*$Ident$.*$\s*){(.*)$/; 2944 + my $line1 = $1; 2945 + my $line2 = $2; 2946 + fix_insert_line($fixlinenr, ltrim($line1)); 2947 + fix_insert_line($fixlinenr, "\+{"); 2948 + if ($line2 !~ /^\s*$/) { 2949 + fix_insert_line($fixlinenr, "\+\t" . trim($line2)); 2950 + } 2951 + } 3223 2952 } 3224 2953 3225 2954 # open braces for enum, union and struct go on the same line. 3226 2955 if ($line =~ /^.\s*{/ && 3227 2956 $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) { 3228 - ERROR("OPEN_BRACE", 3229 - "open brace '{' following $1 go on the same line\n" . $hereprev); 2957 + if (ERROR("OPEN_BRACE", 2958 + "open brace '{' following $1 go on the same line\n" . $hereprev) && 2959 + $fix && $prevline =~ /^\+/ && $line =~ /^\+/) { 2960 + fix_delete_line($fixlinenr - 1, $prevrawline); 2961 + fix_delete_line($fixlinenr, $rawline); 2962 + my $fixedline = rtrim($prevrawline) . " {"; 2963 + fix_insert_line($fixlinenr, $fixedline); 2964 + $fixedline = $rawline; 2965 + $fixedline =~ s/^(.\s*){\s*/$1\t/; 2966 + if ($fixedline !~ /^\+\s*$/) { 2967 + fix_insert_line($fixlinenr, $fixedline); 2968 + } 2969 + } 3230 2970 } 3231 2971 3232 2972 # missing space after union, struct or enum definition ··· 3257 2951 if (WARN("SPACING", 3258 2952 "missing space after $1 definition\n" . $herecurr) && 3259 2953 $fix) { 3260 - $fixed[$linenr - 1] =~ 2954 + $fixed[$fixlinenr] =~ 3261 2955 s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/; 3262 2956 } 3263 2957 } ··· 3327 3021 } 3328 3022 3329 3023 if (show_type("SPACING") && $fix) { 3330 - $fixed[$linenr - 1] =~ 3024 + $fixed[$fixlinenr] =~ 3331 3025 s/^(.\s*)$Declare\s*$\s*\*\s*$Ident\s*$\s*\(/$1 . $declare . $post_declare_space . '(*' . 
$funcname . ')('/ex; 3332 3026 } 3333 3027 } ··· 3344 3038 if (ERROR("BRACKET_SPACE", 3345 3039 "space prohibited before open square bracket '['\n" . $herecurr) && 3346 3040 $fix) { 3347 - $fixed[$linenr - 1] =~ 3041 + $fixed[$fixlinenr] =~ 3348 3042 s/^(\+.*?)\s+\[/$1\[/; 3349 3043 } 3350 3044 } ··· 3379 3073 if (WARN("SPACING", 3380 3074 "space prohibited between function name and open parenthesis '('\n" . $herecurr) && 3381 3075 $fix) { 3382 - $fixed[$linenr - 1] =~ 3076 + $fixed[$fixlinenr] =~ 3383 3077 s/\b$name\s+$/$name\(/; 3384 3078 } 3385 3079 } ··· 3647 3341 $fixed_line = $fixed_line . $fix_elements[$#elements]; 3648 3342 } 3649 3343 3650 - if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) { 3651 - $fixed[$linenr - 1] = $fixed_line; 3344 + if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) { 3345 + $fixed[$fixlinenr] = $fixed_line; 3652 3346 } 3653 3347 3654 3348 ··· 3659 3353 if (WARN("SPACING", 3660 3354 "space prohibited before semicolon\n" . $herecurr) && 3661 3355 $fix) { 3662 - 1 while $fixed[$linenr - 1] =~ 3356 + 1 while $fixed[$fixlinenr] =~ 3663 3357 s/^(\+.*\S)\s+;/$1;/; 3664 3358 } 3665 3359 } ··· 3692 3386 if (ERROR("SPACING", 3693 3387 "space required before the open brace '{'\n" . $herecurr) && 3694 3388 $fix) { 3695 - $fixed[$linenr - 1] =~ s/^(\+.*(?:do|$)){/$1 {/; 3389 + $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/; 3696 3390 } 3697 3391 } 3698 3392 ··· 3710 3404 if (ERROR("SPACING", 3711 3405 "space required after that close brace '}'\n" . $herecurr) && 3712 3406 $fix) { 3713 - $fixed[$linenr - 1] =~ 3407 + $fixed[$fixlinenr] =~ 3714 3408 s/}((?!(?:,|;|\)))\S)/} $1/; 3715 3409 } 3716 3410 } ··· 3720 3414 if (ERROR("SPACING", 3721 3415 "space prohibited after that open square bracket '['\n" . 
$herecurr) && 3722 3416 $fix) { 3723 - $fixed[$linenr - 1] =~ 3417 + $fixed[$fixlinenr] =~ 3724 3418 s/\[\s+/\[/; 3725 3419 } 3726 3420 } ··· 3728 3422 if (ERROR("SPACING", 3729 3423 "space prohibited before that close square bracket ']'\n" . $herecurr) && 3730 3424 $fix) { 3731 - $fixed[$linenr - 1] =~ 3425 + $fixed[$fixlinenr] =~ 3732 3426 s/\s+\]/\]/; 3733 3427 } 3734 3428 } ··· 3739 3433 if (ERROR("SPACING", 3740 3434 "space prohibited after that open parenthesis '('\n" . $herecurr) && 3741 3435 $fix) { 3742 - $fixed[$linenr - 1] =~ 3436 + $fixed[$fixlinenr] =~ 3743 3437 s/$\s+/\(/; 3744 3438 } 3745 3439 } ··· 3749 3443 if (ERROR("SPACING", 3750 3444 "space prohibited before that close parenthesis ')'\n" . $herecurr) && 3751 3445 $fix) { 3752 - $fixed[$linenr - 1] =~ 3446 + print("fixlinenr: <$fixlinenr> fixed[fixlinenr]: <$fixed[$fixlinenr]>\n"); 3447 + $fixed[$fixlinenr] =~ 3753 3448 s/\s+$/\)/; 3754 3449 } 3755 3450 } 3451 + 3452 + # check unnecessary parentheses around addressof/dereference single $Lvals 3453 + # ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar 3454 + 3455 + while ($line =~ /(?:[^&]&\s*|\*)$\s*($Ident\s*(?:$Member\s*)+)\s*$/g) { 3456 + CHK("UNNECESSARY_PARENTHESES", 3457 + "Unnecessary parentheses around $1\n" . $herecurr); 3458 + } 3756 3459 3757 3460 #goto labels aren't indented, allow a single space however 3758 3461 if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and ··· 3769 3454 if (WARN("INDENTED_LABEL", 3770 3455 "labels should not be indented\n" . $herecurr) && 3771 3456 $fix) { 3772 - $fixed[$linenr - 1] =~ 3457 + $fixed[$fixlinenr] =~ 3773 3458 s/^(.)\s+/$1/; 3774 3459 } 3775 3460 } ··· 3831 3516 if (ERROR("SPACING", 3832 3517 "space required before the open parenthesis '('\n" . 
$herecurr) && 3833 3518 $fix) { 3834 - $fixed[$linenr - 1] =~ 3519 + $fixed[$fixlinenr] =~ 3835 3520 s/\b(if|while|for|switch)\(/$1 \(/; 3836 3521 } 3837 3522 } ··· 3921 3606 # if should not continue a brace 3922 3607 if ($line =~ /}\s*if\b/) { 3923 3608 ERROR("TRAILING_STATEMENTS", 3924 - "trailing statements should be on next line\n" . 3609 + "trailing statements should be on next line (or did you mean 'else if'?)\n" . 3925 3610 $herecurr); 3926 3611 } 3927 3612 # case and default should not have general statements after them ··· 3937 3622 3938 3623 # Check for }<nl>else {, these must be at the same 3939 3624 # indent level to be relevant to each other. 3940 - if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and 3941 - $previndent == $indent) { 3942 - ERROR("ELSE_AFTER_BRACE", 3943 - "else should follow close brace '}'\n" . $hereprev); 3625 + if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ && 3626 + $previndent == $indent) { 3627 + if (ERROR("ELSE_AFTER_BRACE", 3628 + "else should follow close brace '}'\n" . $hereprev) && 3629 + $fix && $prevline =~ /^\+/ && $line =~ /^\+/) { 3630 + fix_delete_line($fixlinenr - 1, $prevrawline); 3631 + fix_delete_line($fixlinenr, $rawline); 3632 + my $fixedline = $prevrawline; 3633 + $fixedline =~ s/}\s*$//; 3634 + if ($fixedline !~ /^\+\s*$/) { 3635 + fix_insert_line($fixlinenr, $fixedline); 3636 + } 3637 + $fixedline = $rawline; 3638 + $fixedline =~ s/^(.\s*)else/$1} else/; 3639 + fix_insert_line($fixlinenr, $fixedline); 3640 + } 3944 3641 } 3945 3642 3946 - if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and 3947 - $previndent == $indent) { 3643 + if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ && 3644 + $previndent == $indent) { 3948 3645 my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); 3949 3646 3950 3647 # Find out what is on the end of the line after the ··· 3965 3638 $s =~ s/\n.*//g; 3966 3639 3967 3640 if ($s =~ /^\s*;/) { 3968 - ERROR("WHILE_AFTER_BRACE", 3969 - "while should follow close brace '}'\n" . 
$hereprev); 3641 + if (ERROR("WHILE_AFTER_BRACE", 3642 + "while should follow close brace '}'\n" . $hereprev) && 3643 + $fix && $prevline =~ /^\+/ && $line =~ /^\+/) { 3644 + fix_delete_line($fixlinenr - 1, $prevrawline); 3645 + fix_delete_line($fixlinenr, $rawline); 3646 + my $fixedline = $prevrawline; 3647 + my $trailing = $rawline; 3648 + $trailing =~ s/^\+//; 3649 + $trailing = trim($trailing); 3650 + $fixedline =~ s/}\s*$/} $trailing/; 3651 + fix_insert_line($fixlinenr, $fixedline); 3652 + } 3970 3653 } 3971 3654 } 3972 3655 ··· 3990 3653 "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) && 3991 3654 $fix) { 3992 3655 my $hexval = sprintf("0x%x", oct($var)); 3993 - $fixed[$linenr - 1] =~ 3656 + $fixed[$fixlinenr] =~ 3994 3657 s/\b$var\b/$hexval/; 3995 3658 } 3996 3659 } ··· 4026 3689 if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION", 4027 3690 "Whitespace after \\ makes next lines useless\n" . $herecurr) && 4028 3691 $fix) { 4029 - $fixed[$linenr - 1] =~ s/\s+$//; 3692 + $fixed[$fixlinenr] =~ s/\s+$//; 4030 3693 } 4031 3694 } 4032 3695 ··· 4099 3762 $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(), 4100 3763 $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo(); 4101 3764 $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz 4102 - $dstat !~ /^'X'$/ && # character constants 3765 + $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ && # character constants 4103 3766 $dstat !~ /$exceptions/ && 4104 3767 $dstat !~ /^\.$Ident\s*=/ && # .foo = 4105 3768 $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo ··· 4351 4014 } 4352 4015 } 4353 4016 4017 + # check for unnecessary "Out of Memory" messages 4018 + if ($line =~ /^\+.*\b$logFunctions\s*$/ && 4019 + $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*$/ && 4020 + (defined $1 || defined $3) && 4021 + $linenr > 3) { 4022 + my $testval = $2; 4023 + my $testline = $lines[$linenr - 3]; 4024 + 4025 + my ($s, 
$c) = ctx_statement_block($linenr - 3, $realcnt, 0); 4026 + # print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n"); 4027 + 4028 + if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:$[^$]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) { 4029 + WARN("OOM_MESSAGE", 4030 + "Possible unnecessary 'out of memory' message\n" . $hereprev); 4031 + } 4032 + } 4033 + 4354 4034 # check for bad placement of section $InitAttribute (e.g.: __initdata) 4355 4035 if ($line =~ /(\b$InitAttribute\b)/) { 4356 4036 my $attr = $1; ··· 4381 4027 WARN("MISPLACED_INIT", 4382 4028 "$attr should be placed after $var\n" . $herecurr))) && 4383 4029 $fix) { 4384 - $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e; 4030 + $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e; 4385 4031 } 4386 4032 } 4387 4033 } ··· 4395 4041 if (ERROR("INIT_ATTRIBUTE", 4396 4042 "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) && 4397 4043 $fix) { 4398 - $fixed[$linenr - 1] =~ 4044 + $fixed[$fixlinenr] =~ 4399 4045 s/$InitAttributeData/${attr_prefix}initconst/; 4400 4046 } 4401 4047 } ··· 4406 4052 if (ERROR("INIT_ATTRIBUTE", 4407 4053 "Use of $attr requires a separate use of const\n" . 
$herecurr) && 4408 4054 $fix) { 4409 - my $lead = $fixed[$linenr - 1] =~ 4055 + my $lead = $fixed[$fixlinenr] =~ 4410 4056 /(^\+\s*(?:static\s+))/; 4411 4057 $lead = rtrim($1); 4412 4058 $lead = "$lead " if ($lead !~ /^\+$/); 4413 4059 $lead = "${lead}const "; 4414 - $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/; 4060 + $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/; 4415 4061 } 4416 4062 } 4417 4063 ··· 4424 4070 if (WARN("CONSTANT_CONVERSION", 4425 4071 "$constant_func should be $func\n" . $herecurr) && 4426 4072 $fix) { 4427 - $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g; 4073 + $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g; 4428 4074 } 4429 4075 } 4430 4076 ··· 4474 4120 if (ERROR("SPACING", 4475 4121 "exactly one space required after that #$1\n" . $herecurr) && 4476 4122 $fix) { 4477 - $fixed[$linenr - 1] =~ 4123 + $fixed[$fixlinenr] =~ 4478 4124 s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /; 4479 4125 } 4480 4126 ··· 4522 4168 if (WARN("INLINE", 4523 4169 "plain inline is preferred over $1\n" . $herecurr) && 4524 4170 $fix) { 4525 - $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/; 4171 + $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/; 4526 4172 4527 4173 } 4528 4174 } ··· 4547 4193 if (WARN("PREFER_PRINTF", 4548 4194 "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) && 4549 4195 $fix) { 4550 - $fixed[$linenr - 1] =~ s/\b__attribute__\s*$\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)$\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex; 4196 + $fixed[$fixlinenr] =~ s/\b__attribute__\s*$\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)$\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex; 4551 4197 4552 4198 } 4553 4199 } ··· 4558 4204 if (WARN("PREFER_SCANF", 4559 4205 "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . 
$herecurr) && 4560 4206 $fix) { 4561 - $fixed[$linenr - 1] =~ s/\b__attribute__\s*$\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)$\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex; 4207 + $fixed[$fixlinenr] =~ s/\b__attribute__\s*$\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)$\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex; 4562 4208 } 4563 4209 } 4564 4210 ··· 4573 4219 if (WARN("SIZEOF_PARENTHESIS", 4574 4220 "sizeof $1 should be sizeof($1)\n" . $herecurr) && 4575 4221 $fix) { 4576 - $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex; 4222 + $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex; 4577 4223 } 4578 4224 } 4579 4225 ··· 4596 4242 if (WARN("PREFER_SEQ_PUTS", 4597 4243 "Prefer seq_puts to seq_printf\n" . $herecurr) && 4598 4244 $fix) { 4599 - $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/; 4245 + $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/; 4600 4246 } 4601 4247 } 4602 4248 } ··· 4625 4271 if (WARN("PREFER_ETHER_ADDR_COPY", 4626 4272 "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) && 4627 4273 $fix) { 4628 - $fixed[$linenr - 1] =~ s/\bmemcpy\s*$\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*$/ether_addr_copy($2, $7)/; 4274 + $fixed[$fixlinenr] =~ s/\bmemcpy\s*$\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*$/ether_addr_copy($2, $7)/; 4629 4275 } 4630 4276 } 4631 4277 ··· 4713 4359 if (CHK("AVOID_EXTERNS", 4714 4360 "extern prototypes should be avoided in .h files\n" . 
$herecurr) && 4715 4361 $fix) { 4716 - $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; 4362 + $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; 4717 4363 } 4718 4364 } 4719 4365 ··· 4773 4419 4774 4420 # check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc 4775 4421 if ($^V && $^V ge 5.10.0 && 4776 - $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/) { 4422 + $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) { 4777 4423 my $oldfunc = $3; 4778 4424 my $a1 = $4; 4779 4425 my $a2 = $10; 4780 4426 my $newfunc = "kmalloc_array"; 4781 4427 $newfunc = "kcalloc" if ($oldfunc eq "kzalloc"); 4782 - if ($a1 =~ /^sizeof\s*\S/ || $a2 =~ /^sizeof\s*\S/) { 4428 + my $r1 = $a1; 4429 + my $r2 = $a2; 4430 + if ($a1 =~ /^sizeof\s*\S/) { 4431 + $r1 = $a2; 4432 + $r2 = $a1; 4433 + } 4434 + if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ && 4435 + !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) { 4783 4436 if (WARN("ALLOC_WITH_MULTIPLY", 4784 4437 "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) && 4785 4438 $fix) { 4786 - my $r1 = $a1; 4787 - my $r2 = $a2; 4788 - if ($a1 =~ /^sizeof\s*\S/) { 4789 - $r1 = $a2; 4790 - $r2 = $a1; 4791 - } 4792 - $fixed[$linenr - 1] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e; 4439 + $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e; 4793 4440 4794 4441 } 4795 4442 } ··· 4814 4459 if (WARN("ONE_SEMICOLON", 4815 4460 "Statements terminations use 1 semicolon\n" . 
$herecurr) && 4816 4461 $fix) { 4817 - $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g; 4462 + $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g; 4818 4463 } 4819 4464 } 4820 4465 4821 - # check for case / default statements not preceeded by break/fallthrough/switch 4466 + # check for case / default statements not preceded by break/fallthrough/switch 4822 4467 if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) { 4823 4468 my $has_break = 0; 4824 4469 my $has_statement = 0; 4825 4470 my $count = 0; 4826 4471 my $prevline = $linenr; 4827 - while ($prevline > 1 && $count < 3 && !$has_break) { 4472 + while ($prevline > 1 && ($file || $count < 3) && !$has_break) { 4828 4473 $prevline--; 4829 4474 my $rline = $rawlines[$prevline - 1]; 4830 4475 my $fline = $lines[$prevline - 1]; ··· 4862 4507 if (WARN("USE_FUNC", 4863 4508 "__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) && 4864 4509 $fix) { 4865 - $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g; 4510 + $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g; 4866 4511 } 4867 4512 } 4868 4513 ··· 5105 4750 hash_show_words(\%use_type, "Used"); 5106 4751 hash_show_words(\%ignore_type, "Ignored"); 5107 4752 5108 - if ($clean == 0 && $fix && "@rawlines" ne "@fixed") { 4753 + if ($clean == 0 && $fix && 4754 + ("@rawlines" ne "@fixed" || 4755 + $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) { 5109 4756 my $newfile = $filename; 5110 4757 $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace); 5111 4758 my $linecount = 0; 5112 4759 my $f; 4760 + 4761 + @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted); 5113 4762 5114 4763 open($f, '>', $newfile) 5115 4764 or die "$P: Can't open $newfile for write\n"; ··· 5122 4763 if ($file) { 5123 4764 if ($linecount > 3) { 5124 4765 $fixed_line =~ s/^\+//; 5125 - print $f $fixed_line. "\n"; 4766 + print $f $fixed_line . "\n"; 5126 4767 } 5127 4768 } else { 5128 4769 print $f $fixed_line . "\n";