Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Ross Zwisler:

- Require struct page by default for filesystem DAX to remove a number
of surprising failure cases. This includes failures with direct I/O,
gdb and fork(2).

- Add support for the new Platform Capabilities Structure added to the
NFIT in ACPI 6.2a. This new table tells us whether the platform
supports flushing of CPU and memory controller caches on unexpected
power loss events.

- Revamp vmem_altmap and dev_pagemap handling to clean up code and
better support future PCI P2P uses.

- Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has
become out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL
spec, and instead rely on the generic ND_CMD_CALL approach used by
the two other IOCTL families, NVDIMM_FAMILY_{HPE,MSFT}.

- Enhance nfit_test so we can test some of the new things added in
version 1.6 of the DSM specification. This includes testing firmware
download and simulating the Last Shutdown State (LSS) status.

* tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (37 commits)
libnvdimm, namespace: remove redundant initialization of 'nd_mapping'
acpi, nfit: fix register dimm error handling
libnvdimm, namespace: make min namespace size 4K
tools/testing/nvdimm: force nfit_test to depend on instrumented modules
libnvdimm/nfit_test: adding support for unit testing enable LSS status
libnvdimm/nfit_test: add firmware download emulation
nfit-test: Add platform cap support from ACPI 6.2a to test
libnvdimm: expose platform persistence attribute for nd_region
acpi: nfit: add persistent memory control flag for nd_region
acpi: nfit: Add support for detect platform CPU cache flush on power loss
device-dax: Fix trailing semicolon
libnvdimm, btt: fix uninitialized err_lock
dax: require 'struct page' by default for filesystem dax
ext2: auto disable dax instead of failing mount
ext4: auto disable dax instead of failing mount
mm, dax: introduce pfn_t_special()
mm: Fix devm_memremap_pages() collision handling
mm: Fix memory size alignment in devm_memremap_pages_release()
memremap: merge find_dev_pagemap into get_dev_pagemap
memremap: change devm_memremap_pages interface to use struct dev_pagemap
...

+1124 -529
+6 -3
arch/arm64/mm/mmu.c
··· 685 685 } 686 686 #ifdef CONFIG_SPARSEMEM_VMEMMAP 687 687 #if !ARM64_SWAPPER_USES_SECTION_MAPS 688 - int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) 688 + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 689 + struct vmem_altmap *altmap) 689 690 { 690 691 return vmemmap_populate_basepages(start, end, node); 691 692 } 692 693 #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ 693 - int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) 694 + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 695 + struct vmem_altmap *altmap) 694 696 { 695 697 unsigned long addr = start; 696 698 unsigned long next; ··· 727 725 return 0; 728 726 } 729 727 #endif /* CONFIG_ARM64_64K_PAGES */ 730 - void vmemmap_free(unsigned long start, unsigned long end) 728 + void vmemmap_free(unsigned long start, unsigned long end, 729 + struct vmem_altmap *altmap) 731 730 { 732 731 } 733 732 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
+4 -2
arch/ia64/mm/discontig.c
··· 754 754 #endif 755 755 756 756 #ifdef CONFIG_SPARSEMEM_VMEMMAP 757 - int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) 757 + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 758 + struct vmem_altmap *altmap) 758 759 { 759 760 return vmemmap_populate_basepages(start, end, node); 760 761 } 761 762 762 - void vmemmap_free(unsigned long start, unsigned long end) 763 + void vmemmap_free(unsigned long start, unsigned long end, 764 + struct vmem_altmap *altmap) 763 765 { 764 766 } 765 767 #endif
+10 -8
arch/ia64/mm/init.c
··· 501 501 if (map_start < map_end) 502 502 memmap_init_zone((unsigned long)(map_end - map_start), 503 503 args->nid, args->zone, page_to_pfn(map_start), 504 - MEMMAP_EARLY); 504 + MEMMAP_EARLY, NULL); 505 505 return 0; 506 506 } 507 507 ··· 509 509 memmap_init (unsigned long size, int nid, unsigned long zone, 510 510 unsigned long start_pfn) 511 511 { 512 - if (!vmem_map) 513 - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY); 514 - else { 512 + if (!vmem_map) { 513 + memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, 514 + NULL); 515 + } else { 515 516 struct page *start; 516 517 struct memmap_init_callback_data args; 517 518 ··· 648 647 } 649 648 650 649 #ifdef CONFIG_MEMORY_HOTPLUG 651 - int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 650 + int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 651 + bool want_memblock) 652 652 { 653 653 unsigned long start_pfn = start >> PAGE_SHIFT; 654 654 unsigned long nr_pages = size >> PAGE_SHIFT; 655 655 int ret; 656 656 657 - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); 657 + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 658 658 if (ret) 659 659 printk("%s: Problem encountered in __add_pages() as ret=%d\n", 660 660 __func__, ret); ··· 664 662 } 665 663 666 664 #ifdef CONFIG_MEMORY_HOTREMOVE 667 - int arch_remove_memory(u64 start, u64 size) 665 + int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 668 666 { 669 667 unsigned long start_pfn = start >> PAGE_SHIFT; 670 668 unsigned long nr_pages = size >> PAGE_SHIFT; ··· 672 670 int ret; 673 671 674 672 zone = page_zone(pfn_to_page(start_pfn)); 675 - ret = __remove_pages(zone, start_pfn, nr_pages); 673 + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); 676 674 if (ret) 677 675 pr_warn("%s: Problem encountered in __remove_pages() as" 678 676 " ret=%d\n", __func__, ret);
+8 -9
arch/powerpc/mm/init_64.c
··· 183 183 vmemmap_list = vmem_back; 184 184 } 185 185 186 - int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) 186 + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 187 + struct vmem_altmap *altmap) 187 188 { 188 189 unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; 189 190 ··· 194 193 pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); 195 194 196 195 for (; start < end; start += page_size) { 197 - struct vmem_altmap *altmap; 198 196 void *p; 199 197 int rc; 200 198 201 199 if (vmemmap_populated(start, page_size)) 202 200 continue; 203 201 204 - /* altmap lookups only work at section boundaries */ 205 - altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); 206 - 207 - p = __vmemmap_alloc_block_buf(page_size, node, altmap); 202 + if (altmap) 203 + p = altmap_alloc_block_buf(page_size, altmap); 204 + else 205 + p = vmemmap_alloc_block_buf(page_size, node); 208 206 if (!p) 209 207 return -ENOMEM; 210 208 ··· 256 256 return vmem_back->phys; 257 257 } 258 258 259 - void __ref vmemmap_free(unsigned long start, unsigned long end) 259 + void __ref vmemmap_free(unsigned long start, unsigned long end, 260 + struct vmem_altmap *altmap) 260 261 { 261 262 unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; 262 263 unsigned long page_order = get_order(page_size); ··· 268 267 269 268 for (; start < end; start += page_size) { 270 269 unsigned long nr_pages, addr; 271 - struct vmem_altmap *altmap; 272 270 struct page *section_base; 273 271 struct page *page; 274 272 ··· 287 287 section_base = pfn_to_page(vmemmap_section_start(start)); 288 288 nr_pages = 1 << page_order; 289 289 290 - altmap = to_vmem_altmap((unsigned long) section_base); 291 290 if (altmap) { 292 291 vmem_altmap_free(altmap, nr_pages); 293 292 } else if (PageReserved(page)) {
+5 -6
arch/powerpc/mm/mem.c
··· 127 127 return -ENODEV; 128 128 } 129 129 130 - int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 130 + int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 131 + bool want_memblock) 131 132 { 132 133 unsigned long start_pfn = start >> PAGE_SHIFT; 133 134 unsigned long nr_pages = size >> PAGE_SHIFT; ··· 144 143 return -EFAULT; 145 144 } 146 145 147 - return __add_pages(nid, start_pfn, nr_pages, want_memblock); 146 + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 148 147 } 149 148 150 149 #ifdef CONFIG_MEMORY_HOTREMOVE 151 - int arch_remove_memory(u64 start, u64 size) 150 + int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 152 151 { 153 152 unsigned long start_pfn = start >> PAGE_SHIFT; 154 153 unsigned long nr_pages = size >> PAGE_SHIFT; 155 - struct vmem_altmap *altmap; 156 154 struct page *page; 157 155 int ret; 158 156 ··· 160 160 * when querying the zone. 161 161 */ 162 162 page = pfn_to_page(start_pfn); 163 - altmap = to_vmem_altmap((unsigned long) page); 164 163 if (altmap) 165 164 page += vmem_altmap_offset(altmap); 166 165 167 - ret = __remove_pages(page_zone(page), start_pfn, nr_pages); 166 + ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); 168 167 if (ret) 169 168 return ret; 170 169
+4 -3
arch/s390/mm/init.c
··· 222 222 223 223 #endif /* CONFIG_CMA */ 224 224 225 - int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 225 + int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 226 + bool want_memblock) 226 227 { 227 228 unsigned long start_pfn = PFN_DOWN(start); 228 229 unsigned long size_pages = PFN_DOWN(size); ··· 233 232 if (rc) 234 233 return rc; 235 234 236 - rc = __add_pages(nid, start_pfn, size_pages, want_memblock); 235 + rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); 237 236 if (rc) 238 237 vmem_remove_mapping(start, size); 239 238 return rc; 240 239 } 241 240 242 241 #ifdef CONFIG_MEMORY_HOTREMOVE 243 - int arch_remove_memory(u64 start, u64 size) 242 + int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 244 243 { 245 244 /* 246 245 * There is no hardware or firmware interface which could trigger a
+4 -2
arch/s390/mm/vmem.c
··· 211 211 /* 212 212 * Add a backed mem_map array to the virtual mem_map array. 213 213 */ 214 - int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) 214 + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 215 + struct vmem_altmap *altmap) 215 216 { 216 217 unsigned long pgt_prot, sgt_prot; 217 218 unsigned long address = start; ··· 297 296 return ret; 298 297 } 299 298 300 - void vmemmap_free(unsigned long start, unsigned long end) 299 + void vmemmap_free(unsigned long start, unsigned long end, 300 + struct vmem_altmap *altmap) 301 301 { 302 302 } 303 303
+5 -5
arch/sh/mm/init.c
··· 485 485 #endif 486 486 487 487 #ifdef CONFIG_MEMORY_HOTPLUG 488 - int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 488 + int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 489 + bool want_memblock) 489 490 { 490 491 unsigned long start_pfn = PFN_DOWN(start); 491 492 unsigned long nr_pages = size >> PAGE_SHIFT; 492 493 int ret; 493 494 494 495 /* We only have ZONE_NORMAL, so this is easy.. */ 495 - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); 496 + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 496 497 if (unlikely(ret)) 497 498 printk("%s: Failed, __add_pages() == %d\n", __func__, ret); 498 499 499 500 return ret; 500 501 } 501 - EXPORT_SYMBOL_GPL(arch_add_memory); 502 502 503 503 #ifdef CONFIG_NUMA 504 504 int memory_add_physaddr_to_nid(u64 addr) ··· 510 510 #endif 511 511 512 512 #ifdef CONFIG_MEMORY_HOTREMOVE 513 - int arch_remove_memory(u64 start, u64 size) 513 + int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 514 514 { 515 515 unsigned long start_pfn = PFN_DOWN(start); 516 516 unsigned long nr_pages = size >> PAGE_SHIFT; ··· 518 518 int ret; 519 519 520 520 zone = page_zone(pfn_to_page(start_pfn)); 521 - ret = __remove_pages(zone, start_pfn, nr_pages); 521 + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); 522 522 if (unlikely(ret)) 523 523 pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, 524 524 ret);
+3 -2
arch/sparc/mm/init_64.c
··· 2628 2628 2629 2629 #ifdef CONFIG_SPARSEMEM_VMEMMAP 2630 2630 int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, 2631 - int node) 2631 + int node, struct vmem_altmap *altmap) 2632 2632 { 2633 2633 unsigned long pte_base; 2634 2634 ··· 2671 2671 return 0; 2672 2672 } 2673 2673 2674 - void vmemmap_free(unsigned long start, unsigned long end) 2674 + void vmemmap_free(unsigned long start, unsigned long end, 2675 + struct vmem_altmap *altmap) 2675 2676 { 2676 2677 } 2677 2678 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
+5 -4
arch/x86/mm/init_32.c
··· 829 829 } 830 830 831 831 #ifdef CONFIG_MEMORY_HOTPLUG 832 - int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 832 + int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 833 + bool want_memblock) 833 834 { 834 835 unsigned long start_pfn = start >> PAGE_SHIFT; 835 836 unsigned long nr_pages = size >> PAGE_SHIFT; 836 837 837 - return __add_pages(nid, start_pfn, nr_pages, want_memblock); 838 + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 838 839 } 839 840 840 841 #ifdef CONFIG_MEMORY_HOTREMOVE 841 - int arch_remove_memory(u64 start, u64 size) 842 + int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 842 843 { 843 844 unsigned long start_pfn = start >> PAGE_SHIFT; 844 845 unsigned long nr_pages = size >> PAGE_SHIFT; 845 846 struct zone *zone; 846 847 847 848 zone = page_zone(pfn_to_page(start_pfn)); 848 - return __remove_pages(zone, start_pfn, nr_pages); 849 + return __remove_pages(zone, start_pfn, nr_pages, altmap); 849 850 } 850 851 #endif 851 852 #endif
+52 -42
arch/x86/mm/init_64.c
··· 772 772 } 773 773 } 774 774 775 - int add_pages(int nid, unsigned long start_pfn, 776 - unsigned long nr_pages, bool want_memblock) 775 + int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, 776 + struct vmem_altmap *altmap, bool want_memblock) 777 777 { 778 778 int ret; 779 779 780 - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); 780 + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 781 781 WARN_ON_ONCE(ret); 782 782 783 783 /* update max_pfn, max_low_pfn and high_memory */ ··· 787 787 return ret; 788 788 } 789 789 790 - int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 790 + int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 791 + bool want_memblock) 791 792 { 792 793 unsigned long start_pfn = start >> PAGE_SHIFT; 793 794 unsigned long nr_pages = size >> PAGE_SHIFT; 794 795 795 796 init_memory_mapping(start, start + size); 796 797 797 - return add_pages(nid, start_pfn, nr_pages, want_memblock); 798 + return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 798 799 } 799 - EXPORT_SYMBOL_GPL(arch_add_memory); 800 800 801 801 #define PAGE_INUSE 0xFD 802 802 803 - static void __meminit free_pagetable(struct page *page, int order) 803 + static void __meminit free_pagetable(struct page *page, int order, 804 + struct vmem_altmap *altmap) 804 805 { 805 806 unsigned long magic; 806 807 unsigned int nr_pages = 1 << order; 807 - struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); 808 808 809 809 if (altmap) { 810 810 vmem_altmap_free(altmap, nr_pages); ··· 826 826 free_pages((unsigned long)page_address(page), order); 827 827 } 828 828 829 - static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) 829 + static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, 830 + struct vmem_altmap *altmap) 830 831 { 831 832 pte_t *pte; 832 833 int i; ··· 839 838 } 840 839 841 840 /* free a pte talbe */ 842 - free_pagetable(pmd_page(*pmd), 0); 
841 + free_pagetable(pmd_page(*pmd), 0, altmap); 843 842 spin_lock(&init_mm.page_table_lock); 844 843 pmd_clear(pmd); 845 844 spin_unlock(&init_mm.page_table_lock); 846 845 } 847 846 848 - static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) 847 + static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, 848 + struct vmem_altmap *altmap) 849 849 { 850 850 pmd_t *pmd; 851 851 int i; ··· 858 856 } 859 857 860 858 /* free a pmd talbe */ 861 - free_pagetable(pud_page(*pud), 0); 859 + free_pagetable(pud_page(*pud), 0, altmap); 862 860 spin_lock(&init_mm.page_table_lock); 863 861 pud_clear(pud); 864 862 spin_unlock(&init_mm.page_table_lock); 865 863 } 866 864 867 - static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) 865 + static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, 866 + struct vmem_altmap *altmap) 868 867 { 869 868 pud_t *pud; 870 869 int i; ··· 877 874 } 878 875 879 876 /* free a pud talbe */ 880 - free_pagetable(p4d_page(*p4d), 0); 877 + free_pagetable(p4d_page(*p4d), 0, altmap); 881 878 spin_lock(&init_mm.page_table_lock); 882 879 p4d_clear(p4d); 883 880 spin_unlock(&init_mm.page_table_lock); ··· 885 882 886 883 static void __meminit 887 884 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, 888 - bool direct) 885 + struct vmem_altmap *altmap, bool direct) 889 886 { 890 887 unsigned long next, pages = 0; 891 888 pte_t *pte; ··· 916 913 * freed when offlining, or simplely not in use. 
917 914 */ 918 915 if (!direct) 919 - free_pagetable(pte_page(*pte), 0); 916 + free_pagetable(pte_page(*pte), 0, altmap); 920 917 921 918 spin_lock(&init_mm.page_table_lock); 922 919 pte_clear(&init_mm, addr, pte); ··· 939 936 940 937 page_addr = page_address(pte_page(*pte)); 941 938 if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { 942 - free_pagetable(pte_page(*pte), 0); 939 + free_pagetable(pte_page(*pte), 0, altmap); 943 940 944 941 spin_lock(&init_mm.page_table_lock); 945 942 pte_clear(&init_mm, addr, pte); ··· 956 953 957 954 static void __meminit 958 955 remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, 959 - bool direct) 956 + bool direct, struct vmem_altmap *altmap) 960 957 { 961 958 unsigned long next, pages = 0; 962 959 pte_t *pte_base; ··· 975 972 IS_ALIGNED(next, PMD_SIZE)) { 976 973 if (!direct) 977 974 free_pagetable(pmd_page(*pmd), 978 - get_order(PMD_SIZE)); 975 + get_order(PMD_SIZE), 976 + altmap); 979 977 980 978 spin_lock(&init_mm.page_table_lock); 981 979 pmd_clear(pmd); ··· 990 986 if (!memchr_inv(page_addr, PAGE_INUSE, 991 987 PMD_SIZE)) { 992 988 free_pagetable(pmd_page(*pmd), 993 - get_order(PMD_SIZE)); 989 + get_order(PMD_SIZE), 990 + altmap); 994 991 995 992 spin_lock(&init_mm.page_table_lock); 996 993 pmd_clear(pmd); ··· 1003 998 } 1004 999 1005 1000 pte_base = (pte_t *)pmd_page_vaddr(*pmd); 1006 - remove_pte_table(pte_base, addr, next, direct); 1007 - free_pte_table(pte_base, pmd); 1001 + remove_pte_table(pte_base, addr, next, altmap, direct); 1002 + free_pte_table(pte_base, pmd, altmap); 1008 1003 } 1009 1004 1010 1005 /* Call free_pmd_table() in remove_pud_table(). 
*/ ··· 1014 1009 1015 1010 static void __meminit 1016 1011 remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, 1017 - bool direct) 1012 + struct vmem_altmap *altmap, bool direct) 1018 1013 { 1019 1014 unsigned long next, pages = 0; 1020 1015 pmd_t *pmd_base; ··· 1033 1028 IS_ALIGNED(next, PUD_SIZE)) { 1034 1029 if (!direct) 1035 1030 free_pagetable(pud_page(*pud), 1036 - get_order(PUD_SIZE)); 1031 + get_order(PUD_SIZE), 1032 + altmap); 1037 1033 1038 1034 spin_lock(&init_mm.page_table_lock); 1039 1035 pud_clear(pud); ··· 1048 1042 if (!memchr_inv(page_addr, PAGE_INUSE, 1049 1043 PUD_SIZE)) { 1050 1044 free_pagetable(pud_page(*pud), 1051 - get_order(PUD_SIZE)); 1045 + get_order(PUD_SIZE), 1046 + altmap); 1052 1047 1053 1048 spin_lock(&init_mm.page_table_lock); 1054 1049 pud_clear(pud); ··· 1061 1054 } 1062 1055 1063 1056 pmd_base = pmd_offset(pud, 0); 1064 - remove_pmd_table(pmd_base, addr, next, direct); 1065 - free_pmd_table(pmd_base, pud); 1057 + remove_pmd_table(pmd_base, addr, next, direct, altmap); 1058 + free_pmd_table(pmd_base, pud, altmap); 1066 1059 } 1067 1060 1068 1061 if (direct) ··· 1071 1064 1072 1065 static void __meminit 1073 1066 remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, 1074 - bool direct) 1067 + struct vmem_altmap *altmap, bool direct) 1075 1068 { 1076 1069 unsigned long next, pages = 0; 1077 1070 pud_t *pud_base; ··· 1087 1080 BUILD_BUG_ON(p4d_large(*p4d)); 1088 1081 1089 1082 pud_base = pud_offset(p4d, 0); 1090 - remove_pud_table(pud_base, addr, next, direct); 1083 + remove_pud_table(pud_base, addr, next, altmap, direct); 1091 1084 /* 1092 1085 * For 4-level page tables we do not want to free PUDs, but in the 1093 1086 * 5-level case we should free them. This code will have to change 1094 1087 * to adapt for boot-time switching between 4 and 5 level page tables. 
1095 1088 */ 1096 1089 if (CONFIG_PGTABLE_LEVELS == 5) 1097 - free_pud_table(pud_base, p4d); 1090 + free_pud_table(pud_base, p4d, altmap); 1098 1091 } 1099 1092 1100 1093 if (direct) ··· 1103 1096 1104 1097 /* start and end are both virtual address. */ 1105 1098 static void __meminit 1106 - remove_pagetable(unsigned long start, unsigned long end, bool direct) 1099 + remove_pagetable(unsigned long start, unsigned long end, bool direct, 1100 + struct vmem_altmap *altmap) 1107 1101 { 1108 1102 unsigned long next; 1109 1103 unsigned long addr; ··· 1119 1111 continue; 1120 1112 1121 1113 p4d = p4d_offset(pgd, 0); 1122 - remove_p4d_table(p4d, addr, next, direct); 1114 + remove_p4d_table(p4d, addr, next, altmap, direct); 1123 1115 } 1124 1116 1125 1117 flush_tlb_all(); 1126 1118 } 1127 1119 1128 - void __ref vmemmap_free(unsigned long start, unsigned long end) 1120 + void __ref vmemmap_free(unsigned long start, unsigned long end, 1121 + struct vmem_altmap *altmap) 1129 1122 { 1130 - remove_pagetable(start, end, false); 1123 + remove_pagetable(start, end, false, altmap); 1131 1124 } 1132 1125 1133 1126 #ifdef CONFIG_MEMORY_HOTREMOVE ··· 1138 1129 start = (unsigned long)__va(start); 1139 1130 end = (unsigned long)__va(end); 1140 1131 1141 - remove_pagetable(start, end, true); 1132 + remove_pagetable(start, end, true, NULL); 1142 1133 } 1143 1134 1144 - int __ref arch_remove_memory(u64 start, u64 size) 1135 + int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 1145 1136 { 1146 1137 unsigned long start_pfn = start >> PAGE_SHIFT; 1147 1138 unsigned long nr_pages = size >> PAGE_SHIFT; 1148 1139 struct page *page = pfn_to_page(start_pfn); 1149 - struct vmem_altmap *altmap; 1150 1140 struct zone *zone; 1151 1141 int ret; 1152 1142 1153 1143 /* With altmap the first mapped page is offset from @start */ 1154 - altmap = to_vmem_altmap((unsigned long) page); 1155 1144 if (altmap) 1156 1145 page += vmem_altmap_offset(altmap); 1157 1146 zone = page_zone(page); 
1158 - ret = __remove_pages(zone, start_pfn, nr_pages); 1147 + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); 1159 1148 WARN_ON_ONCE(ret); 1160 1149 kernel_physical_mapping_remove(start, start + size); 1161 1150 ··· 1385 1378 if (pmd_none(*pmd)) { 1386 1379 void *p; 1387 1380 1388 - p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); 1381 + if (altmap) 1382 + p = altmap_alloc_block_buf(PMD_SIZE, altmap); 1383 + else 1384 + p = vmemmap_alloc_block_buf(PMD_SIZE, node); 1389 1385 if (p) { 1390 1386 pte_t entry; 1391 1387 ··· 1421 1411 return 0; 1422 1412 } 1423 1413 1424 - int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) 1414 + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 1415 + struct vmem_altmap *altmap) 1425 1416 { 1426 - struct vmem_altmap *altmap = to_vmem_altmap(start); 1427 1417 int err; 1428 1418 1429 1419 if (boot_cpu_has(X86_FEATURE_PSE))
+26
drivers/acpi/nfit/core.c
··· 838 838 return true; 839 839 } 840 840 841 + static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc, 842 + struct acpi_nfit_capabilities *pcap) 843 + { 844 + struct device *dev = acpi_desc->dev; 845 + u32 mask; 846 + 847 + mask = (1 << (pcap->highest_capability + 1)) - 1; 848 + acpi_desc->platform_cap = pcap->capabilities & mask; 849 + dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap); 850 + return true; 851 + } 852 + 841 853 static void *add_table(struct acpi_nfit_desc *acpi_desc, 842 854 struct nfit_table_prev *prev, void *table, const void *end) 843 855 { ··· 894 882 break; 895 883 case ACPI_NFIT_TYPE_SMBIOS: 896 884 dev_dbg(dev, "%s: smbios\n", __func__); 885 + break; 886 + case ACPI_NFIT_TYPE_CAPABILITIES: 887 + if (!add_platform_cap(acpi_desc, table)) 888 + return err; 897 889 break; 898 890 default: 899 891 dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type); ··· 1883 1867 struct kernfs_node *nfit_kernfs; 1884 1868 1885 1869 nvdimm = nfit_mem->nvdimm; 1870 + if (!nvdimm) 1871 + continue; 1872 + 1886 1873 nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); 1887 1874 if (nfit_kernfs) 1888 1875 nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, ··· 2674 2655 spa->proximity_domain); 2675 2656 else 2676 2657 ndr_desc->numa_node = NUMA_NO_NODE; 2658 + 2659 + if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) 2660 + set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags); 2661 + 2662 + if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH) 2663 + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags); 2677 2664 2678 2665 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { 2679 2666 struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; ··· 3489 3464 BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9); 3490 3465 BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); 3491 3466 BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); 3467 + BUILD_BUG_ON(sizeof(struct 
acpi_nfit_capabilities) != 16); 3492 3468 3493 3469 guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]); 3494 3470 guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
+1
drivers/acpi/nfit/nfit.h
··· 202 202 unsigned long dimm_cmd_force_en; 203 203 unsigned long bus_cmd_force_en; 204 204 unsigned long bus_nfit_cmd_force_en; 205 + unsigned int platform_cap; 205 206 int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, 206 207 void *iobuf, u64 len, int rw); 207 208 };
+1 -1
drivers/dax/device.c
··· 133 133 dax_region->base = addr; 134 134 if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { 135 135 kfree(dax_region); 136 - return NULL;; 136 + return NULL; 137 137 } 138 138 139 139 kref_get(&dax_region->kref);
+11 -9
drivers/dax/pmem.c
··· 21 21 struct dax_pmem { 22 22 struct device *dev; 23 23 struct percpu_ref ref; 24 + struct dev_pagemap pgmap; 24 25 struct completion cmp; 25 26 }; 26 27 ··· 70 69 struct nd_namespace_common *ndns; 71 70 struct nd_dax *nd_dax = to_nd_dax(dev); 72 71 struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; 73 - struct vmem_altmap __altmap, *altmap = NULL; 74 72 75 73 ndns = nvdimm_namespace_common_probe(dev); 76 74 if (IS_ERR(ndns)) 77 75 return PTR_ERR(ndns); 78 76 nsio = to_nd_namespace_io(&ndns->dev); 79 77 78 + dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); 79 + if (!dax_pmem) 80 + return -ENOMEM; 81 + 80 82 /* parse the 'pfn' info block via ->rw_bytes */ 81 83 rc = devm_nsio_enable(dev, nsio); 82 84 if (rc) 83 85 return rc; 84 - altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); 85 - if (IS_ERR(altmap)) 86 - return PTR_ERR(altmap); 86 + rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap); 87 + if (rc) 88 + return rc; 87 89 devm_nsio_disable(dev, nsio); 88 90 89 91 pfn_sb = nd_pfn->pfn_sb; ··· 97 93 dev_warn(dev, "could not reserve region %pR\n", &nsio->res); 98 94 return -EBUSY; 99 95 } 100 - 101 - dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); 102 - if (!dax_pmem) 103 - return -ENOMEM; 104 96 105 97 dax_pmem->dev = dev; 106 98 init_completion(&dax_pmem->cmp); ··· 110 110 if (rc) 111 111 return rc; 112 112 113 - addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); 113 + dax_pmem->pgmap.ref = &dax_pmem->ref; 114 + addr = devm_memremap_pages(dev, &dax_pmem->pgmap); 114 115 if (IS_ERR(addr)) 115 116 return PTR_ERR(addr); 116 117 ··· 121 120 return rc; 122 121 123 122 /* adjust the dax_region resource to the start of data */ 123 + memcpy(&res, &dax_pmem->pgmap.res, sizeof(res)); 124 124 res.start += le64_to_cpu(pfn_sb->dataoff); 125 125 126 126 rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
+10
drivers/dax/super.c
··· 15 15 #include <linux/mount.h> 16 16 #include <linux/magic.h> 17 17 #include <linux/genhd.h> 18 + #include <linux/pfn_t.h> 18 19 #include <linux/cdev.h> 19 20 #include <linux/hash.h> 20 21 #include <linux/slab.h> ··· 122 121 pr_debug("VFS (%s): error: dax access failed (%ld)\n", 123 122 sb->s_id, len); 124 123 return len < 0 ? len : -EIO; 124 + } 125 + 126 + if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) 127 + || pfn_t_devmap(pfn)) 128 + /* pass */; 129 + else { 130 + pr_debug("VFS (%s): error: dax support not enabled\n", 131 + sb->s_id); 132 + return -EOPNOTSUPP; 125 133 } 126 134 127 135 return 0;
+1 -1
drivers/nvdimm/btt.c
··· 753 753 return NULL; 754 754 arena->nd_btt = btt->nd_btt; 755 755 arena->sector_size = btt->sector_size; 756 + mutex_init(&arena->err_lock); 756 757 757 758 if (!size) 758 759 return arena; ··· 892 891 goto out; 893 892 } 894 893 895 - mutex_init(&arena->err_lock); 896 894 ret = btt_freelist_init(arena); 897 895 if (ret) 898 896 goto out;
-3
drivers/nvdimm/bus.c
··· 1142 1142 { 1143 1143 int rc; 1144 1144 1145 - BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128); 1146 - BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8); 1147 - 1148 1145 rc = bus_register(&nvdimm_bus_type); 1149 1146 if (rc) 1150 1147 return rc;
+1 -1
drivers/nvdimm/namespace_devs.c
··· 2408 2408 2409 2409 static struct device **create_namespaces(struct nd_region *nd_region) 2410 2410 { 2411 - struct nd_mapping *nd_mapping = &nd_region->mapping[0]; 2411 + struct nd_mapping *nd_mapping; 2412 2412 struct device **devs; 2413 2413 int i; 2414 2414
+4 -5
drivers/nvdimm/nd.h
··· 368 368 void nvdimm_badblocks_populate(struct nd_region *nd_region, 369 369 struct badblocks *bb, const struct resource *res); 370 370 #if IS_ENABLED(CONFIG_ND_CLAIM) 371 - struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, 372 - struct resource *res, struct vmem_altmap *altmap); 371 + int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); 373 372 int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); 374 373 void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); 375 374 #else 376 - static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, 377 - struct resource *res, struct vmem_altmap *altmap) 375 + static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, 376 + struct dev_pagemap *pgmap) 378 377 { 379 - return ERR_PTR(-ENXIO); 378 + return -ENXIO; 380 379 } 381 380 static inline int devm_nsio_enable(struct device *dev, 382 381 struct nd_namespace_io *nsio)
+15 -12
drivers/nvdimm/pfn_devs.c
··· 542 542 return reserve; 543 543 } 544 544 545 - static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, 546 - struct resource *res, struct vmem_altmap *altmap) 545 + static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 547 546 { 547 + struct resource *res = &pgmap->res; 548 + struct vmem_altmap *altmap = &pgmap->altmap; 548 549 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 549 550 u64 offset = le64_to_cpu(pfn_sb->dataoff); 550 551 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); ··· 562 561 res->start += start_pad; 563 562 res->end -= end_trunc; 564 563 564 + pgmap->type = MEMORY_DEVICE_HOST; 565 + 565 566 if (nd_pfn->mode == PFN_MODE_RAM) { 566 567 if (offset < SZ_8K) 567 - return ERR_PTR(-EINVAL); 568 + return -EINVAL; 568 569 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); 569 - altmap = NULL; 570 + pgmap->altmap_valid = false; 570 571 } else if (nd_pfn->mode == PFN_MODE_PMEM) { 571 572 nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) 572 573 - offset) / PAGE_SIZE); ··· 580 577 memcpy(altmap, &__altmap, sizeof(*altmap)); 581 578 altmap->free = PHYS_PFN(offset - SZ_8K); 582 579 altmap->alloc = 0; 580 + pgmap->altmap_valid = true; 583 581 } else 584 - return ERR_PTR(-ENXIO); 582 + return -ENXIO; 585 583 586 - return altmap; 584 + return 0; 587 585 } 588 586 589 587 static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) ··· 712 708 * Determine the effective resource range and vmem_altmap from an nd_pfn 713 709 * instance. 
714 710 */ 715 - struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, 716 - struct resource *res, struct vmem_altmap *altmap) 711 + int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 717 712 { 718 713 int rc; 719 714 720 715 if (!nd_pfn->uuid || !nd_pfn->ndns) 721 - return ERR_PTR(-ENODEV); 716 + return -ENODEV; 722 717 723 718 rc = nd_pfn_init(nd_pfn); 724 719 if (rc) 725 - return ERR_PTR(rc); 720 + return rc; 726 721 727 - /* we need a valid pfn_sb before we can init a vmem_altmap */ 728 - return __nvdimm_setup_pfn(nd_pfn, res, altmap); 722 + /* we need a valid pfn_sb before we can init a dev_pagemap */ 723 + return __nvdimm_setup_pfn(nd_pfn, pgmap); 729 724 } 730 725 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);
+27 -22
drivers/nvdimm/pmem.c
··· 35 35 #include "pmem.h" 36 36 #include "pfn.h" 37 37 #include "nd.h" 38 + #include "nd-core.h" 38 39 39 40 static struct device *to_dev(struct pmem_device *pmem) 40 41 { ··· 299 298 { 300 299 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 301 300 struct nd_region *nd_region = to_nd_region(dev->parent); 302 - struct vmem_altmap __altmap, *altmap = NULL; 303 301 int nid = dev_to_node(dev), fua, wbc; 304 302 struct resource *res = &nsio->res; 303 + struct resource bb_res; 305 304 struct nd_pfn *nd_pfn = NULL; 306 305 struct dax_device *dax_dev; 307 306 struct nd_pfn_sb *pfn_sb; 308 307 struct pmem_device *pmem; 309 - struct resource pfn_res; 310 308 struct request_queue *q; 311 309 struct device *gendev; 312 310 struct gendisk *disk; 313 311 void *addr; 314 - 315 - /* while nsio_rw_bytes is active, parse a pfn info block if present */ 316 - if (is_nd_pfn(dev)) { 317 - nd_pfn = to_nd_pfn(dev); 318 - altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); 319 - if (IS_ERR(altmap)) 320 - return PTR_ERR(altmap); 321 - } 322 - 323 - /* we're attaching a block device, disable raw namespace access */ 324 - devm_nsio_disable(dev, nsio); 312 + int rc; 325 313 326 314 pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); 327 315 if (!pmem) 328 316 return -ENOMEM; 317 + 318 + /* while nsio_rw_bytes is active, parse a pfn info block if present */ 319 + if (is_nd_pfn(dev)) { 320 + nd_pfn = to_nd_pfn(dev); 321 + rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap); 322 + if (rc) 323 + return rc; 324 + } 325 + 326 + /* we're attaching a block device, disable raw namespace access */ 327 + devm_nsio_disable(dev, nsio); 329 328 330 329 dev_set_drvdata(dev, pmem); 331 330 pmem->phys_addr = res->start; ··· 335 334 dev_warn(dev, "unable to guarantee persistence of writes\n"); 336 335 fua = 0; 337 336 } 338 - wbc = nvdimm_has_cache(nd_region); 337 + wbc = nvdimm_has_cache(nd_region) && 338 + !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags); 339 339 340 340 if 
(!devm_request_mem_region(dev, res->start, resource_size(res), 341 341 dev_name(&ndns->dev))) { ··· 352 350 return -ENOMEM; 353 351 354 352 pmem->pfn_flags = PFN_DEV; 353 + pmem->pgmap.ref = &q->q_usage_counter; 355 354 if (is_nd_pfn(dev)) { 356 - addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, 357 - altmap); 355 + addr = devm_memremap_pages(dev, &pmem->pgmap); 358 356 pfn_sb = nd_pfn->pfn_sb; 359 357 pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); 360 - pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); 358 + pmem->pfn_pad = resource_size(res) - 359 + resource_size(&pmem->pgmap.res); 361 360 pmem->pfn_flags |= PFN_MAP; 362 - res = &pfn_res; /* for badblocks populate */ 363 - res->start += pmem->data_offset; 361 + memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); 362 + bb_res.start += pmem->data_offset; 364 363 } else if (pmem_should_map_pages(dev)) { 365 - addr = devm_memremap_pages(dev, &nsio->res, 366 - &q->q_usage_counter, NULL); 364 + memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); 365 + pmem->pgmap.altmap_valid = false; 366 + addr = devm_memremap_pages(dev, &pmem->pgmap); 367 367 pmem->pfn_flags |= PFN_MAP; 368 + memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); 368 369 } else 369 370 addr = devm_memremap(dev, pmem->phys_addr, 370 371 pmem->size, ARCH_MEMREMAP_PMEM); ··· 406 401 / 512); 407 402 if (devm_init_badblocks(dev, &pmem->bb)) 408 403 return -ENOMEM; 409 - nvdimm_badblocks_populate(nd_region, &pmem->bb, res); 404 + nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res); 410 405 disk->bb = &pmem->bb; 411 406 412 407 dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
+1
drivers/nvdimm/pmem.h
··· 22 22 struct badblocks bb; 23 23 struct dax_device *dax_dev; 24 24 struct gendisk *disk; 25 + struct dev_pagemap pgmap; 25 26 }; 26 27 27 28 long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+13
drivers/nvdimm/region_devs.c
··· 528 528 } 529 529 static DEVICE_ATTR_RO(resource); 530 530 531 + static ssize_t persistence_domain_show(struct device *dev, 532 + struct device_attribute *attr, char *buf) 533 + { 534 + struct nd_region *nd_region = to_nd_region(dev); 535 + unsigned long flags = nd_region->flags; 536 + 537 + return sprintf(buf, "%s%s\n", 538 + flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "", 539 + flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : ""); 540 + } 541 + static DEVICE_ATTR_RO(persistence_domain); 542 + 531 543 static struct attribute *nd_region_attributes[] = { 532 544 &dev_attr_size.attr, 533 545 &dev_attr_nstype.attr, ··· 555 543 &dev_attr_init_namespaces.attr, 556 544 &dev_attr_badblocks.attr, 557 545 &dev_attr_resource.attr, 546 + &dev_attr_persistence_domain.attr, 558 547 NULL, 559 548 }; 560 549
+1
drivers/s390/block/Kconfig
··· 16 16 config DCSSBLK 17 17 def_tristate m 18 18 select DAX 19 + select FS_DAX_LIMITED 19 20 prompt "DCSSBLK support" 20 21 depends on S390 && BLOCK 21 22 help
+2 -1
drivers/s390/block/dcssblk.c
··· 916 916 917 917 dev_sz = dev_info->end - dev_info->start + 1; 918 918 *kaddr = (void *) dev_info->start + offset; 919 - *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); 919 + *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), 920 + PFN_DEV|PFN_SPECIAL); 920 921 921 922 return (dev_sz - offset) / PAGE_SIZE; 922 923 }
+7
fs/Kconfig
··· 58 58 depends on ZONE_DEVICE 59 59 depends on TRANSPARENT_HUGEPAGE 60 60 61 + # Selected by DAX drivers that do not expect filesystem DAX to support 62 + # get_user_pages() of DAX mappings. I.e. "limited" indicates no support 63 + # for fork() of processes with MAP_SHARED mappings or support for 64 + # direct-I/O to a DAX mapping. 65 + config FS_DAX_LIMITED 66 + bool 67 + 61 68 endif # BLOCK 62 69 63 70 # Posix ACL utility routines
+5 -2
fs/ext2/super.c
··· 962 962 963 963 if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { 964 964 err = bdev_dax_supported(sb, blocksize); 965 - if (err) 966 - goto failed_mount; 965 + if (err) { 966 + ext2_msg(sb, KERN_ERR, 967 + "DAX unsupported by block device. Turning off DAX."); 968 + sbi->s_mount_opt &= ~EXT2_MOUNT_DAX; 969 + } 967 970 } 968 971 969 972 /* If the blocksize doesn't match, re-read the thing.. */
+6 -3
fs/ext4/super.c
··· 3712 3712 if (ext4_has_feature_inline_data(sb)) { 3713 3713 ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem" 3714 3714 " that may contain inline data"); 3715 - goto failed_mount; 3715 + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX; 3716 3716 } 3717 3717 err = bdev_dax_supported(sb, blocksize); 3718 - if (err) 3719 - goto failed_mount; 3718 + if (err) { 3719 + ext4_msg(sb, KERN_ERR, 3720 + "DAX unsupported by block device. Turning off DAX."); 3721 + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX; 3722 + } 3720 3723 } 3721 3724 3722 3725 if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
+11
include/linux/libnvdimm.h
··· 47 47 48 48 /* region flag indicating to direct-map persistent memory by default */ 49 49 ND_REGION_PAGEMAP = 0, 50 + /* 51 + * Platform ensures entire CPU store data path is flushed to pmem on 52 + * system power loss. 53 + */ 54 + ND_REGION_PERSIST_CACHE = 1, 55 + /* 56 + * Platform provides mechanisms to automatically flush outstanding 57 + * write data from memory controler to pmem on system power loss. 58 + * (ADR) 59 + */ 60 + ND_REGION_PERSIST_MEMCTRL = 2, 50 61 51 62 /* mark newly adjusted resources as requiring a label update */ 52 63 DPA_RESOURCE_ADJUSTED = 1 << 0,
+17 -12
include/linux/memory_hotplug.h
··· 13 13 struct mem_section; 14 14 struct memory_block; 15 15 struct resource; 16 + struct vmem_altmap; 16 17 17 18 #ifdef CONFIG_MEMORY_HOTPLUG 18 19 /* ··· 126 125 127 126 #ifdef CONFIG_MEMORY_HOTREMOVE 128 127 extern bool is_pageblock_removable_nolock(struct page *page); 129 - extern int arch_remove_memory(u64 start, u64 size); 128 + extern int arch_remove_memory(u64 start, u64 size, 129 + struct vmem_altmap *altmap); 130 130 extern int __remove_pages(struct zone *zone, unsigned long start_pfn, 131 - unsigned long nr_pages); 131 + unsigned long nr_pages, struct vmem_altmap *altmap); 132 132 #endif /* CONFIG_MEMORY_HOTREMOVE */ 133 133 134 134 /* reasonably generic interface to expand the physical pages */ 135 - extern int __add_pages(int nid, unsigned long start_pfn, 136 - unsigned long nr_pages, bool want_memblock); 135 + extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, 136 + struct vmem_altmap *altmap, bool want_memblock); 137 137 138 138 #ifndef CONFIG_ARCH_HAS_ADD_PAGES 139 139 static inline int add_pages(int nid, unsigned long start_pfn, 140 - unsigned long nr_pages, bool want_memblock) 140 + unsigned long nr_pages, struct vmem_altmap *altmap, 141 + bool want_memblock) 141 142 { 142 - return __add_pages(nid, start_pfn, nr_pages, want_memblock); 143 + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 143 144 } 144 145 #else /* ARCH_HAS_ADD_PAGES */ 145 - int add_pages(int nid, unsigned long start_pfn, 146 - unsigned long nr_pages, bool want_memblock); 146 + int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, 147 + struct vmem_altmap *altmap, bool want_memblock); 147 148 #endif /* ARCH_HAS_ADD_PAGES */ 148 149 149 150 #ifdef CONFIG_NUMA ··· 321 318 void *arg, int (*func)(struct memory_block *, void *)); 322 319 extern int add_memory(int nid, u64 start, u64 size); 323 320 extern int add_memory_resource(int nid, struct resource *resource, bool online); 324 - extern int arch_add_memory(int 
nid, u64 start, u64 size, bool want_memblock); 321 + extern int arch_add_memory(int nid, u64 start, u64 size, 322 + struct vmem_altmap *altmap, bool want_memblock); 325 323 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, 326 - unsigned long nr_pages); 324 + unsigned long nr_pages, struct vmem_altmap *altmap); 327 325 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); 328 326 extern bool is_memblock_offlined(struct memory_block *mem); 329 327 extern void remove_memory(int nid, u64 start, u64 size); 330 - extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn); 328 + extern int sparse_add_one_section(struct pglist_data *pgdat, 329 + unsigned long start_pfn, struct vmem_altmap *altmap); 331 330 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, 332 - unsigned long map_offset); 331 + unsigned long map_offset, struct vmem_altmap *altmap); 333 332 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, 334 333 unsigned long pnum); 335 334 extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
+23 -54
include/linux/memremap.h
··· 26 26 unsigned long alloc; 27 27 }; 28 28 29 - unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); 30 - void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); 31 - 32 - #ifdef CONFIG_ZONE_DEVICE 33 - struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start); 34 - #else 35 - static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) 36 - { 37 - return NULL; 38 - } 39 - #endif 40 - 41 29 /* 42 30 * Specialize ZONE_DEVICE memory into multiple types each having differents 43 31 * usage. ··· 113 125 struct dev_pagemap { 114 126 dev_page_fault_t page_fault; 115 127 dev_page_free_t page_free; 116 - struct vmem_altmap *altmap; 117 - const struct resource *res; 128 + struct vmem_altmap altmap; 129 + bool altmap_valid; 130 + struct resource res; 118 131 struct percpu_ref *ref; 119 132 struct device *dev; 120 133 void *data; ··· 123 134 }; 124 135 125 136 #ifdef CONFIG_ZONE_DEVICE 126 - void *devm_memremap_pages(struct device *dev, struct resource *res, 127 - struct percpu_ref *ref, struct vmem_altmap *altmap); 128 - struct dev_pagemap *find_dev_pagemap(resource_size_t phys); 137 + void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); 138 + struct dev_pagemap *get_dev_pagemap(unsigned long pfn, 139 + struct dev_pagemap *pgmap); 140 + 141 + unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); 142 + void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); 129 143 130 144 static inline bool is_zone_device_page(const struct page *page); 131 145 #else 132 146 static inline void *devm_memremap_pages(struct device *dev, 133 - struct resource *res, struct percpu_ref *ref, 134 - struct vmem_altmap *altmap) 147 + struct dev_pagemap *pgmap) 135 148 { 136 149 /* 137 150 * Fail attempts to call devm_memremap_pages() without ··· 144 153 return ERR_PTR(-ENXIO); 145 154 } 146 155 147 - static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) 156 + static inline struct 
dev_pagemap *get_dev_pagemap(unsigned long pfn, 157 + struct dev_pagemap *pgmap) 148 158 { 149 159 return NULL; 150 160 } 151 - #endif 161 + 162 + static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) 163 + { 164 + return 0; 165 + } 166 + 167 + static inline void vmem_altmap_free(struct vmem_altmap *altmap, 168 + unsigned long nr_pfns) 169 + { 170 + } 171 + #endif /* CONFIG_ZONE_DEVICE */ 152 172 153 173 #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) 154 174 static inline bool is_device_private_page(const struct page *page) ··· 174 172 page->pgmap->type == MEMORY_DEVICE_PUBLIC; 175 173 } 176 174 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ 177 - 178 - /** 179 - * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn 180 - * @pfn: page frame number to lookup page_map 181 - * @pgmap: optional known pgmap that already has a reference 182 - * 183 - * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the 184 - * same mapping. 185 - */ 186 - static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, 187 - struct dev_pagemap *pgmap) 188 - { 189 - const struct resource *res = pgmap ? pgmap->res : NULL; 190 - resource_size_t phys = PFN_PHYS(pfn); 191 - 192 - /* 193 - * In the cached case we're already holding a live reference so 194 - * we can simply do a blind increment 195 - */ 196 - if (res && phys >= res->start && phys <= res->end) { 197 - percpu_ref_get(pgmap->ref); 198 - return pgmap; 199 - } 200 - 201 - /* fall back to slow path lookup */ 202 - rcu_read_lock(); 203 - pgmap = find_dev_pagemap(phys); 204 - if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) 205 - pgmap = NULL; 206 - rcu_read_unlock(); 207 - 208 - return pgmap; 209 - } 210 175 211 176 static inline void put_dev_pagemap(struct dev_pagemap *pgmap) 212 177 {
+10 -12
include/linux/mm.h
··· 2075 2075 #endif 2076 2076 2077 2077 extern void set_dma_reserve(unsigned long new_dma_reserve); 2078 - extern void memmap_init_zone(unsigned long, int, unsigned long, 2079 - unsigned long, enum memmap_context); 2078 + extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, 2079 + enum memmap_context, struct vmem_altmap *); 2080 2080 extern void setup_per_zone_wmarks(void); 2081 2081 extern int __meminit init_per_zone_wmark_min(void); 2082 2082 extern void mem_init(void); ··· 2544 2544 unsigned long map_count, 2545 2545 int nodeid); 2546 2546 2547 - struct page *sparse_mem_map_populate(unsigned long pnum, int nid); 2547 + struct page *sparse_mem_map_populate(unsigned long pnum, int nid, 2548 + struct vmem_altmap *altmap); 2548 2549 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); 2549 2550 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); 2550 2551 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); ··· 2553 2552 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); 2554 2553 void *vmemmap_alloc_block(unsigned long size, int node); 2555 2554 struct vmem_altmap; 2556 - void *__vmemmap_alloc_block_buf(unsigned long size, int node, 2557 - struct vmem_altmap *altmap); 2558 - static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) 2559 - { 2560 - return __vmemmap_alloc_block_buf(size, node, NULL); 2561 - } 2562 - 2555 + void *vmemmap_alloc_block_buf(unsigned long size, int node); 2556 + void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap); 2563 2557 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); 2564 2558 int vmemmap_populate_basepages(unsigned long start, unsigned long end, 2565 2559 int node); 2566 - int vmemmap_populate(unsigned long start, unsigned long end, int node); 2560 + int vmemmap_populate(unsigned long start, unsigned long end, int node, 2561 + struct vmem_altmap *altmap); 2567 2562 void 
vmemmap_populate_print_last(void); 2568 2563 #ifdef CONFIG_MEMORY_HOTPLUG 2569 - void vmemmap_free(unsigned long start, unsigned long end); 2564 + void vmemmap_free(unsigned long start, unsigned long end, 2565 + struct vmem_altmap *altmap); 2570 2566 #endif 2571 2567 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, 2572 2568 unsigned long nr_pages);
+13
include/linux/pfn_t.h
··· 15 15 #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) 16 16 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) 17 17 #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) 18 + #define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5)) 18 19 19 20 #define PFN_FLAGS_TRACE \ 21 + { PFN_SPECIAL, "SPECIAL" }, \ 20 22 { PFN_SG_CHAIN, "SG_CHAIN" }, \ 21 23 { PFN_SG_LAST, "SG_LAST" }, \ 22 24 { PFN_DEV, "DEV" }, \ ··· 122 120 #endif 123 121 #endif /* __HAVE_ARCH_PTE_DEVMAP */ 124 122 123 + #ifdef __HAVE_ARCH_PTE_SPECIAL 124 + static inline bool pfn_t_special(pfn_t pfn) 125 + { 126 + return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; 127 + } 128 + #else 129 + static inline bool pfn_t_special(pfn_t pfn) 130 + { 131 + return false; 132 + } 133 + #endif /* __HAVE_ARCH_PTE_SPECIAL */ 125 134 #endif /* _LINUX_PFN_T_H_ */
+1 -55
include/uapi/linux/ndctl.h
··· 15 15 16 16 #include <linux/types.h> 17 17 18 - struct nd_cmd_smart { 19 - __u32 status; 20 - __u8 data[128]; 21 - } __packed; 22 - 23 - #define ND_SMART_HEALTH_VALID (1 << 0) 24 - #define ND_SMART_SPARES_VALID (1 << 1) 25 - #define ND_SMART_USED_VALID (1 << 2) 26 - #define ND_SMART_TEMP_VALID (1 << 3) 27 - #define ND_SMART_CTEMP_VALID (1 << 4) 28 - #define ND_SMART_ALARM_VALID (1 << 9) 29 - #define ND_SMART_SHUTDOWN_VALID (1 << 10) 30 - #define ND_SMART_VENDOR_VALID (1 << 11) 31 - #define ND_SMART_SPARE_TRIP (1 << 0) 32 - #define ND_SMART_TEMP_TRIP (1 << 1) 33 - #define ND_SMART_CTEMP_TRIP (1 << 2) 34 - #define ND_SMART_NON_CRITICAL_HEALTH (1 << 0) 35 - #define ND_SMART_CRITICAL_HEALTH (1 << 1) 36 - #define ND_SMART_FATAL_HEALTH (1 << 2) 37 - 38 - struct nd_smart_payload { 39 - __u32 flags; 40 - __u8 reserved0[4]; 41 - __u8 health; 42 - __u8 spares; 43 - __u8 life_used; 44 - __u8 alarm_flags; 45 - __u16 temperature; 46 - __u16 ctrl_temperature; 47 - __u8 reserved1[15]; 48 - __u8 shutdown_state; 49 - __u32 vendor_size; 50 - __u8 vendor_data[92]; 51 - } __packed; 52 - 53 - struct nd_cmd_smart_threshold { 54 - __u32 status; 55 - __u8 data[8]; 56 - } __packed; 57 - 58 - struct nd_smart_threshold_payload { 59 - __u8 alarm_control; 60 - __u8 reserved0; 61 - __u16 temperature; 62 - __u8 spares; 63 - __u8 reserved[3]; 64 - } __packed; 65 - 66 18 struct nd_cmd_dimm_flags { 67 19 __u32 status; 68 20 __u32 flags; ··· 163 211 164 212 #define ND_IOCTL 'N' 165 213 166 - #define ND_IOCTL_SMART _IOWR(ND_IOCTL, ND_CMD_SMART,\ 167 - struct nd_cmd_smart) 168 - 169 - #define ND_IOCTL_SMART_THRESHOLD _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\ 170 - struct nd_cmd_smart_threshold) 171 - 172 214 #define ND_IOCTL_DIMM_FLAGS _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\ 173 215 struct nd_cmd_dimm_flags) 174 216 ··· 209 263 }; 210 264 211 265 enum { 212 - ND_MIN_NAMESPACE_SIZE = 0x00400000, 266 + ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, 213 267 }; 214 268 215 269 enum ars_masks {
+71 -103
kernel/memremap.c
··· 188 188 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) 189 189 #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) 190 190 191 - struct page_map { 192 - struct resource res; 193 - struct percpu_ref *ref; 194 - struct dev_pagemap pgmap; 195 - struct vmem_altmap altmap; 196 - }; 197 - 198 191 static unsigned long order_at(struct resource *res, unsigned long pgoff) 199 192 { 200 193 unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff; ··· 241 248 EXPORT_SYMBOL(device_private_entry_fault); 242 249 #endif /* CONFIG_DEVICE_PRIVATE */ 243 250 244 - static void pgmap_radix_release(struct resource *res) 251 + static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff) 245 252 { 246 253 unsigned long pgoff, order; 247 254 248 255 mutex_lock(&pgmap_lock); 249 - foreach_order_pgoff(res, order, pgoff) 256 + foreach_order_pgoff(res, order, pgoff) { 257 + if (pgoff >= end_pgoff) 258 + break; 250 259 radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff); 260 + } 251 261 mutex_unlock(&pgmap_lock); 252 262 253 263 synchronize_rcu(); 254 264 } 255 265 256 - static unsigned long pfn_first(struct page_map *page_map) 266 + static unsigned long pfn_first(struct dev_pagemap *pgmap) 257 267 { 258 - struct dev_pagemap *pgmap = &page_map->pgmap; 259 - const struct resource *res = &page_map->res; 260 - struct vmem_altmap *altmap = pgmap->altmap; 268 + const struct resource *res = &pgmap->res; 269 + struct vmem_altmap *altmap = &pgmap->altmap; 261 270 unsigned long pfn; 262 271 263 272 pfn = res->start >> PAGE_SHIFT; 264 - if (altmap) 273 + if (pgmap->altmap_valid) 265 274 pfn += vmem_altmap_offset(altmap); 266 275 return pfn; 267 276 } 268 277 269 - static unsigned long pfn_end(struct page_map *page_map) 278 + static unsigned long pfn_end(struct dev_pagemap *pgmap) 270 279 { 271 - const struct resource *res = &page_map->res; 280 + const struct resource *res = &pgmap->res; 272 281 273 282 return (res->start + resource_size(res)) >> PAGE_SHIFT; 274 283 } ··· 
278 283 #define for_each_device_pfn(pfn, map) \ 279 284 for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++) 280 285 281 - static void devm_memremap_pages_release(struct device *dev, void *data) 286 + static void devm_memremap_pages_release(void *data) 282 287 { 283 - struct page_map *page_map = data; 284 - struct resource *res = &page_map->res; 288 + struct dev_pagemap *pgmap = data; 289 + struct device *dev = pgmap->dev; 290 + struct resource *res = &pgmap->res; 285 291 resource_size_t align_start, align_size; 286 - struct dev_pagemap *pgmap = &page_map->pgmap; 287 292 unsigned long pfn; 288 293 289 - for_each_device_pfn(pfn, page_map) 294 + for_each_device_pfn(pfn, pgmap) 290 295 put_page(pfn_to_page(pfn)); 291 296 292 297 if (percpu_ref_tryget_live(pgmap->ref)) { ··· 296 301 297 302 /* pages are dead and unused, undo the arch mapping */ 298 303 align_start = res->start & ~(SECTION_SIZE - 1); 299 - align_size = ALIGN(resource_size(res), SECTION_SIZE); 304 + align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) 305 + - align_start; 300 306 301 307 mem_hotplug_begin(); 302 - arch_remove_memory(align_start, align_size); 308 + arch_remove_memory(align_start, align_size, pgmap->altmap_valid ? 309 + &pgmap->altmap : NULL); 303 310 mem_hotplug_done(); 304 311 305 312 untrack_pfn(NULL, PHYS_PFN(align_start), align_size); 306 - pgmap_radix_release(res); 307 - dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, 308 - "%s: failed to free all reserved pages\n", __func__); 309 - } 310 - 311 - /* assumes rcu_read_lock() held at entry */ 312 - struct dev_pagemap *find_dev_pagemap(resource_size_t phys) 313 - { 314 - struct page_map *page_map; 315 - 316 - WARN_ON_ONCE(!rcu_read_lock_held()); 317 - 318 - page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); 319 - return page_map ? 
&page_map->pgmap : NULL; 313 + pgmap_radix_release(res, -1); 314 + dev_WARN_ONCE(dev, pgmap->altmap.alloc, 315 + "%s: failed to free all reserved pages\n", __func__); 320 316 } 321 317 322 318 /** 323 319 * devm_memremap_pages - remap and provide memmap backing for the given resource 324 320 * @dev: hosting device for @res 325 - * @res: "host memory" address range 326 - * @ref: a live per-cpu reference count 327 - * @altmap: optional descriptor for allocating the memmap from @res 321 + * @pgmap: pointer to a struct dev_pgmap 328 322 * 329 323 * Notes: 330 - * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time 331 - * (or devm release event). The expected order of events is that @ref has 324 + * 1/ At a minimum the res, ref and type members of @pgmap must be initialized 325 + * by the caller before passing it to this function 326 + * 327 + * 2/ The altmap field may optionally be initialized, in which case altmap_valid 328 + * must be set to true 329 + * 330 + * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages() 331 + * time (or devm release event). The expected order of events is that ref has 332 332 * been through percpu_ref_kill() before devm_memremap_pages_release(). The 333 333 * wait for the completion of all references being dropped and 334 334 * percpu_ref_exit() must occur after devm_memremap_pages_release(). 335 335 * 336 - * 2/ @res is expected to be a host memory range that could feasibly be 336 + * 4/ res is expected to be a host memory range that could feasibly be 337 337 * treated as a "System RAM" range, i.e. not a device mmio range, but 338 338 * this is not enforced. 
339 339 */ 340 - void *devm_memremap_pages(struct device *dev, struct resource *res, 341 - struct percpu_ref *ref, struct vmem_altmap *altmap) 340 + void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) 342 341 { 343 342 resource_size_t align_start, align_size, align_end; 343 + struct vmem_altmap *altmap = pgmap->altmap_valid ? 344 + &pgmap->altmap : NULL; 344 345 unsigned long pfn, pgoff, order; 345 346 pgprot_t pgprot = PAGE_KERNEL; 346 - struct dev_pagemap *pgmap; 347 - struct page_map *page_map; 348 347 int error, nid, is_ram, i = 0; 348 + struct resource *res = &pgmap->res; 349 349 350 350 align_start = res->start & ~(SECTION_SIZE - 1); 351 351 align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) ··· 357 367 if (is_ram == REGION_INTERSECTS) 358 368 return __va(res->start); 359 369 360 - if (!ref) 370 + if (!pgmap->ref) 361 371 return ERR_PTR(-EINVAL); 362 372 363 - page_map = devres_alloc_node(devm_memremap_pages_release, 364 - sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); 365 - if (!page_map) 366 - return ERR_PTR(-ENOMEM); 367 - pgmap = &page_map->pgmap; 368 - 369 - memcpy(&page_map->res, res, sizeof(*res)); 370 - 371 373 pgmap->dev = dev; 372 - if (altmap) { 373 - memcpy(&page_map->altmap, altmap, sizeof(*altmap)); 374 - pgmap->altmap = &page_map->altmap; 375 - } 376 - pgmap->ref = ref; 377 - pgmap->res = &page_map->res; 378 - pgmap->type = MEMORY_DEVICE_HOST; 379 - pgmap->page_fault = NULL; 380 - pgmap->page_free = NULL; 381 - pgmap->data = NULL; 382 374 383 375 mutex_lock(&pgmap_lock); 384 376 error = 0; 385 377 align_end = align_start + align_size - 1; 386 378 387 379 foreach_order_pgoff(res, order, pgoff) { 388 - struct dev_pagemap *dup; 389 - 390 - rcu_read_lock(); 391 - dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff)); 392 - rcu_read_unlock(); 393 - if (dup) { 394 - dev_err(dev, "%s: %pr collides with mapping for %s\n", 395 - __func__, res, dev_name(dup->dev)); 396 - error = -EBUSY; 397 - break; 398 - } 399 380 
error = __radix_tree_insert(&pgmap_radix, 400 - PHYS_PFN(res->start) + pgoff, order, page_map); 381 + PHYS_PFN(res->start) + pgoff, order, pgmap); 401 382 if (error) { 402 383 dev_err(dev, "%s: failed: %d\n", __func__, error); 403 384 break; ··· 388 427 goto err_pfn_remap; 389 428 390 429 mem_hotplug_begin(); 391 - error = arch_add_memory(nid, align_start, align_size, false); 430 + error = arch_add_memory(nid, align_start, align_size, altmap, false); 392 431 if (!error) 393 432 move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], 394 433 align_start >> PAGE_SHIFT, 395 - align_size >> PAGE_SHIFT); 434 + align_size >> PAGE_SHIFT, altmap); 396 435 mem_hotplug_done(); 397 436 if (error) 398 437 goto err_add_memory; 399 438 400 - for_each_device_pfn(pfn, page_map) { 439 + for_each_device_pfn(pfn, pgmap) { 401 440 struct page *page = pfn_to_page(pfn); 402 441 403 442 /* ··· 408 447 */ 409 448 list_del(&page->lru); 410 449 page->pgmap = pgmap; 411 - percpu_ref_get(ref); 450 + percpu_ref_get(pgmap->ref); 412 451 if (!(++i % 1024)) 413 452 cond_resched(); 414 453 } 415 - devres_add(dev, page_map); 454 + 455 + devm_add_action(dev, devm_memremap_pages_release, pgmap); 456 + 416 457 return __va(res->start); 417 458 418 459 err_add_memory: 419 460 untrack_pfn(NULL, PHYS_PFN(align_start), align_size); 420 461 err_pfn_remap: 421 462 err_radix: 422 - pgmap_radix_release(res); 423 - devres_free(page_map); 463 + pgmap_radix_release(res, pgoff); 464 + devres_free(pgmap); 424 465 return ERR_PTR(error); 425 466 } 426 467 EXPORT_SYMBOL(devm_memremap_pages); ··· 438 475 altmap->alloc -= nr_pfns; 439 476 } 440 477 441 - struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) 478 + /** 479 + * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn 480 + * @pfn: page frame number to lookup page_map 481 + * @pgmap: optional known pgmap that already has a reference 482 + * 483 + * If @pgmap is non-NULL and covers @pfn it will be returned as-is. 
If @pgmap 484 + * is non-NULL but does not cover @pfn the reference to it will be released. 485 + */ 486 + struct dev_pagemap *get_dev_pagemap(unsigned long pfn, 487 + struct dev_pagemap *pgmap) 442 488 { 443 - /* 444 - * 'memmap_start' is the virtual address for the first "struct 445 - * page" in this range of the vmemmap array. In the case of 446 - * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple 447 - * pointer arithmetic, so we can perform this to_vmem_altmap() 448 - * conversion without concern for the initialization state of 449 - * the struct page fields. 450 - */ 451 - struct page *page = (struct page *) memmap_start; 452 - struct dev_pagemap *pgmap; 489 + resource_size_t phys = PFN_PHYS(pfn); 453 490 454 491 /* 455 - * Unconditionally retrieve a dev_pagemap associated with the 456 - * given physical address, this is only for use in the 457 - * arch_{add|remove}_memory() for setting up and tearing down 458 - * the memmap. 492 + * In the cached case we're already holding a live reference. 459 493 */ 494 + if (pgmap) { 495 + if (phys >= pgmap->res.start && phys <= pgmap->res.end) 496 + return pgmap; 497 + put_dev_pagemap(pgmap); 498 + } 499 + 500 + /* fall back to slow path lookup */ 460 501 rcu_read_lock(); 461 - pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page))); 502 + pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); 503 + if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) 504 + pgmap = NULL; 462 505 rcu_read_unlock(); 463 506 464 - return pgmap ? pgmap->altmap : NULL; 507 + return pgmap; 465 508 } 466 509 #endif /* CONFIG_ZONE_DEVICE */ 467 - 468 510 469 511 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) 470 512 void put_zone_device_private_or_public_page(struct page *page)
+5 -2
mm/gup.c
··· 1394 1394 1395 1395 VM_BUG_ON_PAGE(compound_head(page) != head, page); 1396 1396 1397 - put_dev_pagemap(pgmap); 1398 1397 SetPageReferenced(page); 1399 1398 pages[*nr] = page; 1400 1399 (*nr)++; ··· 1403 1404 ret = 1; 1404 1405 1405 1406 pte_unmap: 1407 + if (pgmap) 1408 + put_dev_pagemap(pgmap); 1406 1409 pte_unmap(ptem); 1407 1410 return ret; 1408 1411 } ··· 1444 1443 SetPageReferenced(page); 1445 1444 pages[*nr] = page; 1446 1445 get_page(page); 1447 - put_dev_pagemap(pgmap); 1448 1446 (*nr)++; 1449 1447 pfn++; 1450 1448 } while (addr += PAGE_SIZE, addr != end); 1449 + 1450 + if (pgmap) 1451 + put_dev_pagemap(pgmap); 1451 1452 return 1; 1452 1453 } 1453 1454
+7 -6
mm/hmm.c
··· 836 836 837 837 mem_hotplug_begin(); 838 838 if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) 839 - __remove_pages(zone, start_pfn, npages); 839 + __remove_pages(zone, start_pfn, npages, NULL); 840 840 else 841 841 arch_remove_memory(start_pfn << PAGE_SHIFT, 842 - npages << PAGE_SHIFT); 842 + npages << PAGE_SHIFT, NULL); 843 843 mem_hotplug_done(); 844 844 845 845 hmm_devmem_radix_release(resource); ··· 880 880 else 881 881 devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; 882 882 883 - devmem->pagemap.res = devmem->resource; 883 + devmem->pagemap.res = *devmem->resource; 884 884 devmem->pagemap.page_fault = hmm_devmem_fault; 885 885 devmem->pagemap.page_free = hmm_devmem_free; 886 886 devmem->pagemap.dev = devmem->device; ··· 929 929 * want the linear mapping and thus use arch_add_memory(). 930 930 */ 931 931 if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) 932 - ret = arch_add_memory(nid, align_start, align_size, false); 932 + ret = arch_add_memory(nid, align_start, align_size, NULL, 933 + false); 933 934 else 934 935 ret = add_pages(nid, align_start >> PAGE_SHIFT, 935 - align_size >> PAGE_SHIFT, false); 936 + align_size >> PAGE_SHIFT, NULL, false); 936 937 if (ret) { 937 938 mem_hotplug_done(); 938 939 goto error_add_memory; 939 940 } 940 941 move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], 941 942 align_start >> PAGE_SHIFT, 942 - align_size >> PAGE_SHIFT); 943 + align_size >> PAGE_SHIFT, NULL); 943 944 mem_hotplug_done(); 944 945 945 946 for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {
+15 -1
mm/memory.c
··· 1904 1904 } 1905 1905 EXPORT_SYMBOL(vm_insert_pfn_prot); 1906 1906 1907 + static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) 1908 + { 1909 + /* these checks mirror the abort conditions in vm_normal_page */ 1910 + if (vma->vm_flags & VM_MIXEDMAP) 1911 + return true; 1912 + if (pfn_t_devmap(pfn)) 1913 + return true; 1914 + if (pfn_t_special(pfn)) 1915 + return true; 1916 + if (is_zero_pfn(pfn_t_to_pfn(pfn))) 1917 + return true; 1918 + return false; 1919 + } 1920 + 1907 1921 static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, 1908 1922 pfn_t pfn, bool mkwrite) 1909 1923 { 1910 1924 pgprot_t pgprot = vma->vm_page_prot; 1911 1925 1912 - BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); 1926 + BUG_ON(!vm_mixed_ok(vma, pfn)); 1913 1927 1914 1928 if (addr < vma->vm_start || addr >= vma->vm_end) 1915 1929 return -EFAULT;
+18 -21
mm/memory_hotplug.c
··· 247 247 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ 248 248 249 249 static int __meminit __add_section(int nid, unsigned long phys_start_pfn, 250 - bool want_memblock) 250 + struct vmem_altmap *altmap, bool want_memblock) 251 251 { 252 252 int ret; 253 253 int i; ··· 255 255 if (pfn_valid(phys_start_pfn)) 256 256 return -EEXIST; 257 257 258 - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn); 258 + ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); 259 259 if (ret < 0) 260 260 return ret; 261 261 ··· 289 289 * add the new pages. 290 290 */ 291 291 int __ref __add_pages(int nid, unsigned long phys_start_pfn, 292 - unsigned long nr_pages, bool want_memblock) 292 + unsigned long nr_pages, struct vmem_altmap *altmap, 293 + bool want_memblock) 293 294 { 294 295 unsigned long i; 295 296 int err = 0; 296 297 int start_sec, end_sec; 297 - struct vmem_altmap *altmap; 298 298 299 299 /* during initialize mem_map, align hot-added range to section */ 300 300 start_sec = pfn_to_section_nr(phys_start_pfn); 301 301 end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); 302 302 303 - altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn)); 304 303 if (altmap) { 305 304 /* 306 305 * Validate altmap is within bounds of the total request ··· 314 315 } 315 316 316 317 for (i = start_sec; i <= end_sec; i++) { 317 - err = __add_section(nid, section_nr_to_pfn(i), want_memblock); 318 + err = __add_section(nid, section_nr_to_pfn(i), altmap, 319 + want_memblock); 318 320 319 321 /* 320 322 * EEXIST is finally dealt with by ioresource collision ··· 331 331 out: 332 332 return err; 333 333 } 334 - EXPORT_SYMBOL_GPL(__add_pages); 335 334 336 335 #ifdef CONFIG_MEMORY_HOTREMOVE 337 336 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ ··· 533 534 } 534 535 535 536 static int __remove_section(struct zone *zone, struct mem_section *ms, 536 - unsigned long map_offset) 537 + unsigned long map_offset, struct vmem_altmap *altmap) 537 
538 { 538 539 unsigned long start_pfn; 539 540 int scn_nr; ··· 550 551 start_pfn = section_nr_to_pfn((unsigned long)scn_nr); 551 552 __remove_zone(zone, start_pfn); 552 553 553 - sparse_remove_one_section(zone, ms, map_offset); 554 + sparse_remove_one_section(zone, ms, map_offset, altmap); 554 555 return 0; 555 556 } 556 557 ··· 566 567 * calling offline_pages(). 567 568 */ 568 569 int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, 569 - unsigned long nr_pages) 570 + unsigned long nr_pages, struct vmem_altmap *altmap) 570 571 { 571 572 unsigned long i; 572 573 unsigned long map_offset = 0; ··· 574 575 575 576 /* In the ZONE_DEVICE case device driver owns the memory region */ 576 577 if (is_dev_zone(zone)) { 577 - struct page *page = pfn_to_page(phys_start_pfn); 578 - struct vmem_altmap *altmap; 579 - 580 - altmap = to_vmem_altmap((unsigned long) page); 581 578 if (altmap) 582 579 map_offset = vmem_altmap_offset(altmap); 583 580 } else { ··· 604 609 for (i = 0; i < sections_to_remove; i++) { 605 610 unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; 606 611 607 - ret = __remove_section(zone, __pfn_to_section(pfn), map_offset); 612 + ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, 613 + altmap); 608 614 map_offset = 0; 609 615 if (ret) 610 616 break; ··· 795 799 pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn; 796 800 } 797 801 798 - void __ref move_pfn_range_to_zone(struct zone *zone, 799 - unsigned long start_pfn, unsigned long nr_pages) 802 + void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, 803 + unsigned long nr_pages, struct vmem_altmap *altmap) 800 804 { 801 805 struct pglist_data *pgdat = zone->zone_pgdat; 802 806 int nid = pgdat->node_id; ··· 821 825 * expects the zone spans the pfn range. 
All the pages in the range 822 826 * are reserved so nobody should be touching them so we should be safe 823 827 */ 824 - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG); 828 + memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 829 + MEMMAP_HOTPLUG, altmap); 825 830 826 831 set_zone_contiguous(zone); 827 832 } ··· 894 897 struct zone *zone; 895 898 896 899 zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); 897 - move_pfn_range_to_zone(zone, start_pfn, nr_pages); 900 + move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL); 898 901 return zone; 899 902 } 900 903 ··· 1143 1146 } 1144 1147 1145 1148 /* call arch's memory hotadd */ 1146 - ret = arch_add_memory(nid, start, size, true); 1149 + ret = arch_add_memory(nid, start, size, NULL, true); 1147 1150 1148 1151 if (ret < 0) 1149 1152 goto error; ··· 1885 1888 memblock_free(start, size); 1886 1889 memblock_remove(start, size); 1887 1890 1888 - arch_remove_memory(start, size); 1891 + arch_remove_memory(start, size, NULL); 1889 1892 1890 1893 try_offline_node(nid); 1891 1894
+3 -3
mm/page_alloc.c
··· 5321 5321 * done. Non-atomic initialization, single-pass. 5322 5322 */ 5323 5323 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, 5324 - unsigned long start_pfn, enum memmap_context context) 5324 + unsigned long start_pfn, enum memmap_context context, 5325 + struct vmem_altmap *altmap) 5325 5326 { 5326 - struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn)); 5327 5327 unsigned long end_pfn = start_pfn + size; 5328 5328 pg_data_t *pgdat = NODE_DATA(nid); 5329 5329 unsigned long pfn; ··· 5429 5429 5430 5430 #ifndef __HAVE_ARCH_MEMMAP_INIT 5431 5431 #define memmap_init(size, nid, zone, start_pfn) \ 5432 - memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) 5432 + memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL) 5433 5433 #endif 5434 5434 5435 5435 static int zone_batchsize(struct zone *zone)
+23 -44
mm/sparse-vmemmap.c
··· 74 74 } 75 75 76 76 /* need to make sure size is all the same during early stage */ 77 - static void * __meminit alloc_block_buf(unsigned long size, int node) 77 + void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) 78 78 { 79 79 void *ptr; 80 80 ··· 107 107 } 108 108 109 109 /** 110 - * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation 111 - * @altmap - reserved page pool for the allocation 112 - * @nr_pfns - size (in pages) of the allocation 110 + * altmap_alloc_block_buf - allocate pages from the device page map 111 + * @altmap: device page map 112 + * @size: size (in bytes) of the allocation 113 113 * 114 - * Allocations are aligned to the size of the request 114 + * Allocations are aligned to the size of the request. 115 115 */ 116 - static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap, 117 - unsigned long nr_pfns) 118 - { 119 - unsigned long pfn = vmem_altmap_next_pfn(altmap); 120 - unsigned long nr_align; 121 - 122 - nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG); 123 - nr_align = ALIGN(pfn, nr_align) - pfn; 124 - 125 - if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap)) 126 - return ULONG_MAX; 127 - altmap->alloc += nr_pfns; 128 - altmap->align += nr_align; 129 - return pfn + nr_align; 130 - } 131 - 132 - static void * __meminit altmap_alloc_block_buf(unsigned long size, 116 + void * __meminit altmap_alloc_block_buf(unsigned long size, 133 117 struct vmem_altmap *altmap) 134 118 { 135 - unsigned long pfn, nr_pfns; 136 - void *ptr; 119 + unsigned long pfn, nr_pfns, nr_align; 137 120 138 121 if (size & ~PAGE_MASK) { 139 122 pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n", ··· 124 141 return NULL; 125 142 } 126 143 144 + pfn = vmem_altmap_next_pfn(altmap); 127 145 nr_pfns = size >> PAGE_SHIFT; 128 - pfn = vmem_altmap_alloc(altmap, nr_pfns); 129 - if (pfn < ULONG_MAX) 130 - ptr = __va(__pfn_to_phys(pfn)); 131 - else 132 - ptr = NULL; 146 + nr_align = 1UL << 
find_first_bit(&nr_pfns, BITS_PER_LONG); 147 + nr_align = ALIGN(pfn, nr_align) - pfn; 148 + if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap)) 149 + return NULL; 150 + 151 + altmap->alloc += nr_pfns; 152 + altmap->align += nr_align; 153 + pfn += nr_align; 154 + 133 155 pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n", 134 156 __func__, pfn, altmap->alloc, altmap->align, nr_pfns); 135 - 136 - return ptr; 137 - } 138 - 139 - /* need to make sure size is all the same during early stage */ 140 - void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node, 141 - struct vmem_altmap *altmap) 142 - { 143 - if (altmap) 144 - return altmap_alloc_block_buf(size, altmap); 145 - return alloc_block_buf(size, node); 157 + return __va(__pfn_to_phys(pfn)); 146 158 } 147 159 148 160 void __meminit vmemmap_verify(pte_t *pte, int node, ··· 156 178 pte_t *pte = pte_offset_kernel(pmd, addr); 157 179 if (pte_none(*pte)) { 158 180 pte_t entry; 159 - void *p = alloc_block_buf(PAGE_SIZE, node); 181 + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); 160 182 if (!p) 161 183 return NULL; 162 184 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); ··· 256 278 return 0; 257 279 } 258 280 259 - struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) 281 + struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid, 282 + struct vmem_altmap *altmap) 260 283 { 261 284 unsigned long start; 262 285 unsigned long end; ··· 267 288 start = (unsigned long)map; 268 289 end = (unsigned long)(map + PAGES_PER_SECTION); 269 290 270 - if (vmemmap_populate(start, end, nid)) 291 + if (vmemmap_populate(start, end, nid, altmap)) 271 292 return NULL; 272 293 273 294 return map; ··· 297 318 if (!present_section_nr(pnum)) 298 319 continue; 299 320 300 - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); 321 + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); 301 322 if (map_map[pnum]) 302 323 continue; 303 324 ms = __nr_to_section(pnum);
+25 -18
mm/sparse.c
··· 421 421 } 422 422 423 423 #ifndef CONFIG_SPARSEMEM_VMEMMAP 424 - struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) 424 + struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, 425 + struct vmem_altmap *altmap) 425 426 { 426 427 struct page *map; 427 428 unsigned long size; ··· 477 476 478 477 if (!present_section_nr(pnum)) 479 478 continue; 480 - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); 479 + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); 481 480 if (map_map[pnum]) 482 481 continue; 483 482 ms = __nr_to_section(pnum); ··· 505 504 struct mem_section *ms = __nr_to_section(pnum); 506 505 int nid = sparse_early_nid(ms); 507 506 508 - map = sparse_mem_map_populate(pnum, nid); 507 + map = sparse_mem_map_populate(pnum, nid, NULL); 509 508 if (map) 510 509 return map; 511 510 ··· 683 682 #endif 684 683 685 684 #ifdef CONFIG_SPARSEMEM_VMEMMAP 686 - static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) 685 + static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, 686 + struct vmem_altmap *altmap) 687 687 { 688 688 /* This will make the necessary allocations eventually. 
*/ 689 - return sparse_mem_map_populate(pnum, nid); 689 + return sparse_mem_map_populate(pnum, nid, altmap); 690 690 } 691 - static void __kfree_section_memmap(struct page *memmap) 691 + static void __kfree_section_memmap(struct page *memmap, 692 + struct vmem_altmap *altmap) 692 693 { 693 694 unsigned long start = (unsigned long)memmap; 694 695 unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); 695 696 696 - vmemmap_free(start, end); 697 + vmemmap_free(start, end, altmap); 697 698 } 698 699 #ifdef CONFIG_MEMORY_HOTREMOVE 699 700 static void free_map_bootmem(struct page *memmap) ··· 703 700 unsigned long start = (unsigned long)memmap; 704 701 unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); 705 702 706 - vmemmap_free(start, end); 703 + vmemmap_free(start, end, NULL); 707 704 } 708 705 #endif /* CONFIG_MEMORY_HOTREMOVE */ 709 706 #else ··· 728 725 return ret; 729 726 } 730 727 731 - static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) 728 + static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, 729 + struct vmem_altmap *altmap) 732 730 { 733 731 return __kmalloc_section_memmap(); 734 732 } 735 733 736 - static void __kfree_section_memmap(struct page *memmap) 734 + static void __kfree_section_memmap(struct page *memmap, 735 + struct vmem_altmap *altmap) 737 736 { 738 737 if (is_vmalloc_addr(memmap)) 739 738 vfree(memmap); ··· 782 777 * set. If this is <=0, then that means that the passed-in 783 778 * map was not consumed and must be freed. 
784 779 */ 785 - int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn) 780 + int __meminit sparse_add_one_section(struct pglist_data *pgdat, 781 + unsigned long start_pfn, struct vmem_altmap *altmap) 786 782 { 787 783 unsigned long section_nr = pfn_to_section_nr(start_pfn); 788 784 struct mem_section *ms; ··· 799 793 ret = sparse_index_init(section_nr, pgdat->node_id); 800 794 if (ret < 0 && ret != -EEXIST) 801 795 return ret; 802 - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); 796 + memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap); 803 797 if (!memmap) 804 798 return -ENOMEM; 805 799 usemap = __kmalloc_section_usemap(); 806 800 if (!usemap) { 807 - __kfree_section_memmap(memmap); 801 + __kfree_section_memmap(memmap, altmap); 808 802 return -ENOMEM; 809 803 } 810 804 ··· 826 820 pgdat_resize_unlock(pgdat, &flags); 827 821 if (ret <= 0) { 828 822 kfree(usemap); 829 - __kfree_section_memmap(memmap); 823 + __kfree_section_memmap(memmap, altmap); 830 824 } 831 825 return ret; 832 826 } ··· 853 847 } 854 848 #endif 855 849 856 - static void free_section_usemap(struct page *memmap, unsigned long *usemap) 850 + static void free_section_usemap(struct page *memmap, unsigned long *usemap, 851 + struct vmem_altmap *altmap) 857 852 { 858 853 struct page *usemap_page; 859 854 ··· 868 861 if (PageSlab(usemap_page) || PageCompound(usemap_page)) { 869 862 kfree(usemap); 870 863 if (memmap) 871 - __kfree_section_memmap(memmap); 864 + __kfree_section_memmap(memmap, altmap); 872 865 return; 873 866 } 874 867 ··· 882 875 } 883 876 884 877 void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, 885 - unsigned long map_offset) 878 + unsigned long map_offset, struct vmem_altmap *altmap) 886 879 { 887 880 struct page *memmap = NULL; 888 881 unsigned long *usemap = NULL, flags; ··· 900 893 901 894 clear_hwpoisoned_pages(memmap + map_offset, 902 895 PAGES_PER_SECTION - map_offset); 903 - 
free_section_usemap(memmap, usemap); 896 + free_section_usemap(memmap, usemap, altmap); 904 897 } 905 898 #endif /* CONFIG_MEMORY_HOTREMOVE */ 906 899 #endif /* CONFIG_MEMORY_HOTPLUG */
+4
tools/testing/nvdimm/Kbuild
··· 37 37 38 38 nfit-y := $(ACPI_SRC)/core.o 39 39 nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o 40 + nfit-y += acpi_nfit_test.o 40 41 nfit-y += config_check.o 41 42 42 43 nd_pmem-y := $(NVDIMM_SRC)/pmem.o 43 44 nd_pmem-y += pmem-dax.o 45 + nd_pmem-y += pmem_test.o 44 46 nd_pmem-y += config_check.o 45 47 46 48 nd_btt-y := $(NVDIMM_SRC)/btt.o ··· 59 57 60 58 device_dax-y := $(DAX_SRC)/device.o 61 59 device_dax-y += dax-dev.o 60 + device_dax-y += device_dax_test.o 62 61 device_dax-y += config_check.o 63 62 64 63 dax_pmem-y := $(DAX_SRC)/pmem.o ··· 78 75 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o 79 76 libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o 80 77 libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o 78 + libnvdimm-y += libnvdimm_test.o 81 79 libnvdimm-y += config_check.o 82 80 83 81 obj-m += test/
+8
tools/testing/nvdimm/acpi_nfit_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright(c) 2018 Intel Corporation. All rights reserved. 3 + 4 + #include <linux/module.h> 5 + #include <linux/printk.h> 6 + #include "watermark.h" 7 + 8 + nfit_test_watermark(acpi_nfit);
+8
tools/testing/nvdimm/device_dax_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright(c) 2018 Intel Corporation. All rights reserved. 3 + 4 + #include <linux/module.h> 5 + #include <linux/printk.h> 6 + #include "watermark.h" 7 + 8 + nfit_test_watermark(device_dax);
+8
tools/testing/nvdimm/libnvdimm_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright(c) 2018 Intel Corporation. All rights reserved. 3 + 4 + #include <linux/module.h> 5 + #include <linux/printk.h> 6 + #include "watermark.h" 7 + 8 + nfit_test_watermark(libnvdimm);
+8
tools/testing/nvdimm/pmem_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright(c) 2018 Intel Corporation. All rights reserved. 3 + 4 + #include <linux/module.h> 5 + #include <linux/printk.h> 6 + #include "watermark.h" 7 + 8 + nfit_test_watermark(pmem);
+3 -4
tools/testing/nvdimm/test/iomap.c
··· 104 104 } 105 105 EXPORT_SYMBOL(__wrap_devm_memremap); 106 106 107 - void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, 108 - struct percpu_ref *ref, struct vmem_altmap *altmap) 107 + void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) 109 108 { 110 - resource_size_t offset = res->start; 109 + resource_size_t offset = pgmap->res.start; 111 110 struct nfit_test_resource *nfit_res = get_nfit_res(offset); 112 111 113 112 if (nfit_res) 114 113 return nfit_res->buf + offset - nfit_res->res.start; 115 - return devm_memremap_pages(dev, res, ref, altmap); 114 + return devm_memremap_pages(dev, pgmap); 116 115 } 117 116 EXPORT_SYMBOL(__wrap_devm_memremap_pages); 118 117
+450 -48
tools/testing/nvdimm/test/nfit.c
··· 27 27 #include <nfit.h> 28 28 #include <nd.h> 29 29 #include "nfit_test.h" 30 + #include "../watermark.h" 30 31 31 32 /* 32 33 * Generate an NFIT table to describe the following topology: ··· 138 137 139 138 static unsigned long dimm_fail_cmd_flags[NUM_DCR]; 140 139 140 + struct nfit_test_fw { 141 + enum intel_fw_update_state state; 142 + u32 context; 143 + u64 version; 144 + u32 size_received; 145 + u64 end_time; 146 + }; 147 + 141 148 struct nfit_test { 142 149 struct acpi_nfit_desc acpi_desc; 143 150 struct platform_device pdev; ··· 177 168 spinlock_t lock; 178 169 } ars_state; 179 170 struct device *dimm_dev[NUM_DCR]; 171 + struct nd_intel_smart *smart; 172 + struct nd_intel_smart_threshold *smart_threshold; 180 173 struct badrange badrange; 181 174 struct work_struct work; 175 + struct nfit_test_fw *fw; 182 176 }; 183 177 184 178 static struct workqueue_struct *nfit_wq; ··· 191 179 struct platform_device *pdev = to_platform_device(dev); 192 180 193 181 return container_of(pdev, struct nfit_test, pdev); 182 + } 183 + 184 + static int nd_intel_test_get_fw_info(struct nfit_test *t, 185 + struct nd_intel_fw_info *nd_cmd, unsigned int buf_len, 186 + int idx) 187 + { 188 + struct device *dev = &t->pdev.dev; 189 + struct nfit_test_fw *fw = &t->fw[idx]; 190 + 191 + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n", 192 + __func__, t, nd_cmd, buf_len, idx); 193 + 194 + if (buf_len < sizeof(*nd_cmd)) 195 + return -EINVAL; 196 + 197 + nd_cmd->status = 0; 198 + nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE; 199 + nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN; 200 + nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL; 201 + nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME; 202 + nd_cmd->update_cap = 0; 203 + nd_cmd->fis_version = INTEL_FW_FIS_VERSION; 204 + nd_cmd->run_version = 0; 205 + nd_cmd->updated_version = fw->version; 206 + 207 + return 0; 208 + } 209 + 210 + static int nd_intel_test_start_update(struct nfit_test *t, 211 + struct nd_intel_fw_start 
*nd_cmd, unsigned int buf_len, 212 + int idx) 213 + { 214 + struct device *dev = &t->pdev.dev; 215 + struct nfit_test_fw *fw = &t->fw[idx]; 216 + 217 + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", 218 + __func__, t, nd_cmd, buf_len, idx); 219 + 220 + if (buf_len < sizeof(*nd_cmd)) 221 + return -EINVAL; 222 + 223 + if (fw->state != FW_STATE_NEW) { 224 + /* extended status, FW update in progress */ 225 + nd_cmd->status = 0x10007; 226 + return 0; 227 + } 228 + 229 + fw->state = FW_STATE_IN_PROGRESS; 230 + fw->context++; 231 + fw->size_received = 0; 232 + nd_cmd->status = 0; 233 + nd_cmd->context = fw->context; 234 + 235 + dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context); 236 + 237 + return 0; 238 + } 239 + 240 + static int nd_intel_test_send_data(struct nfit_test *t, 241 + struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len, 242 + int idx) 243 + { 244 + struct device *dev = &t->pdev.dev; 245 + struct nfit_test_fw *fw = &t->fw[idx]; 246 + u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length]; 247 + 248 + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", 249 + __func__, t, nd_cmd, buf_len, idx); 250 + 251 + if (buf_len < sizeof(*nd_cmd)) 252 + return -EINVAL; 253 + 254 + 255 + dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status); 256 + dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]); 257 + dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1, 258 + nd_cmd->data[nd_cmd->length-1]); 259 + 260 + if (fw->state != FW_STATE_IN_PROGRESS) { 261 + dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__); 262 + *status = 0x5; 263 + return 0; 264 + } 265 + 266 + if (nd_cmd->context != fw->context) { 267 + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", 268 + __func__, nd_cmd->context, fw->context); 269 + *status = 0x10007; 270 + return 0; 271 + } 272 + 273 + /* 274 + * check offset + len > size of fw storage 275 + * check length is > max send length 276 + */ 277 + if 
(nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE || 278 + nd_cmd->length > INTEL_FW_MAX_SEND_LEN) { 279 + *status = 0x3; 280 + dev_dbg(dev, "%s: buffer boundary violation\n", __func__); 281 + return 0; 282 + } 283 + 284 + fw->size_received += nd_cmd->length; 285 + dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n", 286 + __func__, nd_cmd->length, fw->size_received); 287 + *status = 0; 288 + return 0; 289 + } 290 + 291 + static int nd_intel_test_finish_fw(struct nfit_test *t, 292 + struct nd_intel_fw_finish_update *nd_cmd, 293 + unsigned int buf_len, int idx) 294 + { 295 + struct device *dev = &t->pdev.dev; 296 + struct nfit_test_fw *fw = &t->fw[idx]; 297 + 298 + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", 299 + __func__, t, nd_cmd, buf_len, idx); 300 + 301 + if (fw->state == FW_STATE_UPDATED) { 302 + /* update already done, need cold boot */ 303 + nd_cmd->status = 0x20007; 304 + return 0; 305 + } 306 + 307 + dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n", 308 + __func__, nd_cmd->context, nd_cmd->ctrl_flags); 309 + 310 + switch (nd_cmd->ctrl_flags) { 311 + case 0: /* finish */ 312 + if (nd_cmd->context != fw->context) { 313 + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", 314 + __func__, nd_cmd->context, 315 + fw->context); 316 + nd_cmd->status = 0x10007; 317 + return 0; 318 + } 319 + nd_cmd->status = 0; 320 + fw->state = FW_STATE_VERIFY; 321 + /* set 1 second of time for firmware "update" */ 322 + fw->end_time = jiffies + HZ; 323 + break; 324 + 325 + case 1: /* abort */ 326 + fw->size_received = 0; 327 + /* successfully aborted status */ 328 + nd_cmd->status = 0x40007; 329 + fw->state = FW_STATE_NEW; 330 + dev_dbg(dev, "%s: abort successful\n", __func__); 331 + break; 332 + 333 + default: /* bad control flag */ 334 + dev_warn(dev, "%s: unknown control flag: %#x\n", 335 + __func__, nd_cmd->ctrl_flags); 336 + return -EINVAL; 337 + } 338 + 339 + return 0; 340 + } 341 + 342 + static int 
nd_intel_test_finish_query(struct nfit_test *t, 343 + struct nd_intel_fw_finish_query *nd_cmd, 344 + unsigned int buf_len, int idx) 345 + { 346 + struct device *dev = &t->pdev.dev; 347 + struct nfit_test_fw *fw = &t->fw[idx]; 348 + 349 + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", 350 + __func__, t, nd_cmd, buf_len, idx); 351 + 352 + if (buf_len < sizeof(*nd_cmd)) 353 + return -EINVAL; 354 + 355 + if (nd_cmd->context != fw->context) { 356 + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", 357 + __func__, nd_cmd->context, fw->context); 358 + nd_cmd->status = 0x10007; 359 + return 0; 360 + } 361 + 362 + dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context); 363 + 364 + switch (fw->state) { 365 + case FW_STATE_NEW: 366 + nd_cmd->updated_fw_rev = 0; 367 + nd_cmd->status = 0; 368 + dev_dbg(dev, "%s: new state\n", __func__); 369 + break; 370 + 371 + case FW_STATE_IN_PROGRESS: 372 + /* sequencing error */ 373 + nd_cmd->status = 0x40007; 374 + nd_cmd->updated_fw_rev = 0; 375 + dev_dbg(dev, "%s: sequence error\n", __func__); 376 + break; 377 + 378 + case FW_STATE_VERIFY: 379 + if (time_is_after_jiffies64(fw->end_time)) { 380 + nd_cmd->updated_fw_rev = 0; 381 + nd_cmd->status = 0x20007; 382 + dev_dbg(dev, "%s: still verifying\n", __func__); 383 + break; 384 + } 385 + 386 + dev_dbg(dev, "%s: transition out verify\n", __func__); 387 + fw->state = FW_STATE_UPDATED; 388 + /* we are going to fall through if it's "done" */ 389 + case FW_STATE_UPDATED: 390 + nd_cmd->status = 0; 391 + /* bogus test version */ 392 + fw->version = nd_cmd->updated_fw_rev = 393 + INTEL_FW_FAKE_VERSION; 394 + dev_dbg(dev, "%s: updated\n", __func__); 395 + break; 396 + 397 + default: /* we should never get here */ 398 + return -EINVAL; 399 + } 400 + 401 + return 0; 194 402 } 195 403 196 404 static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, ··· 672 440 return 0; 673 441 } 674 442 675 - static int nfit_test_cmd_smart(struct 
nd_cmd_smart *smart, unsigned int buf_len) 443 + static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len, 444 + struct nd_intel_smart *smart_data) 676 445 { 677 - static const struct nd_smart_payload smart_data = { 678 - .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID 679 - | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID 680 - | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID, 681 - .health = ND_SMART_NON_CRITICAL_HEALTH, 682 - .temperature = 23 * 16, 683 - .spares = 75, 684 - .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, 685 - .life_used = 5, 686 - .shutdown_state = 0, 687 - .vendor_size = 0, 688 - }; 689 - 690 446 if (buf_len < sizeof(*smart)) 691 447 return -EINVAL; 692 - memcpy(smart->data, &smart_data, sizeof(smart_data)); 448 + memcpy(smart, smart_data, sizeof(*smart)); 693 449 return 0; 694 450 } 695 451 696 - static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, 697 - unsigned int buf_len) 452 + static int nfit_test_cmd_smart_threshold( 453 + struct nd_intel_smart_threshold *out, 454 + unsigned int buf_len, 455 + struct nd_intel_smart_threshold *smart_t) 698 456 { 699 - static const struct nd_smart_threshold_payload smart_t_data = { 700 - .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, 701 - .temperature = 40 * 16, 702 - .spares = 5, 703 - }; 704 - 705 457 if (buf_len < sizeof(*smart_t)) 706 458 return -EINVAL; 707 - memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); 459 + memcpy(out, smart_t, sizeof(*smart_t)); 460 + return 0; 461 + } 462 + 463 + static void smart_notify(struct device *bus_dev, 464 + struct device *dimm_dev, struct nd_intel_smart *smart, 465 + struct nd_intel_smart_threshold *thresh) 466 + { 467 + dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n", 468 + __func__, thresh->alarm_control, thresh->spares, 469 + smart->spares, thresh->media_temperature, 470 + smart->media_temperature, thresh->ctrl_temperature, 471 + 
smart->ctrl_temperature); 472 + if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP) 473 + && smart->spares 474 + <= thresh->spares) 475 + || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP) 476 + && smart->media_temperature 477 + >= thresh->media_temperature) 478 + || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) 479 + && smart->ctrl_temperature 480 + >= thresh->ctrl_temperature)) { 481 + device_lock(bus_dev); 482 + __acpi_nvdimm_notify(dimm_dev, 0x81); 483 + device_unlock(bus_dev); 484 + } 485 + } 486 + 487 + static int nfit_test_cmd_smart_set_threshold( 488 + struct nd_intel_smart_set_threshold *in, 489 + unsigned int buf_len, 490 + struct nd_intel_smart_threshold *thresh, 491 + struct nd_intel_smart *smart, 492 + struct device *bus_dev, struct device *dimm_dev) 493 + { 494 + unsigned int size; 495 + 496 + size = sizeof(*in) - 4; 497 + if (buf_len < size) 498 + return -EINVAL; 499 + memcpy(thresh->data, in, size); 500 + in->status = 0; 501 + smart_notify(bus_dev, dimm_dev, smart, thresh); 502 + 708 503 return 0; 709 504 } 710 505 ··· 822 563 return 0; 823 564 } 824 565 566 + static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t, 567 + struct nd_intel_lss *nd_cmd, unsigned int buf_len) 568 + { 569 + struct device *dev = &t->pdev.dev; 570 + 571 + if (buf_len < sizeof(*nd_cmd)) 572 + return -EINVAL; 573 + 574 + switch (nd_cmd->enable) { 575 + case 0: 576 + nd_cmd->status = 0; 577 + dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n", 578 + __func__); 579 + break; 580 + case 1: 581 + nd_cmd->status = 0; 582 + dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n", 583 + __func__); 584 + break; 585 + default: 586 + dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable); 587 + nd_cmd->status = 0x3; 588 + break; 589 + } 590 + 591 + 592 + return 0; 593 + } 594 + 595 + static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) 596 + { 597 + int i; 598 + 599 + /* lookup per-dimm data */ 600 + for (i = 0; i < 
ARRAY_SIZE(handle); i++) 601 + if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) 602 + break; 603 + if (i >= ARRAY_SIZE(handle)) 604 + return -ENXIO; 605 + 606 + if ((1 << func) & dimm_fail_cmd_flags[i]) 607 + return -EIO; 608 + 609 + return i; 610 + } 611 + 825 612 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, 826 613 struct nvdimm *nvdimm, unsigned int cmd, void *buf, 827 614 unsigned int buf_len, int *cmd_rc) ··· 896 591 func = call_pkg->nd_command; 897 592 if (call_pkg->nd_family != nfit_mem->family) 898 593 return -ENOTTY; 594 + 595 + i = get_dimm(nfit_mem, func); 596 + if (i < 0) 597 + return i; 598 + 599 + switch (func) { 600 + case ND_INTEL_ENABLE_LSS_STATUS: 601 + return nd_intel_test_cmd_set_lss_status(t, 602 + buf, buf_len); 603 + case ND_INTEL_FW_GET_INFO: 604 + return nd_intel_test_get_fw_info(t, buf, 605 + buf_len, i - t->dcr_idx); 606 + case ND_INTEL_FW_START_UPDATE: 607 + return nd_intel_test_start_update(t, buf, 608 + buf_len, i - t->dcr_idx); 609 + case ND_INTEL_FW_SEND_DATA: 610 + return nd_intel_test_send_data(t, buf, 611 + buf_len, i - t->dcr_idx); 612 + case ND_INTEL_FW_FINISH_UPDATE: 613 + return nd_intel_test_finish_fw(t, buf, 614 + buf_len, i - t->dcr_idx); 615 + case ND_INTEL_FW_FINISH_QUERY: 616 + return nd_intel_test_finish_query(t, buf, 617 + buf_len, i - t->dcr_idx); 618 + case ND_INTEL_SMART: 619 + return nfit_test_cmd_smart(buf, buf_len, 620 + &t->smart[i - t->dcr_idx]); 621 + case ND_INTEL_SMART_THRESHOLD: 622 + return nfit_test_cmd_smart_threshold(buf, 623 + buf_len, 624 + &t->smart_threshold[i - 625 + t->dcr_idx]); 626 + case ND_INTEL_SMART_SET_THRESHOLD: 627 + return nfit_test_cmd_smart_set_threshold(buf, 628 + buf_len, 629 + &t->smart_threshold[i - 630 + t->dcr_idx], 631 + &t->smart[i - t->dcr_idx], 632 + &t->pdev.dev, t->dimm_dev[i]); 633 + default: 634 + return -ENOTTY; 635 + } 899 636 } 900 637 901 638 if (!test_bit(cmd, &cmd_mask) 902 639 || !test_bit(func, &nfit_mem->dsm_mask)) 903 640 return 
-ENOTTY; 904 641 905 - /* lookup label space for the given dimm */ 906 - for (i = 0; i < ARRAY_SIZE(handle); i++) 907 - if (__to_nfit_memdev(nfit_mem)->device_handle == 908 - handle[i]) 909 - break; 910 - if (i >= ARRAY_SIZE(handle)) 911 - return -ENXIO; 912 - 913 - if ((1 << func) & dimm_fail_cmd_flags[i]) 914 - return -EIO; 642 + i = get_dimm(nfit_mem, func); 643 + if (i < 0) 644 + return i; 915 645 916 646 switch (func) { 917 647 case ND_CMD_GET_CONFIG_SIZE: ··· 959 619 case ND_CMD_SET_CONFIG_DATA: 960 620 rc = nfit_test_cmd_set_config_data(buf, buf_len, 961 621 t->label[i - t->dcr_idx]); 962 - break; 963 - case ND_CMD_SMART: 964 - rc = nfit_test_cmd_smart(buf, buf_len); 965 - break; 966 - case ND_CMD_SMART_THRESHOLD: 967 - rc = nfit_test_cmd_smart_threshold(buf, buf_len); 968 - device_lock(&t->pdev.dev); 969 - __acpi_nvdimm_notify(t->dimm_dev[i], 0x81); 970 - device_unlock(&t->pdev.dev); 971 622 break; 972 623 default: 973 624 return -ENOTTY; ··· 1203 872 NULL, 1204 873 }; 1205 874 875 + static void smart_init(struct nfit_test *t) 876 + { 877 + int i; 878 + const struct nd_intel_smart_threshold smart_t_data = { 879 + .alarm_control = ND_INTEL_SMART_SPARE_TRIP 880 + | ND_INTEL_SMART_TEMP_TRIP, 881 + .media_temperature = 40 * 16, 882 + .ctrl_temperature = 30 * 16, 883 + .spares = 5, 884 + }; 885 + const struct nd_intel_smart smart_data = { 886 + .flags = ND_INTEL_SMART_HEALTH_VALID 887 + | ND_INTEL_SMART_SPARES_VALID 888 + | ND_INTEL_SMART_ALARM_VALID 889 + | ND_INTEL_SMART_USED_VALID 890 + | ND_INTEL_SMART_SHUTDOWN_VALID 891 + | ND_INTEL_SMART_MTEMP_VALID, 892 + .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, 893 + .media_temperature = 23 * 16, 894 + .ctrl_temperature = 30 * 16, 895 + .pmic_temperature = 40 * 16, 896 + .spares = 75, 897 + .alarm_flags = ND_INTEL_SMART_SPARE_TRIP 898 + | ND_INTEL_SMART_TEMP_TRIP, 899 + .ait_status = 1, 900 + .life_used = 5, 901 + .shutdown_state = 0, 902 + .vendor_size = 0, 903 + .shutdown_count = 100, 904 + }; 905 + 906 + for (i = 
0; i < t->num_dcr; i++) { 907 + memcpy(&t->smart[i], &smart_data, sizeof(smart_data)); 908 + memcpy(&t->smart_threshold[i], &smart_t_data, 909 + sizeof(smart_t_data)); 910 + } 911 + } 912 + 1206 913 static int nfit_test0_alloc(struct nfit_test *t) 1207 914 { 1208 915 size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA ··· 1250 881 window_size) * NUM_DCR 1251 882 + sizeof(struct acpi_nfit_data_region) * NUM_BDW 1252 883 + (sizeof(struct acpi_nfit_flush_address) 1253 - + sizeof(u64) * NUM_HINTS) * NUM_DCR; 884 + + sizeof(u64) * NUM_HINTS) * NUM_DCR 885 + + sizeof(struct acpi_nfit_capabilities); 1254 886 int i; 1255 887 1256 888 t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); ··· 1309 939 return -ENOMEM; 1310 940 } 1311 941 942 + smart_init(t); 1312 943 return ars_state_init(&t->pdev.dev, &t->ars_state); 1313 944 } 1314 945 ··· 1340 969 if (!t->spa_set[1]) 1341 970 return -ENOMEM; 1342 971 972 + smart_init(t); 1343 973 return ars_state_init(&t->pdev.dev, &t->ars_state); 1344 974 } 1345 975 ··· 1365 993 struct acpi_nfit_control_region *dcr; 1366 994 struct acpi_nfit_data_region *bdw; 1367 995 struct acpi_nfit_flush_address *flush; 996 + struct acpi_nfit_capabilities *pcap; 1368 997 unsigned int offset, i; 1369 998 1370 999 /* ··· 1873 1500 for (i = 0; i < NUM_HINTS; i++) 1874 1501 flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); 1875 1502 1503 + /* platform capabilities */ 1504 + pcap = nfit_buf + offset + flush_hint_size * 4; 1505 + pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; 1506 + pcap->header.length = sizeof(*pcap); 1507 + pcap->highest_capability = 1; 1508 + pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | 1509 + ACPI_NFIT_CAPABILITY_MEM_FLUSH; 1510 + 1876 1511 if (t->setup_hotplug) { 1877 - offset = offset + flush_hint_size * 4; 1512 + offset = offset + flush_hint_size * 4 + sizeof(*pcap); 1878 1513 /* dcr-descriptor4: blk */ 1879 1514 dcr = nfit_buf + offset; 1880 1515 dcr->header.type = 
ACPI_NFIT_TYPE_CONTROL_REGION; ··· 2023 1642 set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); 2024 1643 set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); 2025 1644 set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); 2026 - set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en); 1645 + set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); 1646 + set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); 1647 + set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); 2027 1648 set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); 2028 1649 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); 2029 1650 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); 2030 1651 set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); 2031 1652 set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); 2032 - set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); 2033 1653 set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); 2034 1654 set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); 2035 1655 set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); 2036 1656 set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); 1657 + set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); 1658 + set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); 1659 + set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); 1660 + set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en); 1661 + set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en); 1662 + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); 2037 1663 } 2038 1664 2039 1665 static void nfit_test1_setup(struct nfit_test *t) ··· 2138 1750 set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); 2139 1751 set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); 2140 1752 set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); 1753 + 
set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); 2141 1754 } 2142 1755 2143 1756 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, ··· 2443 2054 sizeof(struct nfit_test_dcr *), GFP_KERNEL); 2444 2055 nfit_test->dcr_dma = devm_kcalloc(dev, num, 2445 2056 sizeof(dma_addr_t), GFP_KERNEL); 2057 + nfit_test->smart = devm_kcalloc(dev, num, 2058 + sizeof(struct nd_intel_smart), GFP_KERNEL); 2059 + nfit_test->smart_threshold = devm_kcalloc(dev, num, 2060 + sizeof(struct nd_intel_smart_threshold), 2061 + GFP_KERNEL); 2062 + nfit_test->fw = devm_kcalloc(dev, num, 2063 + sizeof(struct nfit_test_fw), GFP_KERNEL); 2446 2064 if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label 2447 2065 && nfit_test->label_dma && nfit_test->dcr 2448 2066 && nfit_test->dcr_dma && nfit_test->flush 2449 - && nfit_test->flush_dma) 2067 + && nfit_test->flush_dma 2068 + && nfit_test->fw) 2450 2069 /* pass */; 2451 2070 else 2452 2071 return -ENOMEM; ··· 2555 2158 static __init int nfit_test_init(void) 2556 2159 { 2557 2160 int rc, i; 2161 + 2162 + pmem_test(); 2163 + libnvdimm_test(); 2164 + acpi_nfit_test(); 2165 + device_dax_test(); 2558 2166 2559 2167 nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); 2560 2168
+134
tools/testing/nvdimm/test/nfit_test.h
··· 84 84 } __packed record[0]; 85 85 } __packed; 86 86 87 + #define ND_INTEL_SMART 1 88 + #define ND_INTEL_SMART_THRESHOLD 2 89 + #define ND_INTEL_ENABLE_LSS_STATUS 10 90 + #define ND_INTEL_FW_GET_INFO 12 91 + #define ND_INTEL_FW_START_UPDATE 13 92 + #define ND_INTEL_FW_SEND_DATA 14 93 + #define ND_INTEL_FW_FINISH_UPDATE 15 94 + #define ND_INTEL_FW_FINISH_QUERY 16 95 + #define ND_INTEL_SMART_SET_THRESHOLD 17 96 + 97 + #define ND_INTEL_SMART_HEALTH_VALID (1 << 0) 98 + #define ND_INTEL_SMART_SPARES_VALID (1 << 1) 99 + #define ND_INTEL_SMART_USED_VALID (1 << 2) 100 + #define ND_INTEL_SMART_MTEMP_VALID (1 << 3) 101 + #define ND_INTEL_SMART_CTEMP_VALID (1 << 4) 102 + #define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5) 103 + #define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6) 104 + #define ND_INTEL_SMART_PTEMP_VALID (1 << 7) 105 + #define ND_INTEL_SMART_ALARM_VALID (1 << 9) 106 + #define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10) 107 + #define ND_INTEL_SMART_VENDOR_VALID (1 << 11) 108 + #define ND_INTEL_SMART_SPARE_TRIP (1 << 0) 109 + #define ND_INTEL_SMART_TEMP_TRIP (1 << 1) 110 + #define ND_INTEL_SMART_CTEMP_TRIP (1 << 2) 111 + #define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) 112 + #define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) 113 + #define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) 114 + 115 + struct nd_intel_smart { 116 + __u32 status; 117 + union { 118 + struct { 119 + __u32 flags; 120 + __u8 reserved0[4]; 121 + __u8 health; 122 + __u8 spares; 123 + __u8 life_used; 124 + __u8 alarm_flags; 125 + __u16 media_temperature; 126 + __u16 ctrl_temperature; 127 + __u32 shutdown_count; 128 + __u8 ait_status; 129 + __u16 pmic_temperature; 130 + __u8 reserved1[8]; 131 + __u8 shutdown_state; 132 + __u32 vendor_size; 133 + __u8 vendor_data[92]; 134 + } __packed; 135 + __u8 data[128]; 136 + }; 137 + } __packed; 138 + 139 + struct nd_intel_smart_threshold { 140 + __u32 status; 141 + union { 142 + struct { 143 + __u16 alarm_control; 144 + __u8 spares; 145 + __u16 media_temperature; 146 + 
__u16 ctrl_temperature; 147 + __u8 reserved[1]; 148 + } __packed; 149 + __u8 data[8]; 150 + }; 151 + } __packed; 152 + 153 + struct nd_intel_smart_set_threshold { 154 + __u16 alarm_control; 155 + __u8 spares; 156 + __u16 media_temperature; 157 + __u16 ctrl_temperature; 158 + __u32 status; 159 + } __packed; 160 + 161 + #define INTEL_FW_STORAGE_SIZE 0x100000 162 + #define INTEL_FW_MAX_SEND_LEN 0xFFEC 163 + #define INTEL_FW_QUERY_INTERVAL 250000 164 + #define INTEL_FW_QUERY_MAX_TIME 3000000 165 + #define INTEL_FW_FIS_VERSION 0x0105 166 + #define INTEL_FW_FAKE_VERSION 0xffffffffabcd 167 + 168 + enum intel_fw_update_state { 169 + FW_STATE_NEW = 0, 170 + FW_STATE_IN_PROGRESS, 171 + FW_STATE_VERIFY, 172 + FW_STATE_UPDATED, 173 + }; 174 + 175 + struct nd_intel_fw_info { 176 + __u32 status; 177 + __u32 storage_size; 178 + __u32 max_send_len; 179 + __u32 query_interval; 180 + __u32 max_query_time; 181 + __u8 update_cap; 182 + __u8 reserved[3]; 183 + __u32 fis_version; 184 + __u64 run_version; 185 + __u64 updated_version; 186 + } __packed; 187 + 188 + struct nd_intel_fw_start { 189 + __u32 status; 190 + __u32 context; 191 + } __packed; 192 + 193 + /* this one has the output first because the variable input data size */ 194 + struct nd_intel_fw_send_data { 195 + __u32 context; 196 + __u32 offset; 197 + __u32 length; 198 + __u8 data[0]; 199 + /* this field is not declared due ot variable data from input */ 200 + /* __u32 status; */ 201 + } __packed; 202 + 203 + struct nd_intel_fw_finish_update { 204 + __u8 ctrl_flags; 205 + __u8 reserved[3]; 206 + __u32 context; 207 + __u32 status; 208 + } __packed; 209 + 210 + struct nd_intel_fw_finish_query { 211 + __u32 context; 212 + __u32 status; 213 + __u64 updated_fw_rev; 214 + } __packed; 215 + 216 + struct nd_intel_lss { 217 + __u8 enable; 218 + __u32 status; 219 + } __packed; 220 + 87 221 union acpi_object; 88 222 typedef void *acpi_handle; 89 223
+21
tools/testing/nvdimm/watermark.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright(c) 2018 Intel Corporation. All rights reserved. 3 + #ifndef _TEST_NVDIMM_WATERMARK_H_ 4 + #define _TEST_NVDIMM_WATERMARK_H_ 5 + int pmem_test(void); 6 + int libnvdimm_test(void); 7 + int acpi_nfit_test(void); 8 + int device_dax_test(void); 9 + 10 + /* 11 + * dummy routine for nfit_test to validate it is linking to the properly 12 + * mocked module and not the standard one from the base tree. 13 + */ 14 + #define nfit_test_watermark(x) \ 15 + int x##_test(void) \ 16 + { \ 17 + pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \ 18 + return 0; \ 19 + } \ 20 + EXPORT_SYMBOL(x##_test) 21 + #endif /* _TEST_NVDIMM_WATERMARK_H_ */