Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
"nvdimm fixes for v4.8, two of them are tagged for -stable:

- Fix devm_memremap_pages() to use track_pfn_insert(). Otherwise,
DAX pmd mappings end up with an uncached pgprot, and unusable
performance for the device-dax interface. The device-dax interface
appeared in 4.7 so this is tagged for -stable.

- Fix a couple of VM_BUG_ON() checks in the show_smap() path to
understand DAX pmd entries. This fix is tagged for -stable.

- Fix a mis-merge of the nfit machine-check handler to flip the
polarity of an if() to match the final version of the patch that
Vishal sent for 4.8-rc1. Without this the nfit machine check
handler never detects / inserts new 'badblocks' entries which
applications use to identify lost portions of files.

- For test purposes, fix the nvdimm_clear_poison() path to operate on
legacy / simulated nvdimm memory ranges. Without this fix a test
can set badblocks, but never clear them on these ranges.

- Fix the range checking done by dax_dev_pmd_fault(). This is not
tagged for -stable since this problem is mitigated by specifying
aligned resources at device-dax setup time.

These patches have appeared in a -next release over the past week. The
recent rebase you can see in the timestamps was to drop an invalid fix
as identified by the updated device-dax unit tests [1]. The -mm
touches have an ack from Andrew"

[1]: "[ndctl PATCH 0/3] device-dax test for recent kernel bugs"
https://lists.01.org/pipermail/linux-nvdimm/2016-September/006855.html

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
libnvdimm: allow legacy (e820) pmem region to clear bad blocks
nfit, mce: Fix SPA matching logic in MCE handler
mm: fix cache mode of dax pmd mappings
mm: fix show_smap() for zone_device-pmd ranges
dax: fix mapping size check

Changed files (+30 -12):

arch/x86/mm/pat.c (+10 -7)

···
 }
 
 /*
- * prot is passed in as a parameter for the new mapping. If the vma has a
- * linear pfn mapping for the entire range reserve the entire vma range with
- * single reserve_pfn_range call.
+ * prot is passed in as a parameter for the new mapping. If the vma has
+ * a linear pfn mapping for the entire range, or no vma is provided,
+ * reserve the entire pfn + size range with single reserve_pfn_range
+ * call.
  */
 int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 		    unsigned long pfn, unsigned long addr, unsigned long size)
···
 	enum page_cache_mode pcm;
 
 	/* reserve the whole chunk starting from paddr */
-	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+	if (!vma || (addr == vma->vm_start
+				&& size == (vma->vm_end - vma->vm_start))) {
 		int ret;
 
 		ret = reserve_pfn_range(paddr, size, prot, 0);
-		if (!ret)
+		if (ret == 0 && vma)
 			vma->vm_flags |= VM_PAT;
 		return ret;
 	}
···
 	resource_size_t paddr;
 	unsigned long prot;
 
-	if (!(vma->vm_flags & VM_PAT))
+	if (vma && !(vma->vm_flags & VM_PAT))
 		return;
 
 	/* free the chunk starting from pfn or the whole chunk */
···
 		size = vma->vm_end - vma->vm_start;
 	}
 	free_pfn_range(paddr, size);
-	vma->vm_flags &= ~VM_PAT;
+	if (vma)
+		vma->vm_flags &= ~VM_PAT;
 }
 
 /*
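
The point of the NULL-vma handling above is that a caller with no vma at all,
such as devm_memremap_pages(), can now reserve and release a pfn range's
memory type directly. A minimal kernel-context sketch of that calling
convention (not a standalone program; the function name
example_reserve_pfn_range is illustrative, and the calls mirror the
kernel/memremap.c hunk further below):

/*
 * Kernel-context sketch of the NULL-vma calling convention enabled
 * above.  align_start/align_size stand for the driver's section-aligned
 * physical range; hotplug and use of the range are elided.
 */
static int example_reserve_pfn_range(resource_size_t align_start,
		resource_size_t align_size)
{
	pgprot_t pgprot = PAGE_KERNEL;
	int error;

	/* record the range's memory type in PAT; there is no vma yet */
	error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
			align_size);
	if (error)
		return error;

	/* ... arch_add_memory() and use of the range go here ... */

	/* teardown: drop the reservation, again with a NULL vma */
	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
	return 0;
}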

drivers/acpi/nfit/mce.c (+1 -1)

···
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
 		struct acpi_nfit_system_address *spa = nfit_spa->spa;
 
-		if (nfit_spa_type(spa) == NFIT_SPA_PM)
+		if (nfit_spa_type(spa) != NFIT_SPA_PM)
 			continue;
 		/* find the spa that covers the mce addr */
 		if (spa->address > mce->addr)

drivers/dax/dax.c (+1 -1)

···
 	}
 
 	pgoff = linear_page_index(vma, pmd_addr);
-	phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE);
+	phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
 	if (phys == -1) {
 		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
 				pgoff);
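
The size argument matters because dax_dev_pmd_fault() is about to install a
2MB mapping, so the lookup has to confirm that the whole pmd falls inside the
device's resource, not just the first 4K page. A standalone sketch of the
arithmetic; resource_fits() and the sample numbers are illustrative, not
kernel code:

/*
 * Illustrative, userspace-compilable sketch of the range check behind
 * the fix above.  Only the PAGE_SIZE-vs-PMD_SIZE distinction mirrors
 * the kernel change; the helper and numbers are made up for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE	4096ULL
#define EX_PMD_SIZE	(2ULL * 1024 * 1024)

static bool resource_fits(uint64_t res_start, uint64_t res_len,
			  uint64_t phys, uint64_t size)
{
	return phys >= res_start && phys + size <= res_start + res_len;
}

int main(void)
{
	uint64_t res_start = 0x100000000ULL;		/* example device base */
	uint64_t res_len = 5ULL * 1024 * 1024;		/* 5M: not pmd-aligned at the end */
	uint64_t phys = res_start + 4ULL * 1024 * 1024;	/* last 2M-aligned offset */

	/* Old check: only the first 4K page has to fit -> passes. */
	printf("PAGE_SIZE check: %d\n",
	       resource_fits(res_start, res_len, phys, EX_PAGE_SIZE));
	/* New check: the whole 2M pmd has to fit -> fails, as it should. */
	printf("PMD_SIZE check:  %d\n",
	       resource_fits(res_start, res_len, phys, EX_PMD_SIZE));
	return 0;
}

With an unaligned 5M resource, the 4K check accepts an offset whose 2M
mapping would run 1M past the end of the device, which is exactly the case
the fix (and aligned device-dax resources) guards against.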

drivers/nvdimm/bus.c (+5 -1)

···
 		return -ENXIO;
 
 	nd_desc = nvdimm_bus->nd_desc;
+	/*
+	 * if ndctl does not exist, it's PMEM_LEGACY and
+	 * we want to just pretend everything is handled.
+	 */
 	if (!nd_desc->ndctl)
-		return -ENXIO;
+		return len;
 
 	memset(&ars_cap, 0, sizeof(ars_cap));
 	ars_cap.address = phys;

fs/proc/task_mmu.c (+2)

···
 		mss->anonymous_thp += HPAGE_PMD_SIZE;
 	else if (PageSwapBacked(page))
 		mss->shmem_thp += HPAGE_PMD_SIZE;
+	else if (is_zone_device_page(page))
+		/* pass */;
 	else
 		VM_BUG_ON_PAGE(1, page);
 	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
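
For context, the path above runs whenever /proc/<pid>/smaps is read for a
process holding a huge device-dax mapping; before this fix that walk tripped
the VM_BUG_ON() on CONFIG_DEBUG_VM kernels. A hedged userspace sketch of the
sequence that exercises it (/dev/dax0.0 and the 2M size are examples and
assume a configured device-dax instance with 2M alignment):

/*
 * Illustrative reproducer: map one pmd of a device-dax instance, fault
 * it in, then read this process's smaps, which walks the zone_device
 * pmd via show_smap().  Device path, size, and fault behavior depend on
 * the local device-dax configuration.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2UL << 20;			/* one pmd */
	char cmd[64];
	int fd = open("/dev/dax0.0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/dax0.0");
		return 1;
	}

	void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(addr, 0, len);			/* fault in the mapping */

	/* reading smaps for this vma walks the device-dax pmd entry */
	snprintf(cmd, sizeof(cmd), "cat /proc/%d/smaps", (int)getpid());
	(void)system(cmd);

	munmap(addr, len);
	close(fd);
	return 0;
}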

kernel/memremap.c (+9)

···
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	arch_remove_memory(align_start, align_size);
+	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
 	pgmap_radix_release(res);
 	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
 			"%s: failed to free all reserved pages\n", __func__);
···
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
 	resource_size_t key, align_start, align_size, align_end;
+	pgprot_t pgprot = PAGE_KERNEL;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
 	int error, nid, is_ram;
···
 	if (nid < 0)
 		nid = numa_mem_id();
 
+	error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
+			align_size);
+	if (error)
+		goto err_pfn_remap;
+
 	error = arch_add_memory(nid, align_start, align_size, true);
 	if (error)
 		goto err_add_memory;
···
 	return __va(res->start);
 
  err_add_memory:
+	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ err_pfn_remap:
  err_radix:
 	pgmap_radix_release(res);
 	devres_free(page_map);

mm/huge_memory.c (+2 -2)

···
 		goto out;
 
 	page = pmd_page(*pmd);
-	VM_BUG_ON_PAGE(!PageHead(page), page);
+	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
 	if (flags & FOLL_TOUCH)
 		touch_pmd(vma, addr, pmd);
 	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
···
 	}
 skip_mlock:
 	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
-	VM_BUG_ON_PAGE(!PageCompound(page), page);
+	VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
 	if (flags & FOLL_GET)
 		get_page(page);