Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
"libnvdimm updates for the latest ACPI and UEFI specifications. This
pull request also extends 'struct dax_operations', making it possible
to undo the abuse of copy_user_nocache() for copy operations to pmem.

The dax work was originally held back from 4.12 in order to address concerns raised by Al.

Summary:

- Introduce the _flushcache() family of memory copy helpers and use
them for persistent memory write operations on x86. The
_flushcache() semantic indicates that the cache is either bypassed
for the copy operation (movnt) or any lines dirtied by the copy
operation are written back (clwb, clflushopt, or clflush).

- Extend dax_operations with ->copy_from_iter() and ->flush()
operations. These operations and other infrastructure updates allow
all persistent memory specific dax functionality to be pushed into
libnvdimm and the pmem driver directly. It also allows dax-specific
sysfs attributes to be linked to a host device, for example:
/sys/block/pmem0/dax/write_cache

- Add support for the new NVDIMM platform/firmware mechanisms
introduced in ACPI 6.2 and UEFI 2.7. This support includes the v1.2
namespace label format, extensions to the address-range-scrub
command set, new error injection commands, and a new BTT
(block-translation-table) layout. These updates support inter-OS
and pre-OS compatibility.

- Fix a longstanding memory corruption bug in nfit_test.

- Make the pmem and nvdimm-region 'badblocks' sysfs files poll(2)
capable.

- Miscellaneous fixes and small updates across libnvdimm and the nfit
driver.

Acknowledgements that came after the branch was pushed: commit
6aa734a2f38e ("libnvdimm, region, pmem: fix 'badblocks'
sysfs_get_dirent() reference lifetime") was reviewed by Toshi Kani
<toshi.kani@hpe.com>"

* tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (42 commits)
libnvdimm, namespace: record 'lbasize' for pmem namespaces
acpi/nfit: Issue Start ARS to retrieve existing records
libnvdimm: New ACPI 6.2 DSM functions
acpi, nfit: Show bus_dsm_mask in sysfs
libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru.
acpi, nfit: Enable DSM pass thru for root functions.
libnvdimm: passthru functions clear to send
libnvdimm, btt: convert some info messages to warn/err
libnvdimm, region, pmem: fix 'badblocks' sysfs_get_dirent() reference lifetime
libnvdimm: fix the clear-error check in nsio_rw_bytes
libnvdimm, btt: fix btt_rw_page not returning errors
acpi, nfit: quiet invalid block-aperture-region warnings
libnvdimm, btt: BTT updates for UEFI 2.7 format
acpi, nfit: constify *_attribute_group
libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region
libnvdimm, pmem, dax: export a cache control attribute
dax: convert to bitmask for flags
dax: remove default copy_from_iter fallback
libnvdimm, nfit: enable support for volatile ranges
libnvdimm, pmem: fix persistence warning
...

+1505 -461
+1 -3
MAINTAINERS
··· 7680 7680 L: linux-nvdimm@lists.01.org 7681 7681 Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ 7682 7682 S: Supported 7683 - F: drivers/nvdimm/pmem.c 7684 - F: include/linux/pmem.h 7685 - F: arch/*/include/asm/pmem.h 7683 + F: drivers/nvdimm/pmem* 7686 7684 7687 7685 LIGHTNVM PLATFORM SUPPORT 7688 7686 M: Matias Bjorling <mb@lightnvm.io>
+8
arch/powerpc/sysdev/axonram.c
··· 45 45 #include <linux/of_device.h> 46 46 #include <linux/of_platform.h> 47 47 #include <linux/pfn_t.h> 48 + #include <linux/uio.h> 48 49 49 50 #include <asm/page.h> 50 51 #include <asm/prom.h> ··· 164 163 return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn); 165 164 } 166 165 166 + static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 167 + void *addr, size_t bytes, struct iov_iter *i) 168 + { 169 + return copy_from_iter(addr, bytes, i); 170 + } 171 + 167 172 static const struct dax_operations axon_ram_dax_ops = { 168 173 .direct_access = axon_ram_dax_direct_access, 174 + .copy_from_iter = axon_ram_copy_from_iter, 169 175 }; 170 176 171 177 /**
+1
arch/x86/Kconfig
··· 54 54 select ARCH_HAS_KCOV if X86_64 55 55 select ARCH_HAS_MMIO_FLUSH 56 56 select ARCH_HAS_PMEM_API if X86_64 57 + select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 57 58 select ARCH_HAS_SET_MEMORY 58 59 select ARCH_HAS_SG_CHAIN 59 60 select ARCH_HAS_STRICT_KERNEL_RWX
-136
arch/x86/include/asm/pmem.h
··· 1 - /* 2 - * Copyright(c) 2015 Intel Corporation. All rights reserved. 3 - * 4 - * This program is free software; you can redistribute it and/or modify 5 - * it under the terms of version 2 of the GNU General Public License as 6 - * published by the Free Software Foundation. 7 - * 8 - * This program is distributed in the hope that it will be useful, but 9 - * WITHOUT ANY WARRANTY; without even the implied warranty of 10 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 - * General Public License for more details. 12 - */ 13 - #ifndef __ASM_X86_PMEM_H__ 14 - #define __ASM_X86_PMEM_H__ 15 - 16 - #include <linux/uaccess.h> 17 - #include <asm/cacheflush.h> 18 - #include <asm/cpufeature.h> 19 - #include <asm/special_insns.h> 20 - 21 - #ifdef CONFIG_ARCH_HAS_PMEM_API 22 - /** 23 - * arch_memcpy_to_pmem - copy data to persistent memory 24 - * @dst: destination buffer for the copy 25 - * @src: source buffer for the copy 26 - * @n: length of the copy in bytes 27 - * 28 - * Copy data to persistent memory media via non-temporal stores so that 29 - * a subsequent pmem driver flush operation will drain posted write queues. 30 - */ 31 - static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) 32 - { 33 - int rem; 34 - 35 - /* 36 - * We are copying between two kernel buffers, if 37 - * __copy_from_user_inatomic_nocache() returns an error (page 38 - * fault) we would have already reported a general protection fault 39 - * before the WARN+BUG. 40 - */ 41 - rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); 42 - if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", 43 - __func__, dst, src, rem)) 44 - BUG(); 45 - } 46 - 47 - /** 48 - * arch_wb_cache_pmem - write back a cache range with CLWB 49 - * @vaddr: virtual start address 50 - * @size: number of bytes to write back 51 - * 52 - * Write back a cache range using the CLWB (cache line write back) 53 - * instruction. 
Note that @size is internally rounded up to be cache 54 - * line size aligned. 55 - */ 56 - static inline void arch_wb_cache_pmem(void *addr, size_t size) 57 - { 58 - u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; 59 - unsigned long clflush_mask = x86_clflush_size - 1; 60 - void *vend = addr + size; 61 - void *p; 62 - 63 - for (p = (void *)((unsigned long)addr & ~clflush_mask); 64 - p < vend; p += x86_clflush_size) 65 - clwb(p); 66 - } 67 - 68 - /** 69 - * arch_copy_from_iter_pmem - copy data from an iterator to PMEM 70 - * @addr: PMEM destination address 71 - * @bytes: number of bytes to copy 72 - * @i: iterator with source data 73 - * 74 - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. 75 - */ 76 - static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, 77 - struct iov_iter *i) 78 - { 79 - size_t len; 80 - 81 - /* TODO: skip the write-back by always using non-temporal stores */ 82 - len = copy_from_iter_nocache(addr, bytes, i); 83 - 84 - /* 85 - * In the iovec case on x86_64 copy_from_iter_nocache() uses 86 - * non-temporal stores for the bulk of the transfer, but we need 87 - * to manually flush if the transfer is unaligned. A cached 88 - * memory copy is used when destination or size is not naturally 89 - * aligned. That is: 90 - * - Require 8-byte alignment when size is 8 bytes or larger. 91 - * - Require 4-byte alignment when size is 4 bytes. 92 - * 93 - * In the non-iovec case the entire destination needs to be 94 - * flushed. 
95 - */ 96 - if (iter_is_iovec(i)) { 97 - unsigned long flushed, dest = (unsigned long) addr; 98 - 99 - if (bytes < 8) { 100 - if (!IS_ALIGNED(dest, 4) || (bytes != 4)) 101 - arch_wb_cache_pmem(addr, bytes); 102 - } else { 103 - if (!IS_ALIGNED(dest, 8)) { 104 - dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); 105 - arch_wb_cache_pmem(addr, 1); 106 - } 107 - 108 - flushed = dest - (unsigned long) addr; 109 - if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8)) 110 - arch_wb_cache_pmem(addr + bytes - 1, 1); 111 - } 112 - } else 113 - arch_wb_cache_pmem(addr, bytes); 114 - 115 - return len; 116 - } 117 - 118 - /** 119 - * arch_clear_pmem - zero a PMEM memory range 120 - * @addr: virtual start address 121 - * @size: number of bytes to zero 122 - * 123 - * Write zeros into the memory range starting at 'addr' for 'size' bytes. 124 - */ 125 - static inline void arch_clear_pmem(void *addr, size_t size) 126 - { 127 - memset(addr, 0, size); 128 - arch_wb_cache_pmem(addr, size); 129 - } 130 - 131 - static inline void arch_invalidate_pmem(void *addr, size_t size) 132 - { 133 - clflush_cache_range(addr, size); 134 - } 135 - #endif /* CONFIG_ARCH_HAS_PMEM_API */ 136 - #endif /* __ASM_X86_PMEM_H__ */
+5
arch/x86/include/asm/string_64.h
··· 109 109 return 0; 110 110 } 111 111 112 + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE 113 + #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 114 + void memcpy_flushcache(void *dst, const void *src, size_t cnt); 115 + #endif 116 + 112 117 #endif /* __KERNEL__ */ 113 118 114 119 #endif /* _ASM_X86_STRING_64_H */
+11
arch/x86/include/asm/uaccess_64.h
··· 171 171 extern long __copy_user_nocache(void *dst, const void __user *src, 172 172 unsigned size, int zerorest); 173 173 174 + extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); 175 + extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, 176 + size_t len); 177 + 174 178 static inline int 175 179 __copy_from_user_inatomic_nocache(void *dst, const void __user *src, 176 180 unsigned size) 177 181 { 178 182 kasan_check_write(dst, size); 179 183 return __copy_user_nocache(dst, src, size, 0); 184 + } 185 + 186 + static inline int 187 + __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) 188 + { 189 + kasan_check_write(dst, size); 190 + return __copy_user_flushcache(dst, src, size); 180 191 } 181 192 182 193 unsigned long
+134
arch/x86/lib/usercopy_64.c
··· 7 7 */ 8 8 #include <linux/export.h> 9 9 #include <linux/uaccess.h> 10 + #include <linux/highmem.h> 10 11 11 12 /* 12 13 * Zero Userspace ··· 74 73 clac(); 75 74 return len; 76 75 } 76 + 77 + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE 78 + /** 79 + * clean_cache_range - write back a cache range with CLWB 80 + * @vaddr: virtual start address 81 + * @size: number of bytes to write back 82 + * 83 + * Write back a cache range using the CLWB (cache line write back) 84 + * instruction. Note that @size is internally rounded up to be cache 85 + * line size aligned. 86 + */ 87 + static void clean_cache_range(void *addr, size_t size) 88 + { 89 + u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; 90 + unsigned long clflush_mask = x86_clflush_size - 1; 91 + void *vend = addr + size; 92 + void *p; 93 + 94 + for (p = (void *)((unsigned long)addr & ~clflush_mask); 95 + p < vend; p += x86_clflush_size) 96 + clwb(p); 97 + } 98 + 99 + void arch_wb_cache_pmem(void *addr, size_t size) 100 + { 101 + clean_cache_range(addr, size); 102 + } 103 + EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); 104 + 105 + long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) 106 + { 107 + unsigned long flushed, dest = (unsigned long) dst; 108 + long rc = __copy_user_nocache(dst, src, size, 0); 109 + 110 + /* 111 + * __copy_user_nocache() uses non-temporal stores for the bulk 112 + * of the transfer, but we need to manually flush if the 113 + * transfer is unaligned. A cached memory copy is used when 114 + * destination or size is not naturally aligned. That is: 115 + * - Require 8-byte alignment when size is 8 bytes or larger. 116 + * - Require 4-byte alignment when size is 4 bytes. 
117 + */ 118 + if (size < 8) { 119 + if (!IS_ALIGNED(dest, 4) || size != 4) 120 + clean_cache_range(dst, 1); 121 + } else { 122 + if (!IS_ALIGNED(dest, 8)) { 123 + dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); 124 + clean_cache_range(dst, 1); 125 + } 126 + 127 + flushed = dest - (unsigned long) dst; 128 + if (size > flushed && !IS_ALIGNED(size - flushed, 8)) 129 + clean_cache_range(dst + size - 1, 1); 130 + } 131 + 132 + return rc; 133 + } 134 + 135 + void memcpy_flushcache(void *_dst, const void *_src, size_t size) 136 + { 137 + unsigned long dest = (unsigned long) _dst; 138 + unsigned long source = (unsigned long) _src; 139 + 140 + /* cache copy and flush to align dest */ 141 + if (!IS_ALIGNED(dest, 8)) { 142 + unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest); 143 + 144 + memcpy((void *) dest, (void *) source, len); 145 + clean_cache_range((void *) dest, len); 146 + dest += len; 147 + source += len; 148 + size -= len; 149 + if (!size) 150 + return; 151 + } 152 + 153 + /* 4x8 movnti loop */ 154 + while (size >= 32) { 155 + asm("movq (%0), %%r8\n" 156 + "movq 8(%0), %%r9\n" 157 + "movq 16(%0), %%r10\n" 158 + "movq 24(%0), %%r11\n" 159 + "movnti %%r8, (%1)\n" 160 + "movnti %%r9, 8(%1)\n" 161 + "movnti %%r10, 16(%1)\n" 162 + "movnti %%r11, 24(%1)\n" 163 + :: "r" (source), "r" (dest) 164 + : "memory", "r8", "r9", "r10", "r11"); 165 + dest += 32; 166 + source += 32; 167 + size -= 32; 168 + } 169 + 170 + /* 1x8 movnti loop */ 171 + while (size >= 8) { 172 + asm("movq (%0), %%r8\n" 173 + "movnti %%r8, (%1)\n" 174 + :: "r" (source), "r" (dest) 175 + : "memory", "r8"); 176 + dest += 8; 177 + source += 8; 178 + size -= 8; 179 + } 180 + 181 + /* 1x4 movnti loop */ 182 + while (size >= 4) { 183 + asm("movl (%0), %%r8d\n" 184 + "movnti %%r8d, (%1)\n" 185 + :: "r" (source), "r" (dest) 186 + : "memory", "r8"); 187 + dest += 4; 188 + source += 4; 189 + size -= 4; 190 + } 191 + 192 + /* cache copy for remaining bytes */ 193 + if (size) { 194 + memcpy((void *) 
dest, (void *) source, size); 195 + clean_cache_range((void *) dest, size); 196 + } 197 + } 198 + EXPORT_SYMBOL_GPL(memcpy_flushcache); 199 + 200 + void memcpy_page_flushcache(char *to, struct page *page, size_t offset, 201 + size_t len) 202 + { 203 + char *from = kmap_atomic(page); 204 + 205 + memcpy_flushcache(to, from + offset, len); 206 + kunmap_atomic(from); 207 + } 208 + #endif
+6
arch/x86/mm/pageattr.c
··· 150 150 } 151 151 EXPORT_SYMBOL_GPL(clflush_cache_range); 152 152 153 + void arch_invalidate_pmem(void *addr, size_t size) 154 + { 155 + clflush_cache_range(addr, size); 156 + } 157 + EXPORT_SYMBOL_GPL(arch_invalidate_pmem); 158 + 153 159 static void __cpa_flush_all(void *arg) 154 160 { 155 161 unsigned long cache = (unsigned long)arg;
+142 -25
drivers/acpi/nfit/core.c
··· 20 20 #include <linux/list.h> 21 21 #include <linux/acpi.h> 22 22 #include <linux/sort.h> 23 - #include <linux/pmem.h> 24 23 #include <linux/io.h> 25 24 #include <linux/nd.h> 26 25 #include <asm/cacheflush.h> ··· 252 253 cmd_name = nvdimm_bus_cmd_name(cmd); 253 254 cmd_mask = nd_desc->cmd_mask; 254 255 dsm_mask = cmd_mask; 256 + if (cmd == ND_CMD_CALL) 257 + dsm_mask = nd_desc->bus_dsm_mask; 255 258 desc = nd_cmd_bus_desc(cmd); 256 259 guid = to_nfit_uuid(NFIT_DEV_BUS); 257 260 handle = adev->handle; ··· 928 927 return 0; 929 928 } 930 929 930 + static ssize_t bus_dsm_mask_show(struct device *dev, 931 + struct device_attribute *attr, char *buf) 932 + { 933 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 934 + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 935 + 936 + return sprintf(buf, "%#lx\n", nd_desc->bus_dsm_mask); 937 + } 938 + static struct device_attribute dev_attr_bus_dsm_mask = 939 + __ATTR(dsm_mask, 0444, bus_dsm_mask_show, NULL); 940 + 931 941 static ssize_t revision_show(struct device *dev, 932 942 struct device_attribute *attr, char *buf) 933 943 { ··· 1043 1031 if (nd_desc) { 1044 1032 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 1045 1033 1046 - rc = acpi_nfit_ars_rescan(acpi_desc); 1034 + rc = acpi_nfit_ars_rescan(acpi_desc, 0); 1047 1035 } 1048 1036 device_unlock(dev); 1049 1037 if (rc) ··· 1075 1063 &dev_attr_revision.attr, 1076 1064 &dev_attr_scrub.attr, 1077 1065 &dev_attr_hw_error_scrub.attr, 1066 + &dev_attr_bus_dsm_mask.attr, 1078 1067 NULL, 1079 1068 }; 1080 1069 1081 - static struct attribute_group acpi_nfit_attribute_group = { 1070 + static const struct attribute_group acpi_nfit_attribute_group = { 1082 1071 .name = "nfit", 1083 1072 .attrs = acpi_nfit_attributes, 1084 1073 .is_visible = nfit_visible, ··· 1359 1346 return a->mode; 1360 1347 } 1361 1348 1362 - static struct attribute_group acpi_nfit_dimm_attribute_group = { 1349 + static const struct attribute_group acpi_nfit_dimm_attribute_group = { 
1363 1350 .name = "nfit", 1364 1351 .attrs = acpi_nfit_dimm_attributes, 1365 1352 .is_visible = acpi_nfit_dimm_attr_visible, ··· 1621 1608 acpi_desc); 1622 1609 } 1623 1610 1611 + /* 1612 + * These constants are private because there are no kernel consumers of 1613 + * these commands. 1614 + */ 1615 + enum nfit_aux_cmds { 1616 + NFIT_CMD_TRANSLATE_SPA = 5, 1617 + NFIT_CMD_ARS_INJECT_SET = 7, 1618 + NFIT_CMD_ARS_INJECT_CLEAR = 8, 1619 + NFIT_CMD_ARS_INJECT_GET = 9, 1620 + }; 1621 + 1624 1622 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) 1625 1623 { 1626 1624 struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; 1627 1625 const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS); 1628 1626 struct acpi_device *adev; 1627 + unsigned long dsm_mask; 1629 1628 int i; 1630 1629 1631 1630 nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; ··· 1648 1623 for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) 1649 1624 if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) 1650 1625 set_bit(i, &nd_desc->cmd_mask); 1626 + set_bit(ND_CMD_CALL, &nd_desc->cmd_mask); 1627 + 1628 + dsm_mask = 1629 + (1 << ND_CMD_ARS_CAP) | 1630 + (1 << ND_CMD_ARS_START) | 1631 + (1 << ND_CMD_ARS_STATUS) | 1632 + (1 << ND_CMD_CLEAR_ERROR) | 1633 + (1 << NFIT_CMD_TRANSLATE_SPA) | 1634 + (1 << NFIT_CMD_ARS_INJECT_SET) | 1635 + (1 << NFIT_CMD_ARS_INJECT_CLEAR) | 1636 + (1 << NFIT_CMD_ARS_INJECT_GET); 1637 + for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) 1638 + if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) 1639 + set_bit(i, &nd_desc->bus_dsm_mask); 1651 1640 } 1652 1641 1653 1642 static ssize_t range_index_show(struct device *dev, ··· 1679 1640 NULL, 1680 1641 }; 1681 1642 1682 - static struct attribute_group acpi_nfit_region_attribute_group = { 1643 + static const struct attribute_group acpi_nfit_region_attribute_group = { 1683 1644 .name = "nfit", 1684 1645 .attrs = acpi_nfit_region_attributes, 1685 1646 }; ··· 1702 1663 } mapping[0]; 1703 1664 }; 1704 1665 1666 + struct 
nfit_set_info2 { 1667 + struct nfit_set_info_map2 { 1668 + u64 region_offset; 1669 + u32 serial_number; 1670 + u16 vendor_id; 1671 + u16 manufacturing_date; 1672 + u8 manufacturing_location; 1673 + u8 reserved[31]; 1674 + } mapping[0]; 1675 + }; 1676 + 1705 1677 static size_t sizeof_nfit_set_info(int num_mappings) 1706 1678 { 1707 1679 return sizeof(struct nfit_set_info) 1708 1680 + num_mappings * sizeof(struct nfit_set_info_map); 1681 + } 1682 + 1683 + static size_t sizeof_nfit_set_info2(int num_mappings) 1684 + { 1685 + return sizeof(struct nfit_set_info2) 1686 + + num_mappings * sizeof(struct nfit_set_info_map2); 1709 1687 } 1710 1688 1711 1689 static int cmp_map_compat(const void *m0, const void *m1) ··· 1738 1682 { 1739 1683 const struct nfit_set_info_map *map0 = m0; 1740 1684 const struct nfit_set_info_map *map1 = m1; 1685 + 1686 + if (map0->region_offset < map1->region_offset) 1687 + return -1; 1688 + else if (map0->region_offset > map1->region_offset) 1689 + return 1; 1690 + return 0; 1691 + } 1692 + 1693 + static int cmp_map2(const void *m0, const void *m1) 1694 + { 1695 + const struct nfit_set_info_map2 *map0 = m0; 1696 + const struct nfit_set_info_map2 *map1 = m1; 1741 1697 1742 1698 if (map0->region_offset < map1->region_offset) 1743 1699 return -1; ··· 1775 1707 struct nd_region_desc *ndr_desc, 1776 1708 struct acpi_nfit_system_address *spa) 1777 1709 { 1778 - int i, spa_type = nfit_spa_type(spa); 1779 1710 struct device *dev = acpi_desc->dev; 1780 1711 struct nd_interleave_set *nd_set; 1781 1712 u16 nr = ndr_desc->num_mappings; 1713 + struct nfit_set_info2 *info2; 1782 1714 struct nfit_set_info *info; 1783 - 1784 - if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE) 1785 - /* pass */; 1786 - else 1787 - return 0; 1715 + int i; 1788 1716 1789 1717 nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); 1790 1718 if (!nd_set) 1791 1719 return -ENOMEM; 1720 + ndr_desc->nd_set = nd_set; 1721 + guid_copy(&nd_set->type_guid, (guid_t *) 
spa->range_guid); 1792 1722 1793 1723 info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); 1794 1724 if (!info) 1795 1725 return -ENOMEM; 1726 + 1727 + info2 = devm_kzalloc(dev, sizeof_nfit_set_info2(nr), GFP_KERNEL); 1728 + if (!info2) 1729 + return -ENOMEM; 1730 + 1796 1731 for (i = 0; i < nr; i++) { 1797 1732 struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; 1798 1733 struct nfit_set_info_map *map = &info->mapping[i]; 1734 + struct nfit_set_info_map2 *map2 = &info2->mapping[i]; 1799 1735 struct nvdimm *nvdimm = mapping->nvdimm; 1800 1736 struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); 1801 1737 struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, ··· 1812 1740 1813 1741 map->region_offset = memdev->region_offset; 1814 1742 map->serial_number = nfit_mem->dcr->serial_number; 1743 + 1744 + map2->region_offset = memdev->region_offset; 1745 + map2->serial_number = nfit_mem->dcr->serial_number; 1746 + map2->vendor_id = nfit_mem->dcr->vendor_id; 1747 + map2->manufacturing_date = nfit_mem->dcr->manufacturing_date; 1748 + map2->manufacturing_location = nfit_mem->dcr->manufacturing_location; 1815 1749 } 1816 1750 1751 + /* v1.1 namespaces */ 1817 1752 sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), 1818 1753 cmp_map, NULL); 1819 - nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); 1754 + nd_set->cookie1 = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); 1820 1755 1821 - /* support namespaces created with the wrong sort order */ 1756 + /* v1.2 namespaces */ 1757 + sort(&info2->mapping[0], nr, sizeof(struct nfit_set_info_map2), 1758 + cmp_map2, NULL); 1759 + nd_set->cookie2 = nd_fletcher64(info2, sizeof_nfit_set_info2(nr), 0); 1760 + 1761 + /* support v1.1 namespaces created with the wrong sort order */ 1822 1762 sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), 1823 1763 cmp_map_compat, NULL); 1824 1764 nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); 1825 1765 1826 1766 
ndr_desc->nd_set = nd_set; 1827 1767 devm_kfree(dev, info); 1768 + devm_kfree(dev, info2); 1828 1769 1829 1770 return 0; 1830 1771 } ··· 1927 1842 } 1928 1843 1929 1844 if (rw) 1930 - memcpy_to_pmem(mmio->addr.aperture + offset, 1931 - iobuf + copied, c); 1845 + memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c); 1932 1846 else { 1933 1847 if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) 1934 1848 mmio_flush_range((void __force *) ··· 2041 1957 nfit_blk->bdw_offset = nfit_mem->bdw->offset; 2042 1958 mmio = &nfit_blk->mmio[BDW]; 2043 1959 mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address, 2044 - nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM); 1960 + nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr)); 2045 1961 if (!mmio->addr.base) { 2046 1962 dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, 2047 1963 nvdimm_name(nvdimm)); ··· 2135 2051 memset(&ars_start, 0, sizeof(ars_start)); 2136 2052 ars_start.address = spa->address; 2137 2053 ars_start.length = spa->length; 2054 + ars_start.flags = acpi_desc->ars_start_flags; 2138 2055 if (nfit_spa_type(spa) == NFIT_SPA_PM) 2139 2056 ars_start.type = ND_ARS_PERSISTENT; 2140 2057 else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) ··· 2162 2077 ars_start.address = ars_status->restart_address; 2163 2078 ars_start.length = ars_status->restart_length; 2164 2079 ars_start.type = ars_status->type; 2080 + ars_start.flags = acpi_desc->ars_start_flags; 2165 2081 rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, 2166 2082 sizeof(ars_start), &cmd_rc); 2167 2083 if (rc < 0) ··· 2265 2179 struct acpi_nfit_system_address *spa = nfit_spa->spa; 2266 2180 struct nd_blk_region_desc *ndbr_desc; 2267 2181 struct nfit_mem *nfit_mem; 2268 - int blk_valid = 0; 2182 + int blk_valid = 0, rc; 2269 2183 2270 2184 if (!nvdimm) { 2271 2185 dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n", ··· 2297 2211 ndbr_desc = to_blk_region_desc(ndr_desc); 2298 2212 ndbr_desc->enable = 
acpi_nfit_blk_region_enable; 2299 2213 ndbr_desc->do_io = acpi_desc->blk_do_io; 2214 + rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); 2215 + if (rc) 2216 + return rc; 2300 2217 nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, 2301 2218 ndr_desc); 2302 2219 if (!nfit_spa->nd_region) ··· 2316 2227 nfit_spa_type(spa) == NFIT_SPA_VCD || 2317 2228 nfit_spa_type(spa) == NFIT_SPA_PDISK || 2318 2229 nfit_spa_type(spa) == NFIT_SPA_PCD); 2230 + } 2231 + 2232 + static bool nfit_spa_is_volatile(struct acpi_nfit_system_address *spa) 2233 + { 2234 + return (nfit_spa_type(spa) == NFIT_SPA_VDISK || 2235 + nfit_spa_type(spa) == NFIT_SPA_VCD || 2236 + nfit_spa_type(spa) == NFIT_SPA_VOLATILE); 2319 2237 } 2320 2238 2321 2239 static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ··· 2399 2303 ndr_desc); 2400 2304 if (!nfit_spa->nd_region) 2401 2305 rc = -ENOMEM; 2402 - } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { 2306 + } else if (nfit_spa_is_volatile(spa)) { 2403 2307 nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, 2404 2308 ndr_desc); 2405 2309 if (!nfit_spa->nd_region) ··· 2691 2595 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) 2692 2596 acpi_nfit_async_scrub(acpi_desc, nfit_spa); 2693 2597 acpi_desc->scrub_count++; 2598 + acpi_desc->ars_start_flags = 0; 2694 2599 if (acpi_desc->scrub_count_state) 2695 2600 sysfs_notify_dirent(acpi_desc->scrub_count_state); 2696 2601 mutex_unlock(&acpi_desc->init_mutex); ··· 2710 2613 return rc; 2711 2614 } 2712 2615 2616 + acpi_desc->ars_start_flags = 0; 2713 2617 if (!acpi_desc->cancel) 2714 2618 queue_work(nfit_wq, &acpi_desc->work); 2715 2619 return 0; ··· 2915 2817 return 0; 2916 2818 } 2917 2819 2918 - int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) 2820 + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags) 2919 2821 { 2920 2822 struct device *dev = acpi_desc->dev; 2921 2823 struct nfit_spa *nfit_spa; ··· 2937 2839 2938 2840 
nfit_spa->ars_required = 1; 2939 2841 } 2842 + acpi_desc->ars_start_flags = flags; 2940 2843 queue_work(nfit_wq, &acpi_desc->work); 2941 2844 dev_dbg(dev, "%s: ars_scan triggered\n", __func__); 2942 2845 mutex_unlock(&acpi_desc->init_mutex); ··· 3066 2967 return 0; 3067 2968 } 3068 2969 3069 - void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) 2970 + static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) 3070 2971 { 3071 2972 struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); 3072 2973 struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; 3073 2974 union acpi_object *obj; 3074 2975 acpi_status status; 3075 2976 int ret; 3076 - 3077 - dev_dbg(dev, "%s: event: %d\n", __func__, event); 3078 - 3079 - if (event != NFIT_NOTIFY_UPDATE) 3080 - return; 3081 2977 3082 2978 if (!dev->driver) { 3083 2979 /* dev->driver may be null if we're being removed */ ··· 3109 3015 } else 3110 3016 dev_err(dev, "Invalid _FIT\n"); 3111 3017 kfree(buf.pointer); 3018 + } 3019 + 3020 + static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle) 3021 + { 3022 + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); 3023 + u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ? 3024 + 0 : ND_ARS_RETURN_PREV_DATA; 3025 + 3026 + acpi_nfit_ars_rescan(acpi_desc, flags); 3027 + } 3028 + 3029 + void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) 3030 + { 3031 + dev_dbg(dev, "%s: event: 0x%x\n", __func__, event); 3032 + 3033 + switch (event) { 3034 + case NFIT_NOTIFY_UPDATE: 3035 + return acpi_nfit_update_notify(dev, handle); 3036 + case NFIT_NOTIFY_UC_MEMORY_ERROR: 3037 + return acpi_nfit_uc_error_notify(dev, handle); 3038 + default: 3039 + return; 3040 + } 3112 3041 } 3113 3042 EXPORT_SYMBOL_GPL(__acpi_nfit_notify); 3114 3043
+1 -1
drivers/acpi/nfit/mce.c
··· 79 79 * already in progress, just let that be the last 80 80 * authoritative one 81 81 */ 82 - acpi_nfit_ars_rescan(acpi_desc); 82 + acpi_nfit_ars_rescan(acpi_desc, 0); 83 83 } 84 84 break; 85 85 }
+3 -1
drivers/acpi/nfit/nfit.h
··· 79 79 80 80 enum nfit_root_notifiers { 81 81 NFIT_NOTIFY_UPDATE = 0x80, 82 + NFIT_NOTIFY_UC_MEMORY_ERROR = 0x81, 82 83 }; 83 84 84 85 enum nfit_dimm_notifiers { ··· 155 154 struct list_head idts; 156 155 struct nvdimm_bus *nvdimm_bus; 157 156 struct device *dev; 157 + u8 ars_start_flags; 158 158 struct nd_cmd_ars_status *ars_status; 159 159 size_t ars_status_size; 160 160 struct work_struct work; ··· 208 206 209 207 extern struct list_head acpi_descs; 210 208 extern struct mutex acpi_desc_lock; 211 - int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc); 209 + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags); 212 210 213 211 #ifdef CONFIG_X86_MCE 214 212 void nfit_mce_register(void);
+8
drivers/block/brd.c
··· 22 22 #ifdef CONFIG_BLK_DEV_RAM_DAX 23 23 #include <linux/pfn_t.h> 24 24 #include <linux/dax.h> 25 + #include <linux/uio.h> 25 26 #endif 26 27 27 28 #include <linux/uaccess.h> ··· 355 354 return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn); 356 355 } 357 356 357 + static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 358 + void *addr, size_t bytes, struct iov_iter *i) 359 + { 360 + return copy_from_iter(addr, bytes, i); 361 + } 362 + 358 363 static const struct dax_operations brd_dax_ops = { 359 364 .direct_access = brd_dax_direct_access, 365 + .copy_from_iter = brd_dax_copy_from_iter, 360 366 }; 361 367 #endif 362 368
+112 -6
drivers/dax/super.c
··· 18 18 #include <linux/cdev.h> 19 19 #include <linux/hash.h> 20 20 #include <linux/slab.h> 21 + #include <linux/uio.h> 21 22 #include <linux/dax.h> 22 23 #include <linux/fs.h> 23 24 ··· 116 115 EXPORT_SYMBOL_GPL(__bdev_dax_supported); 117 116 #endif 118 117 118 + enum dax_device_flags { 119 + /* !alive + rcu grace period == no new operations / mappings */ 120 + DAXDEV_ALIVE, 121 + /* gate whether dax_flush() calls the low level flush routine */ 122 + DAXDEV_WRITE_CACHE, 123 + }; 124 + 119 125 /** 120 126 * struct dax_device - anchor object for dax services 121 127 * @inode: core vfs 122 128 * @cdev: optional character interface for "device dax" 123 129 * @host: optional name for lookups where the device path is not available 124 130 * @private: dax driver private data 125 - * @alive: !alive + rcu grace period == no new operations / mappings 131 + * @flags: state and boolean properties 126 132 */ 127 133 struct dax_device { 128 134 struct hlist_node list; ··· 137 129 struct cdev cdev; 138 130 const char *host; 139 131 void *private; 140 - bool alive; 132 + unsigned long flags; 141 133 const struct dax_operations *ops; 142 134 }; 135 + 136 + static ssize_t write_cache_show(struct device *dev, 137 + struct device_attribute *attr, char *buf) 138 + { 139 + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); 140 + ssize_t rc; 141 + 142 + WARN_ON_ONCE(!dax_dev); 143 + if (!dax_dev) 144 + return -ENXIO; 145 + 146 + rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE, 147 + &dax_dev->flags)); 148 + put_dax(dax_dev); 149 + return rc; 150 + } 151 + 152 + static ssize_t write_cache_store(struct device *dev, 153 + struct device_attribute *attr, const char *buf, size_t len) 154 + { 155 + bool write_cache; 156 + int rc = strtobool(buf, &write_cache); 157 + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); 158 + 159 + WARN_ON_ONCE(!dax_dev); 160 + if (!dax_dev) 161 + return -ENXIO; 162 + 163 + if (rc) 164 + len = rc; 165 + else if (write_cache) 166 + 
set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); 167 + else 168 + clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); 169 + 170 + put_dax(dax_dev); 171 + return len; 172 + } 173 + static DEVICE_ATTR_RW(write_cache); 174 + 175 + static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n) 176 + { 177 + struct device *dev = container_of(kobj, typeof(*dev), kobj); 178 + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); 179 + 180 + WARN_ON_ONCE(!dax_dev); 181 + if (!dax_dev) 182 + return 0; 183 + 184 + if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush) 185 + return 0; 186 + return a->mode; 187 + } 188 + 189 + static struct attribute *dax_attributes[] = { 190 + &dev_attr_write_cache.attr, 191 + NULL, 192 + }; 193 + 194 + struct attribute_group dax_attribute_group = { 195 + .name = "dax", 196 + .attrs = dax_attributes, 197 + .is_visible = dax_visible, 198 + }; 199 + EXPORT_SYMBOL_GPL(dax_attribute_group); 143 200 144 201 /** 145 202 * dax_direct_access() - translate a device pgoff to an absolute pfn ··· 245 172 } 246 173 EXPORT_SYMBOL_GPL(dax_direct_access); 247 174 175 + size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 176 + size_t bytes, struct iov_iter *i) 177 + { 178 + if (!dax_alive(dax_dev)) 179 + return 0; 180 + 181 + return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i); 182 + } 183 + EXPORT_SYMBOL_GPL(dax_copy_from_iter); 184 + 185 + void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 186 + size_t size) 187 + { 188 + if (!dax_alive(dax_dev)) 189 + return; 190 + 191 + if (!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)) 192 + return; 193 + 194 + if (dax_dev->ops->flush) 195 + dax_dev->ops->flush(dax_dev, pgoff, addr, size); 196 + } 197 + EXPORT_SYMBOL_GPL(dax_flush); 198 + 199 + void dax_write_cache(struct dax_device *dax_dev, bool wc) 200 + { 201 + if (wc) 202 + set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); 203 + else 204 + clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); 
205 + } 206 + EXPORT_SYMBOL_GPL(dax_write_cache); 207 + 248 208 bool dax_alive(struct dax_device *dax_dev) 249 209 { 250 210 lockdep_assert_held(&dax_srcu); 251 - return dax_dev->alive; 211 + return test_bit(DAXDEV_ALIVE, &dax_dev->flags); 252 212 } 253 213 EXPORT_SYMBOL_GPL(dax_alive); 254 214 ··· 301 195 if (!dax_dev) 302 196 return; 303 197 304 - dax_dev->alive = false; 198 + clear_bit(DAXDEV_ALIVE, &dax_dev->flags); 305 199 306 200 synchronize_srcu(&dax_srcu); 307 201 ··· 345 239 { 346 240 struct dax_device *dax_dev = to_dax_dev(inode); 347 241 348 - WARN_ONCE(dax_dev->alive, 242 + WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags), 349 243 "kill_dax() must be called before final iput()\n"); 350 244 call_rcu(&inode->i_rcu, dax_i_callback); 351 245 } ··· 397 291 398 292 dax_dev = to_dax_dev(inode); 399 293 if (inode->i_state & I_NEW) { 400 - dax_dev->alive = true; 294 + set_bit(DAXDEV_ALIVE, &dax_dev->flags); 401 295 inode->i_cdev = &dax_dev->cdev; 402 296 inode->i_mode = S_IFCHR; 403 297 inode->i_flags = S_DAX;
+30
drivers/md/dm-linear.c
··· 170 170 return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); 171 171 } 172 172 173 + static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, 174 + void *addr, size_t bytes, struct iov_iter *i) 175 + { 176 + struct linear_c *lc = ti->private; 177 + struct block_device *bdev = lc->dev->bdev; 178 + struct dax_device *dax_dev = lc->dev->dax_dev; 179 + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; 180 + 181 + dev_sector = linear_map_sector(ti, sector); 182 + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) 183 + return 0; 184 + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); 185 + } 186 + 187 + static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, 188 + size_t size) 189 + { 190 + struct linear_c *lc = ti->private; 191 + struct block_device *bdev = lc->dev->bdev; 192 + struct dax_device *dax_dev = lc->dev->dax_dev; 193 + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; 194 + 195 + dev_sector = linear_map_sector(ti, sector); 196 + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) 197 + return; 198 + dax_flush(dax_dev, pgoff, addr, size); 199 + } 200 + 173 201 static struct target_type linear_target = { 174 202 .name = "linear", 175 203 .version = {1, 4, 0}, ··· 211 183 .prepare_ioctl = linear_prepare_ioctl, 212 184 .iterate_devices = linear_iterate_devices, 213 185 .direct_access = linear_dax_direct_access, 186 + .dax_copy_from_iter = linear_dax_copy_from_iter, 187 + .dax_flush = linear_dax_flush, 214 188 }; 215 189 216 190 int __init dm_linear_init(void)
+40
drivers/md/dm-stripe.c
··· 332 332 return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); 333 333 } 334 334 335 + static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, 336 + void *addr, size_t bytes, struct iov_iter *i) 337 + { 338 + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; 339 + struct stripe_c *sc = ti->private; 340 + struct dax_device *dax_dev; 341 + struct block_device *bdev; 342 + uint32_t stripe; 343 + 344 + stripe_map_sector(sc, sector, &stripe, &dev_sector); 345 + dev_sector += sc->stripe[stripe].physical_start; 346 + dax_dev = sc->stripe[stripe].dev->dax_dev; 347 + bdev = sc->stripe[stripe].dev->bdev; 348 + 349 + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) 350 + return 0; 351 + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); 352 + } 353 + 354 + static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, 355 + size_t size) 356 + { 357 + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; 358 + struct stripe_c *sc = ti->private; 359 + struct dax_device *dax_dev; 360 + struct block_device *bdev; 361 + uint32_t stripe; 362 + 363 + stripe_map_sector(sc, sector, &stripe, &dev_sector); 364 + dev_sector += sc->stripe[stripe].physical_start; 365 + dax_dev = sc->stripe[stripe].dev->dax_dev; 366 + bdev = sc->stripe[stripe].dev->bdev; 367 + 368 + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) 369 + return; 370 + dax_flush(dax_dev, pgoff, addr, size); 371 + } 372 + 335 373 /* 336 374 * Stripe status: 337 375 * ··· 490 452 .iterate_devices = stripe_iterate_devices, 491 453 .io_hints = stripe_io_hints, 492 454 .direct_access = stripe_dax_direct_access, 455 + .dax_copy_from_iter = stripe_dax_copy_from_iter, 456 + .dax_flush = stripe_dax_flush, 493 457 }; 494 458 495 459 int __init dm_stripe_init(void)
+45
drivers/md/dm.c
··· 19 19 #include <linux/dax.h> 20 20 #include <linux/slab.h> 21 21 #include <linux/idr.h> 22 + #include <linux/uio.h> 22 23 #include <linux/hdreg.h> 23 24 #include <linux/delay.h> 24 25 #include <linux/wait.h> ··· 971 970 dm_put_live_table(md, srcu_idx); 972 971 973 972 return ret; 973 + } 974 + 975 + static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 976 + void *addr, size_t bytes, struct iov_iter *i) 977 + { 978 + struct mapped_device *md = dax_get_private(dax_dev); 979 + sector_t sector = pgoff * PAGE_SECTORS; 980 + struct dm_target *ti; 981 + long ret = 0; 982 + int srcu_idx; 983 + 984 + ti = dm_dax_get_live_target(md, sector, &srcu_idx); 985 + 986 + if (!ti) 987 + goto out; 988 + if (!ti->type->dax_copy_from_iter) { 989 + ret = copy_from_iter(addr, bytes, i); 990 + goto out; 991 + } 992 + ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i); 993 + out: 994 + dm_put_live_table(md, srcu_idx); 995 + 996 + return ret; 997 + } 998 + 999 + static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 1000 + size_t size) 1001 + { 1002 + struct mapped_device *md = dax_get_private(dax_dev); 1003 + sector_t sector = pgoff * PAGE_SECTORS; 1004 + struct dm_target *ti; 1005 + int srcu_idx; 1006 + 1007 + ti = dm_dax_get_live_target(md, sector, &srcu_idx); 1008 + 1009 + if (!ti) 1010 + goto out; 1011 + if (ti->type->dax_flush) 1012 + ti->type->dax_flush(ti, pgoff, addr, size); 1013 + out: 1014 + dm_put_live_table(md, srcu_idx); 974 1015 } 975 1016 976 1017 /* ··· 3001 2958 3002 2959 static const struct dax_operations dm_dax_ops = { 3003 2960 .direct_access = dm_dax_direct_access, 2961 + .copy_from_iter = dm_dax_copy_from_iter, 2962 + .flush = dm_dax_flush, 3004 2963 }; 3005 2964 3006 2965 /*
+30 -15
drivers/nvdimm/btt.c
··· 37 37 struct nd_btt *nd_btt = arena->nd_btt; 38 38 struct nd_namespace_common *ndns = nd_btt->ndns; 39 39 40 - /* arena offsets are 4K from the base of the device */ 41 - offset += SZ_4K; 40 + /* arena offsets may be shifted from the base of the device */ 41 + offset += arena->nd_btt->initial_offset; 42 42 return nvdimm_read_bytes(ndns, offset, buf, n, flags); 43 43 } 44 44 ··· 48 48 struct nd_btt *nd_btt = arena->nd_btt; 49 49 struct nd_namespace_common *ndns = nd_btt->ndns; 50 50 51 - /* arena offsets are 4K from the base of the device */ 52 - offset += SZ_4K; 51 + /* arena offsets may be shifted from the base of the device */ 52 + offset += arena->nd_btt->initial_offset; 53 53 return nvdimm_write_bytes(ndns, offset, buf, n, flags); 54 54 } 55 55 ··· 323 323 324 324 old_ent = btt_log_get_old(log); 325 325 if (old_ent < 0 || old_ent > 1) { 326 - dev_info(to_dev(arena), 326 + dev_err(to_dev(arena), 327 327 "log corruption (%d): lane %d seq [%d, %d]\n", 328 328 old_ent, lane, log[0].seq, log[1].seq); 329 329 /* TODO set error state? 
*/ ··· 576 576 arena->internal_lbasize = roundup(arena->external_lbasize, 577 577 INT_LBASIZE_ALIGNMENT); 578 578 arena->nfree = BTT_DEFAULT_NFREE; 579 - arena->version_major = 1; 580 - arena->version_minor = 1; 579 + arena->version_major = btt->nd_btt->version_major; 580 + arena->version_minor = btt->nd_btt->version_minor; 581 581 582 582 if (available % BTT_PG_SIZE) 583 583 available -= (available % BTT_PG_SIZE); ··· 684 684 dev_info(to_dev(arena), "No existing arenas\n"); 685 685 goto out; 686 686 } else { 687 - dev_info(to_dev(arena), 687 + dev_err(to_dev(arena), 688 688 "Found corrupted metadata!\n"); 689 689 ret = -ENODEV; 690 690 goto out; ··· 1227 1227 err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, 1228 1228 op_is_write(bio_op(bio)), iter.bi_sector); 1229 1229 if (err) { 1230 - dev_info(&btt->nd_btt->dev, 1230 + dev_err(&btt->nd_btt->dev, 1231 1231 "io error in %s sector %lld, len %d,\n", 1232 1232 (op_is_write(bio_op(bio))) ? "WRITE" : 1233 1233 "READ", ··· 1248 1248 struct page *page, bool is_write) 1249 1249 { 1250 1250 struct btt *btt = bdev->bd_disk->private_data; 1251 + int rc; 1251 1252 1252 - btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); 1253 - page_endio(page, is_write, 0); 1254 - return 0; 1253 + rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); 1254 + if (rc == 0) 1255 + page_endio(page, is_write, 0); 1256 + 1257 + return rc; 1255 1258 } 1256 1259 1257 1260 ··· 1372 1369 } 1373 1370 1374 1371 if (btt->init_state != INIT_READY && nd_region->ro) { 1375 - dev_info(dev, "%s is read-only, unable to init btt metadata\n", 1372 + dev_warn(dev, "%s is read-only, unable to init btt metadata\n", 1376 1373 dev_name(&nd_region->dev)); 1377 1374 return NULL; 1378 1375 } else if (btt->init_state != INIT_READY) { ··· 1427 1424 { 1428 1425 struct nd_btt *nd_btt = to_nd_btt(ndns->claim); 1429 1426 struct nd_region *nd_region; 1427 + struct btt_sb *btt_sb; 1430 1428 struct btt *btt; 1431 1429 size_t rawsize; 1432 
1430 ··· 1436 1432 return -ENODEV; 1437 1433 } 1438 1434 1439 - rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; 1435 + btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL); 1436 + 1437 + /* 1438 + * If this returns < 0, that is ok as it just means there wasn't 1439 + * an existing BTT, and we're creating a new one. We still need to 1440 + * call this as we need the version dependent fields in nd_btt to be 1441 + * set correctly based on the holder class 1442 + */ 1443 + nd_btt_version(nd_btt, ndns, btt_sb); 1444 + 1445 + rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset; 1440 1446 if (rawsize < ARENA_MIN_SIZE) { 1441 1447 dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n", 1442 - dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K); 1448 + dev_name(&ndns->dev), 1449 + ARENA_MIN_SIZE + nd_btt->initial_offset); 1443 1450 return -ENXIO; 1444 1451 } 1445 1452 nd_region = to_nd_region(nd_btt->dev.parent);
+2
drivers/nvdimm/btt.h
··· 184 184 }; 185 185 186 186 bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); 187 + int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, 188 + struct btt_sb *btt_sb); 187 189 188 190 #endif
+49 -5
drivers/nvdimm/btt_devs.c
··· 260 260 } 261 261 EXPORT_SYMBOL(nd_btt_arena_is_valid); 262 262 263 + int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, 264 + struct btt_sb *btt_sb) 265 + { 266 + if (ndns->claim_class == NVDIMM_CCLASS_BTT2) { 267 + /* Probe/setup for BTT v2.0 */ 268 + nd_btt->initial_offset = 0; 269 + nd_btt->version_major = 2; 270 + nd_btt->version_minor = 0; 271 + if (nvdimm_read_bytes(ndns, 0, btt_sb, sizeof(*btt_sb), 0)) 272 + return -ENXIO; 273 + if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) 274 + return -ENODEV; 275 + if ((le16_to_cpu(btt_sb->version_major) != 2) || 276 + (le16_to_cpu(btt_sb->version_minor) != 0)) 277 + return -ENODEV; 278 + } else { 279 + /* 280 + * Probe/setup for BTT v1.1 (NVDIMM_CCLASS_NONE or 281 + * NVDIMM_CCLASS_BTT) 282 + */ 283 + nd_btt->initial_offset = SZ_4K; 284 + nd_btt->version_major = 1; 285 + nd_btt->version_minor = 1; 286 + if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0)) 287 + return -ENXIO; 288 + if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) 289 + return -ENODEV; 290 + if ((le16_to_cpu(btt_sb->version_major) != 1) || 291 + (le16_to_cpu(btt_sb->version_minor) != 1)) 292 + return -ENODEV; 293 + } 294 + return 0; 295 + } 296 + EXPORT_SYMBOL(nd_btt_version); 297 + 263 298 static int __nd_btt_probe(struct nd_btt *nd_btt, 264 299 struct nd_namespace_common *ndns, struct btt_sb *btt_sb) 265 300 { 301 + int rc; 302 + 266 303 if (!btt_sb || !ndns || !nd_btt) 267 304 return -ENODEV; 268 - 269 - if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0)) 270 - return -ENXIO; 271 305 272 306 if (nvdimm_namespace_capacity(ndns) < SZ_16M) 273 307 return -ENXIO; 274 308 275 - if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) 276 - return -ENODEV; 309 + rc = nd_btt_version(nd_btt, ndns, btt_sb); 310 + if (rc < 0) 311 + return rc; 277 312 278 313 nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); 279 314 nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); ··· 329 294 330 295 if (ndns->force_raw) 331 296 return 
-ENODEV; 297 + 298 + switch (ndns->claim_class) { 299 + case NVDIMM_CCLASS_NONE: 300 + case NVDIMM_CCLASS_BTT: 301 + case NVDIMM_CCLASS_BTT2: 302 + break; 303 + default: 304 + return -ENODEV; 305 + } 332 306 333 307 nvdimm_bus_lock(&ndns->dev); 334 308 btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns);
+10 -5
drivers/nvdimm/bus.c
··· 38 38 { 39 39 if (is_nvdimm(dev)) 40 40 return ND_DEVICE_DIMM; 41 - else if (is_nd_pmem(dev)) 41 + else if (is_memory(dev)) 42 42 return ND_DEVICE_REGION_PMEM; 43 43 else if (is_nd_blk(dev)) 44 44 return ND_DEVICE_REGION_BLK; 45 45 else if (is_nd_dax(dev)) 46 46 return ND_DEVICE_DAX_PMEM; 47 - else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) 47 + else if (is_nd_region(dev->parent)) 48 48 return nd_region_to_nstype(to_nd_region(dev->parent)); 49 49 50 50 return 0; ··· 56 56 * Ensure that region devices always have their numa node set as 57 57 * early as possible. 58 58 */ 59 - if (is_nd_pmem(dev) || is_nd_blk(dev)) 59 + if (is_nd_region(dev)) 60 60 set_dev_node(dev, to_nd_region(dev)->numa_node); 61 61 return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, 62 62 to_nd_device_type(dev)); ··· 65 65 static struct module *to_bus_provider(struct device *dev) 66 66 { 67 67 /* pin bus providers while regions are enabled */ 68 - if (is_nd_pmem(dev) || is_nd_blk(dev)) { 68 + if (is_nd_region(dev)) { 69 69 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 70 70 71 71 return nvdimm_bus->nd_desc->module; ··· 197 197 198 198 sector = (ctx->phys - nd_region->ndr_start) / 512; 199 199 badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512); 200 + 201 + if (nd_region->bb_state) 202 + sysfs_notify_dirent(nd_region->bb_state); 200 203 201 204 return 0; 202 205 } ··· 910 907 static char in_env[ND_CMD_MAX_ENVELOPE]; 911 908 const struct nd_cmd_desc *desc = NULL; 912 909 unsigned int cmd = _IOC_NR(ioctl_cmd); 910 + unsigned int func = cmd; 913 911 void __user *p = (void __user *) arg; 914 912 struct device *dev = &nvdimm_bus->dev; 915 913 struct nd_cmd_pkg pkg; ··· 976 972 } 977 973 978 974 if (cmd == ND_CMD_CALL) { 975 + func = pkg.nd_command; 979 976 dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n", 980 977 __func__, dimm_name, pkg.nd_command, 981 978 in_len, out_len, buf_len); ··· 1025 1020 } 1026 1021 1027 1022 
nvdimm_bus_lock(&nvdimm_bus->dev); 1028 - rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd, buf); 1023 + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf); 1029 1024 if (rc) 1030 1025 goto out_unlock; 1031 1026
+33 -5
drivers/nvdimm/claim.c
··· 12 12 */ 13 13 #include <linux/device.h> 14 14 #include <linux/sizes.h> 15 - #include <linux/pmem.h> 16 15 #include "nd-core.h" 16 + #include "pmem.h" 17 17 #include "pfn.h" 18 18 #include "btt.h" 19 19 #include "nd.h" ··· 184 184 } 185 185 186 186 ndns = to_ndns(found); 187 + 188 + switch (ndns->claim_class) { 189 + case NVDIMM_CCLASS_NONE: 190 + break; 191 + case NVDIMM_CCLASS_BTT: 192 + case NVDIMM_CCLASS_BTT2: 193 + if (!is_nd_btt(dev)) { 194 + len = -EBUSY; 195 + goto out_attach; 196 + } 197 + break; 198 + case NVDIMM_CCLASS_PFN: 199 + if (!is_nd_pfn(dev)) { 200 + len = -EBUSY; 201 + goto out_attach; 202 + } 203 + break; 204 + case NVDIMM_CCLASS_DAX: 205 + if (!is_nd_dax(dev)) { 206 + len = -EBUSY; 207 + goto out_attach; 208 + } 209 + break; 210 + default: 211 + len = -EBUSY; 212 + goto out_attach; 213 + break; 214 + } 215 + 187 216 if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { 188 217 dev_dbg(dev, "%s too small to host\n", name); 189 218 len = -ENXIO; ··· 289 260 * work around this collision. 290 261 */ 291 262 if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) 292 - && !(flags & NVDIMM_IO_ATOMIC) 293 - && !ndns->claim) { 263 + && !(flags & NVDIMM_IO_ATOMIC)) { 294 264 long cleared; 295 265 296 266 cleared = nvdimm_clear_poison(&ndns->dev, ··· 300 272 cleared /= 512; 301 273 badblocks_clear(&nsio->bb, sector, cleared); 302 274 } 303 - invalidate_pmem(nsio->addr + offset, size); 275 + arch_invalidate_pmem(nsio->addr + offset, size); 304 276 } else 305 277 rc = -EIO; 306 278 } 307 279 308 - memcpy_to_pmem(nsio->addr + offset, buf, size); 280 + memcpy_flushcache(nsio->addr + offset, buf, size); 309 281 nvdimm_flush(to_nd_region(ndns->dev.parent)); 310 282 311 283 return rc;
+4 -1
drivers/nvdimm/core.c
··· 504 504 struct nvdimm_bus *nvdimm_bus; 505 505 struct list_head *poison_list; 506 506 507 - if (!is_nd_pmem(&nd_region->dev)) { 507 + if (!is_memory(&nd_region->dev)) { 508 508 dev_WARN_ONCE(&nd_region->dev, 1, 509 509 "%s only valid for pmem regions\n", __func__); 510 510 return; ··· 699 699 rc = nd_region_init(); 700 700 if (rc) 701 701 goto err_region; 702 + 703 + nd_label_init(); 704 + 702 705 return 0; 703 706 err_region: 704 707 nvdimm_exit();
+9 -1
drivers/nvdimm/dax_devs.c
··· 89 89 struct device *dev = NULL; 90 90 struct nd_dax *nd_dax; 91 91 92 - if (!is_nd_pmem(&nd_region->dev)) 92 + if (!is_memory(&nd_region->dev)) 93 93 return NULL; 94 94 95 95 nd_dax = nd_dax_alloc(nd_region); ··· 110 110 111 111 if (ndns->force_raw) 112 112 return -ENODEV; 113 + 114 + switch (ndns->claim_class) { 115 + case NVDIMM_CCLASS_NONE: 116 + case NVDIMM_CCLASS_DAX: 117 + break; 118 + default: 119 + return -ENODEV; 120 + } 113 121 114 122 nvdimm_bus_lock(&ndns->dev); 115 123 nd_dax = nd_dax_alloc(nd_region);
+9 -1
drivers/nvdimm/dimm_devs.c
··· 20 20 #include <linux/mm.h> 21 21 #include "nd-core.h" 22 22 #include "label.h" 23 + #include "pmem.h" 23 24 #include "nd.h" 24 25 25 26 static DEFINE_IDA(dimm_ida); ··· 236 235 } 237 236 EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm); 238 237 238 + unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr) 239 + { 240 + /* pmem mapping properties are private to libnvdimm */ 241 + return ARCH_MEMREMAP_PMEM; 242 + } 243 + EXPORT_SYMBOL_GPL(nd_blk_memremap_flags); 244 + 239 245 struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) 240 246 { 241 247 struct nvdimm *nvdimm = nd_mapping->nvdimm; ··· 419 411 struct resource *res; 420 412 int i; 421 413 422 - if (!is_nd_pmem(dev)) 414 + if (!is_memory(dev)) 423 415 return 0; 424 416 425 417 nd_region = to_nd_region(dev);
+224 -27
drivers/nvdimm/label.c
··· 12 12 */ 13 13 #include <linux/device.h> 14 14 #include <linux/ndctl.h> 15 + #include <linux/uuid.h> 15 16 #include <linux/slab.h> 16 17 #include <linux/io.h> 17 18 #include <linux/nd.h> 18 19 #include "nd-core.h" 19 20 #include "label.h" 20 21 #include "nd.h" 22 + 23 + static guid_t nvdimm_btt_guid; 24 + static guid_t nvdimm_btt2_guid; 25 + static guid_t nvdimm_pfn_guid; 26 + static guid_t nvdimm_dax_guid; 21 27 22 28 static u32 best_seq(u32 a, u32 b) 23 29 { ··· 38 32 return b; 39 33 else 40 34 return a; 35 + } 36 + 37 + unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd) 38 + { 39 + return ndd->nslabel_size; 41 40 } 42 41 43 42 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) ··· 60 49 * starts to waste space at larger config_sizes, but it's 61 50 * unlikely we'll ever see anything but 128K. 62 51 */ 63 - index_span = ndd->nsarea.config_size / 129; 52 + index_span = ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); 64 53 index_span /= NSINDEX_ALIGN * 2; 65 54 ndd->nsindex_size = index_span * NSINDEX_ALIGN; 66 55 ··· 69 58 70 59 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) 71 60 { 72 - return ndd->nsarea.config_size / 129; 61 + return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); 73 62 } 74 63 75 - int nd_label_validate(struct nvdimm_drvdata *ndd) 64 + static int __nd_label_validate(struct nvdimm_drvdata *ndd) 76 65 { 77 66 /* 78 67 * On media label format consists of two index blocks followed ··· 115 104 u32 nslot; 116 105 u8 sig[NSINDEX_SIG_LEN]; 117 106 u64 sum_save, sum, size; 107 + unsigned int version, labelsize; 118 108 119 109 memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); 120 110 if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { ··· 123 111 __func__, i); 124 112 continue; 125 113 } 114 + 115 + /* label sizes larger than 128 arrived with v1.2 */ 116 + version = __le16_to_cpu(nsindex[i]->major) * 100 117 + + __le16_to_cpu(nsindex[i]->minor); 118 + if (version >= 102) 119 + labelsize = 1 << 
(7 + nsindex[i]->labelsize); 120 + else 121 + labelsize = 128; 122 + 123 + if (labelsize != sizeof_namespace_label(ndd)) { 124 + dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n", 125 + __func__, i, nsindex[i]->labelsize); 126 + continue; 127 + } 128 + 126 129 sum_save = __le64_to_cpu(nsindex[i]->checksum); 127 130 nsindex[i]->checksum = __cpu_to_le64(0); 128 131 sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); ··· 180 153 } 181 154 182 155 nslot = __le32_to_cpu(nsindex[i]->nslot); 183 - if (nslot * sizeof(struct nd_namespace_label) 156 + if (nslot * sizeof_namespace_label(ndd) 184 157 + 2 * sizeof_namespace_index(ndd) 185 158 > ndd->nsarea.config_size) { 186 159 dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", ··· 216 189 return -1; 217 190 } 218 191 192 + int nd_label_validate(struct nvdimm_drvdata *ndd) 193 + { 194 + /* 195 + * In order to probe for and validate namespace index blocks we 196 + * need to know the size of the labels, and we can't trust the 197 + * size of the labels until we validate the index blocks. 198 + * Resolve this dependency loop by probing for known label 199 + * sizes, but default to v1.2 256-byte namespace labels if 200 + * discovery fails. 
201 + */ 202 + int label_size[] = { 128, 256 }; 203 + int i, rc; 204 + 205 + for (i = 0; i < ARRAY_SIZE(label_size); i++) { 206 + ndd->nslabel_size = label_size[i]; 207 + rc = __nd_label_validate(ndd); 208 + if (rc >= 0) 209 + return rc; 210 + } 211 + 212 + return -1; 213 + } 214 + 219 215 void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, 220 216 struct nd_namespace_index *src) 221 217 { ··· 260 210 static int to_slot(struct nvdimm_drvdata *ndd, 261 211 struct nd_namespace_label *nd_label) 262 212 { 263 - return nd_label - nd_label_base(ndd); 213 + unsigned long label, base; 214 + 215 + label = (unsigned long) nd_label; 216 + base = (unsigned long) nd_label_base(ndd); 217 + 218 + return (label - base) / sizeof_namespace_label(ndd); 219 + } 220 + 221 + static struct nd_namespace_label *to_label(struct nvdimm_drvdata *ndd, int slot) 222 + { 223 + unsigned long label, base; 224 + 225 + base = (unsigned long) nd_label_base(ndd); 226 + label = base + sizeof_namespace_label(ndd) * slot; 227 + 228 + return (struct nd_namespace_label *) label; 264 229 } 265 230 266 231 #define for_each_clear_bit_le(bit, addr, size) \ ··· 333 268 free, nslot); 334 269 } 335 270 336 - static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) 271 + static bool slot_valid(struct nvdimm_drvdata *ndd, 272 + struct nd_namespace_label *nd_label, u32 slot) 337 273 { 338 274 /* check that we are written where we expect to be written */ 339 275 if (slot != __le32_to_cpu(nd_label->slot)) ··· 344 278 if ((__le64_to_cpu(nd_label->dpa) 345 279 | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) 346 280 return false; 281 + 282 + /* check checksum */ 283 + if (namespace_label_has(ndd, checksum)) { 284 + u64 sum, sum_save; 285 + 286 + sum_save = __le64_to_cpu(nd_label->checksum); 287 + nd_label->checksum = __cpu_to_le64(0); 288 + sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1); 289 + nd_label->checksum = __cpu_to_le64(sum_save); 290 + if (sum != sum_save) { 
291 + dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n", 292 + __func__, slot, sum); 293 + return false; 294 + } 295 + } 347 296 348 297 return true; 349 298 } ··· 380 299 struct resource *res; 381 300 u32 flags; 382 301 383 - nd_label = nd_label_base(ndd) + slot; 302 + nd_label = to_label(ndd, slot); 384 303 385 - if (!slot_valid(nd_label, slot)) 304 + if (!slot_valid(ndd, nd_label, slot)) 386 305 continue; 387 306 388 307 memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); ··· 412 331 for_each_clear_bit_le(slot, free, nslot) { 413 332 struct nd_namespace_label *nd_label; 414 333 415 - nd_label = nd_label_base(ndd) + slot; 334 + nd_label = to_label(ndd, slot); 416 335 417 - if (!slot_valid(nd_label, slot)) { 336 + if (!slot_valid(ndd, nd_label, slot)) { 418 337 u32 label_slot = __le32_to_cpu(nd_label->slot); 419 338 u64 size = __le64_to_cpu(nd_label->rawsize); 420 339 u64 dpa = __le64_to_cpu(nd_label->dpa); ··· 441 360 for_each_clear_bit_le(slot, free, nslot) { 442 361 struct nd_namespace_label *nd_label; 443 362 444 - nd_label = nd_label_base(ndd) + slot; 445 - if (!slot_valid(nd_label, slot)) 363 + nd_label = to_label(ndd, slot); 364 + if (!slot_valid(ndd, nd_label, slot)) 446 365 continue; 447 366 448 367 if (n-- == 0) 449 - return nd_label_base(ndd) + slot; 368 + return to_label(ndd, slot); 450 369 } 451 370 452 371 return NULL; ··· 518 437 nslot = __le32_to_cpu(nsindex->nslot); 519 438 520 439 memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); 521 - nsindex->flags = __cpu_to_le32(0); 440 + memset(&nsindex->flags, 0, 3); 441 + nsindex->labelsize = sizeof_namespace_label(ndd) >> 8; 522 442 nsindex->seq = __cpu_to_le32(seq); 523 443 offset = (unsigned long) nsindex 524 444 - (unsigned long) to_namespace_index(ndd, 0); ··· 534 452 nsindex->labeloff = __cpu_to_le64(offset); 535 453 nsindex->nslot = __cpu_to_le32(nslot); 536 454 nsindex->major = __cpu_to_le16(1); 537 - nsindex->minor = __cpu_to_le16(1); 455 + if (sizeof_namespace_label(ndd) < 
256) 456 + nsindex->minor = __cpu_to_le16(1); 457 + else 458 + nsindex->minor = __cpu_to_le16(2); 538 459 nsindex->checksum = __cpu_to_le64(0); 539 460 if (flags & ND_NSINDEX_INIT) { 540 461 unsigned long *free = (unsigned long *) nsindex->free; ··· 575 490 - (unsigned long) to_namespace_index(ndd, 0); 576 491 } 577 492 493 + enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid) 494 + { 495 + if (guid_equal(guid, &nvdimm_btt_guid)) 496 + return NVDIMM_CCLASS_BTT; 497 + else if (guid_equal(guid, &nvdimm_btt2_guid)) 498 + return NVDIMM_CCLASS_BTT2; 499 + else if (guid_equal(guid, &nvdimm_pfn_guid)) 500 + return NVDIMM_CCLASS_PFN; 501 + else if (guid_equal(guid, &nvdimm_dax_guid)) 502 + return NVDIMM_CCLASS_DAX; 503 + else if (guid_equal(guid, &guid_null)) 504 + return NVDIMM_CCLASS_NONE; 505 + 506 + return NVDIMM_CCLASS_UNKNOWN; 507 + } 508 + 509 + static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class, 510 + guid_t *target) 511 + { 512 + if (claim_class == NVDIMM_CCLASS_BTT) 513 + return &nvdimm_btt_guid; 514 + else if (claim_class == NVDIMM_CCLASS_BTT2) 515 + return &nvdimm_btt2_guid; 516 + else if (claim_class == NVDIMM_CCLASS_PFN) 517 + return &nvdimm_pfn_guid; 518 + else if (claim_class == NVDIMM_CCLASS_DAX) 519 + return &nvdimm_dax_guid; 520 + else if (claim_class == NVDIMM_CCLASS_UNKNOWN) { 521 + /* 522 + * If we're modifying a namespace for which we don't 523 + * know the claim_class, don't touch the existing guid. 
524 + */ 525 + return target; 526 + } else 527 + return &guid_null; 528 + } 529 + 578 530 static int __pmem_label_update(struct nd_region *nd_region, 579 531 struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, 580 532 int pos) 581 533 { 582 - u64 cookie = nd_region_interleave_set_cookie(nd_region); 534 + struct nd_namespace_common *ndns = &nspm->nsio.common; 535 + struct nd_interleave_set *nd_set = nd_region->nd_set; 583 536 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 584 537 struct nd_label_ent *label_ent, *victim = NULL; 585 538 struct nd_namespace_label *nd_label; ··· 627 504 unsigned long *free; 628 505 u32 nslot, slot; 629 506 size_t offset; 507 + u64 cookie; 630 508 int rc; 631 509 632 510 if (!preamble_next(ndd, &nsindex, &free, &nslot)) 633 511 return -ENXIO; 634 512 513 + cookie = nd_region_interleave_set_cookie(nd_region, nsindex); 635 514 nd_label_gen_id(&label_id, nspm->uuid, 0); 636 515 for_each_dpa_resource(ndd, res) 637 516 if (strcmp(res->name, label_id.id) == 0) ··· 650 525 return -ENXIO; 651 526 dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); 652 527 653 - nd_label = nd_label_base(ndd) + slot; 654 - memset(nd_label, 0, sizeof(struct nd_namespace_label)); 528 + nd_label = to_label(ndd, slot); 529 + memset(nd_label, 0, sizeof_namespace_label(ndd)); 655 530 memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); 656 531 if (nspm->alt_name) 657 532 memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); ··· 660 535 nd_label->position = __cpu_to_le16(pos); 661 536 nd_label->isetcookie = __cpu_to_le64(cookie); 662 537 nd_label->rawsize = __cpu_to_le64(resource_size(res)); 538 + nd_label->lbasize = __cpu_to_le64(nspm->lbasize); 663 539 nd_label->dpa = __cpu_to_le64(res->start); 664 540 nd_label->slot = __cpu_to_le32(slot); 541 + if (namespace_label_has(ndd, type_guid)) 542 + guid_copy(&nd_label->type_guid, &nd_set->type_guid); 543 + if (namespace_label_has(ndd, abstraction_guid)) 544 + guid_copy(&nd_label->abstraction_guid, 545 
+ to_abstraction_guid(ndns->claim_class, 546 + &nd_label->abstraction_guid)); 547 + if (namespace_label_has(ndd, checksum)) { 548 + u64 sum; 549 + 550 + nd_label->checksum = __cpu_to_le64(0); 551 + sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1); 552 + nd_label->checksum = __cpu_to_le64(sum); 553 + } 665 554 nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__); 666 555 667 556 /* update label */ 668 557 offset = nd_label_offset(ndd, nd_label); 669 558 rc = nvdimm_set_config_data(ndd, offset, nd_label, 670 - sizeof(struct nd_namespace_label)); 559 + sizeof_namespace_label(ndd)); 671 560 if (rc < 0) 672 561 return rc; 673 562 ··· 763 624 int num_labels) 764 625 { 765 626 int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; 627 + struct nd_interleave_set *nd_set = nd_region->nd_set; 628 + struct nd_namespace_common *ndns = &nsblk->common; 766 629 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 767 630 struct nd_namespace_label *nd_label; 768 631 struct nd_label_ent *label_ent, *e; ··· 773 632 struct resource *res, **old_res_list; 774 633 struct nd_label_id label_id; 775 634 u8 uuid[NSLABEL_UUID_LEN]; 635 + int min_dpa_idx = 0; 776 636 LIST_HEAD(list); 777 637 u32 nslot, slot; 778 638 ··· 810 668 811 669 /* mark unused labels for garbage collection */ 812 670 for_each_clear_bit_le(slot, free, nslot) { 813 - nd_label = nd_label_base(ndd) + slot; 671 + nd_label = to_label(ndd, slot); 814 672 memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); 815 673 if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) 816 674 continue; ··· 845 703 } 846 704 } 847 705 706 + /* 707 + * Find the resource associated with the first label in the set 708 + * per the v1.2 namespace specification. 
709 + */ 710 + for (i = 0; i < nsblk->num_resources; i++) { 711 + struct resource *min = nsblk->res[min_dpa_idx]; 712 + 713 + res = nsblk->res[i]; 714 + if (res->start < min->start) 715 + min_dpa_idx = i; 716 + } 717 + 848 718 for (i = 0; i < nsblk->num_resources; i++) { 849 719 size_t offset; 850 720 ··· 868 714 goto abort; 869 715 dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); 870 716 871 - nd_label = nd_label_base(ndd) + slot; 872 - memset(nd_label, 0, sizeof(struct nd_namespace_label)); 717 + nd_label = to_label(ndd, slot); 718 + memset(nd_label, 0, sizeof_namespace_label(ndd)); 873 719 memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN); 874 720 if (nsblk->alt_name) 875 721 memcpy(nd_label->name, nsblk->alt_name, 876 722 NSLABEL_NAME_LEN); 877 723 nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL); 878 - nd_label->nlabel = __cpu_to_le16(0); /* N/A */ 879 - nd_label->position = __cpu_to_le16(0); /* N/A */ 880 - nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ 724 + 725 + /* 726 + * Use the presence of the type_guid as a flag to 727 + * determine isetcookie usage and nlabel + position 728 + * policy for blk-aperture namespaces. 
729 + */ 730 + if (namespace_label_has(ndd, type_guid)) { 731 + if (i == min_dpa_idx) { 732 + nd_label->nlabel = __cpu_to_le16(nsblk->num_resources); 733 + nd_label->position = __cpu_to_le16(0); 734 + } else { 735 + nd_label->nlabel = __cpu_to_le16(0xffff); 736 + nd_label->position = __cpu_to_le16(0xffff); 737 + } 738 + nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2); 739 + } else { 740 + nd_label->nlabel = __cpu_to_le16(0); /* N/A */ 741 + nd_label->position = __cpu_to_le16(0); /* N/A */ 742 + nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ 743 + } 744 + 881 745 nd_label->dpa = __cpu_to_le64(res->start); 882 746 nd_label->rawsize = __cpu_to_le64(resource_size(res)); 883 747 nd_label->lbasize = __cpu_to_le64(nsblk->lbasize); 884 748 nd_label->slot = __cpu_to_le32(slot); 749 + if (namespace_label_has(ndd, type_guid)) 750 + guid_copy(&nd_label->type_guid, &nd_set->type_guid); 751 + if (namespace_label_has(ndd, abstraction_guid)) 752 + guid_copy(&nd_label->abstraction_guid, 753 + to_abstraction_guid(ndns->claim_class, 754 + &nd_label->abstraction_guid)); 755 + 756 + if (namespace_label_has(ndd, checksum)) { 757 + u64 sum; 758 + 759 + nd_label->checksum = __cpu_to_le64(0); 760 + sum = nd_fletcher64(nd_label, 761 + sizeof_namespace_label(ndd), 1); 762 + nd_label->checksum = __cpu_to_le64(sum); 763 + } 885 764 886 765 /* update label */ 887 766 offset = nd_label_offset(ndd, nd_label); 888 767 rc = nvdimm_set_config_data(ndd, offset, nd_label, 889 - sizeof(struct nd_namespace_label)); 768 + sizeof_namespace_label(ndd)); 890 769 if (rc < 0) 891 770 goto abort; 892 771 } ··· 977 790 goto out; 978 791 } 979 792 for_each_clear_bit_le(slot, free, nslot) { 980 - nd_label = nd_label_base(ndd) + slot; 793 + nd_label = to_label(ndd, slot); 981 794 memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); 982 795 if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) 983 796 continue; ··· 1159 972 return count; 1160 973 1161 974 return __blk_label_update(nd_region, nd_mapping, nsblk, 
count); 975 + } 976 + 977 + int __init nd_label_init(void) 978 + { 979 + WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid)); 980 + WARN_ON(guid_parse(NVDIMM_BTT2_GUID, &nvdimm_btt2_guid)); 981 + WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid)); 982 + WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid)); 983 + 984 + return 0; 1162 985 }
+19 -2
drivers/nvdimm/label.h
··· 15 15 16 16 #include <linux/ndctl.h> 17 17 #include <linux/sizes.h> 18 + #include <linux/uuid.h> 18 19 #include <linux/io.h> 19 20 20 21 enum { ··· 61 60 */ 62 61 struct nd_namespace_index { 63 62 u8 sig[NSINDEX_SIG_LEN]; 64 - __le32 flags; 63 + u8 flags[3]; 64 + u8 labelsize; 65 65 __le32 seq; 66 66 __le64 myoff; 67 67 __le64 mysize; ··· 100 98 __le64 dpa; 101 99 __le64 rawsize; 102 100 __le32 slot; 103 - __le32 unused; 101 + /* 102 + * Accessing fields past this point should be gated by a 103 + * namespace_label_has() check. 104 + */ 105 + u8 align; 106 + u8 reserved[3]; 107 + guid_t type_guid; 108 + guid_t abstraction_guid; 109 + u8 reserved2[88]; 110 + __le64 checksum; 104 111 }; 112 + 113 + #define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a" 114 + #define NVDIMM_BTT2_GUID "18633bfc-1735-4217-8ac9-17239282d3f8" 115 + #define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225" 116 + #define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088" 105 117 106 118 /** 107 119 * struct nd_label_id - identifier string for dpa allocation ··· 147 131 u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd); 148 132 bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot); 149 133 u32 nd_label_nfree(struct nvdimm_drvdata *ndd); 134 + enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid); 150 135 struct nd_region; 151 136 struct nd_namespace_pmem; 152 137 struct nd_namespace_blk;
+242 -42
drivers/nvdimm/namespace_devs.c
··· 14 14 #include <linux/device.h> 15 15 #include <linux/sort.h> 16 16 #include <linux/slab.h> 17 - #include <linux/pmem.h> 18 17 #include <linux/list.h> 19 18 #include <linux/nd.h> 20 19 #include "nd-core.h" 20 + #include "pmem.h" 21 21 #include "nd.h" 22 22 23 23 static void namespace_io_release(struct device *dev) ··· 112 112 113 113 static int is_namespace_uuid_busy(struct device *dev, void *data) 114 114 { 115 - if (is_nd_pmem(dev) || is_nd_blk(dev)) 115 + if (is_nd_region(dev)) 116 116 return device_for_each_child(dev, data, is_uuid_busy); 117 117 return 0; 118 118 } ··· 155 155 IORES_DESC_NONE) == REGION_MIXED) 156 156 return false; 157 157 158 - #ifdef ARCH_MEMREMAP_PMEM 159 158 return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; 160 - #else 161 - return false; 162 - #endif 163 159 } 164 160 EXPORT_SYMBOL(pmem_should_map_pages); 161 + 162 + unsigned int pmem_sector_size(struct nd_namespace_common *ndns) 163 + { 164 + if (is_namespace_pmem(&ndns->dev)) { 165 + struct nd_namespace_pmem *nspm; 166 + 167 + nspm = to_nd_namespace_pmem(&ndns->dev); 168 + if (nspm->lbasize == 0 || nspm->lbasize == 512) 169 + /* default */; 170 + else if (nspm->lbasize == 4096) 171 + return 4096; 172 + else 173 + dev_WARN(&ndns->dev, "unsupported sector size: %ld\n", 174 + nspm->lbasize); 175 + } 176 + 177 + /* 178 + * There is no namespace label (is_namespace_io()), or the label 179 + * indicates the default sector size. 
180 + */ 181 + return 512; 182 + } 183 + EXPORT_SYMBOL(pmem_sector_size); 165 184 166 185 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, 167 186 char *name) ··· 806 787 struct nd_label_id label_id; 807 788 int i; 808 789 809 - if (!is_nd_pmem(dev)) 790 + if (!is_memory(dev)) 810 791 return 0; 811 792 812 793 nd_region = to_nd_region(dev); ··· 1302 1283 } 1303 1284 static DEVICE_ATTR_RO(resource); 1304 1285 1305 - static const unsigned long ns_lbasize_supported[] = { 512, 520, 528, 1286 + static const unsigned long blk_lbasize_supported[] = { 512, 520, 528, 1306 1287 4096, 4104, 4160, 4224, 0 }; 1288 + 1289 + static const unsigned long pmem_lbasize_supported[] = { 512, 4096, 0 }; 1307 1290 1308 1291 static ssize_t sector_size_show(struct device *dev, 1309 1292 struct device_attribute *attr, char *buf) 1310 1293 { 1311 - struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); 1294 + if (is_namespace_blk(dev)) { 1295 + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); 1312 1296 1313 - if (!is_namespace_blk(dev)) 1314 - return -ENXIO; 1297 + return nd_sector_size_show(nsblk->lbasize, 1298 + blk_lbasize_supported, buf); 1299 + } 1315 1300 1316 - return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf); 1301 + if (is_namespace_pmem(dev)) { 1302 + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); 1303 + 1304 + return nd_sector_size_show(nspm->lbasize, 1305 + pmem_lbasize_supported, buf); 1306 + } 1307 + return -ENXIO; 1317 1308 } 1318 1309 1319 1310 static ssize_t sector_size_store(struct device *dev, 1320 1311 struct device_attribute *attr, const char *buf, size_t len) 1321 1312 { 1322 - struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); 1323 1313 struct nd_region *nd_region = to_nd_region(dev->parent); 1314 + const unsigned long *supported; 1315 + unsigned long *lbasize; 1324 1316 ssize_t rc = 0; 1325 1317 1326 - if (!is_namespace_blk(dev)) 1318 + if (is_namespace_blk(dev)) { 1319 + struct 
nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); 1320 + 1321 + lbasize = &nsblk->lbasize; 1322 + supported = blk_lbasize_supported; 1323 + } else if (is_namespace_pmem(dev)) { 1324 + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); 1325 + 1326 + lbasize = &nspm->lbasize; 1327 + supported = pmem_lbasize_supported; 1328 + } else 1327 1329 return -ENXIO; 1328 1330 1329 1331 device_lock(dev); ··· 1352 1312 if (to_ndns(dev)->claim) 1353 1313 rc = -EBUSY; 1354 1314 if (rc >= 0) 1355 - rc = nd_sector_size_store(dev, buf, &nsblk->lbasize, 1356 - ns_lbasize_supported); 1315 + rc = nd_sector_size_store(dev, buf, lbasize, supported); 1357 1316 if (rc >= 0) 1358 1317 rc = nd_namespace_label_update(nd_region, dev); 1359 1318 dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, ··· 1407 1368 } 1408 1369 static DEVICE_ATTR_RO(dpa_extents); 1409 1370 1371 + static int btt_claim_class(struct device *dev) 1372 + { 1373 + struct nd_region *nd_region = to_nd_region(dev->parent); 1374 + int i, loop_bitmask = 0; 1375 + 1376 + for (i = 0; i < nd_region->ndr_mappings; i++) { 1377 + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 1378 + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 1379 + struct nd_namespace_index *nsindex; 1380 + 1381 + nsindex = to_namespace_index(ndd, ndd->ns_current); 1382 + if (nsindex == NULL) 1383 + loop_bitmask |= 1; 1384 + else { 1385 + /* check whether existing labels are v1.1 or v1.2 */ 1386 + if (__le16_to_cpu(nsindex->major) == 1 1387 + && __le16_to_cpu(nsindex->minor) == 1) 1388 + loop_bitmask |= 2; 1389 + else 1390 + loop_bitmask |= 4; 1391 + } 1392 + } 1393 + /* 1394 + * If nsindex is null loop_bitmask's bit 0 will be set, and if an index 1395 + * block is found, a v1.1 label for any mapping will set bit 1, and a 1396 + * v1.2 label will set bit 2. 1397 + * 1398 + * At the end of the loop, at most one of the three bits must be set. 
1399 + * If multiple bits were set, it means the different mappings disagree 1400 + * about their labels, and this must be cleaned up first. 1401 + * 1402 + * If all the label index blocks are found to agree, nsindex of NULL 1403 + * implies labels haven't been initialized yet, and when they will, 1404 + * they will be of the 1.2 format, so we can assume BTT2.0 1405 + * 1406 + * If 1.1 labels are found, we enforce BTT1.1, and if 1.2 labels are 1407 + * found, we enforce BTT2.0 1408 + * 1409 + * If the loop was never entered, default to BTT1.1 (legacy namespaces) 1410 + */ 1411 + switch (loop_bitmask) { 1412 + case 0: 1413 + case 2: 1414 + return NVDIMM_CCLASS_BTT; 1415 + case 1: 1416 + case 4: 1417 + return NVDIMM_CCLASS_BTT2; 1418 + default: 1419 + return -ENXIO; 1420 + } 1421 + } 1422 + 1410 1423 static ssize_t holder_show(struct device *dev, 1411 1424 struct device_attribute *attr, char *buf) 1412 1425 { ··· 1472 1381 return rc; 1473 1382 } 1474 1383 static DEVICE_ATTR_RO(holder); 1384 + 1385 + static ssize_t __holder_class_store(struct device *dev, const char *buf) 1386 + { 1387 + struct nd_namespace_common *ndns = to_ndns(dev); 1388 + 1389 + if (dev->driver || ndns->claim) 1390 + return -EBUSY; 1391 + 1392 + if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0) 1393 + ndns->claim_class = btt_claim_class(dev); 1394 + else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0) 1395 + ndns->claim_class = NVDIMM_CCLASS_PFN; 1396 + else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0) 1397 + ndns->claim_class = NVDIMM_CCLASS_DAX; 1398 + else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0) 1399 + ndns->claim_class = NVDIMM_CCLASS_NONE; 1400 + else 1401 + return -EINVAL; 1402 + 1403 + /* btt_claim_class() could've returned an error */ 1404 + if (ndns->claim_class < 0) 1405 + return ndns->claim_class; 1406 + 1407 + return 0; 1408 + } 1409 + 1410 + static ssize_t holder_class_store(struct device *dev, 1411 + struct device_attribute *attr, const char 
*buf, size_t len) 1412 + { 1413 + struct nd_region *nd_region = to_nd_region(dev->parent); 1414 + ssize_t rc; 1415 + 1416 + device_lock(dev); 1417 + nvdimm_bus_lock(dev); 1418 + wait_nvdimm_bus_probe_idle(dev); 1419 + rc = __holder_class_store(dev, buf); 1420 + if (rc >= 0) 1421 + rc = nd_namespace_label_update(nd_region, dev); 1422 + dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); 1423 + nvdimm_bus_unlock(dev); 1424 + device_unlock(dev); 1425 + 1426 + return rc < 0 ? rc : len; 1427 + } 1428 + 1429 + static ssize_t holder_class_show(struct device *dev, 1430 + struct device_attribute *attr, char *buf) 1431 + { 1432 + struct nd_namespace_common *ndns = to_ndns(dev); 1433 + ssize_t rc; 1434 + 1435 + device_lock(dev); 1436 + if (ndns->claim_class == NVDIMM_CCLASS_NONE) 1437 + rc = sprintf(buf, "\n"); 1438 + else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) || 1439 + (ndns->claim_class == NVDIMM_CCLASS_BTT2)) 1440 + rc = sprintf(buf, "btt\n"); 1441 + else if (ndns->claim_class == NVDIMM_CCLASS_PFN) 1442 + rc = sprintf(buf, "pfn\n"); 1443 + else if (ndns->claim_class == NVDIMM_CCLASS_DAX) 1444 + rc = sprintf(buf, "dax\n"); 1445 + else 1446 + rc = sprintf(buf, "<unknown>\n"); 1447 + device_unlock(dev); 1448 + 1449 + return rc; 1450 + } 1451 + static DEVICE_ATTR_RW(holder_class); 1475 1452 1476 1453 static ssize_t mode_show(struct device *dev, 1477 1454 struct device_attribute *attr, char *buf) ··· 1599 1440 &dev_attr_force_raw.attr, 1600 1441 &dev_attr_sector_size.attr, 1601 1442 &dev_attr_dpa_extents.attr, 1443 + &dev_attr_holder_class.attr, 1602 1444 NULL, 1603 1445 }; 1604 1446 ··· 1618 1458 if (a == &dev_attr_size.attr) 1619 1459 return 0644; 1620 1460 1621 - if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr) 1622 - return 0; 1623 - 1624 1461 return a->mode; 1625 1462 } 1626 1463 1627 1464 if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr 1628 1465 || a == &dev_attr_holder.attr 1466 + || a == &dev_attr_holder_class.attr 1629 
1467 || a == &dev_attr_force_raw.attr 1630 1468 || a == &dev_attr_mode.attr) 1631 1469 return a->mode; ··· 1757 1599 1758 1600 for (i = 0; i < nd_region->ndr_mappings; i++) { 1759 1601 struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 1602 + struct nd_interleave_set *nd_set = nd_region->nd_set; 1603 + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 1760 1604 struct nd_label_ent *label_ent; 1761 1605 bool found_uuid = false; 1762 1606 ··· 1779 1619 if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) 1780 1620 continue; 1781 1621 1622 + if (namespace_label_has(ndd, type_guid) 1623 + && !guid_equal(&nd_set->type_guid, 1624 + &nd_label->type_guid)) { 1625 + dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", 1626 + nd_set->type_guid.b, 1627 + nd_label->type_guid.b); 1628 + continue; 1629 + } 1630 + 1782 1631 if (found_uuid) { 1783 - dev_dbg(to_ndd(nd_mapping)->dev, 1632 + dev_dbg(ndd->dev, 1784 1633 "%s duplicate entry for uuid\n", 1785 1634 __func__); 1786 1635 return false; ··· 1867 1698 * @nd_label: target pmem namespace label to evaluate 1868 1699 */ 1869 1700 struct device *create_namespace_pmem(struct nd_region *nd_region, 1701 + struct nd_namespace_index *nsindex, 1870 1702 struct nd_namespace_label *nd_label) 1871 1703 { 1704 + u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex); 1872 1705 u64 altcookie = nd_region_interleave_set_altcookie(nd_region); 1873 - u64 cookie = nd_region_interleave_set_cookie(nd_region); 1874 1706 struct nd_label_ent *label_ent; 1875 1707 struct nd_namespace_pmem *nspm; 1876 1708 struct nd_mapping *nd_mapping; ··· 1945 1775 /* Calculate total size and populate namespace properties from label0 */ 1946 1776 for (i = 0; i < nd_region->ndr_mappings; i++) { 1947 1777 struct nd_namespace_label *label0; 1778 + struct nvdimm_drvdata *ndd; 1948 1779 1949 1780 nd_mapping = &nd_region->mapping[i]; 1950 1781 label_ent = list_first_entry_or_null(&nd_mapping->labels, ··· 1965 1794 NSLABEL_NAME_LEN, GFP_KERNEL); 1966 1795 
nspm->uuid = kmemdup((void __force *) label0->uuid, 1967 1796 NSLABEL_UUID_LEN, GFP_KERNEL); 1797 + nspm->lbasize = __le64_to_cpu(label0->lbasize); 1798 + ndd = to_ndd(nd_mapping); 1799 + if (namespace_label_has(ndd, abstraction_guid)) 1800 + nspm->nsio.common.claim_class 1801 + = to_nvdimm_cclass(&label0->abstraction_guid); 1802 + 1968 1803 } 1969 1804 1970 1805 if (!nspm->alt_name || !nspm->uuid) { ··· 2053 1876 struct resource *res; 2054 1877 struct device *dev; 2055 1878 2056 - if (!is_nd_pmem(&nd_region->dev)) 1879 + if (!is_memory(&nd_region->dev)) 2057 1880 return NULL; 2058 1881 2059 1882 nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); ··· 2182 2005 { 2183 2006 2184 2007 struct nd_mapping *nd_mapping = &nd_region->mapping[0]; 2008 + struct nd_interleave_set *nd_set = nd_region->nd_set; 2185 2009 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 2186 2010 struct nd_namespace_blk *nsblk; 2187 2011 char name[NSLABEL_NAME_LEN]; 2188 2012 struct device *dev = NULL; 2189 2013 struct resource *res; 2014 + 2015 + if (namespace_label_has(ndd, type_guid)) { 2016 + if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) { 2017 + dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", 2018 + nd_set->type_guid.b, 2019 + nd_label->type_guid.b); 2020 + return ERR_PTR(-EAGAIN); 2021 + } 2022 + 2023 + if (nd_label->isetcookie != __cpu_to_le64(nd_set->cookie2)) { 2024 + dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n", 2025 + nd_set->cookie2, 2026 + __le64_to_cpu(nd_label->isetcookie)); 2027 + return ERR_PTR(-EAGAIN); 2028 + } 2029 + } 2190 2030 2191 2031 nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); 2192 2032 if (!nsblk) ··· 2215 2021 nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); 2216 2022 nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, 2217 2023 GFP_KERNEL); 2024 + if (namespace_label_has(ndd, abstraction_guid)) 2025 + nsblk->common.claim_class 2026 + = to_nvdimm_cclass(&nd_label->abstraction_guid); 2218 2027 if (!nsblk->uuid) 2219 2028 goto blk_err; 2220 2029 
memcpy(name, nd_label->name, NSLABEL_NAME_LEN); ··· 2299 2102 kfree(devs); 2300 2103 devs = __devs; 2301 2104 2302 - if (is_nd_blk(&nd_region->dev)) { 2105 + if (is_nd_blk(&nd_region->dev)) 2303 2106 dev = create_namespace_blk(nd_region, nd_label, count); 2304 - if (IS_ERR(dev)) 2305 - goto err; 2306 - devs[count++] = dev; 2307 - } else { 2308 - dev = create_namespace_pmem(nd_region, nd_label); 2309 - if (IS_ERR(dev)) { 2310 - switch (PTR_ERR(dev)) { 2311 - case -EAGAIN: 2312 - /* skip invalid labels */ 2313 - continue; 2314 - case -ENODEV: 2315 - /* fallthrough to seed creation */ 2316 - break; 2317 - default: 2318 - goto err; 2319 - } 2320 - } else 2321 - devs[count++] = dev; 2107 + else { 2108 + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 2109 + struct nd_namespace_index *nsindex; 2110 + 2111 + nsindex = to_namespace_index(ndd, ndd->ns_current); 2112 + dev = create_namespace_pmem(nd_region, nsindex, nd_label); 2322 2113 } 2114 + 2115 + if (IS_ERR(dev)) { 2116 + switch (PTR_ERR(dev)) { 2117 + case -EAGAIN: 2118 + /* skip invalid labels */ 2119 + continue; 2120 + case -ENODEV: 2121 + /* fallthrough to seed creation */ 2122 + break; 2123 + default: 2124 + goto err; 2125 + } 2126 + } else 2127 + devs[count++] = dev; 2128 + 2323 2129 } 2324 2130 2325 2131 dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n", ··· 2356 2156 } 2357 2157 dev->parent = &nd_region->dev; 2358 2158 devs[count++] = dev; 2359 - } else if (is_nd_pmem(&nd_region->dev)) { 2159 + } else if (is_memory(&nd_region->dev)) { 2360 2160 /* clean unselected labels */ 2361 2161 for (i = 0; i < nd_region->ndr_mappings; i++) { 2362 2162 struct list_head *l, *e;
+9
drivers/nvdimm/nd-core.h
··· 64 64 65 65 bool is_nvdimm(struct device *dev); 66 66 bool is_nd_pmem(struct device *dev); 67 + bool is_nd_volatile(struct device *dev); 67 68 bool is_nd_blk(struct device *dev); 69 + static inline bool is_nd_region(struct device *dev) 70 + { 71 + return is_nd_pmem(dev) || is_nd_blk(dev) || is_nd_volatile(dev); 72 + } 73 + static inline bool is_memory(struct device *dev) 74 + { 75 + return is_nd_pmem(dev) || is_nd_volatile(dev); 76 + } 68 77 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); 69 78 int __init nvdimm_bus_init(void); 70 79 void nvdimm_bus_exit(void);
+15 -2
drivers/nvdimm/nd.h
··· 42 42 43 43 struct nvdimm_drvdata { 44 44 struct device *dev; 45 - int nsindex_size; 45 + int nsindex_size, nslabel_size; 46 46 struct nd_cmd_get_config_size nsarea; 47 47 void *data; 48 48 int ns_current, ns_next; ··· 95 95 { 96 96 return to_namespace_index(ndd, ndd->ns_next); 97 97 } 98 + 99 + unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd); 100 + 101 + #define namespace_label_has(ndd, field) \ 102 + (offsetof(struct nd_namespace_label, field) \ 103 + < sizeof_namespace_label(ndd)) 98 104 99 105 #define nd_dbg_dpa(r, d, res, fmt, arg...) \ 100 106 dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ ··· 161 155 u64 ndr_start; 162 156 int id, num_lanes, ro, numa_node; 163 157 void *provider_data; 158 + struct kernfs_node *bb_state; 164 159 struct badblocks bb; 165 160 struct nd_interleave_set *nd_set; 166 161 struct nd_percpu_lane __percpu *lane; ··· 195 188 u64 size; 196 189 u8 *uuid; 197 190 int id; 191 + int initial_offset; 192 + u16 version_major; 193 + u16 version_minor; 198 194 }; 199 195 200 196 enum nd_pfn_mode { ··· 239 229 unsigned long *current_lbasize, const unsigned long *supported); 240 230 int __init nvdimm_init(void); 241 231 int __init nd_region_init(void); 232 + int __init nd_label_init(void); 242 233 void nvdimm_exit(void); 243 234 void nd_region_exit(void); 244 235 struct nvdimm; ··· 341 330 struct nd_region *to_nd_region(struct device *dev); 342 331 int nd_region_to_nstype(struct nd_region *nd_region); 343 332 int nd_region_register_namespaces(struct nd_region *nd_region, int *err); 344 - u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); 333 + u64 nd_region_interleave_set_cookie(struct nd_region *nd_region, 334 + struct nd_namespace_index *nsindex); 345 335 u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); 346 336 void nvdimm_bus_lock(struct device *dev); 347 337 void nvdimm_bus_unlock(struct device *dev); ··· 361 349 int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt); 
362 350 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, 363 351 char *name); 352 + unsigned int pmem_sector_size(struct nd_namespace_common *ndns); 364 353 void nvdimm_badblocks_populate(struct nd_region *nd_region, 365 354 struct badblocks *bb, const struct resource *res); 366 355 #if IS_ENABLED(CONFIG_ND_CLAIM)
+10 -2
drivers/nvdimm/pfn_devs.c
··· 331 331 struct nd_pfn *nd_pfn; 332 332 struct device *dev; 333 333 334 - if (!is_nd_pmem(&nd_region->dev)) 334 + if (!is_memory(&nd_region->dev)) 335 335 return NULL; 336 336 337 337 nd_pfn = nd_pfn_alloc(nd_region); ··· 354 354 if (!pfn_sb || !ndns) 355 355 return -ENODEV; 356 356 357 - if (!is_nd_pmem(nd_pfn->dev.parent)) 357 + if (!is_memory(nd_pfn->dev.parent)) 358 358 return -ENODEV; 359 359 360 360 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) ··· 470 470 471 471 if (ndns->force_raw) 472 472 return -ENODEV; 473 + 474 + switch (ndns->claim_class) { 475 + case NVDIMM_CCLASS_NONE: 476 + case NVDIMM_CCLASS_PFN: 477 + break; 478 + default: 479 + return -ENODEV; 480 + } 473 481 474 482 nvdimm_bus_lock(&ndns->dev); 475 483 nd_pfn = nd_pfn_alloc(nd_region);
+57 -6
drivers/nvdimm/pmem.c
··· 28 28 #include <linux/blk-mq.h> 29 29 #include <linux/pfn_t.h> 30 30 #include <linux/slab.h> 31 - #include <linux/pmem.h> 31 + #include <linux/uio.h> 32 32 #include <linux/dax.h> 33 33 #include <linux/nd.h> 34 34 #include "pmem.h" ··· 68 68 (unsigned long long) sector, cleared, 69 69 cleared > 1 ? "s" : ""); 70 70 badblocks_clear(&pmem->bb, sector, cleared); 71 + if (pmem->bb_state) 72 + sysfs_notify_dirent(pmem->bb_state); 71 73 } 72 74 73 - invalidate_pmem(pmem->virt_addr + offset, len); 75 + arch_invalidate_pmem(pmem->virt_addr + offset, len); 74 76 75 77 return rc; 76 78 } ··· 82 80 { 83 81 void *mem = kmap_atomic(page); 84 82 85 - memcpy_to_pmem(pmem_addr, mem + off, len); 83 + memcpy_flushcache(pmem_addr, mem + off, len); 86 84 kunmap_atomic(mem); 87 85 } 88 86 ··· 237 235 return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); 238 236 } 239 237 238 + static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 239 + void *addr, size_t bytes, struct iov_iter *i) 240 + { 241 + return copy_from_iter_flushcache(addr, bytes, i); 242 + } 243 + 244 + static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, 245 + void *addr, size_t size) 246 + { 247 + arch_wb_cache_pmem(addr, size); 248 + } 249 + 240 250 static const struct dax_operations pmem_dax_ops = { 241 251 .direct_access = pmem_dax_direct_access, 252 + .copy_from_iter = pmem_copy_from_iter, 253 + .flush = pmem_dax_flush, 254 + }; 255 + 256 + static const struct attribute_group *pmem_attribute_groups[] = { 257 + &dax_attribute_group, 258 + NULL, 242 259 }; 243 260 244 261 static void pmem_release_queue(void *q) ··· 286 265 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 287 266 struct nd_region *nd_region = to_nd_region(dev->parent); 288 267 struct vmem_altmap __altmap, *altmap = NULL; 268 + int nid = dev_to_node(dev), fua, wbc; 289 269 struct resource *res = &nsio->res; 290 270 struct nd_pfn *nd_pfn = NULL; 291 271 struct dax_device *dax_dev; 292 - int 
nid = dev_to_node(dev); 293 272 struct nd_pfn_sb *pfn_sb; 294 273 struct pmem_device *pmem; 295 274 struct resource pfn_res; 296 275 struct request_queue *q; 276 + struct device *gendev; 297 277 struct gendisk *disk; 298 278 void *addr; 299 279 ··· 316 294 dev_set_drvdata(dev, pmem); 317 295 pmem->phys_addr = res->start; 318 296 pmem->size = resource_size(res); 319 - if (nvdimm_has_flush(nd_region) < 0) 297 + fua = nvdimm_has_flush(nd_region); 298 + if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) { 320 299 dev_warn(dev, "unable to guarantee persistence of writes\n"); 300 + fua = 0; 301 + } 302 + wbc = nvdimm_has_cache(nd_region); 321 303 322 304 if (!devm_request_mem_region(dev, res->start, resource_size(res), 323 305 dev_name(&ndns->dev))) { ··· 365 339 return PTR_ERR(addr); 366 340 pmem->virt_addr = addr; 367 341 368 - blk_queue_write_cache(q, true, true); 342 + blk_queue_write_cache(q, wbc, fua); 369 343 blk_queue_make_request(q, pmem_make_request); 370 344 blk_queue_physical_block_size(q, PAGE_SIZE); 345 + blk_queue_logical_block_size(q, pmem_sector_size(ndns)); 371 346 blk_queue_max_hw_sectors(q, UINT_MAX); 372 347 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 373 348 queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); ··· 395 368 put_disk(disk); 396 369 return -ENOMEM; 397 370 } 371 + dax_write_cache(dax_dev, wbc); 398 372 pmem->dax_dev = dax_dev; 373 + 374 + gendev = disk_to_dev(disk); 375 + gendev->groups = pmem_attribute_groups; 399 376 400 377 device_add_disk(dev, disk); 401 378 if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) 402 379 return -ENOMEM; 403 380 404 381 revalidate_disk(disk); 382 + 383 + pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd, 384 + "badblocks"); 385 + if (!pmem->bb_state) 386 + dev_warn(dev, "'badblocks' notification disabled\n"); 405 387 406 388 return 0; 407 389 } ··· 443 407 444 408 static int nd_pmem_remove(struct device *dev) 445 409 { 410 + struct pmem_device *pmem = dev_get_drvdata(dev); 411 + 
446 412 if (is_nd_btt(dev)) 447 413 nvdimm_namespace_detach_btt(to_nd_btt(dev)); 414 + else { 415 + /* 416 + * Note, this assumes device_lock() context to not race 417 + * nd_pmem_notify() 418 + */ 419 + sysfs_put(pmem->bb_state); 420 + pmem->bb_state = NULL; 421 + } 448 422 nvdimm_flush(to_nd_region(dev->parent)); 449 423 450 424 return 0; ··· 473 427 struct nd_namespace_io *nsio; 474 428 struct resource res; 475 429 struct badblocks *bb; 430 + struct kernfs_node *bb_state; 476 431 477 432 if (event != NVDIMM_REVALIDATE_POISON) 478 433 return; ··· 485 438 nd_region = to_nd_region(ndns->dev.parent); 486 439 nsio = to_nd_namespace_io(&ndns->dev); 487 440 bb = &nsio->bb; 441 + bb_state = NULL; 488 442 } else { 489 443 struct pmem_device *pmem = dev_get_drvdata(dev); 490 444 491 445 nd_region = to_region(pmem); 492 446 bb = &pmem->bb; 447 + bb_state = pmem->bb_state; 493 448 494 449 if (is_nd_pfn(dev)) { 495 450 struct nd_pfn *nd_pfn = to_nd_pfn(dev); ··· 511 462 res.start = nsio->res.start + offset; 512 463 res.end = nsio->res.end - end_trunc; 513 464 nvdimm_badblocks_populate(nd_region, bb, &res); 465 + if (bb_state) 466 + sysfs_notify_dirent(bb_state); 514 467 } 515 468 516 469 MODULE_ALIAS("pmem");
+15
drivers/nvdimm/pmem.h
··· 5 5 #include <linux/pfn_t.h> 6 6 #include <linux/fs.h> 7 7 8 + #ifdef CONFIG_ARCH_HAS_PMEM_API 9 + #define ARCH_MEMREMAP_PMEM MEMREMAP_WB 10 + void arch_wb_cache_pmem(void *addr, size_t size); 11 + void arch_invalidate_pmem(void *addr, size_t size); 12 + #else 13 + #define ARCH_MEMREMAP_PMEM MEMREMAP_WT 14 + static inline void arch_wb_cache_pmem(void *addr, size_t size) 15 + { 16 + } 17 + static inline void arch_invalidate_pmem(void *addr, size_t size) 18 + { 19 + } 20 + #endif 21 + 8 22 /* this definition is in its own header for tools/testing/nvdimm to consume */ 9 23 struct pmem_device { 10 24 /* One contiguous memory region per device */ ··· 31 17 size_t size; 32 18 /* trim size when namespace capacity has been section aligned */ 33 19 u32 pfn_pad; 20 + struct kernfs_node *bb_state; 34 21 struct badblocks bb; 35 22 struct dax_device *dax_dev; 36 23 struct gendisk *disk;
+15 -2
drivers/nvdimm/region.c
··· 58 58 59 59 if (devm_init_badblocks(dev, &nd_region->bb)) 60 60 return -ENODEV; 61 + nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd, 62 + "badblocks"); 63 + if (!nd_region->bb_state) 64 + dev_warn(&nd_region->dev, 65 + "'badblocks' notification disabled\n"); 61 66 ndr_res.start = nd_region->ndr_start; 62 67 ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1; 63 - nvdimm_badblocks_populate(nd_region, 64 - &nd_region->bb, &ndr_res); 68 + nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res); 65 69 } 66 70 67 71 nd_region->btt_seed = nd_btt_create(nd_region); ··· 109 105 dev_set_drvdata(dev, NULL); 110 106 nvdimm_bus_unlock(dev); 111 107 108 + /* 109 + * Note, this assumes device_lock() context to not race 110 + * nd_region_notify() 111 + */ 112 + sysfs_put(nd_region->bb_state); 113 + nd_region->bb_state = NULL; 114 + 112 115 return 0; 113 116 } 114 117 ··· 137 126 nd_region->ndr_size - 1; 138 127 nvdimm_badblocks_populate(nd_region, 139 128 &nd_region->bb, &res); 129 + if (nd_region->bb_state) 130 + sysfs_notify_dirent(nd_region->bb_state); 140 131 } 141 132 } 142 133 device_for_each_child(dev, &event, child_notify);
+64 -24
drivers/nvdimm/region_devs.c
··· 15 15 #include <linux/sched.h> 16 16 #include <linux/slab.h> 17 17 #include <linux/hash.h> 18 - #include <linux/pmem.h> 19 18 #include <linux/sort.h> 20 19 #include <linux/io.h> 21 20 #include <linux/nd.h> ··· 168 169 return dev ? dev->type == &nd_blk_device_type : false; 169 170 } 170 171 172 + bool is_nd_volatile(struct device *dev) 173 + { 174 + return dev ? dev->type == &nd_volatile_device_type : false; 175 + } 176 + 171 177 struct nd_region *to_nd_region(struct device *dev) 172 178 { 173 179 struct nd_region *nd_region = container_of(dev, struct nd_region, dev); ··· 219 215 */ 220 216 int nd_region_to_nstype(struct nd_region *nd_region) 221 217 { 222 - if (is_nd_pmem(&nd_region->dev)) { 218 + if (is_memory(&nd_region->dev)) { 223 219 u16 i, alias; 224 220 225 221 for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { ··· 247 243 struct nd_region *nd_region = to_nd_region(dev); 248 244 unsigned long long size = 0; 249 245 250 - if (is_nd_pmem(dev)) { 246 + if (is_memory(dev)) { 251 247 size = nd_region->ndr_size; 252 248 } else if (nd_region->ndr_mappings == 1) { 253 249 struct nd_mapping *nd_mapping = &nd_region->mapping[0]; ··· 311 307 { 312 308 struct nd_region *nd_region = to_nd_region(dev); 313 309 struct nd_interleave_set *nd_set = nd_region->nd_set; 310 + ssize_t rc = 0; 314 311 315 - if (is_nd_pmem(dev) && nd_set) 312 + if (is_memory(dev) && nd_set) 316 313 /* pass, should be precluded by region_visible */; 317 314 else 318 315 return -ENXIO; 319 316 320 - return sprintf(buf, "%#llx\n", nd_set->cookie); 317 + /* 318 + * The cookie to show depends on which specification of the 319 + * labels we are using. If there are not labels then default to 320 + * the v1.1 namespace label cookie definition. To read all this 321 + * data we need to wait for probing to settle. 
322 + */ 323 + device_lock(dev); 324 + nvdimm_bus_lock(dev); 325 + wait_nvdimm_bus_probe_idle(dev); 326 + if (nd_region->ndr_mappings) { 327 + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; 328 + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 329 + 330 + if (ndd) { 331 + struct nd_namespace_index *nsindex; 332 + 333 + nsindex = to_namespace_index(ndd, ndd->ns_current); 334 + rc = sprintf(buf, "%#llx\n", 335 + nd_region_interleave_set_cookie(nd_region, 336 + nsindex)); 337 + } 338 + } 339 + nvdimm_bus_unlock(dev); 340 + device_unlock(dev); 341 + 342 + if (rc) 343 + return rc; 344 + return sprintf(buf, "%#llx\n", nd_set->cookie1); 321 345 } 322 346 static DEVICE_ATTR_RO(set_cookie); 323 347 ··· 367 335 if (!ndd) 368 336 return 0; 369 337 370 - if (is_nd_pmem(&nd_region->dev)) { 338 + if (is_memory(&nd_region->dev)) { 371 339 available += nd_pmem_available_dpa(nd_region, 372 340 nd_mapping, &overlap); 373 341 if (overlap > blk_max_overlap) { ··· 553 521 struct nd_interleave_set *nd_set = nd_region->nd_set; 554 522 int type = nd_region_to_nstype(nd_region); 555 523 556 - if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) 524 + if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr) 557 525 return 0; 558 526 559 - if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) 527 + if (!is_memory(dev) && a == &dev_attr_dax_seed.attr) 560 528 return 0; 561 529 562 530 if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) ··· 584 552 || type == ND_DEVICE_NAMESPACE_BLK) 585 553 && a == &dev_attr_available_size.attr) 586 554 return a->mode; 587 - else if (is_nd_pmem(dev) && nd_set) 555 + else if (is_memory(dev) && nd_set) 588 556 return a->mode; 589 557 590 558 return 0; ··· 596 564 }; 597 565 EXPORT_SYMBOL_GPL(nd_region_attribute_group); 598 566 599 - u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) 567 + u64 nd_region_interleave_set_cookie(struct nd_region *nd_region, 568 + struct nd_namespace_index *nsindex) 600 569 { 601 570 struct nd_interleave_set 
*nd_set = nd_region->nd_set; 602 571 603 - if (nd_set) 604 - return nd_set->cookie; 605 - return 0; 572 + if (!nd_set) 573 + return 0; 574 + 575 + if (nsindex && __le16_to_cpu(nsindex->major) == 1 576 + && __le16_to_cpu(nsindex->minor) == 1) 577 + return nd_set->cookie1; 578 + return nd_set->cookie2; 606 579 } 607 580 608 581 u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region) ··· 641 604 { 642 605 struct nd_region *nd_region; 643 606 644 - if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { 607 + if (!probe && is_nd_region(dev)) { 645 608 int i; 646 609 647 610 nd_region = to_nd_region(dev); ··· 659 622 if (ndd) 660 623 atomic_dec(&nvdimm->busy); 661 624 } 662 - 663 - if (is_nd_pmem(dev)) 664 - return; 665 625 } 666 - if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent)) 667 - && probe) { 626 + if (dev->parent && is_nd_region(dev->parent) && probe) { 668 627 nd_region = to_nd_region(dev->parent); 669 628 nvdimm_bus_lock(dev); 670 629 if (nd_region->ns_seed == dev) ··· 833 800 return 0; 834 801 835 802 if (nd_region->ndr_mappings < 1) { 836 - dev_err(dev, "invalid BLK region\n"); 803 + dev_dbg(dev, "invalid BLK region\n"); 837 804 return -ENXIO; 838 805 } 839 806 ··· 1048 1015 * The first wmb() is needed to 'sfence' all previous writes 1049 1016 * such that they are architecturally visible for the platform 1050 1017 * buffer flush. Note that we've already arranged for pmem 1051 - * writes to avoid the cache via arch_memcpy_to_pmem(). The 1052 - * final wmb() ensures ordering for the NVDIMM flush write. 1018 + * writes to avoid the cache via memcpy_flushcache(). The final 1019 + * wmb() ensures ordering for the NVDIMM flush write. 
1053 1020 */ 1054 1021 wmb(); 1055 1022 for (i = 0; i < nd_region->ndr_mappings; i++) ··· 1071 1038 { 1072 1039 int i; 1073 1040 1074 - /* no nvdimm == flushing capability unknown */ 1075 - if (nd_region->ndr_mappings == 0) 1041 + /* no nvdimm or pmem api == flushing capability unknown */ 1042 + if (nd_region->ndr_mappings == 0 1043 + || !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) 1076 1044 return -ENXIO; 1077 1045 1078 1046 for (i = 0; i < nd_region->ndr_mappings; i++) { ··· 1092 1058 return 0; 1093 1059 } 1094 1060 EXPORT_SYMBOL_GPL(nvdimm_has_flush); 1061 + 1062 + int nvdimm_has_cache(struct nd_region *nd_region) 1063 + { 1064 + return is_nd_pmem(&nd_region->dev); 1065 + } 1066 + EXPORT_SYMBOL_GPL(nvdimm_has_cache); 1095 1067 1096 1068 void __exit nd_region_devs_exit(void) 1097 1069 {
+8
drivers/s390/block/dcssblk.c
··· 18 18 #include <linux/interrupt.h> 19 19 #include <linux/platform_device.h> 20 20 #include <linux/pfn_t.h> 21 + #include <linux/uio.h> 21 22 #include <linux/dax.h> 22 23 #include <asm/extmem.h> 23 24 #include <asm/io.h> ··· 44 43 .release = dcssblk_release, 45 44 }; 46 45 46 + static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, 47 + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) 48 + { 49 + return copy_from_iter(addr, bytes, i); 50 + } 51 + 47 52 static const struct dax_operations dcssblk_dax_ops = { 48 53 .direct_access = dcssblk_dax_direct_access, 54 + .copy_from_iter = dcssblk_dax_copy_from_iter, 49 55 }; 50 56 51 57 struct dcssblk_dev_info {
+5 -4
fs/dax.c
··· 25 25 #include <linux/mm.h> 26 26 #include <linux/mutex.h> 27 27 #include <linux/pagevec.h> 28 - #include <linux/pmem.h> 29 28 #include <linux/sched.h> 30 29 #include <linux/sched/signal.h> 31 30 #include <linux/uio.h> ··· 783 784 } 784 785 785 786 dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); 786 - wb_cache_pmem(kaddr, size); 787 + dax_flush(dax_dev, pgoff, kaddr, size); 787 788 /* 788 789 * After we have flushed the cache, we can clear the dirty tag. There 789 790 * cannot be new dirty data in the pfn after the flush has completed as ··· 975 976 dax_read_unlock(id); 976 977 return rc; 977 978 } 978 - clear_pmem(kaddr + offset, size); 979 + memset(kaddr + offset, 0, size); 980 + dax_flush(dax_dev, pgoff, kaddr + offset, size); 979 981 dax_read_unlock(id); 980 982 } 981 983 return 0; ··· 1055 1055 map_len = end - pos; 1056 1056 1057 1057 if (iov_iter_rw(iter) == WRITE) 1058 - map_len = copy_from_iter_pmem(kaddr, map_len, iter); 1058 + map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, 1059 + map_len, iter); 1059 1060 else 1060 1061 map_len = copy_to_iter(kaddr, map_len, iter); 1061 1062 if (map_len <= 0) {
+12
include/linux/dax.h
··· 16 16 */ 17 17 long (*direct_access)(struct dax_device *, pgoff_t, long, 18 18 void **, pfn_t *); 19 + /* copy_from_iter: required operation for fs-dax direct-i/o */ 20 + size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, 21 + struct iov_iter *); 22 + /* flush: optional driver-specific cache management after writes */ 23 + void (*flush)(struct dax_device *, pgoff_t, void *, size_t); 19 24 }; 25 + 26 + extern struct attribute_group dax_attribute_group; 20 27 21 28 #if IS_ENABLED(CONFIG_DAX) 22 29 struct dax_device *dax_get_by_host(const char *host); ··· 82 75 void *dax_get_private(struct dax_device *dax_dev); 83 76 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, 84 77 void **kaddr, pfn_t *pfn); 78 + size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 79 + size_t bytes, struct iov_iter *i); 80 + void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 81 + size_t size); 82 + void dax_write_cache(struct dax_device *dax_dev, bool wc); 85 83 86 84 /* 87 85 * We use lowest available bit in exceptional entry for locking, one bit for
+6
include/linux/device-mapper.h
··· 132 132 */ 133 133 typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, 134 134 long nr_pages, void **kaddr, pfn_t *pfn); 135 + typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, 136 + void *addr, size_t bytes, struct iov_iter *i); 137 + typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, 138 + size_t size); 135 139 #define PAGE_SECTORS (PAGE_SIZE / 512) 136 140 137 141 void dm_error(const char *message); ··· 185 181 dm_iterate_devices_fn iterate_devices; 186 182 dm_io_hints_fn io_hints; 187 183 dm_dax_direct_access_fn direct_access; 184 + dm_dax_copy_from_iter_fn dax_copy_from_iter; 185 + dm_dax_flush_fn dax_flush; 188 186 189 187 /* For internal device-mapper use. */ 190 188 struct list_head list;
+10 -1
include/linux/libnvdimm.h
··· 17 17 #include <linux/kernel.h> 18 18 #include <linux/sizes.h> 19 19 #include <linux/types.h> 20 + #include <linux/uuid.h> 20 21 21 22 enum { 22 23 /* when a dimm supports both PMEM and BLK access a label is required */ ··· 55 54 56 55 struct nvdimm_bus_descriptor { 57 56 const struct attribute_group **attr_groups; 57 + unsigned long bus_dsm_mask; 58 58 unsigned long cmd_mask; 59 59 struct module *module; 60 60 char *provider_name; ··· 73 71 }; 74 72 75 73 struct nd_interleave_set { 76 - u64 cookie; 74 + /* v1.1 definition of the interleave-set-cookie algorithm */ 75 + u64 cookie1; 76 + /* v1.2 definition of the interleave-set-cookie algorithm */ 77 + u64 cookie2; 77 78 /* compatibility with initial buggy Linux implementation */ 78 79 u64 altcookie; 80 + 81 + guid_t type_guid; 79 82 }; 80 83 81 84 struct nd_mapping_desc { ··· 166 159 void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); 167 160 void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); 168 161 struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); 162 + unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); 169 163 unsigned int nd_region_acquire_lane(struct nd_region *nd_region); 170 164 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); 171 165 u64 nd_fletcher64(void *addr, size_t len, bool le); 172 166 void nvdimm_flush(struct nd_region *nd_region); 173 167 int nvdimm_has_flush(struct nd_region *nd_region); 168 + int nvdimm_has_cache(struct nd_region *nd_region); 174 169 #endif /* __LIBNVDIMM_H__ */
+13
include/linux/nd.h
··· 21 21 NVDIMM_REVALIDATE_POISON, 22 22 }; 23 23 24 + enum nvdimm_claim_class { 25 + NVDIMM_CCLASS_NONE, 26 + NVDIMM_CCLASS_BTT, 27 + NVDIMM_CCLASS_BTT2, 28 + NVDIMM_CCLASS_PFN, 29 + NVDIMM_CCLASS_DAX, 30 + NVDIMM_CCLASS_UNKNOWN, 31 + }; 32 + 24 33 struct nd_device_driver { 25 34 struct device_driver drv; 26 35 unsigned long type; ··· 50 41 * @force_raw: ignore other personalities for the namespace (e.g. btt) 51 42 * @dev: device model node 52 43 * @claim: when set a another personality has taken ownership of the namespace 44 + * @claim_class: restrict claim type to a given class 53 45 * @rw_bytes: access the raw namespace capacity with byte-aligned transfers 54 46 */ 55 47 struct nd_namespace_common { 56 48 int force_raw; 57 49 struct device dev; 58 50 struct device *claim; 51 + enum nvdimm_claim_class claim_class; 59 52 int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, 60 53 void *buf, size_t size, int rw, unsigned long flags); 61 54 }; ··· 86 75 /** 87 76 * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory 88 77 * @nsio: device and system physical address range to drive 78 + * @lbasize: logical sector size for the namespace in block-device-mode 89 79 * @alt_name: namespace name supplied in the dimm label 90 80 * @uuid: namespace name supplied in the dimm label 91 81 * @id: ida allocated id 92 82 */ 93 83 struct nd_namespace_pmem { 94 84 struct nd_namespace_io nsio; 85 + unsigned long lbasize; 95 86 char *alt_name; 96 87 u8 *uuid; 97 88 int id;
-142
include/linux/pmem.h
··· 1 - /* 2 - * Copyright(c) 2015 Intel Corporation. All rights reserved. 3 - * 4 - * This program is free software; you can redistribute it and/or modify 5 - * it under the terms of version 2 of the GNU General Public License as 6 - * published by the Free Software Foundation. 7 - * 8 - * This program is distributed in the hope that it will be useful, but 9 - * WITHOUT ANY WARRANTY; without even the implied warranty of 10 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 - * General Public License for more details. 12 - */ 13 - #ifndef __PMEM_H__ 14 - #define __PMEM_H__ 15 - 16 - #include <linux/io.h> 17 - #include <linux/uio.h> 18 - 19 - #ifdef CONFIG_ARCH_HAS_PMEM_API 20 - #define ARCH_MEMREMAP_PMEM MEMREMAP_WB 21 - #include <asm/pmem.h> 22 - #else 23 - #define ARCH_MEMREMAP_PMEM MEMREMAP_WT 24 - /* 25 - * These are simply here to enable compilation, all call sites gate 26 - * calling these symbols with arch_has_pmem_api() and redirect to the 27 - * implementation in asm/pmem.h. 
28 - */ 29 - static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) 30 - { 31 - BUG(); 32 - } 33 - 34 - static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, 35 - struct iov_iter *i) 36 - { 37 - BUG(); 38 - return 0; 39 - } 40 - 41 - static inline void arch_clear_pmem(void *addr, size_t size) 42 - { 43 - BUG(); 44 - } 45 - 46 - static inline void arch_wb_cache_pmem(void *addr, size_t size) 47 - { 48 - BUG(); 49 - } 50 - 51 - static inline void arch_invalidate_pmem(void *addr, size_t size) 52 - { 53 - BUG(); 54 - } 55 - #endif 56 - 57 - static inline bool arch_has_pmem_api(void) 58 - { 59 - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); 60 - } 61 - 62 - /** 63 - * memcpy_to_pmem - copy data to persistent memory 64 - * @dst: destination buffer for the copy 65 - * @src: source buffer for the copy 66 - * @n: length of the copy in bytes 67 - * 68 - * Perform a memory copy that results in the destination of the copy 69 - * being effectively evicted from, or never written to, the processor 70 - * cache hierarchy after the copy completes. After memcpy_to_pmem() 71 - * data may still reside in cpu or platform buffers, so this operation 72 - * must be followed by a blkdev_issue_flush() on the pmem block device. 73 - */ 74 - static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) 75 - { 76 - if (arch_has_pmem_api()) 77 - arch_memcpy_to_pmem(dst, src, n); 78 - else 79 - memcpy(dst, src, n); 80 - } 81 - 82 - /** 83 - * copy_from_iter_pmem - copy data from an iterator to PMEM 84 - * @addr: PMEM destination address 85 - * @bytes: number of bytes to copy 86 - * @i: iterator with source data 87 - * 88 - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. 89 - * See blkdev_issue_flush() note for memcpy_to_pmem(). 
90 - */ 91 - static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, 92 - struct iov_iter *i) 93 - { 94 - if (arch_has_pmem_api()) 95 - return arch_copy_from_iter_pmem(addr, bytes, i); 96 - return copy_from_iter_nocache(addr, bytes, i); 97 - } 98 - 99 - /** 100 - * clear_pmem - zero a PMEM memory range 101 - * @addr: virtual start address 102 - * @size: number of bytes to zero 103 - * 104 - * Write zeros into the memory range starting at 'addr' for 'size' bytes. 105 - * See blkdev_issue_flush() note for memcpy_to_pmem(). 106 - */ 107 - static inline void clear_pmem(void *addr, size_t size) 108 - { 109 - if (arch_has_pmem_api()) 110 - arch_clear_pmem(addr, size); 111 - else 112 - memset(addr, 0, size); 113 - } 114 - 115 - /** 116 - * invalidate_pmem - flush a pmem range from the cache hierarchy 117 - * @addr: virtual start address 118 - * @size: bytes to invalidate (internally aligned to cache line size) 119 - * 120 - * For platforms that support clearing poison this flushes any poisoned 121 - * ranges out of the cache 122 - */ 123 - static inline void invalidate_pmem(void *addr, size_t size) 124 - { 125 - if (arch_has_pmem_api()) 126 - arch_invalidate_pmem(addr, size); 127 - } 128 - 129 - /** 130 - * wb_cache_pmem - write back processor cache for PMEM memory range 131 - * @addr: virtual start address 132 - * @size: number of bytes to write back 133 - * 134 - * Write back the processor cache range starting at 'addr' for 'size' bytes. 135 - * See blkdev_issue_flush() note for memcpy_to_pmem(). 136 - */ 137 - static inline void wb_cache_pmem(void *addr, size_t size) 138 - { 139 - if (arch_has_pmem_api()) 140 - arch_wb_cache_pmem(addr, size); 141 - } 142 - #endif /* __PMEM_H__ */
+6
include/linux/string.h
··· 122 122 return 0; 123 123 } 124 124 #endif 125 + #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE 126 + static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) 127 + { 128 + memcpy(dst, src, cnt); 129 + } 130 + #endif 125 131 void *memchr_inv(const void *s, int c, size_t n); 126 132 char *strreplace(char *s, char old, char new); 127 133
+15
include/linux/uio.h
··· 95 95 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); 96 96 bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); 97 97 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); 98 + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE 99 + /* 100 + * Note, users like pmem that depend on the stricter semantics of 101 + * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for 102 + * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the 103 + * destination is flushed from the cache on return. 104 + */ 105 + size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); 106 + #else 107 + static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes, 108 + struct iov_iter *i) 109 + { 110 + return copy_from_iter_nocache(addr, bytes, i); 111 + } 112 + #endif 98 113 bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); 99 114 size_t iov_iter_zero(size_t bytes, struct iov_iter *); 100 115 unsigned long iov_iter_alignment(const struct iov_iter *i);
+41 -1
include/uapi/linux/ndctl.h
··· 105 105 __u32 status; 106 106 __u32 max_ars_out; 107 107 __u32 clear_err_unit; 108 - __u32 reserved; 108 + __u16 flags; 109 + __u16 reserved; 109 110 } __packed; 110 111 111 112 struct nd_cmd_ars_start { ··· 145 144 __u64 cleared; 146 145 } __packed; 147 146 147 + struct nd_cmd_trans_spa { 148 + __u64 spa; 149 + __u32 status; 150 + __u8 flags; 151 + __u8 _reserved[3]; 152 + __u64 trans_length; 153 + __u32 num_nvdimms; 154 + struct nd_nvdimm_device { 155 + __u32 nfit_device_handle; 156 + __u32 _reserved; 157 + __u64 dpa; 158 + } __packed devices[0]; 159 + 160 + } __packed; 161 + 162 + struct nd_cmd_ars_err_inj { 163 + __u64 err_inj_spa_range_base; 164 + __u64 err_inj_spa_range_length; 165 + __u8 err_inj_options; 166 + __u32 status; 167 + } __packed; 168 + 169 + struct nd_cmd_ars_err_inj_clr { 170 + __u64 err_inj_clr_spa_range_base; 171 + __u64 err_inj_clr_spa_range_length; 172 + __u32 status; 173 + } __packed; 174 + 175 + struct nd_cmd_ars_err_inj_stat { 176 + __u32 status; 177 + __u32 inj_err_rec_count; 178 + struct nd_error_stat_query_record { 179 + __u64 err_inj_stat_spa_range_base; 180 + __u64 err_inj_stat_spa_range_length; 181 + } __packed record[0]; 182 + } __packed; 183 + 148 184 enum { 149 185 ND_CMD_IMPLEMENTED = 0, 150 186 ··· 207 169 enum { 208 170 ND_ARS_VOLATILE = 1, 209 171 ND_ARS_PERSISTENT = 2, 172 + ND_ARS_RETURN_PREV_DATA = 1 << 1, 210 173 ND_CONFIG_LOCKED = 1, 211 174 }; 212 175 ··· 218 179 [ND_CMD_ARS_START] = "ars_start", 219 180 [ND_CMD_ARS_STATUS] = "ars_status", 220 181 [ND_CMD_CLEAR_ERROR] = "clear_error", 182 + [ND_CMD_CALL] = "cmd_call", 221 183 }; 222 184 223 185 if (cmd < ARRAY_SIZE(names) && names[cmd])
+3
lib/Kconfig
··· 556 556 config ARCH_HAS_PMEM_API 557 557 bool 558 558 559 + config ARCH_HAS_UACCESS_FLUSHCACHE 560 + bool 561 + 559 562 config ARCH_HAS_MMIO_FLUSH 560 563 bool 561 564
+22
lib/iov_iter.c
··· 615 615 } 616 616 EXPORT_SYMBOL(copy_from_iter_nocache); 617 617 618 + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE 619 + size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) 620 + { 621 + char *to = addr; 622 + if (unlikely(i->type & ITER_PIPE)) { 623 + WARN_ON(1); 624 + return 0; 625 + } 626 + iterate_and_advance(i, bytes, v, 627 + __copy_from_user_flushcache((to += v.iov_len) - v.iov_len, 628 + v.iov_base, v.iov_len), 629 + memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, 630 + v.bv_offset, v.bv_len), 631 + memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base, 632 + v.iov_len) 633 + ) 634 + 635 + return bytes; 636 + } 637 + EXPORT_SYMBOL_GPL(copy_from_iter_flushcache); 638 + #endif 639 + 618 640 bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) 619 641 { 620 642 char *to = addr;
+1 -1
tools/testing/nvdimm/test/nfit.c
··· 1943 1943 nfit_test->setup = nfit_test0_setup; 1944 1944 break; 1945 1945 case 1: 1946 - nfit_test->num_pm = 1; 1946 + nfit_test->num_pm = 2; 1947 1947 nfit_test->dcr_idx = NUM_DCR; 1948 1948 nfit_test->num_dcr = 2; 1949 1949 nfit_test->alloc = nfit_test1_alloc;