Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:

- Replace pcommit with ADR / directed-flushing.

The pcommit instruction, which has not shipped on any product, is
deprecated. Instead, the requirement is that platforms implement
either ADR, or provide one or more flush addresses per nvdimm.

ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers
to the memory controller on a power-fail event.

Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
A flush hint is an mmio address that, when written and fenced, assures
that all previous posted writes targeting a given dimm have been
flushed to media.

- On-demand ARS (address range scrub).

Linux uses the results of the ACPI ARS commands to track bad blocks
in pmem devices. When latent errors are detected we re-scrub the
media to refresh the bad block list; userspace can also request a
re-scrub at any time.

- Support for the Microsoft DSM (device specific method) command
format.

- Support for EDK2/OVMF virtual disk device memory ranges.

- Various fixes and cleanups across the subsystem.

* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
nfit: do an ARS scrub on hitting a latent media error
nfit: move to nfit/ sub-directory
nfit, libnvdimm: allow an ARS scrub to be triggered on demand
libnvdimm: register nvdimm_bus devices with an nd_bus driver
pmem: clarify a debug print in pmem_clear_poison
x86/insn: remove pcommit
Revert "KVM: x86: add pcommit support"
nfit, tools/testing/nvdimm/: unify shutdown paths
libnvdimm: move ->module to struct nvdimm_bus_descriptor
nfit: cleanup acpi_nfit_init calling convention
nfit: fix _FIT evaluation memory leak + use after free
tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
tools/testing/nvdimm: add virtual ramdisk range
acpi, nfit: treat virtual ramdisk SPA as pmem region
pmem: kill __pmem address space
pmem: kill wmb_pmem()
libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
fs/dax: remove wmb_pmem()
libnvdimm, pmem: flush posted-write queues on shutdown
...

+1375 -1015
+1 -1
Documentation/filesystems/Locking
··· 395 395 int (*release) (struct gendisk *, fmode_t); 396 396 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 397 397 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 398 - int (*direct_access) (struct block_device *, sector_t, void __pmem **, 398 + int (*direct_access) (struct block_device *, sector_t, void **, 399 399 unsigned long *); 400 400 int (*media_changed) (struct gendisk *); 401 401 void (*unlock_native_capacity) (struct gendisk *);
+9 -19
Documentation/nvdimm/btt.txt
··· 256 256 only state using a flag in the info block. 257 257 258 258 259 - 5. In-kernel usage 260 - ================== 259 + 5. Usage 260 + ======== 261 261 262 - Any block driver that supports byte granularity IO to the storage may register 263 - with the BTT. It will have to provide the rw_bytes interface in its 264 - block_device_operations struct: 262 + The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem 263 + (pmem, or blk mode). The easiest way to set up such a namespace is using the 264 + 'ndctl' utility [1]: 265 265 266 - int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw); 266 + For example, the ndctl command line to setup a btt with a 4k sector size is: 267 267 268 - It may register with the BTT after it adds its own gendisk, using btt_init: 268 + ndctl create-namespace -f -e namespace0.0 -m sector -l 4k 269 269 270 - struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize, 271 - u32 lbasize, u8 uuid[], int maxlane); 270 + See ndctl create-namespace --help for more options. 272 271 273 - note that maxlane is the maximum amount of concurrency the driver wishes to 274 - allow the BTT to use. 275 - 276 - The BTT 'disk' appears as a stacked block device that grabs the underlying block 277 - device in the O_EXCL mode. 278 - 279 - When the driver wishes to remove the backing disk, it should similarly call 280 - btt_fini using the same struct btt* handle that was provided to it by btt_init. 281 - 282 - void btt_fini(struct btt *btt); 272 + [1]: https://github.com/pmem/ndctl 283 273
+2 -2
arch/powerpc/sysdev/axonram.c
··· 143 143 */ 144 144 static long 145 145 axon_ram_direct_access(struct block_device *device, sector_t sector, 146 - void __pmem **kaddr, pfn_t *pfn, long size) 146 + void **kaddr, pfn_t *pfn, long size) 147 147 { 148 148 struct axon_ram_bank *bank = device->bd_disk->private_data; 149 149 loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; 150 150 151 - *kaddr = (void __pmem __force *) bank->io_addr + offset; 151 + *kaddr = (void *) bank->io_addr + offset; 152 152 *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); 153 153 return bank->size - offset; 154 154 }
-1
arch/x86/include/asm/cpufeatures.h
··· 225 225 #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ 226 226 #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ 227 227 #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ 228 - #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ 229 228 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ 230 229 #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ 231 230 #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+19 -58
arch/x86/include/asm/pmem.h
··· 26 26 * @n: length of the copy in bytes 27 27 * 28 28 * Copy data to persistent memory media via non-temporal stores so that 29 - * a subsequent arch_wmb_pmem() can flush cpu and memory controller 30 - * write buffers to guarantee durability. 29 + * a subsequent pmem driver flush operation will drain posted write queues. 31 30 */ 32 - static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, 33 - size_t n) 31 + static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) 34 32 { 35 - int unwritten; 33 + int rem; 36 34 37 35 /* 38 36 * We are copying between two kernel buffers, if ··· 38 40 * fault) we would have already reported a general protection fault 39 41 * before the WARN+BUG. 40 42 */ 41 - unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, 42 - (void __user *) src, n); 43 - if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", 44 - __func__, dst, src, unwritten)) 43 + rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); 44 + if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", 45 + __func__, dst, src, rem)) 45 46 BUG(); 46 47 } 47 48 48 - static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, 49 - size_t n) 49 + static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) 50 50 { 51 51 if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) 52 - return memcpy_mcsafe(dst, (void __force *) src, n); 53 - memcpy(dst, (void __force *) src, n); 52 + return memcpy_mcsafe(dst, src, n); 53 + memcpy(dst, src, n); 54 54 return 0; 55 - } 56 - 57 - /** 58 - * arch_wmb_pmem - synchronize writes to persistent memory 59 - * 60 - * After a series of arch_memcpy_to_pmem() operations this drains data 61 - * from cpu write buffers and any platform (memory controller) buffers 62 - * to ensure that written data is durable on persistent memory media. 
63 - */ 64 - static inline void arch_wmb_pmem(void) 65 - { 66 - /* 67 - * wmb() to 'sfence' all previous writes such that they are 68 - * architecturally visible to 'pcommit'. Note, that we've 69 - * already arranged for pmem writes to avoid the cache via 70 - * arch_memcpy_to_pmem(). 71 - */ 72 - wmb(); 73 - pcommit_sfence(); 74 55 } 75 56 76 57 /** ··· 58 81 * @size: number of bytes to write back 59 82 * 60 83 * Write back a cache range using the CLWB (cache line write back) 61 - * instruction. This function requires explicit ordering with an 62 - * arch_wmb_pmem() call. 84 + * instruction. 63 85 */ 64 - static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) 86 + static inline void arch_wb_cache_pmem(void *addr, size_t size) 65 87 { 66 88 u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; 67 89 unsigned long clflush_mask = x86_clflush_size - 1; 68 - void *vaddr = (void __force *)addr; 69 - void *vend = vaddr + size; 90 + void *vend = addr + size; 70 91 void *p; 71 92 72 - for (p = (void *)((unsigned long)vaddr & ~clflush_mask); 93 + for (p = (void *)((unsigned long)addr & ~clflush_mask); 73 94 p < vend; p += x86_clflush_size) 74 95 clwb(p); 75 96 } ··· 88 113 * @i: iterator with source data 89 114 * 90 115 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. 91 - * This function requires explicit ordering with an arch_wmb_pmem() call. 
92 116 */ 93 - static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, 117 + static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, 94 118 struct iov_iter *i) 95 119 { 96 - void *vaddr = (void __force *)addr; 97 120 size_t len; 98 121 99 122 /* TODO: skip the write-back by always using non-temporal stores */ 100 - len = copy_from_iter_nocache(vaddr, bytes, i); 123 + len = copy_from_iter_nocache(addr, bytes, i); 101 124 102 125 if (__iter_needs_pmem_wb(i)) 103 126 arch_wb_cache_pmem(addr, bytes); ··· 109 136 * @size: number of bytes to zero 110 137 * 111 138 * Write zeros into the memory range starting at 'addr' for 'size' bytes. 112 - * This function requires explicit ordering with an arch_wmb_pmem() call. 113 139 */ 114 - static inline void arch_clear_pmem(void __pmem *addr, size_t size) 140 + static inline void arch_clear_pmem(void *addr, size_t size) 115 141 { 116 - void *vaddr = (void __force *)addr; 117 - 118 - memset(vaddr, 0, size); 142 + memset(addr, 0, size); 119 143 arch_wb_cache_pmem(addr, size); 120 144 } 121 145 122 - static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) 146 + static inline void arch_invalidate_pmem(void *addr, size_t size) 123 147 { 124 - clflush_cache_range((void __force *) addr, size); 125 - } 126 - 127 - static inline bool __arch_has_wmb_pmem(void) 128 - { 129 - /* 130 - * We require that wmb() be an 'sfence', that is only guaranteed on 131 - * 64-bit builds 132 - */ 133 - return static_cpu_has(X86_FEATURE_PCOMMIT); 148 + clflush_cache_range(addr, size); 134 149 } 135 150 #endif /* CONFIG_ARCH_HAS_PMEM_API */ 136 151 #endif /* __ASM_X86_PMEM_H__ */
-46
arch/x86/include/asm/special_insns.h
··· 253 253 : [pax] "a" (p)); 254 254 } 255 255 256 - /** 257 - * pcommit_sfence() - persistent commit and fence 258 - * 259 - * The PCOMMIT instruction ensures that data that has been flushed from the 260 - * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to 261 - * memory and is durable on the DIMM. The primary use case for this is 262 - * persistent memory. 263 - * 264 - * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT 265 - * with appropriate fencing. 266 - * 267 - * Example: 268 - * void flush_and_commit_buffer(void *vaddr, unsigned int size) 269 - * { 270 - * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; 271 - * void *vend = vaddr + size; 272 - * void *p; 273 - * 274 - * for (p = (void *)((unsigned long)vaddr & ~clflush_mask); 275 - * p < vend; p += boot_cpu_data.x86_clflush_size) 276 - * clwb(p); 277 - * 278 - * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes 279 - * // MFENCE via mb() also works 280 - * wmb(); 281 - * 282 - * // PCOMMIT and the required SFENCE for ordering 283 - * pcommit_sfence(); 284 - * } 285 - * 286 - * After this function completes the data pointed to by 'vaddr' has been 287 - * accepted to memory and will be durable if the 'vaddr' points to persistent 288 - * memory. 289 - * 290 - * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify 291 - * things we include both the PCOMMIT and the required SFENCE in the 292 - * alternatives generated by pcommit_sfence(). 293 - */ 294 - static inline void pcommit_sfence(void) 295 - { 296 - alternative(ASM_NOP7, 297 - ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */ 298 - "sfence", 299 - X86_FEATURE_PCOMMIT); 300 - } 301 - 302 256 #define nop() asm volatile ("nop") 303 257 304 258
-1
arch/x86/include/asm/vmx.h
··· 72 72 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 73 73 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 74 74 #define SECONDARY_EXEC_XSAVES 0x00100000 75 - #define SECONDARY_EXEC_PCOMMIT 0x00200000 76 75 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 77 76 78 77 #define PIN_BASED_EXT_INTR_MASK 0x00000001
+1 -3
arch/x86/include/uapi/asm/vmx.h
··· 78 78 #define EXIT_REASON_PML_FULL 62 79 79 #define EXIT_REASON_XSAVES 63 80 80 #define EXIT_REASON_XRSTORS 64 81 - #define EXIT_REASON_PCOMMIT 65 82 81 83 82 #define VMX_EXIT_REASONS \ 84 83 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ ··· 126 127 { EXIT_REASON_INVVPID, "INVVPID" }, \ 127 128 { EXIT_REASON_INVPCID, "INVPCID" }, \ 128 129 { EXIT_REASON_XSAVES, "XSAVES" }, \ 129 - { EXIT_REASON_XRSTORS, "XRSTORS" }, \ 130 - { EXIT_REASON_PCOMMIT, "PCOMMIT" } 130 + { EXIT_REASON_XRSTORS, "XRSTORS" } 131 131 132 132 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 133 133 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
+1 -1
arch/x86/kvm/cpuid.c
··· 366 366 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | 367 367 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | 368 368 F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | 369 - F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT); 369 + F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB); 370 370 371 371 /* cpuid 0xD.1.eax */ 372 372 const u32 kvm_cpuid_D_1_eax_x86_features =
-8
arch/x86/kvm/cpuid.h
··· 144 144 return best && (best->ebx & bit(X86_FEATURE_RTM)); 145 145 } 146 146 147 - static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) 148 - { 149 - struct kvm_cpuid_entry2 *best; 150 - 151 - best = kvm_find_cpuid_entry(vcpu, 7, 0); 152 - return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); 153 - } 154 - 155 147 static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) 156 148 { 157 149 struct kvm_cpuid_entry2 *best;
+4 -28
arch/x86/kvm/vmx.c
··· 2707 2707 SECONDARY_EXEC_APIC_REGISTER_VIRT | 2708 2708 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 2709 2709 SECONDARY_EXEC_WBINVD_EXITING | 2710 - SECONDARY_EXEC_XSAVES | 2711 - SECONDARY_EXEC_PCOMMIT; 2710 + SECONDARY_EXEC_XSAVES; 2712 2711 2713 2712 if (enable_ept) { 2714 2713 /* nested EPT: emulate EPT also to L1 */ ··· 3269 3270 SECONDARY_EXEC_SHADOW_VMCS | 3270 3271 SECONDARY_EXEC_XSAVES | 3271 3272 SECONDARY_EXEC_ENABLE_PML | 3272 - SECONDARY_EXEC_PCOMMIT | 3273 3273 SECONDARY_EXEC_TSC_SCALING; 3274 3274 if (adjust_vmx_controls(min2, opt2, 3275 3275 MSR_IA32_VMX_PROCBASED_CTLS2, ··· 4856 4858 if (!enable_pml) 4857 4859 exec_control &= ~SECONDARY_EXEC_ENABLE_PML; 4858 4860 4859 - /* Currently, we allow L1 guest to directly run pcommit instruction. */ 4860 - exec_control &= ~SECONDARY_EXEC_PCOMMIT; 4861 - 4862 4861 return exec_control; 4863 4862 } 4864 4863 ··· 4899 4904 4900 4905 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); 4901 4906 4902 - if (cpu_has_secondary_exec_ctrls()) 4907 + if (cpu_has_secondary_exec_ctrls()) { 4903 4908 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, 4904 4909 vmx_secondary_exec_control(vmx)); 4910 + } 4905 4911 4906 4912 if (kvm_vcpu_apicv_active(&vmx->vcpu)) { 4907 4913 vmcs_write64(EOI_EXIT_BITMAP0, 0); ··· 7560 7564 return 1; 7561 7565 } 7562 7566 7563 - static int handle_pcommit(struct kvm_vcpu *vcpu) 7564 - { 7565 - /* we never catch pcommit instruct for L1 guest. */ 7566 - WARN_ON(1); 7567 - return 1; 7568 - } 7569 - 7570 7567 /* 7571 7568 * The exit handlers return 1 if the exit was handled fully and guest execution 7572 7569 * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs ··· 7610 7621 [EXIT_REASON_XSAVES] = handle_xsaves, 7611 7622 [EXIT_REASON_XRSTORS] = handle_xrstors, 7612 7623 [EXIT_REASON_PML_FULL] = handle_pml_full, 7613 - [EXIT_REASON_PCOMMIT] = handle_pcommit, 7614 7624 }; 7615 7625 7616 7626 static const int kvm_vmx_max_exit_handlers = ··· 7918 7930 * the XSS exit bitmap in vmcs12. 7919 7931 */ 7920 7932 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); 7921 - case EXIT_REASON_PCOMMIT: 7922 - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT); 7923 7933 default: 7924 7934 return true; 7925 7935 } ··· 9080 9094 9081 9095 if (cpu_has_secondary_exec_ctrls()) 9082 9096 vmcs_set_secondary_exec_control(secondary_exec_ctl); 9083 - 9084 - if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { 9085 - if (guest_cpuid_has_pcommit(vcpu)) 9086 - vmx->nested.nested_vmx_secondary_ctls_high |= 9087 - SECONDARY_EXEC_PCOMMIT; 9088 - else 9089 - vmx->nested.nested_vmx_secondary_ctls_high &= 9090 - ~SECONDARY_EXEC_PCOMMIT; 9091 - } 9092 9097 } 9093 9098 9094 9099 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ··· 9692 9715 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 9693 9716 SECONDARY_EXEC_RDTSCP | 9694 9717 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 9695 - SECONDARY_EXEC_APIC_REGISTER_VIRT | 9696 - SECONDARY_EXEC_PCOMMIT); 9718 + SECONDARY_EXEC_APIC_REGISTER_VIRT); 9697 9719 if (nested_cpu_has(vmcs12, 9698 9720 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) 9699 9721 exec_control |= vmcs12->secondary_vm_exec_control;
+1 -1
arch/x86/lib/x86-opcode-map.txt
··· 1012 1012 4: XSAVE 1013 1013 5: XRSTOR | lfence (11B) 1014 1014 6: XSAVEOPT | clwb (66) | mfence (11B) 1015 - 7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) 1015 + 7: clflush | clflushopt (66) | sfence (11B) 1016 1016 EndTable 1017 1017 1018 1018 GrpTable: Grp16
+1 -26
drivers/acpi/Kconfig
··· 454 454 455 455 If you are unsure what to do, do not enable this option. 456 456 457 - config ACPI_NFIT 458 - tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" 459 - depends on PHYS_ADDR_T_64BIT 460 - depends on BLK_DEV 461 - depends on ARCH_HAS_MMIO_FLUSH 462 - select LIBNVDIMM 463 - help 464 - Infrastructure to probe ACPI 6 compliant platforms for 465 - NVDIMMs (NFIT) and register a libnvdimm device tree. In 466 - addition to storage devices this also enables libnvdimm to pass 467 - ACPI._DSM messages for platform/dimm configuration. 468 - 469 - To compile this driver as a module, choose M here: 470 - the module will be called nfit. 471 - 472 - config ACPI_NFIT_DEBUG 473 - bool "NFIT DSM debug" 474 - depends on ACPI_NFIT 475 - depends on DYNAMIC_DEBUG 476 - default n 477 - help 478 - Enabling this option causes the nfit driver to dump the 479 - input and output buffers of _DSM operations on the ACPI0012 480 - device and its children. This can be very verbose, so leave 481 - it disabled unless you are debugging a hardware / firmware 482 - issue. 457 + source "drivers/acpi/nfit/Kconfig" 483 458 484 459 source "drivers/acpi/apei/Kconfig" 485 460 source "drivers/acpi/dptf/Kconfig"
+1 -1
drivers/acpi/Makefile
··· 69 69 obj-$(CONFIG_ACPI_PROCESSOR) += processor.o 70 70 obj-$(CONFIG_ACPI) += container.o 71 71 obj-$(CONFIG_ACPI_THERMAL) += thermal.o 72 - obj-$(CONFIG_ACPI_NFIT) += nfit.o 72 + obj-$(CONFIG_ACPI_NFIT) += nfit/ 73 73 obj-$(CONFIG_ACPI) += acpi_memhotplug.o 74 74 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o 75 75 obj-$(CONFIG_ACPI_BATTERY) += battery.o
+359 -288
drivers/acpi/nfit.c drivers/acpi/nfit/core.c
··· 15 15 #include <linux/module.h> 16 16 #include <linux/mutex.h> 17 17 #include <linux/ndctl.h> 18 + #include <linux/sysfs.h> 18 19 #include <linux/delay.h> 19 20 #include <linux/list.h> 20 21 #include <linux/acpi.h> ··· 50 49 module_param(disable_vendor_specific, bool, S_IRUGO); 51 50 MODULE_PARM_DESC(disable_vendor_specific, 52 51 "Limit commands to the publicly specified set\n"); 52 + 53 + LIST_HEAD(acpi_descs); 54 + DEFINE_MUTEX(acpi_desc_lock); 53 55 54 56 static struct workqueue_struct *nfit_wq; 55 57 ··· 364 360 return to_name[type]; 365 361 } 366 362 367 - static int nfit_spa_type(struct acpi_nfit_system_address *spa) 363 + int nfit_spa_type(struct acpi_nfit_system_address *spa) 368 364 { 369 365 int i; 370 366 ··· 378 374 struct nfit_table_prev *prev, 379 375 struct acpi_nfit_system_address *spa) 380 376 { 381 - size_t length = min_t(size_t, sizeof(*spa), spa->header.length); 382 377 struct device *dev = acpi_desc->dev; 383 378 struct nfit_spa *nfit_spa; 384 379 380 + if (spa->header.length != sizeof(*spa)) 381 + return false; 382 + 385 383 list_for_each_entry(nfit_spa, &prev->spas, list) { 386 - if (memcmp(nfit_spa->spa, spa, length) == 0) { 384 + if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) { 387 385 list_move_tail(&nfit_spa->list, &acpi_desc->spas); 388 386 return true; 389 387 } 390 388 } 391 389 392 - nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL); 390 + nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa), 391 + GFP_KERNEL); 393 392 if (!nfit_spa) 394 393 return false; 395 394 INIT_LIST_HEAD(&nfit_spa->list); 396 - nfit_spa->spa = spa; 395 + memcpy(nfit_spa->spa, spa, sizeof(*spa)); 397 396 list_add_tail(&nfit_spa->list, &acpi_desc->spas); 398 397 dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__, 399 398 spa->range_index, ··· 408 401 struct nfit_table_prev *prev, 409 402 struct acpi_nfit_memory_map *memdev) 410 403 { 411 - size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length); 412 404 struct device 
*dev = acpi_desc->dev; 413 405 struct nfit_memdev *nfit_memdev; 414 406 407 + if (memdev->header.length != sizeof(*memdev)) 408 + return false; 409 + 415 410 list_for_each_entry(nfit_memdev, &prev->memdevs, list) 416 - if (memcmp(nfit_memdev->memdev, memdev, length) == 0) { 411 + if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) { 417 412 list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs); 418 413 return true; 419 414 } 420 415 421 - nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL); 416 + nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev), 417 + GFP_KERNEL); 422 418 if (!nfit_memdev) 423 419 return false; 424 420 INIT_LIST_HEAD(&nfit_memdev->list); 425 - nfit_memdev->memdev = memdev; 421 + memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev)); 426 422 list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs); 427 423 dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n", 428 424 __func__, memdev->device_handle, memdev->range_index, ··· 433 423 return true; 434 424 } 435 425 426 + /* 427 + * An implementation may provide a truncated control region if no block windows 428 + * are defined. 
429 + */ 430 + static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr) 431 + { 432 + if (dcr->header.length < offsetof(struct acpi_nfit_control_region, 433 + window_size)) 434 + return 0; 435 + if (dcr->windows) 436 + return sizeof(*dcr); 437 + return offsetof(struct acpi_nfit_control_region, window_size); 438 + } 439 + 436 440 static bool add_dcr(struct acpi_nfit_desc *acpi_desc, 437 441 struct nfit_table_prev *prev, 438 442 struct acpi_nfit_control_region *dcr) 439 443 { 440 - size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length); 441 444 struct device *dev = acpi_desc->dev; 442 445 struct nfit_dcr *nfit_dcr; 443 446 447 + if (!sizeof_dcr(dcr)) 448 + return false; 449 + 444 450 list_for_each_entry(nfit_dcr, &prev->dcrs, list) 445 - if (memcmp(nfit_dcr->dcr, dcr, length) == 0) { 451 + if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) { 446 452 list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs); 447 453 return true; 448 454 } 449 455 450 - nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL); 456 + nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr), 457 + GFP_KERNEL); 451 458 if (!nfit_dcr) 452 459 return false; 453 460 INIT_LIST_HEAD(&nfit_dcr->list); 454 - nfit_dcr->dcr = dcr; 461 + memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)); 455 462 list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs); 456 463 dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__, 457 464 dcr->region_index, dcr->windows); ··· 479 452 struct nfit_table_prev *prev, 480 453 struct acpi_nfit_data_region *bdw) 481 454 { 482 - size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length); 483 455 struct device *dev = acpi_desc->dev; 484 456 struct nfit_bdw *nfit_bdw; 485 457 458 + if (bdw->header.length != sizeof(*bdw)) 459 + return false; 486 460 list_for_each_entry(nfit_bdw, &prev->bdws, list) 487 - if (memcmp(nfit_bdw->bdw, bdw, length) == 0) { 461 + if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) { 488 462 list_move_tail(&nfit_bdw->list, 
&acpi_desc->bdws); 489 463 return true; 490 464 } 491 465 492 - nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL); 466 + nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw), 467 + GFP_KERNEL); 493 468 if (!nfit_bdw) 494 469 return false; 495 470 INIT_LIST_HEAD(&nfit_bdw->list); 496 - nfit_bdw->bdw = bdw; 471 + memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw)); 497 472 list_add_tail(&nfit_bdw->list, &acpi_desc->bdws); 498 473 dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__, 499 474 bdw->region_index, bdw->windows); 500 475 return true; 501 476 } 502 477 478 + static size_t sizeof_idt(struct acpi_nfit_interleave *idt) 479 + { 480 + if (idt->header.length < sizeof(*idt)) 481 + return 0; 482 + return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1); 483 + } 484 + 503 485 static bool add_idt(struct acpi_nfit_desc *acpi_desc, 504 486 struct nfit_table_prev *prev, 505 487 struct acpi_nfit_interleave *idt) 506 488 { 507 - size_t length = min_t(size_t, sizeof(*idt), idt->header.length); 508 489 struct device *dev = acpi_desc->dev; 509 490 struct nfit_idt *nfit_idt; 510 491 511 - list_for_each_entry(nfit_idt, &prev->idts, list) 512 - if (memcmp(nfit_idt->idt, idt, length) == 0) { 492 + if (!sizeof_idt(idt)) 493 + return false; 494 + 495 + list_for_each_entry(nfit_idt, &prev->idts, list) { 496 + if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt)) 497 + continue; 498 + 499 + if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) { 513 500 list_move_tail(&nfit_idt->list, &acpi_desc->idts); 514 501 return true; 515 502 } 503 + } 516 504 517 - nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL); 505 + nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt), 506 + GFP_KERNEL); 518 507 if (!nfit_idt) 519 508 return false; 520 509 INIT_LIST_HEAD(&nfit_idt->list); 521 - nfit_idt->idt = idt; 510 + memcpy(nfit_idt->idt, idt, sizeof_idt(idt)); 522 511 list_add_tail(&nfit_idt->list, &acpi_desc->idts); 523 512 dev_dbg(dev, "%s: idt index: %d num_lines: 
%d\n", __func__, 524 513 idt->interleave_index, idt->line_count); 525 514 return true; 526 515 } 527 516 517 + static size_t sizeof_flush(struct acpi_nfit_flush_address *flush) 518 + { 519 + if (flush->header.length < sizeof(*flush)) 520 + return 0; 521 + return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1); 522 + } 523 + 528 524 static bool add_flush(struct acpi_nfit_desc *acpi_desc, 529 525 struct nfit_table_prev *prev, 530 526 struct acpi_nfit_flush_address *flush) 531 527 { 532 - size_t length = min_t(size_t, sizeof(*flush), flush->header.length); 533 528 struct device *dev = acpi_desc->dev; 534 529 struct nfit_flush *nfit_flush; 535 530 536 - list_for_each_entry(nfit_flush, &prev->flushes, list) 537 - if (memcmp(nfit_flush->flush, flush, length) == 0) { 531 + if (!sizeof_flush(flush)) 532 + return false; 533 + 534 + list_for_each_entry(nfit_flush, &prev->flushes, list) { 535 + if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush)) 536 + continue; 537 + 538 + if (memcmp(nfit_flush->flush, flush, 539 + sizeof_flush(flush)) == 0) { 538 540 list_move_tail(&nfit_flush->list, &acpi_desc->flushes); 539 541 return true; 540 542 } 543 + } 541 544 542 - nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL); 545 + nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush) 546 + + sizeof_flush(flush), GFP_KERNEL); 543 547 if (!nfit_flush) 544 548 return false; 545 549 INIT_LIST_HEAD(&nfit_flush->list); 546 - nfit_flush->flush = flush; 550 + memcpy(nfit_flush->flush, flush, sizeof_flush(flush)); 547 551 list_add_tail(&nfit_flush->list, &acpi_desc->flushes); 548 552 dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, 549 553 flush->device_handle, flush->hint_count); ··· 672 614 { 673 615 u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; 674 616 struct nfit_memdev *nfit_memdev; 675 - struct nfit_flush *nfit_flush; 676 617 struct nfit_bdw *nfit_bdw; 677 618 struct nfit_idt *nfit_idt; 678 619 u16 idt_idx, range_index; ··· 704 647 
nfit_mem->idt_bdw = nfit_idt->idt; 705 648 break; 706 649 } 707 - 708 - list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { 709 - if (nfit_flush->flush->device_handle != 710 - nfit_memdev->memdev->device_handle) 711 - continue; 712 - nfit_mem->nfit_flush = nfit_flush; 713 - break; 714 - } 715 650 break; 716 651 } 717 652 } ··· 724 675 } 725 676 726 677 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { 678 + struct nfit_flush *nfit_flush; 727 679 struct nfit_dcr *nfit_dcr; 728 680 u32 device_handle; 729 681 u16 dcr; ··· 768 718 else if (nfit_mem->dcr->windows == 0 769 719 && nfit_dcr->dcr->windows) 770 720 nfit_mem->dcr = nfit_dcr->dcr; 721 + break; 722 + } 723 + 724 + list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { 725 + struct acpi_nfit_flush_address *flush; 726 + u16 i; 727 + 728 + if (nfit_flush->flush->device_handle != device_handle) 729 + continue; 730 + nfit_mem->nfit_flush = nfit_flush; 731 + flush = nfit_flush->flush; 732 + nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev, 733 + flush->hint_count 734 + * sizeof(struct resource), GFP_KERNEL); 735 + if (!nfit_mem->flush_wpq) 736 + return -ENOMEM; 737 + for (i = 0; i < flush->hint_count; i++) { 738 + struct resource *res = &nfit_mem->flush_wpq[i]; 739 + 740 + res->start = flush->hint_address[i]; 741 + res->end = res->start + 8 - 1; 742 + } 771 743 break; 772 744 } 773 745 ··· 878 806 } 879 807 static DEVICE_ATTR_RO(revision); 880 808 809 + /* 810 + * This shows the number of full Address Range Scrubs that have been 811 + * completed since driver load time. Userspace can wait on this using 812 + * select/poll etc. 
A '+' at the end indicates an ARS is in progress 813 + */ 814 + static ssize_t scrub_show(struct device *dev, 815 + struct device_attribute *attr, char *buf) 816 + { 817 + struct nvdimm_bus_descriptor *nd_desc; 818 + ssize_t rc = -ENXIO; 819 + 820 + device_lock(dev); 821 + nd_desc = dev_get_drvdata(dev); 822 + if (nd_desc) { 823 + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 824 + 825 + rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, 826 + (work_busy(&acpi_desc->work)) ? "+\n" : "\n"); 827 + } 828 + device_unlock(dev); 829 + return rc; 830 + } 831 + 832 + static ssize_t scrub_store(struct device *dev, 833 + struct device_attribute *attr, const char *buf, size_t size) 834 + { 835 + struct nvdimm_bus_descriptor *nd_desc; 836 + ssize_t rc; 837 + long val; 838 + 839 + rc = kstrtol(buf, 0, &val); 840 + if (rc) 841 + return rc; 842 + if (val != 1) 843 + return -EINVAL; 844 + 845 + device_lock(dev); 846 + nd_desc = dev_get_drvdata(dev); 847 + if (nd_desc) { 848 + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 849 + 850 + rc = acpi_nfit_ars_rescan(acpi_desc); 851 + } 852 + device_unlock(dev); 853 + if (rc) 854 + return rc; 855 + return size; 856 + } 857 + static DEVICE_ATTR_RW(scrub); 858 + 859 + static bool ars_supported(struct nvdimm_bus *nvdimm_bus) 860 + { 861 + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 862 + const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START 863 + | 1 << ND_CMD_ARS_STATUS; 864 + 865 + return (nd_desc->cmd_mask & mask) == mask; 866 + } 867 + 868 + static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n) 869 + { 870 + struct device *dev = container_of(kobj, struct device, kobj); 871 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 872 + 873 + if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus)) 874 + return 0; 875 + return a->mode; 876 + } 877 + 881 878 static struct attribute *acpi_nfit_attributes[] = { 882 879 &dev_attr_revision.attr, 880 + 
&dev_attr_scrub.attr, 883 881 NULL, 884 882 }; 885 883 886 884 static struct attribute_group acpi_nfit_attribute_group = { 887 885 .name = "nfit", 888 886 .attrs = acpi_nfit_attributes, 887 + .is_visible = nfit_visible, 889 888 }; 890 889 891 890 static const struct attribute_group *acpi_nfit_attribute_groups[] = { ··· 1273 1130 } 1274 1131 1275 1132 /* 1276 - * Until standardization materializes we need to consider up to 3 1133 + * Until standardization materializes we need to consider 4 1277 1134 * different command sets. Note, that checking for function0 (bit0) 1278 1135 * tells us if any commands are reachable through this uuid. 1279 1136 */ 1280 - for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++) 1137 + for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++) 1281 1138 if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) 1282 1139 break; 1283 1140 ··· 1287 1144 dsm_mask = 0x3fe; 1288 1145 if (disable_vendor_specific) 1289 1146 dsm_mask &= ~(1 << ND_CMD_VENDOR); 1290 - } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) 1147 + } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) { 1291 1148 dsm_mask = 0x1c3c76; 1292 - else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) { 1149 + } else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) { 1293 1150 dsm_mask = 0x1fe; 1294 1151 if (disable_vendor_specific) 1295 1152 dsm_mask &= ~(1 << 8); 1153 + } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) { 1154 + dsm_mask = 0xffffffff; 1296 1155 } else { 1297 1156 dev_dbg(dev, "unknown dimm command family\n"); 1298 1157 nfit_mem->family = -1; ··· 1316 1171 int dimm_count = 0; 1317 1172 1318 1173 list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { 1174 + struct acpi_nfit_flush_address *flush; 1319 1175 unsigned long flags = 0, cmd_mask; 1320 1176 struct nvdimm *nvdimm; 1321 1177 u32 device_handle; ··· 1350 1204 if (nfit_mem->family == NVDIMM_FAMILY_INTEL) 1351 1205 cmd_mask |= nfit_mem->dsm_mask; 1352 1206 1207 + flush = nfit_mem->nfit_flush ? 
nfit_mem->nfit_flush->flush 1208 + : NULL; 1353 1209 nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, 1354 1210 acpi_nfit_dimm_attribute_groups, 1355 - flags, cmd_mask); 1211 + flags, cmd_mask, flush ? flush->hint_count : 0, 1212 + nfit_mem->flush_wpq); 1356 1213 if (!nvdimm) 1357 1214 return -ENOMEM; 1358 1215 ··· 1523 1374 return mmio->base_offset + line_offset + table_offset + sub_line_offset; 1524 1375 } 1525 1376 1526 - static void wmb_blk(struct nfit_blk *nfit_blk) 1527 - { 1528 - 1529 - if (nfit_blk->nvdimm_flush) { 1530 - /* 1531 - * The first wmb() is needed to 'sfence' all previous writes 1532 - * such that they are architecturally visible for the platform 1533 - * buffer flush. Note that we've already arranged for pmem 1534 - * writes to avoid the cache via arch_memcpy_to_pmem(). The 1535 - * final wmb() ensures ordering for the NVDIMM flush write. 1536 - */ 1537 - wmb(); 1538 - writeq(1, nfit_blk->nvdimm_flush); 1539 - wmb(); 1540 - } else 1541 - wmb_pmem(); 1542 - } 1543 - 1544 1377 static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) 1545 1378 { 1546 1379 struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; ··· 1557 1426 offset = to_interleave_offset(offset, mmio); 1558 1427 1559 1428 writeq(cmd, mmio->addr.base + offset); 1560 - wmb_blk(nfit_blk); 1429 + nvdimm_flush(nfit_blk->nd_region); 1561 1430 1562 1431 if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) 1563 1432 readq(mmio->addr.base + offset); ··· 1608 1477 } 1609 1478 1610 1479 if (rw) 1611 - wmb_blk(nfit_blk); 1480 + nvdimm_flush(nfit_blk->nd_region); 1612 1481 1613 1482 rc = read_blk_stat(nfit_blk, lane) ? 
-EIO : 0; 1614 1483 return rc; ··· 1638 1507 nd_region_release_lane(nd_region, lane); 1639 1508 1640 1509 return rc; 1641 - } 1642 - 1643 - static void nfit_spa_mapping_release(struct kref *kref) 1644 - { 1645 - struct nfit_spa_mapping *spa_map = to_spa_map(kref); 1646 - struct acpi_nfit_system_address *spa = spa_map->spa; 1647 - struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc; 1648 - 1649 - WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); 1650 - dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index); 1651 - if (spa_map->type == SPA_MAP_APERTURE) 1652 - memunmap((void __force *)spa_map->addr.aperture); 1653 - else 1654 - iounmap(spa_map->addr.base); 1655 - release_mem_region(spa->address, spa->length); 1656 - list_del(&spa_map->list); 1657 - kfree(spa_map); 1658 - } 1659 - 1660 - static struct nfit_spa_mapping *find_spa_mapping( 1661 - struct acpi_nfit_desc *acpi_desc, 1662 - struct acpi_nfit_system_address *spa) 1663 - { 1664 - struct nfit_spa_mapping *spa_map; 1665 - 1666 - WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); 1667 - list_for_each_entry(spa_map, &acpi_desc->spa_maps, list) 1668 - if (spa_map->spa == spa) 1669 - return spa_map; 1670 - 1671 - return NULL; 1672 - } 1673 - 1674 - static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, 1675 - struct acpi_nfit_system_address *spa) 1676 - { 1677 - struct nfit_spa_mapping *spa_map; 1678 - 1679 - mutex_lock(&acpi_desc->spa_map_mutex); 1680 - spa_map = find_spa_mapping(acpi_desc, spa); 1681 - 1682 - if (spa_map) 1683 - kref_put(&spa_map->kref, nfit_spa_mapping_release); 1684 - mutex_unlock(&acpi_desc->spa_map_mutex); 1685 - } 1686 - 1687 - static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, 1688 - struct acpi_nfit_system_address *spa, enum spa_map_type type) 1689 - { 1690 - resource_size_t start = spa->address; 1691 - resource_size_t n = spa->length; 1692 - struct nfit_spa_mapping *spa_map; 1693 - struct resource *res; 1694 - 1695 - 
WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); 1696 - 1697 - spa_map = find_spa_mapping(acpi_desc, spa); 1698 - if (spa_map) { 1699 - kref_get(&spa_map->kref); 1700 - return spa_map->addr.base; 1701 - } 1702 - 1703 - spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL); 1704 - if (!spa_map) 1705 - return NULL; 1706 - 1707 - INIT_LIST_HEAD(&spa_map->list); 1708 - spa_map->spa = spa; 1709 - kref_init(&spa_map->kref); 1710 - spa_map->acpi_desc = acpi_desc; 1711 - 1712 - res = request_mem_region(start, n, dev_name(acpi_desc->dev)); 1713 - if (!res) 1714 - goto err_mem; 1715 - 1716 - spa_map->type = type; 1717 - if (type == SPA_MAP_APERTURE) 1718 - spa_map->addr.aperture = (void __pmem *)memremap(start, n, 1719 - ARCH_MEMREMAP_PMEM); 1720 - else 1721 - spa_map->addr.base = ioremap_nocache(start, n); 1722 - 1723 - 1724 - if (!spa_map->addr.base) 1725 - goto err_map; 1726 - 1727 - list_add_tail(&spa_map->list, &acpi_desc->spa_maps); 1728 - return spa_map->addr.base; 1729 - 1730 - err_map: 1731 - release_mem_region(start, n); 1732 - err_mem: 1733 - kfree(spa_map); 1734 - return NULL; 1735 - } 1736 - 1737 - /** 1738 - * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges 1739 - * @nvdimm_bus: NFIT-bus that provided the spa table entry 1740 - * @nfit_spa: spa table to map 1741 - * @type: aperture or control region 1742 - * 1743 - * In the case where block-data-window apertures and 1744 - * dimm-control-regions are interleaved they will end up sharing a 1745 - * single request_mem_region() + ioremap() for the address range. In 1746 - * the style of devm nfit_spa_map() mappings are automatically dropped 1747 - * when all region devices referencing the same mapping are disabled / 1748 - * unbound. 
1749 - */ 1750 - static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, 1751 - struct acpi_nfit_system_address *spa, enum spa_map_type type) 1752 - { 1753 - void __iomem *iomem; 1754 - 1755 - mutex_lock(&acpi_desc->spa_map_mutex); 1756 - iomem = __nfit_spa_map(acpi_desc, spa, type); 1757 - mutex_unlock(&acpi_desc->spa_map_mutex); 1758 - 1759 - return iomem; 1760 1510 } 1761 1511 1762 1512 static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, ··· 1681 1669 struct device *dev) 1682 1670 { 1683 1671 struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 1684 - struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 1685 1672 struct nd_blk_region *ndbr = to_nd_blk_region(dev); 1686 - struct nfit_flush *nfit_flush; 1687 1673 struct nfit_blk_mmio *mmio; 1688 1674 struct nfit_blk *nfit_blk; 1689 1675 struct nfit_mem *nfit_mem; ··· 1707 1697 /* map block aperture memory */ 1708 1698 nfit_blk->bdw_offset = nfit_mem->bdw->offset; 1709 1699 mmio = &nfit_blk->mmio[BDW]; 1710 - mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, 1711 - SPA_MAP_APERTURE); 1700 + mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address, 1701 + nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM); 1712 1702 if (!mmio->addr.base) { 1713 1703 dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, 1714 1704 nvdimm_name(nvdimm)); ··· 1730 1720 nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; 1731 1721 nfit_blk->stat_offset = nfit_mem->dcr->status_offset; 1732 1722 mmio = &nfit_blk->mmio[DCR]; 1733 - mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, 1734 - SPA_MAP_CONTROL); 1723 + mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address, 1724 + nfit_mem->spa_dcr->length); 1735 1725 if (!mmio->addr.base) { 1736 1726 dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, 1737 1727 nvdimm_name(nvdimm)); ··· 1756 1746 return rc; 1757 1747 } 1758 1748 1759 - nfit_flush = nfit_mem->nfit_flush; 1760 - if (nfit_flush && 
nfit_flush->flush->hint_count != 0) { 1761 - nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, 1762 - nfit_flush->flush->hint_address[0], 8); 1763 - if (!nfit_blk->nvdimm_flush) 1764 - return -ENOMEM; 1765 - } 1766 - 1767 - if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush) 1749 + if (nvdimm_has_flush(nfit_blk->nd_region) < 0) 1768 1750 dev_warn(dev, "unable to guarantee persistence of writes\n"); 1769 1751 1770 1752 if (mmio->line_size == 0) ··· 1773 1771 } 1774 1772 1775 1773 return 0; 1776 - } 1777 - 1778 - static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, 1779 - struct device *dev) 1780 - { 1781 - struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 1782 - struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 1783 - struct nd_blk_region *ndbr = to_nd_blk_region(dev); 1784 - struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); 1785 - int i; 1786 - 1787 - if (!nfit_blk) 1788 - return; /* never enabled */ 1789 - 1790 - /* auto-free BLK spa mappings */ 1791 - for (i = 0; i < 2; i++) { 1792 - struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i]; 1793 - 1794 - if (mmio->addr.base) 1795 - nfit_spa_unmap(acpi_desc, mmio->spa); 1796 - } 1797 - nd_blk_region_set_provider_data(ndbr, NULL); 1798 - /* devm will free nfit_blk */ 1799 1774 } 1800 1775 1801 1776 static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, ··· 1898 1919 if (ret) 1899 1920 return ret; 1900 1921 1901 - ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res); 1902 - if (ret) { 1903 - remove_resource(res); 1922 + ret = devm_add_action_or_reset(acpi_desc->dev, 1923 + acpi_nfit_remove_resource, 1924 + res); 1925 + if (ret) 1904 1926 return ret; 1905 - } 1906 1927 1907 1928 return 0; 1908 1929 } ··· 1948 1969 ndr_desc->num_mappings = blk_valid; 1949 1970 ndbr_desc = to_blk_region_desc(ndr_desc); 1950 1971 ndbr_desc->enable = acpi_nfit_blk_region_enable; 1951 - ndbr_desc->disable = acpi_nfit_blk_region_disable; 1952 1972 ndbr_desc->do_io = 
acpi_desc->blk_do_io; 1953 1973 nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, 1954 1974 ndr_desc); ··· 1957 1979 } 1958 1980 1959 1981 return 0; 1982 + } 1983 + 1984 + static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa) 1985 + { 1986 + return (nfit_spa_type(spa) == NFIT_SPA_VDISK || 1987 + nfit_spa_type(spa) == NFIT_SPA_VCD || 1988 + nfit_spa_type(spa) == NFIT_SPA_PDISK || 1989 + nfit_spa_type(spa) == NFIT_SPA_PCD); 1960 1990 } 1961 1991 1962 1992 static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ··· 1982 1996 if (nfit_spa->nd_region) 1983 1997 return 0; 1984 1998 1985 - if (spa->range_index == 0) { 1999 + if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) { 1986 2000 dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n", 1987 2001 __func__); 1988 2002 return 0; ··· 2043 2057 rc = -ENOMEM; 2044 2058 } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { 2045 2059 nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, 2060 + ndr_desc); 2061 + if (!nfit_spa->nd_region) 2062 + rc = -ENOMEM; 2063 + } else if (nfit_spa_is_virtual(spa)) { 2064 + nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus, 2046 2065 ndr_desc); 2047 2066 if (!nfit_spa->nd_region) 2048 2067 rc = -ENOMEM; ··· 2130 2139 unsigned int tmo = scrub_timeout; 2131 2140 int rc; 2132 2141 2133 - if (nfit_spa->ars_done || !nfit_spa->nd_region) 2142 + if (!nfit_spa->ars_required || !nfit_spa->nd_region) 2134 2143 return; 2135 2144 2136 2145 rc = ars_start(acpi_desc, nfit_spa); ··· 2219 2228 * firmware initiated scrubs to complete and then we go search for the 2220 2229 * affected spa regions to mark them scanned. In the second phase we 2221 2230 * initiate a directed scrub for every range that was not scrubbed in 2222 - * phase 1. 2231 + * phase 1. 
If we're called for a 'rescan', we harmlessly pass through 2232 + * the first phase, but really only care about running phase 2, where 2233 + * regions can be notified of new poison. 2223 2234 */ 2224 2235 2225 2236 /* process platform firmware initiated scrubs */ ··· 2324 2331 * Flag all the ranges that still need scrubbing, but 2325 2332 * register them now to make data available. 2326 2333 */ 2327 - if (nfit_spa->nd_region) 2328 - nfit_spa->ars_done = 1; 2329 - else 2334 + if (!nfit_spa->nd_region) { 2335 + nfit_spa->ars_required = 1; 2330 2336 acpi_nfit_register_region(acpi_desc, nfit_spa); 2337 + } 2331 2338 } 2332 2339 2333 2340 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) 2334 2341 acpi_nfit_async_scrub(acpi_desc, nfit_spa); 2342 + acpi_desc->scrub_count++; 2343 + if (acpi_desc->scrub_count_state) 2344 + sysfs_notify_dirent(acpi_desc->scrub_count_state); 2335 2345 mutex_unlock(&acpi_desc->init_mutex); 2336 2346 } 2337 2347 ··· 2372 2376 return 0; 2373 2377 } 2374 2378 2375 - int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) 2379 + static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc) 2380 + { 2381 + struct device *dev = acpi_desc->dev; 2382 + struct kernfs_node *nfit; 2383 + struct device *bus_dev; 2384 + 2385 + if (!ars_supported(acpi_desc->nvdimm_bus)) 2386 + return 0; 2387 + 2388 + bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2389 + nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit"); 2390 + if (!nfit) { 2391 + dev_err(dev, "sysfs_get_dirent 'nfit' failed\n"); 2392 + return -ENODEV; 2393 + } 2394 + acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub"); 2395 + sysfs_put(nfit); 2396 + if (!acpi_desc->scrub_count_state) { 2397 + dev_err(dev, "sysfs_get_dirent 'scrub' failed\n"); 2398 + return -ENODEV; 2399 + } 2400 + 2401 + return 0; 2402 + } 2403 + 2404 + static void acpi_nfit_destruct(void *data) 2405 + { 2406 + struct acpi_nfit_desc *acpi_desc = data; 2407 + struct device *bus_dev = 
to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2408 + 2409 + /* 2410 + * Destruct under acpi_desc_lock so that nfit_handle_mce does not 2411 + * race teardown 2412 + */ 2413 + mutex_lock(&acpi_desc_lock); 2414 + acpi_desc->cancel = 1; 2415 + /* 2416 + * Bounce the nvdimm bus lock to make sure any in-flight 2417 + * acpi_nfit_ars_rescan() submissions have had a chance to 2418 + * either submit or see ->cancel set. 2419 + */ 2420 + device_lock(bus_dev); 2421 + device_unlock(bus_dev); 2422 + 2423 + flush_workqueue(nfit_wq); 2424 + if (acpi_desc->scrub_count_state) 2425 + sysfs_put(acpi_desc->scrub_count_state); 2426 + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2427 + acpi_desc->nvdimm_bus = NULL; 2428 + list_del(&acpi_desc->list); 2429 + mutex_unlock(&acpi_desc_lock); 2430 + } 2431 + 2432 + int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz) 2376 2433 { 2377 2434 struct device *dev = acpi_desc->dev; 2378 2435 struct nfit_table_prev prev; 2379 2436 const void *end; 2380 - u8 *data; 2381 2437 int rc; 2438 + 2439 + if (!acpi_desc->nvdimm_bus) { 2440 + acpi_nfit_init_dsms(acpi_desc); 2441 + 2442 + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, 2443 + &acpi_desc->nd_desc); 2444 + if (!acpi_desc->nvdimm_bus) 2445 + return -ENOMEM; 2446 + 2447 + rc = devm_add_action_or_reset(dev, acpi_nfit_destruct, 2448 + acpi_desc); 2449 + if (rc) 2450 + return rc; 2451 + 2452 + rc = acpi_nfit_desc_init_scrub_attr(acpi_desc); 2453 + if (rc) 2454 + return rc; 2455 + 2456 + /* register this acpi_desc for mce notifications */ 2457 + mutex_lock(&acpi_desc_lock); 2458 + list_add_tail(&acpi_desc->list, &acpi_descs); 2459 + mutex_unlock(&acpi_desc_lock); 2460 + } 2382 2461 2383 2462 mutex_lock(&acpi_desc->init_mutex); 2384 2463 ··· 2477 2406 list_cut_position(&prev.flushes, &acpi_desc->flushes, 2478 2407 acpi_desc->flushes.prev); 2479 2408 2480 - data = (u8 *) acpi_desc->nfit; 2481 2409 end = data + sz; 2482 2410 while (!IS_ERR_OR_NULL(data)) 2483 2411 data = 
add_table(acpi_desc, &prev, data, end); ··· 2492 2422 if (rc) 2493 2423 goto out_unlock; 2494 2424 2495 - if (nfit_mem_init(acpi_desc) != 0) { 2496 - rc = -ENOMEM; 2425 + rc = nfit_mem_init(acpi_desc); 2426 + if (rc) 2497 2427 goto out_unlock; 2498 - } 2499 - 2500 - acpi_nfit_init_dsms(acpi_desc); 2501 2428 2502 2429 rc = acpi_nfit_register_dimms(acpi_desc); 2503 2430 if (rc) ··· 2563 2496 return 0; 2564 2497 } 2565 2498 2499 + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) 2500 + { 2501 + struct device *dev = acpi_desc->dev; 2502 + struct nfit_spa *nfit_spa; 2503 + 2504 + if (work_busy(&acpi_desc->work)) 2505 + return -EBUSY; 2506 + 2507 + if (acpi_desc->cancel) 2508 + return 0; 2509 + 2510 + mutex_lock(&acpi_desc->init_mutex); 2511 + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 2512 + struct acpi_nfit_system_address *spa = nfit_spa->spa; 2513 + 2514 + if (nfit_spa_type(spa) != NFIT_SPA_PM) 2515 + continue; 2516 + 2517 + nfit_spa->ars_required = 1; 2518 + } 2519 + queue_work(nfit_wq, &acpi_desc->work); 2520 + dev_dbg(dev, "%s: ars_scan triggered\n", __func__); 2521 + mutex_unlock(&acpi_desc->init_mutex); 2522 + 2523 + return 0; 2524 + } 2525 + 2566 2526 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) 2567 2527 { 2568 2528 struct nvdimm_bus_descriptor *nd_desc; ··· 2599 2505 acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io; 2600 2506 nd_desc = &acpi_desc->nd_desc; 2601 2507 nd_desc->provider_name = "ACPI.NFIT"; 2508 + nd_desc->module = THIS_MODULE; 2602 2509 nd_desc->ndctl = acpi_nfit_ctl; 2603 2510 nd_desc->flush_probe = acpi_nfit_flush_probe; 2604 2511 nd_desc->clear_to_send = acpi_nfit_clear_to_send; 2605 2512 nd_desc->attr_groups = acpi_nfit_attribute_groups; 2606 2513 2607 - INIT_LIST_HEAD(&acpi_desc->spa_maps); 2608 2514 INIT_LIST_HEAD(&acpi_desc->spas); 2609 2515 INIT_LIST_HEAD(&acpi_desc->dcrs); 2610 2516 INIT_LIST_HEAD(&acpi_desc->bdws); ··· 2612 2518 INIT_LIST_HEAD(&acpi_desc->flushes); 2613 2519 
INIT_LIST_HEAD(&acpi_desc->memdevs); 2614 2520 INIT_LIST_HEAD(&acpi_desc->dimms); 2615 - mutex_init(&acpi_desc->spa_map_mutex); 2521 + INIT_LIST_HEAD(&acpi_desc->list); 2616 2522 mutex_init(&acpi_desc->init_mutex); 2617 2523 INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); 2618 2524 } ··· 2626 2532 struct acpi_table_header *tbl; 2627 2533 acpi_status status = AE_OK; 2628 2534 acpi_size sz; 2629 - int rc; 2535 + int rc = 0; 2630 2536 2631 2537 status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz); 2632 2538 if (ACPI_FAILURE(status)) { ··· 2639 2545 if (!acpi_desc) 2640 2546 return -ENOMEM; 2641 2547 acpi_nfit_desc_init(acpi_desc, &adev->dev); 2642 - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); 2643 - if (!acpi_desc->nvdimm_bus) 2644 - return -ENOMEM; 2645 2548 2646 - /* 2647 - * Save the acpi header for later and then skip it, 2648 - * making nfit point to the first nfit table header. 2649 - */ 2549 + /* Save the acpi header for exporting the revision via sysfs */ 2650 2550 acpi_desc->acpi_header = *tbl; 2651 - acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit); 2652 - sz -= sizeof(struct acpi_table_nfit); 2653 2551 2654 2552 /* Evaluate _FIT and override with that if present */ 2655 2553 status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf); 2656 2554 if (ACPI_SUCCESS(status) && buf.length > 0) { 2657 - union acpi_object *obj; 2658 - /* 2659 - * Adjust for the acpi_object header of the _FIT 2660 - */ 2661 - obj = buf.pointer; 2662 - if (obj->type == ACPI_TYPE_BUFFER) { 2663 - acpi_desc->nfit = 2664 - (struct acpi_nfit_header *)obj->buffer.pointer; 2665 - sz = obj->buffer.length; 2666 - } else 2555 + union acpi_object *obj = buf.pointer; 2556 + 2557 + if (obj->type == ACPI_TYPE_BUFFER) 2558 + rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer, 2559 + obj->buffer.length); 2560 + else 2667 2561 dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n", 2668 2562 __func__, (int) obj->type); 2669 - } 2670 - 2671 - rc = 
acpi_nfit_init(acpi_desc, sz); 2672 - if (rc) { 2673 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2674 - return rc; 2675 - } 2676 - return 0; 2563 + kfree(buf.pointer); 2564 + } else 2565 + /* skip over the lead-in header table */ 2566 + rc = acpi_nfit_init(acpi_desc, (void *) tbl 2567 + + sizeof(struct acpi_table_nfit), 2568 + sz - sizeof(struct acpi_table_nfit)); 2569 + return rc; 2677 2570 } 2678 2571 2679 2572 static int acpi_nfit_remove(struct acpi_device *adev) 2680 2573 { 2681 - struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); 2682 - 2683 - acpi_desc->cancel = 1; 2684 - flush_workqueue(nfit_wq); 2685 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2574 + /* see acpi_nfit_destruct */ 2686 2575 return 0; 2687 2576 } 2688 2577 ··· 2673 2596 { 2674 2597 struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); 2675 2598 struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; 2676 - struct acpi_nfit_header *nfit_saved; 2677 - union acpi_object *obj; 2678 2599 struct device *dev = &adev->dev; 2600 + union acpi_object *obj; 2679 2601 acpi_status status; 2680 2602 int ret; 2681 2603 ··· 2692 2616 if (!acpi_desc) 2693 2617 goto out_unlock; 2694 2618 acpi_nfit_desc_init(acpi_desc, &adev->dev); 2695 - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); 2696 - if (!acpi_desc->nvdimm_bus) 2697 - goto out_unlock; 2698 2619 } else { 2699 2620 /* 2700 2621 * Finish previous registration before considering new ··· 2707 2634 goto out_unlock; 2708 2635 } 2709 2636 2710 - nfit_saved = acpi_desc->nfit; 2711 2637 obj = buf.pointer; 2712 2638 if (obj->type == ACPI_TYPE_BUFFER) { 2713 - acpi_desc->nfit = 2714 - (struct acpi_nfit_header *)obj->buffer.pointer; 2715 - ret = acpi_nfit_init(acpi_desc, obj->buffer.length); 2716 - if (ret) { 2717 - /* Merge failed, restore old nfit, and exit */ 2718 - acpi_desc->nfit = nfit_saved; 2639 + ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer, 2640 + obj->buffer.length); 2641 + if (ret) 2719 2642 
dev_err(dev, "failed to merge updated NFIT\n"); 2720 - } 2721 - } else { 2722 - /* Bad _FIT, restore old nfit */ 2643 + } else 2723 2644 dev_err(dev, "Invalid _FIT\n"); 2724 - } 2725 2645 kfree(buf.pointer); 2726 2646 2727 2647 out_unlock: ··· 2759 2693 acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); 2760 2694 acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]); 2761 2695 acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]); 2696 + acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]); 2762 2697 2763 2698 nfit_wq = create_singlethread_workqueue("nfit"); 2764 2699 if (!nfit_wq) 2765 2700 return -ENOMEM; 2701 + 2702 + nfit_mce_register(); 2766 2703 2767 2704 return acpi_bus_register_driver(&acpi_nfit_driver); 2768 2705 } 2769 2706 2770 2707 static __exit void nfit_exit(void) 2771 2708 { 2709 + nfit_mce_unregister(); 2772 2710 acpi_bus_unregister_driver(&acpi_nfit_driver); 2773 2711 destroy_workqueue(nfit_wq); 2712 + WARN_ON(!list_empty(&acpi_descs)); 2774 2713 } 2775 2714 2776 2715 module_init(nfit_init);
+32 -28
drivers/acpi/nfit.h drivers/acpi/nfit/nfit.h
··· 16 16 #define __NFIT_H__ 17 17 #include <linux/workqueue.h> 18 18 #include <linux/libnvdimm.h> 19 + #include <linux/ndctl.h> 19 20 #include <linux/types.h> 20 21 #include <linux/uuid.h> 21 22 #include <linux/acpi.h> ··· 32 31 #define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6" 33 32 #define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e" 34 33 34 + /* https://msdn.microsoft.com/library/windows/hardware/mt604741 */ 35 + #define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05" 36 + 35 37 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \ 36 38 | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ 37 39 | ACPI_NFIT_MEM_NOT_ARMED) ··· 44 40 NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL, 45 41 NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1, 46 42 NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2, 43 + NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT, 47 44 NFIT_SPA_VOLATILE, 48 45 NFIT_SPA_PM, 49 46 NFIT_SPA_DCR, ··· 79 74 }; 80 75 81 76 struct nfit_spa { 82 - struct acpi_nfit_system_address *spa; 83 77 struct list_head list; 84 78 struct nd_region *nd_region; 85 - unsigned int ars_done:1; 79 + unsigned int ars_required:1; 86 80 u32 clear_err_unit; 87 81 u32 max_ars; 82 + struct acpi_nfit_system_address spa[0]; 88 83 }; 89 84 90 85 struct nfit_dcr { 91 - struct acpi_nfit_control_region *dcr; 92 86 struct list_head list; 87 + struct acpi_nfit_control_region dcr[0]; 93 88 }; 94 89 95 90 struct nfit_bdw { 96 - struct acpi_nfit_data_region *bdw; 97 91 struct list_head list; 92 + struct acpi_nfit_data_region bdw[0]; 98 93 }; 99 94 100 95 struct nfit_idt { 101 - struct acpi_nfit_interleave *idt; 102 96 struct list_head list; 97 + struct acpi_nfit_interleave idt[0]; 103 98 }; 104 99 105 100 struct nfit_flush { 106 - struct acpi_nfit_flush_address *flush; 107 101 struct list_head list; 102 + struct acpi_nfit_flush_address flush[0]; 108 103 }; 109 104 110 105 struct nfit_memdev { 111 - struct acpi_nfit_memory_map *memdev; 112 106 struct 
list_head list; 107 + struct acpi_nfit_memory_map memdev[0]; 113 108 }; 114 109 115 110 /* assembled tables for a given dimm/memory-device */ ··· 128 123 struct list_head list; 129 124 struct acpi_device *adev; 130 125 struct acpi_nfit_desc *acpi_desc; 126 + struct resource *flush_wpq; 131 127 unsigned long dsm_mask; 132 128 int family; 133 129 }; ··· 136 130 struct acpi_nfit_desc { 137 131 struct nvdimm_bus_descriptor nd_desc; 138 132 struct acpi_table_header acpi_header; 139 - struct acpi_nfit_header *nfit; 140 - struct mutex spa_map_mutex; 141 133 struct mutex init_mutex; 142 - struct list_head spa_maps; 143 134 struct list_head memdevs; 144 135 struct list_head flushes; 145 136 struct list_head dimms; ··· 149 146 struct nd_cmd_ars_status *ars_status; 150 147 size_t ars_status_size; 151 148 struct work_struct work; 149 + struct list_head list; 150 + struct kernfs_node *scrub_count_state; 151 + unsigned int scrub_count; 152 152 unsigned int cancel:1; 153 153 unsigned long dimm_cmd_force_en; 154 154 unsigned long bus_cmd_force_en; ··· 167 161 struct nd_blk_addr { 168 162 union { 169 163 void __iomem *base; 170 - void __pmem *aperture; 164 + void *aperture; 171 165 }; 172 166 }; 173 167 ··· 186 180 u64 bdw_offset; /* post interleave offset */ 187 181 u64 stat_offset; 188 182 u64 cmd_offset; 189 - void __iomem *nvdimm_flush; 190 183 u32 dimm_flags; 191 184 }; 192 185 193 - enum spa_map_type { 194 - SPA_MAP_CONTROL, 195 - SPA_MAP_APERTURE, 196 - }; 186 + extern struct list_head acpi_descs; 187 + extern struct mutex acpi_desc_lock; 188 + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc); 197 189 198 - struct nfit_spa_mapping { 199 - struct acpi_nfit_desc *acpi_desc; 200 - struct acpi_nfit_system_address *spa; 201 - struct list_head list; 202 - struct kref kref; 203 - enum spa_map_type type; 204 - struct nd_blk_addr addr; 205 - }; 206 - 207 - static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref) 190 + #ifdef CONFIG_X86_MCE 191 + void 
nfit_mce_register(void); 192 + void nfit_mce_unregister(void); 193 + #else 194 + static inline void nfit_mce_register(void) 208 195 { 209 - return container_of(kref, struct nfit_spa_mapping, kref); 210 196 } 197 + static inline void nfit_mce_unregister(void) 198 + { 199 + } 200 + #endif 201 + 202 + int nfit_spa_type(struct acpi_nfit_system_address *spa); 211 203 212 204 static inline struct acpi_nfit_memory_map *__to_nfit_memdev( 213 205 struct nfit_mem *nfit_mem) ··· 222 218 } 223 219 224 220 const u8 *to_nfit_uuid(enum nfit_uuids id); 225 - int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); 221 + int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); 226 222 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); 227 223 #endif /* __NFIT_H__ */
+26
drivers/acpi/nfit/Kconfig
··· 1 + config ACPI_NFIT 2 + tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" 3 + depends on PHYS_ADDR_T_64BIT 4 + depends on BLK_DEV 5 + depends on ARCH_HAS_MMIO_FLUSH 6 + select LIBNVDIMM 7 + help 8 + Infrastructure to probe ACPI 6 compliant platforms for 9 + NVDIMMs (NFIT) and register a libnvdimm device tree. In 10 + addition to storage devices this also enables libnvdimm to pass 11 + ACPI._DSM messages for platform/dimm configuration. 12 + 13 + To compile this driver as a module, choose M here: 14 + the module will be called nfit. 15 + 16 + config ACPI_NFIT_DEBUG 17 + bool "NFIT DSM debug" 18 + depends on ACPI_NFIT 19 + depends on DYNAMIC_DEBUG 20 + default n 21 + help 22 + Enabling this option causes the nfit driver to dump the 23 + input and output buffers of _DSM operations on the ACPI0012 24 + device and its children. This can be very verbose, so leave 25 + it disabled unless you are debugging a hardware / firmware 26 + issue.
+3
drivers/acpi/nfit/Makefile
··· 1 + obj-$(CONFIG_ACPI_NFIT) := nfit.o 2 + nfit-y := core.o 3 + nfit-$(CONFIG_X86_MCE) += mce.o
+89
drivers/acpi/nfit/mce.c
··· 1 + /* 2 + * NFIT - Machine Check Handler 3 + * 4 + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of version 2 of the GNU General Public License as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 + * General Public License for more details. 14 + */ 15 + #include <linux/notifier.h> 16 + #include <linux/acpi.h> 17 + #include <asm/mce.h> 18 + #include "nfit.h" 19 + 20 + static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, 21 + void *data) 22 + { 23 + struct mce *mce = (struct mce *)data; 24 + struct acpi_nfit_desc *acpi_desc; 25 + struct nfit_spa *nfit_spa; 26 + 27 + /* We only care about memory errors */ 28 + if (!(mce->status & MCACOD)) 29 + return NOTIFY_DONE; 30 + 31 + /* 32 + * mce->addr contains the physical addr accessed that caused the 33 + * machine check. We need to walk through the list of NFITs, and see 34 + * if any of them matches that address, and only then start a scrub. 
35 + */ 36 + mutex_lock(&acpi_desc_lock); 37 + list_for_each_entry(acpi_desc, &acpi_descs, list) { 38 + struct device *dev = acpi_desc->dev; 39 + int found_match = 0; 40 + 41 + mutex_lock(&acpi_desc->init_mutex); 42 + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 43 + struct acpi_nfit_system_address *spa = nfit_spa->spa; 44 + 45 + if (nfit_spa_type(spa) == NFIT_SPA_PM) 46 + continue; 47 + /* find the spa that covers the mce addr */ 48 + if (spa->address > mce->addr) 49 + continue; 50 + if ((spa->address + spa->length - 1) < mce->addr) 51 + continue; 52 + found_match = 1; 53 + dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n", 54 + __func__, spa->range_index, spa->address, 55 + spa->length); 56 + /* 57 + * We can break at the first match because we're going 58 + * to rescan all the SPA ranges. There shouldn't be any 59 + * aliasing anyway. 60 + */ 61 + break; 62 + } 63 + mutex_unlock(&acpi_desc->init_mutex); 64 + 65 + /* 66 + * We can ignore an -EBUSY here because if an ARS is already 67 + * in progress, just let that be the last authoritative one 68 + */ 69 + if (found_match) 70 + acpi_nfit_ars_rescan(acpi_desc); 71 + } 72 + 73 + mutex_unlock(&acpi_desc_lock); 74 + return NOTIFY_DONE; 75 + } 76 + 77 + static struct notifier_block nfit_mce_dec = { 78 + .notifier_call = nfit_handle_mce, 79 + }; 80 + 81 + void nfit_mce_register(void) 82 + { 83 + mce_register_decode_chain(&nfit_mce_dec); 84 + } 85 + 86 + void nfit_mce_unregister(void) 87 + { 88 + mce_unregister_decode_chain(&nfit_mce_dec); 89 + }
+2 -2
drivers/block/brd.c
··· 379 379 380 380 #ifdef CONFIG_BLK_DEV_RAM_DAX 381 381 static long brd_direct_access(struct block_device *bdev, sector_t sector, 382 - void __pmem **kaddr, pfn_t *pfn, long size) 382 + void **kaddr, pfn_t *pfn, long size) 383 383 { 384 384 struct brd_device *brd = bdev->bd_disk->private_data; 385 385 struct page *page; ··· 389 389 page = brd_insert_page(brd, sector); 390 390 if (!page) 391 391 return -ENOSPC; 392 - *kaddr = (void __pmem *)page_address(page); 392 + *kaddr = page_address(page); 393 393 *pfn = page_to_pfn_t(page); 394 394 395 395 return PAGE_SIZE;
+2 -4
drivers/dax/dax.c
··· 211 211 } 212 212 dax_dev->dev = dev; 213 213 214 - rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev); 215 - if (rc) { 216 - unregister_dax_dev(dev); 214 + rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev); 215 + if (rc) 217 216 return rc; 218 - } 219 217 220 218 return 0; 221 219
+6 -8
drivers/dax/pmem.c
··· 102 102 if (rc) 103 103 return rc; 104 104 105 - rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref); 106 - if (rc) { 107 - dax_pmem_percpu_exit(&dax_pmem->ref); 105 + rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit, 106 + &dax_pmem->ref); 107 + if (rc) 108 108 return rc; 109 - } 110 109 111 110 addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); 112 111 if (IS_ERR(addr)) 113 112 return PTR_ERR(addr); 114 113 115 - rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref); 116 - if (rc) { 117 - dax_pmem_percpu_kill(&dax_pmem->ref); 114 + rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill, 115 + &dax_pmem->ref); 116 + if (rc) 118 117 return rc; 119 - } 120 118 121 119 nd_region = to_nd_region(dev->parent); 122 120 dax_region = alloc_dax_region(dev, nd_region->id, &res,
+1 -1
drivers/md/dm-linear.c
··· 142 142 } 143 143 144 144 static long linear_direct_access(struct dm_target *ti, sector_t sector, 145 - void __pmem **kaddr, pfn_t *pfn, long size) 145 + void **kaddr, pfn_t *pfn, long size) 146 146 { 147 147 struct linear_c *lc = ti->private; 148 148 struct block_device *bdev = lc->dev->bdev;
+1 -1
drivers/md/dm-snap.c
··· 2303 2303 } 2304 2304 2305 2305 static long origin_direct_access(struct dm_target *ti, sector_t sector, 2306 - void __pmem **kaddr, pfn_t *pfn, long size) 2306 + void **kaddr, pfn_t *pfn, long size) 2307 2307 { 2308 2308 DMWARN("device does not support dax."); 2309 2309 return -EIO;
+1 -1
drivers/md/dm-stripe.c
··· 309 309 } 310 310 311 311 static long stripe_direct_access(struct dm_target *ti, sector_t sector, 312 - void __pmem **kaddr, pfn_t *pfn, long size) 312 + void **kaddr, pfn_t *pfn, long size) 313 313 { 314 314 struct stripe_c *sc = ti->private; 315 315 uint32_t stripe;
+1 -1
drivers/md/dm-target.c
··· 149 149 } 150 150 151 151 static long io_err_direct_access(struct dm_target *ti, sector_t sector, 152 - void __pmem **kaddr, pfn_t *pfn, long size) 152 + void **kaddr, pfn_t *pfn, long size) 153 153 { 154 154 return -EIO; 155 155 }
+1 -1
drivers/md/dm.c
··· 906 906 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); 907 907 908 908 static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, 909 - void __pmem **kaddr, pfn_t *pfn, long size) 909 + void **kaddr, pfn_t *pfn, long size) 910 910 { 911 911 struct mapped_device *md = bdev->bd_disk->private_data; 912 912 struct dm_table *map;
+1 -1
drivers/nvdimm/Kconfig
··· 1 1 menuconfig LIBNVDIMM 2 2 tristate "NVDIMM (Non-Volatile Memory Device) Support" 3 3 depends on PHYS_ADDR_T_64BIT 4 + depends on HAS_IOMEM 4 5 depends on BLK_DEV 5 6 help 6 7 Generic support for non-volatile memory devices including ··· 20 19 config BLK_DEV_PMEM 21 20 tristate "PMEM: Persistent memory block device support" 22 21 default LIBNVDIMM 23 - depends on HAS_IOMEM 24 22 select ND_BTT if BTT 25 23 select ND_PFN if NVDIMM_PFN 26 24 help
+4 -7
drivers/nvdimm/blk.c
··· 267 267 q = blk_alloc_queue(GFP_KERNEL); 268 268 if (!q) 269 269 return -ENOMEM; 270 - if (devm_add_action(dev, nd_blk_release_queue, q)) { 271 - blk_cleanup_queue(q); 270 + if (devm_add_action_or_reset(dev, nd_blk_release_queue, q)) 272 271 return -ENOMEM; 273 - } 274 272 275 273 blk_queue_make_request(q, nd_blk_make_request); 276 274 blk_queue_max_hw_sectors(q, UINT_MAX); ··· 280 282 disk = alloc_disk(0); 281 283 if (!disk) 282 284 return -ENOMEM; 283 - if (devm_add_action(dev, nd_blk_release_disk, disk)) { 284 - put_disk(disk); 285 - return -ENOMEM; 286 - } 287 285 288 286 disk->first_minor = 0; 289 287 disk->fops = &nd_blk_fops; ··· 288 294 nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name); 289 295 set_capacity(disk, 0); 290 296 device_add_disk(dev, disk); 297 + 298 + if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) 299 + return -ENOMEM; 291 300 292 301 if (nsblk_meta_size(nsblk)) { 293 302 int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
+1 -2
drivers/nvdimm/btt_devs.c
··· 198 198 { 199 199 struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); 200 200 201 - if (dev) 202 - __nd_device_register(dev); 201 + __nd_device_register(dev); 203 202 return dev; 204 203 } 205 204
+200 -12
drivers/nvdimm/bus.c
··· 31 31 int nvdimm_major; 32 32 static int nvdimm_bus_major; 33 33 static struct class *nd_class; 34 + static DEFINE_IDA(nd_ida); 34 35 35 36 static int to_nd_device_type(struct device *dev) 36 37 { ··· 61 60 to_nd_device_type(dev)); 62 61 } 63 62 64 - static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) 65 - { 66 - struct nd_device_driver *nd_drv = to_nd_device_driver(drv); 67 - 68 - return !!test_bit(to_nd_device_type(dev), &nd_drv->type); 69 - } 70 - 71 63 static struct module *to_bus_provider(struct device *dev) 72 64 { 73 65 /* pin bus providers while regions are enabled */ 74 66 if (is_nd_pmem(dev) || is_nd_blk(dev)) { 75 67 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 76 68 77 - return nvdimm_bus->module; 69 + return nvdimm_bus->nd_desc->module; 78 70 } 79 71 return NULL; 80 72 } ··· 128 134 dev_name(dev), rc); 129 135 module_put(provider); 130 136 return rc; 137 + } 138 + 139 + static void nvdimm_bus_shutdown(struct device *dev) 140 + { 141 + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 142 + struct nd_device_driver *nd_drv = NULL; 143 + 144 + if (dev->driver) 145 + nd_drv = to_nd_device_driver(dev->driver); 146 + 147 + if (nd_drv && nd_drv->shutdown) { 148 + nd_drv->shutdown(dev); 149 + dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n", 150 + dev->driver->name, dev_name(dev)); 151 + } 131 152 } 132 153 133 154 void nd_device_notify(struct device *dev, enum nvdimm_event event) ··· 217 208 } 218 209 EXPORT_SYMBOL_GPL(nvdimm_clear_poison); 219 210 211 + static int nvdimm_bus_match(struct device *dev, struct device_driver *drv); 212 + 220 213 static struct bus_type nvdimm_bus_type = { 221 214 .name = "nd", 222 215 .uevent = nvdimm_bus_uevent, 223 216 .match = nvdimm_bus_match, 224 217 .probe = nvdimm_bus_probe, 225 218 .remove = nvdimm_bus_remove, 219 + .shutdown = nvdimm_bus_shutdown, 226 220 }; 221 + 222 + static void nvdimm_bus_release(struct device *dev) 223 + { 224 + struct nvdimm_bus *nvdimm_bus; 225 + 226 + 
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 227 + ida_simple_remove(&nd_ida, nvdimm_bus->id); 228 + kfree(nvdimm_bus); 229 + } 230 + 231 + static bool is_nvdimm_bus(struct device *dev) 232 + { 233 + return dev->release == nvdimm_bus_release; 234 + } 235 + 236 + struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) 237 + { 238 + struct device *dev; 239 + 240 + for (dev = nd_dev; dev; dev = dev->parent) 241 + if (is_nvdimm_bus(dev)) 242 + break; 243 + dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); 244 + if (dev) 245 + return to_nvdimm_bus(dev); 246 + return NULL; 247 + } 248 + 249 + struct nvdimm_bus *to_nvdimm_bus(struct device *dev) 250 + { 251 + struct nvdimm_bus *nvdimm_bus; 252 + 253 + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 254 + WARN_ON(!is_nvdimm_bus(dev)); 255 + return nvdimm_bus; 256 + } 257 + EXPORT_SYMBOL_GPL(to_nvdimm_bus); 258 + 259 + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, 260 + struct nvdimm_bus_descriptor *nd_desc) 261 + { 262 + struct nvdimm_bus *nvdimm_bus; 263 + int rc; 264 + 265 + nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); 266 + if (!nvdimm_bus) 267 + return NULL; 268 + INIT_LIST_HEAD(&nvdimm_bus->list); 269 + INIT_LIST_HEAD(&nvdimm_bus->mapping_list); 270 + INIT_LIST_HEAD(&nvdimm_bus->poison_list); 271 + init_waitqueue_head(&nvdimm_bus->probe_wait); 272 + nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); 273 + mutex_init(&nvdimm_bus->reconfig_mutex); 274 + if (nvdimm_bus->id < 0) { 275 + kfree(nvdimm_bus); 276 + return NULL; 277 + } 278 + nvdimm_bus->nd_desc = nd_desc; 279 + nvdimm_bus->dev.parent = parent; 280 + nvdimm_bus->dev.release = nvdimm_bus_release; 281 + nvdimm_bus->dev.groups = nd_desc->attr_groups; 282 + nvdimm_bus->dev.bus = &nvdimm_bus_type; 283 + dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); 284 + rc = device_register(&nvdimm_bus->dev); 285 + if (rc) { 286 + dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); 287 + 
goto err; 288 + } 289 + 290 + return nvdimm_bus; 291 + err: 292 + put_device(&nvdimm_bus->dev); 293 + return NULL; 294 + } 295 + EXPORT_SYMBOL_GPL(nvdimm_bus_register); 296 + 297 + void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) 298 + { 299 + if (!nvdimm_bus) 300 + return; 301 + device_unregister(&nvdimm_bus->dev); 302 + } 303 + EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); 304 + 305 + static int child_unregister(struct device *dev, void *data) 306 + { 307 + /* 308 + * the singular ndctl class device per bus needs to be 309 + * "device_destroy"ed, so skip it here 310 + * 311 + * i.e. remove classless children 312 + */ 313 + if (dev->class) 314 + /* pass */; 315 + else 316 + nd_device_unregister(dev, ND_SYNC); 317 + return 0; 318 + } 319 + 320 + static void free_poison_list(struct list_head *poison_list) 321 + { 322 + struct nd_poison *pl, *next; 323 + 324 + list_for_each_entry_safe(pl, next, poison_list, list) { 325 + list_del(&pl->list); 326 + kfree(pl); 327 + } 328 + list_del_init(poison_list); 329 + } 330 + 331 + static int nd_bus_remove(struct device *dev) 332 + { 333 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 334 + 335 + mutex_lock(&nvdimm_bus_list_mutex); 336 + list_del_init(&nvdimm_bus->list); 337 + mutex_unlock(&nvdimm_bus_list_mutex); 338 + 339 + nd_synchronize(); 340 + device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); 341 + 342 + nvdimm_bus_lock(&nvdimm_bus->dev); 343 + free_poison_list(&nvdimm_bus->poison_list); 344 + nvdimm_bus_unlock(&nvdimm_bus->dev); 345 + 346 + nvdimm_bus_destroy_ndctl(nvdimm_bus); 347 + 348 + return 0; 349 + } 350 + 351 + static int nd_bus_probe(struct device *dev) 352 + { 353 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 354 + int rc; 355 + 356 + rc = nvdimm_bus_create_ndctl(nvdimm_bus); 357 + if (rc) 358 + return rc; 359 + 360 + mutex_lock(&nvdimm_bus_list_mutex); 361 + list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); 362 + mutex_unlock(&nvdimm_bus_list_mutex); 363 + 364 + /* enable bus 
provider attributes to look up their local context */ 365 + dev_set_drvdata(dev, nvdimm_bus->nd_desc); 366 + 367 + return 0; 368 + } 369 + 370 + static struct nd_device_driver nd_bus_driver = { 371 + .probe = nd_bus_probe, 372 + .remove = nd_bus_remove, 373 + .drv = { 374 + .name = "nd_bus", 375 + .suppress_bind_attrs = true, 376 + .bus = &nvdimm_bus_type, 377 + .owner = THIS_MODULE, 378 + .mod_name = KBUILD_MODNAME, 379 + }, 380 + }; 381 + 382 + static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) 383 + { 384 + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); 385 + 386 + if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver) 387 + return true; 388 + 389 + return !!test_bit(to_nd_device_type(dev), &nd_drv->type); 390 + } 227 391 228 392 static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); 229 393 ··· 577 395 dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus, 578 396 "ndctl%d", nvdimm_bus->id); 579 397 580 - if (IS_ERR(dev)) { 398 + if (IS_ERR(dev)) 581 399 dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n", 582 400 nvdimm_bus->id, PTR_ERR(dev)); 583 - return PTR_ERR(dev); 584 - } 585 - return 0; 401 + return PTR_ERR_OR_ZERO(dev); 586 402 } 587 403 588 404 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) ··· 1030 850 goto err_class; 1031 851 } 1032 852 853 + rc = driver_register(&nd_bus_driver.drv); 854 + if (rc) 855 + goto err_nd_bus; 856 + 1033 857 return 0; 1034 858 859 + err_nd_bus: 860 + class_destroy(nd_class); 1035 861 err_class: 1036 862 unregister_chrdev(nvdimm_major, "dimmctl"); 1037 863 err_dimm_chrdev: ··· 1050 864 1051 865 void nvdimm_bus_exit(void) 1052 866 { 867 + driver_unregister(&nd_bus_driver.drv); 1053 868 class_destroy(nd_class); 1054 869 unregister_chrdev(nvdimm_bus_major, "ndctl"); 1055 870 unregister_chrdev(nvdimm_major, "dimmctl"); 1056 871 bus_unregister(&nvdimm_bus_type); 872 + ida_destroy(&nd_ida); 1057 873 }
+3 -4
drivers/nvdimm/claim.c
··· 240 240 return memcpy_from_pmem(buf, nsio->addr + offset, size); 241 241 } else { 242 242 memcpy_to_pmem(nsio->addr + offset, buf, size); 243 - wmb_pmem(); 243 + nvdimm_flush(to_nd_region(ndns->dev.parent)); 244 244 } 245 245 246 246 return 0; ··· 266 266 267 267 nsio->addr = devm_memremap(dev, res->start, resource_size(res), 268 268 ARCH_MEMREMAP_PMEM); 269 - if (IS_ERR(nsio->addr)) 270 - return PTR_ERR(nsio->addr); 271 - return 0; 269 + 270 + return PTR_ERR_OR_ZERO(nsio->addr); 272 271 } 273 272 EXPORT_SYMBOL_GPL(devm_nsio_enable); 274 273
+128 -125
drivers/nvdimm/core.c
··· 20 20 #include <linux/ndctl.h> 21 21 #include <linux/mutex.h> 22 22 #include <linux/slab.h> 23 + #include <linux/io.h> 23 24 #include "nd-core.h" 24 25 #include "nd.h" 25 26 26 27 LIST_HEAD(nvdimm_bus_list); 27 28 DEFINE_MUTEX(nvdimm_bus_list_mutex); 28 - static DEFINE_IDA(nd_ida); 29 29 30 30 void nvdimm_bus_lock(struct device *dev) 31 31 { ··· 57 57 } 58 58 EXPORT_SYMBOL(is_nvdimm_bus_locked); 59 59 60 + struct nvdimm_map { 61 + struct nvdimm_bus *nvdimm_bus; 62 + struct list_head list; 63 + resource_size_t offset; 64 + unsigned long flags; 65 + size_t size; 66 + union { 67 + void *mem; 68 + void __iomem *iomem; 69 + }; 70 + struct kref kref; 71 + }; 72 + 73 + static struct nvdimm_map *find_nvdimm_map(struct device *dev, 74 + resource_size_t offset) 75 + { 76 + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 77 + struct nvdimm_map *nvdimm_map; 78 + 79 + list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list) 80 + if (nvdimm_map->offset == offset) 81 + return nvdimm_map; 82 + return NULL; 83 + } 84 + 85 + static struct nvdimm_map *alloc_nvdimm_map(struct device *dev, 86 + resource_size_t offset, size_t size, unsigned long flags) 87 + { 88 + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 89 + struct nvdimm_map *nvdimm_map; 90 + 91 + nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL); 92 + if (!nvdimm_map) 93 + return NULL; 94 + 95 + INIT_LIST_HEAD(&nvdimm_map->list); 96 + nvdimm_map->nvdimm_bus = nvdimm_bus; 97 + nvdimm_map->offset = offset; 98 + nvdimm_map->flags = flags; 99 + nvdimm_map->size = size; 100 + kref_init(&nvdimm_map->kref); 101 + 102 + if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) 103 + goto err_request_region; 104 + 105 + if (flags) 106 + nvdimm_map->mem = memremap(offset, size, flags); 107 + else 108 + nvdimm_map->iomem = ioremap(offset, size); 109 + 110 + if (!nvdimm_map->mem) 111 + goto err_map; 112 + 113 + dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!", 114 + __func__); 
115 + list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list); 116 + 117 + return nvdimm_map; 118 + 119 + err_map: 120 + release_mem_region(offset, size); 121 + err_request_region: 122 + kfree(nvdimm_map); 123 + return NULL; 124 + } 125 + 126 + static void nvdimm_map_release(struct kref *kref) 127 + { 128 + struct nvdimm_bus *nvdimm_bus; 129 + struct nvdimm_map *nvdimm_map; 130 + 131 + nvdimm_map = container_of(kref, struct nvdimm_map, kref); 132 + nvdimm_bus = nvdimm_map->nvdimm_bus; 133 + 134 + dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset); 135 + list_del(&nvdimm_map->list); 136 + if (nvdimm_map->flags) 137 + memunmap(nvdimm_map->mem); 138 + else 139 + iounmap(nvdimm_map->iomem); 140 + release_mem_region(nvdimm_map->offset, nvdimm_map->size); 141 + kfree(nvdimm_map); 142 + } 143 + 144 + static void nvdimm_map_put(void *data) 145 + { 146 + struct nvdimm_map *nvdimm_map = data; 147 + struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus; 148 + 149 + nvdimm_bus_lock(&nvdimm_bus->dev); 150 + kref_put(&nvdimm_map->kref, nvdimm_map_release); 151 + nvdimm_bus_unlock(&nvdimm_bus->dev); 152 + } 153 + 154 + /** 155 + * devm_nvdimm_memremap - map a resource that is shared across regions 156 + * @dev: device that will own a reference to the shared mapping 157 + * @offset: physical base address of the mapping 158 + * @size: mapping size 159 + * @flags: memremap flags, or, if zero, perform an ioremap instead 160 + */ 161 + void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, 162 + size_t size, unsigned long flags) 163 + { 164 + struct nvdimm_map *nvdimm_map; 165 + 166 + nvdimm_bus_lock(dev); 167 + nvdimm_map = find_nvdimm_map(dev, offset); 168 + if (!nvdimm_map) 169 + nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags); 170 + else 171 + kref_get(&nvdimm_map->kref); 172 + nvdimm_bus_unlock(dev); 173 + 174 + if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map)) 175 + return NULL; 176 + 177 + return nvdimm_map->mem; 178 + } 
179 + EXPORT_SYMBOL_GPL(devm_nvdimm_memremap); 180 + 60 181 u64 nd_fletcher64(void *addr, size_t len, bool le) 61 182 { 62 183 u32 *buf = addr; ··· 194 73 } 195 74 EXPORT_SYMBOL_GPL(nd_fletcher64); 196 75 197 - static void nvdimm_bus_release(struct device *dev) 198 - { 199 - struct nvdimm_bus *nvdimm_bus; 200 - 201 - nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 202 - ida_simple_remove(&nd_ida, nvdimm_bus->id); 203 - kfree(nvdimm_bus); 204 - } 205 - 206 - struct nvdimm_bus *to_nvdimm_bus(struct device *dev) 207 - { 208 - struct nvdimm_bus *nvdimm_bus; 209 - 210 - nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 211 - WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release); 212 - return nvdimm_bus; 213 - } 214 - EXPORT_SYMBOL_GPL(to_nvdimm_bus); 215 - 216 76 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) 217 77 { 218 78 /* struct nvdimm_bus definition is private to libnvdimm */ ··· 201 99 } 202 100 EXPORT_SYMBOL_GPL(to_nd_desc); 203 101 204 - struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) 102 + struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus) 205 103 { 206 - struct device *dev; 207 - 208 - for (dev = nd_dev; dev; dev = dev->parent) 209 - if (dev->release == nvdimm_bus_release) 210 - break; 211 - dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); 212 - if (dev) 213 - return to_nvdimm_bus(dev); 214 - return NULL; 104 + /* struct nvdimm_bus definition is private to libnvdimm */ 105 + return &nvdimm_bus->dev; 215 106 } 107 + EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); 216 108 217 109 static bool is_uuid_sep(char sep) 218 110 { ··· 421 325 }; 422 326 EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); 423 327 424 - struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, 425 - struct nvdimm_bus_descriptor *nd_desc, struct module *module) 426 - { 427 - struct nvdimm_bus *nvdimm_bus; 428 - int rc; 429 - 430 - nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); 431 - if (!nvdimm_bus) 432 - return 
NULL; 433 - INIT_LIST_HEAD(&nvdimm_bus->list); 434 - INIT_LIST_HEAD(&nvdimm_bus->poison_list); 435 - init_waitqueue_head(&nvdimm_bus->probe_wait); 436 - nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); 437 - mutex_init(&nvdimm_bus->reconfig_mutex); 438 - if (nvdimm_bus->id < 0) { 439 - kfree(nvdimm_bus); 440 - return NULL; 441 - } 442 - nvdimm_bus->nd_desc = nd_desc; 443 - nvdimm_bus->module = module; 444 - nvdimm_bus->dev.parent = parent; 445 - nvdimm_bus->dev.release = nvdimm_bus_release; 446 - nvdimm_bus->dev.groups = nd_desc->attr_groups; 447 - dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); 448 - rc = device_register(&nvdimm_bus->dev); 449 - if (rc) { 450 - dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); 451 - goto err; 452 - } 453 - 454 - rc = nvdimm_bus_create_ndctl(nvdimm_bus); 455 - if (rc) 456 - goto err; 457 - 458 - mutex_lock(&nvdimm_bus_list_mutex); 459 - list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); 460 - mutex_unlock(&nvdimm_bus_list_mutex); 461 - 462 - return nvdimm_bus; 463 - err: 464 - put_device(&nvdimm_bus->dev); 465 - return NULL; 466 - } 467 - EXPORT_SYMBOL_GPL(__nvdimm_bus_register); 468 - 469 328 static void set_badblock(struct badblocks *bb, sector_t s, int num) 470 329 { 471 330 dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n", ··· 596 545 } 597 546 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); 598 547 599 - static void free_poison_list(struct list_head *poison_list) 600 - { 601 - struct nd_poison *pl, *next; 602 - 603 - list_for_each_entry_safe(pl, next, poison_list, list) { 604 - list_del(&pl->list); 605 - kfree(pl); 606 - } 607 - list_del_init(poison_list); 608 - } 609 - 610 - static int child_unregister(struct device *dev, void *data) 611 - { 612 - /* 613 - * the singular ndctl class device per bus needs to be 614 - * "device_destroy"ed, so skip it here 615 - * 616 - * i.e. 
remove classless children 617 - */ 618 - if (dev->class) 619 - /* pass */; 620 - else 621 - nd_device_unregister(dev, ND_SYNC); 622 - return 0; 623 - } 624 - 625 - void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) 626 - { 627 - if (!nvdimm_bus) 628 - return; 629 - 630 - mutex_lock(&nvdimm_bus_list_mutex); 631 - list_del_init(&nvdimm_bus->list); 632 - mutex_unlock(&nvdimm_bus_list_mutex); 633 - 634 - nd_synchronize(); 635 - device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); 636 - 637 - nvdimm_bus_lock(&nvdimm_bus->dev); 638 - free_poison_list(&nvdimm_bus->poison_list); 639 - nvdimm_bus_unlock(&nvdimm_bus->dev); 640 - 641 - nvdimm_bus_destroy_ndctl(nvdimm_bus); 642 - 643 - device_unregister(&nvdimm_bus->dev); 644 - } 645 - EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); 646 - 647 548 #ifdef CONFIG_BLK_DEV_INTEGRITY 648 549 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) 649 550 { ··· 604 601 if (meta_size == 0) 605 602 return 0; 606 603 607 - bi.profile = NULL; 604 + memset(&bi, 0, sizeof(bi)); 605 + 608 606 bi.tuple_size = meta_size; 609 607 bi.tag_size = meta_size; 610 608 ··· 654 650 nvdimm_bus_exit(); 655 651 nd_region_devs_exit(); 656 652 nvdimm_devs_exit(); 657 - ida_destroy(&nd_ida); 658 653 } 659 654 660 655 MODULE_LICENSE("GPL v2");
+4 -1
drivers/nvdimm/dimm_devs.c
··· 346 346 347 347 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, 348 348 const struct attribute_group **groups, unsigned long flags, 349 - unsigned long cmd_mask) 349 + unsigned long cmd_mask, int num_flush, 350 + struct resource *flush_wpq) 350 351 { 351 352 struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); 352 353 struct device *dev; ··· 363 362 nvdimm->provider_data = provider_data; 364 363 nvdimm->flags = flags; 365 364 nvdimm->cmd_mask = cmd_mask; 365 + nvdimm->num_flush = num_flush; 366 + nvdimm->flush_wpq = flush_wpq; 366 367 atomic_set(&nvdimm->busy, 0); 367 368 dev = &nvdimm->dev; 368 369 dev_set_name(dev, "nmem%d", nvdimm->id);
+1
drivers/nvdimm/e820.c
··· 47 47 48 48 nd_desc.attr_groups = e820_pmem_attribute_groups; 49 49 nd_desc.provider_name = "e820"; 50 + nd_desc.module = THIS_MODULE; 50 51 nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); 51 52 if (!nvdimm_bus) 52 53 goto err;
+3 -2
drivers/nvdimm/nd-core.h
··· 26 26 struct nvdimm_bus { 27 27 struct nvdimm_bus_descriptor *nd_desc; 28 28 wait_queue_head_t probe_wait; 29 - struct module *module; 30 29 struct list_head list; 31 30 struct device dev; 32 31 int id, probe_active; 33 32 struct list_head poison_list; 33 + struct list_head mapping_list; 34 34 struct mutex reconfig_mutex; 35 35 }; 36 36 ··· 40 40 unsigned long cmd_mask; 41 41 struct device dev; 42 42 atomic_t busy; 43 - int id; 43 + int id, num_flush; 44 + struct resource *flush_wpq; 44 45 }; 45 46 46 47 bool is_nvdimm(struct device *dev);
+6 -4
drivers/nvdimm/nd.h
··· 49 49 struct kref kref; 50 50 }; 51 51 52 - struct nd_region_namespaces { 53 - int count; 54 - int active; 52 + struct nd_region_data { 53 + int ns_count; 54 + int ns_active; 55 + unsigned int flush_mask; 56 + void __iomem *flush_wpq[0][0]; 55 57 }; 56 58 57 59 static inline struct nd_namespace_index *to_namespace_index( ··· 121 119 122 120 struct nd_blk_region { 123 121 int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 124 - void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 125 122 int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, 126 123 void *iobuf, u64 len, int rw); 127 124 void *blk_provider_data; ··· 326 325 } 327 326 #endif 328 327 int nd_blk_region_init(struct nd_region *nd_region); 328 + int nd_region_activate(struct nd_region *nd_region); 329 329 void __nd_iostat_start(struct bio *bio, unsigned long *start); 330 330 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) 331 331 {
+51 -34
drivers/nvdimm/pmem.c
··· 29 29 #include <linux/slab.h> 30 30 #include <linux/pmem.h> 31 31 #include <linux/nd.h> 32 + #include "pmem.h" 32 33 #include "pfn.h" 33 34 #include "nd.h" 34 35 35 - struct pmem_device { 36 - /* One contiguous memory region per device */ 37 - phys_addr_t phys_addr; 38 - /* when non-zero this device is hosting a 'pfn' instance */ 39 - phys_addr_t data_offset; 40 - u64 pfn_flags; 41 - void __pmem *virt_addr; 42 - /* immutable base size of the namespace */ 43 - size_t size; 44 - /* trim size when namespace capacity has been section aligned */ 45 - u32 pfn_pad; 46 - struct badblocks bb; 47 - }; 36 + static struct device *to_dev(struct pmem_device *pmem) 37 + { 38 + /* 39 + * nvdimm bus services need a 'dev' parameter, and we record the device 40 + * at init in bb.dev. 41 + */ 42 + return pmem->bb.dev; 43 + } 44 + 45 + static struct nd_region *to_region(struct pmem_device *pmem) 46 + { 47 + return to_nd_region(to_dev(pmem)->parent); 48 + } 48 49 49 50 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, 50 51 unsigned int len) 51 52 { 52 - struct device *dev = pmem->bb.dev; 53 + struct device *dev = to_dev(pmem); 53 54 sector_t sector; 54 55 long cleared; 55 56 ··· 58 57 cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); 59 58 60 59 if (cleared > 0 && cleared / 512) { 61 - dev_dbg(dev, "%s: %llx clear %ld sector%s\n", 60 + dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", 62 61 __func__, (unsigned long long) sector, 63 62 cleared / 512, cleared / 512 > 1 ? 
"s" : ""); 64 63 badblocks_clear(&pmem->bb, sector, cleared / 512); ··· 74 73 bool bad_pmem = false; 75 74 void *mem = kmap_atomic(page); 76 75 phys_addr_t pmem_off = sector * 512 + pmem->data_offset; 77 - void __pmem *pmem_addr = pmem->virt_addr + pmem_off; 76 + void *pmem_addr = pmem->virt_addr + pmem_off; 78 77 79 78 if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) 80 79 bad_pmem = true; ··· 113 112 return rc; 114 113 } 115 114 115 + /* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */ 116 + #ifndef REQ_FLUSH 117 + #define REQ_FLUSH REQ_PREFLUSH 118 + #endif 119 + 116 120 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) 117 121 { 118 122 int rc = 0; ··· 126 120 struct bio_vec bvec; 127 121 struct bvec_iter iter; 128 122 struct pmem_device *pmem = q->queuedata; 123 + struct nd_region *nd_region = to_region(pmem); 124 + 125 + if (bio->bi_rw & REQ_FLUSH) 126 + nvdimm_flush(nd_region); 129 127 130 128 do_acct = nd_iostat_start(bio, &start); 131 129 bio_for_each_segment(bvec, bio, iter) { ··· 144 134 if (do_acct) 145 135 nd_iostat_end(bio, start); 146 136 147 - if (bio_data_dir(bio)) 148 - wmb_pmem(); 137 + if (bio->bi_rw & REQ_FUA) 138 + nvdimm_flush(nd_region); 149 139 150 140 bio_endio(bio); 151 141 return BLK_QC_T_NONE; ··· 158 148 int rc; 159 149 160 150 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); 161 - if (rw & WRITE) 162 - wmb_pmem(); 163 151 164 152 /* 165 153 * The ->rw_page interface is subtle and tricky. 
The core ··· 171 163 return rc; 172 164 } 173 165 174 - static long pmem_direct_access(struct block_device *bdev, sector_t sector, 175 - void __pmem **kaddr, pfn_t *pfn, long size) 166 + /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ 167 + __weak long pmem_direct_access(struct block_device *bdev, sector_t sector, 168 + void **kaddr, pfn_t *pfn, long size) 176 169 { 177 170 struct pmem_device *pmem = bdev->bd_queue->queuedata; 178 171 resource_size_t offset = sector * 512 + pmem->data_offset; ··· 204 195 blk_cleanup_queue(q); 205 196 } 206 197 207 - void pmem_release_disk(void *disk) 198 + static void pmem_release_disk(void *disk) 208 199 { 209 200 del_gendisk(disk); 210 201 put_disk(disk); ··· 214 205 struct nd_namespace_common *ndns) 215 206 { 216 207 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 208 + struct nd_region *nd_region = to_nd_region(dev->parent); 217 209 struct vmem_altmap __altmap, *altmap = NULL; 218 210 struct resource *res = &nsio->res; 219 211 struct nd_pfn *nd_pfn = NULL; ··· 244 234 dev_set_drvdata(dev, pmem); 245 235 pmem->phys_addr = res->start; 246 236 pmem->size = resource_size(res); 247 - if (!arch_has_wmb_pmem()) 237 + if (nvdimm_has_flush(nd_region) < 0) 248 238 dev_warn(dev, "unable to guarantee persistence of writes\n"); 249 239 250 240 if (!devm_request_mem_region(dev, res->start, resource_size(res), ··· 279 269 * At release time the queue must be dead before 280 270 * devm_memremap_pages is unwound 281 271 */ 282 - if (devm_add_action(dev, pmem_release_queue, q)) { 283 - blk_cleanup_queue(q); 272 + if (devm_add_action_or_reset(dev, pmem_release_queue, q)) 284 273 return -ENOMEM; 285 - } 286 274 287 275 if (IS_ERR(addr)) 288 276 return PTR_ERR(addr); 289 - pmem->virt_addr = (void __pmem *) addr; 277 + pmem->virt_addr = addr; 290 278 279 + blk_queue_write_cache(q, true, true); 291 280 blk_queue_make_request(q, pmem_make_request); 292 281 blk_queue_physical_block_size(q, PAGE_SIZE); 293 282 
blk_queue_max_hw_sectors(q, UINT_MAX); ··· 298 289 disk = alloc_disk_node(0, nid); 299 290 if (!disk) 300 291 return -ENOMEM; 301 - if (devm_add_action(dev, pmem_release_disk, disk)) { 302 - put_disk(disk); 303 - return -ENOMEM; 304 - } 305 292 306 293 disk->fops = &pmem_fops; 307 294 disk->queue = q; ··· 307 302 / 512); 308 303 if (devm_init_badblocks(dev, &pmem->bb)) 309 304 return -ENOMEM; 310 - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); 305 + nvdimm_badblocks_populate(nd_region, &pmem->bb, res); 311 306 disk->bb = &pmem->bb; 312 307 device_add_disk(dev, disk); 308 + 309 + if (devm_add_action_or_reset(dev, pmem_release_disk, disk)) 310 + return -ENOMEM; 311 + 313 312 revalidate_disk(disk); 314 313 315 314 return 0; ··· 349 340 { 350 341 if (is_nd_btt(dev)) 351 342 nvdimm_namespace_detach_btt(to_nd_btt(dev)); 343 + nvdimm_flush(to_nd_region(dev->parent)); 344 + 352 345 return 0; 346 + } 347 + 348 + static void nd_pmem_shutdown(struct device *dev) 349 + { 350 + nvdimm_flush(to_nd_region(dev->parent)); 353 351 } 354 352 355 353 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) 356 354 { 357 - struct nd_region *nd_region = to_nd_region(dev->parent); 358 355 struct pmem_device *pmem = dev_get_drvdata(dev); 356 + struct nd_region *nd_region = to_region(pmem); 359 357 resource_size_t offset = 0, end_trunc = 0; 360 358 struct nd_namespace_common *ndns; 361 359 struct nd_namespace_io *nsio; ··· 398 382 .probe = nd_pmem_probe, 399 383 .remove = nd_pmem_remove, 400 384 .notify = nd_pmem_notify, 385 + .shutdown = nd_pmem_shutdown, 401 386 .drv = { 402 387 .name = "nd_pmem", 403 388 },
+24
drivers/nvdimm/pmem.h
··· 1 + #ifndef __NVDIMM_PMEM_H__ 2 + #define __NVDIMM_PMEM_H__ 3 + #include <linux/badblocks.h> 4 + #include <linux/types.h> 5 + #include <linux/pfn_t.h> 6 + #include <linux/fs.h> 7 + 8 + long pmem_direct_access(struct block_device *bdev, sector_t sector, 9 + void **kaddr, pfn_t *pfn, long size); 10 + /* this definition is in it's own header for tools/testing/nvdimm to consume */ 11 + struct pmem_device { 12 + /* One contiguous memory region per device */ 13 + phys_addr_t phys_addr; 14 + /* when non-zero this device is hosting a 'pfn' instance */ 15 + phys_addr_t data_offset; 16 + u64 pfn_flags; 17 + void *virt_addr; 18 + /* immutable base size of the namespace */ 19 + size_t size; 20 + /* trim size when namespace capacity has been section aligned */ 21 + u32 pfn_pad; 22 + struct badblocks bb; 23 + }; 24 + #endif /* __NVDIMM_PMEM_H__ */
+10 -9
drivers/nvdimm/region.c
··· 20 20 { 21 21 int err, rc; 22 22 static unsigned long once; 23 - struct nd_region_namespaces *num_ns; 23 + struct nd_region_data *ndrd; 24 24 struct nd_region *nd_region = to_nd_region(dev); 25 25 26 26 if (nd_region->num_lanes > num_online_cpus() ··· 33 33 nd_region->num_lanes); 34 34 } 35 35 36 + rc = nd_region_activate(nd_region); 37 + if (rc) 38 + return rc; 39 + 36 40 rc = nd_blk_region_init(nd_region); 37 41 if (rc) 38 42 return rc; 39 43 40 44 rc = nd_region_register_namespaces(nd_region, &err); 41 - num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); 42 - if (!num_ns) 43 - return -ENOMEM; 44 - 45 45 if (rc < 0) 46 46 return rc; 47 47 48 - num_ns->active = rc; 49 - num_ns->count = rc + err; 50 - dev_set_drvdata(dev, num_ns); 48 + ndrd = dev_get_drvdata(dev); 49 + ndrd->ns_active = rc; 50 + ndrd->ns_count = rc + err; 51 51 52 52 if (rc && err && rc == err) 53 53 return -ENODEV; ··· 82 82 { 83 83 struct nd_region *nd_region = to_nd_region(dev); 84 84 85 + device_for_each_child(dev, NULL, child_unregister); 86 + 85 87 /* flush attribute readers and disable */ 86 88 nvdimm_bus_lock(dev); 87 89 nd_region->ns_seed = NULL; ··· 93 91 dev_set_drvdata(dev, NULL); 94 92 nvdimm_bus_unlock(dev); 95 93 96 - device_for_each_child(dev, NULL, child_unregister); 97 94 return 0; 98 95 } 99 96
+148 -6
drivers/nvdimm/region_devs.c
··· 14 14 #include <linux/highmem.h> 15 15 #include <linux/sched.h> 16 16 #include <linux/slab.h> 17 + #include <linux/hash.h> 18 + #include <linux/pmem.h> 17 19 #include <linux/sort.h> 18 20 #include <linux/io.h> 19 21 #include <linux/nd.h> 20 22 #include "nd-core.h" 21 23 #include "nd.h" 22 24 25 + /* 26 + * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is 27 + * irrelevant. 28 + */ 29 + #include <linux/io-64-nonatomic-hi-lo.h> 30 + 23 31 static DEFINE_IDA(region_ida); 32 + static DEFINE_PER_CPU(int, flush_idx); 33 + 34 + static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, 35 + struct nd_region_data *ndrd) 36 + { 37 + int i, j; 38 + 39 + dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm), 40 + nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es"); 41 + for (i = 0; i < nvdimm->num_flush; i++) { 42 + struct resource *res = &nvdimm->flush_wpq[i]; 43 + unsigned long pfn = PHYS_PFN(res->start); 44 + void __iomem *flush_page; 45 + 46 + /* check if flush hints share a page */ 47 + for (j = 0; j < i; j++) { 48 + struct resource *res_j = &nvdimm->flush_wpq[j]; 49 + unsigned long pfn_j = PHYS_PFN(res_j->start); 50 + 51 + if (pfn == pfn_j) 52 + break; 53 + } 54 + 55 + if (j < i) 56 + flush_page = (void __iomem *) ((unsigned long) 57 + ndrd->flush_wpq[dimm][j] & PAGE_MASK); 58 + else 59 + flush_page = devm_nvdimm_ioremap(dev, 60 + PHYS_PFN(pfn), PAGE_SIZE); 61 + if (!flush_page) 62 + return -ENXIO; 63 + ndrd->flush_wpq[dimm][i] = flush_page 64 + + (res->start & ~PAGE_MASK); 65 + } 66 + 67 + return 0; 68 + } 69 + 70 + int nd_region_activate(struct nd_region *nd_region) 71 + { 72 + int i, num_flush = 0; 73 + struct nd_region_data *ndrd; 74 + struct device *dev = &nd_region->dev; 75 + size_t flush_data_size = sizeof(void *); 76 + 77 + nvdimm_bus_lock(&nd_region->dev); 78 + for (i = 0; i < nd_region->ndr_mappings; i++) { 79 + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 80 + struct nvdimm *nvdimm = 
nd_mapping->nvdimm; 81 + 82 + /* at least one null hint slot per-dimm for the "no-hint" case */ 83 + flush_data_size += sizeof(void *); 84 + num_flush = min_not_zero(num_flush, nvdimm->num_flush); 85 + if (!nvdimm->num_flush) 86 + continue; 87 + flush_data_size += nvdimm->num_flush * sizeof(void *); 88 + } 89 + nvdimm_bus_unlock(&nd_region->dev); 90 + 91 + ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL); 92 + if (!ndrd) 93 + return -ENOMEM; 94 + dev_set_drvdata(dev, ndrd); 95 + 96 + ndrd->flush_mask = (1 << ilog2(num_flush)) - 1; 97 + for (i = 0; i < nd_region->ndr_mappings; i++) { 98 + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 99 + struct nvdimm *nvdimm = nd_mapping->nvdimm; 100 + int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd); 101 + 102 + if (rc) 103 + return rc; 104 + } 105 + 106 + return 0; 107 + } 24 108 25 109 static void nd_region_release(struct device *dev) 26 110 { ··· 326 242 static ssize_t init_namespaces_show(struct device *dev, 327 243 struct device_attribute *attr, char *buf) 328 244 { 329 - struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); 245 + struct nd_region_data *ndrd = dev_get_drvdata(dev); 330 246 ssize_t rc; 331 247 332 248 nvdimm_bus_lock(dev); 333 - if (num_ns) 334 - rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); 249 + if (ndrd) 250 + rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count); 335 251 else 336 252 rc = -ENXIO; 337 253 nvdimm_bus_unlock(dev); ··· 517 433 518 434 if (is_nd_pmem(dev)) 519 435 return; 520 - 521 - to_nd_blk_region(dev)->disable(nvdimm_bus, dev); 522 436 } 523 437 if (dev->parent && is_nd_blk(dev->parent) && probe) { 524 438 nd_region = to_nd_region(dev->parent); ··· 780 698 if (ndbr) { 781 699 nd_region = &ndbr->nd_region; 782 700 ndbr->enable = ndbr_desc->enable; 783 - ndbr->disable = ndbr_desc->disable; 784 701 ndbr->do_io = ndbr_desc->do_io; 785 702 } 786 703 region_buf = ndbr; ··· 874 793 __func__); 875 794 } 876 795 
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); 796 + 797 + /** 798 + * nvdimm_flush - flush any posted write queues between the cpu and pmem media 799 + * @nd_region: blk or interleaved pmem region 800 + */ 801 + void nvdimm_flush(struct nd_region *nd_region) 802 + { 803 + struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); 804 + int i, idx; 805 + 806 + /* 807 + * Try to encourage some diversity in flush hint addresses 808 + * across cpus assuming a limited number of flush hints. 809 + */ 810 + idx = this_cpu_read(flush_idx); 811 + idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8)); 812 + 813 + /* 814 + * The first wmb() is needed to 'sfence' all previous writes 815 + * such that they are architecturally visible for the platform 816 + * buffer flush. Note that we've already arranged for pmem 817 + * writes to avoid the cache via arch_memcpy_to_pmem(). The 818 + * final wmb() ensures ordering for the NVDIMM flush write. 819 + */ 820 + wmb(); 821 + for (i = 0; i < nd_region->ndr_mappings; i++) 822 + if (ndrd->flush_wpq[i][0]) 823 + writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]); 824 + wmb(); 825 + } 826 + EXPORT_SYMBOL_GPL(nvdimm_flush); 827 + 828 + /** 829 + * nvdimm_has_flush - determine write flushing requirements 830 + * @nd_region: blk or interleaved pmem region 831 + * 832 + * Returns 1 if writes require flushing 833 + * Returns 0 if writes do not require flushing 834 + * Returns -ENXIO if flushing capability can not be determined 835 + */ 836 + int nvdimm_has_flush(struct nd_region *nd_region) 837 + { 838 + struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); 839 + int i; 840 + 841 + /* no nvdimm == flushing capability unknown */ 842 + if (nd_region->ndr_mappings == 0) 843 + return -ENXIO; 844 + 845 + for (i = 0; i < nd_region->ndr_mappings; i++) 846 + /* flush hints present, flushing required */ 847 + if (ndrd->flush_wpq[i][0]) 848 + return 1; 849 + 850 + /* 851 + * The platform defines dimm devices without 
hints, assume 852 + * platform persistence mechanism like ADR 853 + */ 854 + return 0; 855 + } 856 + EXPORT_SYMBOL_GPL(nvdimm_has_flush); 877 857 878 858 void __exit nd_region_devs_exit(void) 879 859 {
+3 -3
drivers/s390/block/dcssblk.c
··· 31 31 static blk_qc_t dcssblk_make_request(struct request_queue *q, 32 32 struct bio *bio); 33 33 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, 34 - void __pmem **kaddr, pfn_t *pfn, long size); 34 + void **kaddr, pfn_t *pfn, long size); 35 35 36 36 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; 37 37 ··· 884 884 885 885 static long 886 886 dcssblk_direct_access (struct block_device *bdev, sector_t secnum, 887 - void __pmem **kaddr, pfn_t *pfn, long size) 887 + void **kaddr, pfn_t *pfn, long size) 888 888 { 889 889 struct dcssblk_dev_info *dev_info; 890 890 unsigned long offset, dev_sz; ··· 894 894 return -ENODEV; 895 895 dev_sz = dev_info->end - dev_info->start; 896 896 offset = secnum * 512; 897 - *kaddr = (void __pmem *) (dev_info->start + offset); 897 + *kaddr = (void *) dev_info->start + offset; 898 898 *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); 899 899 900 900 return dev_sz - offset;
+4 -9
fs/dax.c
··· 75 75 struct request_queue *q = bdev->bd_queue; 76 76 long rc = -EIO; 77 77 78 - dax->addr = (void __pmem *) ERR_PTR(-EIO); 78 + dax->addr = ERR_PTR(-EIO); 79 79 if (blk_queue_enter(q, true) != 0) 80 80 return rc; 81 81 82 82 rc = bdev_direct_access(bdev, dax); 83 83 if (rc < 0) { 84 - dax->addr = (void __pmem *) ERR_PTR(rc); 84 + dax->addr = ERR_PTR(rc); 85 85 blk_queue_exit(q); 86 86 return rc; 87 87 } ··· 147 147 struct buffer_head *bh) 148 148 { 149 149 loff_t pos = start, max = start, bh_max = start; 150 - bool hole = false, need_wmb = false; 150 + bool hole = false; 151 151 struct block_device *bdev = NULL; 152 152 int rw = iov_iter_rw(iter), rc; 153 153 long map_len = 0; 154 154 struct blk_dax_ctl dax = { 155 - .addr = (void __pmem *) ERR_PTR(-EIO), 155 + .addr = ERR_PTR(-EIO), 156 156 }; 157 157 unsigned blkbits = inode->i_blkbits; 158 158 sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1) ··· 218 218 219 219 if (iov_iter_rw(iter) == WRITE) { 220 220 len = copy_from_iter_pmem(dax.addr, max - pos, iter); 221 - need_wmb = true; 222 221 } else if (!hole) 223 222 len = copy_to_iter((void __force *) dax.addr, max - pos, 224 223 iter); ··· 234 235 dax.addr += len; 235 236 } 236 237 237 - if (need_wmb) 238 - wmb_pmem(); 239 238 dax_unmap_atomic(bdev, &dax); 240 239 241 240 return (pos == start) ? rc : pos - start; ··· 785 788 return ret; 786 789 } 787 790 } 788 - wmb_pmem(); 789 791 return 0; 790 792 } 791 793 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); ··· 1183 1187 if (dax_map_atomic(bdev, &dax) < 0) 1184 1188 return PTR_ERR(dax.addr); 1185 1189 clear_pmem(dax.addr + offset, length); 1186 - wmb_pmem(); 1187 1190 dax_unmap_atomic(bdev, &dax); 1188 1191 } 1189 1192 return 0;
+3 -3
include/linux/blkdev.h
··· 1665 1665 */ 1666 1666 struct blk_dax_ctl { 1667 1667 sector_t sector; 1668 - void __pmem *addr; 1668 + void *addr; 1669 1669 long size; 1670 1670 pfn_t pfn; 1671 1671 }; ··· 1676 1676 int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); 1677 1677 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1678 1678 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1679 - long (*direct_access)(struct block_device *, sector_t, void __pmem **, 1680 - pfn_t *, long); 1679 + long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *, 1680 + long); 1681 1681 unsigned int (*check_events) (struct gendisk *disk, 1682 1682 unsigned int clearing); 1683 1683 /* ->media_changed() is DEPRECATED, use ->check_events() instead */
-2
include/linux/compiler.h
··· 17 17 # define __release(x) __context__(x,-1) 18 18 # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) 19 19 # define __percpu __attribute__((noderef, address_space(3))) 20 - # define __pmem __attribute__((noderef, address_space(5))) 21 20 #ifdef CONFIG_SPARSE_RCU_POINTER 22 21 # define __rcu __attribute__((noderef, address_space(4))) 23 22 #else /* CONFIG_SPARSE_RCU_POINTER */ ··· 44 45 # define __cond_lock(x,c) (c) 45 46 # define __percpu 46 47 # define __rcu 47 - # define __pmem 48 48 # define __private 49 49 # define ACCESS_PRIVATE(p, member) ((p)->member) 50 50 #endif /* __CHECKER__ */
+1 -1
include/linux/device-mapper.h
··· 131 131 * >= 0 : the number of bytes accessible at the address 132 132 */ 133 133 typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, 134 - void __pmem **kaddr, pfn_t *pfn, long size); 134 + void **kaddr, pfn_t *pfn, long size); 135 135 136 136 void dm_error(const char *message); 137 137
+18 -6
include/linux/libnvdimm.h
··· 52 52 53 53 struct nd_namespace_label; 54 54 struct nvdimm_drvdata; 55 + 55 56 struct nd_mapping { 56 57 struct nvdimm *nvdimm; 57 58 struct nd_namespace_label **labels; ··· 70 69 struct nvdimm_bus_descriptor { 71 70 const struct attribute_group **attr_groups; 72 71 unsigned long cmd_mask; 72 + struct module *module; 73 73 char *provider_name; 74 74 ndctl_fn ndctl; 75 75 int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); ··· 101 99 unsigned long flags; 102 100 }; 103 101 102 + struct device; 103 + void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, 104 + size_t size, unsigned long flags); 105 + static inline void __iomem *devm_nvdimm_ioremap(struct device *dev, 106 + resource_size_t offset, size_t size) 107 + { 108 + return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0); 109 + } 110 + 104 111 struct nvdimm_bus; 105 112 struct module; 106 113 struct device; 107 114 struct nd_blk_region; 108 115 struct nd_blk_region_desc { 109 116 int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 110 - void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 111 117 int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, 112 118 void *iobuf, u64 len, int rw); 113 119 struct nd_region_desc ndr_desc; ··· 129 119 } 130 120 131 121 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); 132 - struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, 133 - struct nvdimm_bus_descriptor *nfit_desc, struct module *module); 134 - #define nvdimm_bus_register(parent, desc) \ 135 - __nvdimm_bus_register(parent, desc, THIS_MODULE) 122 + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, 123 + struct nvdimm_bus_descriptor *nfit_desc); 136 124 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); 137 125 struct nvdimm_bus *to_nvdimm_bus(struct device *dev); 138 126 struct nvdimm *to_nvdimm(struct device *dev); 139 127 struct nd_region *to_nd_region(struct device *dev); 140 128 
struct nd_blk_region *to_nd_blk_region(struct device *dev); 141 129 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); 130 + struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); 142 131 const char *nvdimm_name(struct nvdimm *nvdimm); 143 132 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); 144 133 void *nvdimm_provider_data(struct nvdimm *nvdimm); 145 134 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, 146 135 const struct attribute_group **groups, unsigned long flags, 147 - unsigned long cmd_mask); 136 + unsigned long cmd_mask, int num_flush, 137 + struct resource *flush_wpq); 148 138 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); 149 139 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); 150 140 u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, ··· 166 156 unsigned int nd_region_acquire_lane(struct nd_region *nd_region); 167 157 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); 168 158 u64 nd_fletcher64(void *addr, size_t len, bool le); 159 + void nvdimm_flush(struct nd_region *nd_region); 160 + int nvdimm_has_flush(struct nd_region *nd_region); 169 161 #endif /* __LIBNVDIMM_H__ */
+2 -1
include/linux/nd.h
··· 26 26 unsigned long type; 27 27 int (*probe)(struct device *dev); 28 28 int (*remove)(struct device *dev); 29 + void (*shutdown)(struct device *dev); 29 30 void (*notify)(struct device *dev, enum nvdimm_event event); 30 31 }; 31 32 ··· 68 67 struct nd_namespace_common common; 69 68 struct resource res; 70 69 resource_size_t size; 71 - void __pmem *addr; 70 + void *addr; 72 71 struct badblocks bb; 73 72 }; 74 73
+4 -1
include/linux/pfn_t.h
··· 28 28 return __pfn_to_pfn_t(pfn, 0); 29 29 } 30 30 31 - extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags); 31 + static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) 32 + { 33 + return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); 34 + } 32 35 33 36 static inline bool pfn_t_has_page(pfn_t pfn) 34 37 {
+21 -96
include/linux/pmem.h
··· 26 26 * calling these symbols with arch_has_pmem_api() and redirect to the 27 27 * implementation in asm/pmem.h. 28 28 */ 29 - static inline bool __arch_has_wmb_pmem(void) 30 - { 31 - return false; 32 - } 33 - 34 - static inline void arch_wmb_pmem(void) 29 + static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) 35 30 { 36 31 BUG(); 37 32 } 38 33 39 - static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, 40 - size_t n) 41 - { 42 - BUG(); 43 - } 44 - 45 - static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, 46 - size_t n) 34 + static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) 47 35 { 48 36 BUG(); 49 37 return -EFAULT; 50 38 } 51 39 52 - static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, 40 + static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, 53 41 struct iov_iter *i) 54 42 { 55 43 BUG(); 56 44 return 0; 57 45 } 58 46 59 - static inline void arch_clear_pmem(void __pmem *addr, size_t size) 47 + static inline void arch_clear_pmem(void *addr, size_t size) 60 48 { 61 49 BUG(); 62 50 } 63 51 64 - static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) 52 + static inline void arch_wb_cache_pmem(void *addr, size_t size) 65 53 { 66 54 BUG(); 67 55 } 68 56 69 - static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) 57 + static inline void arch_invalidate_pmem(void *addr, size_t size) 70 58 { 71 59 BUG(); 72 60 } ··· 65 77 return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); 66 78 } 67 79 68 - static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, 69 - size_t size) 70 - { 71 - memcpy(dst, (void __force *) src, size); 72 - return 0; 73 - } 74 - 75 80 /* 76 81 * memcpy_from_pmem - read from persistent memory with error handling 77 82 * @dst: destination buffer ··· 73 92 * 74 93 * Returns 0 on success negative error code on failure. 
75 94 */ 76 - static inline int memcpy_from_pmem(void *dst, void __pmem const *src, 77 - size_t size) 95 + static inline int memcpy_from_pmem(void *dst, void const *src, size_t size) 78 96 { 79 97 if (arch_has_pmem_api()) 80 98 return arch_memcpy_from_pmem(dst, src, size); 81 99 else 82 - return default_memcpy_from_pmem(dst, src, size); 83 - } 84 - 85 - /** 86 - * arch_has_wmb_pmem - true if wmb_pmem() ensures durability 87 - * 88 - * For a given cpu implementation within an architecture it is possible 89 - * that wmb_pmem() resolves to a nop. In the case this returns 90 - * false, pmem api users are unable to ensure durability and may want to 91 - * fall back to a different data consistency model, or otherwise notify 92 - * the user. 93 - */ 94 - static inline bool arch_has_wmb_pmem(void) 95 - { 96 - return arch_has_pmem_api() && __arch_has_wmb_pmem(); 97 - } 98 - 99 - /* 100 - * These defaults seek to offer decent performance and minimize the 101 - * window between i/o completion and writes being durable on media. 102 - * However, it is undefined / architecture specific whether 103 - * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for 104 - * making data durable relative to i/o completion. 
105 - */ 106 - static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, 107 - size_t size) 108 - { 109 - memcpy((void __force *) dst, src, size); 110 - } 111 - 112 - static inline size_t default_copy_from_iter_pmem(void __pmem *addr, 113 - size_t bytes, struct iov_iter *i) 114 - { 115 - return copy_from_iter_nocache((void __force *)addr, bytes, i); 116 - } 117 - 118 - static inline void default_clear_pmem(void __pmem *addr, size_t size) 119 - { 120 - if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0) 121 - clear_page((void __force *)addr); 122 - else 123 - memset((void __force *)addr, 0, size); 100 + memcpy(dst, src, size); 101 + return 0; 124 102 } 125 103 126 104 /** ··· 92 152 * being effectively evicted from, or never written to, the processor 93 153 * cache hierarchy after the copy completes. After memcpy_to_pmem() 94 154 * data may still reside in cpu or platform buffers, so this operation 95 - * must be followed by a wmb_pmem(). 155 + * must be followed by a blkdev_issue_flush() on the pmem block device. 96 156 */ 97 - static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) 157 + static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) 98 158 { 99 159 if (arch_has_pmem_api()) 100 160 arch_memcpy_to_pmem(dst, src, n); 101 161 else 102 - default_memcpy_to_pmem(dst, src, n); 103 - } 104 - 105 - /** 106 - * wmb_pmem - synchronize writes to persistent memory 107 - * 108 - * After a series of memcpy_to_pmem() operations this drains data from 109 - * cpu write buffers and any platform (memory controller) buffers to 110 - * ensure that written data is durable on persistent memory media. 
111 - */ 112 - static inline void wmb_pmem(void) 113 - { 114 - if (arch_has_wmb_pmem()) 115 - arch_wmb_pmem(); 116 - else 117 - wmb(); 162 + memcpy(dst, src, n); 118 163 } 119 164 120 165 /** ··· 109 184 * @i: iterator with source data 110 185 * 111 186 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. 112 - * This function requires explicit ordering with a wmb_pmem() call. 187 + * See blkdev_issue_flush() note for memcpy_to_pmem(). 113 188 */ 114 - static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes, 189 + static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, 115 190 struct iov_iter *i) 116 191 { 117 192 if (arch_has_pmem_api()) 118 193 return arch_copy_from_iter_pmem(addr, bytes, i); 119 - return default_copy_from_iter_pmem(addr, bytes, i); 194 + return copy_from_iter_nocache(addr, bytes, i); 120 195 } 121 196 122 197 /** ··· 125 200 * @size: number of bytes to zero 126 201 * 127 202 * Write zeros into the memory range starting at 'addr' for 'size' bytes. 128 - * This function requires explicit ordering with a wmb_pmem() call. 203 + * See blkdev_issue_flush() note for memcpy_to_pmem(). 129 204 */ 130 - static inline void clear_pmem(void __pmem *addr, size_t size) 205 + static inline void clear_pmem(void *addr, size_t size) 131 206 { 132 207 if (arch_has_pmem_api()) 133 208 arch_clear_pmem(addr, size); 134 209 else 135 - default_clear_pmem(addr, size); 210 + memset(addr, 0, size); 136 211 } 137 212 138 213 /** ··· 143 218 * For platforms that support clearing poison this flushes any poisoned 144 219 * ranges out of the cache 145 220 */ 146 - static inline void invalidate_pmem(void __pmem *addr, size_t size) 221 + static inline void invalidate_pmem(void *addr, size_t size) 147 222 { 148 223 if (arch_has_pmem_api()) 149 224 arch_invalidate_pmem(addr, size); ··· 155 230 * @size: number of bytes to write back 156 231 * 157 232 * Write back the processor cache range starting at 'addr' for 'size' bytes. 
158 - * This function requires explicit ordering with a wmb_pmem() call. 233 + * See blkdev_issue_flush() note for memcpy_to_pmem(). 159 234 */ 160 - static inline void wb_cache_pmem(void __pmem *addr, size_t size) 235 + static inline void wb_cache_pmem(void *addr, size_t size) 161 236 { 162 237 if (arch_has_pmem_api()) 163 238 arch_wb_cache_pmem(addr, size);
+1
include/uapi/linux/ndctl.h
··· 298 298 #define NVDIMM_FAMILY_INTEL 0 299 299 #define NVDIMM_FAMILY_HPE1 1 300 300 #define NVDIMM_FAMILY_HPE2 2 301 + #define NVDIMM_FAMILY_MSFT 3 301 302 302 303 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ 303 304 struct nd_cmd_pkg)
-6
kernel/memremap.c
··· 169 169 } 170 170 EXPORT_SYMBOL(devm_memunmap); 171 171 172 - pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) 173 - { 174 - return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); 175 - } 176 - EXPORT_SYMBOL(phys_to_pfn_t); 177 - 178 172 #ifdef CONFIG_ZONE_DEVICE 179 173 static DEFINE_MUTEX(pgmap_lock); 180 174 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
-1
scripts/checkpatch.pl
··· 313 313 __kernel| 314 314 __force| 315 315 __iomem| 316 - __pmem| 317 316 __must_check| 318 317 __init_refok| 319 318 __kprobes|
+1 -1
tools/objtool/arch/x86/insn/x86-opcode-map.txt
··· 947 947 4: XSAVE 948 948 5: XRSTOR | lfence (11B) 949 949 6: XSAVEOPT | clwb (66) | mfence (11B) 950 - 7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) 950 + 7: clflush | clflushopt (66) | sfence (11B) 951 951 EndTable 952 952 953 953 GrpTable: Grp16
-2
tools/perf/arch/x86/tests/insn-x86-dat-32.c
··· 1664 1664 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",}, 1665 1665 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", 1666 1666 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",}, 1667 - {{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", 1668 - "66 0f ae f8 \tpcommit ",},
-2
tools/perf/arch/x86/tests/insn-x86-dat-64.c
··· 1696 1696 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",}, 1697 1697 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", 1698 1698 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",}, 1699 - {{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", 1700 - "66 0f ae f8 \tpcommit ",},
-4
tools/perf/arch/x86/tests/insn-x86-dat-src.c
··· 2655 2655 2656 2656 #endif /* #ifndef __x86_64__ */ 2657 2657 2658 - /* pcommit */ 2659 - 2660 - asm volatile("pcommit"); 2661 - 2662 2658 /* Following line is a marker for the awk script - do not change */ 2663 2659 asm volatile("rdtsc"); /* Stop here */ 2664 2660
+1 -1
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
··· 1012 1012 4: XSAVE 1013 1013 5: XRSTOR | lfence (11B) 1014 1014 6: XSAVEOPT | clwb (66) | mfence (11B) 1015 - 7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) 1015 + 7: clflush | clflushopt (66) | sfence (11B) 1016 1016 EndTable 1017 1017 1018 1018 GrpTable: Grp16
+7 -3
tools/testing/nvdimm/Kbuild
··· 11 11 ldflags-y += --wrap=__request_region 12 12 ldflags-y += --wrap=__release_region 13 13 ldflags-y += --wrap=devm_memremap_pages 14 - ldflags-y += --wrap=phys_to_pfn_t 14 + ldflags-y += --wrap=insert_resource 15 + ldflags-y += --wrap=remove_resource 15 16 16 17 DRIVERS := ../../../drivers 17 18 NVDIMM_SRC := $(DRIVERS)/nvdimm 18 - ACPI_SRC := $(DRIVERS)/acpi 19 + ACPI_SRC := $(DRIVERS)/acpi/nfit 19 20 DAX_SRC := $(DRIVERS)/dax 21 + ccflags-y := -I$(src)/$(NVDIMM_SRC)/ 20 22 21 23 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o 22 24 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o ··· 29 27 obj-$(CONFIG_DEV_DAX) += dax.o 30 28 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o 31 29 32 - nfit-y := $(ACPI_SRC)/nfit.o 30 + nfit-y := $(ACPI_SRC)/core.o 31 + nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o 33 32 nfit-y += config_check.o 34 33 35 34 nd_pmem-y := $(NVDIMM_SRC)/pmem.o 35 + nd_pmem-y += pmem-dax.o 36 36 nd_pmem-y += config_check.o 37 37 38 38 nd_btt-y := $(NVDIMM_SRC)/btt.o
+1
tools/testing/nvdimm/config_check.c
··· 10 10 BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM)); 11 11 BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM)); 12 12 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); 13 + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN)); 13 14 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); 14 15 BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); 15 16 BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
+54
tools/testing/nvdimm/pmem-dax.c
··· 1 + /* 2 + * Copyright (c) 2014-2016, Intel Corporation. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + */ 13 + #include "test/nfit_test.h" 14 + #include <linux/blkdev.h> 15 + #include <pmem.h> 16 + #include <nd.h> 17 + 18 + long pmem_direct_access(struct block_device *bdev, sector_t sector, 19 + void **kaddr, pfn_t *pfn, long size) 20 + { 21 + struct pmem_device *pmem = bdev->bd_queue->queuedata; 22 + resource_size_t offset = sector * 512 + pmem->data_offset; 23 + 24 + if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) 25 + return -EIO; 26 + 27 + /* 28 + * Limit dax to a single page at a time given vmalloc()-backed 29 + * in the nfit_test case. 30 + */ 31 + if (get_nfit_res(pmem->phys_addr + offset)) { 32 + struct page *page; 33 + 34 + *kaddr = pmem->virt_addr + offset; 35 + page = vmalloc_to_page(pmem->virt_addr + offset); 36 + *pfn = page_to_pfn_t(page); 37 + dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent, 38 + "%s: sector: %#llx pfn: %#lx\n", __func__, 39 + (unsigned long long) sector, page_to_pfn(page)); 40 + 41 + return PAGE_SIZE; 42 + } 43 + 44 + *kaddr = pmem->virt_addr + offset; 45 + *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); 46 + 47 + /* 48 + * If badblocks are present, limit known good range to the 49 + * requested range. 50 + */ 51 + if (unlikely(pmem->bb.count)) 52 + return size; 53 + return pmem->size - pmem->pfn_pad - offset; 54 + }
+1 -1
tools/testing/nvdimm/test/Kbuild
··· 1 1 ccflags-y := -I$(src)/../../../../drivers/nvdimm/ 2 - ccflags-y += -I$(src)/../../../../drivers/acpi/ 2 + ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/ 3 3 4 4 obj-m += nfit_test.o 5 5 obj-m += nfit_test_iomap.o
+20 -18
tools/testing/nvdimm/test/iomap.c
··· 10 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 11 * General Public License for more details. 12 12 */ 13 + #include <linux/memremap.h> 13 14 #include <linux/rculist.h> 14 15 #include <linux/export.h> 15 16 #include <linux/ioport.h> 16 17 #include <linux/module.h> 17 18 #include <linux/types.h> 19 + #include <linux/pfn_t.h> 18 20 #include <linux/io.h> 19 21 #include <linux/mm.h> 20 22 #include "nfit_test.h" ··· 54 52 return NULL; 55 53 } 56 54 57 - static struct nfit_test_resource *get_nfit_res(resource_size_t resource) 55 + struct nfit_test_resource *get_nfit_res(resource_size_t resource) 58 56 { 59 57 struct nfit_test_resource *res; 60 58 ··· 64 62 65 63 return res; 66 64 } 65 + EXPORT_SYMBOL(get_nfit_res); 67 66 68 67 void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, 69 68 void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) ··· 100 97 } 101 98 EXPORT_SYMBOL(__wrap_devm_memremap); 102 99 103 - #ifdef __HAVE_ARCH_PTE_DEVMAP 104 - #include <linux/memremap.h> 105 - #include <linux/pfn_t.h> 106 - 107 100 void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, 108 101 struct percpu_ref *ref, struct vmem_altmap *altmap) 109 102 { ··· 121 122 return phys_to_pfn_t(addr, flags); 122 123 } 123 124 EXPORT_SYMBOL(__wrap_phys_to_pfn_t); 124 - #else 125 - /* to be removed post 4.5-rc1 */ 126 - void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res) 127 - { 128 - resource_size_t offset = res->start; 129 - struct nfit_test_resource *nfit_res = get_nfit_res(offset); 130 - 131 - if (nfit_res) 132 - return nfit_res->buf + offset - nfit_res->res->start; 133 - return devm_memremap_pages(dev, res); 134 - } 135 - EXPORT_SYMBOL(__wrap_devm_memremap_pages); 136 - #endif 137 125 138 126 void *__wrap_memremap(resource_size_t offset, size_t size, 139 127 unsigned long flags) ··· 214 228 return nfit_test_request_region(NULL, parent, start, n, name, flags); 215 229 } 216 230 
EXPORT_SYMBOL(__wrap___request_region); 231 + 232 + int __wrap_insert_resource(struct resource *parent, struct resource *res) 233 + { 234 + if (get_nfit_res(res->start)) 235 + return 0; 236 + return insert_resource(parent, res); 237 + } 238 + EXPORT_SYMBOL(__wrap_insert_resource); 239 + 240 + int __wrap_remove_resource(struct resource *res) 241 + { 242 + if (get_nfit_res(res->start)) 243 + return 0; 244 + return remove_resource(res); 245 + } 246 + EXPORT_SYMBOL(__wrap_remove_resource); 217 247 218 248 struct resource *__wrap___devm_request_region(struct device *dev, 219 249 struct resource *parent, resource_size_t start,
+83 -116
tools/testing/nvdimm/test/nfit.c
··· 98 98 enum { 99 99 NUM_PM = 3, 100 100 NUM_DCR = 5, 101 + NUM_HINTS = 8, 101 102 NUM_BDW = NUM_DCR, 102 103 NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, 103 104 NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, 104 105 DIMM_SIZE = SZ_32M, 105 106 LABEL_SIZE = SZ_128K, 107 + SPA_VCD_SIZE = SZ_4M, 106 108 SPA0_SIZE = DIMM_SIZE, 107 109 SPA1_SIZE = DIMM_SIZE*2, 108 110 SPA2_SIZE = DIMM_SIZE, ··· 472 470 list_del(&nfit_res->list); 473 471 spin_unlock(&nfit_test_lock); 474 472 475 - if (is_vmalloc_addr(nfit_res->buf)) 476 - vfree(nfit_res->buf); 477 - else 478 - dma_free_coherent(nfit_res->dev, resource_size(res), 479 - nfit_res->buf, res->start); 473 + vfree(nfit_res->buf); 480 474 kfree(res); 481 475 kfree(nfit_res); 482 476 } ··· 505 507 506 508 return nfit_res->buf; 507 509 err: 508 - if (buf && !is_vmalloc_addr(buf)) 509 - dma_free_coherent(dev, size, buf, *dma); 510 - else if (buf) 510 + if (buf) 511 511 vfree(buf); 512 512 kfree(res); 513 513 kfree(nfit_res); ··· 517 521 void *buf = vmalloc(size); 518 522 519 523 *dma = (unsigned long) buf; 520 - return __test_alloc(t, size, dma, buf); 521 - } 522 - 523 - static void *test_alloc_coherent(struct nfit_test *t, size_t size, 524 - dma_addr_t *dma) 525 - { 526 - struct device *dev = &t->pdev.dev; 527 - void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); 528 - 529 524 return __test_alloc(t, size, dma, buf); 530 525 } 531 526 ··· 571 584 + offsetof(struct acpi_nfit_control_region, 572 585 window_size) * NUM_DCR 573 586 + sizeof(struct acpi_nfit_data_region) * NUM_BDW 574 - + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; 587 + + (sizeof(struct acpi_nfit_flush_address) 588 + + sizeof(u64) * NUM_HINTS) * NUM_DCR; 575 589 int i; 576 590 577 591 t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); ··· 580 592 return -ENOMEM; 581 593 t->nfit_size = nfit_size; 582 594 583 - t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]); 595 + t->spa_set[0] = test_alloc(t, SPA0_SIZE, 
&t->spa_set_dma[0]); 584 596 if (!t->spa_set[0]) 585 597 return -ENOMEM; 586 598 587 - t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]); 599 + t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]); 588 600 if (!t->spa_set[1]) 589 601 return -ENOMEM; 590 602 591 - t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]); 603 + t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]); 592 604 if (!t->spa_set[2]) 593 605 return -ENOMEM; 594 606 ··· 602 614 return -ENOMEM; 603 615 sprintf(t->label[i], "label%d", i); 604 616 605 - t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); 617 + t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS, 618 + &t->flush_dma[i]); 606 619 if (!t->flush[i]) 607 620 return -ENOMEM; 608 621 } ··· 619 630 620 631 static int nfit_test1_alloc(struct nfit_test *t) 621 632 { 622 - size_t nfit_size = sizeof(struct acpi_nfit_system_address) 633 + size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 623 634 + sizeof(struct acpi_nfit_memory_map) 624 635 + offsetof(struct acpi_nfit_control_region, window_size); 625 636 ··· 628 639 return -ENOMEM; 629 640 t->nfit_size = nfit_size; 630 641 631 - t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]); 642 + t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]); 632 643 if (!t->spa_set[0]) 644 + return -ENOMEM; 645 + 646 + t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]); 647 + if (!t->spa_set[1]) 633 648 return -ENOMEM; 634 649 635 650 return ars_state_init(&t->pdev.dev, &t->ars_state); 636 651 } 637 652 653 + static void dcr_common_init(struct acpi_nfit_control_region *dcr) 654 + { 655 + dcr->vendor_id = 0xabcd; 656 + dcr->device_id = 0; 657 + dcr->revision_id = 1; 658 + dcr->valid_fields = 1; 659 + dcr->manufacturing_location = 0xa; 660 + dcr->manufacturing_date = cpu_to_be16(2016); 661 + } 662 + 638 663 static void nfit_test0_setup(struct nfit_test *t) 639 664 { 665 + const int flush_hint_size = sizeof(struct 
acpi_nfit_flush_address) 666 + + (sizeof(u64) * NUM_HINTS); 640 667 struct acpi_nfit_desc *acpi_desc; 641 668 struct acpi_nfit_memory_map *memdev; 642 669 void *nfit_buf = t->nfit_buf; ··· 660 655 struct acpi_nfit_control_region *dcr; 661 656 struct acpi_nfit_data_region *bdw; 662 657 struct acpi_nfit_flush_address *flush; 663 - unsigned int offset; 658 + unsigned int offset, i; 664 659 665 660 /* 666 661 * spa0 (interleave first half of dimm0 and dimm1, note storage ··· 977 972 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 978 973 dcr->header.length = sizeof(struct acpi_nfit_control_region); 979 974 dcr->region_index = 0+1; 980 - dcr->vendor_id = 0xabcd; 981 - dcr->device_id = 0; 982 - dcr->revision_id = 1; 975 + dcr_common_init(dcr); 983 976 dcr->serial_number = ~handle[0]; 984 977 dcr->code = NFIT_FIC_BLK; 985 978 dcr->windows = 1; ··· 992 989 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 993 990 dcr->header.length = sizeof(struct acpi_nfit_control_region); 994 991 dcr->region_index = 1+1; 995 - dcr->vendor_id = 0xabcd; 996 - dcr->device_id = 0; 997 - dcr->revision_id = 1; 992 + dcr_common_init(dcr); 998 993 dcr->serial_number = ~handle[1]; 999 994 dcr->code = NFIT_FIC_BLK; 1000 995 dcr->windows = 1; ··· 1007 1006 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1008 1007 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1009 1008 dcr->region_index = 2+1; 1010 - dcr->vendor_id = 0xabcd; 1011 - dcr->device_id = 0; 1012 - dcr->revision_id = 1; 1009 + dcr_common_init(dcr); 1013 1010 dcr->serial_number = ~handle[2]; 1014 1011 dcr->code = NFIT_FIC_BLK; 1015 1012 dcr->windows = 1; ··· 1022 1023 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1023 1024 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1024 1025 dcr->region_index = 3+1; 1025 - dcr->vendor_id = 0xabcd; 1026 - dcr->device_id = 0; 1027 - dcr->revision_id = 1; 1026 + dcr_common_init(dcr); 1028 1027 dcr->serial_number = ~handle[3]; 1029 1028 dcr->code = NFIT_FIC_BLK; 1030 
1029 dcr->windows = 1; ··· 1039 1042 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1040 1043 window_size); 1041 1044 dcr->region_index = 4+1; 1042 - dcr->vendor_id = 0xabcd; 1043 - dcr->device_id = 0; 1044 - dcr->revision_id = 1; 1045 + dcr_common_init(dcr); 1045 1046 dcr->serial_number = ~handle[0]; 1046 1047 dcr->code = NFIT_FIC_BYTEN; 1047 1048 dcr->windows = 0; ··· 1051 1056 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1052 1057 window_size); 1053 1058 dcr->region_index = 5+1; 1054 - dcr->vendor_id = 0xabcd; 1055 - dcr->device_id = 0; 1056 - dcr->revision_id = 1; 1059 + dcr_common_init(dcr); 1057 1060 dcr->serial_number = ~handle[1]; 1058 1061 dcr->code = NFIT_FIC_BYTEN; 1059 1062 dcr->windows = 0; ··· 1063 1070 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1064 1071 window_size); 1065 1072 dcr->region_index = 6+1; 1066 - dcr->vendor_id = 0xabcd; 1067 - dcr->device_id = 0; 1068 - dcr->revision_id = 1; 1073 + dcr_common_init(dcr); 1069 1074 dcr->serial_number = ~handle[2]; 1070 1075 dcr->code = NFIT_FIC_BYTEN; 1071 1076 dcr->windows = 0; ··· 1075 1084 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1076 1085 window_size); 1077 1086 dcr->region_index = 7+1; 1078 - dcr->vendor_id = 0xabcd; 1079 - dcr->device_id = 0; 1080 - dcr->revision_id = 1; 1087 + dcr_common_init(dcr); 1081 1088 dcr->serial_number = ~handle[3]; 1082 1089 dcr->code = NFIT_FIC_BYTEN; 1083 1090 dcr->windows = 0; ··· 1130 1141 /* flush0 (dimm0) */ 1131 1142 flush = nfit_buf + offset; 1132 1143 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1133 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1144 + flush->header.length = flush_hint_size; 1134 1145 flush->device_handle = handle[0]; 1135 - flush->hint_count = 1; 1136 - flush->hint_address[0] = t->flush_dma[0]; 1146 + flush->hint_count = NUM_HINTS; 1147 + for (i = 0; i < NUM_HINTS; i++) 1148 + flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); 1137 1149 
1138 1150 /* flush1 (dimm1) */ 1139 - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; 1151 + flush = nfit_buf + offset + flush_hint_size * 1; 1140 1152 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1141 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1153 + flush->header.length = flush_hint_size; 1142 1154 flush->device_handle = handle[1]; 1143 - flush->hint_count = 1; 1144 - flush->hint_address[0] = t->flush_dma[1]; 1155 + flush->hint_count = NUM_HINTS; 1156 + for (i = 0; i < NUM_HINTS; i++) 1157 + flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); 1145 1158 1146 1159 /* flush2 (dimm2) */ 1147 - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; 1160 + flush = nfit_buf + offset + flush_hint_size * 2; 1148 1161 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1149 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1162 + flush->header.length = flush_hint_size; 1150 1163 flush->device_handle = handle[2]; 1151 - flush->hint_count = 1; 1152 - flush->hint_address[0] = t->flush_dma[2]; 1164 + flush->hint_count = NUM_HINTS; 1165 + for (i = 0; i < NUM_HINTS; i++) 1166 + flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); 1153 1167 1154 1168 /* flush3 (dimm3) */ 1155 - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; 1169 + flush = nfit_buf + offset + flush_hint_size * 3; 1156 1170 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1157 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1171 + flush->header.length = flush_hint_size; 1158 1172 flush->device_handle = handle[3]; 1159 - flush->hint_count = 1; 1160 - flush->hint_address[0] = t->flush_dma[3]; 1173 + flush->hint_count = NUM_HINTS; 1174 + for (i = 0; i < NUM_HINTS; i++) 1175 + flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); 1161 1176 1162 1177 if (t->setup_hotplug) { 1163 - offset = offset + sizeof(struct acpi_nfit_flush_address) * 4; 1178 + offset = offset + 
flush_hint_size * 4; 1164 1179 /* dcr-descriptor4: blk */ 1165 1180 dcr = nfit_buf + offset; 1166 1181 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1167 1182 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1168 1183 dcr->region_index = 8+1; 1169 - dcr->vendor_id = 0xabcd; 1170 - dcr->device_id = 0; 1171 - dcr->revision_id = 1; 1184 + dcr_common_init(dcr); 1172 1185 dcr->serial_number = ~handle[4]; 1173 1186 dcr->code = NFIT_FIC_BLK; 1174 1187 dcr->windows = 1; ··· 1187 1196 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1188 1197 window_size); 1189 1198 dcr->region_index = 9+1; 1190 - dcr->vendor_id = 0xabcd; 1191 - dcr->device_id = 0; 1192 - dcr->revision_id = 1; 1199 + dcr_common_init(dcr); 1193 1200 dcr->serial_number = ~handle[4]; 1194 1201 dcr->code = NFIT_FIC_BYTEN; 1195 1202 dcr->windows = 0; ··· 1289 1300 /* flush3 (dimm4) */ 1290 1301 flush = nfit_buf + offset; 1291 1302 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1292 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1303 + flush->header.length = flush_hint_size; 1293 1304 flush->device_handle = handle[4]; 1294 - flush->hint_count = 1; 1295 - flush->hint_address[0] = t->flush_dma[4]; 1305 + flush->hint_count = NUM_HINTS; 1306 + for (i = 0; i < NUM_HINTS; i++) 1307 + flush->hint_address[i] = t->flush_dma[4] 1308 + + i * sizeof(u64); 1296 1309 } 1297 1310 1298 1311 post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); ··· 1330 1339 spa->address = t->spa_set_dma[0]; 1331 1340 spa->length = SPA2_SIZE; 1332 1341 1333 - offset += sizeof(*spa); 1342 + /* virtual cd region */ 1343 + spa = nfit_buf + sizeof(*spa); 1344 + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1345 + spa->header.length = sizeof(*spa); 1346 + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); 1347 + spa->range_index = 0; 1348 + spa->address = t->spa_set_dma[1]; 1349 + spa->length = SPA_VCD_SIZE; 1350 + 1351 + offset += sizeof(*spa) * 2; 1334 1352 /* mem-region0 (spa0, 
dimm0) */ 1335 1353 memdev = nfit_buf + offset; 1336 1354 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; ··· 1365 1365 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1366 1366 window_size); 1367 1367 dcr->region_index = 0+1; 1368 - dcr->vendor_id = 0xabcd; 1369 - dcr->device_id = 0; 1370 - dcr->revision_id = 1; 1368 + dcr_common_init(dcr); 1371 1369 dcr->serial_number = ~0; 1372 1370 dcr->code = NFIT_FIC_BYTE; 1373 1371 dcr->windows = 0; ··· 1460 1462 nfit_test->setup(nfit_test); 1461 1463 acpi_desc = &nfit_test->acpi_desc; 1462 1464 acpi_nfit_desc_init(acpi_desc, &pdev->dev); 1463 - acpi_desc->nfit = nfit_test->nfit_buf; 1464 1465 acpi_desc->blk_do_io = nfit_test_blk_do_io; 1465 1466 nd_desc = &acpi_desc->nd_desc; 1466 1467 nd_desc->provider_name = NULL; 1468 + nd_desc->module = THIS_MODULE; 1467 1469 nd_desc->ndctl = nfit_test_ctl; 1468 - acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); 1469 - if (!acpi_desc->nvdimm_bus) 1470 - return -ENXIO; 1471 1470 1472 - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); 1473 - if (rc) { 1474 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 1471 + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, 1472 + nfit_test->nfit_size); 1473 + if (rc) 1475 1474 return rc; 1476 - } 1477 1475 1478 1476 if (nfit_test->setup != nfit_test0_setup) 1479 1477 return 0; ··· 1477 1483 nfit_test->setup_hotplug = 1; 1478 1484 nfit_test->setup(nfit_test); 1479 1485 1480 - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); 1481 - if (rc) { 1482 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 1486 + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, 1487 + nfit_test->nfit_size); 1488 + if (rc) 1483 1489 return rc; 1484 - } 1485 1490 1486 1491 return 0; 1487 1492 } 1488 1493 1489 1494 static int nfit_test_remove(struct platform_device *pdev) 1490 1495 { 1491 - struct nfit_test *nfit_test = to_nfit_test(&pdev->dev); 1492 - struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc; 1493 - 1494 - 
nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 1495 - 1496 1496 return 0; 1497 1497 } 1498 1498 ··· 1511 1523 .id_table = nfit_test_id, 1512 1524 }; 1513 1525 1514 - #ifdef CONFIG_CMA_SIZE_MBYTES 1515 - #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES 1516 - #else 1517 - #define CMA_SIZE_MBYTES 0 1518 - #endif 1519 - 1520 1526 static __init int nfit_test_init(void) 1521 1527 { 1522 1528 int rc, i; ··· 1520 1538 for (i = 0; i < NUM_NFITS; i++) { 1521 1539 struct nfit_test *nfit_test; 1522 1540 struct platform_device *pdev; 1523 - static int once; 1524 1541 1525 1542 nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL); 1526 1543 if (!nfit_test) { ··· 1558 1577 goto err_register; 1559 1578 1560 1579 instances[i] = nfit_test; 1561 - 1562 - if (!once++) { 1563 - dma_addr_t dma; 1564 - void *buf; 1565 - 1566 - buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma, 1567 - GFP_KERNEL); 1568 - if (!buf) { 1569 - rc = -ENOMEM; 1570 - dev_warn(&pdev->dev, "need 128M of free cma\n"); 1571 - goto err_register; 1572 - } 1573 - dma_free_coherent(&pdev->dev, SZ_128M, buf, dma); 1574 - } 1575 1580 } 1576 1581 1577 1582 rc = platform_driver_register(&nfit_test_driver);
+2
tools/testing/nvdimm/test/nfit_test.h
··· 12 12 */ 13 13 #ifndef __NFIT_TEST_H__ 14 14 #define __NFIT_TEST_H__ 15 + #include <linux/list.h> 15 16 16 17 struct nfit_test_resource { 17 18 struct list_head list; ··· 27 26 void __wrap_iounmap(volatile void __iomem *addr); 28 27 void nfit_test_setup(nfit_test_lookup_fn lookup); 29 28 void nfit_test_teardown(void); 29 + struct nfit_test_resource *get_nfit_res(resource_size_t resource); 30 30 #endif