Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:

- Replace pcommit with ADR / directed-flushing.

The pcommit instruction, which has not shipped on any product, is
deprecated. Instead, the requirement is that platforms implement
either ADR, or provide one or more flush addresses per nvdimm.

ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers
to the memory controller on a power-fail event.

Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
A flush hint is an mmio address that, when written and fenced, assures
that all previous posted writes targeting a given dimm have been
flushed to media.

- On-demand ARS (address range scrub).

Linux uses the results of the ACPI ARS commands to track bad blocks
in pmem devices. When latent errors are detected we re-scrub the
media to refresh the bad block list; userspace can also request a
re-scrub at any time.

- Support for the Microsoft DSM (device specific method) command
format.

- Support for EDK2/OVMF virtual disk device memory ranges.

- Various fixes and cleanups across the subsystem.

* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
nfit: do an ARS scrub on hitting a latent media error
nfit: move to nfit/ sub-directory
nfit, libnvdimm: allow an ARS scrub to be triggered on demand
libnvdimm: register nvdimm_bus devices with an nd_bus driver
pmem: clarify a debug print in pmem_clear_poison
x86/insn: remove pcommit
Revert "KVM: x86: add pcommit support"
nfit, tools/testing/nvdimm/: unify shutdown paths
libnvdimm: move ->module to struct nvdimm_bus_descriptor
nfit: cleanup acpi_nfit_init calling convention
nfit: fix _FIT evaluation memory leak + use after free
tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
tools/testing/nvdimm: add virtual ramdisk range
acpi, nfit: treat virtual ramdisk SPA as pmem region
pmem: kill __pmem address space
pmem: kill wmb_pmem()
libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
fs/dax: remove wmb_pmem()
libnvdimm, pmem: flush posted-write queues on shutdown
...

+1375 -1015
+1 -1
Documentation/filesystems/Locking
··· 395 395 int (*release) (struct gendisk *, fmode_t); 396 396 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 397 397 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 398 - int (*direct_access) (struct block_device *, sector_t, void __pmem **, 398 + int (*direct_access) (struct block_device *, sector_t, void **, 399 399 unsigned long *); 400 400 int (*media_changed) (struct gendisk *); 401 401 void (*unlock_native_capacity) (struct gendisk *);
+9 -19
Documentation/nvdimm/btt.txt
··· 256 256 only state using a flag in the info block. 257 257 258 258 259 - 5. In-kernel usage 260 - ================== 259 + 5. Usage 260 + ======== 261 261 262 - Any block driver that supports byte granularity IO to the storage may register 263 - with the BTT. It will have to provide the rw_bytes interface in its 264 - block_device_operations struct: 262 + The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem 263 + (pmem, or blk mode). The easiest way to set up such a namespace is using the 264 + 'ndctl' utility [1]: 265 265 266 - int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw); 266 + For example, the ndctl command line to setup a btt with a 4k sector size is: 267 267 268 - It may register with the BTT after it adds its own gendisk, using btt_init: 268 + ndctl create-namespace -f -e namespace0.0 -m sector -l 4k 269 269 270 - struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize, 271 - u32 lbasize, u8 uuid[], int maxlane); 270 + See ndctl create-namespace --help for more options. 272 271 273 - note that maxlane is the maximum amount of concurrency the driver wishes to 274 - allow the BTT to use. 275 - 276 - The BTT 'disk' appears as a stacked block device that grabs the underlying block 277 - device in the O_EXCL mode. 278 - 279 - When the driver wishes to remove the backing disk, it should similarly call 280 - btt_fini using the same struct btt* handle that was provided to it by btt_init. 281 - 282 - void btt_fini(struct btt *btt); 272 + [1]: https://github.com/pmem/ndctl 283 273
+2 -2
arch/powerpc/sysdev/axonram.c
··· 143 143 */ 144 144 static long 145 145 axon_ram_direct_access(struct block_device *device, sector_t sector, 146 - void __pmem **kaddr, pfn_t *pfn, long size) 146 + void **kaddr, pfn_t *pfn, long size) 147 147 { 148 148 struct axon_ram_bank *bank = device->bd_disk->private_data; 149 149 loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; 150 150 151 - *kaddr = (void __pmem __force *) bank->io_addr + offset; 151 + *kaddr = (void *) bank->io_addr + offset; 152 152 *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); 153 153 return bank->size - offset; 154 154 }
-1
arch/x86/include/asm/cpufeatures.h
··· 225 225 #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ 226 226 #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ 227 227 #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ 228 - #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ 229 228 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ 230 229 #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ 231 230 #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+19 -58
arch/x86/include/asm/pmem.h
··· 26 26 * @n: length of the copy in bytes 27 27 * 28 28 * Copy data to persistent memory media via non-temporal stores so that 29 - * a subsequent arch_wmb_pmem() can flush cpu and memory controller 30 - * write buffers to guarantee durability. 29 + * a subsequent pmem driver flush operation will drain posted write queues. 31 30 */ 32 - static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, 33 - size_t n) 31 + static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) 34 32 { 35 - int unwritten; 33 + int rem; 36 34 37 35 /* 38 36 * We are copying between two kernel buffers, if ··· 38 40 * fault) we would have already reported a general protection fault 39 41 * before the WARN+BUG. 40 42 */ 41 - unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, 42 - (void __user *) src, n); 43 - if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", 44 - __func__, dst, src, unwritten)) 43 + rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); 44 + if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", 45 + __func__, dst, src, rem)) 45 46 BUG(); 46 47 } 47 48 48 - static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, 49 - size_t n) 49 + static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) 50 50 { 51 51 if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) 52 - return memcpy_mcsafe(dst, (void __force *) src, n); 53 - memcpy(dst, (void __force *) src, n); 52 + return memcpy_mcsafe(dst, src, n); 53 + memcpy(dst, src, n); 54 54 return 0; 55 - } 56 - 57 - /** 58 - * arch_wmb_pmem - synchronize writes to persistent memory 59 - * 60 - * After a series of arch_memcpy_to_pmem() operations this drains data 61 - * from cpu write buffers and any platform (memory controller) buffers 62 - * to ensure that written data is durable on persistent memory media. 
63 - */ 64 - static inline void arch_wmb_pmem(void) 65 - { 66 - /* 67 - * wmb() to 'sfence' all previous writes such that they are 68 - * architecturally visible to 'pcommit'. Note, that we've 69 - * already arranged for pmem writes to avoid the cache via 70 - * arch_memcpy_to_pmem(). 71 - */ 72 - wmb(); 73 - pcommit_sfence(); 74 55 } 75 56 76 57 /** ··· 58 81 * @size: number of bytes to write back 59 82 * 60 83 * Write back a cache range using the CLWB (cache line write back) 61 - * instruction. This function requires explicit ordering with an 62 - * arch_wmb_pmem() call. 84 + * instruction. 63 85 */ 64 - static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) 86 + static inline void arch_wb_cache_pmem(void *addr, size_t size) 65 87 { 66 88 u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; 67 89 unsigned long clflush_mask = x86_clflush_size - 1; 68 - void *vaddr = (void __force *)addr; 69 - void *vend = vaddr + size; 90 + void *vend = addr + size; 70 91 void *p; 71 92 72 - for (p = (void *)((unsigned long)vaddr & ~clflush_mask); 93 + for (p = (void *)((unsigned long)addr & ~clflush_mask); 73 94 p < vend; p += x86_clflush_size) 74 95 clwb(p); 75 96 } ··· 88 113 * @i: iterator with source data 89 114 * 90 115 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. 91 - * This function requires explicit ordering with an arch_wmb_pmem() call. 
92 116 */ 93 - static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, 117 + static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, 94 118 struct iov_iter *i) 95 119 { 96 - void *vaddr = (void __force *)addr; 97 120 size_t len; 98 121 99 122 /* TODO: skip the write-back by always using non-temporal stores */ 100 - len = copy_from_iter_nocache(vaddr, bytes, i); 123 + len = copy_from_iter_nocache(addr, bytes, i); 101 124 102 125 if (__iter_needs_pmem_wb(i)) 103 126 arch_wb_cache_pmem(addr, bytes); ··· 109 136 * @size: number of bytes to zero 110 137 * 111 138 * Write zeros into the memory range starting at 'addr' for 'size' bytes. 112 - * This function requires explicit ordering with an arch_wmb_pmem() call. 113 139 */ 114 - static inline void arch_clear_pmem(void __pmem *addr, size_t size) 140 + static inline void arch_clear_pmem(void *addr, size_t size) 115 141 { 116 - void *vaddr = (void __force *)addr; 117 - 118 - memset(vaddr, 0, size); 142 + memset(addr, 0, size); 119 143 arch_wb_cache_pmem(addr, size); 120 144 } 121 145 122 - static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) 146 + static inline void arch_invalidate_pmem(void *addr, size_t size) 123 147 { 124 - clflush_cache_range((void __force *) addr, size); 125 - } 126 - 127 - static inline bool __arch_has_wmb_pmem(void) 128 - { 129 - /* 130 - * We require that wmb() be an 'sfence', that is only guaranteed on 131 - * 64-bit builds 132 - */ 133 - return static_cpu_has(X86_FEATURE_PCOMMIT); 148 + clflush_cache_range(addr, size); 134 149 } 135 150 #endif /* CONFIG_ARCH_HAS_PMEM_API */ 136 151 #endif /* __ASM_X86_PMEM_H__ */
-46
arch/x86/include/asm/special_insns.h
··· 253 253 : [pax] "a" (p)); 254 254 } 255 255 256 - /** 257 - * pcommit_sfence() - persistent commit and fence 258 - * 259 - * The PCOMMIT instruction ensures that data that has been flushed from the 260 - * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to 261 - * memory and is durable on the DIMM. The primary use case for this is 262 - * persistent memory. 263 - * 264 - * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT 265 - * with appropriate fencing. 266 - * 267 - * Example: 268 - * void flush_and_commit_buffer(void *vaddr, unsigned int size) 269 - * { 270 - * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; 271 - * void *vend = vaddr + size; 272 - * void *p; 273 - * 274 - * for (p = (void *)((unsigned long)vaddr & ~clflush_mask); 275 - * p < vend; p += boot_cpu_data.x86_clflush_size) 276 - * clwb(p); 277 - * 278 - * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes 279 - * // MFENCE via mb() also works 280 - * wmb(); 281 - * 282 - * // PCOMMIT and the required SFENCE for ordering 283 - * pcommit_sfence(); 284 - * } 285 - * 286 - * After this function completes the data pointed to by 'vaddr' has been 287 - * accepted to memory and will be durable if the 'vaddr' points to persistent 288 - * memory. 289 - * 290 - * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify 291 - * things we include both the PCOMMIT and the required SFENCE in the 292 - * alternatives generated by pcommit_sfence(). 293 - */ 294 - static inline void pcommit_sfence(void) 295 - { 296 - alternative(ASM_NOP7, 297 - ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */ 298 - "sfence", 299 - X86_FEATURE_PCOMMIT); 300 - } 301 - 302 256 #define nop() asm volatile ("nop") 303 257 304 258
-1
arch/x86/include/asm/vmx.h
··· 72 72 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 73 73 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 74 74 #define SECONDARY_EXEC_XSAVES 0x00100000 75 - #define SECONDARY_EXEC_PCOMMIT 0x00200000 76 75 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 77 76 78 77 #define PIN_BASED_EXT_INTR_MASK 0x00000001
+1 -3
arch/x86/include/uapi/asm/vmx.h
··· 78 78 #define EXIT_REASON_PML_FULL 62 79 79 #define EXIT_REASON_XSAVES 63 80 80 #define EXIT_REASON_XRSTORS 64 81 - #define EXIT_REASON_PCOMMIT 65 82 81 83 82 #define VMX_EXIT_REASONS \ 84 83 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ ··· 126 127 { EXIT_REASON_INVVPID, "INVVPID" }, \ 127 128 { EXIT_REASON_INVPCID, "INVPCID" }, \ 128 129 { EXIT_REASON_XSAVES, "XSAVES" }, \ 129 - { EXIT_REASON_XRSTORS, "XRSTORS" }, \ 130 - { EXIT_REASON_PCOMMIT, "PCOMMIT" } 130 + { EXIT_REASON_XRSTORS, "XRSTORS" } 131 131 132 132 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 133 133 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
+1 -1
arch/x86/kvm/cpuid.c
··· 366 366 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | 367 367 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | 368 368 F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | 369 - F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT); 369 + F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB); 370 370 371 371 /* cpuid 0xD.1.eax */ 372 372 const u32 kvm_cpuid_D_1_eax_x86_features =
-8
arch/x86/kvm/cpuid.h
··· 144 144 return best && (best->ebx & bit(X86_FEATURE_RTM)); 145 145 } 146 146 147 - static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) 148 - { 149 - struct kvm_cpuid_entry2 *best; 150 - 151 - best = kvm_find_cpuid_entry(vcpu, 7, 0); 152 - return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); 153 - } 154 - 155 147 static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) 156 148 { 157 149 struct kvm_cpuid_entry2 *best;
+4 -28
arch/x86/kvm/vmx.c
··· 2707 2707 SECONDARY_EXEC_APIC_REGISTER_VIRT | 2708 2708 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 2709 2709 SECONDARY_EXEC_WBINVD_EXITING | 2710 - SECONDARY_EXEC_XSAVES | 2711 - SECONDARY_EXEC_PCOMMIT; 2710 + SECONDARY_EXEC_XSAVES; 2712 2711 2713 2712 if (enable_ept) { 2714 2713 /* nested EPT: emulate EPT also to L1 */ ··· 3269 3270 SECONDARY_EXEC_SHADOW_VMCS | 3270 3271 SECONDARY_EXEC_XSAVES | 3271 3272 SECONDARY_EXEC_ENABLE_PML | 3272 - SECONDARY_EXEC_PCOMMIT | 3273 3273 SECONDARY_EXEC_TSC_SCALING; 3274 3274 if (adjust_vmx_controls(min2, opt2, 3275 3275 MSR_IA32_VMX_PROCBASED_CTLS2, ··· 4856 4858 if (!enable_pml) 4857 4859 exec_control &= ~SECONDARY_EXEC_ENABLE_PML; 4858 4860 4859 - /* Currently, we allow L1 guest to directly run pcommit instruction. */ 4860 - exec_control &= ~SECONDARY_EXEC_PCOMMIT; 4861 - 4862 4861 return exec_control; 4863 4862 } 4864 4863 ··· 4899 4904 4900 4905 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); 4901 4906 4902 - if (cpu_has_secondary_exec_ctrls()) 4907 + if (cpu_has_secondary_exec_ctrls()) { 4903 4908 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, 4904 4909 vmx_secondary_exec_control(vmx)); 4910 + } 4905 4911 4906 4912 if (kvm_vcpu_apicv_active(&vmx->vcpu)) { 4907 4913 vmcs_write64(EOI_EXIT_BITMAP0, 0); ··· 7560 7564 return 1; 7561 7565 } 7562 7566 7563 - static int handle_pcommit(struct kvm_vcpu *vcpu) 7564 - { 7565 - /* we never catch pcommit instruct for L1 guest. */ 7566 - WARN_ON(1); 7567 - return 1; 7568 - } 7569 - 7570 7567 /* 7571 7568 * The exit handlers return 1 if the exit was handled fully and guest execution 7572 7569 * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs ··· 7610 7621 [EXIT_REASON_XSAVES] = handle_xsaves, 7611 7622 [EXIT_REASON_XRSTORS] = handle_xrstors, 7612 7623 [EXIT_REASON_PML_FULL] = handle_pml_full, 7613 - [EXIT_REASON_PCOMMIT] = handle_pcommit, 7614 7624 }; 7615 7625 7616 7626 static const int kvm_vmx_max_exit_handlers = ··· 7918 7930 * the XSS exit bitmap in vmcs12. 7919 7931 */ 7920 7932 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); 7921 - case EXIT_REASON_PCOMMIT: 7922 - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT); 7923 7933 default: 7924 7934 return true; 7925 7935 } ··· 9080 9094 9081 9095 if (cpu_has_secondary_exec_ctrls()) 9082 9096 vmcs_set_secondary_exec_control(secondary_exec_ctl); 9083 - 9084 - if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { 9085 - if (guest_cpuid_has_pcommit(vcpu)) 9086 - vmx->nested.nested_vmx_secondary_ctls_high |= 9087 - SECONDARY_EXEC_PCOMMIT; 9088 - else 9089 - vmx->nested.nested_vmx_secondary_ctls_high &= 9090 - ~SECONDARY_EXEC_PCOMMIT; 9091 - } 9092 9097 } 9093 9098 9094 9099 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ··· 9692 9715 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 9693 9716 SECONDARY_EXEC_RDTSCP | 9694 9717 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 9695 - SECONDARY_EXEC_APIC_REGISTER_VIRT | 9696 - SECONDARY_EXEC_PCOMMIT); 9718 + SECONDARY_EXEC_APIC_REGISTER_VIRT); 9697 9719 if (nested_cpu_has(vmcs12, 9698 9720 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) 9699 9721 exec_control |= vmcs12->secondary_vm_exec_control;
+1 -1
arch/x86/lib/x86-opcode-map.txt
··· 1012 1012 4: XSAVE 1013 1013 5: XRSTOR | lfence (11B) 1014 1014 6: XSAVEOPT | clwb (66) | mfence (11B) 1015 - 7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) 1015 + 7: clflush | clflushopt (66) | sfence (11B) 1016 1016 EndTable 1017 1017 1018 1018 GrpTable: Grp16
+1 -26
drivers/acpi/Kconfig
··· 454 454 455 455 If you are unsure what to do, do not enable this option. 456 456 457 - config ACPI_NFIT 458 - tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" 459 - depends on PHYS_ADDR_T_64BIT 460 - depends on BLK_DEV 461 - depends on ARCH_HAS_MMIO_FLUSH 462 - select LIBNVDIMM 463 - help 464 - Infrastructure to probe ACPI 6 compliant platforms for 465 - NVDIMMs (NFIT) and register a libnvdimm device tree. In 466 - addition to storage devices this also enables libnvdimm to pass 467 - ACPI._DSM messages for platform/dimm configuration. 468 - 469 - To compile this driver as a module, choose M here: 470 - the module will be called nfit. 471 - 472 - config ACPI_NFIT_DEBUG 473 - bool "NFIT DSM debug" 474 - depends on ACPI_NFIT 475 - depends on DYNAMIC_DEBUG 476 - default n 477 - help 478 - Enabling this option causes the nfit driver to dump the 479 - input and output buffers of _DSM operations on the ACPI0012 480 - device and its children. This can be very verbose, so leave 481 - it disabled unless you are debugging a hardware / firmware 482 - issue. 457 + source "drivers/acpi/nfit/Kconfig" 483 458 484 459 source "drivers/acpi/apei/Kconfig" 485 460 source "drivers/acpi/dptf/Kconfig"
+1 -1
drivers/acpi/Makefile
··· 69 69 obj-$(CONFIG_ACPI_PROCESSOR) += processor.o 70 70 obj-$(CONFIG_ACPI) += container.o 71 71 obj-$(CONFIG_ACPI_THERMAL) += thermal.o 72 - obj-$(CONFIG_ACPI_NFIT) += nfit.o 72 + obj-$(CONFIG_ACPI_NFIT) += nfit/ 73 73 obj-$(CONFIG_ACPI) += acpi_memhotplug.o 74 74 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o 75 75 obj-$(CONFIG_ACPI_BATTERY) += battery.o
+359 -288
drivers/acpi/nfit.c drivers/acpi/nfit/core.c
··· 15 15 #include <linux/module.h> 16 16 #include <linux/mutex.h> 17 17 #include <linux/ndctl.h> 18 + #include <linux/sysfs.h> 18 19 #include <linux/delay.h> 19 20 #include <linux/list.h> 20 21 #include <linux/acpi.h> ··· 50 49 module_param(disable_vendor_specific, bool, S_IRUGO); 51 50 MODULE_PARM_DESC(disable_vendor_specific, 52 51 "Limit commands to the publicly specified set\n"); 52 + 53 + LIST_HEAD(acpi_descs); 54 + DEFINE_MUTEX(acpi_desc_lock); 53 55 54 56 static struct workqueue_struct *nfit_wq; 55 57 ··· 364 360 return to_name[type]; 365 361 } 366 362 367 - static int nfit_spa_type(struct acpi_nfit_system_address *spa) 363 + int nfit_spa_type(struct acpi_nfit_system_address *spa) 368 364 { 369 365 int i; 370 366 ··· 378 374 struct nfit_table_prev *prev, 379 375 struct acpi_nfit_system_address *spa) 380 376 { 381 - size_t length = min_t(size_t, sizeof(*spa), spa->header.length); 382 377 struct device *dev = acpi_desc->dev; 383 378 struct nfit_spa *nfit_spa; 384 379 380 + if (spa->header.length != sizeof(*spa)) 381 + return false; 382 + 385 383 list_for_each_entry(nfit_spa, &prev->spas, list) { 386 - if (memcmp(nfit_spa->spa, spa, length) == 0) { 384 + if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) { 387 385 list_move_tail(&nfit_spa->list, &acpi_desc->spas); 388 386 return true; 389 387 } 390 388 } 391 389 392 - nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL); 390 + nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa), 391 + GFP_KERNEL); 393 392 if (!nfit_spa) 394 393 return false; 395 394 INIT_LIST_HEAD(&nfit_spa->list); 396 - nfit_spa->spa = spa; 395 + memcpy(nfit_spa->spa, spa, sizeof(*spa)); 397 396 list_add_tail(&nfit_spa->list, &acpi_desc->spas); 398 397 dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__, 399 398 spa->range_index, ··· 408 401 struct nfit_table_prev *prev, 409 402 struct acpi_nfit_memory_map *memdev) 410 403 { 411 - size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length); 412 404 struct device 
*dev = acpi_desc->dev; 413 405 struct nfit_memdev *nfit_memdev; 414 406 407 + if (memdev->header.length != sizeof(*memdev)) 408 + return false; 409 + 415 410 list_for_each_entry(nfit_memdev, &prev->memdevs, list) 416 - if (memcmp(nfit_memdev->memdev, memdev, length) == 0) { 411 + if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) { 417 412 list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs); 418 413 return true; 419 414 } 420 415 421 - nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL); 416 + nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev), 417 + GFP_KERNEL); 422 418 if (!nfit_memdev) 423 419 return false; 424 420 INIT_LIST_HEAD(&nfit_memdev->list); 425 - nfit_memdev->memdev = memdev; 421 + memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev)); 426 422 list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs); 427 423 dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n", 428 424 __func__, memdev->device_handle, memdev->range_index, ··· 433 423 return true; 434 424 } 435 425 426 + /* 427 + * An implementation may provide a truncated control region if no block windows 428 + * are defined. 
429 + */ 430 + static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr) 431 + { 432 + if (dcr->header.length < offsetof(struct acpi_nfit_control_region, 433 + window_size)) 434 + return 0; 435 + if (dcr->windows) 436 + return sizeof(*dcr); 437 + return offsetof(struct acpi_nfit_control_region, window_size); 438 + } 439 + 436 440 static bool add_dcr(struct acpi_nfit_desc *acpi_desc, 437 441 struct nfit_table_prev *prev, 438 442 struct acpi_nfit_control_region *dcr) 439 443 { 440 - size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length); 441 444 struct device *dev = acpi_desc->dev; 442 445 struct nfit_dcr *nfit_dcr; 443 446 447 + if (!sizeof_dcr(dcr)) 448 + return false; 449 + 444 450 list_for_each_entry(nfit_dcr, &prev->dcrs, list) 445 - if (memcmp(nfit_dcr->dcr, dcr, length) == 0) { 451 + if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) { 446 452 list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs); 447 453 return true; 448 454 } 449 455 450 - nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL); 456 + nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr), 457 + GFP_KERNEL); 451 458 if (!nfit_dcr) 452 459 return false; 453 460 INIT_LIST_HEAD(&nfit_dcr->list); 454 - nfit_dcr->dcr = dcr; 461 + memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)); 455 462 list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs); 456 463 dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__, 457 464 dcr->region_index, dcr->windows); ··· 479 452 struct nfit_table_prev *prev, 480 453 struct acpi_nfit_data_region *bdw) 481 454 { 482 - size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length); 483 455 struct device *dev = acpi_desc->dev; 484 456 struct nfit_bdw *nfit_bdw; 485 457 458 + if (bdw->header.length != sizeof(*bdw)) 459 + return false; 486 460 list_for_each_entry(nfit_bdw, &prev->bdws, list) 487 - if (memcmp(nfit_bdw->bdw, bdw, length) == 0) { 461 + if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) { 488 462 list_move_tail(&nfit_bdw->list, 
&acpi_desc->bdws); 489 463 return true; 490 464 } 491 465 492 - nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL); 466 + nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw), 467 + GFP_KERNEL); 493 468 if (!nfit_bdw) 494 469 return false; 495 470 INIT_LIST_HEAD(&nfit_bdw->list); 496 - nfit_bdw->bdw = bdw; 471 + memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw)); 497 472 list_add_tail(&nfit_bdw->list, &acpi_desc->bdws); 498 473 dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__, 499 474 bdw->region_index, bdw->windows); 500 475 return true; 501 476 } 502 477 478 + static size_t sizeof_idt(struct acpi_nfit_interleave *idt) 479 + { 480 + if (idt->header.length < sizeof(*idt)) 481 + return 0; 482 + return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1); 483 + } 484 + 503 485 static bool add_idt(struct acpi_nfit_desc *acpi_desc, 504 486 struct nfit_table_prev *prev, 505 487 struct acpi_nfit_interleave *idt) 506 488 { 507 - size_t length = min_t(size_t, sizeof(*idt), idt->header.length); 508 489 struct device *dev = acpi_desc->dev; 509 490 struct nfit_idt *nfit_idt; 510 491 511 - list_for_each_entry(nfit_idt, &prev->idts, list) 512 - if (memcmp(nfit_idt->idt, idt, length) == 0) { 492 + if (!sizeof_idt(idt)) 493 + return false; 494 + 495 + list_for_each_entry(nfit_idt, &prev->idts, list) { 496 + if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt)) 497 + continue; 498 + 499 + if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) { 513 500 list_move_tail(&nfit_idt->list, &acpi_desc->idts); 514 501 return true; 515 502 } 503 + } 516 504 517 - nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL); 505 + nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt), 506 + GFP_KERNEL); 518 507 if (!nfit_idt) 519 508 return false; 520 509 INIT_LIST_HEAD(&nfit_idt->list); 521 - nfit_idt->idt = idt; 510 + memcpy(nfit_idt->idt, idt, sizeof_idt(idt)); 522 511 list_add_tail(&nfit_idt->list, &acpi_desc->idts); 523 512 dev_dbg(dev, "%s: idt index: %d num_lines: 
%d\n", __func__, 524 513 idt->interleave_index, idt->line_count); 525 514 return true; 526 515 } 527 516 517 + static size_t sizeof_flush(struct acpi_nfit_flush_address *flush) 518 + { 519 + if (flush->header.length < sizeof(*flush)) 520 + return 0; 521 + return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1); 522 + } 523 + 528 524 static bool add_flush(struct acpi_nfit_desc *acpi_desc, 529 525 struct nfit_table_prev *prev, 530 526 struct acpi_nfit_flush_address *flush) 531 527 { 532 - size_t length = min_t(size_t, sizeof(*flush), flush->header.length); 533 528 struct device *dev = acpi_desc->dev; 534 529 struct nfit_flush *nfit_flush; 535 530 536 - list_for_each_entry(nfit_flush, &prev->flushes, list) 537 - if (memcmp(nfit_flush->flush, flush, length) == 0) { 531 + if (!sizeof_flush(flush)) 532 + return false; 533 + 534 + list_for_each_entry(nfit_flush, &prev->flushes, list) { 535 + if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush)) 536 + continue; 537 + 538 + if (memcmp(nfit_flush->flush, flush, 539 + sizeof_flush(flush)) == 0) { 538 540 list_move_tail(&nfit_flush->list, &acpi_desc->flushes); 539 541 return true; 540 542 } 543 + } 541 544 542 - nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL); 545 + nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush) 546 + + sizeof_flush(flush), GFP_KERNEL); 543 547 if (!nfit_flush) 544 548 return false; 545 549 INIT_LIST_HEAD(&nfit_flush->list); 546 - nfit_flush->flush = flush; 550 + memcpy(nfit_flush->flush, flush, sizeof_flush(flush)); 547 551 list_add_tail(&nfit_flush->list, &acpi_desc->flushes); 548 552 dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, 549 553 flush->device_handle, flush->hint_count); ··· 672 614 { 673 615 u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; 674 616 struct nfit_memdev *nfit_memdev; 675 - struct nfit_flush *nfit_flush; 676 617 struct nfit_bdw *nfit_bdw; 677 618 struct nfit_idt *nfit_idt; 678 619 u16 idt_idx, range_index; ··· 704 647 
nfit_mem->idt_bdw = nfit_idt->idt; 705 648 break; 706 649 } 707 - 708 - list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { 709 - if (nfit_flush->flush->device_handle != 710 - nfit_memdev->memdev->device_handle) 711 - continue; 712 - nfit_mem->nfit_flush = nfit_flush; 713 - break; 714 - } 715 650 break; 716 651 } 717 652 } ··· 724 675 } 725 676 726 677 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { 678 + struct nfit_flush *nfit_flush; 727 679 struct nfit_dcr *nfit_dcr; 728 680 u32 device_handle; 729 681 u16 dcr; ··· 768 718 else if (nfit_mem->dcr->windows == 0 769 719 && nfit_dcr->dcr->windows) 770 720 nfit_mem->dcr = nfit_dcr->dcr; 721 + break; 722 + } 723 + 724 + list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { 725 + struct acpi_nfit_flush_address *flush; 726 + u16 i; 727 + 728 + if (nfit_flush->flush->device_handle != device_handle) 729 + continue; 730 + nfit_mem->nfit_flush = nfit_flush; 731 + flush = nfit_flush->flush; 732 + nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev, 733 + flush->hint_count 734 + * sizeof(struct resource), GFP_KERNEL); 735 + if (!nfit_mem->flush_wpq) 736 + return -ENOMEM; 737 + for (i = 0; i < flush->hint_count; i++) { 738 + struct resource *res = &nfit_mem->flush_wpq[i]; 739 + 740 + res->start = flush->hint_address[i]; 741 + res->end = res->start + 8 - 1; 742 + } 771 743 break; 772 744 } 773 745 ··· 878 806 } 879 807 static DEVICE_ATTR_RO(revision); 880 808 809 + /* 810 + * This shows the number of full Address Range Scrubs that have been 811 + * completed since driver load time. Userspace can wait on this using 812 + * select/poll etc. 
A '+' at the end indicates an ARS is in progress 813 + */ 814 + static ssize_t scrub_show(struct device *dev, 815 + struct device_attribute *attr, char *buf) 816 + { 817 + struct nvdimm_bus_descriptor *nd_desc; 818 + ssize_t rc = -ENXIO; 819 + 820 + device_lock(dev); 821 + nd_desc = dev_get_drvdata(dev); 822 + if (nd_desc) { 823 + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 824 + 825 + rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, 826 + (work_busy(&acpi_desc->work)) ? "+\n" : "\n"); 827 + } 828 + device_unlock(dev); 829 + return rc; 830 + } 831 + 832 + static ssize_t scrub_store(struct device *dev, 833 + struct device_attribute *attr, const char *buf, size_t size) 834 + { 835 + struct nvdimm_bus_descriptor *nd_desc; 836 + ssize_t rc; 837 + long val; 838 + 839 + rc = kstrtol(buf, 0, &val); 840 + if (rc) 841 + return rc; 842 + if (val != 1) 843 + return -EINVAL; 844 + 845 + device_lock(dev); 846 + nd_desc = dev_get_drvdata(dev); 847 + if (nd_desc) { 848 + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 849 + 850 + rc = acpi_nfit_ars_rescan(acpi_desc); 851 + } 852 + device_unlock(dev); 853 + if (rc) 854 + return rc; 855 + return size; 856 + } 857 + static DEVICE_ATTR_RW(scrub); 858 + 859 + static bool ars_supported(struct nvdimm_bus *nvdimm_bus) 860 + { 861 + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 862 + const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START 863 + | 1 << ND_CMD_ARS_STATUS; 864 + 865 + return (nd_desc->cmd_mask & mask) == mask; 866 + } 867 + 868 + static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n) 869 + { 870 + struct device *dev = container_of(kobj, struct device, kobj); 871 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 872 + 873 + if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus)) 874 + return 0; 875 + return a->mode; 876 + } 877 + 881 878 static struct attribute *acpi_nfit_attributes[] = { 882 879 &dev_attr_revision.attr, 880 + 
&dev_attr_scrub.attr, 883 881 NULL, 884 882 }; 885 883 886 884 static struct attribute_group acpi_nfit_attribute_group = { 887 885 .name = "nfit", 888 886 .attrs = acpi_nfit_attributes, 887 + .is_visible = nfit_visible, 889 888 }; 890 889 891 890 static const struct attribute_group *acpi_nfit_attribute_groups[] = { ··· 1273 1130 } 1274 1131 1275 1132 /* 1276 - * Until standardization materializes we need to consider up to 3 1133 + * Until standardization materializes we need to consider 4 1277 1134 * different command sets. Note, that checking for function0 (bit0) 1278 1135 * tells us if any commands are reachable through this uuid. 1279 1136 */ 1280 - for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++) 1137 + for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++) 1281 1138 if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) 1282 1139 break; 1283 1140 ··· 1287 1144 dsm_mask = 0x3fe; 1288 1145 if (disable_vendor_specific) 1289 1146 dsm_mask &= ~(1 << ND_CMD_VENDOR); 1290 - } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) 1147 + } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) { 1291 1148 dsm_mask = 0x1c3c76; 1292 - else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) { 1149 + } else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) { 1293 1150 dsm_mask = 0x1fe; 1294 1151 if (disable_vendor_specific) 1295 1152 dsm_mask &= ~(1 << 8); 1153 + } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) { 1154 + dsm_mask = 0xffffffff; 1296 1155 } else { 1297 1156 dev_dbg(dev, "unknown dimm command family\n"); 1298 1157 nfit_mem->family = -1; ··· 1316 1171 int dimm_count = 0; 1317 1172 1318 1173 list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { 1174 + struct acpi_nfit_flush_address *flush; 1319 1175 unsigned long flags = 0, cmd_mask; 1320 1176 struct nvdimm *nvdimm; 1321 1177 u32 device_handle; ··· 1350 1204 if (nfit_mem->family == NVDIMM_FAMILY_INTEL) 1351 1205 cmd_mask |= nfit_mem->dsm_mask; 1352 1206 1207 + flush = nfit_mem->nfit_flush ? 
nfit_mem->nfit_flush->flush 1208 + : NULL; 1353 1209 nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, 1354 1210 acpi_nfit_dimm_attribute_groups, 1355 - flags, cmd_mask); 1211 + flags, cmd_mask, flush ? flush->hint_count : 0, 1212 + nfit_mem->flush_wpq); 1356 1213 if (!nvdimm) 1357 1214 return -ENOMEM; 1358 1215 ··· 1523 1374 return mmio->base_offset + line_offset + table_offset + sub_line_offset; 1524 1375 } 1525 1376 1526 - static void wmb_blk(struct nfit_blk *nfit_blk) 1527 - { 1528 - 1529 - if (nfit_blk->nvdimm_flush) { 1530 - /* 1531 - * The first wmb() is needed to 'sfence' all previous writes 1532 - * such that they are architecturally visible for the platform 1533 - * buffer flush. Note that we've already arranged for pmem 1534 - * writes to avoid the cache via arch_memcpy_to_pmem(). The 1535 - * final wmb() ensures ordering for the NVDIMM flush write. 1536 - */ 1537 - wmb(); 1538 - writeq(1, nfit_blk->nvdimm_flush); 1539 - wmb(); 1540 - } else 1541 - wmb_pmem(); 1542 - } 1543 - 1544 1377 static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) 1545 1378 { 1546 1379 struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; ··· 1557 1426 offset = to_interleave_offset(offset, mmio); 1558 1427 1559 1428 writeq(cmd, mmio->addr.base + offset); 1560 - wmb_blk(nfit_blk); 1429 + nvdimm_flush(nfit_blk->nd_region); 1561 1430 1562 1431 if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) 1563 1432 readq(mmio->addr.base + offset); ··· 1608 1477 } 1609 1478 1610 1479 if (rw) 1611 - wmb_blk(nfit_blk); 1480 + nvdimm_flush(nfit_blk->nd_region); 1612 1481 1613 1482 rc = read_blk_stat(nfit_blk, lane) ? 
-EIO : 0; 1614 1483 return rc; ··· 1638 1507 nd_region_release_lane(nd_region, lane); 1639 1508 1640 1509 return rc; 1641 - } 1642 - 1643 - static void nfit_spa_mapping_release(struct kref *kref) 1644 - { 1645 - struct nfit_spa_mapping *spa_map = to_spa_map(kref); 1646 - struct acpi_nfit_system_address *spa = spa_map->spa; 1647 - struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc; 1648 - 1649 - WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); 1650 - dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index); 1651 - if (spa_map->type == SPA_MAP_APERTURE) 1652 - memunmap((void __force *)spa_map->addr.aperture); 1653 - else 1654 - iounmap(spa_map->addr.base); 1655 - release_mem_region(spa->address, spa->length); 1656 - list_del(&spa_map->list); 1657 - kfree(spa_map); 1658 - } 1659 - 1660 - static struct nfit_spa_mapping *find_spa_mapping( 1661 - struct acpi_nfit_desc *acpi_desc, 1662 - struct acpi_nfit_system_address *spa) 1663 - { 1664 - struct nfit_spa_mapping *spa_map; 1665 - 1666 - WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); 1667 - list_for_each_entry(spa_map, &acpi_desc->spa_maps, list) 1668 - if (spa_map->spa == spa) 1669 - return spa_map; 1670 - 1671 - return NULL; 1672 - } 1673 - 1674 - static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, 1675 - struct acpi_nfit_system_address *spa) 1676 - { 1677 - struct nfit_spa_mapping *spa_map; 1678 - 1679 - mutex_lock(&acpi_desc->spa_map_mutex); 1680 - spa_map = find_spa_mapping(acpi_desc, spa); 1681 - 1682 - if (spa_map) 1683 - kref_put(&spa_map->kref, nfit_spa_mapping_release); 1684 - mutex_unlock(&acpi_desc->spa_map_mutex); 1685 - } 1686 - 1687 - static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, 1688 - struct acpi_nfit_system_address *spa, enum spa_map_type type) 1689 - { 1690 - resource_size_t start = spa->address; 1691 - resource_size_t n = spa->length; 1692 - struct nfit_spa_mapping *spa_map; 1693 - struct resource *res; 1694 - 1695 - 
WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); 1696 - 1697 - spa_map = find_spa_mapping(acpi_desc, spa); 1698 - if (spa_map) { 1699 - kref_get(&spa_map->kref); 1700 - return spa_map->addr.base; 1701 - } 1702 - 1703 - spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL); 1704 - if (!spa_map) 1705 - return NULL; 1706 - 1707 - INIT_LIST_HEAD(&spa_map->list); 1708 - spa_map->spa = spa; 1709 - kref_init(&spa_map->kref); 1710 - spa_map->acpi_desc = acpi_desc; 1711 - 1712 - res = request_mem_region(start, n, dev_name(acpi_desc->dev)); 1713 - if (!res) 1714 - goto err_mem; 1715 - 1716 - spa_map->type = type; 1717 - if (type == SPA_MAP_APERTURE) 1718 - spa_map->addr.aperture = (void __pmem *)memremap(start, n, 1719 - ARCH_MEMREMAP_PMEM); 1720 - else 1721 - spa_map->addr.base = ioremap_nocache(start, n); 1722 - 1723 - 1724 - if (!spa_map->addr.base) 1725 - goto err_map; 1726 - 1727 - list_add_tail(&spa_map->list, &acpi_desc->spa_maps); 1728 - return spa_map->addr.base; 1729 - 1730 - err_map: 1731 - release_mem_region(start, n); 1732 - err_mem: 1733 - kfree(spa_map); 1734 - return NULL; 1735 - } 1736 - 1737 - /** 1738 - * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges 1739 - * @nvdimm_bus: NFIT-bus that provided the spa table entry 1740 - * @nfit_spa: spa table to map 1741 - * @type: aperture or control region 1742 - * 1743 - * In the case where block-data-window apertures and 1744 - * dimm-control-regions are interleaved they will end up sharing a 1745 - * single request_mem_region() + ioremap() for the address range. In 1746 - * the style of devm nfit_spa_map() mappings are automatically dropped 1747 - * when all region devices referencing the same mapping are disabled / 1748 - * unbound. 
1749 - */ 1750 - static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, 1751 - struct acpi_nfit_system_address *spa, enum spa_map_type type) 1752 - { 1753 - void __iomem *iomem; 1754 - 1755 - mutex_lock(&acpi_desc->spa_map_mutex); 1756 - iomem = __nfit_spa_map(acpi_desc, spa, type); 1757 - mutex_unlock(&acpi_desc->spa_map_mutex); 1758 - 1759 - return iomem; 1760 1510 } 1761 1511 1762 1512 static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, ··· 1681 1669 struct device *dev) 1682 1670 { 1683 1671 struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 1684 - struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 1685 1672 struct nd_blk_region *ndbr = to_nd_blk_region(dev); 1686 - struct nfit_flush *nfit_flush; 1687 1673 struct nfit_blk_mmio *mmio; 1688 1674 struct nfit_blk *nfit_blk; 1689 1675 struct nfit_mem *nfit_mem; ··· 1707 1697 /* map block aperture memory */ 1708 1698 nfit_blk->bdw_offset = nfit_mem->bdw->offset; 1709 1699 mmio = &nfit_blk->mmio[BDW]; 1710 - mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, 1711 - SPA_MAP_APERTURE); 1700 + mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address, 1701 + nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM); 1712 1702 if (!mmio->addr.base) { 1713 1703 dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, 1714 1704 nvdimm_name(nvdimm)); ··· 1730 1720 nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; 1731 1721 nfit_blk->stat_offset = nfit_mem->dcr->status_offset; 1732 1722 mmio = &nfit_blk->mmio[DCR]; 1733 - mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, 1734 - SPA_MAP_CONTROL); 1723 + mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address, 1724 + nfit_mem->spa_dcr->length); 1735 1725 if (!mmio->addr.base) { 1736 1726 dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, 1737 1727 nvdimm_name(nvdimm)); ··· 1756 1746 return rc; 1757 1747 } 1758 1748 1759 - nfit_flush = nfit_mem->nfit_flush; 1760 - if (nfit_flush && 
nfit_flush->flush->hint_count != 0) { 1761 - nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, 1762 - nfit_flush->flush->hint_address[0], 8); 1763 - if (!nfit_blk->nvdimm_flush) 1764 - return -ENOMEM; 1765 - } 1766 - 1767 - if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush) 1749 + if (nvdimm_has_flush(nfit_blk->nd_region) < 0) 1768 1750 dev_warn(dev, "unable to guarantee persistence of writes\n"); 1769 1751 1770 1752 if (mmio->line_size == 0) ··· 1773 1771 } 1774 1772 1775 1773 return 0; 1776 - } 1777 - 1778 - static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, 1779 - struct device *dev) 1780 - { 1781 - struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 1782 - struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 1783 - struct nd_blk_region *ndbr = to_nd_blk_region(dev); 1784 - struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); 1785 - int i; 1786 - 1787 - if (!nfit_blk) 1788 - return; /* never enabled */ 1789 - 1790 - /* auto-free BLK spa mappings */ 1791 - for (i = 0; i < 2; i++) { 1792 - struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i]; 1793 - 1794 - if (mmio->addr.base) 1795 - nfit_spa_unmap(acpi_desc, mmio->spa); 1796 - } 1797 - nd_blk_region_set_provider_data(ndbr, NULL); 1798 - /* devm will free nfit_blk */ 1799 1774 } 1800 1775 1801 1776 static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, ··· 1898 1919 if (ret) 1899 1920 return ret; 1900 1921 1901 - ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res); 1902 - if (ret) { 1903 - remove_resource(res); 1922 + ret = devm_add_action_or_reset(acpi_desc->dev, 1923 + acpi_nfit_remove_resource, 1924 + res); 1925 + if (ret) 1904 1926 return ret; 1905 - } 1906 1927 1907 1928 return 0; 1908 1929 } ··· 1948 1969 ndr_desc->num_mappings = blk_valid; 1949 1970 ndbr_desc = to_blk_region_desc(ndr_desc); 1950 1971 ndbr_desc->enable = acpi_nfit_blk_region_enable; 1951 - ndbr_desc->disable = acpi_nfit_blk_region_disable; 1952 1972 ndbr_desc->do_io = 
acpi_desc->blk_do_io; 1953 1973 nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, 1954 1974 ndr_desc); ··· 1957 1979 } 1958 1980 1959 1981 return 0; 1982 + } 1983 + 1984 + static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa) 1985 + { 1986 + return (nfit_spa_type(spa) == NFIT_SPA_VDISK || 1987 + nfit_spa_type(spa) == NFIT_SPA_VCD || 1988 + nfit_spa_type(spa) == NFIT_SPA_PDISK || 1989 + nfit_spa_type(spa) == NFIT_SPA_PCD); 1960 1990 } 1961 1991 1962 1992 static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ··· 1982 1996 if (nfit_spa->nd_region) 1983 1997 return 0; 1984 1998 1985 - if (spa->range_index == 0) { 1999 + if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) { 1986 2000 dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n", 1987 2001 __func__); 1988 2002 return 0; ··· 2043 2057 rc = -ENOMEM; 2044 2058 } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { 2045 2059 nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, 2060 + ndr_desc); 2061 + if (!nfit_spa->nd_region) 2062 + rc = -ENOMEM; 2063 + } else if (nfit_spa_is_virtual(spa)) { 2064 + nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus, 2046 2065 ndr_desc); 2047 2066 if (!nfit_spa->nd_region) 2048 2067 rc = -ENOMEM; ··· 2130 2139 unsigned int tmo = scrub_timeout; 2131 2140 int rc; 2132 2141 2133 - if (nfit_spa->ars_done || !nfit_spa->nd_region) 2142 + if (!nfit_spa->ars_required || !nfit_spa->nd_region) 2134 2143 return; 2135 2144 2136 2145 rc = ars_start(acpi_desc, nfit_spa); ··· 2219 2228 * firmware initiated scrubs to complete and then we go search for the 2220 2229 * affected spa regions to mark them scanned. In the second phase we 2221 2230 * initiate a directed scrub for every range that was not scrubbed in 2222 - * phase 1. 2231 + * phase 1. 
If we're called for a 'rescan', we harmlessly pass through 2232 + * the first phase, but really only care about running phase 2, where 2233 + * regions can be notified of new poison. 2223 2234 */ 2224 2235 2225 2236 /* process platform firmware initiated scrubs */ ··· 2324 2331 * Flag all the ranges that still need scrubbing, but 2325 2332 * register them now to make data available. 2326 2333 */ 2327 - if (nfit_spa->nd_region) 2328 - nfit_spa->ars_done = 1; 2329 - else 2334 + if (!nfit_spa->nd_region) { 2335 + nfit_spa->ars_required = 1; 2330 2336 acpi_nfit_register_region(acpi_desc, nfit_spa); 2337 + } 2331 2338 } 2332 2339 2333 2340 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) 2334 2341 acpi_nfit_async_scrub(acpi_desc, nfit_spa); 2342 + acpi_desc->scrub_count++; 2343 + if (acpi_desc->scrub_count_state) 2344 + sysfs_notify_dirent(acpi_desc->scrub_count_state); 2335 2345 mutex_unlock(&acpi_desc->init_mutex); 2336 2346 } 2337 2347 ··· 2372 2376 return 0; 2373 2377 } 2374 2378 2375 - int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) 2379 + static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc) 2380 + { 2381 + struct device *dev = acpi_desc->dev; 2382 + struct kernfs_node *nfit; 2383 + struct device *bus_dev; 2384 + 2385 + if (!ars_supported(acpi_desc->nvdimm_bus)) 2386 + return 0; 2387 + 2388 + bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2389 + nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit"); 2390 + if (!nfit) { 2391 + dev_err(dev, "sysfs_get_dirent 'nfit' failed\n"); 2392 + return -ENODEV; 2393 + } 2394 + acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub"); 2395 + sysfs_put(nfit); 2396 + if (!acpi_desc->scrub_count_state) { 2397 + dev_err(dev, "sysfs_get_dirent 'scrub' failed\n"); 2398 + return -ENODEV; 2399 + } 2400 + 2401 + return 0; 2402 + } 2403 + 2404 + static void acpi_nfit_destruct(void *data) 2405 + { 2406 + struct acpi_nfit_desc *acpi_desc = data; 2407 + struct device *bus_dev = 
to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2408 + 2409 + /* 2410 + * Destruct under acpi_desc_lock so that nfit_handle_mce does not 2411 + * race teardown 2412 + */ 2413 + mutex_lock(&acpi_desc_lock); 2414 + acpi_desc->cancel = 1; 2415 + /* 2416 + * Bounce the nvdimm bus lock to make sure any in-flight 2417 + * acpi_nfit_ars_rescan() submissions have had a chance to 2418 + * either submit or see ->cancel set. 2419 + */ 2420 + device_lock(bus_dev); 2421 + device_unlock(bus_dev); 2422 + 2423 + flush_workqueue(nfit_wq); 2424 + if (acpi_desc->scrub_count_state) 2425 + sysfs_put(acpi_desc->scrub_count_state); 2426 + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2427 + acpi_desc->nvdimm_bus = NULL; 2428 + list_del(&acpi_desc->list); 2429 + mutex_unlock(&acpi_desc_lock); 2430 + } 2431 + 2432 + int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz) 2376 2433 { 2377 2434 struct device *dev = acpi_desc->dev; 2378 2435 struct nfit_table_prev prev; 2379 2436 const void *end; 2380 - u8 *data; 2381 2437 int rc; 2438 + 2439 + if (!acpi_desc->nvdimm_bus) { 2440 + acpi_nfit_init_dsms(acpi_desc); 2441 + 2442 + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, 2443 + &acpi_desc->nd_desc); 2444 + if (!acpi_desc->nvdimm_bus) 2445 + return -ENOMEM; 2446 + 2447 + rc = devm_add_action_or_reset(dev, acpi_nfit_destruct, 2448 + acpi_desc); 2449 + if (rc) 2450 + return rc; 2451 + 2452 + rc = acpi_nfit_desc_init_scrub_attr(acpi_desc); 2453 + if (rc) 2454 + return rc; 2455 + 2456 + /* register this acpi_desc for mce notifications */ 2457 + mutex_lock(&acpi_desc_lock); 2458 + list_add_tail(&acpi_desc->list, &acpi_descs); 2459 + mutex_unlock(&acpi_desc_lock); 2460 + } 2382 2461 2383 2462 mutex_lock(&acpi_desc->init_mutex); 2384 2463 ··· 2477 2406 list_cut_position(&prev.flushes, &acpi_desc->flushes, 2478 2407 acpi_desc->flushes.prev); 2479 2408 2480 - data = (u8 *) acpi_desc->nfit; 2481 2409 end = data + sz; 2482 2410 while (!IS_ERR_OR_NULL(data)) 2483 2411 data = 
add_table(acpi_desc, &prev, data, end); ··· 2492 2422 if (rc) 2493 2423 goto out_unlock; 2494 2424 2495 - if (nfit_mem_init(acpi_desc) != 0) { 2496 - rc = -ENOMEM; 2425 + rc = nfit_mem_init(acpi_desc); 2426 + if (rc) 2497 2427 goto out_unlock; 2498 - } 2499 - 2500 - acpi_nfit_init_dsms(acpi_desc); 2501 2428 2502 2429 rc = acpi_nfit_register_dimms(acpi_desc); 2503 2430 if (rc) ··· 2563 2496 return 0; 2564 2497 } 2565 2498 2499 + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) 2500 + { 2501 + struct device *dev = acpi_desc->dev; 2502 + struct nfit_spa *nfit_spa; 2503 + 2504 + if (work_busy(&acpi_desc->work)) 2505 + return -EBUSY; 2506 + 2507 + if (acpi_desc->cancel) 2508 + return 0; 2509 + 2510 + mutex_lock(&acpi_desc->init_mutex); 2511 + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 2512 + struct acpi_nfit_system_address *spa = nfit_spa->spa; 2513 + 2514 + if (nfit_spa_type(spa) != NFIT_SPA_PM) 2515 + continue; 2516 + 2517 + nfit_spa->ars_required = 1; 2518 + } 2519 + queue_work(nfit_wq, &acpi_desc->work); 2520 + dev_dbg(dev, "%s: ars_scan triggered\n", __func__); 2521 + mutex_unlock(&acpi_desc->init_mutex); 2522 + 2523 + return 0; 2524 + } 2525 + 2566 2526 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) 2567 2527 { 2568 2528 struct nvdimm_bus_descriptor *nd_desc; ··· 2599 2505 acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io; 2600 2506 nd_desc = &acpi_desc->nd_desc; 2601 2507 nd_desc->provider_name = "ACPI.NFIT"; 2508 + nd_desc->module = THIS_MODULE; 2602 2509 nd_desc->ndctl = acpi_nfit_ctl; 2603 2510 nd_desc->flush_probe = acpi_nfit_flush_probe; 2604 2511 nd_desc->clear_to_send = acpi_nfit_clear_to_send; 2605 2512 nd_desc->attr_groups = acpi_nfit_attribute_groups; 2606 2513 2607 - INIT_LIST_HEAD(&acpi_desc->spa_maps); 2608 2514 INIT_LIST_HEAD(&acpi_desc->spas); 2609 2515 INIT_LIST_HEAD(&acpi_desc->dcrs); 2610 2516 INIT_LIST_HEAD(&acpi_desc->bdws); ··· 2612 2518 INIT_LIST_HEAD(&acpi_desc->flushes); 2613 2519 
INIT_LIST_HEAD(&acpi_desc->memdevs); 2614 2520 INIT_LIST_HEAD(&acpi_desc->dimms); 2615 - mutex_init(&acpi_desc->spa_map_mutex); 2521 + INIT_LIST_HEAD(&acpi_desc->list); 2616 2522 mutex_init(&acpi_desc->init_mutex); 2617 2523 INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); 2618 2524 } ··· 2626 2532 struct acpi_table_header *tbl; 2627 2533 acpi_status status = AE_OK; 2628 2534 acpi_size sz; 2629 - int rc; 2535 + int rc = 0; 2630 2536 2631 2537 status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz); 2632 2538 if (ACPI_FAILURE(status)) { ··· 2639 2545 if (!acpi_desc) 2640 2546 return -ENOMEM; 2641 2547 acpi_nfit_desc_init(acpi_desc, &adev->dev); 2642 - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); 2643 - if (!acpi_desc->nvdimm_bus) 2644 - return -ENOMEM; 2645 2548 2646 - /* 2647 - * Save the acpi header for later and then skip it, 2648 - * making nfit point to the first nfit table header. 2649 - */ 2549 + /* Save the acpi header for exporting the revision via sysfs */ 2650 2550 acpi_desc->acpi_header = *tbl; 2651 - acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit); 2652 - sz -= sizeof(struct acpi_table_nfit); 2653 2551 2654 2552 /* Evaluate _FIT and override with that if present */ 2655 2553 status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf); 2656 2554 if (ACPI_SUCCESS(status) && buf.length > 0) { 2657 - union acpi_object *obj; 2658 - /* 2659 - * Adjust for the acpi_object header of the _FIT 2660 - */ 2661 - obj = buf.pointer; 2662 - if (obj->type == ACPI_TYPE_BUFFER) { 2663 - acpi_desc->nfit = 2664 - (struct acpi_nfit_header *)obj->buffer.pointer; 2665 - sz = obj->buffer.length; 2666 - } else 2555 + union acpi_object *obj = buf.pointer; 2556 + 2557 + if (obj->type == ACPI_TYPE_BUFFER) 2558 + rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer, 2559 + obj->buffer.length); 2560 + else 2667 2561 dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n", 2668 2562 __func__, (int) obj->type); 2669 - } 2670 - 2671 - rc = 
acpi_nfit_init(acpi_desc, sz); 2672 - if (rc) { 2673 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2674 - return rc; 2675 - } 2676 - return 0; 2563 + kfree(buf.pointer); 2564 + } else 2565 + /* skip over the lead-in header table */ 2566 + rc = acpi_nfit_init(acpi_desc, (void *) tbl 2567 + + sizeof(struct acpi_table_nfit), 2568 + sz - sizeof(struct acpi_table_nfit)); 2569 + return rc; 2677 2570 } 2678 2571 2679 2572 static int acpi_nfit_remove(struct acpi_device *adev) 2680 2573 { 2681 - struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); 2682 - 2683 - acpi_desc->cancel = 1; 2684 - flush_workqueue(nfit_wq); 2685 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2574 + /* see acpi_nfit_destruct */ 2686 2575 return 0; 2687 2576 } 2688 2577 ··· 2673 2596 { 2674 2597 struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); 2675 2598 struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; 2676 - struct acpi_nfit_header *nfit_saved; 2677 - union acpi_object *obj; 2678 2599 struct device *dev = &adev->dev; 2600 + union acpi_object *obj; 2679 2601 acpi_status status; 2680 2602 int ret; 2681 2603 ··· 2692 2616 if (!acpi_desc) 2693 2617 goto out_unlock; 2694 2618 acpi_nfit_desc_init(acpi_desc, &adev->dev); 2695 - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); 2696 - if (!acpi_desc->nvdimm_bus) 2697 - goto out_unlock; 2698 2619 } else { 2699 2620 /* 2700 2621 * Finish previous registration before considering new ··· 2707 2634 goto out_unlock; 2708 2635 } 2709 2636 2710 - nfit_saved = acpi_desc->nfit; 2711 2637 obj = buf.pointer; 2712 2638 if (obj->type == ACPI_TYPE_BUFFER) { 2713 - acpi_desc->nfit = 2714 - (struct acpi_nfit_header *)obj->buffer.pointer; 2715 - ret = acpi_nfit_init(acpi_desc, obj->buffer.length); 2716 - if (ret) { 2717 - /* Merge failed, restore old nfit, and exit */ 2718 - acpi_desc->nfit = nfit_saved; 2639 + ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer, 2640 + obj->buffer.length); 2641 + if (ret) 2719 2642 
dev_err(dev, "failed to merge updated NFIT\n"); 2720 - } 2721 - } else { 2722 - /* Bad _FIT, restore old nfit */ 2643 + } else 2723 2644 dev_err(dev, "Invalid _FIT\n"); 2724 - } 2725 2645 kfree(buf.pointer); 2726 2646 2727 2647 out_unlock: ··· 2759 2693 acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); 2760 2694 acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]); 2761 2695 acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]); 2696 + acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]); 2762 2697 2763 2698 nfit_wq = create_singlethread_workqueue("nfit"); 2764 2699 if (!nfit_wq) 2765 2700 return -ENOMEM; 2701 + 2702 + nfit_mce_register(); 2766 2703 2767 2704 return acpi_bus_register_driver(&acpi_nfit_driver); 2768 2705 } 2769 2706 2770 2707 static __exit void nfit_exit(void) 2771 2708 { 2709 + nfit_mce_unregister(); 2772 2710 acpi_bus_unregister_driver(&acpi_nfit_driver); 2773 2711 destroy_workqueue(nfit_wq); 2712 + WARN_ON(!list_empty(&acpi_descs)); 2774 2713 } 2775 2714 2776 2715 module_init(nfit_init);
+32 -28
drivers/acpi/nfit.h drivers/acpi/nfit/nfit.h
··· 16 16 #define __NFIT_H__ 17 17 #include <linux/workqueue.h> 18 18 #include <linux/libnvdimm.h> 19 + #include <linux/ndctl.h> 19 20 #include <linux/types.h> 20 21 #include <linux/uuid.h> 21 22 #include <linux/acpi.h> ··· 32 31 #define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6" 33 32 #define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e" 34 33 34 + /* https://msdn.microsoft.com/library/windows/hardware/mt604741 */ 35 + #define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05" 36 + 35 37 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \ 36 38 | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ 37 39 | ACPI_NFIT_MEM_NOT_ARMED) ··· 44 40 NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL, 45 41 NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1, 46 42 NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2, 43 + NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT, 47 44 NFIT_SPA_VOLATILE, 48 45 NFIT_SPA_PM, 49 46 NFIT_SPA_DCR, ··· 79 74 }; 80 75 81 76 struct nfit_spa { 82 - struct acpi_nfit_system_address *spa; 83 77 struct list_head list; 84 78 struct nd_region *nd_region; 85 - unsigned int ars_done:1; 79 + unsigned int ars_required:1; 86 80 u32 clear_err_unit; 87 81 u32 max_ars; 82 + struct acpi_nfit_system_address spa[0]; 88 83 }; 89 84 90 85 struct nfit_dcr { 91 - struct acpi_nfit_control_region *dcr; 92 86 struct list_head list; 87 + struct acpi_nfit_control_region dcr[0]; 93 88 }; 94 89 95 90 struct nfit_bdw { 96 - struct acpi_nfit_data_region *bdw; 97 91 struct list_head list; 92 + struct acpi_nfit_data_region bdw[0]; 98 93 }; 99 94 100 95 struct nfit_idt { 101 - struct acpi_nfit_interleave *idt; 102 96 struct list_head list; 97 + struct acpi_nfit_interleave idt[0]; 103 98 }; 104 99 105 100 struct nfit_flush { 106 - struct acpi_nfit_flush_address *flush; 107 101 struct list_head list; 102 + struct acpi_nfit_flush_address flush[0]; 108 103 }; 109 104 110 105 struct nfit_memdev { 111 - struct acpi_nfit_memory_map *memdev; 112 106 struct 
list_head list; 107 + struct acpi_nfit_memory_map memdev[0]; 113 108 }; 114 109 115 110 /* assembled tables for a given dimm/memory-device */ ··· 128 123 struct list_head list; 129 124 struct acpi_device *adev; 130 125 struct acpi_nfit_desc *acpi_desc; 126 + struct resource *flush_wpq; 131 127 unsigned long dsm_mask; 132 128 int family; 133 129 }; ··· 136 130 struct acpi_nfit_desc { 137 131 struct nvdimm_bus_descriptor nd_desc; 138 132 struct acpi_table_header acpi_header; 139 - struct acpi_nfit_header *nfit; 140 - struct mutex spa_map_mutex; 141 133 struct mutex init_mutex; 142 - struct list_head spa_maps; 143 134 struct list_head memdevs; 144 135 struct list_head flushes; 145 136 struct list_head dimms; ··· 149 146 struct nd_cmd_ars_status *ars_status; 150 147 size_t ars_status_size; 151 148 struct work_struct work; 149 + struct list_head list; 150 + struct kernfs_node *scrub_count_state; 151 + unsigned int scrub_count; 152 152 unsigned int cancel:1; 153 153 unsigned long dimm_cmd_force_en; 154 154 unsigned long bus_cmd_force_en; ··· 167 161 struct nd_blk_addr { 168 162 union { 169 163 void __iomem *base; 170 - void __pmem *aperture; 164 + void *aperture; 171 165 }; 172 166 }; 173 167 ··· 186 180 u64 bdw_offset; /* post interleave offset */ 187 181 u64 stat_offset; 188 182 u64 cmd_offset; 189 - void __iomem *nvdimm_flush; 190 183 u32 dimm_flags; 191 184 }; 192 185 193 - enum spa_map_type { 194 - SPA_MAP_CONTROL, 195 - SPA_MAP_APERTURE, 196 - }; 186 + extern struct list_head acpi_descs; 187 + extern struct mutex acpi_desc_lock; 188 + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc); 197 189 198 - struct nfit_spa_mapping { 199 - struct acpi_nfit_desc *acpi_desc; 200 - struct acpi_nfit_system_address *spa; 201 - struct list_head list; 202 - struct kref kref; 203 - enum spa_map_type type; 204 - struct nd_blk_addr addr; 205 - }; 206 - 207 - static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref) 190 + #ifdef CONFIG_X86_MCE 191 + void 
nfit_mce_register(void); 192 + void nfit_mce_unregister(void); 193 + #else 194 + static inline void nfit_mce_register(void) 208 195 { 209 - return container_of(kref, struct nfit_spa_mapping, kref); 210 196 } 197 + static inline void nfit_mce_unregister(void) 198 + { 199 + } 200 + #endif 201 + 202 + int nfit_spa_type(struct acpi_nfit_system_address *spa); 211 203 212 204 static inline struct acpi_nfit_memory_map *__to_nfit_memdev( 213 205 struct nfit_mem *nfit_mem) ··· 222 218 } 223 219 224 220 const u8 *to_nfit_uuid(enum nfit_uuids id); 225 - int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); 221 + int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); 226 222 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); 227 223 #endif /* __NFIT_H__ */
+26
drivers/acpi/nfit/Kconfig
··· 1 + config ACPI_NFIT 2 + tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" 3 + depends on PHYS_ADDR_T_64BIT 4 + depends on BLK_DEV 5 + depends on ARCH_HAS_MMIO_FLUSH 6 + select LIBNVDIMM 7 + help 8 + Infrastructure to probe ACPI 6 compliant platforms for 9 + NVDIMMs (NFIT) and register a libnvdimm device tree. In 10 + addition to storage devices this also enables libnvdimm to pass 11 + ACPI._DSM messages for platform/dimm configuration. 12 + 13 + To compile this driver as a module, choose M here: 14 + the module will be called nfit. 15 + 16 + config ACPI_NFIT_DEBUG 17 + bool "NFIT DSM debug" 18 + depends on ACPI_NFIT 19 + depends on DYNAMIC_DEBUG 20 + default n 21 + help 22 + Enabling this option causes the nfit driver to dump the 23 + input and output buffers of _DSM operations on the ACPI0012 24 + device and its children. This can be very verbose, so leave 25 + it disabled unless you are debugging a hardware / firmware 26 + issue.
+3
drivers/acpi/nfit/Makefile
··· 1 + obj-$(CONFIG_ACPI_NFIT) := nfit.o 2 + nfit-y := core.o 3 + nfit-$(CONFIG_X86_MCE) += mce.o
+89
drivers/acpi/nfit/mce.c
··· 1 + /* 2 + * NFIT - Machine Check Handler 3 + * 4 + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of version 2 of the GNU General Public License as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 + * General Public License for more details. 14 + */ 15 + #include <linux/notifier.h> 16 + #include <linux/acpi.h> 17 + #include <asm/mce.h> 18 + #include "nfit.h" 19 + 20 + static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, 21 + void *data) 22 + { 23 + struct mce *mce = (struct mce *)data; 24 + struct acpi_nfit_desc *acpi_desc; 25 + struct nfit_spa *nfit_spa; 26 + 27 + /* We only care about memory errors */ 28 + if (!(mce->status & MCACOD)) 29 + return NOTIFY_DONE; 30 + 31 + /* 32 + * mce->addr contains the physical addr accessed that caused the 33 + * machine check. We need to walk through the list of NFITs, and see 34 + * if any of them matches that address, and only then start a scrub. 
35 + */ 36 + mutex_lock(&acpi_desc_lock); 37 + list_for_each_entry(acpi_desc, &acpi_descs, list) { 38 + struct device *dev = acpi_desc->dev; 39 + int found_match = 0; 40 + 41 + mutex_lock(&acpi_desc->init_mutex); 42 + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 43 + struct acpi_nfit_system_address *spa = nfit_spa->spa; 44 + 45 + if (nfit_spa_type(spa) == NFIT_SPA_PM) 46 + continue; 47 + /* find the spa that covers the mce addr */ 48 + if (spa->address > mce->addr) 49 + continue; 50 + if ((spa->address + spa->length - 1) < mce->addr) 51 + continue; 52 + found_match = 1; 53 + dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n", 54 + __func__, spa->range_index, spa->address, 55 + spa->length); 56 + /* 57 + * We can break at the first match because we're going 58 + * to rescan all the SPA ranges. There shouldn't be any 59 + * aliasing anyway. 60 + */ 61 + break; 62 + } 63 + mutex_unlock(&acpi_desc->init_mutex); 64 + 65 + /* 66 + * We can ignore an -EBUSY here because if an ARS is already 67 + * in progress, just let that be the last authoritative one 68 + */ 69 + if (found_match) 70 + acpi_nfit_ars_rescan(acpi_desc); 71 + } 72 + 73 + mutex_unlock(&acpi_desc_lock); 74 + return NOTIFY_DONE; 75 + } 76 + 77 + static struct notifier_block nfit_mce_dec = { 78 + .notifier_call = nfit_handle_mce, 79 + }; 80 + 81 + void nfit_mce_register(void) 82 + { 83 + mce_register_decode_chain(&nfit_mce_dec); 84 + } 85 + 86 + void nfit_mce_unregister(void) 87 + { 88 + mce_unregister_decode_chain(&nfit_mce_dec); 89 + }
+2 -2
drivers/block/brd.c
··· 379 379 380 380 #ifdef CONFIG_BLK_DEV_RAM_DAX 381 381 static long brd_direct_access(struct block_device *bdev, sector_t sector, 382 - void __pmem **kaddr, pfn_t *pfn, long size) 382 + void **kaddr, pfn_t *pfn, long size) 383 383 { 384 384 struct brd_device *brd = bdev->bd_disk->private_data; 385 385 struct page *page; ··· 389 389 page = brd_insert_page(brd, sector); 390 390 if (!page) 391 391 return -ENOSPC; 392 - *kaddr = (void __pmem *)page_address(page); 392 + *kaddr = page_address(page); 393 393 *pfn = page_to_pfn_t(page); 394 394 395 395 return PAGE_SIZE;
+2 -4
drivers/dax/dax.c
··· 211 211 } 212 212 dax_dev->dev = dev; 213 213 214 - rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev); 215 - if (rc) { 216 - unregister_dax_dev(dev); 214 + rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev); 215 + if (rc) 217 216 return rc; 218 - } 219 217 220 218 return 0; 221 219
+6 -8
drivers/dax/pmem.c
··· 102 102 if (rc) 103 103 return rc; 104 104 105 - rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref); 106 - if (rc) { 107 - dax_pmem_percpu_exit(&dax_pmem->ref); 105 + rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit, 106 + &dax_pmem->ref); 107 + if (rc) 108 108 return rc; 109 - } 110 109 111 110 addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); 112 111 if (IS_ERR(addr)) 113 112 return PTR_ERR(addr); 114 113 115 - rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref); 116 - if (rc) { 117 - dax_pmem_percpu_kill(&dax_pmem->ref); 114 + rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill, 115 + &dax_pmem->ref); 116 + if (rc) 118 117 return rc; 119 - } 120 118 121 119 nd_region = to_nd_region(dev->parent); 122 120 dax_region = alloc_dax_region(dev, nd_region->id, &res,
+1 -1
drivers/md/dm-linear.c
··· 142 142 } 143 143 144 144 static long linear_direct_access(struct dm_target *ti, sector_t sector, 145 - void __pmem **kaddr, pfn_t *pfn, long size) 145 + void **kaddr, pfn_t *pfn, long size) 146 146 { 147 147 struct linear_c *lc = ti->private; 148 148 struct block_device *bdev = lc->dev->bdev;
+1 -1
drivers/md/dm-snap.c
··· 2303 2303 } 2304 2304 2305 2305 static long origin_direct_access(struct dm_target *ti, sector_t sector, 2306 - void __pmem **kaddr, pfn_t *pfn, long size) 2306 + void **kaddr, pfn_t *pfn, long size) 2307 2307 { 2308 2308 DMWARN("device does not support dax."); 2309 2309 return -EIO;
+1 -1
drivers/md/dm-stripe.c
··· 309 309 } 310 310 311 311 static long stripe_direct_access(struct dm_target *ti, sector_t sector, 312 - void __pmem **kaddr, pfn_t *pfn, long size) 312 + void **kaddr, pfn_t *pfn, long size) 313 313 { 314 314 struct stripe_c *sc = ti->private; 315 315 uint32_t stripe;
+1 -1
drivers/md/dm-target.c
··· 149 149 } 150 150 151 151 static long io_err_direct_access(struct dm_target *ti, sector_t sector, 152 - void __pmem **kaddr, pfn_t *pfn, long size) 152 + void **kaddr, pfn_t *pfn, long size) 153 153 { 154 154 return -EIO; 155 155 }
+1 -1
drivers/md/dm.c
··· 906 906 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); 907 907 908 908 static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, 909 - void __pmem **kaddr, pfn_t *pfn, long size) 909 + void **kaddr, pfn_t *pfn, long size) 910 910 { 911 911 struct mapped_device *md = bdev->bd_disk->private_data; 912 912 struct dm_table *map;
+1 -1
drivers/nvdimm/Kconfig
··· 1 1 menuconfig LIBNVDIMM 2 2 tristate "NVDIMM (Non-Volatile Memory Device) Support" 3 3 depends on PHYS_ADDR_T_64BIT 4 + depends on HAS_IOMEM 4 5 depends on BLK_DEV 5 6 help 6 7 Generic support for non-volatile memory devices including ··· 20 19 config BLK_DEV_PMEM 21 20 tristate "PMEM: Persistent memory block device support" 22 21 default LIBNVDIMM 23 - depends on HAS_IOMEM 24 22 select ND_BTT if BTT 25 23 select ND_PFN if NVDIMM_PFN 26 24 help
+4 -7
drivers/nvdimm/blk.c
··· 267 267 q = blk_alloc_queue(GFP_KERNEL); 268 268 if (!q) 269 269 return -ENOMEM; 270 - if (devm_add_action(dev, nd_blk_release_queue, q)) { 271 - blk_cleanup_queue(q); 270 + if (devm_add_action_or_reset(dev, nd_blk_release_queue, q)) 272 271 return -ENOMEM; 273 - } 274 272 275 273 blk_queue_make_request(q, nd_blk_make_request); 276 274 blk_queue_max_hw_sectors(q, UINT_MAX); ··· 280 282 disk = alloc_disk(0); 281 283 if (!disk) 282 284 return -ENOMEM; 283 - if (devm_add_action(dev, nd_blk_release_disk, disk)) { 284 - put_disk(disk); 285 - return -ENOMEM; 286 - } 287 285 288 286 disk->first_minor = 0; 289 287 disk->fops = &nd_blk_fops; ··· 288 294 nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name); 289 295 set_capacity(disk, 0); 290 296 device_add_disk(dev, disk); 297 + 298 + if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk)) 299 + return -ENOMEM; 291 300 292 301 if (nsblk_meta_size(nsblk)) { 293 302 int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
+1 -2
drivers/nvdimm/btt_devs.c
··· 198 198 { 199 199 struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); 200 200 201 - if (dev) 202 - __nd_device_register(dev); 201 + __nd_device_register(dev); 203 202 return dev; 204 203 } 205 204
+200 -12
drivers/nvdimm/bus.c
··· 31 31 int nvdimm_major; 32 32 static int nvdimm_bus_major; 33 33 static struct class *nd_class; 34 + static DEFINE_IDA(nd_ida); 34 35 35 36 static int to_nd_device_type(struct device *dev) 36 37 { ··· 61 60 to_nd_device_type(dev)); 62 61 } 63 62 64 - static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) 65 - { 66 - struct nd_device_driver *nd_drv = to_nd_device_driver(drv); 67 - 68 - return !!test_bit(to_nd_device_type(dev), &nd_drv->type); 69 - } 70 - 71 63 static struct module *to_bus_provider(struct device *dev) 72 64 { 73 65 /* pin bus providers while regions are enabled */ 74 66 if (is_nd_pmem(dev) || is_nd_blk(dev)) { 75 67 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 76 68 77 - return nvdimm_bus->module; 69 + return nvdimm_bus->nd_desc->module; 78 70 } 79 71 return NULL; 80 72 } ··· 128 134 dev_name(dev), rc); 129 135 module_put(provider); 130 136 return rc; 137 + } 138 + 139 + static void nvdimm_bus_shutdown(struct device *dev) 140 + { 141 + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 142 + struct nd_device_driver *nd_drv = NULL; 143 + 144 + if (dev->driver) 145 + nd_drv = to_nd_device_driver(dev->driver); 146 + 147 + if (nd_drv && nd_drv->shutdown) { 148 + nd_drv->shutdown(dev); 149 + dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n", 150 + dev->driver->name, dev_name(dev)); 151 + } 131 152 } 132 153 133 154 void nd_device_notify(struct device *dev, enum nvdimm_event event) ··· 217 208 } 218 209 EXPORT_SYMBOL_GPL(nvdimm_clear_poison); 219 210 211 + static int nvdimm_bus_match(struct device *dev, struct device_driver *drv); 212 + 220 213 static struct bus_type nvdimm_bus_type = { 221 214 .name = "nd", 222 215 .uevent = nvdimm_bus_uevent, 223 216 .match = nvdimm_bus_match, 224 217 .probe = nvdimm_bus_probe, 225 218 .remove = nvdimm_bus_remove, 219 + .shutdown = nvdimm_bus_shutdown, 226 220 }; 221 + 222 + static void nvdimm_bus_release(struct device *dev) 223 + { 224 + struct nvdimm_bus *nvdimm_bus; 225 + 226 + 
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 227 + ida_simple_remove(&nd_ida, nvdimm_bus->id); 228 + kfree(nvdimm_bus); 229 + } 230 + 231 + static bool is_nvdimm_bus(struct device *dev) 232 + { 233 + return dev->release == nvdimm_bus_release; 234 + } 235 + 236 + struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) 237 + { 238 + struct device *dev; 239 + 240 + for (dev = nd_dev; dev; dev = dev->parent) 241 + if (is_nvdimm_bus(dev)) 242 + break; 243 + dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); 244 + if (dev) 245 + return to_nvdimm_bus(dev); 246 + return NULL; 247 + } 248 + 249 + struct nvdimm_bus *to_nvdimm_bus(struct device *dev) 250 + { 251 + struct nvdimm_bus *nvdimm_bus; 252 + 253 + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 254 + WARN_ON(!is_nvdimm_bus(dev)); 255 + return nvdimm_bus; 256 + } 257 + EXPORT_SYMBOL_GPL(to_nvdimm_bus); 258 + 259 + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, 260 + struct nvdimm_bus_descriptor *nd_desc) 261 + { 262 + struct nvdimm_bus *nvdimm_bus; 263 + int rc; 264 + 265 + nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); 266 + if (!nvdimm_bus) 267 + return NULL; 268 + INIT_LIST_HEAD(&nvdimm_bus->list); 269 + INIT_LIST_HEAD(&nvdimm_bus->mapping_list); 270 + INIT_LIST_HEAD(&nvdimm_bus->poison_list); 271 + init_waitqueue_head(&nvdimm_bus->probe_wait); 272 + nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); 273 + mutex_init(&nvdimm_bus->reconfig_mutex); 274 + if (nvdimm_bus->id < 0) { 275 + kfree(nvdimm_bus); 276 + return NULL; 277 + } 278 + nvdimm_bus->nd_desc = nd_desc; 279 + nvdimm_bus->dev.parent = parent; 280 + nvdimm_bus->dev.release = nvdimm_bus_release; 281 + nvdimm_bus->dev.groups = nd_desc->attr_groups; 282 + nvdimm_bus->dev.bus = &nvdimm_bus_type; 283 + dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); 284 + rc = device_register(&nvdimm_bus->dev); 285 + if (rc) { 286 + dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); 287 + 
goto err; 288 + } 289 + 290 + return nvdimm_bus; 291 + err: 292 + put_device(&nvdimm_bus->dev); 293 + return NULL; 294 + } 295 + EXPORT_SYMBOL_GPL(nvdimm_bus_register); 296 + 297 + void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) 298 + { 299 + if (!nvdimm_bus) 300 + return; 301 + device_unregister(&nvdimm_bus->dev); 302 + } 303 + EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); 304 + 305 + static int child_unregister(struct device *dev, void *data) 306 + { 307 + /* 308 + * the singular ndctl class device per bus needs to be 309 + * "device_destroy"ed, so skip it here 310 + * 311 + * i.e. remove classless children 312 + */ 313 + if (dev->class) 314 + /* pass */; 315 + else 316 + nd_device_unregister(dev, ND_SYNC); 317 + return 0; 318 + } 319 + 320 + static void free_poison_list(struct list_head *poison_list) 321 + { 322 + struct nd_poison *pl, *next; 323 + 324 + list_for_each_entry_safe(pl, next, poison_list, list) { 325 + list_del(&pl->list); 326 + kfree(pl); 327 + } 328 + list_del_init(poison_list); 329 + } 330 + 331 + static int nd_bus_remove(struct device *dev) 332 + { 333 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 334 + 335 + mutex_lock(&nvdimm_bus_list_mutex); 336 + list_del_init(&nvdimm_bus->list); 337 + mutex_unlock(&nvdimm_bus_list_mutex); 338 + 339 + nd_synchronize(); 340 + device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); 341 + 342 + nvdimm_bus_lock(&nvdimm_bus->dev); 343 + free_poison_list(&nvdimm_bus->poison_list); 344 + nvdimm_bus_unlock(&nvdimm_bus->dev); 345 + 346 + nvdimm_bus_destroy_ndctl(nvdimm_bus); 347 + 348 + return 0; 349 + } 350 + 351 + static int nd_bus_probe(struct device *dev) 352 + { 353 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 354 + int rc; 355 + 356 + rc = nvdimm_bus_create_ndctl(nvdimm_bus); 357 + if (rc) 358 + return rc; 359 + 360 + mutex_lock(&nvdimm_bus_list_mutex); 361 + list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); 362 + mutex_unlock(&nvdimm_bus_list_mutex); 363 + 364 + /* enable bus 
provider attributes to look up their local context */ 365 + dev_set_drvdata(dev, nvdimm_bus->nd_desc); 366 + 367 + return 0; 368 + } 369 + 370 + static struct nd_device_driver nd_bus_driver = { 371 + .probe = nd_bus_probe, 372 + .remove = nd_bus_remove, 373 + .drv = { 374 + .name = "nd_bus", 375 + .suppress_bind_attrs = true, 376 + .bus = &nvdimm_bus_type, 377 + .owner = THIS_MODULE, 378 + .mod_name = KBUILD_MODNAME, 379 + }, 380 + }; 381 + 382 + static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) 383 + { 384 + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); 385 + 386 + if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver) 387 + return true; 388 + 389 + return !!test_bit(to_nd_device_type(dev), &nd_drv->type); 390 + } 227 391 228 392 static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); 229 393 ··· 577 395 dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus, 578 396 "ndctl%d", nvdimm_bus->id); 579 397 580 - if (IS_ERR(dev)) { 398 + if (IS_ERR(dev)) 581 399 dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n", 582 400 nvdimm_bus->id, PTR_ERR(dev)); 583 - return PTR_ERR(dev); 584 - } 585 - return 0; 401 + return PTR_ERR_OR_ZERO(dev); 586 402 } 587 403 588 404 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) ··· 1030 850 goto err_class; 1031 851 } 1032 852 853 + rc = driver_register(&nd_bus_driver.drv); 854 + if (rc) 855 + goto err_nd_bus; 856 + 1033 857 return 0; 1034 858 859 + err_nd_bus: 860 + class_destroy(nd_class); 1035 861 err_class: 1036 862 unregister_chrdev(nvdimm_major, "dimmctl"); 1037 863 err_dimm_chrdev: ··· 1050 864 1051 865 void nvdimm_bus_exit(void) 1052 866 { 867 + driver_unregister(&nd_bus_driver.drv); 1053 868 class_destroy(nd_class); 1054 869 unregister_chrdev(nvdimm_bus_major, "ndctl"); 1055 870 unregister_chrdev(nvdimm_major, "dimmctl"); 1056 871 bus_unregister(&nvdimm_bus_type); 872 + ida_destroy(&nd_ida); 1057 873 }
+3 -4
drivers/nvdimm/claim.c
··· 240 240 return memcpy_from_pmem(buf, nsio->addr + offset, size); 241 241 } else { 242 242 memcpy_to_pmem(nsio->addr + offset, buf, size); 243 - wmb_pmem(); 243 + nvdimm_flush(to_nd_region(ndns->dev.parent)); 244 244 } 245 245 246 246 return 0; ··· 266 266 267 267 nsio->addr = devm_memremap(dev, res->start, resource_size(res), 268 268 ARCH_MEMREMAP_PMEM); 269 - if (IS_ERR(nsio->addr)) 270 - return PTR_ERR(nsio->addr); 271 - return 0; 269 + 270 + return PTR_ERR_OR_ZERO(nsio->addr); 272 271 } 273 272 EXPORT_SYMBOL_GPL(devm_nsio_enable); 274 273
+128 -125
drivers/nvdimm/core.c
··· 20 20 #include <linux/ndctl.h> 21 21 #include <linux/mutex.h> 22 22 #include <linux/slab.h> 23 + #include <linux/io.h> 23 24 #include "nd-core.h" 24 25 #include "nd.h" 25 26 26 27 LIST_HEAD(nvdimm_bus_list); 27 28 DEFINE_MUTEX(nvdimm_bus_list_mutex); 28 - static DEFINE_IDA(nd_ida); 29 29 30 30 void nvdimm_bus_lock(struct device *dev) 31 31 { ··· 57 57 } 58 58 EXPORT_SYMBOL(is_nvdimm_bus_locked); 59 59 60 + struct nvdimm_map { 61 + struct nvdimm_bus *nvdimm_bus; 62 + struct list_head list; 63 + resource_size_t offset; 64 + unsigned long flags; 65 + size_t size; 66 + union { 67 + void *mem; 68 + void __iomem *iomem; 69 + }; 70 + struct kref kref; 71 + }; 72 + 73 + static struct nvdimm_map *find_nvdimm_map(struct device *dev, 74 + resource_size_t offset) 75 + { 76 + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 77 + struct nvdimm_map *nvdimm_map; 78 + 79 + list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list) 80 + if (nvdimm_map->offset == offset) 81 + return nvdimm_map; 82 + return NULL; 83 + } 84 + 85 + static struct nvdimm_map *alloc_nvdimm_map(struct device *dev, 86 + resource_size_t offset, size_t size, unsigned long flags) 87 + { 88 + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 89 + struct nvdimm_map *nvdimm_map; 90 + 91 + nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL); 92 + if (!nvdimm_map) 93 + return NULL; 94 + 95 + INIT_LIST_HEAD(&nvdimm_map->list); 96 + nvdimm_map->nvdimm_bus = nvdimm_bus; 97 + nvdimm_map->offset = offset; 98 + nvdimm_map->flags = flags; 99 + nvdimm_map->size = size; 100 + kref_init(&nvdimm_map->kref); 101 + 102 + if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) 103 + goto err_request_region; 104 + 105 + if (flags) 106 + nvdimm_map->mem = memremap(offset, size, flags); 107 + else 108 + nvdimm_map->iomem = ioremap(offset, size); 109 + 110 + if (!nvdimm_map->mem) 111 + goto err_map; 112 + 113 + dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!", 114 + __func__); 
115 + list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list); 116 + 117 + return nvdimm_map; 118 + 119 + err_map: 120 + release_mem_region(offset, size); 121 + err_request_region: 122 + kfree(nvdimm_map); 123 + return NULL; 124 + } 125 + 126 + static void nvdimm_map_release(struct kref *kref) 127 + { 128 + struct nvdimm_bus *nvdimm_bus; 129 + struct nvdimm_map *nvdimm_map; 130 + 131 + nvdimm_map = container_of(kref, struct nvdimm_map, kref); 132 + nvdimm_bus = nvdimm_map->nvdimm_bus; 133 + 134 + dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset); 135 + list_del(&nvdimm_map->list); 136 + if (nvdimm_map->flags) 137 + memunmap(nvdimm_map->mem); 138 + else 139 + iounmap(nvdimm_map->iomem); 140 + release_mem_region(nvdimm_map->offset, nvdimm_map->size); 141 + kfree(nvdimm_map); 142 + } 143 + 144 + static void nvdimm_map_put(void *data) 145 + { 146 + struct nvdimm_map *nvdimm_map = data; 147 + struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus; 148 + 149 + nvdimm_bus_lock(&nvdimm_bus->dev); 150 + kref_put(&nvdimm_map->kref, nvdimm_map_release); 151 + nvdimm_bus_unlock(&nvdimm_bus->dev); 152 + } 153 + 154 + /** 155 + * devm_nvdimm_memremap - map a resource that is shared across regions 156 + * @dev: device that will own a reference to the shared mapping 157 + * @offset: physical base address of the mapping 158 + * @size: mapping size 159 + * @flags: memremap flags, or, if zero, perform an ioremap instead 160 + */ 161 + void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, 162 + size_t size, unsigned long flags) 163 + { 164 + struct nvdimm_map *nvdimm_map; 165 + 166 + nvdimm_bus_lock(dev); 167 + nvdimm_map = find_nvdimm_map(dev, offset); 168 + if (!nvdimm_map) 169 + nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags); 170 + else 171 + kref_get(&nvdimm_map->kref); 172 + nvdimm_bus_unlock(dev); 173 + 174 + if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map)) 175 + return NULL; 176 + 177 + return nvdimm_map->mem; 178 + } 
179 + EXPORT_SYMBOL_GPL(devm_nvdimm_memremap); 180 + 60 181 u64 nd_fletcher64(void *addr, size_t len, bool le) 61 182 { 62 183 u32 *buf = addr; ··· 194 73 } 195 74 EXPORT_SYMBOL_GPL(nd_fletcher64); 196 75 197 - static void nvdimm_bus_release(struct device *dev) 198 - { 199 - struct nvdimm_bus *nvdimm_bus; 200 - 201 - nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 202 - ida_simple_remove(&nd_ida, nvdimm_bus->id); 203 - kfree(nvdimm_bus); 204 - } 205 - 206 - struct nvdimm_bus *to_nvdimm_bus(struct device *dev) 207 - { 208 - struct nvdimm_bus *nvdimm_bus; 209 - 210 - nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 211 - WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release); 212 - return nvdimm_bus; 213 - } 214 - EXPORT_SYMBOL_GPL(to_nvdimm_bus); 215 - 216 76 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) 217 77 { 218 78 /* struct nvdimm_bus definition is private to libnvdimm */ ··· 201 99 } 202 100 EXPORT_SYMBOL_GPL(to_nd_desc); 203 101 204 - struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) 102 + struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus) 205 103 { 206 - struct device *dev; 207 - 208 - for (dev = nd_dev; dev; dev = dev->parent) 209 - if (dev->release == nvdimm_bus_release) 210 - break; 211 - dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); 212 - if (dev) 213 - return to_nvdimm_bus(dev); 214 - return NULL; 104 + /* struct nvdimm_bus definition is private to libnvdimm */ 105 + return &nvdimm_bus->dev; 215 106 } 107 + EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); 216 108 217 109 static bool is_uuid_sep(char sep) 218 110 { ··· 421 325 }; 422 326 EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); 423 327 424 - struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, 425 - struct nvdimm_bus_descriptor *nd_desc, struct module *module) 426 - { 427 - struct nvdimm_bus *nvdimm_bus; 428 - int rc; 429 - 430 - nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); 431 - if (!nvdimm_bus) 432 - return 
NULL; 433 - INIT_LIST_HEAD(&nvdimm_bus->list); 434 - INIT_LIST_HEAD(&nvdimm_bus->poison_list); 435 - init_waitqueue_head(&nvdimm_bus->probe_wait); 436 - nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); 437 - mutex_init(&nvdimm_bus->reconfig_mutex); 438 - if (nvdimm_bus->id < 0) { 439 - kfree(nvdimm_bus); 440 - return NULL; 441 - } 442 - nvdimm_bus->nd_desc = nd_desc; 443 - nvdimm_bus->module = module; 444 - nvdimm_bus->dev.parent = parent; 445 - nvdimm_bus->dev.release = nvdimm_bus_release; 446 - nvdimm_bus->dev.groups = nd_desc->attr_groups; 447 - dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); 448 - rc = device_register(&nvdimm_bus->dev); 449 - if (rc) { 450 - dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); 451 - goto err; 452 - } 453 - 454 - rc = nvdimm_bus_create_ndctl(nvdimm_bus); 455 - if (rc) 456 - goto err; 457 - 458 - mutex_lock(&nvdimm_bus_list_mutex); 459 - list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); 460 - mutex_unlock(&nvdimm_bus_list_mutex); 461 - 462 - return nvdimm_bus; 463 - err: 464 - put_device(&nvdimm_bus->dev); 465 - return NULL; 466 - } 467 - EXPORT_SYMBOL_GPL(__nvdimm_bus_register); 468 - 469 328 static void set_badblock(struct badblocks *bb, sector_t s, int num) 470 329 { 471 330 dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n", ··· 596 545 } 597 546 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); 598 547 599 - static void free_poison_list(struct list_head *poison_list) 600 - { 601 - struct nd_poison *pl, *next; 602 - 603 - list_for_each_entry_safe(pl, next, poison_list, list) { 604 - list_del(&pl->list); 605 - kfree(pl); 606 - } 607 - list_del_init(poison_list); 608 - } 609 - 610 - static int child_unregister(struct device *dev, void *data) 611 - { 612 - /* 613 - * the singular ndctl class device per bus needs to be 614 - * "device_destroy"ed, so skip it here 615 - * 616 - * i.e. 
remove classless children 617 - */ 618 - if (dev->class) 619 - /* pass */; 620 - else 621 - nd_device_unregister(dev, ND_SYNC); 622 - return 0; 623 - } 624 - 625 - void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) 626 - { 627 - if (!nvdimm_bus) 628 - return; 629 - 630 - mutex_lock(&nvdimm_bus_list_mutex); 631 - list_del_init(&nvdimm_bus->list); 632 - mutex_unlock(&nvdimm_bus_list_mutex); 633 - 634 - nd_synchronize(); 635 - device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); 636 - 637 - nvdimm_bus_lock(&nvdimm_bus->dev); 638 - free_poison_list(&nvdimm_bus->poison_list); 639 - nvdimm_bus_unlock(&nvdimm_bus->dev); 640 - 641 - nvdimm_bus_destroy_ndctl(nvdimm_bus); 642 - 643 - device_unregister(&nvdimm_bus->dev); 644 - } 645 - EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); 646 - 647 548 #ifdef CONFIG_BLK_DEV_INTEGRITY 648 549 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) 649 550 { ··· 604 601 if (meta_size == 0) 605 602 return 0; 606 603 607 - bi.profile = NULL; 604 + memset(&bi, 0, sizeof(bi)); 605 + 608 606 bi.tuple_size = meta_size; 609 607 bi.tag_size = meta_size; 610 608 ··· 654 650 nvdimm_bus_exit(); 655 651 nd_region_devs_exit(); 656 652 nvdimm_devs_exit(); 657 - ida_destroy(&nd_ida); 658 653 } 659 654 660 655 MODULE_LICENSE("GPL v2");
+4 -1
drivers/nvdimm/dimm_devs.c
··· 346 346 347 347 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, 348 348 const struct attribute_group **groups, unsigned long flags, 349 - unsigned long cmd_mask) 349 + unsigned long cmd_mask, int num_flush, 350 + struct resource *flush_wpq) 350 351 { 351 352 struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); 352 353 struct device *dev; ··· 363 362 nvdimm->provider_data = provider_data; 364 363 nvdimm->flags = flags; 365 364 nvdimm->cmd_mask = cmd_mask; 365 + nvdimm->num_flush = num_flush; 366 + nvdimm->flush_wpq = flush_wpq; 366 367 atomic_set(&nvdimm->busy, 0); 367 368 dev = &nvdimm->dev; 368 369 dev_set_name(dev, "nmem%d", nvdimm->id);
+1
drivers/nvdimm/e820.c
··· 47 47 48 48 nd_desc.attr_groups = e820_pmem_attribute_groups; 49 49 nd_desc.provider_name = "e820"; 50 + nd_desc.module = THIS_MODULE; 50 51 nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); 51 52 if (!nvdimm_bus) 52 53 goto err;
+3 -2
drivers/nvdimm/nd-core.h
··· 26 26 struct nvdimm_bus { 27 27 struct nvdimm_bus_descriptor *nd_desc; 28 28 wait_queue_head_t probe_wait; 29 - struct module *module; 30 29 struct list_head list; 31 30 struct device dev; 32 31 int id, probe_active; 33 32 struct list_head poison_list; 33 + struct list_head mapping_list; 34 34 struct mutex reconfig_mutex; 35 35 }; 36 36 ··· 40 40 unsigned long cmd_mask; 41 41 struct device dev; 42 42 atomic_t busy; 43 - int id; 43 + int id, num_flush; 44 + struct resource *flush_wpq; 44 45 }; 45 46 46 47 bool is_nvdimm(struct device *dev);
+6 -4
drivers/nvdimm/nd.h
··· 49 49 struct kref kref; 50 50 }; 51 51 52 - struct nd_region_namespaces { 53 - int count; 54 - int active; 52 + struct nd_region_data { 53 + int ns_count; 54 + int ns_active; 55 + unsigned int flush_mask; 56 + void __iomem *flush_wpq[0][0]; 55 57 }; 56 58 57 59 static inline struct nd_namespace_index *to_namespace_index( ··· 121 119 122 120 struct nd_blk_region { 123 121 int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 124 - void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 125 122 int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, 126 123 void *iobuf, u64 len, int rw); 127 124 void *blk_provider_data; ··· 326 325 } 327 326 #endif 328 327 int nd_blk_region_init(struct nd_region *nd_region); 328 + int nd_region_activate(struct nd_region *nd_region); 329 329 void __nd_iostat_start(struct bio *bio, unsigned long *start); 330 330 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) 331 331 {
+51 -34
drivers/nvdimm/pmem.c
··· 29 29 #include <linux/slab.h> 30 30 #include <linux/pmem.h> 31 31 #include <linux/nd.h> 32 + #include "pmem.h" 32 33 #include "pfn.h" 33 34 #include "nd.h" 34 35 35 - struct pmem_device { 36 - /* One contiguous memory region per device */ 37 - phys_addr_t phys_addr; 38 - /* when non-zero this device is hosting a 'pfn' instance */ 39 - phys_addr_t data_offset; 40 - u64 pfn_flags; 41 - void __pmem *virt_addr; 42 - /* immutable base size of the namespace */ 43 - size_t size; 44 - /* trim size when namespace capacity has been section aligned */ 45 - u32 pfn_pad; 46 - struct badblocks bb; 47 - }; 36 + static struct device *to_dev(struct pmem_device *pmem) 37 + { 38 + /* 39 + * nvdimm bus services need a 'dev' parameter, and we record the device 40 + * at init in bb.dev. 41 + */ 42 + return pmem->bb.dev; 43 + } 44 + 45 + static struct nd_region *to_region(struct pmem_device *pmem) 46 + { 47 + return to_nd_region(to_dev(pmem)->parent); 48 + } 48 49 49 50 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, 50 51 unsigned int len) 51 52 { 52 - struct device *dev = pmem->bb.dev; 53 + struct device *dev = to_dev(pmem); 53 54 sector_t sector; 54 55 long cleared; 55 56 ··· 58 57 cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); 59 58 60 59 if (cleared > 0 && cleared / 512) { 61 - dev_dbg(dev, "%s: %llx clear %ld sector%s\n", 60 + dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", 62 61 __func__, (unsigned long long) sector, 63 62 cleared / 512, cleared / 512 > 1 ? 
"s" : ""); 64 63 badblocks_clear(&pmem->bb, sector, cleared / 512); ··· 74 73 bool bad_pmem = false; 75 74 void *mem = kmap_atomic(page); 76 75 phys_addr_t pmem_off = sector * 512 + pmem->data_offset; 77 - void __pmem *pmem_addr = pmem->virt_addr + pmem_off; 76 + void *pmem_addr = pmem->virt_addr + pmem_off; 78 77 79 78 if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) 80 79 bad_pmem = true; ··· 113 112 return rc; 114 113 } 115 114 115 + /* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */ 116 + #ifndef REQ_FLUSH 117 + #define REQ_FLUSH REQ_PREFLUSH 118 + #endif 119 + 116 120 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) 117 121 { 118 122 int rc = 0; ··· 126 120 struct bio_vec bvec; 127 121 struct bvec_iter iter; 128 122 struct pmem_device *pmem = q->queuedata; 123 + struct nd_region *nd_region = to_region(pmem); 124 + 125 + if (bio->bi_rw & REQ_FLUSH) 126 + nvdimm_flush(nd_region); 129 127 130 128 do_acct = nd_iostat_start(bio, &start); 131 129 bio_for_each_segment(bvec, bio, iter) { ··· 144 134 if (do_acct) 145 135 nd_iostat_end(bio, start); 146 136 147 - if (bio_data_dir(bio)) 148 - wmb_pmem(); 137 + if (bio->bi_rw & REQ_FUA) 138 + nvdimm_flush(nd_region); 149 139 150 140 bio_endio(bio); 151 141 return BLK_QC_T_NONE; ··· 158 148 int rc; 159 149 160 150 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); 161 - if (rw & WRITE) 162 - wmb_pmem(); 163 151 164 152 /* 165 153 * The ->rw_page interface is subtle and tricky. 
The core ··· 171 163 return rc; 172 164 } 173 165 174 - static long pmem_direct_access(struct block_device *bdev, sector_t sector, 175 - void __pmem **kaddr, pfn_t *pfn, long size) 166 + /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ 167 + __weak long pmem_direct_access(struct block_device *bdev, sector_t sector, 168 + void **kaddr, pfn_t *pfn, long size) 176 169 { 177 170 struct pmem_device *pmem = bdev->bd_queue->queuedata; 178 171 resource_size_t offset = sector * 512 + pmem->data_offset; ··· 204 195 blk_cleanup_queue(q); 205 196 } 206 197 207 - void pmem_release_disk(void *disk) 198 + static void pmem_release_disk(void *disk) 208 199 { 209 200 del_gendisk(disk); 210 201 put_disk(disk); ··· 214 205 struct nd_namespace_common *ndns) 215 206 { 216 207 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 208 + struct nd_region *nd_region = to_nd_region(dev->parent); 217 209 struct vmem_altmap __altmap, *altmap = NULL; 218 210 struct resource *res = &nsio->res; 219 211 struct nd_pfn *nd_pfn = NULL; ··· 244 234 dev_set_drvdata(dev, pmem); 245 235 pmem->phys_addr = res->start; 246 236 pmem->size = resource_size(res); 247 - if (!arch_has_wmb_pmem()) 237 + if (nvdimm_has_flush(nd_region) < 0) 248 238 dev_warn(dev, "unable to guarantee persistence of writes\n"); 249 239 250 240 if (!devm_request_mem_region(dev, res->start, resource_size(res), ··· 279 269 * At release time the queue must be dead before 280 270 * devm_memremap_pages is unwound 281 271 */ 282 - if (devm_add_action(dev, pmem_release_queue, q)) { 283 - blk_cleanup_queue(q); 272 + if (devm_add_action_or_reset(dev, pmem_release_queue, q)) 284 273 return -ENOMEM; 285 - } 286 274 287 275 if (IS_ERR(addr)) 288 276 return PTR_ERR(addr); 289 - pmem->virt_addr = (void __pmem *) addr; 277 + pmem->virt_addr = addr; 290 278 279 + blk_queue_write_cache(q, true, true); 291 280 blk_queue_make_request(q, pmem_make_request); 292 281 blk_queue_physical_block_size(q, PAGE_SIZE); 293 282 
blk_queue_max_hw_sectors(q, UINT_MAX); ··· 298 289 disk = alloc_disk_node(0, nid); 299 290 if (!disk) 300 291 return -ENOMEM; 301 - if (devm_add_action(dev, pmem_release_disk, disk)) { 302 - put_disk(disk); 303 - return -ENOMEM; 304 - } 305 292 306 293 disk->fops = &pmem_fops; 307 294 disk->queue = q; ··· 307 302 / 512); 308 303 if (devm_init_badblocks(dev, &pmem->bb)) 309 304 return -ENOMEM; 310 - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); 305 + nvdimm_badblocks_populate(nd_region, &pmem->bb, res); 311 306 disk->bb = &pmem->bb; 312 307 device_add_disk(dev, disk); 308 + 309 + if (devm_add_action_or_reset(dev, pmem_release_disk, disk)) 310 + return -ENOMEM; 311 + 313 312 revalidate_disk(disk); 314 313 315 314 return 0; ··· 349 340 { 350 341 if (is_nd_btt(dev)) 351 342 nvdimm_namespace_detach_btt(to_nd_btt(dev)); 343 + nvdimm_flush(to_nd_region(dev->parent)); 344 + 352 345 return 0; 346 + } 347 + 348 + static void nd_pmem_shutdown(struct device *dev) 349 + { 350 + nvdimm_flush(to_nd_region(dev->parent)); 353 351 } 354 352 355 353 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) 356 354 { 357 - struct nd_region *nd_region = to_nd_region(dev->parent); 358 355 struct pmem_device *pmem = dev_get_drvdata(dev); 356 + struct nd_region *nd_region = to_region(pmem); 359 357 resource_size_t offset = 0, end_trunc = 0; 360 358 struct nd_namespace_common *ndns; 361 359 struct nd_namespace_io *nsio; ··· 398 382 .probe = nd_pmem_probe, 399 383 .remove = nd_pmem_remove, 400 384 .notify = nd_pmem_notify, 385 + .shutdown = nd_pmem_shutdown, 401 386 .drv = { 402 387 .name = "nd_pmem", 403 388 },
+24
drivers/nvdimm/pmem.h
··· 1 + #ifndef __NVDIMM_PMEM_H__ 2 + #define __NVDIMM_PMEM_H__ 3 + #include <linux/badblocks.h> 4 + #include <linux/types.h> 5 + #include <linux/pfn_t.h> 6 + #include <linux/fs.h> 7 + 8 + long pmem_direct_access(struct block_device *bdev, sector_t sector, 9 + void **kaddr, pfn_t *pfn, long size); 10 + /* this definition is in it's own header for tools/testing/nvdimm to consume */ 11 + struct pmem_device { 12 + /* One contiguous memory region per device */ 13 + phys_addr_t phys_addr; 14 + /* when non-zero this device is hosting a 'pfn' instance */ 15 + phys_addr_t data_offset; 16 + u64 pfn_flags; 17 + void *virt_addr; 18 + /* immutable base size of the namespace */ 19 + size_t size; 20 + /* trim size when namespace capacity has been section aligned */ 21 + u32 pfn_pad; 22 + struct badblocks bb; 23 + }; 24 + #endif /* __NVDIMM_PMEM_H__ */
+10 -9
drivers/nvdimm/region.c
··· 20 20 { 21 21 int err, rc; 22 22 static unsigned long once; 23 - struct nd_region_namespaces *num_ns; 23 + struct nd_region_data *ndrd; 24 24 struct nd_region *nd_region = to_nd_region(dev); 25 25 26 26 if (nd_region->num_lanes > num_online_cpus() ··· 33 33 nd_region->num_lanes); 34 34 } 35 35 36 + rc = nd_region_activate(nd_region); 37 + if (rc) 38 + return rc; 39 + 36 40 rc = nd_blk_region_init(nd_region); 37 41 if (rc) 38 42 return rc; 39 43 40 44 rc = nd_region_register_namespaces(nd_region, &err); 41 - num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); 42 - if (!num_ns) 43 - return -ENOMEM; 44 - 45 45 if (rc < 0) 46 46 return rc; 47 47 48 - num_ns->active = rc; 49 - num_ns->count = rc + err; 50 - dev_set_drvdata(dev, num_ns); 48 + ndrd = dev_get_drvdata(dev); 49 + ndrd->ns_active = rc; 50 + ndrd->ns_count = rc + err; 51 51 52 52 if (rc && err && rc == err) 53 53 return -ENODEV; ··· 82 82 { 83 83 struct nd_region *nd_region = to_nd_region(dev); 84 84 85 + device_for_each_child(dev, NULL, child_unregister); 86 + 85 87 /* flush attribute readers and disable */ 86 88 nvdimm_bus_lock(dev); 87 89 nd_region->ns_seed = NULL; ··· 93 91 dev_set_drvdata(dev, NULL); 94 92 nvdimm_bus_unlock(dev); 95 93 96 - device_for_each_child(dev, NULL, child_unregister); 97 94 return 0; 98 95 } 99 96
+148 -6
drivers/nvdimm/region_devs.c
··· 14 14 #include <linux/highmem.h> 15 15 #include <linux/sched.h> 16 16 #include <linux/slab.h> 17 + #include <linux/hash.h> 18 + #include <linux/pmem.h> 17 19 #include <linux/sort.h> 18 20 #include <linux/io.h> 19 21 #include <linux/nd.h> 20 22 #include "nd-core.h" 21 23 #include "nd.h" 22 24 25 + /* 26 + * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is 27 + * irrelevant. 28 + */ 29 + #include <linux/io-64-nonatomic-hi-lo.h> 30 + 23 31 static DEFINE_IDA(region_ida); 32 + static DEFINE_PER_CPU(int, flush_idx); 33 + 34 + static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, 35 + struct nd_region_data *ndrd) 36 + { 37 + int i, j; 38 + 39 + dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm), 40 + nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es"); 41 + for (i = 0; i < nvdimm->num_flush; i++) { 42 + struct resource *res = &nvdimm->flush_wpq[i]; 43 + unsigned long pfn = PHYS_PFN(res->start); 44 + void __iomem *flush_page; 45 + 46 + /* check if flush hints share a page */ 47 + for (j = 0; j < i; j++) { 48 + struct resource *res_j = &nvdimm->flush_wpq[j]; 49 + unsigned long pfn_j = PHYS_PFN(res_j->start); 50 + 51 + if (pfn == pfn_j) 52 + break; 53 + } 54 + 55 + if (j < i) 56 + flush_page = (void __iomem *) ((unsigned long) 57 + ndrd->flush_wpq[dimm][j] & PAGE_MASK); 58 + else 59 + flush_page = devm_nvdimm_ioremap(dev, 60 + PHYS_PFN(pfn), PAGE_SIZE); 61 + if (!flush_page) 62 + return -ENXIO; 63 + ndrd->flush_wpq[dimm][i] = flush_page 64 + + (res->start & ~PAGE_MASK); 65 + } 66 + 67 + return 0; 68 + } 69 + 70 + int nd_region_activate(struct nd_region *nd_region) 71 + { 72 + int i, num_flush = 0; 73 + struct nd_region_data *ndrd; 74 + struct device *dev = &nd_region->dev; 75 + size_t flush_data_size = sizeof(void *); 76 + 77 + nvdimm_bus_lock(&nd_region->dev); 78 + for (i = 0; i < nd_region->ndr_mappings; i++) { 79 + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 80 + struct nvdimm *nvdimm = 
nd_mapping->nvdimm; 81 + 82 + /* at least one null hint slot per-dimm for the "no-hint" case */ 83 + flush_data_size += sizeof(void *); 84 + num_flush = min_not_zero(num_flush, nvdimm->num_flush); 85 + if (!nvdimm->num_flush) 86 + continue; 87 + flush_data_size += nvdimm->num_flush * sizeof(void *); 88 + } 89 + nvdimm_bus_unlock(&nd_region->dev); 90 + 91 + ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL); 92 + if (!ndrd) 93 + return -ENOMEM; 94 + dev_set_drvdata(dev, ndrd); 95 + 96 + ndrd->flush_mask = (1 << ilog2(num_flush)) - 1; 97 + for (i = 0; i < nd_region->ndr_mappings; i++) { 98 + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 99 + struct nvdimm *nvdimm = nd_mapping->nvdimm; 100 + int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd); 101 + 102 + if (rc) 103 + return rc; 104 + } 105 + 106 + return 0; 107 + } 24 108 25 109 static void nd_region_release(struct device *dev) 26 110 { ··· 326 242 static ssize_t init_namespaces_show(struct device *dev, 327 243 struct device_attribute *attr, char *buf) 328 244 { 329 - struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); 245 + struct nd_region_data *ndrd = dev_get_drvdata(dev); 330 246 ssize_t rc; 331 247 332 248 nvdimm_bus_lock(dev); 333 - if (num_ns) 334 - rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); 249 + if (ndrd) 250 + rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count); 335 251 else 336 252 rc = -ENXIO; 337 253 nvdimm_bus_unlock(dev); ··· 517 433 518 434 if (is_nd_pmem(dev)) 519 435 return; 520 - 521 - to_nd_blk_region(dev)->disable(nvdimm_bus, dev); 522 436 } 523 437 if (dev->parent && is_nd_blk(dev->parent) && probe) { 524 438 nd_region = to_nd_region(dev->parent); ··· 780 698 if (ndbr) { 781 699 nd_region = &ndbr->nd_region; 782 700 ndbr->enable = ndbr_desc->enable; 783 - ndbr->disable = ndbr_desc->disable; 784 701 ndbr->do_io = ndbr_desc->do_io; 785 702 } 786 703 region_buf = ndbr; ··· 874 793 __func__); 875 794 } 876 795 
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); 796 + 797 + /** 798 + * nvdimm_flush - flush any posted write queues between the cpu and pmem media 799 + * @nd_region: blk or interleaved pmem region 800 + */ 801 + void nvdimm_flush(struct nd_region *nd_region) 802 + { 803 + struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); 804 + int i, idx; 805 + 806 + /* 807 + * Try to encourage some diversity in flush hint addresses 808 + * across cpus assuming a limited number of flush hints. 809 + */ 810 + idx = this_cpu_read(flush_idx); 811 + idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8)); 812 + 813 + /* 814 + * The first wmb() is needed to 'sfence' all previous writes 815 + * such that they are architecturally visible for the platform 816 + * buffer flush. Note that we've already arranged for pmem 817 + * writes to avoid the cache via arch_memcpy_to_pmem(). The 818 + * final wmb() ensures ordering for the NVDIMM flush write. 819 + */ 820 + wmb(); 821 + for (i = 0; i < nd_region->ndr_mappings; i++) 822 + if (ndrd->flush_wpq[i][0]) 823 + writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]); 824 + wmb(); 825 + } 826 + EXPORT_SYMBOL_GPL(nvdimm_flush); 827 + 828 + /** 829 + * nvdimm_has_flush - determine write flushing requirements 830 + * @nd_region: blk or interleaved pmem region 831 + * 832 + * Returns 1 if writes require flushing 833 + * Returns 0 if writes do not require flushing 834 + * Returns -ENXIO if flushing capability can not be determined 835 + */ 836 + int nvdimm_has_flush(struct nd_region *nd_region) 837 + { 838 + struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); 839 + int i; 840 + 841 + /* no nvdimm == flushing capability unknown */ 842 + if (nd_region->ndr_mappings == 0) 843 + return -ENXIO; 844 + 845 + for (i = 0; i < nd_region->ndr_mappings; i++) 846 + /* flush hints present, flushing required */ 847 + if (ndrd->flush_wpq[i][0]) 848 + return 1; 849 + 850 + /* 851 + * The platform defines dimm devices without 
hints, assume 852 + * platform persistence mechanism like ADR 853 + */ 854 + return 0; 855 + } 856 + EXPORT_SYMBOL_GPL(nvdimm_has_flush); 877 857 878 858 void __exit nd_region_devs_exit(void) 879 859 {
+3 -3
drivers/s390/block/dcssblk.c
··· 31 31 static blk_qc_t dcssblk_make_request(struct request_queue *q, 32 32 struct bio *bio); 33 33 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, 34 - void __pmem **kaddr, pfn_t *pfn, long size); 34 + void **kaddr, pfn_t *pfn, long size); 35 35 36 36 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; 37 37 ··· 884 884 885 885 static long 886 886 dcssblk_direct_access (struct block_device *bdev, sector_t secnum, 887 - void __pmem **kaddr, pfn_t *pfn, long size) 887 + void **kaddr, pfn_t *pfn, long size) 888 888 { 889 889 struct dcssblk_dev_info *dev_info; 890 890 unsigned long offset, dev_sz; ··· 894 894 return -ENODEV; 895 895 dev_sz = dev_info->end - dev_info->start; 896 896 offset = secnum * 512; 897 - *kaddr = (void __pmem *) (dev_info->start + offset); 897 + *kaddr = (void *) dev_info->start + offset; 898 898 *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); 899 899 900 900 return dev_sz - offset;
+4 -9
fs/dax.c
··· 75 75 struct request_queue *q = bdev->bd_queue; 76 76 long rc = -EIO; 77 77 78 - dax->addr = (void __pmem *) ERR_PTR(-EIO); 78 + dax->addr = ERR_PTR(-EIO); 79 79 if (blk_queue_enter(q, true) != 0) 80 80 return rc; 81 81 82 82 rc = bdev_direct_access(bdev, dax); 83 83 if (rc < 0) { 84 - dax->addr = (void __pmem *) ERR_PTR(rc); 84 + dax->addr = ERR_PTR(rc); 85 85 blk_queue_exit(q); 86 86 return rc; 87 87 } ··· 147 147 struct buffer_head *bh) 148 148 { 149 149 loff_t pos = start, max = start, bh_max = start; 150 - bool hole = false, need_wmb = false; 150 + bool hole = false; 151 151 struct block_device *bdev = NULL; 152 152 int rw = iov_iter_rw(iter), rc; 153 153 long map_len = 0; 154 154 struct blk_dax_ctl dax = { 155 - .addr = (void __pmem *) ERR_PTR(-EIO), 155 + .addr = ERR_PTR(-EIO), 156 156 }; 157 157 unsigned blkbits = inode->i_blkbits; 158 158 sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1) ··· 218 218 219 219 if (iov_iter_rw(iter) == WRITE) { 220 220 len = copy_from_iter_pmem(dax.addr, max - pos, iter); 221 - need_wmb = true; 222 221 } else if (!hole) 223 222 len = copy_to_iter((void __force *) dax.addr, max - pos, 224 223 iter); ··· 234 235 dax.addr += len; 235 236 } 236 237 237 - if (need_wmb) 238 - wmb_pmem(); 239 238 dax_unmap_atomic(bdev, &dax); 240 239 241 240 return (pos == start) ? rc : pos - start; ··· 785 788 return ret; 786 789 } 787 790 } 788 - wmb_pmem(); 789 791 return 0; 790 792 } 791 793 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); ··· 1183 1187 if (dax_map_atomic(bdev, &dax) < 0) 1184 1188 return PTR_ERR(dax.addr); 1185 1189 clear_pmem(dax.addr + offset, length); 1186 - wmb_pmem(); 1187 1190 dax_unmap_atomic(bdev, &dax); 1188 1191 } 1189 1192 return 0;
+3 -3
include/linux/blkdev.h
··· 1665 1665 */ 1666 1666 struct blk_dax_ctl { 1667 1667 sector_t sector; 1668 - void __pmem *addr; 1668 + void *addr; 1669 1669 long size; 1670 1670 pfn_t pfn; 1671 1671 }; ··· 1676 1676 int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); 1677 1677 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1678 1678 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1679 - long (*direct_access)(struct block_device *, sector_t, void __pmem **, 1680 - pfn_t *, long); 1679 + long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *, 1680 + long); 1681 1681 unsigned int (*check_events) (struct gendisk *disk, 1682 1682 unsigned int clearing); 1683 1683 /* ->media_changed() is DEPRECATED, use ->check_events() instead */
-2
include/linux/compiler.h
··· 17 17 # define __release(x) __context__(x,-1) 18 18 # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) 19 19 # define __percpu __attribute__((noderef, address_space(3))) 20 - # define __pmem __attribute__((noderef, address_space(5))) 21 20 #ifdef CONFIG_SPARSE_RCU_POINTER 22 21 # define __rcu __attribute__((noderef, address_space(4))) 23 22 #else /* CONFIG_SPARSE_RCU_POINTER */ ··· 44 45 # define __cond_lock(x,c) (c) 45 46 # define __percpu 46 47 # define __rcu 47 - # define __pmem 48 48 # define __private 49 49 # define ACCESS_PRIVATE(p, member) ((p)->member) 50 50 #endif /* __CHECKER__ */
+1 -1
include/linux/device-mapper.h
··· 131 131 * >= 0 : the number of bytes accessible at the address 132 132 */ 133 133 typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, 134 - void __pmem **kaddr, pfn_t *pfn, long size); 134 + void **kaddr, pfn_t *pfn, long size); 135 135 136 136 void dm_error(const char *message); 137 137
+18 -6
include/linux/libnvdimm.h
··· 52 52 53 53 struct nd_namespace_label; 54 54 struct nvdimm_drvdata; 55 + 55 56 struct nd_mapping { 56 57 struct nvdimm *nvdimm; 57 58 struct nd_namespace_label **labels; ··· 70 69 struct nvdimm_bus_descriptor { 71 70 const struct attribute_group **attr_groups; 72 71 unsigned long cmd_mask; 72 + struct module *module; 73 73 char *provider_name; 74 74 ndctl_fn ndctl; 75 75 int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); ··· 101 99 unsigned long flags; 102 100 }; 103 101 102 + struct device; 103 + void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, 104 + size_t size, unsigned long flags); 105 + static inline void __iomem *devm_nvdimm_ioremap(struct device *dev, 106 + resource_size_t offset, size_t size) 107 + { 108 + return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0); 109 + } 110 + 104 111 struct nvdimm_bus; 105 112 struct module; 106 113 struct device; 107 114 struct nd_blk_region; 108 115 struct nd_blk_region_desc { 109 116 int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 110 - void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); 111 117 int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, 112 118 void *iobuf, u64 len, int rw); 113 119 struct nd_region_desc ndr_desc; ··· 129 119 } 130 120 131 121 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); 132 - struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, 133 - struct nvdimm_bus_descriptor *nfit_desc, struct module *module); 134 - #define nvdimm_bus_register(parent, desc) \ 135 - __nvdimm_bus_register(parent, desc, THIS_MODULE) 122 + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, 123 + struct nvdimm_bus_descriptor *nfit_desc); 136 124 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); 137 125 struct nvdimm_bus *to_nvdimm_bus(struct device *dev); 138 126 struct nvdimm *to_nvdimm(struct device *dev); 139 127 struct nd_region *to_nd_region(struct device *dev); 140 128 
struct nd_blk_region *to_nd_blk_region(struct device *dev); 141 129 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); 130 + struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); 142 131 const char *nvdimm_name(struct nvdimm *nvdimm); 143 132 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); 144 133 void *nvdimm_provider_data(struct nvdimm *nvdimm); 145 134 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, 146 135 const struct attribute_group **groups, unsigned long flags, 147 - unsigned long cmd_mask); 136 + unsigned long cmd_mask, int num_flush, 137 + struct resource *flush_wpq); 148 138 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); 149 139 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); 150 140 u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, ··· 166 156 unsigned int nd_region_acquire_lane(struct nd_region *nd_region); 167 157 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); 168 158 u64 nd_fletcher64(void *addr, size_t len, bool le); 159 + void nvdimm_flush(struct nd_region *nd_region); 160 + int nvdimm_has_flush(struct nd_region *nd_region); 169 161 #endif /* __LIBNVDIMM_H__ */
+2 -1
include/linux/nd.h
··· 26 26 unsigned long type; 27 27 int (*probe)(struct device *dev); 28 28 int (*remove)(struct device *dev); 29 + void (*shutdown)(struct device *dev); 29 30 void (*notify)(struct device *dev, enum nvdimm_event event); 30 31 }; 31 32 ··· 68 67 struct nd_namespace_common common; 69 68 struct resource res; 70 69 resource_size_t size; 71 - void __pmem *addr; 70 + void *addr; 72 71 struct badblocks bb; 73 72 }; 74 73
+4 -1
include/linux/pfn_t.h
··· 28 28 return __pfn_to_pfn_t(pfn, 0); 29 29 } 30 30 31 - extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags); 31 + static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) 32 + { 33 + return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); 34 + } 32 35 33 36 static inline bool pfn_t_has_page(pfn_t pfn) 34 37 {
+21 -96
include/linux/pmem.h
··· 26 26 * calling these symbols with arch_has_pmem_api() and redirect to the 27 27 * implementation in asm/pmem.h. 28 28 */ 29 - static inline bool __arch_has_wmb_pmem(void) 30 - { 31 - return false; 32 - } 33 - 34 - static inline void arch_wmb_pmem(void) 29 + static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) 35 30 { 36 31 BUG(); 37 32 } 38 33 39 - static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, 40 - size_t n) 41 - { 42 - BUG(); 43 - } 44 - 45 - static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, 46 - size_t n) 34 + static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) 47 35 { 48 36 BUG(); 49 37 return -EFAULT; 50 38 } 51 39 52 - static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, 40 + static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, 53 41 struct iov_iter *i) 54 42 { 55 43 BUG(); 56 44 return 0; 57 45 } 58 46 59 - static inline void arch_clear_pmem(void __pmem *addr, size_t size) 47 + static inline void arch_clear_pmem(void *addr, size_t size) 60 48 { 61 49 BUG(); 62 50 } 63 51 64 - static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) 52 + static inline void arch_wb_cache_pmem(void *addr, size_t size) 65 53 { 66 54 BUG(); 67 55 } 68 56 69 - static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) 57 + static inline void arch_invalidate_pmem(void *addr, size_t size) 70 58 { 71 59 BUG(); 72 60 } ··· 65 77 return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); 66 78 } 67 79 68 - static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, 69 - size_t size) 70 - { 71 - memcpy(dst, (void __force *) src, size); 72 - return 0; 73 - } 74 - 75 80 /* 76 81 * memcpy_from_pmem - read from persistent memory with error handling 77 82 * @dst: destination buffer ··· 73 92 * 74 93 * Returns 0 on success negative error code on failure. 
75 94 */ 76 - static inline int memcpy_from_pmem(void *dst, void __pmem const *src, 77 - size_t size) 95 + static inline int memcpy_from_pmem(void *dst, void const *src, size_t size) 78 96 { 79 97 if (arch_has_pmem_api()) 80 98 return arch_memcpy_from_pmem(dst, src, size); 81 99 else 82 - return default_memcpy_from_pmem(dst, src, size); 83 - } 84 - 85 - /** 86 - * arch_has_wmb_pmem - true if wmb_pmem() ensures durability 87 - * 88 - * For a given cpu implementation within an architecture it is possible 89 - * that wmb_pmem() resolves to a nop. In the case this returns 90 - * false, pmem api users are unable to ensure durability and may want to 91 - * fall back to a different data consistency model, or otherwise notify 92 - * the user. 93 - */ 94 - static inline bool arch_has_wmb_pmem(void) 95 - { 96 - return arch_has_pmem_api() && __arch_has_wmb_pmem(); 97 - } 98 - 99 - /* 100 - * These defaults seek to offer decent performance and minimize the 101 - * window between i/o completion and writes being durable on media. 102 - * However, it is undefined / architecture specific whether 103 - * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for 104 - * making data durable relative to i/o completion. 
105 - */ 106 - static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, 107 - size_t size) 108 - { 109 - memcpy((void __force *) dst, src, size); 110 - } 111 - 112 - static inline size_t default_copy_from_iter_pmem(void __pmem *addr, 113 - size_t bytes, struct iov_iter *i) 114 - { 115 - return copy_from_iter_nocache((void __force *)addr, bytes, i); 116 - } 117 - 118 - static inline void default_clear_pmem(void __pmem *addr, size_t size) 119 - { 120 - if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0) 121 - clear_page((void __force *)addr); 122 - else 123 - memset((void __force *)addr, 0, size); 100 + memcpy(dst, src, size); 101 + return 0; 124 102 } 125 103 126 104 /** ··· 92 152 * being effectively evicted from, or never written to, the processor 93 153 * cache hierarchy after the copy completes. After memcpy_to_pmem() 94 154 * data may still reside in cpu or platform buffers, so this operation 95 - * must be followed by a wmb_pmem(). 155 + * must be followed by a blkdev_issue_flush() on the pmem block device. 96 156 */ 97 - static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) 157 + static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) 98 158 { 99 159 if (arch_has_pmem_api()) 100 160 arch_memcpy_to_pmem(dst, src, n); 101 161 else 102 - default_memcpy_to_pmem(dst, src, n); 103 - } 104 - 105 - /** 106 - * wmb_pmem - synchronize writes to persistent memory 107 - * 108 - * After a series of memcpy_to_pmem() operations this drains data from 109 - * cpu write buffers and any platform (memory controller) buffers to 110 - * ensure that written data is durable on persistent memory media. 
111 - */ 112 - static inline void wmb_pmem(void) 113 - { 114 - if (arch_has_wmb_pmem()) 115 - arch_wmb_pmem(); 116 - else 117 - wmb(); 162 + memcpy(dst, src, n); 118 163 } 119 164 120 165 /** ··· 109 184 * @i: iterator with source data 110 185 * 111 186 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. 112 - * This function requires explicit ordering with a wmb_pmem() call. 187 + * See blkdev_issue_flush() note for memcpy_to_pmem(). 113 188 */ 114 - static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes, 189 + static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, 115 190 struct iov_iter *i) 116 191 { 117 192 if (arch_has_pmem_api()) 118 193 return arch_copy_from_iter_pmem(addr, bytes, i); 119 - return default_copy_from_iter_pmem(addr, bytes, i); 194 + return copy_from_iter_nocache(addr, bytes, i); 120 195 } 121 196 122 197 /** ··· 125 200 * @size: number of bytes to zero 126 201 * 127 202 * Write zeros into the memory range starting at 'addr' for 'size' bytes. 128 - * This function requires explicit ordering with a wmb_pmem() call. 203 + * See blkdev_issue_flush() note for memcpy_to_pmem(). 129 204 */ 130 - static inline void clear_pmem(void __pmem *addr, size_t size) 205 + static inline void clear_pmem(void *addr, size_t size) 131 206 { 132 207 if (arch_has_pmem_api()) 133 208 arch_clear_pmem(addr, size); 134 209 else 135 - default_clear_pmem(addr, size); 210 + memset(addr, 0, size); 136 211 } 137 212 138 213 /** ··· 143 218 * For platforms that support clearing poison this flushes any poisoned 144 219 * ranges out of the cache 145 220 */ 146 - static inline void invalidate_pmem(void __pmem *addr, size_t size) 221 + static inline void invalidate_pmem(void *addr, size_t size) 147 222 { 148 223 if (arch_has_pmem_api()) 149 224 arch_invalidate_pmem(addr, size); ··· 155 230 * @size: number of bytes to write back 156 231 * 157 232 * Write back the processor cache range starting at 'addr' for 'size' bytes. 
158 - * This function requires explicit ordering with a wmb_pmem() call. 233 + * See blkdev_issue_flush() note for memcpy_to_pmem(). 159 234 */ 160 - static inline void wb_cache_pmem(void __pmem *addr, size_t size) 235 + static inline void wb_cache_pmem(void *addr, size_t size) 161 236 { 162 237 if (arch_has_pmem_api()) 163 238 arch_wb_cache_pmem(addr, size);
+1
include/uapi/linux/ndctl.h
··· 298 298 #define NVDIMM_FAMILY_INTEL 0 299 299 #define NVDIMM_FAMILY_HPE1 1 300 300 #define NVDIMM_FAMILY_HPE2 2 301 + #define NVDIMM_FAMILY_MSFT 3 301 302 302 303 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ 303 304 struct nd_cmd_pkg)
-6
kernel/memremap.c
··· 169 169 } 170 170 EXPORT_SYMBOL(devm_memunmap); 171 171 172 - pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) 173 - { 174 - return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); 175 - } 176 - EXPORT_SYMBOL(phys_to_pfn_t); 177 - 178 172 #ifdef CONFIG_ZONE_DEVICE 179 173 static DEFINE_MUTEX(pgmap_lock); 180 174 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
-1
scripts/checkpatch.pl
··· 313 313 __kernel| 314 314 __force| 315 315 __iomem| 316 - __pmem| 317 316 __must_check| 318 317 __init_refok| 319 318 __kprobes|
+1 -1
tools/objtool/arch/x86/insn/x86-opcode-map.txt
··· 947 947 4: XSAVE 948 948 5: XRSTOR | lfence (11B) 949 949 6: XSAVEOPT | clwb (66) | mfence (11B) 950 - 7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) 950 + 7: clflush | clflushopt (66) | sfence (11B) 951 951 EndTable 952 952 953 953 GrpTable: Grp16
-2
tools/perf/arch/x86/tests/insn-x86-dat-32.c
··· 1664 1664 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",}, 1665 1665 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", 1666 1666 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",}, 1667 - {{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", 1668 - "66 0f ae f8 \tpcommit ",},
-2
tools/perf/arch/x86/tests/insn-x86-dat-64.c
··· 1696 1696 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",}, 1697 1697 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", 1698 1698 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",}, 1699 - {{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", 1700 - "66 0f ae f8 \tpcommit ",},
-4
tools/perf/arch/x86/tests/insn-x86-dat-src.c
··· 2655 2655 2656 2656 #endif /* #ifndef __x86_64__ */ 2657 2657 2658 - /* pcommit */ 2659 - 2660 - asm volatile("pcommit"); 2661 - 2662 2658 /* Following line is a marker for the awk script - do not change */ 2663 2659 asm volatile("rdtsc"); /* Stop here */ 2664 2660
+1 -1
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
··· 1012 1012 4: XSAVE 1013 1013 5: XRSTOR | lfence (11B) 1014 1014 6: XSAVEOPT | clwb (66) | mfence (11B) 1015 - 7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) 1015 + 7: clflush | clflushopt (66) | sfence (11B) 1016 1016 EndTable 1017 1017 1018 1018 GrpTable: Grp16
+7 -3
tools/testing/nvdimm/Kbuild
··· 11 11 ldflags-y += --wrap=__request_region 12 12 ldflags-y += --wrap=__release_region 13 13 ldflags-y += --wrap=devm_memremap_pages 14 - ldflags-y += --wrap=phys_to_pfn_t 14 + ldflags-y += --wrap=insert_resource 15 + ldflags-y += --wrap=remove_resource 15 16 16 17 DRIVERS := ../../../drivers 17 18 NVDIMM_SRC := $(DRIVERS)/nvdimm 18 - ACPI_SRC := $(DRIVERS)/acpi 19 + ACPI_SRC := $(DRIVERS)/acpi/nfit 19 20 DAX_SRC := $(DRIVERS)/dax 21 + ccflags-y := -I$(src)/$(NVDIMM_SRC)/ 20 22 21 23 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o 22 24 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o ··· 29 27 obj-$(CONFIG_DEV_DAX) += dax.o 30 28 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o 31 29 32 - nfit-y := $(ACPI_SRC)/nfit.o 30 + nfit-y := $(ACPI_SRC)/core.o 31 + nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o 33 32 nfit-y += config_check.o 34 33 35 34 nd_pmem-y := $(NVDIMM_SRC)/pmem.o 35 + nd_pmem-y += pmem-dax.o 36 36 nd_pmem-y += config_check.o 37 37 38 38 nd_btt-y := $(NVDIMM_SRC)/btt.o
+1
tools/testing/nvdimm/config_check.c
··· 10 10 BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM)); 11 11 BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM)); 12 12 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); 13 + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN)); 13 14 BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); 14 15 BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); 15 16 BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
+54
tools/testing/nvdimm/pmem-dax.c
··· 1 + /* 2 + * Copyright (c) 2014-2016, Intel Corporation. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + */ 13 + #include "test/nfit_test.h" 14 + #include <linux/blkdev.h> 15 + #include <pmem.h> 16 + #include <nd.h> 17 + 18 + long pmem_direct_access(struct block_device *bdev, sector_t sector, 19 + void **kaddr, pfn_t *pfn, long size) 20 + { 21 + struct pmem_device *pmem = bdev->bd_queue->queuedata; 22 + resource_size_t offset = sector * 512 + pmem->data_offset; 23 + 24 + if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) 25 + return -EIO; 26 + 27 + /* 28 + * Limit dax to a single page at a time given vmalloc()-backed 29 + * in the nfit_test case. 30 + */ 31 + if (get_nfit_res(pmem->phys_addr + offset)) { 32 + struct page *page; 33 + 34 + *kaddr = pmem->virt_addr + offset; 35 + page = vmalloc_to_page(pmem->virt_addr + offset); 36 + *pfn = page_to_pfn_t(page); 37 + dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent, 38 + "%s: sector: %#llx pfn: %#lx\n", __func__, 39 + (unsigned long long) sector, page_to_pfn(page)); 40 + 41 + return PAGE_SIZE; 42 + } 43 + 44 + *kaddr = pmem->virt_addr + offset; 45 + *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); 46 + 47 + /* 48 + * If badblocks are present, limit known good range to the 49 + * requested range. 50 + */ 51 + if (unlikely(pmem->bb.count)) 52 + return size; 53 + return pmem->size - pmem->pfn_pad - offset; 54 + }
+1 -1
tools/testing/nvdimm/test/Kbuild
··· 1 1 ccflags-y := -I$(src)/../../../../drivers/nvdimm/ 2 - ccflags-y += -I$(src)/../../../../drivers/acpi/ 2 + ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/ 3 3 4 4 obj-m += nfit_test.o 5 5 obj-m += nfit_test_iomap.o
+20 -18
tools/testing/nvdimm/test/iomap.c
··· 10 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 11 * General Public License for more details. 12 12 */ 13 + #include <linux/memremap.h> 13 14 #include <linux/rculist.h> 14 15 #include <linux/export.h> 15 16 #include <linux/ioport.h> 16 17 #include <linux/module.h> 17 18 #include <linux/types.h> 19 + #include <linux/pfn_t.h> 18 20 #include <linux/io.h> 19 21 #include <linux/mm.h> 20 22 #include "nfit_test.h" ··· 54 52 return NULL; 55 53 } 56 54 57 - static struct nfit_test_resource *get_nfit_res(resource_size_t resource) 55 + struct nfit_test_resource *get_nfit_res(resource_size_t resource) 58 56 { 59 57 struct nfit_test_resource *res; 60 58 ··· 64 62 65 63 return res; 66 64 } 65 + EXPORT_SYMBOL(get_nfit_res); 67 66 68 67 void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, 69 68 void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) ··· 100 97 } 101 98 EXPORT_SYMBOL(__wrap_devm_memremap); 102 99 103 - #ifdef __HAVE_ARCH_PTE_DEVMAP 104 - #include <linux/memremap.h> 105 - #include <linux/pfn_t.h> 106 - 107 100 void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, 108 101 struct percpu_ref *ref, struct vmem_altmap *altmap) 109 102 { ··· 121 122 return phys_to_pfn_t(addr, flags); 122 123 } 123 124 EXPORT_SYMBOL(__wrap_phys_to_pfn_t); 124 - #else 125 - /* to be removed post 4.5-rc1 */ 126 - void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res) 127 - { 128 - resource_size_t offset = res->start; 129 - struct nfit_test_resource *nfit_res = get_nfit_res(offset); 130 - 131 - if (nfit_res) 132 - return nfit_res->buf + offset - nfit_res->res->start; 133 - return devm_memremap_pages(dev, res); 134 - } 135 - EXPORT_SYMBOL(__wrap_devm_memremap_pages); 136 - #endif 137 125 138 126 void *__wrap_memremap(resource_size_t offset, size_t size, 139 127 unsigned long flags) ··· 214 228 return nfit_test_request_region(NULL, parent, start, n, name, flags); 215 229 } 216 230 
EXPORT_SYMBOL(__wrap___request_region); 231 + 232 + int __wrap_insert_resource(struct resource *parent, struct resource *res) 233 + { 234 + if (get_nfit_res(res->start)) 235 + return 0; 236 + return insert_resource(parent, res); 237 + } 238 + EXPORT_SYMBOL(__wrap_insert_resource); 239 + 240 + int __wrap_remove_resource(struct resource *res) 241 + { 242 + if (get_nfit_res(res->start)) 243 + return 0; 244 + return remove_resource(res); 245 + } 246 + EXPORT_SYMBOL(__wrap_remove_resource); 217 247 218 248 struct resource *__wrap___devm_request_region(struct device *dev, 219 249 struct resource *parent, resource_size_t start,
+83 -116
tools/testing/nvdimm/test/nfit.c
··· 98 98 enum { 99 99 NUM_PM = 3, 100 100 NUM_DCR = 5, 101 + NUM_HINTS = 8, 101 102 NUM_BDW = NUM_DCR, 102 103 NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, 103 104 NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, 104 105 DIMM_SIZE = SZ_32M, 105 106 LABEL_SIZE = SZ_128K, 107 + SPA_VCD_SIZE = SZ_4M, 106 108 SPA0_SIZE = DIMM_SIZE, 107 109 SPA1_SIZE = DIMM_SIZE*2, 108 110 SPA2_SIZE = DIMM_SIZE, ··· 472 470 list_del(&nfit_res->list); 473 471 spin_unlock(&nfit_test_lock); 474 472 475 - if (is_vmalloc_addr(nfit_res->buf)) 476 - vfree(nfit_res->buf); 477 - else 478 - dma_free_coherent(nfit_res->dev, resource_size(res), 479 - nfit_res->buf, res->start); 473 + vfree(nfit_res->buf); 480 474 kfree(res); 481 475 kfree(nfit_res); 482 476 } ··· 505 507 506 508 return nfit_res->buf; 507 509 err: 508 - if (buf && !is_vmalloc_addr(buf)) 509 - dma_free_coherent(dev, size, buf, *dma); 510 - else if (buf) 510 + if (buf) 511 511 vfree(buf); 512 512 kfree(res); 513 513 kfree(nfit_res); ··· 517 521 void *buf = vmalloc(size); 518 522 519 523 *dma = (unsigned long) buf; 520 - return __test_alloc(t, size, dma, buf); 521 - } 522 - 523 - static void *test_alloc_coherent(struct nfit_test *t, size_t size, 524 - dma_addr_t *dma) 525 - { 526 - struct device *dev = &t->pdev.dev; 527 - void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); 528 - 529 524 return __test_alloc(t, size, dma, buf); 530 525 } 531 526 ··· 571 584 + offsetof(struct acpi_nfit_control_region, 572 585 window_size) * NUM_DCR 573 586 + sizeof(struct acpi_nfit_data_region) * NUM_BDW 574 - + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; 587 + + (sizeof(struct acpi_nfit_flush_address) 588 + + sizeof(u64) * NUM_HINTS) * NUM_DCR; 575 589 int i; 576 590 577 591 t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); ··· 580 592 return -ENOMEM; 581 593 t->nfit_size = nfit_size; 582 594 583 - t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]); 595 + t->spa_set[0] = test_alloc(t, SPA0_SIZE, 
&t->spa_set_dma[0]); 584 596 if (!t->spa_set[0]) 585 597 return -ENOMEM; 586 598 587 - t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]); 599 + t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]); 588 600 if (!t->spa_set[1]) 589 601 return -ENOMEM; 590 602 591 - t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]); 603 + t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]); 592 604 if (!t->spa_set[2]) 593 605 return -ENOMEM; 594 606 ··· 602 614 return -ENOMEM; 603 615 sprintf(t->label[i], "label%d", i); 604 616 605 - t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); 617 + t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS, 618 + &t->flush_dma[i]); 606 619 if (!t->flush[i]) 607 620 return -ENOMEM; 608 621 } ··· 619 630 620 631 static int nfit_test1_alloc(struct nfit_test *t) 621 632 { 622 - size_t nfit_size = sizeof(struct acpi_nfit_system_address) 633 + size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 623 634 + sizeof(struct acpi_nfit_memory_map) 624 635 + offsetof(struct acpi_nfit_control_region, window_size); 625 636 ··· 628 639 return -ENOMEM; 629 640 t->nfit_size = nfit_size; 630 641 631 - t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]); 642 + t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]); 632 643 if (!t->spa_set[0]) 644 + return -ENOMEM; 645 + 646 + t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]); 647 + if (!t->spa_set[1]) 633 648 return -ENOMEM; 634 649 635 650 return ars_state_init(&t->pdev.dev, &t->ars_state); 636 651 } 637 652 653 + static void dcr_common_init(struct acpi_nfit_control_region *dcr) 654 + { 655 + dcr->vendor_id = 0xabcd; 656 + dcr->device_id = 0; 657 + dcr->revision_id = 1; 658 + dcr->valid_fields = 1; 659 + dcr->manufacturing_location = 0xa; 660 + dcr->manufacturing_date = cpu_to_be16(2016); 661 + } 662 + 638 663 static void nfit_test0_setup(struct nfit_test *t) 639 664 { 665 + const int flush_hint_size = sizeof(struct 
acpi_nfit_flush_address) 666 + + (sizeof(u64) * NUM_HINTS); 640 667 struct acpi_nfit_desc *acpi_desc; 641 668 struct acpi_nfit_memory_map *memdev; 642 669 void *nfit_buf = t->nfit_buf; ··· 660 655 struct acpi_nfit_control_region *dcr; 661 656 struct acpi_nfit_data_region *bdw; 662 657 struct acpi_nfit_flush_address *flush; 663 - unsigned int offset; 658 + unsigned int offset, i; 664 659 665 660 /* 666 661 * spa0 (interleave first half of dimm0 and dimm1, note storage ··· 977 972 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 978 973 dcr->header.length = sizeof(struct acpi_nfit_control_region); 979 974 dcr->region_index = 0+1; 980 - dcr->vendor_id = 0xabcd; 981 - dcr->device_id = 0; 982 - dcr->revision_id = 1; 975 + dcr_common_init(dcr); 983 976 dcr->serial_number = ~handle[0]; 984 977 dcr->code = NFIT_FIC_BLK; 985 978 dcr->windows = 1; ··· 992 989 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 993 990 dcr->header.length = sizeof(struct acpi_nfit_control_region); 994 991 dcr->region_index = 1+1; 995 - dcr->vendor_id = 0xabcd; 996 - dcr->device_id = 0; 997 - dcr->revision_id = 1; 992 + dcr_common_init(dcr); 998 993 dcr->serial_number = ~handle[1]; 999 994 dcr->code = NFIT_FIC_BLK; 1000 995 dcr->windows = 1; ··· 1007 1006 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1008 1007 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1009 1008 dcr->region_index = 2+1; 1010 - dcr->vendor_id = 0xabcd; 1011 - dcr->device_id = 0; 1012 - dcr->revision_id = 1; 1009 + dcr_common_init(dcr); 1013 1010 dcr->serial_number = ~handle[2]; 1014 1011 dcr->code = NFIT_FIC_BLK; 1015 1012 dcr->windows = 1; ··· 1022 1023 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1023 1024 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1024 1025 dcr->region_index = 3+1; 1025 - dcr->vendor_id = 0xabcd; 1026 - dcr->device_id = 0; 1027 - dcr->revision_id = 1; 1026 + dcr_common_init(dcr); 1028 1027 dcr->serial_number = ~handle[3]; 1029 1028 dcr->code = NFIT_FIC_BLK; 1030 
1029 dcr->windows = 1; ··· 1039 1042 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1040 1043 window_size); 1041 1044 dcr->region_index = 4+1; 1042 - dcr->vendor_id = 0xabcd; 1043 - dcr->device_id = 0; 1044 - dcr->revision_id = 1; 1045 + dcr_common_init(dcr); 1045 1046 dcr->serial_number = ~handle[0]; 1046 1047 dcr->code = NFIT_FIC_BYTEN; 1047 1048 dcr->windows = 0; ··· 1051 1056 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1052 1057 window_size); 1053 1058 dcr->region_index = 5+1; 1054 - dcr->vendor_id = 0xabcd; 1055 - dcr->device_id = 0; 1056 - dcr->revision_id = 1; 1059 + dcr_common_init(dcr); 1057 1060 dcr->serial_number = ~handle[1]; 1058 1061 dcr->code = NFIT_FIC_BYTEN; 1059 1062 dcr->windows = 0; ··· 1063 1070 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1064 1071 window_size); 1065 1072 dcr->region_index = 6+1; 1066 - dcr->vendor_id = 0xabcd; 1067 - dcr->device_id = 0; 1068 - dcr->revision_id = 1; 1073 + dcr_common_init(dcr); 1069 1074 dcr->serial_number = ~handle[2]; 1070 1075 dcr->code = NFIT_FIC_BYTEN; 1071 1076 dcr->windows = 0; ··· 1075 1084 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1076 1085 window_size); 1077 1086 dcr->region_index = 7+1; 1078 - dcr->vendor_id = 0xabcd; 1079 - dcr->device_id = 0; 1080 - dcr->revision_id = 1; 1087 + dcr_common_init(dcr); 1081 1088 dcr->serial_number = ~handle[3]; 1082 1089 dcr->code = NFIT_FIC_BYTEN; 1083 1090 dcr->windows = 0; ··· 1130 1141 /* flush0 (dimm0) */ 1131 1142 flush = nfit_buf + offset; 1132 1143 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1133 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1144 + flush->header.length = flush_hint_size; 1134 1145 flush->device_handle = handle[0]; 1135 - flush->hint_count = 1; 1136 - flush->hint_address[0] = t->flush_dma[0]; 1146 + flush->hint_count = NUM_HINTS; 1147 + for (i = 0; i < NUM_HINTS; i++) 1148 + flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); 1137 1149 
1138 1150 /* flush1 (dimm1) */ 1139 - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; 1151 + flush = nfit_buf + offset + flush_hint_size * 1; 1140 1152 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1141 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1153 + flush->header.length = flush_hint_size; 1142 1154 flush->device_handle = handle[1]; 1143 - flush->hint_count = 1; 1144 - flush->hint_address[0] = t->flush_dma[1]; 1155 + flush->hint_count = NUM_HINTS; 1156 + for (i = 0; i < NUM_HINTS; i++) 1157 + flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); 1145 1158 1146 1159 /* flush2 (dimm2) */ 1147 - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; 1160 + flush = nfit_buf + offset + flush_hint_size * 2; 1148 1161 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1149 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1162 + flush->header.length = flush_hint_size; 1150 1163 flush->device_handle = handle[2]; 1151 - flush->hint_count = 1; 1152 - flush->hint_address[0] = t->flush_dma[2]; 1164 + flush->hint_count = NUM_HINTS; 1165 + for (i = 0; i < NUM_HINTS; i++) 1166 + flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); 1153 1167 1154 1168 /* flush3 (dimm3) */ 1155 - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; 1169 + flush = nfit_buf + offset + flush_hint_size * 3; 1156 1170 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1157 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1171 + flush->header.length = flush_hint_size; 1158 1172 flush->device_handle = handle[3]; 1159 - flush->hint_count = 1; 1160 - flush->hint_address[0] = t->flush_dma[3]; 1173 + flush->hint_count = NUM_HINTS; 1174 + for (i = 0; i < NUM_HINTS; i++) 1175 + flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); 1161 1176 1162 1177 if (t->setup_hotplug) { 1163 - offset = offset + sizeof(struct acpi_nfit_flush_address) * 4; 1178 + offset = offset + 
flush_hint_size * 4; 1164 1179 /* dcr-descriptor4: blk */ 1165 1180 dcr = nfit_buf + offset; 1166 1181 dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; 1167 1182 dcr->header.length = sizeof(struct acpi_nfit_control_region); 1168 1183 dcr->region_index = 8+1; 1169 - dcr->vendor_id = 0xabcd; 1170 - dcr->device_id = 0; 1171 - dcr->revision_id = 1; 1184 + dcr_common_init(dcr); 1172 1185 dcr->serial_number = ~handle[4]; 1173 1186 dcr->code = NFIT_FIC_BLK; 1174 1187 dcr->windows = 1; ··· 1187 1196 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1188 1197 window_size); 1189 1198 dcr->region_index = 9+1; 1190 - dcr->vendor_id = 0xabcd; 1191 - dcr->device_id = 0; 1192 - dcr->revision_id = 1; 1199 + dcr_common_init(dcr); 1193 1200 dcr->serial_number = ~handle[4]; 1194 1201 dcr->code = NFIT_FIC_BYTEN; 1195 1202 dcr->windows = 0; ··· 1289 1300 /* flush3 (dimm4) */ 1290 1301 flush = nfit_buf + offset; 1291 1302 flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; 1292 - flush->header.length = sizeof(struct acpi_nfit_flush_address); 1303 + flush->header.length = flush_hint_size; 1293 1304 flush->device_handle = handle[4]; 1294 - flush->hint_count = 1; 1295 - flush->hint_address[0] = t->flush_dma[4]; 1305 + flush->hint_count = NUM_HINTS; 1306 + for (i = 0; i < NUM_HINTS; i++) 1307 + flush->hint_address[i] = t->flush_dma[4] 1308 + + i * sizeof(u64); 1296 1309 } 1297 1310 1298 1311 post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); ··· 1330 1339 spa->address = t->spa_set_dma[0]; 1331 1340 spa->length = SPA2_SIZE; 1332 1341 1333 - offset += sizeof(*spa); 1342 + /* virtual cd region */ 1343 + spa = nfit_buf + sizeof(*spa); 1344 + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; 1345 + spa->header.length = sizeof(*spa); 1346 + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); 1347 + spa->range_index = 0; 1348 + spa->address = t->spa_set_dma[1]; 1349 + spa->length = SPA_VCD_SIZE; 1350 + 1351 + offset += sizeof(*spa) * 2; 1334 1352 /* mem-region0 (spa0, 
dimm0) */ 1335 1353 memdev = nfit_buf + offset; 1336 1354 memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; ··· 1365 1365 dcr->header.length = offsetof(struct acpi_nfit_control_region, 1366 1366 window_size); 1367 1367 dcr->region_index = 0+1; 1368 - dcr->vendor_id = 0xabcd; 1369 - dcr->device_id = 0; 1370 - dcr->revision_id = 1; 1368 + dcr_common_init(dcr); 1371 1369 dcr->serial_number = ~0; 1372 1370 dcr->code = NFIT_FIC_BYTE; 1373 1371 dcr->windows = 0; ··· 1460 1462 nfit_test->setup(nfit_test); 1461 1463 acpi_desc = &nfit_test->acpi_desc; 1462 1464 acpi_nfit_desc_init(acpi_desc, &pdev->dev); 1463 - acpi_desc->nfit = nfit_test->nfit_buf; 1464 1465 acpi_desc->blk_do_io = nfit_test_blk_do_io; 1465 1466 nd_desc = &acpi_desc->nd_desc; 1466 1467 nd_desc->provider_name = NULL; 1468 + nd_desc->module = THIS_MODULE; 1467 1469 nd_desc->ndctl = nfit_test_ctl; 1468 - acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); 1469 - if (!acpi_desc->nvdimm_bus) 1470 - return -ENXIO; 1471 1470 1472 - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); 1473 - if (rc) { 1474 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 1471 + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, 1472 + nfit_test->nfit_size); 1473 + if (rc) 1475 1474 return rc; 1476 - } 1477 1475 1478 1476 if (nfit_test->setup != nfit_test0_setup) 1479 1477 return 0; ··· 1477 1483 nfit_test->setup_hotplug = 1; 1478 1484 nfit_test->setup(nfit_test); 1479 1485 1480 - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); 1481 - if (rc) { 1482 - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 1486 + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, 1487 + nfit_test->nfit_size); 1488 + if (rc) 1483 1489 return rc; 1484 - } 1485 1490 1486 1491 return 0; 1487 1492 } 1488 1493 1489 1494 static int nfit_test_remove(struct platform_device *pdev) 1490 1495 { 1491 - struct nfit_test *nfit_test = to_nfit_test(&pdev->dev); 1492 - struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc; 1493 - 1494 - 
nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 1495 - 1496 1496 return 0; 1497 1497 } 1498 1498 ··· 1511 1523 .id_table = nfit_test_id, 1512 1524 }; 1513 1525 1514 - #ifdef CONFIG_CMA_SIZE_MBYTES 1515 - #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES 1516 - #else 1517 - #define CMA_SIZE_MBYTES 0 1518 - #endif 1519 - 1520 1526 static __init int nfit_test_init(void) 1521 1527 { 1522 1528 int rc, i; ··· 1520 1538 for (i = 0; i < NUM_NFITS; i++) { 1521 1539 struct nfit_test *nfit_test; 1522 1540 struct platform_device *pdev; 1523 - static int once; 1524 1541 1525 1542 nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL); 1526 1543 if (!nfit_test) { ··· 1558 1577 goto err_register; 1559 1578 1560 1579 instances[i] = nfit_test; 1561 - 1562 - if (!once++) { 1563 - dma_addr_t dma; 1564 - void *buf; 1565 - 1566 - buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma, 1567 - GFP_KERNEL); 1568 - if (!buf) { 1569 - rc = -ENOMEM; 1570 - dev_warn(&pdev->dev, "need 128M of free cma\n"); 1571 - goto err_register; 1572 - } 1573 - dma_free_coherent(&pdev->dev, SZ_128M, buf, dma); 1574 - } 1575 1580 } 1576 1581 1577 1582 rc = platform_driver_register(&nfit_test_driver);
+2
tools/testing/nvdimm/test/nfit_test.h
··· 12 12 */ 13 13 #ifndef __NFIT_TEST_H__ 14 14 #define __NFIT_TEST_H__ 15 + #include <linux/list.h> 15 16 16 17 struct nfit_test_resource { 17 18 struct list_head list; ··· 27 26 void __wrap_iounmap(volatile void __iomem *addr); 28 27 void nfit_test_setup(nfit_test_lookup_fn lookup); 29 28 void nfit_test_teardown(void); 29 + struct nfit_test_resource *get_nfit_res(resource_size_t resource); 30 30 #endif