Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dax: update I/O path to do proper PMEM flushing

Update the DAX I/O path so that all operations that store data (I/O
writes, zeroing blocks, punching holes, etc.) properly synchronize the
stores to media using the PMEM API. This ensures that the data DAX is
writing is durable on media before the operation completes.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Authored by Ross Zwisler; committed by Dan Williams.
2765cfbb 5de490da

+25 -14
fs/dax.c
···
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
 #include <linux/vmstat.h>
···
 		unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
 		if (pgsz > count)
 			pgsz = count;
-		if (pgsz < PAGE_SIZE)
-			memset(addr, 0, pgsz);
-		else
-			clear_page(addr);
+		clear_pmem((void __pmem *)addr, pgsz);
 		addr += pgsz;
 		size -= pgsz;
 		count -= pgsz;
···
 		}
 	} while (size);

+	wmb_pmem();
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dax_clear_blocks);
···
 	return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
 }

+/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
 static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
 		loff_t end)
 {
 	loff_t final = end - pos + first; /* The final byte of the buffer */

 	if (first > 0)
-		memset(addr, 0, first);
+		clear_pmem((void __pmem *)addr, first);
 	if (final < size)
-		memset(addr + final, 0, size - final);
+		clear_pmem((void __pmem *)addr + final, size - final);
 }

 static bool buffer_written(struct buffer_head *bh)
···
 	loff_t bh_max = start;
 	void *addr;
 	bool hole = false;
+	bool need_wmb = false;

 	if (iov_iter_rw(iter) != WRITE)
 		end = min(end, i_size_read(inode));

 	while (pos < end) {
-		unsigned len;
+		size_t len;
 		if (pos == max) {
 			unsigned blkbits = inode->i_blkbits;
 			sector_t block = pos >> blkbits;
···
 				retval = dax_get_addr(bh, &addr, blkbits);
 				if (retval < 0)
 					break;
-				if (buffer_unwritten(bh) || buffer_new(bh))
+				if (buffer_unwritten(bh) || buffer_new(bh)) {
 					dax_new_buf(addr, retval, first, pos,
 							end);
+					need_wmb = true;
+				}
 				addr += first;
 				size = retval - first;
 			}
 			max = min(pos + size, end);
 		}

-		if (iov_iter_rw(iter) == WRITE)
-			len = copy_from_iter_nocache(addr, max - pos, iter);
-		else if (!hole)
+		if (iov_iter_rw(iter) == WRITE) {
+			len = copy_from_iter_pmem((void __pmem *)addr,
+					max - pos, iter);
+			need_wmb = true;
+		} else if (!hole)
 			len = copy_to_iter(addr, max - pos, iter);
 		else
 			len = iov_iter_zero(max - pos, iter);
···
 		pos += len;
 		addr += len;
 	}
+
+	if (need_wmb)
+		wmb_pmem();

 	return (pos == start) ? retval : pos - start;
 }
···
 		goto out;
 	}

-	if (buffer_unwritten(bh) || buffer_new(bh))
-		clear_page(addr);
+	if (buffer_unwritten(bh) || buffer_new(bh)) {
+		clear_pmem((void __pmem *)addr, PAGE_SIZE);
+		wmb_pmem();
+	}

 	error = vm_insert_mixed(vma, vaddr, pfn);
···
 		err = dax_get_addr(&bh, &addr, inode->i_blkbits);
 		if (err < 0)
 			return err;
-		memset(addr + offset, 0, length);
+		clear_pmem((void __pmem *)addr + offset, length);
+		wmb_pmem();
 	}

 	return 0;