Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mm-hotfixes-stable-2022-07-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"Thirteen hotfixes.

Eight are cc:stable and the remainder are for post-5.18 issues or are
too minor to warrant backporting"

* tag 'mm-hotfixes-stable-2022-07-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mailmap: update Gao Xiang's email addresses
userfaultfd: provide properly masked address for huge-pages
Revert "ocfs2: mount shared volume without ha stack"
hugetlb: fix memoryleak in hugetlb_mcopy_atomic_pte
fs: sendfile handles O_NONBLOCK of out_fd
ntfs: fix use-after-free in ntfs_ucsncmp()
secretmem: fix unhandled fault in truncate
mm/hugetlb: separate path for hwpoison entry in copy_hugetlb_page_range()
mm: fix missing wake-up event for FSDAX pages
mm: fix page leak with multiple threads mapping the same page
mailmap: update Seth Forshee's email address
tmpfs: fix the issue that the mount and remount results are inconsistent.
mm: kfence: apply kmemleak_ignore_phys on early allocated pool

+105 -93
+3
.mailmap
··· 135 135 Frank Zago <fzago@systemfabricworks.com> 136 136 Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com> 137 137 Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com> 138 + Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com> 139 + Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com> 138 140 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com> 139 141 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com> 140 142 Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@linux.vnet.ibm.com> ··· 373 371 Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk> 374 372 Sebastian Reichel <sre@kernel.org> <sre@debian.org> 375 373 Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de> 374 + Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com> 376 375 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com> 377 376 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com> 378 377 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
+6 -2
fs/ntfs/attrib.c
··· 592 592 a = (ATTR_RECORD*)((u8*)ctx->attr + 593 593 le32_to_cpu(ctx->attr->length)); 594 594 for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { 595 - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + 596 - le32_to_cpu(ctx->mrec->bytes_allocated)) 595 + u8 *mrec_end = (u8 *)ctx->mrec + 596 + le32_to_cpu(ctx->mrec->bytes_allocated); 597 + u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + 598 + a->name_length * sizeof(ntfschar); 599 + if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || 600 + name_end > mrec_end) 597 601 break; 598 602 ctx->attr = a; 599 603 if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
+1 -3
fs/ocfs2/ocfs2.h
··· 277 277 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ 278 278 OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ 279 279 OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ 280 - OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ 281 280 }; 282 281 283 282 #define OCFS2_OSB_SOFT_RO 0x0001 ··· 672 673 673 674 static inline int ocfs2_mount_local(struct ocfs2_super *osb) 674 675 { 675 - return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) 676 - || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); 676 + return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); 677 677 } 678 678 679 679 static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
+19 -27
fs/ocfs2/slot_map.c
··· 252 252 int i, ret = -ENOSPC; 253 253 254 254 if ((preferred >= 0) && (preferred < si->si_num_slots)) { 255 - if (!si->si_slots[preferred].sl_valid || 256 - !si->si_slots[preferred].sl_node_num) { 255 + if (!si->si_slots[preferred].sl_valid) { 257 256 ret = preferred; 258 257 goto out; 259 258 } 260 259 } 261 260 262 261 for(i = 0; i < si->si_num_slots; i++) { 263 - if (!si->si_slots[i].sl_valid || 264 - !si->si_slots[i].sl_node_num) { 262 + if (!si->si_slots[i].sl_valid) { 265 263 ret = i; 266 264 break; 267 265 } ··· 454 456 spin_lock(&osb->osb_lock); 455 457 ocfs2_update_slot_info(si); 456 458 457 - if (ocfs2_mount_local(osb)) 458 - /* use slot 0 directly in local mode */ 459 - slot = 0; 460 - else { 461 - /* search for ourselves first and take the slot if it already 462 - * exists. Perhaps we need to mark this in a variable for our 463 - * own journal recovery? Possibly not, though we certainly 464 - * need to warn to the user */ 465 - slot = __ocfs2_node_num_to_slot(si, osb->node_num); 459 + /* search for ourselves first and take the slot if it already 460 + * exists. Perhaps we need to mark this in a variable for our 461 + * own journal recovery? Possibly not, though we certainly 462 + * need to warn to the user */ 463 + slot = __ocfs2_node_num_to_slot(si, osb->node_num); 464 + if (slot < 0) { 465 + /* if no slot yet, then just take 1st available 466 + * one. */ 469 - slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); 470 - if (slot < 0) { 471 - spin_unlock(&osb->osb_lock); 472 - mlog(ML_ERROR, "no free slots available!\n"); 473 - status = -EINVAL; 474 - goto bail; 475 - } 476 - } else 477 - printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " 478 - "already allocated to this node!\n", 479 - slot, osb->dev_str); 480 - } 469 + spin_unlock(&osb->osb_lock); 470 + mlog(ML_ERROR, "no free slots available!\n"); 471 + status = -EINVAL; 472 + goto bail; 473 + } 474 + } else 475 + printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " 476 + "allocated to this node!\n", slot, osb->dev_str); 481 477
-21
fs/ocfs2/super.c
··· 172 172 Opt_dir_resv_level, 173 173 Opt_journal_async_commit, 174 174 Opt_err_cont, 175 - Opt_nocluster, 176 175 Opt_err, 177 176 }; 178 177 ··· 205 206 {Opt_dir_resv_level, "dir_resv_level=%u"}, 206 207 {Opt_journal_async_commit, "journal_async_commit"}, 207 208 {Opt_err_cont, "errors=continue"}, 208 - {Opt_nocluster, "nocluster"}, 209 209 {Opt_err, NULL} 210 210 }; 211 211 ··· 616 618 goto out; 617 619 } 618 620 619 - tmp = OCFS2_MOUNT_NOCLUSTER; 620 - if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { 621 - ret = -EINVAL; 622 - mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); 623 - goto out; 624 - } 625 - 626 621 tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 627 622 OCFS2_MOUNT_HB_NONE; 628 623 if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { ··· 856 865 } 857 866 858 867 if (ocfs2_userspace_stack(osb) && 859 - !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && 860 868 strncmp(osb->osb_cluster_stack, mopt->cluster_stack, 861 869 OCFS2_STACK_LABEL_LEN)) { 862 870 mlog(ML_ERROR, ··· 1126 1136 osb->dev_str, nodestr, osb->slot_num, 1127 1137 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? 
"writeback" : 1128 1138 "ordered"); 1129 - 1130 - if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && 1131 - !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) 1132 - printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " 1133 - "without cluster aware mode.\n", osb->dev_str); 1134 1139 1135 1140 atomic_set(&osb->vol_state, VOLUME_MOUNTED); 1136 1141 wake_up(&osb->osb_mount_event); ··· 1437 1452 case Opt_journal_async_commit: 1438 1453 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; 1439 1454 break; 1440 - case Opt_nocluster: 1441 - mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; 1442 - break; 1443 1455 default: 1444 1456 mlog(ML_ERROR, 1445 1457 "Unrecognized mount option \"%s\" " ··· 1547 1565 1548 1566 if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) 1549 1567 seq_printf(s, ",journal_async_commit"); 1550 - 1551 - if (opts & OCFS2_MOUNT_NOCLUSTER) 1552 - seq_printf(s, ",nocluster"); 1553 1568 1554 1569 return 0; 1555 1570 }
+3
fs/read_write.c
··· 1263 1263 count, fl); 1264 1264 file_end_write(out.file); 1265 1265 } else { 1266 + if (out.file->f_flags & O_NONBLOCK) 1267 + fl |= SPLICE_F_NONBLOCK; 1268 + 1266 1269 retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); 1267 1270 } 1268 1271
+7 -5
fs/userfaultfd.c
··· 192 192 } 193 193 194 194 static inline struct uffd_msg userfault_msg(unsigned long address, 195 + unsigned long real_address, 195 196 unsigned int flags, 196 197 unsigned long reason, 197 198 unsigned int features) 198 199 { 199 200 struct uffd_msg msg; 201 + 200 202 msg_init(&msg); 201 203 msg.event = UFFD_EVENT_PAGEFAULT; 202 204 203 - if (!(features & UFFD_FEATURE_EXACT_ADDRESS)) 204 - address &= PAGE_MASK; 205 - msg.arg.pagefault.address = address; 205 + msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ? 206 + real_address : address; 207 + 206 208 /* 207 209 * These flags indicate why the userfault occurred: 208 210 * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault. ··· 490 488 491 489 init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); 492 490 uwq.wq.private = current; 493 - uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason, 494 - ctx->features); 491 + uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags, 492 + reason, ctx->features); 495 493 uwq.ctx = ctx; 496 494 uwq.waken = false; 497 495
+9 -5
include/linux/mm.h
··· 1130 1130 #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) 1131 1131 DECLARE_STATIC_KEY_FALSE(devmap_managed_key); 1132 1132 1133 - bool __put_devmap_managed_page(struct page *page); 1134 - static inline bool put_devmap_managed_page(struct page *page) 1133 + bool __put_devmap_managed_page_refs(struct page *page, int refs); 1134 + static inline bool put_devmap_managed_page_refs(struct page *page, int refs) 1135 1135 { 1136 1136 if (!static_branch_unlikely(&devmap_managed_key)) 1137 1137 return false; 1138 1138 if (!is_zone_device_page(page)) 1139 1139 return false; 1140 - return __put_devmap_managed_page(page); 1140 + return __put_devmap_managed_page_refs(page, refs); 1141 1141 } 1142 - 1143 1142 #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ 1144 - static inline bool put_devmap_managed_page(struct page *page) 1143 + static inline bool put_devmap_managed_page_refs(struct page *page, int refs) 1145 1144 { 1146 1145 return false; 1147 1146 } 1148 1147 #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ 1148 + 1149 + static inline bool put_devmap_managed_page(struct page *page) 1150 + { 1151 + return put_devmap_managed_page_refs(page, 1); 1152 + } 1149 1153 1150 1154 /* 127: arbitrary random number, small enough to assemble well */ 1151 1155 #define folio_ref_zero_or_close_to_overflow(folio) \
+4 -2
mm/gup.c
··· 87 87 * belongs to this folio. 88 88 */ 89 89 if (unlikely(page_folio(page) != folio)) { 90 - folio_put_refs(folio, refs); 90 + if (!put_devmap_managed_page_refs(&folio->page, refs)) 91 + folio_put_refs(folio, refs); 91 92 goto retry; 92 93 } 93 94 ··· 177 176 refs *= GUP_PIN_COUNTING_BIAS; 178 177 } 179 178 180 - folio_put_refs(folio, refs); 179 + if (!put_devmap_managed_page_refs(&folio->page, refs)) 180 + folio_put_refs(folio, refs); 181 181 } 182 182 183 183 /**
+8 -2
mm/hugetlb.c
··· 4788 4788 * sharing with another vma. 4789 4789 */ 4790 4790 ; 4791 - } else if (unlikely(is_hugetlb_entry_migration(entry) || 4792 - is_hugetlb_entry_hwpoisoned(entry))) { 4791 + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { 4792 + bool uffd_wp = huge_pte_uffd_wp(entry); 4793 + 4794 + if (!userfaultfd_wp(dst_vma) && uffd_wp) 4795 + entry = huge_pte_clear_uffd_wp(entry); 4796 + set_huge_pte_at(dst, addr, dst_pte, entry); 4797 + } else if (unlikely(is_hugetlb_entry_migration(entry))) { 4793 4798 swp_entry_t swp_entry = pte_to_swp_entry(entry); 4794 4799 bool uffd_wp = huge_pte_uffd_wp(entry); 4795 4800 ··· 5952 5947 5953 5948 page = alloc_huge_page(dst_vma, dst_addr, 0); 5954 5949 if (IS_ERR(page)) { 5950 + put_page(*pagep); 5955 5951 ret = -ENOMEM; 5956 5952 *pagep = NULL; 5957 5953 goto out;
+9 -9
mm/kfence/core.c
··· 603 603 addr += 2 * PAGE_SIZE; 604 604 } 605 605 606 - /* 607 - * The pool is live and will never be deallocated from this point on. 608 - * Remove the pool object from the kmemleak object tree, as it would 609 - * otherwise overlap with allocations returned by kfence_alloc(), which 610 - * are registered with kmemleak through the slab post-alloc hook. 611 - */ 612 - kmemleak_free(__kfence_pool); 613 - 614 606 return 0; 615 607 } 616 608 ··· 615 623 616 624 addr = kfence_init_pool(); 617 625 618 - if (!addr) 626 + if (!addr) { 627 + /* 628 + * The pool is live and will never be deallocated from this point on. 629 + * Ignore the pool object from the kmemleak phys object tree, as it would 630 + * otherwise overlap with allocations returned by kfence_alloc(), which 631 + * are registered with kmemleak through the slab post-alloc hook. 632 + */ 633 + kmemleak_ignore_phys(__pa(__kfence_pool)); 619 634 return true; 635 + } 620 636 621 637 /* 622 638 * Only release unprotected pages, and do not try to go back and change
+5 -2
mm/memory.c
··· 4369 4369 return VM_FAULT_OOM; 4370 4370 } 4371 4371 4372 - /* See comment in handle_pte_fault() */ 4372 + /* 4373 + * See comment in handle_pte_fault() for how this scenario happens, we 4374 + * need to return NOPAGE so that we drop this page. 4375 + */ 4373 4376 if (pmd_devmap_trans_unstable(vmf->pmd)) 4374 - return 0; 4377 + return VM_FAULT_NOPAGE; 4375 4378 4376 4379 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, 4377 4380 vmf->address, &vmf->ptl);
+3 -3
mm/memremap.c
··· 499 499 } 500 500 501 501 #ifdef CONFIG_FS_DAX 502 - bool __put_devmap_managed_page(struct page *page) 502 + bool __put_devmap_managed_page_refs(struct page *page, int refs) 503 503 { 504 504 if (page->pgmap->type != MEMORY_DEVICE_FS_DAX) 505 505 return false; ··· 509 509 * refcount is 1, then the page is free and the refcount is 510 510 * stable because nobody holds a reference on the page. 511 511 */ 512 - if (page_ref_dec_return(page) == 1) 512 + if (page_ref_sub_return(page, refs) == 1) 513 513 wake_up_var(&page->_refcount); 514 514 return true; 515 515 } 516 - EXPORT_SYMBOL(__put_devmap_managed_page); 516 + EXPORT_SYMBOL(__put_devmap_managed_page_refs); 517 517 #endif /* CONFIG_FS_DAX */
+26 -7
mm/secretmem.c
··· 55 55 gfp_t gfp = vmf->gfp_mask; 56 56 unsigned long addr; 57 57 struct page *page; 58 + vm_fault_t ret; 58 59 int err; 59 60 60 61 if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) 61 62 return vmf_error(-EINVAL); 62 63 64 + filemap_invalidate_lock_shared(mapping); 65 + 63 66 retry: 64 67 page = find_lock_page(mapping, offset); 65 68 if (!page) { 66 69 page = alloc_page(gfp | __GFP_ZERO); 67 - if (!page) 68 - return VM_FAULT_OOM; 70 + if (!page) { 71 + ret = VM_FAULT_OOM; 72 + goto out; 73 + } 69 74 70 75 err = set_direct_map_invalid_noflush(page); 71 76 if (err) { 72 77 put_page(page); 73 - return vmf_error(err); 78 + ret = vmf_error(err); 79 + goto out; 74 80 } 75 81 76 82 __SetPageUptodate(page); ··· 92 86 if (err == -EEXIST) 93 87 goto retry; 94 88 95 - return vmf_error(err); 89 + ret = vmf_error(err); 90 + goto out; 96 91 } 97 92 98 93 addr = (unsigned long)page_address(page); ··· 101 94 } 102 95 103 96 vmf->page = page; 104 - return VM_FAULT_LOCKED; 97 + ret = VM_FAULT_LOCKED; 98 + 99 + out: 100 + filemap_invalidate_unlock_shared(mapping); 101 + return ret; 105 102 } 106 103 107 104 static const struct vm_operations_struct secretmem_vm_ops = { ··· 173 162 struct dentry *dentry, struct iattr *iattr) 174 163 { 175 164 struct inode *inode = d_inode(dentry); 165 + struct address_space *mapping = inode->i_mapping; 176 166 unsigned int ia_valid = iattr->ia_valid; 167 + int ret; 168 + 169 + filemap_invalidate_lock(mapping); 177 170 178 171 if ((ia_valid & ATTR_SIZE) && inode->i_size) 179 - return -EINVAL; 172 + ret = -EINVAL; 173 + else 174 + ret = simple_setattr(mnt_userns, dentry, iattr); 180 175 181 - return simple_setattr(mnt_userns, dentry, iattr); 176 + filemap_invalidate_unlock(mapping); 177 + 178 + return ret; 182 179 } 183 180 184 181 static const struct inode_operations secretmem_iops = {
+2 -5
mm/shmem.c
··· 3392 3392 break; 3393 3393 case Opt_nr_blocks: 3394 3394 ctx->blocks = memparse(param->string, &rest); 3395 - if (*rest) 3395 + if (*rest || ctx->blocks > S64_MAX) 3396 3396 goto bad_value; 3397 3397 ctx->seen |= SHMEM_SEEN_BLOCKS; 3398 3398 break; ··· 3514 3514 3515 3515 raw_spin_lock(&sbinfo->stat_lock); 3516 3516 inodes = sbinfo->max_inodes - sbinfo->free_inodes; 3517 - if (ctx->blocks > S64_MAX) { 3518 - err = "Number of blocks too large"; 3519 - goto out; 3520 - } 3517 + 3521 3518 if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { 3522 3519 if (!sbinfo->max_blocks) { 3523 3520 err = "Cannot retroactively limit size";