Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: mte: Lock a page for MTE tag initialisation

Initialising the tags and setting PG_mte_tagged flag for a page can race
between multiple set_pte_at() on shared pages or setting the stage 2 pte
via user_mem_abort(). Introduce a new PG_mte_lock flag as PG_arch_3 and
set it before attempting page initialisation. Given that PG_mte_tagged
is never cleared for a page, consider setting this flag to mean page
unlocked and wait on this bit with acquire semantics if the page is
locked:

- try_page_mte_tagging() - lock the page for tagging, return true if it
can be tagged, false if already tagged. No acquire semantics if it
returns true (PG_mte_tagged not set) as there is no serialisation with
a previous set_page_mte_tagged().

- set_page_mte_tagged() - set PG_mte_tagged with release semantics.

The two-bit locking is based on Peter Collingbourne's idea.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Peter Collingbourne <pcc@google.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-6-pcc@google.com

Authored by Catalin Marinas and committed by Marc Zyngier
d77e59a8 ef6458b1

+60 -29
+34 -1
arch/arm64/include/asm/mte.h
··· 25 25 unsigned long n); 26 26 int mte_save_tags(struct page *page); 27 27 void mte_save_page_tags(const void *page_addr, void *tag_storage); 28 - bool mte_restore_tags(swp_entry_t entry, struct page *page); 28 + void mte_restore_tags(swp_entry_t entry, struct page *page); 29 29 void mte_restore_page_tags(void *page_addr, const void *tag_storage); 30 30 void mte_invalidate_tags(int type, pgoff_t offset); 31 31 void mte_invalidate_tags_area(int type); ··· 36 36 37 37 /* track which pages have valid allocation tags */ 38 38 #define PG_mte_tagged PG_arch_2 39 + /* simple lock to avoid multiple threads tagging the same page */ 40 + #define PG_mte_lock PG_arch_3 39 41 40 42 static inline void set_page_mte_tagged(struct page *page) 41 43 { ··· 60 58 if (ret) 61 59 smp_rmb(); 62 60 return ret; 61 + } 62 + 63 + /* 64 + * Lock the page for tagging and return 'true' if the page can be tagged, 65 + * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the 66 + * locking only happens once for page initialisation. 67 + * 68 + * The page MTE lock state: 69 + * 70 + * Locked: PG_mte_lock && !PG_mte_tagged 71 + * Unlocked: !PG_mte_lock || PG_mte_tagged 72 + * 73 + * Acquire semantics only if the page is tagged (returning 'false'). 74 + */ 75 + static inline bool try_page_mte_tagging(struct page *page) 76 + { 77 + if (!test_and_set_bit(PG_mte_lock, &page->flags)) 78 + return true; 79 + 80 + /* 81 + * The tags are either being initialised or may have been initialised 82 + * already. Check if the PG_mte_tagged flag has been set or wait 83 + * otherwise. 84 + */ 85 + smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged)); 86 + 87 + return false; 63 88 } 64 89 65 90 void mte_zero_clear_page_tags(void *addr); ··· 112 83 { 113 84 } 114 85 static inline bool page_mte_tagged(struct page *page) 86 + { 87 + return false; 88 + } 89 + static inline bool try_page_mte_tagging(struct page *page) 115 90 { 116 91 return false; 117 92 }
+2 -2
arch/arm64/include/asm/pgtable.h
··· 1049 1049 #define __HAVE_ARCH_SWAP_RESTORE 1050 1050 static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) 1051 1051 { 1052 - if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) 1053 - set_page_mte_tagged(&folio->page); 1052 + if (system_supports_mte()) 1053 + mte_restore_tags(entry, &folio->page); 1054 1054 } 1055 1055 1056 1056 #endif /* CONFIG_ARM64_MTE */
+1 -1
arch/arm64/kernel/cpufeature.c
··· 2050 2050 * Clear the tags in the zero page. This needs to be done via the 2051 2051 * linear map which has the Tagged attribute. 2052 2052 */ 2053 - if (!page_mte_tagged(ZERO_PAGE(0))) { 2053 + if (try_page_mte_tagging(ZERO_PAGE(0))) { 2054 2054 mte_clear_page_tags(lm_alias(empty_zero_page)); 2055 2055 set_page_mte_tagged(ZERO_PAGE(0)); 2056 2056 }
+3 -9
arch/arm64/kernel/mte.c
··· 41 41 if (check_swap && is_swap_pte(old_pte)) { 42 42 swp_entry_t entry = pte_to_swp_entry(old_pte); 43 43 44 - if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) { 45 - set_page_mte_tagged(page); 46 - return; 47 - } 44 + if (!non_swap_entry(entry)) 45 + mte_restore_tags(entry, page); 48 46 } 49 47 50 48 if (!pte_is_tagged) 51 49 return; 52 50 53 - /* 54 - * Test PG_mte_tagged again in case it was racing with another 55 - * set_pte_at(). 56 - */ 57 - if (!page_mte_tagged(page)) { 51 + if (try_page_mte_tagging(page)) { 58 52 mte_clear_page_tags(page_address(page)); 59 53 set_page_mte_tagged(page); 60 54 }
+10 -6
arch/arm64/kvm/guest.c
··· 1068 1068 clear_user(tags, MTE_GRANULES_PER_PAGE); 1069 1069 kvm_release_pfn_clean(pfn); 1070 1070 } else { 1071 + /* 1072 + * Only locking to serialise with a concurrent 1073 + * set_pte_at() in the VMM but still overriding the 1074 + * tags, hence ignoring the return value. 1075 + */ 1076 + try_page_mte_tagging(page); 1071 1077 num_tags = mte_copy_tags_from_user(maddr, tags, 1072 1078 MTE_GRANULES_PER_PAGE); 1073 1079 1074 - /* 1075 - * Set the flag after checking the write 1076 - * completed fully 1077 - */ 1078 - if (num_tags == MTE_GRANULES_PER_PAGE) 1079 - set_page_mte_tagged(page); 1080 + /* uaccess failed, don't leave stale tags */ 1081 + if (num_tags != MTE_GRANULES_PER_PAGE) 1082 + mte_clear_page_tags(page); 1083 + set_page_mte_tagged(page); 1080 1084 1081 1085 kvm_release_pfn_dirty(pfn); 1082 1086 }
+1 -1
arch/arm64/kvm/mmu.c
··· 1101 1101 return; 1102 1102 1103 1103 for (i = 0; i < nr_pages; i++, page++) { 1104 - if (!page_mte_tagged(page)) { 1104 + if (try_page_mte_tagging(page)) { 1105 1105 mte_clear_page_tags(page_address(page)); 1106 1106 set_page_mte_tagged(page); 1107 1107 }
+2
arch/arm64/mm/copypage.c
··· 23 23 24 24 if (system_supports_mte() && page_mte_tagged(from)) { 25 25 page_kasan_tag_reset(to); 26 + /* It's a new page, shouldn't have been tagged yet */ 27 + WARN_ON_ONCE(!try_page_mte_tagging(to)); 26 28 mte_copy_page_tags(kto, kfrom); 27 29 set_page_mte_tagged(to); 28 30 }
+2
arch/arm64/mm/fault.c
··· 933 933 934 934 void tag_clear_highpage(struct page *page) 935 935 { 936 + /* Newly allocated page, shouldn't have been tagged yet */ 937 + WARN_ON_ONCE(!try_page_mte_tagging(page)); 936 938 mte_zero_clear_page_tags(page_address(page)); 937 939 set_page_mte_tagged(page); 938 940 }
+5 -9
arch/arm64/mm/mteswap.c
··· 46 46 return 0; 47 47 } 48 48 49 - bool mte_restore_tags(swp_entry_t entry, struct page *page) 49 + void mte_restore_tags(swp_entry_t entry, struct page *page) 50 50 { 51 51 void *tags = xa_load(&mte_pages, entry.val); 52 52 53 53 if (!tags) 54 - return false; 54 + return; 55 55 56 - /* 57 - * Test PG_mte_tagged again in case it was racing with another 58 - * set_pte_at(). 59 - */ 60 - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) 56 + if (try_page_mte_tagging(page)) { 61 57 mte_restore_page_tags(page_address(page), tags); 62 - 63 - return true; 58 + set_page_mte_tagged(page); 59 + } 64 60 } 65 61 66 62 void mte_invalidate_tags(int type, pgoff_t offset)