#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
	/* all mmu notifiers registered in this mm are queued in this list */
	struct hlist_head list;
	/* to serialize the list modifications and hlist_unhashed */
	spinlock_t lock;
};

struct mmu_notifier_ops {
	/*
	 * Called either by mmu_notifier_unregister or when the mm is
	 * being destroyed by exit_mmap, always before all pages are
	 * freed. This can run concurrently with other mmu notifier
	 * methods (the ones invoked outside the mm context) and it
	 * should tear down all secondary mmu mappings and freeze the
	 * secondary mmu. If this method isn't implemented you have to
	 * be sure that nothing could possibly write to the pages
	 * through the secondary mmu by the time the last thread with
	 * tsk->mm == mm exits.
	 *
	 * As a side note: the pages freed after ->release returns could
	 * be immediately reallocated by the gart at an alias physical
	 * address with a different cache model, so if ->release isn't
	 * implemented because all _software_ driven memory accesses
	 * through the secondary mmu are terminated by the time the
	 * last thread of this mm quits, you also have to be sure that
	 * speculative _hardware_ operations can't allocate dirty
	 * cachelines in the cpu that could not be snooped and made
	 * coherent with the other read and write operations happening
	 * through the gart alias address, thus leading to memory
	 * corruption.
	 */
	void (*release)(struct mmu_notifier *mn,
			struct mm_struct *mm);

	/*
	 * clear_flush_young is called after the VM test-and-clears
	 * the young/accessed bitflag in the pte. This way the VM will
	 * provide proper aging for accesses to the page through the
	 * secondary MMUs and not only for the ones through the Linux
	 * pte.
	 * Start-end is necessary in case the secondary MMU is mapping
	 * the page at a smaller granularity than the primary MMU.
	 */
	int (*clear_flush_young)(struct mmu_notifier *mn,
				 struct mm_struct *mm,
				 unsigned long start,
				 unsigned long end);

	/*
	 * test_young is called to check the young/accessed bitflag in
	 * the secondary pte. This is used to know if the page is
	 * frequently used without actually clearing the flag or tearing
	 * down the secondary mapping on the page.
	 */
	int (*test_young)(struct mmu_notifier *mn,
			  struct mm_struct *mm,
			  unsigned long address);

	/*
	 * change_pte is called when the pte mapping a page is changed:
	 * for example, when ksm remaps the pte to point to a new shared
	 * page.
	 */
	void (*change_pte)(struct mmu_notifier *mn,
			   struct mm_struct *mm,
			   unsigned long address,
			   pte_t pte);

	/*
	 * Before this is invoked any secondary MMU is still ok to
	 * read/write to the page previously pointed to by the Linux
	 * pte because the page hasn't been freed yet and it won't be
	 * freed until this returns. If required, set_page_dirty has to
	 * be called internally to this method.
	 */
	void (*invalidate_page)(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address);

	/*
	 * invalidate_range_start() and invalidate_range_end() must be
	 * paired and are called only when the mmap_sem and/or the
	 * locks protecting the reverse maps are held. The subsystem
	 * must guarantee that no additional references are taken to
	 * the pages in the range established between the call to
	 * invalidate_range_start() and the matching call to
	 * invalidate_range_end().
	 *
	 * The driver may optionally permit invalidation of multiple
	 * concurrent ranges. Either way the establishment of sptes is
	 * forbidden in the range passed to invalidate_range_start/end
	 * for the whole duration of the invalidate_range_start/end
	 * critical section.
	 *
	 * invalidate_range_start() is called when all pages in the
	 * range are still mapped and have at least a refcount of one.
	 *
	 * invalidate_range_end() is called when all pages in the
	 * range have been unmapped and the pages have been freed by
	 * the VM.
	 *
	 * The VM will remove the page table entries and potentially
	 * the page between invalidate_range_start() and
	 * invalidate_range_end(). If the page must not be freed
	 * because of pending I/O or other circumstances then the
	 * invalidate_range_start() callback (or the initial mapping
	 * by the driver) must make sure that the refcount is kept
	 * elevated.
	 *
	 * If the driver increases the refcount when the pages are
	 * initially mapped into an address space then either
	 * invalidate_range_start() or invalidate_range_end() may
	 * decrease the refcount. If the refcount is decreased on
	 * invalidate_range_start() then the VM can free pages as page
	 * table entries are removed. If the refcount is only dropped
	 * on invalidate_range_end() then the driver itself will drop
	 * the last refcount, but it must take care to flush any
	 * secondary tlb before doing the final free on the page.
	 * Pages will no longer be referenced by the Linux address
	 * space but may still be referenced by sptes until the last
	 * refcount is dropped.
	 */
	void (*invalidate_range_start)(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end);
	void (*invalidate_range_end)(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start, unsigned long end);
};
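
/*
 * Illustrative sketch of how a secondary-MMU driver might wire these
 * callbacks up. The driver type and helpers below (struct example_mmu,
 * example_zap_range(), example_stop_dma()) are hypothetical, not from any
 * in-tree driver; a real implementation must follow the refcount and spte
 * establishment rules documented above.
 *
 *	struct example_mmu {
 *		struct mmu_notifier	mn;
 *		spinlock_t		spte_lock;
 *		// driver-private shadow page tables live here
 *	};
 *
 *	static void example_invalidate_range_start(struct mmu_notifier *mn,
 *						   struct mm_struct *mm,
 *						   unsigned long start,
 *						   unsigned long end)
 *	{
 *		struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);
 *
 *		// tear down every spte covering [start, end) and flush the
 *		// secondary TLB; no new sptes may be established for this
 *		// range until ->invalidate_range_end() has run
 *		example_zap_range(emmu, start, end);
 *	}
 *
 *	static void example_invalidate_range_end(struct mmu_notifier *mn,
 *						 struct mm_struct *mm,
 *						 unsigned long start,
 *						 unsigned long end)
 *	{
 *		// nothing to do here: the sptes were already zapped in
 *		// ->invalidate_range_start()
 *	}
 *
 *	static void example_release(struct mmu_notifier *mn,
 *				    struct mm_struct *mm)
 *	{
 *		struct example_mmu *emmu = container_of(mn, struct example_mmu, mn);
 *
 *		// the mm is going away: freeze the secondary mmu and drop
 *		// every remaining spte
 *		example_stop_dma(emmu);
 *		example_zap_range(emmu, 0, -1UL);
 *	}
 *
 *	static const struct mmu_notifier_ops example_mmu_ops = {
 *		.release		= example_release,
 *		.invalidate_range_start	= example_invalidate_range_start,
 *		.invalidate_range_end	= example_invalidate_range_end,
 *	};
 */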

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release).
 */
struct mmu_notifier {
	struct hlist_node hlist;
	const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
				 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
				   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
				    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
					       struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
				     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
				      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
					   unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
						  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						unsigned long start, unsigned long end);

static inline void mmu_notifier_release(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_flush_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_test_young(mm, address);
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
						       unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						     unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_end(mm, start, end);
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
	mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_mm_destroy(mm);
}
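
/*
 * Illustrative sketch of typical registration and teardown, using the
 * hypothetical struct example_mmu and example_mmu_ops from the sketch
 * after struct mmu_notifier_ops above. The caller is assumed to hold a
 * reference on the mm (for instance one obtained with get_task_mm()) so
 * that mm_users stays elevated across mmu_notifier_register(). Note that
 * mmu_notifier_register() takes mmap_sem internally, so it must not be
 * called with mmap_sem already held; __mmu_notifier_register() exists for
 * callers that already hold mmap_sem for writing.
 *
 *	static int example_mmu_attach(struct example_mmu *emmu,
 *				      struct mm_struct *mm)
 *	{
 *		emmu->mn.ops = &example_mmu_ops;
 *		return mmu_notifier_register(&emmu->mn, mm);
 *	}
 *
 *	static void example_mmu_detach(struct example_mmu *emmu,
 *				       struct mm_struct *mm)
 *	{
 *		// safe even if ->release() already ran because the mm
 *		// exited first; this drops the mm_count reference taken
 *		// by mmu_notifier_register()
 *		mmu_notifier_unregister(&emmu->mn, mm);
 *	}
 */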

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PMD_SIZE);	\
	__young;							\
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to update the secondary MMUs first because the primary MMU
 * pte invalidate must have already happened with a ptep_clear_flush()
 * before set_pte_at_notify() is invoked. Updating the secondary MMUs
 * first is required when we change both the protection of the mapping
 * from read-only to read-write and the pfn (like during copy on write
 * page faults). Otherwise the old page would remain mapped read-only in
 * the secondary MMUs after the new page is already writable by some CPU
 * through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)		\
({									\
	struct mm_struct *___mm = __mm;					\
	unsigned long ___address = __address;				\
	pte_t ___pte = __pte;						\
									\
	mmu_notifier_change_pte(___mm, ___address, ___pte);		\
	set_pte_at(___mm, ___address, __ptep, ___pte);			\
})

extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
				   void (*func)(struct rcu_head *rcu));
extern void mmu_notifier_synchronize(void);

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
						       unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						     unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */
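
/*
 * Illustrative sketch of the ordering that set_pte_at_notify() relies on,
 * loosely modeled on a copy-on-write style page replacement. Locking,
 * page allocation, accounting and error handling are all omitted; only
 * the invalidate-then-notify-then-map order matters here.
 *
 *	// old pte maps the page read-only; new_page holds the private copy
 *	flush_cache_page(vma, address, pte_pfn(*ptep));
 *	ptep_clear_flush(vma, address, ptep);
 *	// the primary MMU mapping and TLB entry are gone at this point,
 *	// so no CPU can write through the old pte anymore
 *	entry = mk_pte(new_page, vma->vm_page_prot);
 *	entry = pte_mkwrite(pte_mkdirty(entry));
 *	// set_pte_at_notify() first calls ->change_pte() so the secondary
 *	// MMUs drop or update their old read-only mapping, and only then
 *	// installs the new, writable pte in the primary MMU
 *	set_pte_at_notify(mm, address, ptep, entry);
 */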