Merge branch 'safe-dirty-tlb-flush'

This merges the patch to fix possible loss of the dirty bit on munmap() or
madvise(DONTNEED). If there are concurrent writers on other CPUs that
still have the unmapped/unneeded page in their TLBs, their writes to the
page could be lost if a third CPU raced with the TLB flush and did a
page_mkclean() before the page was fully written.

Admittedly, if you munmap() or madvise(DONTNEED) an area _while_ another
thread is still busy writing to it, you deserve all the lost writes you
could get. But we kernel people hold ourselves to higher quality
standards than "crazy people deserve to lose", because, well, we've seen
people do all kinds of crazy things.

So let's get it right, just because we can, and then we don't have to
worry about it.
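For concreteness, here is a minimal userspace sketch of that "crazy" usage
pattern (illustrative only, not part of the patch; the file name, sizes and
iteration counts are arbitrary): one thread keeps dirtying a shared
file-backed mapping while another thread repeatedly zaps the same range
with madvise(MADV_DONTNEED). Build with: cc -pthread race.c

/*
 * Illustration only: the usage pattern described above, as a normal
 * userspace program.  Whether any write is actually lost depends on
 * kernel version and timing; this just shows the shape of the race.
 */
#include <fcntl.h>
#include <pthread.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define LEN 4096UL

static char *map;

static void *writer(void *arg)
{
	(void)arg;
	/* Keep dirtying the page through this CPU's TLB entry. */
	for (int i = 0; i < 100000; i++)
		memset(map, 'x', LEN);
	return NULL;
}

int main(void)
{
	int fd = open("scratch.dat", O_RDWR | O_CREAT | O_TRUNC, 0600);
	pthread_t t;

	ftruncate(fd, LEN);
	map = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	pthread_create(&t, NULL, writer, NULL);

	/* Concurrently zap the page tables for the same range. */
	for (int i = 0; i < 100000; i++)
		madvise(map, LEN, MADV_DONTNEED);

	pthread_join(t, NULL);
	munmap(map, LEN);
	close(fd);
	return 0;
}

With this fix, the kernel flushes the stale TLB entries before dropping the
page table lock whenever it zaps a dirty pte, so a store from the writer
thread either lands before the page can be cleaned or takes a fault; it can
no longer sneak in after page_mkclean() and be dropped.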

* safe-dirty-tlb-flush:
mm: split 'tlb_flush_mmu()' into tlb flushing and memory freeing parts

Changed files: +111 -33

 arch/arm/include/asm/tlb.h  | +11  -1
 arch/ia64/include/asm/tlb.h | +32 -10
 arch/s390/include/asm/tlb.h | +12  -1
 arch/sh/include/asm/tlb.h   |  +8
 arch/um/include/asm/tlb.h   | +14  -2
 mm/memory.c                 | +34 -19
arch/arm/include/asm/tlb.h  +11 -1

···
 	}
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
 	tlb_flush(tlb);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
 	free_pages_and_swap_cache(tlb->pages, tlb->nr);
 	tlb->nr = 0;
 	if (tlb->pages == tlb->local)
 		__tlb_alloc_page(tlb);
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
 }
 
 static inline void
arch/ia64/include/asm/tlb.h  +32 -10

···
 #define RR_RID_MASK	0x00000000ffffff00L
 #define RR_TO_RID(val) 	((val >> 8) & 0xffffff)
 
-/*
- * Flush the TLB for address range START to END and, if not in fast mode, release the
- * freed pages that where gathered up to this point.
- */
 static inline void
-ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	unsigned long i;
-	unsigned int nr;
-
-	if (!tlb->need_flush)
-		return;
 	tlb->need_flush = 0;
 
 	if (tlb->fullmm) {
···
 		flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
 	}
 
+}
+
+static inline void
+ia64_tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	unsigned long i;
+	unsigned int nr;
+
 	/* lastly, release the freed pages */
 	nr = tlb->nr;
 
···
 	tlb->start_addr = ~0UL;
 	for (i = 0; i < nr; ++i)
 		free_page_and_swap_cache(tlb->pages[i]);
+}
+
+/*
+ * Flush the TLB for address range START to END and, if not in fast mode, release the
+ * freed pages that where gathered up to this point.
+ */
+static inline void
+ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	if (!tlb->need_flush)
+		return;
+	ia64_tlb_flush_mmu_tlbonly(tlb, start, end);
+	ia64_tlb_flush_mmu_free(tlb);
 }
 
 static inline void __tlb_alloc_page(struct mmu_gather *tlb)
···
 	VM_BUG_ON(tlb->nr > tlb->max);
 
 	return tlb->max - tlb->nr;
+}
+
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_free(tlb);
 }
 
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
arch/s390/include/asm/tlb.h  +12 -1

···
 	tlb->batch = NULL;
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
 	__tlb_flush_mm_lazy(tlb->mm);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
 	tlb_table_flush(tlb);
+}
+
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
arch/sh/include/asm/tlb.h  +8

···
 	}
 }
 
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+}
+
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 }
arch/um/include/asm/tlb.h  +14 -2

···
 			     unsigned long end);
 
 static inline void
+tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
+}
+
+static inline void
+tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	init_tlb_gather(tlb);
+}
+
+static inline void
 tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	if (!tlb->need_flush)
 		return;
 
-	flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
-	init_tlb_gather(tlb);
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
 }
 
 /* tlb_finish_mmu
mm/memory.c  +34 -19

···
 #endif
 }
 
-void tlb_flush_mmu(struct mmu_gather *tlb)
+static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-	struct mmu_gather_batch *batch;
-
-	if (!tlb->need_flush)
-		return;
 	tlb->need_flush = 0;
 	tlb_flush(tlb);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
+}
+
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	struct mmu_gather_batch *batch;
 
 	for (batch = &tlb->local; batch; batch = batch->next) {
 		free_pages_and_swap_cache(batch->pages, batch->nr);
 		batch->nr = 0;
 	}
 	tlb->active = &tlb->local;
+}
+
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	if (!tlb->need_flush)
+		return;
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
 }
 
 /* tlb_finish_mmu
···
 			if (PageAnon(page))
 				rss[MM_ANONPAGES]--;
 			else {
-				if (pte_dirty(ptent))
+				if (pte_dirty(ptent)) {
+					force_flush = 1;
 					set_page_dirty(page);
+				}
 				if (pte_young(ptent) &&
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
···
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			force_flush = !__tlb_remove_page(tlb, page);
-			if (force_flush)
+			if (unlikely(!__tlb_remove_page(tlb, page))) {
+				force_flush = 1;
 				break;
+			}
 			continue;
 		}
 		/*
···
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
-	pte_unmap_unlock(start_pte, ptl);
 
-	/*
-	 * mmu_gather ran out of room to batch pages, we break out of
-	 * the PTE lock to avoid doing the potential expensive TLB invalidate
-	 * and page-free while holding it.
-	 */
+	/* Do the actual TLB flush before dropping ptl */
 	if (force_flush) {
 		unsigned long old_end;
-
-		force_flush = 0;
 
 		/*
 		 * Flush the TLB just for the previous segment,
···
 		 */
 		old_end = tlb->end;
 		tlb->end = addr;
-
-		tlb_flush_mmu(tlb);
-
+		tlb_flush_mmu_tlbonly(tlb);
 		tlb->start = addr;
 		tlb->end = old_end;
+	}
+	pte_unmap_unlock(start_pte, ptl);
+
+	/*
+	 * If we forced a TLB flush (either due to running out of
+	 * batch buffers or because we needed to flush dirty TLB
+	 * entries before releasing the ptl), free the batched
+	 * memory too. Restart if we didn't do everything.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu_free(tlb);
 
 		if (addr != end)
 			goto again;