include/asm-generic/tlb.h at v6.19-rc8 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / asm-generic / tlb.h
at v6.19-rc8 851 lines 25 kB view raw
  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/* include/asm-generic/tlb.h
  3 *
  4 *	Generic TLB shootdown code
  5 *
  6 * Copyright 2001 Red Hat, Inc.
  7 * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  8 *
  9 * Copyright 2011 Red Hat, Inc., Peter Zijlstra
 10 */
 11#ifndef _ASM_GENERIC__TLB_H
 12#define _ASM_GENERIC__TLB_H
 13
 14#include <linux/mmu_notifier.h>
 15#include <linux/swap.h>
 16#include <linux/hugetlb_inline.h>
 17#include <asm/tlbflush.h>
 18#include <asm/cacheflush.h>
 19
 20/*
 21 * Blindly accessing user memory from NMI context can be dangerous
 22 * if we're in the middle of switching the current user task or switching
 23 * the loaded mm.
 24 */
 25#ifndef nmi_uaccess_okay
 26# define nmi_uaccess_okay() true
 27#endif
 28
 29#ifdef CONFIG_MMU
 30
 31/*
 32 * Generic MMU-gather implementation.
 33 *
 34 * The mmu_gather data structure is used by the mm code to implement the
 35 * correct and efficient ordering of freeing pages and TLB invalidations.
 36 *
 37 * This correct ordering is:
 38 *
 39 *  1) unhook page
 40 *  2) TLB invalidate page
 41 *  3) free page
 42 *
 43 * That is, we must never free a page before we have ensured there are no live
 44 * translations left to it. Otherwise it might be possible to observe (or
 45 * worse, change) the page content after it has been reused.
 46 *
 47 * The mmu_gather API consists of:
 48 *
 49 *  - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_gather_mmu_vma() /
 50 *    tlb_finish_mmu()
 51 *
 52 *    start and finish a mmu_gather
 53 *
 54 *    Finish in particular will issue a (final) TLB invalidate and free
 55 *    all (remaining) queued pages.
 56 *
 57 *  - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA
 58 *
 59 *    Defaults to flushing at tlb_end_vma() to reset the range; helps when
  60 *    there are large holes between the VMAs.
 61 *
 62 *  - tlb_free_vmas()
 63 *
 64 *    tlb_free_vmas() marks the start of unlinking of one or more vmas
 65 *    and freeing page-tables.
 66 *
 67 *  - tlb_remove_table()
 68 *
 69 *    tlb_remove_table() is the basic primitive to free page-table directories
  70 *    (__p*_free_tlb()).  In its most primitive form it is an alias for
 71 *    tlb_remove_page() below, for when page directories are pages and have no
 72 *    additional constraints.
 73 *
 74 *    See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE.
 75 *
 76 *  - tlb_remove_page() / tlb_remove_page_size()
 77 *  - __tlb_remove_folio_pages() / __tlb_remove_page_size()
 78 *  - __tlb_remove_folio_pages_size()
 79 *
 80 *    __tlb_remove_folio_pages_size() is the basic primitive that queues pages
 81 *    for freeing. It will return a boolean indicating if the queue is (now)
 82 *    full and a call to tlb_flush_mmu() is required.
 83 *
 84 *    tlb_remove_page() and tlb_remove_page_size() imply the call to
  85 *    tlb_flush_mmu() when required and have no return value.
 86 *
 87 *    __tlb_remove_folio_pages() is similar to __tlb_remove_page_size(),
 88 *    however, instead of removing a single page, assume PAGE_SIZE and remove
 89 *    the given number of consecutive pages that are all part of the
 90 *    same (large) folio.
 91 *
 92 *  - tlb_change_page_size()
 93 *
 94 *    call before __tlb_remove_page*() to set the current page-size; implies a
 95 *    possible tlb_flush_mmu() call.
 96 *
 97 *  - tlb_flush_mmu() / tlb_flush_mmu_tlbonly()
 98 *
 99 *    tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets
100 *                              related state, like the range)
101 *
102 *    tlb_flush_mmu() - in addition to the above TLB invalidate, also frees
103 *			whatever pages are still batched.
104 *
105 *  - mmu_gather::fullmm
106 *
107 *    A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free
108 *    the entire mm; this allows a number of optimizations.
109 *
110 *    - We can ignore tlb_{start,end}_vma(); because we don't
111 *      care about ranges. Everything will be shot down.
112 *
113 *    - (RISC) architectures that use ASIDs can cycle to a new ASID
114 *      and delay the invalidation until ASID space runs out.
115 *
116 *  - mmu_gather::need_flush_all
117 *
118 *    A flag that can be set by the arch code if it wants to force
119 *    flush the entire TLB irrespective of the range. For instance
120 *    x86-PAE needs this when changing top-level entries.
121 *
122 * And allows the architecture to provide and implement tlb_flush():
123 *
124 * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make
125 * use of:
126 *
127 *  - mmu_gather::start / mmu_gather::end
128 *
129 *    which provides the range that needs to be flushed to cover the pages to
130 *    be freed.
131 *
132 *  - mmu_gather::freed_tables
133 *
134 *    set when we freed page table pages
135 *
136 *  - tlb_get_unmap_shift() / tlb_get_unmap_size()
137 *
138 *    returns the smallest TLB entry size unmapped in this range.
139 *
140 * If an architecture does not provide tlb_flush() a default implementation
141 * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is
142 * specified, in which case we'll default to flush_tlb_mm().
143 *
144 * Additionally there are a few opt-in features:
145 *
146 *  MMU_GATHER_PAGE_SIZE
147 *
148 *  This ensures we call tlb_flush() every time tlb_change_page_size() actually
149 *  changes the size and provides mmu_gather::page_size to tlb_flush().
150 *
151 *  This might be useful if your architecture has size specific TLB
152 *  invalidation instructions.
153 *
154 *  MMU_GATHER_TABLE_FREE
155 *
156 *  This provides tlb_remove_table(), to be used instead of tlb_remove_page()
 157 *  for page directories (__p*_free_tlb()).
158 *
159 *  Useful if your architecture has non-page page directories.
160 *
161 *  When used, an architecture is expected to provide __tlb_remove_table() or
162 *  use the generic __tlb_remove_table(), which does the actual freeing of these
163 *  pages.
164 *
165 *  MMU_GATHER_RCU_TABLE_FREE
166 *
167 *  Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see
168 *  comment below).
169 *
170 *  Useful if your architecture doesn't use IPIs for remote TLB invalidates
171 *  and therefore doesn't naturally serialize with software page-table walkers.
172 *
173 *  MMU_GATHER_NO_FLUSH_CACHE
174 *
175 *  Indicates the architecture has flush_cache_range() but it needs *NOT* be called
176 *  before unmapping a VMA.
177 *
178 *  NOTE: strictly speaking we shouldn't have this knob and instead rely on
179 *	  flush_cache_range() being a NOP, except Sparc64 seems to be
180 *	  different here.
181 *
182 *  MMU_GATHER_MERGE_VMAS
183 *
184 *  Indicates the architecture wants to merge ranges over VMAs; typical when
185 *  multiple range invalidates are more expensive than a full invalidate.
186 *
187 *  MMU_GATHER_NO_RANGE
188 *
189 *  Use this if your architecture lacks an efficient flush_tlb_range(). This
190 *  option implies MMU_GATHER_MERGE_VMAS above.
191 *
192 *  MMU_GATHER_NO_GATHER
193 *
194 *  If the option is set the mmu_gather will not track individual pages for
195 *  delayed page free anymore. A platform that enables the option needs to
196 *  provide its own implementation of the __tlb_remove_page_size() function to
197 *  free pages.
198 *
199 *  This is useful if your architecture already flushes TLB entries in the
200 *  various ptep_get_and_clear() functions.
201 */
202
203#ifdef CONFIG_MMU_GATHER_TABLE_FREE
204
/*
 * One batch of page-table pointers queued for freeing.
 *
 * MAX_TABLE_BATCH is the number of tables[] slots that fit in a page after
 * this header.
 */
struct mmu_table_batch {
#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
	/* Only present when tables are freed with semi-RCU semantics. */
	struct rcu_head rcu;
#endif
	unsigned int nr;
	void *tables[];
};
212
213#define MAX_TABLE_BATCH		\
214	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
215
216#ifndef __HAVE_ARCH_TLB_REMOVE_TABLE
/*
 * Generic __tlb_remove_table(): interpret @table as a ptdesc and pass it to
 * pagetable_dtor_free().
 */
static inline void __tlb_remove_table(void *table)
{
	pagetable_dtor_free((struct ptdesc *)table);
}
223#endif
224
225extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
226
227#else /* !CONFIG_MMU_GATHER_TABLE_FREE */
228
229static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page);
/*
 * An architecture that does not select MMU_GATHER_TABLE_FREE is assumed to
 * use whole pages as page directories, so they can be freed through the
 * regular page batching.
 */
static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct ptdesc *pt = table;

	/* Run the page-table destructor, then queue the backing page. */
	pagetable_dtor(pt);
	tlb_remove_page(tlb, ptdesc_page(pt));
}
241#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
242
243#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
244/*
245 * This allows an architecture that does not use the linux page-tables for
246 * hardware to skip the TLBI when freeing page tables.
247 */
248#ifndef tlb_needs_table_invalidate
249#define tlb_needs_table_invalidate() (true)
250#endif
251
252void tlb_remove_table_sync_one(void);
253
254#else
255
256#ifdef tlb_needs_table_invalidate
257#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
258#endif
259
260static inline void tlb_remove_table_sync_one(void) { }
261
262#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
263
264
265#ifndef CONFIG_MMU_GATHER_NO_GATHER
266/*
267 * If we can't allocate a page to make a big batch of page pointers
268 * to work on, then just handle a few from the on-stack structure.
269 */
270#define MMU_GATHER_BUNDLE	8
271
/*
 * One link in a chain of page-pointer batches.
 *
 * MAX_GATHER_BATCH is the number of encoded_pages[] slots that fit in a page
 * after this header.
 */
struct mmu_gather_batch {
	struct mmu_gather_batch *next;
	unsigned int nr;
	unsigned int max;
	struct encoded_page *encoded_pages[];
};
278
279#define MAX_GATHER_BATCH	\
280	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
281
282/*
283 * Limit the maximum number of mmu_gather batches to reduce a risk of soft
284 * lockups for non-preemptible kernels on huge machines when a lot of memory
285 * is zapped during unmapping.
286 * 10K pages freed at once should be safe even without a preemption point.
287 */
288#define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)
289
290extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
291		bool delay_rmap, int page_size);
292bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
293		unsigned int nr_pages, bool delay_rmap);
294
295#ifdef CONFIG_SMP
296/*
297 * This both sets 'delayed_rmap', and returns true. It would be an inline
298 * function, except we define it before the 'struct mmu_gather'.
299 */
300#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
301extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
302#endif
303
304#endif
305
306/*
307 * We have a no-op version of the rmap removal that doesn't
308 * delay anything. That is used on S390, which flushes remote
309 * TLBs synchronously, and on UP, which doesn't have any
310 * remote TLBs to flush and is not preemptible due to this
311 * all happening under the page table lock.
312 */
313#ifndef tlb_delay_rmap
314#define tlb_delay_rmap(tlb) (false)
315static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
316#endif
317
318/*
319 * struct mmu_gather is an opaque type used by the mm code for passing around
320 * any data needed by arch specific code for tlb_remove_page.
321 */
322struct mmu_gather {
323	struct mm_struct	*mm;
324
325#ifdef CONFIG_MMU_GATHER_TABLE_FREE
326	struct mmu_table_batch	*batch;
327#endif
328
329	unsigned long		start;
330	unsigned long		end;
331	/*
332	 * we are in the middle of an operation to clear
333	 * a full mm and can make some optimizations
334	 */
335	unsigned int		fullmm : 1;
336
337	/*
338	 * we have performed an operation which
339	 * requires a complete flush of the tlb
340	 */
341	unsigned int		need_flush_all : 1;
342
343	/*
344	 * we have removed page directories
345	 */
346	unsigned int		freed_tables : 1;
347
348	/*
349	 * Do we have pending delayed rmap removals?
350	 */
351	unsigned int		delayed_rmap : 1;
352
353	/*
354	 * at which levels have we cleared entries?
355	 */
356	unsigned int		cleared_ptes : 1;
357	unsigned int		cleared_pmds : 1;
358	unsigned int		cleared_puds : 1;
359	unsigned int		cleared_p4ds : 1;
360
361	/*
362	 * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma
363	 */
364	unsigned int		vma_exec : 1;
365	unsigned int		vma_huge : 1;
366	unsigned int		vma_pfn  : 1;
367
368	/*
369	 * Did we unshare (unmap) any shared page tables? For now only
370	 * used for hugetlb PMD table sharing.
371	 */
372	unsigned int		unshared_tables : 1;
373
374	/*
375	 * Did we unshare any page tables such that they are now exclusive
376	 * and could get reused+modified by the new owner? When setting this
377	 * flag, "unshared_tables" will be set as well. For now only used
378	 * for hugetlb PMD table sharing.
379	 */
380	unsigned int		fully_unshared_tables : 1;
381
382	unsigned int		batch_count;
383
384#ifndef CONFIG_MMU_GATHER_NO_GATHER
385	struct mmu_gather_batch *active;
386	struct mmu_gather_batch	local;
387	struct page		*__pages[MMU_GATHER_BUNDLE];
388
389#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
390	unsigned int page_size;
391#endif
392#endif
393};
394
395void tlb_flush_mmu(struct mmu_gather *tlb);
396
397static inline void __tlb_adjust_range(struct mmu_gather *tlb,
398				      unsigned long address,
399				      unsigned int range_size)
400{
401	tlb->start = min(tlb->start, address);
402	tlb->end = max(tlb->end, address + range_size);
403}
404
405static inline void __tlb_reset_range(struct mmu_gather *tlb)
406{
407	if (tlb->fullmm) {
408		tlb->start = tlb->end = ~0;
409	} else {
410		tlb->start = TASK_SIZE;
411		tlb->end = 0;
412	}
413	tlb->freed_tables = 0;
414	tlb->cleared_ptes = 0;
415	tlb->cleared_pmds = 0;
416	tlb->cleared_puds = 0;
417	tlb->cleared_p4ds = 0;
418	tlb->unshared_tables = 0;
419	/*
420	 * Do not reset mmu_gather::vma_* fields here, we do not
421	 * call into tlb_start_vma() again to set them if there is an
422	 * intermediate flush.
423	 */
424}
425
426#ifdef CONFIG_MMU_GATHER_NO_RANGE
427
428#if defined(tlb_flush)
429#error MMU_GATHER_NO_RANGE relies on default tlb_flush()
430#endif
431
432/*
433 * When an architecture does not have efficient means of range flushing TLBs
434 * there is no point in doing intermediate flushes on tlb_end_vma() to keep the
435 * range small. We equally don't have to worry about page granularity or other
436 * things.
437 *
438 * All we need to do is issue a full flush for any !0 range.
439 */
440static inline void tlb_flush(struct mmu_gather *tlb)
441{
442	if (tlb->end)
443		flush_tlb_mm(tlb->mm);
444}
445
446#else /* CONFIG_MMU_GATHER_NO_RANGE */
447
448#ifndef tlb_flush
449/*
450 * When an architecture does not provide its own tlb_flush() implementation
451 * but does have a reasonably efficient flush_vma_range() implementation
452 * use that.
453 */
454static inline void tlb_flush(struct mmu_gather *tlb)
455{
456	if (tlb->fullmm || tlb->need_flush_all) {
457		flush_tlb_mm(tlb->mm);
458	} else if (tlb->end) {
459		struct vm_area_struct vma = {
460			.vm_mm = tlb->mm,
461			.vm_flags = (tlb->vma_exec ? VM_EXEC    : 0) |
462				    (tlb->vma_huge ? VM_HUGETLB : 0),
463		};
464
465		flush_tlb_range(&vma, tlb->start, tlb->end);
466	}
467}
468#endif
469
470#endif /* CONFIG_MMU_GATHER_NO_RANGE */
471
472static inline void
473tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
474{
475	/*
476	 * flush_tlb_range() implementations that look at VM_HUGETLB (tile,
477	 * mips-4k) flush only large pages.
478	 *
479	 * flush_tlb_range() implementations that flush I-TLB also flush D-TLB
480	 * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing
481	 * range.
482	 *
483	 * We rely on tlb_end_vma() to issue a flush, such that when we reset
484	 * these values the batch is empty.
485	 */
486	tlb->vma_huge = is_vm_hugetlb_page(vma);
487	tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
488
489	/*
490	 * Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma
491	 * in the tracked range, see tlb_free_vmas().
492	 */
493	tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
494}
495
496static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
497{
498	/*
499	 * Anything calling __tlb_adjust_range() also sets at least one of
500	 * these bits.
501	 */
502	if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
503	      tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables))
504		return;
505
506	tlb_flush(tlb);
507	__tlb_reset_range(tlb);
508}
509
510static inline void tlb_remove_page_size(struct mmu_gather *tlb,
511					struct page *page, int page_size)
512{
513	if (__tlb_remove_page_size(tlb, page, false, page_size))
514		tlb_flush_mmu(tlb);
515}
516
517static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
518{
519	return tlb_remove_page_size(tlb, page, PAGE_SIZE);
520}
521
/* Typed front-end for tlb_remove_table() taking a ptdesc. */
static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt)
{
	void *table = pt;

	tlb_remove_table(tlb, table);
}
526
/*
 * Set the current page size for subsequent __tlb_remove_page*() calls.
 *
 * Only does anything with CONFIG_MMU_GATHER_PAGE_SIZE. In that case a change
 * from one non-zero size to a different one triggers tlb_flush_mmu(), unless
 * a full-mm or flush-all operation is in progress.
 */
static inline void tlb_change_page_size(struct mmu_gather *tlb,
						     unsigned int page_size)
{
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	const bool size_changed = tlb->page_size &&
				  tlb->page_size != page_size;

	if (size_changed && !tlb->fullmm && !tlb->need_flush_all)
		tlb_flush_mmu(tlb);

	tlb->page_size = page_size;
#endif
}
539
540static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
541{
542	if (tlb->cleared_ptes)
543		return PAGE_SHIFT;
544	if (tlb->cleared_pmds)
545		return PMD_SHIFT;
546	if (tlb->cleared_puds)
547		return PUD_SHIFT;
548	if (tlb->cleared_p4ds)
549		return P4D_SHIFT;
550
551	return PAGE_SHIFT;
552}
553
/* Byte-size counterpart of tlb_get_unmap_shift(). */
static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
{
	const unsigned long shift = tlb_get_unmap_shift(tlb);

	return 1UL << shift;
}
558
559/*
560 * In the case of tlb vma handling, we can optimise these away in the
561 * case where we're doing a full MM flush.  When we're doing a munmap,
562 * the vmas are adjusted to only cover the region to be torn down.
563 */
564static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
565{
566	if (tlb->fullmm)
567		return;
568
569	tlb_update_vma_flags(tlb, vma);
570#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
571	flush_cache_range(vma, vma->vm_start, vma->vm_end);
572#endif
573}
574
575static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
576{
577	if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
578		return;
579
580	/*
581	 * Do a TLB flush and reset the range at VMA boundaries; this avoids
582	 * the ranges growing with the unused space between consecutive VMAs,
583	 * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
584	 * this.
585	 */
586	tlb_flush_mmu_tlbonly(tlb);
587}
588
589static inline void tlb_free_vmas(struct mmu_gather *tlb)
590{
591	if (tlb->fullmm)
592		return;
593
594	/*
595	 * VM_PFNMAP is more fragile because the core mm will not track the
596	 * page mapcount -- there might not be page-frames for these PFNs
597	 * after all.
598	 *
599	 * Specifically() there is a race between munmap() and
600	 * unmap_mapping_range(), where munmap() will unlink the VMA, such
601	 * that unmap_mapping_range() will no longer observe the VMA and
602	 * no-op, without observing the TLBI, returning prematurely.
603	 *
604	 * So if we're about to unlink such a VMA, and we have pending
605	 * TLBI for such a vma, flush things now.
606	 */
607	if (tlb->vma_pfn)
608		tlb_flush_mmu_tlbonly(tlb);
609}
610
611/*
612 * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
613 * and set corresponding cleared_*.
614 */
615static inline void tlb_flush_pte_range(struct mmu_gather *tlb,
616				     unsigned long address, unsigned long size)
617{
618	__tlb_adjust_range(tlb, address, size);
619	tlb->cleared_ptes = 1;
620}
621
622static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
623				     unsigned long address, unsigned long size)
624{
625	__tlb_adjust_range(tlb, address, size);
626	tlb->cleared_pmds = 1;
627}
628
629static inline void tlb_flush_pud_range(struct mmu_gather *tlb,
630				     unsigned long address, unsigned long size)
631{
632	__tlb_adjust_range(tlb, address, size);
633	tlb->cleared_puds = 1;
634}
635
636static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
637				     unsigned long address, unsigned long size)
638{
639	__tlb_adjust_range(tlb, address, size);
640	tlb->cleared_p4ds = 1;
641}
642
643#ifndef __tlb_remove_tlb_entry
644static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
645{
646}
647#endif
648
/**
 * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
 *
 * Updates the range to record that a pte was really unmapped, which lets the
 * tlb invalidate be optimised away later when nothing was.  That helps when
 * userspace unmaps already-unmapped pages, which happens quite a lot.
 *
 * Note: @tlb and @address are expanded more than once.
 */
#define tlb_remove_tlb_entry(tlb, ptep, address)		\
	do {							\
		tlb_flush_pte_range(tlb, address, PAGE_SIZE);	\
		__tlb_remove_tlb_entry(tlb, ptep, address);	\
	} while (0)
661
662/**
663 * tlb_remove_tlb_entries - remember unmapping of multiple consecutive ptes for
664 *			    later tlb invalidation.
665 *
666 * Similar to tlb_remove_tlb_entry(), but remember unmapping of multiple
667 * consecutive ptes instead of only a single one.
668 */
669static inline void tlb_remove_tlb_entries(struct mmu_gather *tlb,
670		pte_t *ptep, unsigned int nr, unsigned long address)
671{
672	tlb_flush_pte_range(tlb, address, PAGE_SIZE * nr);
673	for (;;) {
674		__tlb_remove_tlb_entry(tlb, ptep, address);
675		if (--nr == 0)
676			break;
677		ptep++;
678		address += PAGE_SIZE;
679	}
680}
681
/*
 * Remember the unmapping of one huge page of hstate @h: the range is tracked
 * at the highest page-table level whose size does not exceed the huge page
 * size, falling back to the PTE level.
 */
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)		\
	do {								\
		unsigned long _hsize = huge_page_size(h);		\
		if (_hsize >= P4D_SIZE)					\
			tlb_flush_p4d_range(tlb, address, _hsize);	\
		else if (_hsize >= PUD_SIZE)				\
			tlb_flush_pud_range(tlb, address, _hsize);	\
		else if (_hsize >= PMD_SIZE)				\
			tlb_flush_pmd_range(tlb, address, _hsize);	\
		else							\
			tlb_flush_pte_range(tlb, address, _hsize);	\
		__tlb_remove_tlb_entry(tlb, ptep, address);		\
	} while (0)
695
696/**
697 * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
698 * This is a nop so far, because only x86 needs it.
699 */
700#ifndef __tlb_remove_pmd_tlb_entry
701#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
702#endif
703
/* Track a HPAGE_PMD_SIZE range at the PMD level, then run the arch hook. */
#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)		\
do {								\
	tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE);	\
	__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);		\
} while (0)
709
710/**
711 * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
712 * invalidation. This is a nop so far, because only x86 needs it.
713 */
714#ifndef __tlb_remove_pud_tlb_entry
715#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
716#endif
717
/* Track a HPAGE_PUD_SIZE range at the PUD level, then run the arch hook. */
#define tlb_remove_pud_tlb_entry(tlb, pudp, address)		\
do {								\
	tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE);	\
	__tlb_remove_pud_tlb_entry(tlb, pudp, address);		\
} while (0)
723
724/*
725 * For things like page tables caches (ie caching addresses "inside" the
726 * page tables, like x86 does), for legacy reasons, flushing an
727 * individual page had better flush the page table caches behind it. This
728 * is definitely how x86 works, for example. And if you have an
729 * architected non-legacy page table cache (which I'm not aware of
730 * anybody actually doing), you're going to have some architecturally
731 * explicit flushing for that, likely *separate* from a regular TLB entry
732 * flush, and thus you'd need more than just some range expansion..
733 *
734 * So if we ever find an architecture
735 * that would want something that odd, I think it is up to that
736 * architecture to do its own odd thing, not cause pain for others
737 * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
738 *
739 * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
740 */
741
742#ifndef pte_free_tlb
/*
 * Free a PTE table through the gather: track a PAGE_SIZE range at the PMD
 * level, note that page tables were freed, then call the arch
 * __pte_free_tlb().
 *
 * @tlb is parenthesized before the member access so that any pointer
 * expression can be passed safely.
 */
#define pte_free_tlb(tlb, ptep, address)			\
	do {							\
		tlb_flush_pmd_range(tlb, address, PAGE_SIZE);	\
		(tlb)->freed_tables = 1;			\
		__pte_free_tlb(tlb, ptep, address);		\
	} while (0)
749#endif
750
751#ifndef pmd_free_tlb
/*
 * Free a PMD table through the gather: track a PAGE_SIZE range at the PUD
 * level, note that page tables were freed, then call the arch
 * __pmd_free_tlb().
 *
 * @tlb is parenthesized before the member access so that any pointer
 * expression can be passed safely.
 */
#define pmd_free_tlb(tlb, pmdp, address)			\
	do {							\
		tlb_flush_pud_range(tlb, address, PAGE_SIZE);	\
		(tlb)->freed_tables = 1;			\
		__pmd_free_tlb(tlb, pmdp, address);		\
	} while (0)
758#endif
759
760#ifndef pud_free_tlb
/*
 * Free a PUD table through the gather: track a PAGE_SIZE range at the P4D
 * level, note that page tables were freed, then call the arch
 * __pud_free_tlb().
 *
 * @tlb is parenthesized before the member access so that any pointer
 * expression can be passed safely.
 */
#define pud_free_tlb(tlb, pudp, address)			\
	do {							\
		tlb_flush_p4d_range(tlb, address, PAGE_SIZE);	\
		(tlb)->freed_tables = 1;			\
		__pud_free_tlb(tlb, pudp, address);		\
	} while (0)
767#endif
768
769#ifndef p4d_free_tlb
/*
 * Free a P4D table through the gather: extend the range by PAGE_SIZE (no
 * cleared_* level bit is set here), note that page tables were freed, then
 * call the arch __p4d_free_tlb().
 *
 * @tlb is parenthesized before the member access so that any pointer
 * expression can be passed safely.
 */
#define p4d_free_tlb(tlb, p4dp, address)			\
	do {							\
		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
		(tlb)->freed_tables = 1;			\
		__p4d_free_tlb(tlb, p4dp, address);		\
	} while (0)
776#endif
777
778#ifndef pte_needs_flush
779static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
780{
781	return true;
782}
783#endif
784
785#ifndef huge_pmd_needs_flush
786static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
787{
788	return true;
789}
790#endif
791
792#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
793static inline void tlb_unshare_pmd_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt,
794					  unsigned long addr)
795{
796	/*
797	 * The caller must make sure that concurrent unsharing + exclusive
798	 * reuse is impossible until tlb_flush_unshared_tables() was called.
799	 */
800	VM_WARN_ON_ONCE(!ptdesc_pmd_is_shared(pt));
801	ptdesc_pmd_pts_dec(pt);
802
803	/* Clearing a PUD pointing at a PMD table with PMD leaves. */
804	tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE);
805
806	/*
807	 * If the page table is now exclusively owned, we fully unshared
808	 * a page table.
809	 */
810	if (!ptdesc_pmd_is_shared(pt))
811		tlb->fully_unshared_tables = true;
812	tlb->unshared_tables = true;
813}
814
815static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb)
816{
817	/*
818	 * As soon as the caller drops locks to allow for reuse of
819	 * previously-shared tables, these tables could get modified and
820	 * even reused outside of hugetlb context, so we have to make sure that
821	 * any page table walkers (incl. TLB, GUP-fast) are aware of that
822	 * change.
823	 *
824	 * Even if we are not fully unsharing a PMD table, we must
825	 * flush the TLB for the unsharer now.
826	 */
827	if (tlb->unshared_tables)
828		tlb_flush_mmu_tlbonly(tlb);
829
830	/*
831	 * Similarly, we must make sure that concurrent GUP-fast will not
832	 * walk previously-shared page tables that are getting modified+reused
833	 * elsewhere. So broadcast an IPI to wait for any concurrent GUP-fast.
834	 *
835	 * We only perform this when we are the last sharer of a page table,
836	 * as the IPI will reach all CPUs: any GUP-fast.
837	 *
838	 * Note that on configs where tlb_remove_table_sync_one() is a NOP,
839	 * the expectation is that the tlb_flush_mmu_tlbonly() would have issued
840	 * required IPIs already for us.
841	 */
842	if (tlb->fully_unshared_tables) {
843		tlb_remove_table_sync_one();
844		tlb->fully_unshared_tables = false;
845	}
846}
847#endif /* CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
848
849#endif /* CONFIG_MMU */
850
851#endif /* _ASM_GENERIC__TLB_H */