Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm/cma: refuse handing out non-contiguous page ranges

Let's disallow handing out PFN ranges with non-contiguous pages, so we can
remove the nth-page usage in __cma_alloc(), and so any callers don't have
to worry about that either when wanting to blindly iterate pages.

This is really only a problem in configs with SPARSEMEM but without
SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
cases.

Will this cause harm? Probably not, because it's mostly 32bit that does
not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
look into allocating the memmap for the memory sections spanned by a
single CMA region in one go from memblock.

[david@redhat.com: we can have NOMMU configs with SPARSEMEM enabled]
Link: https://lkml.kernel.org/r/6ec933b1-b3f7-41c0-95d8-e518bb87375e@redhat.com
Link: https://lkml.kernel.org/r/20250901150359.867252-23-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by David Hildenbrand, committed by Andrew Morton
6972706f b71ddc9e

+65 -15
+6
include/linux/mm.h
··· 209 209 extern unsigned long sysctl_admin_reserve_kbytes; 210 210 211 211 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) 212 + bool page_range_contiguous(const struct page *page, unsigned long nr_pages); 212 213 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) 213 214 #else 214 215 #define nth_page(page,n) ((page) + (n)) 216 + static inline bool page_range_contiguous(const struct page *page, 217 + unsigned long nr_pages) 218 + { 219 + return true; 220 + } 215 221 #endif 216 222 217 223 /* to align the pointer to the (next) page boundary */
+24 -15
mm/cma.c
··· 780 780 unsigned long count, unsigned int align, 781 781 struct page **pagep, gfp_t gfp) 782 782 { 783 - unsigned long mask, offset; 784 - unsigned long pfn = -1; 785 - unsigned long start = 0; 786 783 unsigned long bitmap_maxno, bitmap_no, bitmap_count; 784 + unsigned long start, pfn, mask, offset; 787 785 int ret = -EBUSY; 788 786 struct page *page = NULL; 789 787 ··· 793 795 if (bitmap_count > bitmap_maxno) 794 796 goto out; 795 797 796 - for (;;) { 798 + for (start = 0; ; start = bitmap_no + mask + 1) { 797 799 spin_lock_irq(&cma->lock); 798 800 /* 799 801 * If the request is larger than the available number ··· 810 812 spin_unlock_irq(&cma->lock); 811 813 break; 812 814 } 815 + 816 + pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit); 817 + page = pfn_to_page(pfn); 818 + 819 + /* 820 + * Do not hand out page ranges that are not contiguous, so 821 + * callers can just iterate the pages without having to worry 822 + * about these corner cases. 823 + */ 824 + if (!page_range_contiguous(page, count)) { 825 + spin_unlock_irq(&cma->lock); 826 + pr_warn_ratelimited("%s: %s: skipping incompatible area [0x%lx-0x%lx]", 827 + __func__, cma->name, pfn, pfn + count - 1); 828 + continue; 829 + } 830 + 813 831 bitmap_set(cmr->bitmap, bitmap_no, bitmap_count); 814 832 cma->available_count -= count; 815 833 /* ··· 835 821 */ 836 822 spin_unlock_irq(&cma->lock); 837 823 838 - pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit); 839 824 mutex_lock(&cma->alloc_mutex); 840 825 ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp); 841 826 mutex_unlock(&cma->alloc_mutex); 842 - if (ret == 0) { 843 - page = pfn_to_page(pfn); 827 + if (!ret) 844 828 break; 845 - } 846 829 847 830 cma_clear_bitmap(cma, cmr, pfn, count); 848 831 if (ret != -EBUSY) 849 832 break; 850 833 851 834 pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n", 852 - __func__, pfn, pfn_to_page(pfn)); 835 + __func__, pfn, page); 853 836 854 - trace_cma_alloc_busy_retry(cma->name, 
pfn, pfn_to_page(pfn), 855 - count, align); 856 - /* try again with a bit different memory target */ 857 - start = bitmap_no + mask + 1; 837 + trace_cma_alloc_busy_retry(cma->name, pfn, page, count, align); 858 838 } 859 839 out: 860 - *pagep = page; 840 + if (!ret) 841 + *pagep = page; 861 842 return ret; 862 843 } 863 844 ··· 891 882 */ 892 883 if (page) { 893 884 for (i = 0; i < count; i++) 894 - page_kasan_tag_reset(nth_page(page, i)); 885 + page_kasan_tag_reset(page + i); 895 886 } 896 887 897 888 if (ret && !(gfp & __GFP_NOWARN)) {
+35
mm/util.c
··· 1281 1281 return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, 0); 1282 1282 } 1283 1283 #endif /* CONFIG_MMU */ 1284 + 1285 + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) 1286 + /** 1287 + * page_range_contiguous - test whether the page range is contiguous 1288 + * @page: the start of the page range. 1289 + * @nr_pages: the number of pages in the range. 1290 + * 1291 + * Test whether the page range is contiguous, such that they can be iterated 1292 + * naively, corresponding to iterating a contiguous PFN range. 1293 + * 1294 + * This function should primarily only be used for debug checks, or when 1295 + * working with page ranges that are not naturally contiguous (e.g., pages 1296 + * within a folio are). 1297 + * 1298 + * Returns true if contiguous, otherwise false. 1299 + */ 1300 + bool page_range_contiguous(const struct page *page, unsigned long nr_pages) 1301 + { 1302 + const unsigned long start_pfn = page_to_pfn(page); 1303 + const unsigned long end_pfn = start_pfn + nr_pages; 1304 + unsigned long pfn; 1305 + 1306 + /* 1307 + * The memmap is allocated per memory section, so no need to check 1308 + * within the first section. However, we need to check each other 1309 + * spanned memory section once, making sure the first page in a 1310 + * section could similarly be reached by just iterating pages. 1311 + */ 1312 + for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION); 1313 + pfn < end_pfn; pfn += PAGES_PER_SECTION) 1314 + if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn))) 1315 + return false; 1316 + return true; 1317 + } 1318 + #endif