Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: fix struct page layout on 32-bit systems

32-bit architectures which expect 8-byte alignment for 8-byte integers and
need 64-bit DMA addresses (arm, mips, ppc) had their struct page
inadvertently expanded in 2019. When the dma_addr_t was added, it forced
the alignment of the union to 8 bytes, which inserted a 4 byte gap between
'flags' and the union.

Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
This restores the alignment to that of an unsigned long. We always
store the low bits in the first word to prevent the PageTail bit from
being inadvertently set on a big endian platform. If that happened,
get_user_pages_fast() racing against a page which was freed and
reallocated to the page_pool could dereference a bogus compound_head(),
which would be hard to trace back to this cause.

Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Matteo Croce <mcroce@linux.microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Matthew Wilcox (Oracle) and committed by Linus Torvalds (commit 9ddb3c14, parent 62862290).

+20 -8
+2 -2
include/linux/mm_types.h
··· 97 97 }; 98 98 struct { /* page_pool used by netstack */ 99 99 /** 100 - * @dma_addr: might require a 64-bit value even on 100 + * @dma_addr: might require a 64-bit value on 101 101 * 32-bit architectures. 102 102 */ 103 - dma_addr_t dma_addr; 103 + unsigned long dma_addr[2]; 104 104 }; 105 105 struct { /* slab, slob and slub */ 106 106 union {
+11 -1
include/net/page_pool.h
··· 198 198 199 199 static inline dma_addr_t page_pool_get_dma_addr(struct page *page) 200 200 { 201 - return page->dma_addr; 201 + dma_addr_t ret = page->dma_addr[0]; 202 + if (sizeof(dma_addr_t) > sizeof(unsigned long)) 203 + ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; 204 + return ret; 205 + } 206 + 207 + static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) 208 + { 209 + page->dma_addr[0] = addr; 210 + if (sizeof(dma_addr_t) > sizeof(unsigned long)) 211 + page->dma_addr[1] = upper_32_bits(addr); 202 212 } 203 213 204 214 static inline bool is_page_pool_compiled_in(void)
+7 -5
net/core/page_pool.c
··· 174 174 struct page *page, 175 175 unsigned int dma_sync_size) 176 176 { 177 + dma_addr_t dma_addr = page_pool_get_dma_addr(page); 178 + 177 179 dma_sync_size = min(dma_sync_size, pool->p.max_len); 178 - dma_sync_single_range_for_device(pool->p.dev, page->dma_addr, 180 + dma_sync_single_range_for_device(pool->p.dev, dma_addr, 179 181 pool->p.offset, dma_sync_size, 180 182 pool->p.dma_dir); 181 183 } ··· 197 195 if (dma_mapping_error(pool->p.dev, dma)) 198 196 return false; 199 197 200 - page->dma_addr = dma; 198 + page_pool_set_dma_addr(page, dma); 201 199 202 200 if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) 203 201 page_pool_dma_sync_for_device(pool, page, pool->p.max_len); ··· 333 331 */ 334 332 goto skip_dma_unmap; 335 333 336 - dma = page->dma_addr; 334 + dma = page_pool_get_dma_addr(page); 337 335 338 - /* When page is unmapped, it cannot be returned our pool */ 336 + /* When page is unmapped, it cannot be returned to our pool */ 339 337 dma_unmap_page_attrs(pool->p.dev, dma, 340 338 PAGE_SIZE << pool->p.order, pool->p.dma_dir, 341 339 DMA_ATTR_SKIP_CPU_SYNC); 342 - page->dma_addr = 0; 340 + page_pool_set_dma_addr(page, 0); 343 341 skip_dma_unmap: 344 342 /* This may be the last page returned, releasing the pool, so 345 343 * it is not safe to reference pool afterwards.