Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: remove __GFP_COLD

As the page free path makes no distinction between cache hot and cold
pages, there is no real useful ordering of pages in the free list that
allocation requests can take advantage of. Judging from the users of
__GFP_COLD, it is likely that a number of them are the result of copying
other sites instead of actually measuring the impact. Remove the
__GFP_COLD parameter which simplifies a number of paths in the page
allocator.

This is potentially controversial but bear in mind that the size of the
per-cpu pagelists versus modern cache sizes means that the whole per-cpu
list can often fit in the L3 cache. Hence, there is only a potential
benefit for microbenchmarks that alloc/free pages in a tight loop. It's
even worse when THP is taken into account which has little or no chance
of getting a cache-hot page as the per-cpu list is bypassed and the
zeroing of multiple pages will thrash the cache anyway.

The truncate microbenchmarks are not shown as this patch affects the
allocation path and not the free path. A page fault microbenchmark was
tested but it showed no significant difference which is not surprising
given that the __GFP_COLD branches are a minuscule percentage of the
fault path.

Link: http://lkml.kernel.org/r/20171018075952.10627-9-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Mel Gorman and committed by
Linus Torvalds
453f85d4 2d4894b5

+32 -62
+1 -1
drivers/net/ethernet/amazon/ena/ena_netdev.c
··· 517 517 518 518 519 519 rc = ena_alloc_rx_page(rx_ring, rx_info, 520 - __GFP_COLD | GFP_ATOMIC | __GFP_COMP); 520 + GFP_ATOMIC | __GFP_COMP); 521 521 if (unlikely(rc < 0)) { 522 522 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, 523 523 "failed to alloc buffer for rx queue %d\n",
+1 -1
drivers/net/ethernet/amd/xgbe/xgbe-desc.c
··· 295 295 order = alloc_order; 296 296 297 297 /* Try to obtain pages, decreasing order if necessary */ 298 - gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN; 298 + gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; 299 299 while (order >= 0) { 300 300 pages = alloc_pages_node(node, gfp, order); 301 301 if (pages)
+1 -2
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
··· 304 304 buff->flags = 0U; 305 305 buff->len = AQ_CFG_RX_FRAME_MAX; 306 306 307 - buff->page = alloc_pages(GFP_ATOMIC | __GFP_COLD | 308 - __GFP_COMP, pages_order); 307 + buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order); 309 308 if (!buff->page) { 310 309 err = -ENOMEM; 311 310 goto err_exit;
+1 -1
drivers/net/ethernet/cavium/liquidio/octeon_network.h
··· 195 195 struct sk_buff *skb; 196 196 struct octeon_skb_page_info *skb_pg_info; 197 197 198 - page = alloc_page(GFP_ATOMIC | __GFP_COLD); 198 + page = alloc_page(GFP_ATOMIC); 199 199 if (unlikely(!page)) 200 200 return NULL; 201 201
+2 -3
drivers/net/ethernet/mellanox/mlx4/en_rx.c
··· 193 193 194 194 if (mlx4_en_prepare_rx_desc(priv, ring, 195 195 ring->actual_size, 196 - GFP_KERNEL | __GFP_COLD)) { 196 + GFP_KERNEL)) { 197 197 if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { 198 198 en_err(priv, "Failed to allocate enough rx buffers\n"); 199 199 return -ENOMEM; ··· 552 552 do { 553 553 if (mlx4_en_prepare_rx_desc(priv, ring, 554 554 ring->prod & ring->size_mask, 555 - GFP_ATOMIC | __GFP_COLD | 556 - __GFP_MEMALLOC)) 555 + GFP_ATOMIC | __GFP_MEMALLOC)) 557 556 break; 558 557 ring->prod++; 559 558 } while (likely(--missing));
+2 -2
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
··· 1185 1185 } else { 1186 1186 struct page *page; 1187 1187 1188 - page = alloc_page(GFP_KERNEL | __GFP_COLD); 1188 + page = alloc_page(GFP_KERNEL); 1189 1189 frag = page ? page_address(page) : NULL; 1190 1190 } 1191 1191 if (!frag) { ··· 1212 1212 } else { 1213 1213 struct page *page; 1214 1214 1215 - page = alloc_page(GFP_ATOMIC | __GFP_COLD); 1215 + page = alloc_page(GFP_ATOMIC); 1216 1216 frag = page ? page_address(page) : NULL; 1217 1217 } 1218 1218 if (!frag) {
+1 -2
drivers/net/ethernet/qlogic/qlge/qlge_main.c
··· 1092 1092 { 1093 1093 if (!rx_ring->pg_chunk.page) { 1094 1094 u64 map; 1095 - rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP | 1096 - GFP_ATOMIC, 1095 + rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC, 1097 1096 qdev->lbq_buf_order); 1098 1097 if (unlikely(!rx_ring->pg_chunk.page)) { 1099 1098 netif_err(qdev, drv, qdev->ndev,
+1 -1
drivers/net/ethernet/sfc/falcon/rx.c
··· 163 163 do { 164 164 page = ef4_reuse_page(rx_queue); 165 165 if (page == NULL) { 166 - page = alloc_pages(__GFP_COLD | __GFP_COMP | 166 + page = alloc_pages(__GFP_COMP | 167 167 (atomic ? GFP_ATOMIC : GFP_KERNEL), 168 168 efx->rx_buffer_order); 169 169 if (unlikely(page == NULL))
+1 -1
drivers/net/ethernet/sfc/rx.c
··· 163 163 do { 164 164 page = efx_reuse_page(rx_queue); 165 165 if (page == NULL) { 166 - page = alloc_pages(__GFP_COLD | __GFP_COMP | 166 + page = alloc_pages(__GFP_COMP | 167 167 (atomic ? GFP_ATOMIC : GFP_KERNEL), 168 168 efx->rx_buffer_order); 169 169 if (unlikely(page == NULL))
+1 -1
drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
··· 335 335 dma_addr_t pages_dma; 336 336 337 337 /* Try to obtain pages, decreasing order if necessary */ 338 - gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; 338 + gfp |= __GFP_COMP | __GFP_NOWARN; 339 339 while (order >= 0) { 340 340 pages = alloc_pages(gfp, order); 341 341 if (pages)
+1 -1
drivers/net/ethernet/ti/netcp_core.c
··· 906 906 sw_data[0] = (u32)bufptr; 907 907 } else { 908 908 /* Allocate a secondary receive queue entry */ 909 - page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD); 909 + page = alloc_page(GFP_ATOMIC | GFP_DMA); 910 910 if (unlikely(!page)) { 911 911 dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n"); 912 912 goto fail;
-1
drivers/net/virtio_net.c
··· 988 988 int err; 989 989 bool oom; 990 990 991 - gfp |= __GFP_COLD; 992 991 do { 993 992 if (vi->mergeable_rx_bufs) 994 993 err = add_recvbuf_mergeable(vi, rq, gfp);
+1 -1
drivers/staging/lustre/lustre/mdc/mdc_request.c
··· 1152 1152 } 1153 1153 1154 1154 for (npages = 1; npages < max_pages; npages++) { 1155 - page = page_cache_alloc_cold(inode->i_mapping); 1155 + page = page_cache_alloc(inode->i_mapping); 1156 1156 if (!page) 1157 1157 break; 1158 1158 page_pool[npages] = page;
+2 -4
fs/cachefiles/rdwr.c
··· 256 256 goto backing_page_already_present; 257 257 258 258 if (!newpage) { 259 - newpage = __page_cache_alloc(cachefiles_gfp | 260 - __GFP_COLD); 259 + newpage = __page_cache_alloc(cachefiles_gfp); 261 260 if (!newpage) 262 261 goto nomem_monitor; 263 262 } ··· 492 493 goto backing_page_already_present; 493 494 494 495 if (!newpage) { 495 - newpage = __page_cache_alloc(cachefiles_gfp | 496 - __GFP_COLD); 496 + newpage = __page_cache_alloc(cachefiles_gfp); 497 497 if (!newpage) 498 498 goto nomem; 499 499 }
-5
include/linux/gfp.h
··· 24 24 #define ___GFP_HIGH 0x20u 25 25 #define ___GFP_IO 0x40u 26 26 #define ___GFP_FS 0x80u 27 - #define ___GFP_COLD 0x100u 28 27 #define ___GFP_NOWARN 0x200u 29 28 #define ___GFP_RETRY_MAYFAIL 0x400u 30 29 #define ___GFP_NOFAIL 0x800u ··· 191 192 /* 192 193 * Action modifiers 193 194 * 194 - * __GFP_COLD indicates that the caller does not expect to be used in the near 195 - * future. Where possible, a cache-cold page will be returned. 196 - * 197 195 * __GFP_NOWARN suppresses allocation failure reports. 198 196 * 199 197 * __GFP_COMP address compound page metadata. 200 198 * 201 199 * __GFP_ZERO returns a zeroed page on success. 202 200 */ 203 - #define __GFP_COLD ((__force gfp_t)___GFP_COLD) 204 201 #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) 205 202 #define __GFP_COMP ((__force gfp_t)___GFP_COMP) 206 203 #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
+1 -7
include/linux/pagemap.h
··· 234 234 return __page_cache_alloc(mapping_gfp_mask(x)); 235 235 } 236 236 237 - static inline struct page *page_cache_alloc_cold(struct address_space *x) 238 - { 239 - return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); 240 - } 241 - 242 237 static inline gfp_t readahead_gfp_mask(struct address_space *x) 243 238 { 244 - return mapping_gfp_mask(x) | 245 - __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN; 239 + return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; 246 240 } 247 241 248 242 typedef int filler_t(void *, struct page *);
+1 -1
include/linux/skbuff.h
··· 2672 2672 * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to 2673 2673 * code in gfp_to_alloc_flags that should be enforcing this. 2674 2674 */ 2675 - gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC; 2675 + gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; 2676 2676 2677 2677 return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); 2678 2678 }
-3
include/linux/slab.h
··· 467 467 * Also it is possible to set different flags by OR'ing 468 468 * in one or more of the following additional @flags: 469 469 * 470 - * %__GFP_COLD - Request cache-cold pages instead of 471 - * trying to return cache-warm pages. 472 - * 473 470 * %__GFP_HIGH - This allocation has high priority and may use emergency pools. 474 471 * 475 472 * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
-1
include/trace/events/mmflags.h
··· 32 32 {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ 33 33 {(unsigned long)__GFP_IO, "__GFP_IO"}, \ 34 34 {(unsigned long)__GFP_FS, "__GFP_FS"}, \ 35 - {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ 36 35 {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ 37 36 {(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \ 38 37 {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \
+2 -2
kernel/power/snapshot.c
··· 1884 1884 */ 1885 1885 static inline int get_highmem_buffer(int safe_needed) 1886 1886 { 1887 - buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); 1887 + buffer = get_image_page(GFP_ATOMIC, safe_needed); 1888 1888 return buffer ? 0 : -ENOMEM; 1889 1889 } 1890 1890 ··· 1945 1945 while (nr_pages-- > 0) { 1946 1946 struct page *page; 1947 1947 1948 - page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); 1948 + page = alloc_image_page(GFP_ATOMIC); 1949 1949 if (!page) 1950 1950 goto err_out; 1951 1951 memory_bm_set_bit(copy_bm, page_to_pfn(page));
+3 -3
mm/filemap.c
··· 2272 2272 * Ok, it wasn't cached, so we need to create a new 2273 2273 * page.. 2274 2274 */ 2275 - page = page_cache_alloc_cold(mapping); 2275 + page = page_cache_alloc(mapping); 2276 2276 if (!page) { 2277 2277 error = -ENOMEM; 2278 2278 goto out; ··· 2384 2384 int ret; 2385 2385 2386 2386 do { 2387 - page = __page_cache_alloc(gfp_mask|__GFP_COLD); 2387 + page = __page_cache_alloc(gfp_mask); 2388 2388 if (!page) 2389 2389 return -ENOMEM; 2390 2390 ··· 2788 2788 repeat: 2789 2789 page = find_get_page(mapping, index); 2790 2790 if (!page) { 2791 - page = __page_cache_alloc(gfp | __GFP_COLD); 2791 + page = __page_cache_alloc(gfp); 2792 2792 if (!page) 2793 2793 return ERR_PTR(-ENOMEM); 2794 2794 err = add_to_page_cache_lru(page, mapping, index, gfp);
+6 -14
mm/page_alloc.c
··· 2336 2336 */ 2337 2337 static int rmqueue_bulk(struct zone *zone, unsigned int order, 2338 2338 unsigned long count, struct list_head *list, 2339 - int migratetype, bool cold) 2339 + int migratetype) 2340 2340 { 2341 2341 int i, alloced = 0; 2342 2342 ··· 2358 2358 * merge IO requests if the physical pages are ordered 2359 2359 * properly. 2360 2360 */ 2361 - if (likely(!cold)) 2362 - list_add(&page->lru, list); 2363 - else 2364 - list_add_tail(&page->lru, list); 2361 + list_add(&page->lru, list); 2365 2362 list = &page->lru; 2366 2363 alloced++; 2367 2364 if (is_migrate_cma(get_pcppage_migratetype(page))) ··· 2792 2795 2793 2796 /* Remove page from the per-cpu list, caller must protect the list */ 2794 2797 static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, 2795 - bool cold, struct per_cpu_pages *pcp, 2798 + struct per_cpu_pages *pcp, 2796 2799 struct list_head *list) 2797 2800 { 2798 2801 struct page *page; ··· 2801 2804 if (list_empty(list)) { 2802 2805 pcp->count += rmqueue_bulk(zone, 0, 2803 2806 pcp->batch, list, 2804 - migratetype, cold); 2807 + migratetype); 2805 2808 if (unlikely(list_empty(list))) 2806 2809 return NULL; 2807 2810 } 2808 2811 2809 - if (cold) 2810 - page = list_last_entry(list, struct page, lru); 2811 - else 2812 - page = list_first_entry(list, struct page, lru); 2813 - 2812 + page = list_first_entry(list, struct page, lru); 2814 2813 list_del(&page->lru); 2815 2814 pcp->count--; 2816 2815 } while (check_new_pcp(page)); ··· 2821 2828 { 2822 2829 struct per_cpu_pages *pcp; 2823 2830 struct list_head *list; 2824 - bool cold = ((gfp_flags & __GFP_COLD) != 0); 2825 2831 struct page *page; 2826 2832 unsigned long flags; 2827 2833 2828 2834 local_irq_save(flags); 2829 2835 pcp = &this_cpu_ptr(zone->pageset)->pcp; 2830 2836 list = &pcp->lists[migratetype]; 2831 - page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); 2837 + page = __rmqueue_pcplist(zone, migratetype, pcp, list); 2832 2838 if (page) { 2833 2839 
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); 2834 2840 zone_statistics(preferred_zone, zone);
+1 -1
mm/percpu-vm.c
··· 81 81 static int pcpu_alloc_pages(struct pcpu_chunk *chunk, 82 82 struct page **pages, int page_start, int page_end) 83 83 { 84 - const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; 84 + const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM; 85 85 unsigned int cpu, tcpu; 86 86 int i; 87 87
+2 -2
net/core/skbuff.c
··· 353 353 */ 354 354 void *netdev_alloc_frag(unsigned int fragsz) 355 355 { 356 - return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); 356 + return __netdev_alloc_frag(fragsz, GFP_ATOMIC); 357 357 } 358 358 EXPORT_SYMBOL(netdev_alloc_frag); 359 359 ··· 366 366 367 367 void *napi_alloc_frag(unsigned int fragsz) 368 368 { 369 - return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); 369 + return __napi_alloc_frag(fragsz, GFP_ATOMIC); 370 370 } 371 371 EXPORT_SYMBOL(napi_alloc_frag); 372 372
-1
tools/perf/builtin-kmem.c
··· 641 641 { "__GFP_ATOMIC", "_A" }, 642 642 { "__GFP_IO", "I" }, 643 643 { "__GFP_FS", "F" }, 644 - { "__GFP_COLD", "CO" }, 645 644 { "__GFP_NOWARN", "NWR" }, 646 645 { "__GFP_RETRY_MAYFAIL", "R" }, 647 646 { "__GFP_NOFAIL", "NF" },