Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

mm: convert zone->managed_pages to atomic variable

totalram_pages, zone->managed_pages and totalhigh_pages updates are
protected by managed_page_count_lock, but readers never take that lock.
Convert these variables to atomic to avoid readers potentially seeing a
store tear.

This patch converts zone->managed_pages. Subsequent patches will convert
totalram_pages and totalhigh_pages, and eventually managed_page_count_lock
will be removed.

The main motivation was that managed_page_count_lock handling was
complicating things. It was discussed at length here:
https://lore.kernel.org/patchwork/patch/995739/#1181785 So it seemed
better to remove the lock and convert the variables to atomic, preventing
potential store-to-read tearing as a bonus.
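
For illustration, a minimal sketch of the conversion pattern applied below
(demo_zone and the demo_* helpers are hypothetical names, not kernel
symbols; the actual change is in the diff that follows):

#include <linux/atomic.h>

struct demo_zone {
        atomic_long_t managed_pages;    /* was: unsigned long managed_pages; */
};

/* Reader side: lockless; a single atomic load cannot be observed torn. */
static inline unsigned long demo_managed_pages(struct demo_zone *z)
{
        return (unsigned long)atomic_long_read(&z->managed_pages);
}

/* Writer side: was "z->managed_pages += nr" under managed_page_count_lock. */
static inline void demo_account_pages(struct demo_zone *z, long nr)
{
        atomic_long_add(nr, &z->managed_pages);
}

Keeping the cast inside one inline accessor lets callers keep working with
an unsigned long, so call sites only change from zone->managed_pages to
zone_managed_pages(zone).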

Link: http://lkml.kernel.org/r/1542090790-21750-3-git-send-email-arunks@codeaurora.org
Signed-off-by: Arun KS <arunks@codeaurora.org>
Suggested-by: Michal Hocko <mhocko@suse.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Arun KS, committed by Linus Torvalds
9705bea5 3d6357de

+34 -29
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -853,7 +853,7 @@
         */
        pgdat = NODE_DATA(numa_node_id);
        for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-               mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
+               mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
        mem_in_bytes <<= PAGE_SHIFT;

        sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
+7 -2
include/linux/mmzone.h
@@ -435,7 +435,7 @@
         * adjust_managed_page_count() should be used instead of directly
         * touching zone->managed_pages and totalram_pages.
         */
-       unsigned long           managed_pages;
+       atomic_long_t           managed_pages;
        unsigned long           spanned_pages;
        unsigned long           present_pages;

@@ -523,6 +523,11 @@
         */
        PGDAT_RECLAIM_LOCKED,           /* prevents concurrent reclaim */
 };
+
+static inline unsigned long zone_managed_pages(struct zone *zone)
+{
+       return (unsigned long)atomic_long_read(&zone->managed_pages);
+}

 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
@@ -820,7 +825,7 @@
  */
 static inline bool managed_zone(struct zone *zone)
 {
-       return zone->managed_pages;
+       return zone_managed_pages(zone);
 }

 /* Returns true if a zone has memory */
+1 -1
lib/show_mem.c
@@ -28,7 +28,7 @@
                        continue;

                total += zone->present_pages;
-               reserved += zone->present_pages - zone->managed_pages;
+               reserved += zone->present_pages - zone_managed_pages(zone);

                if (is_highmem_idx(zoneid))
                        highmem += zone->present_pages;
+1 -1
mm/memblock.c
@@ -1950,7 +1950,7 @@
        struct zone *z;

        for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
-               z->managed_pages = 0;
+               atomic_long_set(&z->managed_pages, 0);
 }

 void __init reset_all_zones_managed_pages(void)
+22 -22
mm/page_alloc.c
@@ -1280,7 +1280,7 @@
        __ClearPageReserved(p);
        set_page_count(p, 0);

-       page_zone(page)->managed_pages += nr_pages;
+       atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
        set_page_refcounted(page);
        __free_pages(page, order);
 }
@@ -2259,7 +2259,7 @@
         * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
         * Check is race-prone but harmless.
         */
-       max_managed = (zone->managed_pages / 100) + pageblock_nr_pages;
+       max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
        if (zone->nr_reserved_highatomic >= max_managed)
                return;

@@ -4661,7 +4661,7 @@
        struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);

        for_each_zone_zonelist(zone, z, zonelist, offset) {
-               unsigned long size = zone->managed_pages;
+               unsigned long size = zone_managed_pages(zone);
                unsigned long high = high_wmark_pages(zone);
                if (size > high)
                        sum += size - high;
@@ -4768,7 +4768,7 @@
        pg_data_t *pgdat = NODE_DATA(nid);

        for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-               managed_pages += pgdat->node_zones[zone_type].managed_pages;
+               managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
        val->totalram = managed_pages;
        val->sharedram = node_page_state(pgdat, NR_SHMEM);
        val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
@@ -4777,7 +4777,7 @@
                struct zone *zone = &pgdat->node_zones[zone_type];

                if (is_highmem(zone)) {
-                       managed_highpages += zone->managed_pages;
+                       managed_highpages += zone_managed_pages(zone);
                        free_highpages += zone_page_state(zone, NR_FREE_PAGES);
                }
        }
@@ -4984,7 +4984,7 @@
                        K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
                        K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
                        K(zone->present_pages),
-                       K(zone->managed_pages),
+                       K(zone_managed_pages(zone)),
                        K(zone_page_state(zone, NR_MLOCK)),
                        zone_page_state(zone, NR_KERNEL_STACK_KB),
                        K(zone_page_state(zone, NR_PAGETABLE)),
@@ -5656,7 +5656,7 @@
         * The per-cpu-pages pools are set to around 1000th of the
         * size of the zone.
         */
-       batch = zone->managed_pages / 1024;
+       batch = zone_managed_pages(zone) / 1024;
        /* But no more than a meg. */
        if (batch * PAGE_SIZE > 1024 * 1024)
                batch = (1024 * 1024) / PAGE_SIZE;
@@ -5766,7 +5766,7 @@
 {
        if (percpu_pagelist_fraction)
                pageset_set_high(pcp,
-                       (zone->managed_pages /
+                       (zone_managed_pages(zone) /
                                percpu_pagelist_fraction));
        else
                pageset_set_batch(pcp, zone_batchsize(zone));
@@ -6323,7 +6323,7 @@
 static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
                                                        unsigned long remaining_pages)
 {
-       zone->managed_pages = remaining_pages;
+       atomic_long_set(&zone->managed_pages, remaining_pages);
        zone_set_nid(zone, nid);
        zone->name = zone_names[idx];
        zone->zone_pgdat = NODE_DATA(nid);
@@ -7076,7 +7076,7 @@
 void adjust_managed_page_count(struct page *page, long count)
 {
        spin_lock(&managed_page_count_lock);
-       page_zone(page)->managed_pages += count;
+       atomic_long_add(count, &page_zone(page)->managed_pages);
        totalram_pages += count;
 #ifdef CONFIG_HIGHMEM
        if (PageHighMem(page))
@@ -7124,7 +7124,7 @@
 {
        __free_reserved_page(page);
        totalram_pages++;
-       page_zone(page)->managed_pages++;
+       atomic_long_inc(&page_zone(page)->managed_pages);
        totalhigh_pages++;
 }
 #endif
@@ -7257,7 +7257,7 @@
        for (i = 0; i < MAX_NR_ZONES; i++) {
                struct zone *zone = pgdat->node_zones + i;
                long max = 0;
-               unsigned long managed_pages = zone->managed_pages;
+               unsigned long managed_pages = zone_managed_pages(zone);

                /* Find valid and maximum lowmem_reserve in the zone */
                for (j = i; j < MAX_NR_ZONES; j++) {
@@ -7293,7 +7293,7 @@
        for_each_online_pgdat(pgdat) {
                for (j = 0; j < MAX_NR_ZONES; j++) {
                        struct zone *zone = pgdat->node_zones + j;
-                       unsigned long managed_pages = zone->managed_pages;
+                       unsigned long managed_pages = zone_managed_pages(zone);

                        zone->lowmem_reserve[j] = 0;

@@ -7311,7 +7311,7 @@
                                        lower_zone->lowmem_reserve[j] =
                                                managed_pages / sysctl_lowmem_reserve_ratio[idx];
                                }
-                               managed_pages += lower_zone->managed_pages;
+                               managed_pages += zone_managed_pages(lower_zone);
                        }
                }
        }
@@ -7330,14 +7330,14 @@
        /* Calculate total number of !ZONE_HIGHMEM pages */
        for_each_zone(zone) {
                if (!is_highmem(zone))
-                       lowmem_pages += zone->managed_pages;
+                       lowmem_pages += zone_managed_pages(zone);
        }

        for_each_zone(zone) {
                u64 tmp;

                spin_lock_irqsave(&zone->lock, flags);
-               tmp = (u64)pages_min * zone->managed_pages;
+               tmp = (u64)pages_min * zone_managed_pages(zone);
                do_div(tmp, lowmem_pages);
                if (is_highmem(zone)) {
                        /*
@@ -7351,7 +7351,7 @@
                         */
                        unsigned long min_pages;

-                       min_pages = zone->managed_pages / 1024;
+                       min_pages = zone_managed_pages(zone) / 1024;
                        min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
                        zone->watermark[WMARK_MIN] = min_pages;
                } else {
@@ -7368,7 +7368,7 @@
                 * ensure a minimum size on small systems.
                 */
                tmp = max_t(u64, tmp >> 2,
-                           mult_frac(zone->managed_pages,
+                           mult_frac(zone_managed_pages(zone),
                                      watermark_scale_factor, 10000));

                zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
@@ -7498,8 +7498,8 @@
                pgdat->min_unmapped_pages = 0;

        for_each_zone(zone)
-               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
-                               sysctl_min_unmapped_ratio) / 100;
+               zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) *
+                               sysctl_min_unmapped_ratio) / 100;
 }


@@ -7526,8 +7526,8 @@
                pgdat->min_slab_pages = 0;

        for_each_zone(zone)
-               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
-                               sysctl_min_slab_ratio) / 100;
+               zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) *
+                               sysctl_min_slab_ratio) / 100;
 }

 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+2 -2
mm/vmstat.c
@@ -227,7 +227,7 @@
         * 125          1024            10      16-32 GB        9
         */

-       mem = zone->managed_pages >> (27 - PAGE_SHIFT);
+       mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

        threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

@@ -1569,7 +1569,7 @@
                   high_wmark_pages(zone),
                   zone->spanned_pages,
                   zone->present_pages,
-                  zone->managed_pages);
+                  zone_managed_pages(zone));

        seq_printf(m,
                   "\n        protection: (%ld",