
tools/vm/page-types.c: support KPF_SOFTDIRTY bit

The soft dirty bit allows us to track which pages have been written since the
last clear_refs (done by "echo 4 > /proc/pid/clear_refs"). This is useful for
userspace applications that want to know their memory footprints.
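
For illustration only (not part of this patch; the helper name is made up): a
minimal C sketch of resetting a process's soft dirty bits through the
/proc/<pid>/clear_refs interface described above:

    #include <stdio.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Reset all soft dirty bits of a process via /proc/<pid>/clear_refs. */
    static int clear_soft_dirty(pid_t pid)
    {
            char path[64];
            FILE *f;

            snprintf(path, sizeof(path), "/proc/%d/clear_refs", (int)pid);
            f = fopen(path, "w");
            if (!f)
                    return -1;
            if (fputs("4", f) == EOF) {
                    fclose(f);
                    return -1;
            }
            return fclose(f);       /* 0 on success */
    }

    int main(void)
    {
            /* A monitor would pass the target's pid; clear our own here. */
            if (clear_soft_dirty(getpid()))
                    perror("clear_refs");
            return 0;
    }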

Note that the kernel exposes this flag via bit 55 of /proc/pid/pagemap, and
these semantics are not the default yet (they are scheduled to become the
default in the near future). However, the kernel switches to the new semantics
at the first clear_refs, and users of the soft dirty bit always perform a
clear_refs before reading the bit, so this is not a big deal. Users must simply
avoid relying on the bit reported by page-types before the first clear_refs.
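
Also for illustration (hypothetical helper; PAGEMAP_SOFT_DIRTY is just bit 55
spelled out, and on kernels where the new semantics are not yet the default the
value is only meaningful after that first clear_refs): a sketch of testing the
bit for one virtual address:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Bit 55 of a pagemap entry: pte is soft dirty. */
    #define PAGEMAP_SOFT_DIRTY      (1ULL << 55)

    /* 1 if the page holding vaddr is soft dirty, 0 if not, -1 on error. */
    static int page_soft_dirty(int pagemap_fd, unsigned long vaddr)
    {
            uint64_t ent;
            long pagesize = sysconf(_SC_PAGESIZE);
            /* pagemap holds one 64-bit entry per virtual page */
            off_t off = (off_t)(vaddr / pagesize) * sizeof(ent);

            if (pread(pagemap_fd, &ent, sizeof(ent), off) !=
                (ssize_t)sizeof(ent))
                    return -1;
            return !!(ent & PAGEMAP_SOFT_DIRTY);
    }

    int main(void)
    {
            static char page[4096];
            int fd = open("/proc/self/pagemap", O_RDONLY);

            if (fd < 0) {
                    perror("open pagemap");
                    return 1;
            }
            page[0] = 1;    /* write to the page so it becomes soft dirty */
            printf("soft dirty: %d\n",
                   page_soft_dirty(fd, (unsigned long)page));
            close(fd);
            return 0;
    }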

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Naoya Horiguchi and committed by Linus Torvalds
46c77e2b ec8e41ae

+21 -12

include/linux/kernel-page-flags.h  +1
···
 #define KPF_OWNER_PRIVATE   37
 #define KPF_ARCH            38
 #define KPF_UNCACHED        39
+#define KPF_SOFTDIRTY       40
 
 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
tools/vm/page-types.c  +20 -12
···
 #define PM_PSHIFT_BITS      6
 #define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
 #define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define PM_PSHIFT(x)        (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define __PM_PSHIFT(x)      (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
 #define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
 #define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)
 
+#define __PM_SOFT_DIRTY     (1LL)
 #define PM_PRESENT          PM_STATUS(4LL)
 #define PM_SWAP             PM_STATUS(2LL)
+#define PM_SOFT_DIRTY       __PM_PSHIFT(__PM_SOFT_DIRTY)
 
 
 /*
···
 #define KPF_OWNER_PRIVATE   37
 #define KPF_ARCH            38
 #define KPF_UNCACHED        39
+#define KPF_SOFTDIRTY       40
 
 /* [48-] take some arbitrary free slots for expanding overloaded flags
  * not part of kernel API
···
 	[KPF_OWNER_PRIVATE]  = "O:owner_private",
 	[KPF_ARCH]           = "h:arch",
 	[KPF_UNCACHED]       = "c:uncached",
+	[KPF_SOFTDIRTY]      = "f:softdirty",
 
 	[KPF_READAHEAD]      = "I:readahead",
 	[KPF_SLOB_FREE]      = "P:slob_free",
···
 	return 1;
 }
 
-static uint64_t expand_overloaded_flags(uint64_t flags)
+static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
 {
 	/* SLOB/SLUB overload several page flags */
 	if (flags & BIT(SLAB)) {
···
 	/* PG_reclaim is overloaded as PG_readahead in the read path */
 	if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
 		flags ^= BIT(RECLAIM) | BIT(READAHEAD);
+
+	if (pme & PM_SOFT_DIRTY)
+		flags |= BIT(SOFTDIRTY);
 
 	return flags;
 }
···
 	return flags;
 }
 
-static uint64_t kpageflags_flags(uint64_t flags)
+static uint64_t kpageflags_flags(uint64_t flags, uint64_t pme)
 {
-	flags = expand_overloaded_flags(flags);
-
-	if (!opt_raw)
+	if (opt_raw)
+		flags = expand_overloaded_flags(flags, pme);
+	else
 		flags = well_known_flags(flags);
 
 	return flags;
···
 }
 
 static void add_page(unsigned long voffset,
-		     unsigned long offset, uint64_t flags)
+		     unsigned long offset, uint64_t flags, uint64_t pme)
 {
-	flags = kpageflags_flags(flags);
+	flags = kpageflags_flags(flags, pme);
 
 	if (!bit_mask_ok(flags))
 		return;
···
 #define KPAGEFLAGS_BATCH	(64 << 10)	/* 64k pages */
 static void walk_pfn(unsigned long voffset,
 		     unsigned long index,
-		     unsigned long count)
+		     unsigned long count,
+		     uint64_t pme)
 {
 	uint64_t buf[KPAGEFLAGS_BATCH];
 	unsigned long batch;
···
 			break;
 
 		for (i = 0; i < pages; i++)
-			add_page(voffset + i, index + i, buf[i]);
+			add_page(voffset + i, index + i, buf[i], pme);
 
 		index += pages;
 		count -= pages;
···
 		for (i = 0; i < pages; i++) {
 			pfn = pagemap_pfn(buf[i]);
 			if (pfn)
-				walk_pfn(index + i, pfn, 1);
+				walk_pfn(index + i, pfn, 1, buf[i]);
 		}
 
 		index += pages;
···
 
 	for (i = 0; i < nr_addr_ranges; i++)
 		if (!opt_pid)
-			walk_pfn(0, opt_offset[i], opt_size[i]);
+			walk_pfn(0, opt_offset[i], opt_size[i], 0);
 		else
 			walk_task(opt_offset[i], opt_size[i]);
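
With the patch applied, a plausible way to exercise the new bit looks like the
following (hedged: <pid> is a placeholder, and the -p/-b options and the
"softdirty" name simply follow page-types' existing conventions and the flag
table added above):

    echo 4 > /proc/<pid>/clear_refs      # reset soft dirty bits / enable the new semantics
    ./page-types -p <pid> -b softdirty   # tally only pages whose pagemap bit 55 is set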