// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/sched/clock.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

struct page_owner {
	unsigned short order;
	short last_migrate_reason;
	gfp_t gfp_mask;
	depot_stack_handle_t handle;
	depot_stack_handle_t free_handle;
	u64 ts_nsec;
	u64 free_ts_nsec;
	char comm[TASK_COMM_LEN];
	pid_t pid;
	pid_t tgid;
	pid_t free_pid;
	pid_t free_tgid;
};

struct stack {
	struct stack_record *stack_record;
	struct stack *next;
};
static struct stack dummy_stack;
static struct stack failure_stack;
static struct stack *stack_list;
static DEFINE_SPINLOCK(stack_list_lock);
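
/*
 * Flags selecting what stack_print() emits for each stack_list entry: the
 * saved stack trace, the number of base pages currently charged to it,
 * and/or its stack depot handle. Each debugfs file created in
 * pageowner_init() passes its own combination via inode->i_private.
 */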
#define STACK_PRINT_FLAG_STACK 0x1
#define STACK_PRINT_FLAG_PAGES 0x2
#define STACK_PRINT_FLAG_HANDLE 0x4

struct stack_print_ctx {
	struct stack *stack;
	u8 flags;
};

static bool page_owner_enabled __initdata;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

static inline void set_current_in_page_owner(void)
{
	/*
	 * Avoid recursion.
	 *
	 * We might need to allocate more memory from page_owner code, so make
	 * sure to signal it in order to avoid recursion.
	 */
	current->in_page_owner = 1;
}

static inline void unset_current_in_page_owner(void)
{
	current->in_page_owner = 0;
}

static int __init early_page_owner_param(char *buf)
{
	int ret = kstrtobool(buf, &page_owner_enabled);

	if (page_owner_enabled)
		stack_depot_request_early_init();

	return ret;
}
early_param("page_owner", early_page_owner_param);

static __init bool need_page_owner(void)
{
	return page_owner_enabled;
}

static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
	unsigned long entries[4];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static noinline void register_dummy_stack(void)
{
	dummy_handle = create_dummy_stack();
}

static noinline void register_failure_stack(void)
{
	failure_handle = create_dummy_stack();
}

static noinline void register_early_stack(void)
{
	early_handle = create_dummy_stack();
}

static __init void init_page_owner(void)
{
	if (!page_owner_enabled)
		return;

	register_dummy_stack();
	register_failure_stack();
	register_early_stack();
	init_early_allocated_pages();
	/* Initialize dummy and failure stacks and link them to stack_list */
	dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
	failure_stack.stack_record = __stack_depot_get_stack_record(failure_handle);
	if (dummy_stack.stack_record)
		refcount_set(&dummy_stack.stack_record->count, 1);
	if (failure_stack.stack_record)
		refcount_set(&failure_stack.stack_record->count, 1);
	dummy_stack.next = &failure_stack;
	stack_list = &dummy_stack;
	static_branch_enable(&page_owner_inited);
}

struct page_ext_operations page_owner_ops = {
	.size = sizeof(struct page_owner),
	.need = need_page_owner,
	.init = init_page_owner,
	.need_shared_flags = true,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
	return page_ext_data(page_ext, &page_owner_ops);
}

static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
	depot_stack_handle_t handle;
	unsigned int nr_entries;

	if (current->in_page_owner)
		return dummy_handle;

	set_current_in_page_owner();
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
	handle = stack_depot_save(entries, nr_entries, flags);
	if (!handle)
		handle = failure_handle;
	unset_current_in_page_owner();

	return handle;
}
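
/*
 * Per-stack accounting: each unique allocation stack is linked into
 * stack_list the first time it is seen, and its stack_record refcount
 * counts the base pages currently charged to that stack (read back by
 * stack_print()).
 */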
static void add_stack_record_to_list(struct stack_record *stack_record,
				     gfp_t gfp_mask)
{
	unsigned long flags;
	struct stack *stack;

	if (!gfpflags_allow_spinning(gfp_mask))
		return;

	set_current_in_page_owner();
	stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask));
	if (!stack) {
		unset_current_in_page_owner();
		return;
	}
	unset_current_in_page_owner();

	stack->stack_record = stack_record;
	stack->next = NULL;

	spin_lock_irqsave(&stack_list_lock, flags);
	stack->next = stack_list;
	/*
	 * This pairs with smp_load_acquire() from function
	 * stack_start(). This guarantees that stack_start()
	 * will see an updated stack_list before starting to
	 * traverse the list.
	 */
	smp_store_release(&stack_list, stack);
	spin_unlock_irqrestore(&stack_list_lock, flags);
}

static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
				   int nr_base_pages)
{
	struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

	if (!stack_record)
		return;

	/*
	 * New stack_records that do not use STACK_DEPOT_FLAG_GET start
	 * with REFCOUNT_SATURATED to catch spurious increments of their
	 * refcount.
	 * Since we do not use the STACK_DEPOT_FLAG_GET API, let us
	 * set a refcount of 1 ourselves.
	 */
	if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) {
		int old = REFCOUNT_SATURATED;

		if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
			/* Add the new stack_record to our list */
			add_stack_record_to_list(stack_record, gfp_mask);
	}
	refcount_add(nr_base_pages, &stack_record->count);
}

static void dec_stack_record_count(depot_stack_handle_t handle,
				   int nr_base_pages)
{
	struct stack_record *stack_record = __stack_depot_get_stack_record(handle);

	if (!stack_record)
		return;

	if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
		pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
			handle);
}
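
/*
 * Write the owner information into the page_ext of every base page of the
 * allocation, so any page within a high-order block can be looked up on
 * its own (e.g. by __dump_page_owner() or after a split).
 */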
static inline void __update_page_owner_handle(struct page *page,
					      depot_stack_handle_t handle,
					      unsigned short order,
					      gfp_t gfp_mask,
					      short last_migrate_reason, u64 ts_nsec,
					      pid_t pid, pid_t tgid, char *comm)
{
	struct page_ext_iter iter;
	struct page_ext *page_ext;
	struct page_owner *page_owner;

	rcu_read_lock();
	for_each_page_ext(page, 1 << order, page_ext, iter) {
		page_owner = get_page_owner(page_ext);
		page_owner->handle = handle;
		page_owner->order = order;
		page_owner->gfp_mask = gfp_mask;
		page_owner->last_migrate_reason = last_migrate_reason;
		page_owner->pid = pid;
		page_owner->tgid = tgid;
		page_owner->ts_nsec = ts_nsec;
		strscpy(page_owner->comm, comm,
			sizeof(page_owner->comm));
		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
	}
	rcu_read_unlock();
}

static inline void __update_page_owner_free_handle(struct page *page,
						   depot_stack_handle_t handle,
						   unsigned short order,
						   pid_t pid, pid_t tgid,
						   u64 free_ts_nsec)
{
	struct page_ext_iter iter;
	struct page_ext *page_ext;
	struct page_owner *page_owner;

	rcu_read_lock();
	for_each_page_ext(page, 1 << order, page_ext, iter) {
		page_owner = get_page_owner(page_ext);
		/* Only __reset_page_owner() wants to clear the bit */
		if (handle) {
			__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
			page_owner->free_handle = handle;
		}
		page_owner->free_ts_nsec = free_ts_nsec;
		page_owner->free_pid = current->pid;
		page_owner->free_tgid = current->tgid;
	}
	rcu_read_unlock();
}

void __reset_page_owner(struct page *page, unsigned short order)
{
	struct page_ext *page_ext;
	depot_stack_handle_t handle;
	depot_stack_handle_t alloc_handle;
	struct page_owner *page_owner;
	u64 free_ts_nsec = local_clock();

	page_ext = page_ext_get(page);
	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	alloc_handle = page_owner->handle;
	page_ext_put(page_ext);

	/*
	 * Do not specify GFP_NOWAIT to make gfpflags_allow_spinning() == false
	 * to prevent issues in stack_depot_save().
	 * This is similar to alloc_pages_nolock() gfp flags, but only used
	 * to signal stack_depot to avoid spin_locks.
	 */
	handle = save_stack(__GFP_NOWARN);
	__update_page_owner_free_handle(page, handle, order, current->pid,
					current->tgid, free_ts_nsec);

	if (alloc_handle != early_handle)
		/*
		 * early_handle is being set as a handle for all those
		 * early allocated pages. See init_pages_in_zone().
		 * Since their refcount is not being incremented because
		 * the machinery is not ready yet, we cannot decrement
		 * their refcount either.
		 */
		dec_stack_record_count(alloc_handle, 1 << order);
}

noinline void __set_page_owner(struct page *page, unsigned short order,
			       gfp_t gfp_mask)
{
	u64 ts_nsec = local_clock();
	depot_stack_handle_t handle;

	handle = save_stack(gfp_mask);
	__update_page_owner_handle(page, handle, order, gfp_mask, -1,
				   ts_nsec, current->pid, current->tgid,
				   current->comm);
	inc_stack_record_count(handle, gfp_mask, 1 << order);
}

void __folio_set_owner_migrate_reason(struct folio *folio, int reason)
{
	struct page_ext *page_ext = page_ext_get(&folio->page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	page_owner->last_migrate_reason = reason;
	page_ext_put(page_ext);
}

void __split_page_owner(struct page *page, int old_order, int new_order)
{
	struct page_ext_iter iter;
	struct page_ext *page_ext;
	struct page_owner *page_owner;

	rcu_read_lock();
	for_each_page_ext(page, 1 << old_order, page_ext, iter) {
		page_owner = get_page_owner(page_ext);
		page_owner->order = new_order;
	}
	rcu_read_unlock();
}

void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
	struct page_ext *page_ext;
	struct page_ext_iter iter;
	struct page_owner *old_page_owner;
	struct page_owner *new_page_owner;
	depot_stack_handle_t migrate_handle;

	page_ext = page_ext_get(&old->page);
	if (unlikely(!page_ext))
		return;

	old_page_owner = get_page_owner(page_ext);
	page_ext_put(page_ext);

	page_ext = page_ext_get(&newfolio->page);
	if (unlikely(!page_ext))
		return;

	new_page_owner = get_page_owner(page_ext);
	page_ext_put(page_ext);

	migrate_handle = new_page_owner->handle;
	__update_page_owner_handle(&newfolio->page, old_page_owner->handle,
				   old_page_owner->order, old_page_owner->gfp_mask,
				   old_page_owner->last_migrate_reason,
				   old_page_owner->ts_nsec, old_page_owner->pid,
				   old_page_owner->tgid, old_page_owner->comm);
	/*
	 * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
	 * will be freed after migration. Keep them until then as they may be
	 * useful.
	 */
	__update_page_owner_free_handle(&newfolio->page, 0, old_page_owner->order,
					old_page_owner->free_pid,
					old_page_owner->free_tgid,
					old_page_owner->free_ts_nsec);
	/*
	 * We linked the original stack to the new folio, so we need to do the
	 * same for the new one and the old folio, otherwise there will be an
	 * imbalance when subtracting those pages from the stack.
	 */
	rcu_read_lock();
	for_each_page_ext(&old->page, 1 << new_page_owner->order, page_ext, iter) {
		old_page_owner = get_page_owner(page_ext);
		old_page_owner->handle = migrate_handle;
	}
	rcu_read_unlock();
}
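
/*
 * Called from the /proc/pagetypeinfo code to report, per zone, how many
 * pageblocks contain at least one page whose allocation migratetype does
 * not match the migratetype of the pageblock it sits in ("mixed" blocks).
 */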
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
				       pg_data_t *pgdat, struct zone *zone)
{
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	unsigned long pfn, block_end_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };
	int pageblock_mt, page_mt;
	int i;

	/* Scan block by block. First and last block may be incomplete */
	pfn = zone->zone_start_pfn;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		page = pfn_to_online_page(pfn);
		if (!page) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		pageblock_mt = get_pageblock_migratetype(page);

		for (; pfn < block_end_pfn; pfn++) {
			/* The pageblock is online, no need to recheck. */
			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			if (PageBuddy(page)) {
				unsigned long freepage_order;

				freepage_order = buddy_order_unsafe(page);
				if (freepage_order <= MAX_PAGE_ORDER)
					pfn += (1UL << freepage_order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
				goto ext_put_continue;

			page_owner = get_page_owner(page_ext);
			page_mt = gfp_migratetype(page_owner->gfp_mask);
			if (pageblock_mt != page_mt) {
				if (is_migrate_cma(pageblock_mt))
					count[MIGRATE_MOVABLE]++;
				else
					count[pageblock_mt]++;

				pfn = block_end_pfn;
				page_ext_put(page_ext);
				break;
			}
			pfn += (1UL << page_owner->order) - 1;
ext_put_continue:
			page_ext_put(page_ext);
		}
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (i = 0; i < MIGRATE_TYPES; i++)
		seq_printf(m, "%12lu ", count[i]);
	seq_putc(m, '\n');
}

/*
 * Look up memcg information and print it out
 */
static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
					 struct page *page)
{
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
	struct mem_cgroup *memcg;
	bool online;
	char name[80];

	rcu_read_lock();
	memcg_data = READ_ONCE(page->memcg_data);
	if (!memcg_data || PageTail(page))
		goto out_unlock;

	if (memcg_data & MEMCG_DATA_OBJEXTS)
		ret += scnprintf(kbuf + ret, count - ret,
				"Slab cache page\n");

	memcg = page_memcg_check(page);
	if (!memcg)
		goto out_unlock;

	online = (memcg->css.flags & CSS_ONLINE);
	cgroup_name(memcg->css.cgroup, name, sizeof(name));
	ret += scnprintf(kbuf + ret, count - ret,
			"Charged %sto %smemcg %s\n",
			PageMemcgKmem(page) ? "(via objcg) " : "",
			online ? "" : "offline ",
			name);
out_unlock:
	rcu_read_unlock();
#endif /* CONFIG_MEMCG */

	return ret;
}
"" : "offline ", 539 name); 540out_unlock: 541 rcu_read_unlock(); 542#endif /* CONFIG_MEMCG */ 543 544 return ret; 545} 546 547static ssize_t 548print_page_owner(char __user *buf, size_t count, unsigned long pfn, 549 struct page *page, struct page_owner *page_owner, 550 depot_stack_handle_t handle) 551{ 552 int ret, pageblock_mt, page_mt; 553 char *kbuf; 554 555 count = min_t(size_t, count, PAGE_SIZE); 556 kbuf = kmalloc(count, GFP_KERNEL); 557 if (!kbuf) 558 return -ENOMEM; 559 560 ret = scnprintf(kbuf, count, 561 "Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns\n", 562 page_owner->order, page_owner->gfp_mask, 563 &page_owner->gfp_mask, page_owner->pid, 564 page_owner->tgid, page_owner->comm, 565 page_owner->ts_nsec); 566 567 /* Print information relevant to grouping pages by mobility */ 568 pageblock_mt = get_pageblock_migratetype(page); 569 page_mt = gfp_migratetype(page_owner->gfp_mask); 570 ret += scnprintf(kbuf + ret, count - ret, 571 "PFN 0x%lx type %s Block %lu type %s Flags %pGp\n", 572 pfn, 573 migratetype_names[page_mt], 574 pfn >> pageblock_order, 575 migratetype_names[pageblock_mt], 576 &page->flags); 577 578 ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0); 579 if (ret >= count) 580 goto err; 581 582 if (page_owner->last_migrate_reason != -1) { 583 ret += scnprintf(kbuf + ret, count - ret, 584 "Page has been migrated, last migrate reason: %s\n", 585 migrate_reason_names[page_owner->last_migrate_reason]); 586 } 587 588 ret = print_page_owner_memcg(kbuf, count, ret, page); 589 590 ret += snprintf(kbuf + ret, count - ret, "\n"); 591 if (ret >= count) 592 goto err; 593 594 if (copy_to_user(buf, kbuf, ret)) 595 ret = -EFAULT; 596 597 kfree(kbuf); 598 return ret; 599 600err: 601 kfree(kbuf); 602 return -ENOMEM; 603} 604 605void __dump_page_owner(const struct page *page) 606{ 607 struct page_ext *page_ext = page_ext_get((void *)page); 608 struct page_owner *page_owner; 609 depot_stack_handle_t handle; 610 gfp_t gfp_mask; 611 int mt; 612 613 if (unlikely(!page_ext)) { 614 pr_alert("There is not page extension available.\n"); 615 return; 616 } 617 618 page_owner = get_page_owner(page_ext); 619 gfp_mask = page_owner->gfp_mask; 620 mt = gfp_migratetype(gfp_mask); 621 622 if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { 623 pr_alert("page_owner info is not present (never set?)\n"); 624 page_ext_put(page_ext); 625 return; 626 } 627 628 if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) 629 pr_alert("page_owner tracks the page as allocated\n"); 630 else 631 pr_alert("page_owner tracks the page as freed\n"); 632 633 pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n", 634 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask, 635 page_owner->pid, page_owner->tgid, page_owner->comm, 636 page_owner->ts_nsec, page_owner->free_ts_nsec); 637 638 handle = READ_ONCE(page_owner->handle); 639 if (!handle) 640 pr_alert("page_owner allocation stack trace missing\n"); 641 else 642 stack_depot_print(handle); 643 644 handle = READ_ONCE(page_owner->free_handle); 645 if (!handle) { 646 pr_alert("page_owner free stack trace missing\n"); 647 } else { 648 pr_alert("page last free pid %d tgid %d stack trace:\n", 649 page_owner->free_pid, page_owner->free_tgid); 650 stack_depot_print(handle); 651 } 652 653 if (page_owner->last_migrate_reason != -1) 654 pr_alert("page has been migrated, last migrate reason: %s\n", 655 migrate_reason_names[page_owner->last_migrate_reason]); 656 
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	depot_stack_handle_t handle;

	if (!static_branch_unlikely(&page_owner_inited))
		return -EINVAL;

	page = NULL;
	if (*ppos == 0)
		pfn = min_low_pfn;
	else
		pfn = *ppos;
	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * This temporary page_owner is required so
		 * that we can avoid the context switches while holding
		 * the rcu lock and copying the page owner information to
		 * user through copy_to_user() or GFP_KERNEL allocations.
		 */
		struct page_owner page_owner_tmp;

		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = buddy_order_unsafe(page);

			if (freepage_order <= MAX_PAGE_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = page_ext_get(page);
		if (unlikely(!page_ext))
			continue;

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			goto ext_put_continue;

		/*
		 * Although we do have the info about past allocation of free
		 * pages, it's not relevant for current memory usage.
		 */
		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
			goto ext_put_continue;

		page_owner = get_page_owner(page_ext);

		/*
		 * Don't print "tail" pages of high-order allocations as that
		 * would inflate the stats.
		 */
		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
			goto ext_put_continue;

		/*
		 * Access to page_ext->handle isn't synchronous so we should
		 * be careful to access it.
		 */
		handle = READ_ONCE(page_owner->handle);
		if (!handle)
			goto ext_put_continue;

		/* Record the next PFN to read in the file offset */
		*ppos = pfn + 1;

		page_owner_tmp = *page_owner;
		page_ext_put(page_ext);
		return print_page_owner(buf, count, pfn, page,
				&page_owner_tmp, handle);
ext_put_continue:
		page_ext_put(page_ext);
	}

	return 0;
}

static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig)
{
	switch (orig) {
	case SEEK_SET:
		file->f_pos = offset;
		break;
	case SEEK_CUR:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	return file->f_pos;
}
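
/*
 * Pages allocated before page_owner was fully initialized carry no owner
 * data. Walk each zone once at init time and attribute every such page to
 * the shared early_handle, so they at least show up as early allocations.
 */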
static void init_pages_in_zone(struct zone *zone)
{
	unsigned long pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count = 0;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		unsigned long block_end_pfn;

		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		for (; pfn < block_end_pfn; pfn++) {
			struct page *page = pfn_to_page(pfn);
			struct page_ext *page_ext;

			if (page_zone(page) != zone)
				continue;

			/*
			 * To avoid having to grab zone->lock, be a little
			 * careful when reading buddy page order. The only
			 * danger is that we skip too much and potentially miss
			 * some early allocated pages, which is better than
			 * heavy lock contention.
			 */
			if (PageBuddy(page)) {
				unsigned long order = buddy_order_unsafe(page);

				if (order > 0 && order <= MAX_PAGE_ORDER)
					pfn += (1UL << order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			/* Maybe overlapping zone */
			if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
				goto ext_put_continue;

			/* Found early allocated page */
			__update_page_owner_handle(page, early_handle, 0, 0,
						   -1, local_clock(), current->pid,
						   current->tgid, current->comm);
			count++;
ext_put_continue:
			page_ext_put(page_ext);
		}
		cond_resched();
	}

	pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
		zone->zone_pgdat->node_id, zone->name, count);
}

static void init_early_allocated_pages(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		init_pages_in_zone(zone);
}

static const struct file_operations page_owner_fops = {
	.read = read_page_owner,
	.llseek = lseek_page_owner,
};
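
/*
 * seq_file iterator over stack_list. Traversal is lockless: new entries are
 * only ever added at the head (published with smp_store_release()) and
 * nothing is ever removed, so once a reader has loaded the head with
 * smp_load_acquire() it can safely follow ->next pointers.
 */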
static void *stack_start(struct seq_file *m, loff_t *ppos)
{
	struct stack *stack;
	struct stack_print_ctx *ctx = m->private;

	if (*ppos == -1UL)
		return NULL;

	if (!*ppos) {
		/*
		 * This pairs with smp_store_release() from function
		 * add_stack_record_to_list(), so we get a consistent
		 * value of stack_list.
		 */
		stack = smp_load_acquire(&stack_list);
		ctx->stack = stack;
	} else {
		stack = ctx->stack;
	}

	return stack;
}

static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
{
	struct stack *stack = v;
	struct stack_print_ctx *ctx = m->private;

	stack = stack->next;
	*ppos = stack ? *ppos + 1 : -1UL;
	ctx->stack = stack;

	return stack;
}

static unsigned long page_owner_pages_threshold;

static int stack_print(struct seq_file *m, void *v)
{
	int i, nr_base_pages;
	struct stack *stack = v;
	unsigned long *entries;
	unsigned long nr_entries;
	struct stack_record *stack_record = stack->stack_record;
	struct stack_print_ctx *ctx = m->private;

	if (!stack->stack_record)
		return 0;

	nr_base_pages = refcount_read(&stack_record->count) - 1;

	if (ctx->flags & STACK_PRINT_FLAG_PAGES &&
	    (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold))
		return 0;

	if (ctx->flags & STACK_PRINT_FLAG_STACK) {
		nr_entries = stack_record->size;
		entries = stack_record->entries;
		for (i = 0; i < nr_entries; i++)
			seq_printf(m, " %pS\n", (void *)entries[i]);
	}
	if (ctx->flags & STACK_PRINT_FLAG_HANDLE)
		seq_printf(m, "handle: %d\n", stack_record->handle.handle);
	if (ctx->flags & STACK_PRINT_FLAG_PAGES)
		seq_printf(m, "nr_base_pages: %d\n", nr_base_pages);
	seq_putc(m, '\n');

	return 0;
}

static void stack_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations page_owner_stack_op = {
	.start = stack_start,
	.next = stack_next,
	.stop = stack_stop,
	.show = stack_print
};

static int page_owner_stack_open(struct inode *inode, struct file *file)
{
	int ret = seq_open_private(file, &page_owner_stack_op,
				   sizeof(struct stack_print_ctx));

	if (!ret) {
		struct seq_file *m = file->private_data;
		struct stack_print_ctx *ctx = m->private;

		ctx->flags = (uintptr_t) inode->i_private;
	}

	return ret;
}

static const struct file_operations page_owner_stack_fops = {
	.open = page_owner_stack_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};

static int page_owner_threshold_get(void *data, u64 *val)
{
	*val = READ_ONCE(page_owner_pages_threshold);
	return 0;
}

static int page_owner_threshold_set(void *data, u64 val)
{
	WRITE_ONCE(page_owner_pages_threshold, val);
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(page_owner_threshold_fops, &page_owner_threshold_get,
			&page_owner_threshold_set, "%llu");

static int __init pageowner_init(void)
{
	struct dentry *dir;

	if (!static_branch_unlikely(&page_owner_inited)) {
		pr_info("page_owner is disabled\n");
		return 0;
	}

	debugfs_create_file("page_owner", 0400, NULL, NULL, &page_owner_fops);
	dir = debugfs_create_dir("page_owner_stacks", NULL);
	debugfs_create_file("show_stacks", 0400, dir,
			    (void *)(STACK_PRINT_FLAG_STACK |
				     STACK_PRINT_FLAG_PAGES),
			    &page_owner_stack_fops);
	debugfs_create_file("show_handles", 0400, dir,
			    (void *)(STACK_PRINT_FLAG_HANDLE |
				     STACK_PRINT_FLAG_PAGES),
			    &page_owner_stack_fops);
	debugfs_create_file("show_stacks_handles", 0400, dir,
			    (void *)(STACK_PRINT_FLAG_STACK |
				     STACK_PRINT_FLAG_HANDLE),
			    &page_owner_stack_fops);
	debugfs_create_file("count_threshold", 0600, dir, NULL,
			    &page_owner_threshold_fops);
	return 0;
}
late_initcall(pageowner_init)
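
/*
 * Example usage from userspace (a sketch, not part of this file's code):
 * assuming the kernel was booted with the "page_owner=on" parameter and
 * debugfs is mounted at /sys/kernel/debug, the files created above can be
 * used roughly as follows:
 *
 *	# one record per page currently tracked as allocated
 *	cat /sys/kernel/debug/page_owner > page_owner_full.txt
 *
 *	# only list allocation stacks holding at least 1000 base pages
 *	echo 1000 > /sys/kernel/debug/page_owner_stacks/count_threshold
 *	cat /sys/kernel/debug/page_owner_stacks/show_stacks
 */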