Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: page_alloc: introduce alloc_contig_range()

This commit adds the alloc_contig_range() function, which tries
to allocate a given range of pages. It tries to migrate all
already-allocated pages that fall in the range, thus freeing them.
Once all pages in the range are freed, they are removed from the
buddy system and are therefore allocated for the caller to use.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>

Authored by Michal Nazarewicz; committed by Marek Szyprowski.
041d3a8c ff9543fd

+196
+8
include/linux/gfp.h
··· 391 391 } 392 392 #endif /* CONFIG_PM_SLEEP */ 393 393 394 + #ifdef CONFIG_CMA 395 + 396 + /* The below functions must be run on a range from a single zone. */ 397 + extern int alloc_contig_range(unsigned long start, unsigned long end); 398 + extern void free_contig_range(unsigned long pfn, unsigned nr_pages); 399 + 400 + #endif 401 + 394 402 #endif /* __LINUX_GFP_H */
+188
mm/page_alloc.c
··· 57 57 #include <linux/ftrace_event.h> 58 58 #include <linux/memcontrol.h> 59 59 #include <linux/prefetch.h> 60 + #include <linux/migrate.h> 60 61 #include <linux/page-debug-flags.h> 61 62 62 63 #include <asm/tlbflush.h> ··· 5550 5549 out: 5551 5550 spin_unlock_irqrestore(&zone->lock, flags); 5552 5551 } 5552 + 5553 + #ifdef CONFIG_CMA 5554 + 5555 + static unsigned long pfn_max_align_down(unsigned long pfn) 5556 + { 5557 + return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, 5558 + pageblock_nr_pages) - 1); 5559 + } 5560 + 5561 + static unsigned long pfn_max_align_up(unsigned long pfn) 5562 + { 5563 + return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, 5564 + pageblock_nr_pages)); 5565 + } 5566 + 5567 + static struct page * 5568 + __alloc_contig_migrate_alloc(struct page *page, unsigned long private, 5569 + int **resultp) 5570 + { 5571 + return alloc_page(GFP_HIGHUSER_MOVABLE); 5572 + } 5573 + 5574 + /* [start, end) must belong to a single zone. */ 5575 + static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) 5576 + { 5577 + /* This function is based on compact_zone() from compaction.c. */ 5578 + 5579 + unsigned long pfn = start; 5580 + unsigned int tries = 0; 5581 + int ret = 0; 5582 + 5583 + struct compact_control cc = { 5584 + .nr_migratepages = 0, 5585 + .order = -1, 5586 + .zone = page_zone(pfn_to_page(start)), 5587 + .sync = true, 5588 + }; 5589 + INIT_LIST_HEAD(&cc.migratepages); 5590 + 5591 + migrate_prep_local(); 5592 + 5593 + while (pfn < end || !list_empty(&cc.migratepages)) { 5594 + if (fatal_signal_pending(current)) { 5595 + ret = -EINTR; 5596 + break; 5597 + } 5598 + 5599 + if (list_empty(&cc.migratepages)) { 5600 + cc.nr_migratepages = 0; 5601 + pfn = isolate_migratepages_range(cc.zone, &cc, 5602 + pfn, end); 5603 + if (!pfn) { 5604 + ret = -EINTR; 5605 + break; 5606 + } 5607 + tries = 0; 5608 + } else if (++tries == 5) { 5609 + ret = ret < 0 ? 
ret : -EBUSY; 5610 + break; 5611 + } 5612 + 5613 + ret = migrate_pages(&cc.migratepages, 5614 + __alloc_contig_migrate_alloc, 5615 + 0, false, true); 5616 + } 5617 + 5618 + putback_lru_pages(&cc.migratepages); 5619 + return ret > 0 ? 0 : ret; 5620 + } 5621 + 5622 + /** 5623 + * alloc_contig_range() -- tries to allocate given range of pages 5624 + * @start: start PFN to allocate 5625 + * @end: one-past-the-last PFN to allocate 5626 + * 5627 + * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES 5628 + * aligned, however it's the caller's responsibility to guarantee that 5629 + * we are the only thread that changes migrate type of pageblocks the 5630 + * pages fall in. 5631 + * 5632 + * The PFN range must belong to a single zone. 5633 + * 5634 + * Returns zero on success or negative error code. On success all 5635 + * pages which PFN is in [start, end) are allocated for the caller and 5636 + * need to be freed with free_contig_range(). 5637 + */ 5638 + int alloc_contig_range(unsigned long start, unsigned long end) 5639 + { 5640 + struct zone *zone = page_zone(pfn_to_page(start)); 5641 + unsigned long outer_start, outer_end; 5642 + int ret = 0, order; 5643 + 5644 + /* 5645 + * What we do here is we mark all pageblocks in range as 5646 + * MIGRATE_ISOLATE. Because pageblock and max order pages may 5647 + * have different sizes, and due to the way page allocator 5648 + * work, we align the range to biggest of the two pages so 5649 + * that page allocator won't try to merge buddies from 5650 + * different pageblocks and change MIGRATE_ISOLATE to some 5651 + * other migration type. 5652 + * 5653 + * Once the pageblocks are marked as MIGRATE_ISOLATE, we 5654 + * migrate the pages from an unaligned range (ie. pages that 5655 + * we are interested in). This will put all the pages in 5656 + * range back to page allocator as MIGRATE_ISOLATE. 
5657 + * 5658 + * When this is done, we take the pages in range from page 5659 + * allocator removing them from the buddy system. This way 5660 + * page allocator will never consider using them. 5661 + * 5662 + * This lets us mark the pageblocks back as 5663 + * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the 5664 + * aligned range but not in the unaligned, original range are 5665 + * put back to page allocator so that buddy can use them. 5666 + */ 5667 + 5668 + ret = start_isolate_page_range(pfn_max_align_down(start), 5669 + pfn_max_align_up(end)); 5670 + if (ret) 5671 + goto done; 5672 + 5673 + ret = __alloc_contig_migrate_range(start, end); 5674 + if (ret) 5675 + goto done; 5676 + 5677 + /* 5678 + * Pages from [start, end) are within a MAX_ORDER_NR_PAGES 5679 + * aligned blocks that are marked as MIGRATE_ISOLATE. What's 5680 + * more, all pages in [start, end) are free in page allocator. 5681 + * What we are going to do is to allocate all pages from 5682 + * [start, end) (that is remove them from page allocator). 5683 + * 5684 + * The only problem is that pages at the beginning and at the 5685 + * end of interesting range may be not aligned with pages that 5686 + * page allocator holds, ie. they can be part of higher order 5687 + * pages. Because of this, we reserve the bigger range and 5688 + * once this is done free the pages we are not interested in. 5689 + * 5690 + * We don't have to hold zone->lock here because the pages are 5691 + * isolated thus they won't get removed from buddy. 5692 + */ 5693 + 5694 + lru_add_drain_all(); 5695 + drain_all_pages(); 5696 + 5697 + order = 0; 5698 + outer_start = start; 5699 + while (!PageBuddy(pfn_to_page(outer_start))) { 5700 + if (++order >= MAX_ORDER) { 5701 + ret = -EBUSY; 5702 + goto done; 5703 + } 5704 + outer_start &= ~0UL << order; 5705 + } 5706 + 5707 + /* Make sure the range is really isolated. 
*/ 5708 + if (test_pages_isolated(outer_start, end)) { 5709 + pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", 5710 + outer_start, end); 5711 + ret = -EBUSY; 5712 + goto done; 5713 + } 5714 + 5715 + outer_end = isolate_freepages_range(outer_start, end); 5716 + if (!outer_end) { 5717 + ret = -EBUSY; 5718 + goto done; 5719 + } 5720 + 5721 + /* Free head and tail (if any) */ 5722 + if (start != outer_start) 5723 + free_contig_range(outer_start, start - outer_start); 5724 + if (end != outer_end) 5725 + free_contig_range(end, outer_end - end); 5726 + 5727 + done: 5728 + undo_isolate_page_range(pfn_max_align_down(start), 5729 + pfn_max_align_up(end)); 5730 + return ret; 5731 + } 5732 + 5733 + void free_contig_range(unsigned long pfn, unsigned nr_pages) 5734 + { 5735 + for (; nr_pages--; ++pfn) 5736 + __free_page(pfn_to_page(pfn)); 5737 + } 5738 + #endif 5553 5739 5554 5740 #ifdef CONFIG_MEMORY_HOTREMOVE 5555 5741 /*