Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.0-rc2 1153 lines 31 kB view raw
1/* 2 * z3fold.c 3 * 4 * Author: Vitaly Wool <vitaly.wool@konsulko.com> 5 * Copyright (C) 2016, Sony Mobile Communications Inc. 6 * 7 * This implementation is based on zbud written by Seth Jennings. 8 * 9 * z3fold is an special purpose allocator for storing compressed pages. It 10 * can store up to three compressed pages per page which improves the 11 * compression ratio of zbud while retaining its main concepts (e. g. always 12 * storing an integral number of objects per page) and simplicity. 13 * It still has simple and deterministic reclaim properties that make it 14 * preferable to a higher density approach (with no requirement on integral 15 * number of object per page) when reclaim is used. 16 * 17 * As in zbud, pages are divided into "chunks". The size of the chunks is 18 * fixed at compile time and is determined by NCHUNKS_ORDER below. 19 * 20 * z3fold doesn't export any API and is meant to be used via zpool API. 21 */ 22 23#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 24 25#include <linux/atomic.h> 26#include <linux/sched.h> 27#include <linux/list.h> 28#include <linux/mm.h> 29#include <linux/module.h> 30#include <linux/percpu.h> 31#include <linux/preempt.h> 32#include <linux/workqueue.h> 33#include <linux/slab.h> 34#include <linux/spinlock.h> 35#include <linux/zpool.h> 36 37/***************** 38 * Structures 39*****************/ 40struct z3fold_pool; 41struct z3fold_ops { 42 int (*evict)(struct z3fold_pool *pool, unsigned long handle); 43}; 44 45enum buddy { 46 HEADLESS = 0, 47 FIRST, 48 MIDDLE, 49 LAST, 50 BUDDIES_MAX 51}; 52 53/* 54 * struct z3fold_header - z3fold page metadata occupying first chunks of each 55 * z3fold page, except for HEADLESS pages 56 * @buddy: links the z3fold page into the relevant list in the 57 * pool 58 * @page_lock: per-page lock 59 * @refcount: reference count for the z3fold page 60 * @work: work_struct for page layout optimization 61 * @pool: pointer to the pool which this page belongs to 62 * @cpu: CPU which this page "belongs" to 63 * @first_chunks: the size of the first buddy in chunks, 0 if free 64 * @middle_chunks: the size of the middle buddy in chunks, 0 if free 65 * @last_chunks: the size of the last buddy in chunks, 0 if free 66 * @first_num: the starting number (for the first handle) 67 */ 68struct z3fold_header { 69 struct list_head buddy; 70 spinlock_t page_lock; 71 struct kref refcount; 72 struct work_struct work; 73 struct z3fold_pool *pool; 74 short cpu; 75 unsigned short first_chunks; 76 unsigned short middle_chunks; 77 unsigned short last_chunks; 78 unsigned short start_middle; 79 unsigned short first_num:2; 80}; 81 82/* 83 * NCHUNKS_ORDER determines the internal allocation granularity, effectively 84 * adjusting internal fragmentation. It also determines the number of 85 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the 86 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks 87 * in the beginning of an allocated page are occupied by z3fold header, so 88 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y), 89 * which shows the max number of free chunks in z3fold page, also there will 90 * be 63, or 62, respectively, freelists per pool. 91 */ 92#define NCHUNKS_ORDER 6 93 94#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER) 95#define CHUNK_SIZE (1 << CHUNK_SHIFT) 96#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE) 97#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT) 98#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT) 99#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) 100 101#define BUDDY_MASK (0x3) 102#define BUDDY_SHIFT 2 103 104/** 105 * struct z3fold_pool - stores metadata for each z3fold pool 106 * @name: pool name 107 * @lock: protects pool unbuddied/lru lists 108 * @stale_lock: protects pool stale page list 109 * @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2- 110 * buddies; the list each z3fold page is added to depends on 111 * the size of its free region. 112 * @lru: list tracking the z3fold pages in LRU order by most recently 113 * added buddy. 114 * @stale: list of pages marked for freeing 115 * @pages_nr: number of z3fold pages in the pool. 116 * @ops: pointer to a structure of user defined operations specified at 117 * pool creation time. 118 * @compact_wq: workqueue for page layout background optimization 119 * @release_wq: workqueue for safe page release 120 * @work: work_struct for safe page release 121 * 122 * This structure is allocated at pool creation time and maintains metadata 123 * pertaining to a particular z3fold pool. 124 */ 125struct z3fold_pool { 126 const char *name; 127 spinlock_t lock; 128 spinlock_t stale_lock; 129 struct list_head *unbuddied; 130 struct list_head lru; 131 struct list_head stale; 132 atomic64_t pages_nr; 133 const struct z3fold_ops *ops; 134 struct zpool *zpool; 135 const struct zpool_ops *zpool_ops; 136 struct workqueue_struct *compact_wq; 137 struct workqueue_struct *release_wq; 138 struct work_struct work; 139}; 140 141/* 142 * Internal z3fold page flags 143 */ 144enum z3fold_page_flags { 145 PAGE_HEADLESS = 0, 146 MIDDLE_CHUNK_MAPPED, 147 NEEDS_COMPACTING, 148 PAGE_STALE, 149 PAGE_CLAIMED, /* by either reclaim or free */ 150}; 151 152/***************** 153 * Helpers 154*****************/ 155 156/* Converts an allocation size in bytes to size in z3fold chunks */ 157static int size_to_chunks(size_t size) 158{ 159 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 160} 161 162#define for_each_unbuddied_list(_iter, _begin) \ 163 for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++) 164 165static void compact_page_work(struct work_struct *w); 166 167/* Initializes the z3fold header of a newly allocated z3fold page */ 168static struct z3fold_header *init_z3fold_page(struct page *page, 169 struct z3fold_pool *pool) 170{ 171 struct z3fold_header *zhdr = page_address(page); 172 173 INIT_LIST_HEAD(&page->lru); 174 clear_bit(PAGE_HEADLESS, &page->private); 175 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 176 clear_bit(NEEDS_COMPACTING, &page->private); 177 clear_bit(PAGE_STALE, &page->private); 178 clear_bit(PAGE_CLAIMED, &page->private); 179 180 spin_lock_init(&zhdr->page_lock); 181 kref_init(&zhdr->refcount); 182 zhdr->first_chunks = 0; 183 zhdr->middle_chunks = 0; 184 zhdr->last_chunks = 0; 185 zhdr->first_num = 0; 186 zhdr->start_middle = 0; 187 zhdr->cpu = -1; 188 zhdr->pool = pool; 189 INIT_LIST_HEAD(&zhdr->buddy); 190 INIT_WORK(&zhdr->work, compact_page_work); 191 return zhdr; 192} 193 194/* Resets the struct page fields and frees the page */ 195static void free_z3fold_page(struct page *page) 196{ 197 __free_page(page); 198} 199 200/* Lock a z3fold page */ 201static inline void z3fold_page_lock(struct z3fold_header *zhdr) 202{ 203 spin_lock(&zhdr->page_lock); 204} 205 206/* Try to lock a z3fold page */ 207static inline int z3fold_page_trylock(struct z3fold_header *zhdr) 208{ 209 return spin_trylock(&zhdr->page_lock); 210} 211 212/* Unlock a z3fold page */ 213static inline void z3fold_page_unlock(struct z3fold_header *zhdr) 214{ 215 spin_unlock(&zhdr->page_lock); 216} 217 218/* 219 * Encodes the handle of a particular buddy within a z3fold page 220 * Pool lock should be held as this function accesses first_num 221 */ 222static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) 223{ 224 unsigned long handle; 225 226 handle = (unsigned long)zhdr; 227 if (bud != HEADLESS) { 228 handle |= (bud + zhdr->first_num) & BUDDY_MASK; 229 if (bud == LAST) 230 handle |= (zhdr->last_chunks << BUDDY_SHIFT); 231 } 232 return handle; 233} 234 235/* Returns the z3fold page where a given handle is stored */ 236static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) 237{ 238 return (struct z3fold_header *)(handle & PAGE_MASK); 239} 240 241/* only for LAST bud, returns zero otherwise */ 242static unsigned short handle_to_chunks(unsigned long handle) 243{ 244 return (handle & ~PAGE_MASK) >> BUDDY_SHIFT; 245} 246 247/* 248 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle 249 * but that doesn't matter. because the masking will result in the 250 * correct buddy number. 251 */ 252static enum buddy handle_to_buddy(unsigned long handle) 253{ 254 struct z3fold_header *zhdr = handle_to_z3fold_header(handle); 255 return (handle - zhdr->first_num) & BUDDY_MASK; 256} 257 258static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) 259{ 260 struct page *page = virt_to_page(zhdr); 261 struct z3fold_pool *pool = zhdr->pool; 262 263 WARN_ON(!list_empty(&zhdr->buddy)); 264 set_bit(PAGE_STALE, &page->private); 265 clear_bit(NEEDS_COMPACTING, &page->private); 266 spin_lock(&pool->lock); 267 if (!list_empty(&page->lru)) 268 list_del(&page->lru); 269 spin_unlock(&pool->lock); 270 if (locked) 271 z3fold_page_unlock(zhdr); 272 spin_lock(&pool->stale_lock); 273 list_add(&zhdr->buddy, &pool->stale); 274 queue_work(pool->release_wq, &pool->work); 275 spin_unlock(&pool->stale_lock); 276} 277 278static void __attribute__((__unused__)) 279 release_z3fold_page(struct kref *ref) 280{ 281 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 282 refcount); 283 __release_z3fold_page(zhdr, false); 284} 285 286static void release_z3fold_page_locked(struct kref *ref) 287{ 288 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 289 refcount); 290 WARN_ON(z3fold_page_trylock(zhdr)); 291 __release_z3fold_page(zhdr, true); 292} 293 294static void release_z3fold_page_locked_list(struct kref *ref) 295{ 296 struct z3fold_header *zhdr = container_of(ref, struct z3fold_header, 297 refcount); 298 spin_lock(&zhdr->pool->lock); 299 list_del_init(&zhdr->buddy); 300 spin_unlock(&zhdr->pool->lock); 301 302 WARN_ON(z3fold_page_trylock(zhdr)); 303 __release_z3fold_page(zhdr, true); 304} 305 306static void free_pages_work(struct work_struct *w) 307{ 308 struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work); 309 310 spin_lock(&pool->stale_lock); 311 while (!list_empty(&pool->stale)) { 312 struct z3fold_header *zhdr = list_first_entry(&pool->stale, 313 struct z3fold_header, buddy); 314 struct page *page = virt_to_page(zhdr); 315 316 list_del(&zhdr->buddy); 317 if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) 318 continue; 319 spin_unlock(&pool->stale_lock); 320 cancel_work_sync(&zhdr->work); 321 free_z3fold_page(page); 322 cond_resched(); 323 spin_lock(&pool->stale_lock); 324 } 325 spin_unlock(&pool->stale_lock); 326} 327 328/* 329 * Returns the number of free chunks in a z3fold page. 330 * NB: can't be used with HEADLESS pages. 331 */ 332static int num_free_chunks(struct z3fold_header *zhdr) 333{ 334 int nfree; 335 /* 336 * If there is a middle object, pick up the bigger free space 337 * either before or after it. Otherwise just subtract the number 338 * of chunks occupied by the first and the last objects. 339 */ 340 if (zhdr->middle_chunks != 0) { 341 int nfree_before = zhdr->first_chunks ? 342 0 : zhdr->start_middle - ZHDR_CHUNKS; 343 int nfree_after = zhdr->last_chunks ? 344 0 : TOTAL_CHUNKS - 345 (zhdr->start_middle + zhdr->middle_chunks); 346 nfree = max(nfree_before, nfree_after); 347 } else 348 nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks; 349 return nfree; 350} 351 352static inline void *mchunk_memmove(struct z3fold_header *zhdr, 353 unsigned short dst_chunk) 354{ 355 void *beg = zhdr; 356 return memmove(beg + (dst_chunk << CHUNK_SHIFT), 357 beg + (zhdr->start_middle << CHUNK_SHIFT), 358 zhdr->middle_chunks << CHUNK_SHIFT); 359} 360 361#define BIG_CHUNK_GAP 3 362/* Has to be called with lock held */ 363static int z3fold_compact_page(struct z3fold_header *zhdr) 364{ 365 struct page *page = virt_to_page(zhdr); 366 367 if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) 368 return 0; /* can't move middle chunk, it's used */ 369 370 if (zhdr->middle_chunks == 0) 371 return 0; /* nothing to compact */ 372 373 if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) { 374 /* move to the beginning */ 375 mchunk_memmove(zhdr, ZHDR_CHUNKS); 376 zhdr->first_chunks = zhdr->middle_chunks; 377 zhdr->middle_chunks = 0; 378 zhdr->start_middle = 0; 379 zhdr->first_num++; 380 return 1; 381 } 382 383 /* 384 * moving data is expensive, so let's only do that if 385 * there's substantial gain (at least BIG_CHUNK_GAP chunks) 386 */ 387 if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 && 388 zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >= 389 BIG_CHUNK_GAP) { 390 mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS); 391 zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 392 return 1; 393 } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 && 394 TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle 395 + zhdr->middle_chunks) >= 396 BIG_CHUNK_GAP) { 397 unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks - 398 zhdr->middle_chunks; 399 mchunk_memmove(zhdr, new_start); 400 zhdr->start_middle = new_start; 401 return 1; 402 } 403 404 return 0; 405} 406 407static void do_compact_page(struct z3fold_header *zhdr, bool locked) 408{ 409 struct z3fold_pool *pool = zhdr->pool; 410 struct page *page; 411 struct list_head *unbuddied; 412 int fchunks; 413 414 page = virt_to_page(zhdr); 415 if (locked) 416 WARN_ON(z3fold_page_trylock(zhdr)); 417 else 418 z3fold_page_lock(zhdr); 419 if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { 420 z3fold_page_unlock(zhdr); 421 return; 422 } 423 spin_lock(&pool->lock); 424 list_del_init(&zhdr->buddy); 425 spin_unlock(&pool->lock); 426 427 if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { 428 atomic64_dec(&pool->pages_nr); 429 return; 430 } 431 432 z3fold_compact_page(zhdr); 433 unbuddied = get_cpu_ptr(pool->unbuddied); 434 fchunks = num_free_chunks(zhdr); 435 if (fchunks < NCHUNKS && 436 (!zhdr->first_chunks || !zhdr->middle_chunks || 437 !zhdr->last_chunks)) { 438 /* the page's not completely free and it's unbuddied */ 439 spin_lock(&pool->lock); 440 list_add(&zhdr->buddy, &unbuddied[fchunks]); 441 spin_unlock(&pool->lock); 442 zhdr->cpu = smp_processor_id(); 443 } 444 put_cpu_ptr(pool->unbuddied); 445 z3fold_page_unlock(zhdr); 446} 447 448static void compact_page_work(struct work_struct *w) 449{ 450 struct z3fold_header *zhdr = container_of(w, struct z3fold_header, 451 work); 452 453 do_compact_page(zhdr, false); 454} 455 456 457/* 458 * API Functions 459 */ 460 461/** 462 * z3fold_create_pool() - create a new z3fold pool 463 * @name: pool name 464 * @gfp: gfp flags when allocating the z3fold pool structure 465 * @ops: user-defined operations for the z3fold pool 466 * 467 * Return: pointer to the new z3fold pool or NULL if the metadata allocation 468 * failed. 469 */ 470static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, 471 const struct z3fold_ops *ops) 472{ 473 struct z3fold_pool *pool = NULL; 474 int i, cpu; 475 476 pool = kzalloc(sizeof(struct z3fold_pool), gfp); 477 if (!pool) 478 goto out; 479 spin_lock_init(&pool->lock); 480 spin_lock_init(&pool->stale_lock); 481 pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); 482 if (!pool->unbuddied) 483 goto out_pool; 484 for_each_possible_cpu(cpu) { 485 struct list_head *unbuddied = 486 per_cpu_ptr(pool->unbuddied, cpu); 487 for_each_unbuddied_list(i, 0) 488 INIT_LIST_HEAD(&unbuddied[i]); 489 } 490 INIT_LIST_HEAD(&pool->lru); 491 INIT_LIST_HEAD(&pool->stale); 492 atomic64_set(&pool->pages_nr, 0); 493 pool->name = name; 494 pool->compact_wq = create_singlethread_workqueue(pool->name); 495 if (!pool->compact_wq) 496 goto out_unbuddied; 497 pool->release_wq = create_singlethread_workqueue(pool->name); 498 if (!pool->release_wq) 499 goto out_wq; 500 INIT_WORK(&pool->work, free_pages_work); 501 pool->ops = ops; 502 return pool; 503 504out_wq: 505 destroy_workqueue(pool->compact_wq); 506out_unbuddied: 507 free_percpu(pool->unbuddied); 508out_pool: 509 kfree(pool); 510out: 511 return NULL; 512} 513 514/** 515 * z3fold_destroy_pool() - destroys an existing z3fold pool 516 * @pool: the z3fold pool to be destroyed 517 * 518 * The pool should be emptied before this function is called. 519 */ 520static void z3fold_destroy_pool(struct z3fold_pool *pool) 521{ 522 destroy_workqueue(pool->release_wq); 523 destroy_workqueue(pool->compact_wq); 524 kfree(pool); 525} 526 527/** 528 * z3fold_alloc() - allocates a region of a given size 529 * @pool: z3fold pool from which to allocate 530 * @size: size in bytes of the desired allocation 531 * @gfp: gfp flags used if the pool needs to grow 532 * @handle: handle of the new allocation 533 * 534 * This function will attempt to find a free region in the pool large enough to 535 * satisfy the allocation request. A search of the unbuddied lists is 536 * performed first. If no suitable free region is found, then a new page is 537 * allocated and added to the pool to satisfy the request. 538 * 539 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used 540 * as z3fold pool pages. 541 * 542 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or 543 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate 544 * a new page. 545 */ 546static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, 547 unsigned long *handle) 548{ 549 int chunks = 0, i, freechunks; 550 struct z3fold_header *zhdr = NULL; 551 struct page *page = NULL; 552 enum buddy bud; 553 bool can_sleep = gfpflags_allow_blocking(gfp); 554 555 if (!size || (gfp & __GFP_HIGHMEM)) 556 return -EINVAL; 557 558 if (size > PAGE_SIZE) 559 return -ENOSPC; 560 561 if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) 562 bud = HEADLESS; 563 else { 564 struct list_head *unbuddied; 565 chunks = size_to_chunks(size); 566 567lookup: 568 /* First, try to find an unbuddied z3fold page. */ 569 unbuddied = get_cpu_ptr(pool->unbuddied); 570 for_each_unbuddied_list(i, chunks) { 571 struct list_head *l = &unbuddied[i]; 572 573 zhdr = list_first_entry_or_null(READ_ONCE(l), 574 struct z3fold_header, buddy); 575 576 if (!zhdr) 577 continue; 578 579 /* Re-check under lock. */ 580 spin_lock(&pool->lock); 581 l = &unbuddied[i]; 582 if (unlikely(zhdr != list_first_entry(READ_ONCE(l), 583 struct z3fold_header, buddy)) || 584 !z3fold_page_trylock(zhdr)) { 585 spin_unlock(&pool->lock); 586 put_cpu_ptr(pool->unbuddied); 587 goto lookup; 588 } 589 list_del_init(&zhdr->buddy); 590 zhdr->cpu = -1; 591 spin_unlock(&pool->lock); 592 593 page = virt_to_page(zhdr); 594 if (test_bit(NEEDS_COMPACTING, &page->private)) { 595 z3fold_page_unlock(zhdr); 596 zhdr = NULL; 597 put_cpu_ptr(pool->unbuddied); 598 if (can_sleep) 599 cond_resched(); 600 goto lookup; 601 } 602 603 /* 604 * this page could not be removed from its unbuddied 605 * list while pool lock was held, and then we've taken 606 * page lock so kref_put could not be called before 607 * we got here, so it's safe to just call kref_get() 608 */ 609 kref_get(&zhdr->refcount); 610 break; 611 } 612 put_cpu_ptr(pool->unbuddied); 613 614 if (zhdr) { 615 if (zhdr->first_chunks == 0) { 616 if (zhdr->middle_chunks != 0 && 617 chunks >= zhdr->start_middle) 618 bud = LAST; 619 else 620 bud = FIRST; 621 } else if (zhdr->last_chunks == 0) 622 bud = LAST; 623 else if (zhdr->middle_chunks == 0) 624 bud = MIDDLE; 625 else { 626 if (kref_put(&zhdr->refcount, 627 release_z3fold_page_locked)) 628 atomic64_dec(&pool->pages_nr); 629 else 630 z3fold_page_unlock(zhdr); 631 pr_err("No free chunks in unbuddied\n"); 632 WARN_ON(1); 633 goto lookup; 634 } 635 goto found; 636 } 637 bud = FIRST; 638 } 639 640 page = NULL; 641 if (can_sleep) { 642 spin_lock(&pool->stale_lock); 643 zhdr = list_first_entry_or_null(&pool->stale, 644 struct z3fold_header, buddy); 645 /* 646 * Before allocating a page, let's see if we can take one from 647 * the stale pages list. cancel_work_sync() can sleep so we 648 * limit this case to the contexts where we can sleep 649 */ 650 if (zhdr) { 651 list_del(&zhdr->buddy); 652 spin_unlock(&pool->stale_lock); 653 cancel_work_sync(&zhdr->work); 654 page = virt_to_page(zhdr); 655 } else { 656 spin_unlock(&pool->stale_lock); 657 } 658 } 659 if (!page) 660 page = alloc_page(gfp); 661 662 if (!page) 663 return -ENOMEM; 664 665 atomic64_inc(&pool->pages_nr); 666 zhdr = init_z3fold_page(page, pool); 667 668 if (bud == HEADLESS) { 669 set_bit(PAGE_HEADLESS, &page->private); 670 goto headless; 671 } 672 z3fold_page_lock(zhdr); 673 674found: 675 if (bud == FIRST) 676 zhdr->first_chunks = chunks; 677 else if (bud == LAST) 678 zhdr->last_chunks = chunks; 679 else { 680 zhdr->middle_chunks = chunks; 681 zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS; 682 } 683 684 if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 || 685 zhdr->middle_chunks == 0) { 686 struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied); 687 688 /* Add to unbuddied list */ 689 freechunks = num_free_chunks(zhdr); 690 spin_lock(&pool->lock); 691 list_add(&zhdr->buddy, &unbuddied[freechunks]); 692 spin_unlock(&pool->lock); 693 zhdr->cpu = smp_processor_id(); 694 put_cpu_ptr(pool->unbuddied); 695 } 696 697headless: 698 spin_lock(&pool->lock); 699 /* Add/move z3fold page to beginning of LRU */ 700 if (!list_empty(&page->lru)) 701 list_del(&page->lru); 702 703 list_add(&page->lru, &pool->lru); 704 705 *handle = encode_handle(zhdr, bud); 706 spin_unlock(&pool->lock); 707 if (bud != HEADLESS) 708 z3fold_page_unlock(zhdr); 709 710 return 0; 711} 712 713/** 714 * z3fold_free() - frees the allocation associated with the given handle 715 * @pool: pool in which the allocation resided 716 * @handle: handle associated with the allocation returned by z3fold_alloc() 717 * 718 * In the case that the z3fold page in which the allocation resides is under 719 * reclaim, as indicated by the PG_reclaim flag being set, this function 720 * only sets the first|last_chunks to 0. The page is actually freed 721 * once both buddies are evicted (see z3fold_reclaim_page() below). 722 */ 723static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) 724{ 725 struct z3fold_header *zhdr; 726 struct page *page; 727 enum buddy bud; 728 729 zhdr = handle_to_z3fold_header(handle); 730 page = virt_to_page(zhdr); 731 732 if (test_bit(PAGE_HEADLESS, &page->private)) { 733 /* if a headless page is under reclaim, just leave. 734 * NB: we use test_and_set_bit for a reason: if the bit 735 * has not been set before, we release this page 736 * immediately so we don't care about its value any more. 737 */ 738 if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { 739 spin_lock(&pool->lock); 740 list_del(&page->lru); 741 spin_unlock(&pool->lock); 742 free_z3fold_page(page); 743 atomic64_dec(&pool->pages_nr); 744 } 745 return; 746 } 747 748 /* Non-headless case */ 749 z3fold_page_lock(zhdr); 750 bud = handle_to_buddy(handle); 751 752 switch (bud) { 753 case FIRST: 754 zhdr->first_chunks = 0; 755 break; 756 case MIDDLE: 757 zhdr->middle_chunks = 0; 758 break; 759 case LAST: 760 zhdr->last_chunks = 0; 761 break; 762 default: 763 pr_err("%s: unknown bud %d\n", __func__, bud); 764 WARN_ON(1); 765 z3fold_page_unlock(zhdr); 766 return; 767 } 768 769 if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) { 770 atomic64_dec(&pool->pages_nr); 771 return; 772 } 773 if (test_bit(PAGE_CLAIMED, &page->private)) { 774 z3fold_page_unlock(zhdr); 775 return; 776 } 777 if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { 778 z3fold_page_unlock(zhdr); 779 return; 780 } 781 if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) { 782 spin_lock(&pool->lock); 783 list_del_init(&zhdr->buddy); 784 spin_unlock(&pool->lock); 785 zhdr->cpu = -1; 786 kref_get(&zhdr->refcount); 787 do_compact_page(zhdr, true); 788 return; 789 } 790 kref_get(&zhdr->refcount); 791 queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); 792 z3fold_page_unlock(zhdr); 793} 794 795/** 796 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it 797 * @pool: pool from which a page will attempt to be evicted 798 * @retries: number of pages on the LRU list for which eviction will 799 * be attempted before failing 800 * 801 * z3fold reclaim is different from normal system reclaim in that it is done 802 * from the bottom, up. This is because only the bottom layer, z3fold, has 803 * information on how the allocations are organized within each z3fold page. 804 * This has the potential to create interesting locking situations between 805 * z3fold and the user, however. 806 * 807 * To avoid these, this is how z3fold_reclaim_page() should be called: 808 * 809 * The user detects a page should be reclaimed and calls z3fold_reclaim_page(). 810 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and 811 * call the user-defined eviction handler with the pool and handle as 812 * arguments. 813 * 814 * If the handle can not be evicted, the eviction handler should return 815 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the 816 * appropriate list and try the next z3fold page on the LRU up to 817 * a user defined number of retries. 818 * 819 * If the handle is successfully evicted, the eviction handler should 820 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free() 821 * contains logic to delay freeing the page if the page is under reclaim, 822 * as indicated by the setting of the PG_reclaim flag on the underlying page. 823 * 824 * If all buddies in the z3fold page are successfully evicted, then the 825 * z3fold page can be freed. 826 * 827 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are 828 * no pages to evict or an eviction handler is not registered, -EAGAIN if 829 * the retry limit was hit. 830 */ 831static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) 832{ 833 int i, ret = 0; 834 struct z3fold_header *zhdr = NULL; 835 struct page *page = NULL; 836 struct list_head *pos; 837 unsigned long first_handle = 0, middle_handle = 0, last_handle = 0; 838 839 spin_lock(&pool->lock); 840 if (!pool->ops || !pool->ops->evict || retries == 0) { 841 spin_unlock(&pool->lock); 842 return -EINVAL; 843 } 844 for (i = 0; i < retries; i++) { 845 if (list_empty(&pool->lru)) { 846 spin_unlock(&pool->lock); 847 return -EINVAL; 848 } 849 list_for_each_prev(pos, &pool->lru) { 850 page = list_entry(pos, struct page, lru); 851 852 /* this bit could have been set by free, in which case 853 * we pass over to the next page in the pool. 854 */ 855 if (test_and_set_bit(PAGE_CLAIMED, &page->private)) 856 continue; 857 858 zhdr = page_address(page); 859 if (test_bit(PAGE_HEADLESS, &page->private)) 860 break; 861 862 if (!z3fold_page_trylock(zhdr)) { 863 zhdr = NULL; 864 continue; /* can't evict at this point */ 865 } 866 kref_get(&zhdr->refcount); 867 list_del_init(&zhdr->buddy); 868 zhdr->cpu = -1; 869 break; 870 } 871 872 if (!zhdr) 873 break; 874 875 list_del_init(&page->lru); 876 spin_unlock(&pool->lock); 877 878 if (!test_bit(PAGE_HEADLESS, &page->private)) { 879 /* 880 * We need encode the handles before unlocking, since 881 * we can race with free that will set 882 * (first|last)_chunks to 0 883 */ 884 first_handle = 0; 885 last_handle = 0; 886 middle_handle = 0; 887 if (zhdr->first_chunks) 888 first_handle = encode_handle(zhdr, FIRST); 889 if (zhdr->middle_chunks) 890 middle_handle = encode_handle(zhdr, MIDDLE); 891 if (zhdr->last_chunks) 892 last_handle = encode_handle(zhdr, LAST); 893 /* 894 * it's safe to unlock here because we hold a 895 * reference to this page 896 */ 897 z3fold_page_unlock(zhdr); 898 } else { 899 first_handle = encode_handle(zhdr, HEADLESS); 900 last_handle = middle_handle = 0; 901 } 902 903 /* Issue the eviction callback(s) */ 904 if (middle_handle) { 905 ret = pool->ops->evict(pool, middle_handle); 906 if (ret) 907 goto next; 908 } 909 if (first_handle) { 910 ret = pool->ops->evict(pool, first_handle); 911 if (ret) 912 goto next; 913 } 914 if (last_handle) { 915 ret = pool->ops->evict(pool, last_handle); 916 if (ret) 917 goto next; 918 } 919next: 920 if (test_bit(PAGE_HEADLESS, &page->private)) { 921 if (ret == 0) { 922 free_z3fold_page(page); 923 atomic64_dec(&pool->pages_nr); 924 return 0; 925 } 926 spin_lock(&pool->lock); 927 list_add(&page->lru, &pool->lru); 928 spin_unlock(&pool->lock); 929 } else { 930 z3fold_page_lock(zhdr); 931 clear_bit(PAGE_CLAIMED, &page->private); 932 if (kref_put(&zhdr->refcount, 933 release_z3fold_page_locked)) { 934 atomic64_dec(&pool->pages_nr); 935 return 0; 936 } 937 /* 938 * if we are here, the page is still not completely 939 * free. Take the global pool lock then to be able 940 * to add it back to the lru list 941 */ 942 spin_lock(&pool->lock); 943 list_add(&page->lru, &pool->lru); 944 spin_unlock(&pool->lock); 945 z3fold_page_unlock(zhdr); 946 } 947 948 /* We started off locked to we need to lock the pool back */ 949 spin_lock(&pool->lock); 950 } 951 spin_unlock(&pool->lock); 952 return -EAGAIN; 953} 954 955/** 956 * z3fold_map() - maps the allocation associated with the given handle 957 * @pool: pool in which the allocation resides 958 * @handle: handle associated with the allocation to be mapped 959 * 960 * Extracts the buddy number from handle and constructs the pointer to the 961 * correct starting chunk within the page. 962 * 963 * Returns: a pointer to the mapped allocation 964 */ 965static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) 966{ 967 struct z3fold_header *zhdr; 968 struct page *page; 969 void *addr; 970 enum buddy buddy; 971 972 zhdr = handle_to_z3fold_header(handle); 973 addr = zhdr; 974 page = virt_to_page(zhdr); 975 976 if (test_bit(PAGE_HEADLESS, &page->private)) 977 goto out; 978 979 z3fold_page_lock(zhdr); 980 buddy = handle_to_buddy(handle); 981 switch (buddy) { 982 case FIRST: 983 addr += ZHDR_SIZE_ALIGNED; 984 break; 985 case MIDDLE: 986 addr += zhdr->start_middle << CHUNK_SHIFT; 987 set_bit(MIDDLE_CHUNK_MAPPED, &page->private); 988 break; 989 case LAST: 990 addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); 991 break; 992 default: 993 pr_err("unknown buddy id %d\n", buddy); 994 WARN_ON(1); 995 addr = NULL; 996 break; 997 } 998 999 z3fold_page_unlock(zhdr); 1000out: 1001 return addr; 1002} 1003 1004/** 1005 * z3fold_unmap() - unmaps the allocation associated with the given handle 1006 * @pool: pool in which the allocation resides 1007 * @handle: handle associated with the allocation to be unmapped 1008 */ 1009static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle) 1010{ 1011 struct z3fold_header *zhdr; 1012 struct page *page; 1013 enum buddy buddy; 1014 1015 zhdr = handle_to_z3fold_header(handle); 1016 page = virt_to_page(zhdr); 1017 1018 if (test_bit(PAGE_HEADLESS, &page->private)) 1019 return; 1020 1021 z3fold_page_lock(zhdr); 1022 buddy = handle_to_buddy(handle); 1023 if (buddy == MIDDLE) 1024 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); 1025 z3fold_page_unlock(zhdr); 1026} 1027 1028/** 1029 * z3fold_get_pool_size() - gets the z3fold pool size in pages 1030 * @pool: pool whose size is being queried 1031 * 1032 * Returns: size in pages of the given pool. 1033 */ 1034static u64 z3fold_get_pool_size(struct z3fold_pool *pool) 1035{ 1036 return atomic64_read(&pool->pages_nr); 1037} 1038 1039/***************** 1040 * zpool 1041 ****************/ 1042 1043static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle) 1044{ 1045 if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict) 1046 return pool->zpool_ops->evict(pool->zpool, handle); 1047 else 1048 return -ENOENT; 1049} 1050 1051static const struct z3fold_ops z3fold_zpool_ops = { 1052 .evict = z3fold_zpool_evict 1053}; 1054 1055static void *z3fold_zpool_create(const char *name, gfp_t gfp, 1056 const struct zpool_ops *zpool_ops, 1057 struct zpool *zpool) 1058{ 1059 struct z3fold_pool *pool; 1060 1061 pool = z3fold_create_pool(name, gfp, 1062 zpool_ops ? &z3fold_zpool_ops : NULL); 1063 if (pool) { 1064 pool->zpool = zpool; 1065 pool->zpool_ops = zpool_ops; 1066 } 1067 return pool; 1068} 1069 1070static void z3fold_zpool_destroy(void *pool) 1071{ 1072 z3fold_destroy_pool(pool); 1073} 1074 1075static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp, 1076 unsigned long *handle) 1077{ 1078 return z3fold_alloc(pool, size, gfp, handle); 1079} 1080static void z3fold_zpool_free(void *pool, unsigned long handle) 1081{ 1082 z3fold_free(pool, handle); 1083} 1084 1085static int z3fold_zpool_shrink(void *pool, unsigned int pages, 1086 unsigned int *reclaimed) 1087{ 1088 unsigned int total = 0; 1089 int ret = -EINVAL; 1090 1091 while (total < pages) { 1092 ret = z3fold_reclaim_page(pool, 8); 1093 if (ret < 0) 1094 break; 1095 total++; 1096 } 1097 1098 if (reclaimed) 1099 *reclaimed = total; 1100 1101 return ret; 1102} 1103 1104static void *z3fold_zpool_map(void *pool, unsigned long handle, 1105 enum zpool_mapmode mm) 1106{ 1107 return z3fold_map(pool, handle); 1108} 1109static void z3fold_zpool_unmap(void *pool, unsigned long handle) 1110{ 1111 z3fold_unmap(pool, handle); 1112} 1113 1114static u64 z3fold_zpool_total_size(void *pool) 1115{ 1116 return z3fold_get_pool_size(pool) * PAGE_SIZE; 1117} 1118 1119static struct zpool_driver z3fold_zpool_driver = { 1120 .type = "z3fold", 1121 .owner = THIS_MODULE, 1122 .create = z3fold_zpool_create, 1123 .destroy = z3fold_zpool_destroy, 1124 .malloc = z3fold_zpool_malloc, 1125 .free = z3fold_zpool_free, 1126 .shrink = z3fold_zpool_shrink, 1127 .map = z3fold_zpool_map, 1128 .unmap = z3fold_zpool_unmap, 1129 .total_size = z3fold_zpool_total_size, 1130}; 1131 1132MODULE_ALIAS("zpool-z3fold"); 1133 1134static int __init init_z3fold(void) 1135{ 1136 /* Make sure the z3fold header is not larger than the page size */ 1137 BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE); 1138 zpool_register_driver(&z3fold_zpool_driver); 1139 1140 return 0; 1141} 1142 1143static void __exit exit_z3fold(void) 1144{ 1145 zpool_unregister_driver(&z3fold_zpool_driver); 1146} 1147 1148module_init(init_z3fold); 1149module_exit(exit_z3fold); 1150 1151MODULE_LICENSE("GPL"); 1152MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>"); 1153MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");