Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
zcache.c at v3.7-rc6
1/* 2 * zcache.c 3 * 4 * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp. 5 * Copyright (c) 2010,2011, Nitin Gupta 6 * 7 * Zcache provides an in-kernel "host implementation" for transcendent memory 8 * and, thus indirectly, for cleancache and frontswap. Zcache includes two 9 * page-accessible memory [1] interfaces, both utilizing the crypto compression 10 * API: 11 * 1) "compression buddies" ("zbud") is used for ephemeral pages 12 * 2) zsmalloc is used for persistent pages. 13 * Xvmalloc (based on the TLSF allocator) has very low fragmentation 14 * so maximizes space efficiency, while zbud allows pairs (and potentially, 15 * in the future, more than a pair of) compressed pages to be closely linked 16 * so that reclaiming can be done via the kernel's physical-page-oriented 17 * "shrinker" interface. 18 * 19 * [1] For a definition of page-accessible memory (aka PAM), see: 20 * http://marc.info/?l=linux-mm&m=127811271605009 21 */ 22 23#include <linux/module.h> 24#include <linux/cpu.h> 25#include <linux/highmem.h> 26#include <linux/list.h> 27#include <linux/slab.h> 28#include <linux/spinlock.h> 29#include <linux/types.h> 30#include <linux/atomic.h> 31#include <linux/math64.h> 32#include <linux/crypto.h> 33#include <linux/string.h> 34#include <linux/idr.h> 35#include "tmem.h" 36 37#include "../zsmalloc/zsmalloc.h" 38 39#ifdef CONFIG_CLEANCACHE 40#include <linux/cleancache.h> 41#endif 42#ifdef CONFIG_FRONTSWAP 43#include <linux/frontswap.h> 44#endif 45 46#if 0 47/* this is more aggressive but may cause other problems? */ 48#define ZCACHE_GFP_MASK (GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN) 49#else 50#define ZCACHE_GFP_MASK \ 51 (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) 52#endif 53 54#define MAX_CLIENTS 16 55#define LOCAL_CLIENT ((uint16_t)-1) 56 57MODULE_LICENSE("GPL"); 58 59struct zcache_client { 60 struct idr tmem_pools; 61 struct zs_pool *zspool; 62 bool allocated; 63 atomic_t refcount; 64}; 65 66static struct zcache_client zcache_host; 67static struct zcache_client zcache_clients[MAX_CLIENTS]; 68 69static inline uint16_t get_client_id_from_client(struct zcache_client *cli) 70{ 71 BUG_ON(cli == NULL); 72 if (cli == &zcache_host) 73 return LOCAL_CLIENT; 74 return cli - &zcache_clients[0]; 75} 76 77static struct zcache_client *get_zcache_client(uint16_t cli_id) 78{ 79 if (cli_id == LOCAL_CLIENT) 80 return &zcache_host; 81 82 if ((unsigned int)cli_id < MAX_CLIENTS) 83 return &zcache_clients[cli_id]; 84 85 return NULL; 86} 87 88static inline bool is_local_client(struct zcache_client *cli) 89{ 90 return cli == &zcache_host; 91} 92 93/* crypto API for zcache */ 94#define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME 95static char zcache_comp_name[ZCACHE_COMP_NAME_SZ]; 96static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms; 97 98enum comp_op { 99 ZCACHE_COMPOP_COMPRESS, 100 ZCACHE_COMPOP_DECOMPRESS 101}; 102 103static inline int zcache_comp_op(enum comp_op op, 104 const u8 *src, unsigned int slen, 105 u8 *dst, unsigned int *dlen) 106{ 107 struct crypto_comp *tfm; 108 int ret; 109 110 BUG_ON(!zcache_comp_pcpu_tfms); 111 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu()); 112 BUG_ON(!tfm); 113 switch (op) { 114 case ZCACHE_COMPOP_COMPRESS: 115 ret = crypto_comp_compress(tfm, src, slen, dst, dlen); 116 break; 117 case ZCACHE_COMPOP_DECOMPRESS: 118 ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); 119 break; 120 default: 121 ret = -EINVAL; 122 } 123 put_cpu(); 124 return ret; 125} 126 127/********** 128 * Compression buddies ("zbud") provides for packing two (or, 
possibly 129 * in the future, more) compressed ephemeral pages into a single "raw" 130 * (physical) page and tracking them with data structures so that 131 * the raw pages can be easily reclaimed. 132 * 133 * A zbud page ("zbpg") is an aligned page containing a list_head, 134 * a lock, and two "zbud headers". The remainder of the physical 135 * page is divided up into aligned 64-byte "chunks" which contain 136 * the compressed data for zero, one, or two zbuds. Each zbpg 137 * resides on: (1) an "unused list" if it has no zbuds; (2) a 138 * "buddied" list if it is fully populated with two zbuds; or 139 * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks 140 * the one unbuddied zbud uses. The data inside a zbpg cannot be 141 * read or written unless the zbpg's lock is held. 142 */ 143 144#define ZBH_SENTINEL 0x43214321 145#define ZBPG_SENTINEL 0xdeadbeef 146 147#define ZBUD_MAX_BUDS 2 148 149struct zbud_hdr { 150 uint16_t client_id; 151 uint16_t pool_id; 152 struct tmem_oid oid; 153 uint32_t index; 154 uint16_t size; /* compressed size in bytes, zero means unused */ 155 DECL_SENTINEL 156}; 157 158struct zbud_page { 159 struct list_head bud_list; 160 spinlock_t lock; 161 struct zbud_hdr buddy[ZBUD_MAX_BUDS]; 162 DECL_SENTINEL 163 /* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */ 164}; 165 166#define CHUNK_SHIFT 6 167#define CHUNK_SIZE (1 << CHUNK_SHIFT) 168#define CHUNK_MASK (~(CHUNK_SIZE-1)) 169#define NCHUNKS (((PAGE_SIZE - sizeof(struct zbud_page)) & \ 170 CHUNK_MASK) >> CHUNK_SHIFT) 171#define MAX_CHUNK (NCHUNKS-1) 172 173static struct { 174 struct list_head list; 175 unsigned count; 176} zbud_unbuddied[NCHUNKS]; 177/* list N contains pages with N chunks USED and NCHUNKS-N unused */ 178/* element 0 is never used but optimizing that isn't worth it */ 179static unsigned long zbud_cumul_chunk_counts[NCHUNKS]; 180 181struct list_head zbud_buddied_list; 182static unsigned long zcache_zbud_buddied_count; 183 184/* protects the buddied list and all unbuddied lists */ 185static DEFINE_SPINLOCK(zbud_budlists_spinlock); 186 187static LIST_HEAD(zbpg_unused_list); 188static unsigned long zcache_zbpg_unused_list_count; 189 190/* protects the unused page list */ 191static DEFINE_SPINLOCK(zbpg_unused_list_spinlock); 192 193static atomic_t zcache_zbud_curr_raw_pages; 194static atomic_t zcache_zbud_curr_zpages; 195static unsigned long zcache_zbud_curr_zbytes; 196static unsigned long zcache_zbud_cumul_zpages; 197static unsigned long zcache_zbud_cumul_zbytes; 198static unsigned long zcache_compress_poor; 199static unsigned long zcache_mean_compress_poor; 200 201/* forward references */ 202static void *zcache_get_free_page(void); 203static void zcache_free_page(void *p); 204 205/* 206 * zbud helper functions 207 */ 208 209static inline unsigned zbud_max_buddy_size(void) 210{ 211 return MAX_CHUNK << CHUNK_SHIFT; 212} 213 214static inline unsigned zbud_size_to_chunks(unsigned size) 215{ 216 BUG_ON(size == 0 || size > zbud_max_buddy_size()); 217 return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; 218} 219 220static inline int zbud_budnum(struct zbud_hdr *zh) 221{ 222 unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1); 223 struct zbud_page *zbpg = NULL; 224 unsigned budnum = -1U; 225 int i; 226 227 for (i = 0; i < ZBUD_MAX_BUDS; i++) 228 if (offset == offsetof(typeof(*zbpg), buddy[i])) { 229 budnum = i; 230 break; 231 } 232 BUG_ON(budnum == -1U); 233 return budnum; 234} 235 236static char *zbud_data(struct zbud_hdr *zh, unsigned size) 237{ 238 struct zbud_page *zbpg; 239 char *p; 240 
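	/*
	 * Layout note (illustrative, assuming PAGE_SIZE == 4096 and
	 * CHUNK_SIZE == 64): budnum 0's data starts at the first chunk-aligned
	 * offset past the struct zbud_page header, while budnum 1's data is
	 * placed flush against the end of the page; e.g. a budnum 1 zbud of
	 * size 1000 is rounded up to 1024 bytes and starts at offset
	 * 4096 - 1024 == 3072.
	 */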
unsigned budnum; 241 242 ASSERT_SENTINEL(zh, ZBH); 243 budnum = zbud_budnum(zh); 244 BUG_ON(size == 0 || size > zbud_max_buddy_size()); 245 zbpg = container_of(zh, struct zbud_page, buddy[budnum]); 246 ASSERT_SPINLOCK(&zbpg->lock); 247 p = (char *)zbpg; 248 if (budnum == 0) 249 p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) & 250 CHUNK_MASK); 251 else if (budnum == 1) 252 p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK); 253 return p; 254} 255 256/* 257 * zbud raw page management 258 */ 259 260static struct zbud_page *zbud_alloc_raw_page(void) 261{ 262 struct zbud_page *zbpg = NULL; 263 struct zbud_hdr *zh0, *zh1; 264 bool recycled = 0; 265 266 /* if any pages on the zbpg list, use one */ 267 spin_lock(&zbpg_unused_list_spinlock); 268 if (!list_empty(&zbpg_unused_list)) { 269 zbpg = list_first_entry(&zbpg_unused_list, 270 struct zbud_page, bud_list); 271 list_del_init(&zbpg->bud_list); 272 zcache_zbpg_unused_list_count--; 273 recycled = 1; 274 } 275 spin_unlock(&zbpg_unused_list_spinlock); 276 if (zbpg == NULL) 277 /* none on zbpg list, try to get a kernel page */ 278 zbpg = zcache_get_free_page(); 279 if (likely(zbpg != NULL)) { 280 INIT_LIST_HEAD(&zbpg->bud_list); 281 zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; 282 spin_lock_init(&zbpg->lock); 283 if (recycled) { 284 ASSERT_INVERTED_SENTINEL(zbpg, ZBPG); 285 SET_SENTINEL(zbpg, ZBPG); 286 BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); 287 BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); 288 } else { 289 atomic_inc(&zcache_zbud_curr_raw_pages); 290 INIT_LIST_HEAD(&zbpg->bud_list); 291 SET_SENTINEL(zbpg, ZBPG); 292 zh0->size = 0; zh1->size = 0; 293 tmem_oid_set_invalid(&zh0->oid); 294 tmem_oid_set_invalid(&zh1->oid); 295 } 296 } 297 return zbpg; 298} 299 300static void zbud_free_raw_page(struct zbud_page *zbpg) 301{ 302 struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1]; 303 304 ASSERT_SENTINEL(zbpg, ZBPG); 305 BUG_ON(!list_empty(&zbpg->bud_list)); 306 ASSERT_SPINLOCK(&zbpg->lock); 307 BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); 308 BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); 309 INVERT_SENTINEL(zbpg, ZBPG); 310 spin_unlock(&zbpg->lock); 311 spin_lock(&zbpg_unused_list_spinlock); 312 list_add(&zbpg->bud_list, &zbpg_unused_list); 313 zcache_zbpg_unused_list_count++; 314 spin_unlock(&zbpg_unused_list_spinlock); 315} 316 317/* 318 * core zbud handling routines 319 */ 320 321static unsigned zbud_free(struct zbud_hdr *zh) 322{ 323 unsigned size; 324 325 ASSERT_SENTINEL(zh, ZBH); 326 BUG_ON(!tmem_oid_valid(&zh->oid)); 327 size = zh->size; 328 BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); 329 zh->size = 0; 330 tmem_oid_set_invalid(&zh->oid); 331 INVERT_SENTINEL(zh, ZBH); 332 zcache_zbud_curr_zbytes -= size; 333 atomic_dec(&zcache_zbud_curr_zpages); 334 return size; 335} 336 337static void zbud_free_and_delist(struct zbud_hdr *zh) 338{ 339 unsigned chunks; 340 struct zbud_hdr *zh_other; 341 unsigned budnum = zbud_budnum(zh), size; 342 struct zbud_page *zbpg = 343 container_of(zh, struct zbud_page, buddy[budnum]); 344 345 spin_lock(&zbud_budlists_spinlock); 346 spin_lock(&zbpg->lock); 347 if (list_empty(&zbpg->bud_list)) { 348 /* ignore zombie page... see zbud_evict_pages() */ 349 spin_unlock(&zbpg->lock); 350 spin_unlock(&zbud_budlists_spinlock); 351 return; 352 } 353 size = zbud_free(zh); 354 ASSERT_SPINLOCK(&zbpg->lock); 355 zh_other = &zbpg->buddy[(budnum == 0) ? 
1 : 0]; 356 if (zh_other->size == 0) { /* was unbuddied: unlist and free */ 357 chunks = zbud_size_to_chunks(size) ; 358 BUG_ON(list_empty(&zbud_unbuddied[chunks].list)); 359 list_del_init(&zbpg->bud_list); 360 zbud_unbuddied[chunks].count--; 361 spin_unlock(&zbud_budlists_spinlock); 362 zbud_free_raw_page(zbpg); 363 } else { /* was buddied: move remaining buddy to unbuddied list */ 364 chunks = zbud_size_to_chunks(zh_other->size) ; 365 list_del_init(&zbpg->bud_list); 366 zcache_zbud_buddied_count--; 367 list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list); 368 zbud_unbuddied[chunks].count++; 369 spin_unlock(&zbud_budlists_spinlock); 370 spin_unlock(&zbpg->lock); 371 } 372} 373 374static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id, 375 struct tmem_oid *oid, 376 uint32_t index, struct page *page, 377 void *cdata, unsigned size) 378{ 379 struct zbud_hdr *zh0, *zh1, *zh = NULL; 380 struct zbud_page *zbpg = NULL, *ztmp; 381 unsigned nchunks; 382 char *to; 383 int i, found_good_buddy = 0; 384 385 nchunks = zbud_size_to_chunks(size) ; 386 for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) { 387 spin_lock(&zbud_budlists_spinlock); 388 if (!list_empty(&zbud_unbuddied[i].list)) { 389 list_for_each_entry_safe(zbpg, ztmp, 390 &zbud_unbuddied[i].list, bud_list) { 391 if (spin_trylock(&zbpg->lock)) { 392 found_good_buddy = i; 393 goto found_unbuddied; 394 } 395 } 396 } 397 spin_unlock(&zbud_budlists_spinlock); 398 } 399 /* didn't find a good buddy, try allocating a new page */ 400 zbpg = zbud_alloc_raw_page(); 401 if (unlikely(zbpg == NULL)) 402 goto out; 403 /* ok, have a page, now compress the data before taking locks */ 404 spin_lock(&zbud_budlists_spinlock); 405 spin_lock(&zbpg->lock); 406 list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list); 407 zbud_unbuddied[nchunks].count++; 408 zh = &zbpg->buddy[0]; 409 goto init_zh; 410 411found_unbuddied: 412 ASSERT_SPINLOCK(&zbpg->lock); 413 zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; 414 BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0))); 415 if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */ 416 ASSERT_SENTINEL(zh0, ZBH); 417 zh = zh1; 418 } else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */ 419 ASSERT_SENTINEL(zh1, ZBH); 420 zh = zh0; 421 } else 422 BUG(); 423 list_del_init(&zbpg->bud_list); 424 zbud_unbuddied[found_good_buddy].count--; 425 list_add_tail(&zbpg->bud_list, &zbud_buddied_list); 426 zcache_zbud_buddied_count++; 427 428init_zh: 429 SET_SENTINEL(zh, ZBH); 430 zh->size = size; 431 zh->index = index; 432 zh->oid = *oid; 433 zh->pool_id = pool_id; 434 zh->client_id = client_id; 435 to = zbud_data(zh, size); 436 memcpy(to, cdata, size); 437 spin_unlock(&zbpg->lock); 438 spin_unlock(&zbud_budlists_spinlock); 439 440 zbud_cumul_chunk_counts[nchunks]++; 441 atomic_inc(&zcache_zbud_curr_zpages); 442 zcache_zbud_cumul_zpages++; 443 zcache_zbud_curr_zbytes += size; 444 zcache_zbud_cumul_zbytes += size; 445out: 446 return zh; 447} 448 449static int zbud_decompress(struct page *page, struct zbud_hdr *zh) 450{ 451 struct zbud_page *zbpg; 452 unsigned budnum = zbud_budnum(zh); 453 unsigned int out_len = PAGE_SIZE; 454 char *to_va, *from_va; 455 unsigned size; 456 int ret = 0; 457 458 zbpg = container_of(zh, struct zbud_page, buddy[budnum]); 459 spin_lock(&zbpg->lock); 460 if (list_empty(&zbpg->bud_list)) { 461 /* ignore zombie page... 
see zbud_evict_pages() */ 462 ret = -EINVAL; 463 goto out; 464 } 465 ASSERT_SENTINEL(zh, ZBH); 466 BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); 467 to_va = kmap_atomic(page); 468 size = zh->size; 469 from_va = zbud_data(zh, size); 470 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size, 471 to_va, &out_len); 472 BUG_ON(ret); 473 BUG_ON(out_len != PAGE_SIZE); 474 kunmap_atomic(to_va); 475out: 476 spin_unlock(&zbpg->lock); 477 return ret; 478} 479 480/* 481 * The following routines handle shrinking of ephemeral pages by evicting 482 * pages "least valuable" first. 483 */ 484 485static unsigned long zcache_evicted_raw_pages; 486static unsigned long zcache_evicted_buddied_pages; 487static unsigned long zcache_evicted_unbuddied_pages; 488 489static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, 490 uint16_t poolid); 491static void zcache_put_pool(struct tmem_pool *pool); 492 493/* 494 * Flush and free all zbuds in a zbpg, then free the pageframe 495 */ 496static void zbud_evict_zbpg(struct zbud_page *zbpg) 497{ 498 struct zbud_hdr *zh; 499 int i, j; 500 uint32_t pool_id[ZBUD_MAX_BUDS], client_id[ZBUD_MAX_BUDS]; 501 uint32_t index[ZBUD_MAX_BUDS]; 502 struct tmem_oid oid[ZBUD_MAX_BUDS]; 503 struct tmem_pool *pool; 504 505 ASSERT_SPINLOCK(&zbpg->lock); 506 BUG_ON(!list_empty(&zbpg->bud_list)); 507 for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) { 508 zh = &zbpg->buddy[i]; 509 if (zh->size) { 510 client_id[j] = zh->client_id; 511 pool_id[j] = zh->pool_id; 512 oid[j] = zh->oid; 513 index[j] = zh->index; 514 j++; 515 zbud_free(zh); 516 } 517 } 518 spin_unlock(&zbpg->lock); 519 for (i = 0; i < j; i++) { 520 pool = zcache_get_pool_by_id(client_id[i], pool_id[i]); 521 if (pool != NULL) { 522 tmem_flush_page(pool, &oid[i], index[i]); 523 zcache_put_pool(pool); 524 } 525 } 526 ASSERT_SENTINEL(zbpg, ZBPG); 527 spin_lock(&zbpg->lock); 528 zbud_free_raw_page(zbpg); 529} 530 531/* 532 * Free nr pages. This code is funky because we want to hold the locks 533 * protecting various lists for as short a time as possible, and in some 534 * circumstances the list may change asynchronously when the list lock is 535 * not held. In some cases we also trylock not only to avoid waiting on a 536 * page in use by another cpu, but also to avoid potential deadlock due to 537 * lock inversion. 
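 *
 * For illustration, the order used below: (1) pages already sitting on the
 * zbpg unused list are freed first, (2) then unbuddied pages, starting with
 * the lists whose single zbud holds the fewest chunks, and (3) buddied
 * pages only as a last resort.  The function is invoked from
 * shrink_zcache_memory() with sc->nr_to_scan as "nr".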
538 */ 539static void zbud_evict_pages(int nr) 540{ 541 struct zbud_page *zbpg; 542 int i; 543 544 /* first try freeing any pages on unused list */ 545retry_unused_list: 546 spin_lock_bh(&zbpg_unused_list_spinlock); 547 if (!list_empty(&zbpg_unused_list)) { 548 /* can't walk list here, since it may change when unlocked */ 549 zbpg = list_first_entry(&zbpg_unused_list, 550 struct zbud_page, bud_list); 551 list_del_init(&zbpg->bud_list); 552 zcache_zbpg_unused_list_count--; 553 atomic_dec(&zcache_zbud_curr_raw_pages); 554 spin_unlock_bh(&zbpg_unused_list_spinlock); 555 zcache_free_page(zbpg); 556 zcache_evicted_raw_pages++; 557 if (--nr <= 0) 558 goto out; 559 goto retry_unused_list; 560 } 561 spin_unlock_bh(&zbpg_unused_list_spinlock); 562 563 /* now try freeing unbuddied pages, starting with least space avail */ 564 for (i = 0; i < MAX_CHUNK; i++) { 565retry_unbud_list_i: 566 spin_lock_bh(&zbud_budlists_spinlock); 567 if (list_empty(&zbud_unbuddied[i].list)) { 568 spin_unlock_bh(&zbud_budlists_spinlock); 569 continue; 570 } 571 list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) { 572 if (unlikely(!spin_trylock(&zbpg->lock))) 573 continue; 574 list_del_init(&zbpg->bud_list); 575 zbud_unbuddied[i].count--; 576 spin_unlock(&zbud_budlists_spinlock); 577 zcache_evicted_unbuddied_pages++; 578 /* want budlists unlocked when doing zbpg eviction */ 579 zbud_evict_zbpg(zbpg); 580 local_bh_enable(); 581 if (--nr <= 0) 582 goto out; 583 goto retry_unbud_list_i; 584 } 585 spin_unlock_bh(&zbud_budlists_spinlock); 586 } 587 588 /* as a last resort, free buddied pages */ 589retry_bud_list: 590 spin_lock_bh(&zbud_budlists_spinlock); 591 if (list_empty(&zbud_buddied_list)) { 592 spin_unlock_bh(&zbud_budlists_spinlock); 593 goto out; 594 } 595 list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) { 596 if (unlikely(!spin_trylock(&zbpg->lock))) 597 continue; 598 list_del_init(&zbpg->bud_list); 599 zcache_zbud_buddied_count--; 600 spin_unlock(&zbud_budlists_spinlock); 601 zcache_evicted_buddied_pages++; 602 /* want budlists unlocked when doing zbpg eviction */ 603 zbud_evict_zbpg(zbpg); 604 local_bh_enable(); 605 if (--nr <= 0) 606 goto out; 607 goto retry_bud_list; 608 } 609 spin_unlock_bh(&zbud_budlists_spinlock); 610out: 611 return; 612} 613 614static void __init zbud_init(void) 615{ 616 int i; 617 618 INIT_LIST_HEAD(&zbud_buddied_list); 619 620 for (i = 0; i < NCHUNKS; i++) 621 INIT_LIST_HEAD(&zbud_unbuddied[i].list); 622} 623 624#ifdef CONFIG_SYSFS 625/* 626 * These sysfs routines show a nice distribution of how many zbpg's are 627 * currently (and have ever been placed) in each unbuddied list. It's fun 628 * to watch but can probably go away before final merge. 
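 *
 * Example (illustrative values) of reading the resulting attribute:
 *
 *   # cat /sys/kernel/mm/zcache/zbud_unbuddied_list_counts
 *   0 3 17 9 2 0 0 ...
 *
 * i.e. one count per unbuddied list: how many zbud pages currently hold a
 * single zbud of exactly N chunks, for N = 0..NCHUNKS-1.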
629 */ 630static int zbud_show_unbuddied_list_counts(char *buf) 631{ 632 int i; 633 char *p = buf; 634 635 for (i = 0; i < NCHUNKS; i++) 636 p += sprintf(p, "%u ", zbud_unbuddied[i].count); 637 return p - buf; 638} 639 640static int zbud_show_cumul_chunk_counts(char *buf) 641{ 642 unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0; 643 unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0; 644 unsigned long total_chunks_lte_42 = 0; 645 char *p = buf; 646 647 for (i = 0; i < NCHUNKS; i++) { 648 p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]); 649 chunks += zbud_cumul_chunk_counts[i]; 650 total_chunks += zbud_cumul_chunk_counts[i]; 651 sum_total_chunks += i * zbud_cumul_chunk_counts[i]; 652 if (i == 21) 653 total_chunks_lte_21 = total_chunks; 654 if (i == 32) 655 total_chunks_lte_32 = total_chunks; 656 if (i == 42) 657 total_chunks_lte_42 = total_chunks; 658 } 659 p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n", 660 total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42, 661 chunks == 0 ? 0 : sum_total_chunks / chunks); 662 return p - buf; 663} 664#endif 665 666/********** 667 * This "zv" PAM implementation combines the slab-based zsmalloc 668 * with the crypto compression API to maximize the amount of data that can 669 * be packed into a physical page. 670 * 671 * Zv represents a PAM page with the index and object (plus a "size" value 672 * necessary for decompression) immediately preceding the compressed data. 673 */ 674 675#define ZVH_SENTINEL 0x43214321 676 677struct zv_hdr { 678 uint32_t pool_id; 679 struct tmem_oid oid; 680 uint32_t index; 681 size_t size; 682 DECL_SENTINEL 683}; 684 685/* rudimentary policy limits */ 686/* total number of persistent pages may not exceed this percentage */ 687static unsigned int zv_page_count_policy_percent = 75; 688/* 689 * byte count defining poor compression; pages with greater zsize will be 690 * rejected 691 */ 692static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7; 693/* 694 * byte count defining poor *mean* compression; pages with greater zsize 695 * will be rejected until sufficient better-compressed pages are accepted 696 * driving the mean below this threshold 697 */ 698static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5; 699 700static atomic_t zv_curr_dist_counts[NCHUNKS]; 701static atomic_t zv_cumul_dist_counts[NCHUNKS]; 702 703static unsigned long zv_create(struct zs_pool *pool, uint32_t pool_id, 704 struct tmem_oid *oid, uint32_t index, 705 void *cdata, unsigned clen) 706{ 707 struct zv_hdr *zv; 708 u32 size = clen + sizeof(struct zv_hdr); 709 int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT; 710 unsigned long handle = 0; 711 712 BUG_ON(!irqs_disabled()); 713 BUG_ON(chunks >= NCHUNKS); 714 handle = zs_malloc(pool, size); 715 if (!handle) 716 goto out; 717 atomic_inc(&zv_curr_dist_counts[chunks]); 718 atomic_inc(&zv_cumul_dist_counts[chunks]); 719 zv = zs_map_object(pool, handle, ZS_MM_WO); 720 zv->index = index; 721 zv->oid = *oid; 722 zv->pool_id = pool_id; 723 zv->size = clen; 724 SET_SENTINEL(zv, ZVH); 725 memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen); 726 zs_unmap_object(pool, handle); 727out: 728 return handle; 729} 730 731static void zv_free(struct zs_pool *pool, unsigned long handle) 732{ 733 unsigned long flags; 734 struct zv_hdr *zv; 735 uint16_t size; 736 int chunks; 737 738 zv = zs_map_object(pool, handle, ZS_MM_RW); 739 ASSERT_SENTINEL(zv, ZVH); 740 size = zv->size + sizeof(struct zv_hdr); 741 INVERT_SENTINEL(zv, ZVH); 742 zs_unmap_object(pool, handle); 
743 744 chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT; 745 BUG_ON(chunks >= NCHUNKS); 746 atomic_dec(&zv_curr_dist_counts[chunks]); 747 748 local_irq_save(flags); 749 zs_free(pool, handle); 750 local_irq_restore(flags); 751} 752 753static void zv_decompress(struct page *page, unsigned long handle) 754{ 755 unsigned int clen = PAGE_SIZE; 756 char *to_va; 757 int ret; 758 struct zv_hdr *zv; 759 760 zv = zs_map_object(zcache_host.zspool, handle, ZS_MM_RO); 761 BUG_ON(zv->size == 0); 762 ASSERT_SENTINEL(zv, ZVH); 763 to_va = kmap_atomic(page); 764 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, (char *)zv + sizeof(*zv), 765 zv->size, to_va, &clen); 766 kunmap_atomic(to_va); 767 zs_unmap_object(zcache_host.zspool, handle); 768 BUG_ON(ret); 769 BUG_ON(clen != PAGE_SIZE); 770} 771 772#ifdef CONFIG_SYSFS 773/* 774 * show a distribution of compression stats for zv pages. 775 */ 776 777static int zv_curr_dist_counts_show(char *buf) 778{ 779 unsigned long i, n, chunks = 0, sum_total_chunks = 0; 780 char *p = buf; 781 782 for (i = 0; i < NCHUNKS; i++) { 783 n = atomic_read(&zv_curr_dist_counts[i]); 784 p += sprintf(p, "%lu ", n); 785 chunks += n; 786 sum_total_chunks += i * n; 787 } 788 p += sprintf(p, "mean:%lu\n", 789 chunks == 0 ? 0 : sum_total_chunks / chunks); 790 return p - buf; 791} 792 793static int zv_cumul_dist_counts_show(char *buf) 794{ 795 unsigned long i, n, chunks = 0, sum_total_chunks = 0; 796 char *p = buf; 797 798 for (i = 0; i < NCHUNKS; i++) { 799 n = atomic_read(&zv_cumul_dist_counts[i]); 800 p += sprintf(p, "%lu ", n); 801 chunks += n; 802 sum_total_chunks += i * n; 803 } 804 p += sprintf(p, "mean:%lu\n", 805 chunks == 0 ? 0 : sum_total_chunks / chunks); 806 return p - buf; 807} 808 809/* 810 * setting zv_max_zsize via sysfs causes all persistent (e.g. swap) 811 * pages that don't compress to less than this value (including metadata 812 * overhead) to be rejected. We don't allow the value to get too close 813 * to PAGE_SIZE. 814 */ 815static ssize_t zv_max_zsize_show(struct kobject *kobj, 816 struct kobj_attribute *attr, 817 char *buf) 818{ 819 return sprintf(buf, "%u\n", zv_max_zsize); 820} 821 822static ssize_t zv_max_zsize_store(struct kobject *kobj, 823 struct kobj_attribute *attr, 824 const char *buf, size_t count) 825{ 826 unsigned long val; 827 int err; 828 829 if (!capable(CAP_SYS_ADMIN)) 830 return -EPERM; 831 832 err = kstrtoul(buf, 10, &val); 833 if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7)) 834 return -EINVAL; 835 zv_max_zsize = val; 836 return count; 837} 838 839/* 840 * setting zv_max_mean_zsize via sysfs causes all persistent (e.g. swap) 841 * pages that don't compress to less than this value (including metadata 842 * overhead) to be rejected UNLESS the mean compression is also smaller 843 * than this value. In other words, we are load-balancing-by-zsize the 844 * accepted pages. Again, we don't allow the value to get too close 845 * to PAGE_SIZE. 
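 *
 * Worked example (assuming PAGE_SIZE == 4096, so the defaults are
 * zv_max_zsize == 3584 and zv_max_mean_zsize == 2560): a swap page that
 * compresses to 3000 bytes passes the per-page zv_max_zsize check, but is
 * accepted only while the mean compressed size of the pages already stored
 * in the zsmalloc pool does not exceed 2560 bytes; a page compressing to
 * 3700 bytes is rejected outright.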
846 */ 847static ssize_t zv_max_mean_zsize_show(struct kobject *kobj, 848 struct kobj_attribute *attr, 849 char *buf) 850{ 851 return sprintf(buf, "%u\n", zv_max_mean_zsize); 852} 853 854static ssize_t zv_max_mean_zsize_store(struct kobject *kobj, 855 struct kobj_attribute *attr, 856 const char *buf, size_t count) 857{ 858 unsigned long val; 859 int err; 860 861 if (!capable(CAP_SYS_ADMIN)) 862 return -EPERM; 863 864 err = kstrtoul(buf, 10, &val); 865 if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7)) 866 return -EINVAL; 867 zv_max_mean_zsize = val; 868 return count; 869} 870 871/* 872 * setting zv_page_count_policy_percent via sysfs sets an upper bound of 873 * persistent (e.g. swap) pages that will be retained according to: 874 * (zv_page_count_policy_percent * totalram_pages) / 100) 875 * when that limit is reached, further puts will be rejected (until 876 * some pages have been flushed). Note that, due to compression, 877 * this number may exceed 100; it defaults to 75 and we set an 878 * arbitary limit of 150. A poor choice will almost certainly result 879 * in OOM's, so this value should only be changed prudently. 880 */ 881static ssize_t zv_page_count_policy_percent_show(struct kobject *kobj, 882 struct kobj_attribute *attr, 883 char *buf) 884{ 885 return sprintf(buf, "%u\n", zv_page_count_policy_percent); 886} 887 888static ssize_t zv_page_count_policy_percent_store(struct kobject *kobj, 889 struct kobj_attribute *attr, 890 const char *buf, size_t count) 891{ 892 unsigned long val; 893 int err; 894 895 if (!capable(CAP_SYS_ADMIN)) 896 return -EPERM; 897 898 err = kstrtoul(buf, 10, &val); 899 if (err || (val == 0) || (val > 150)) 900 return -EINVAL; 901 zv_page_count_policy_percent = val; 902 return count; 903} 904 905static struct kobj_attribute zcache_zv_max_zsize_attr = { 906 .attr = { .name = "zv_max_zsize", .mode = 0644 }, 907 .show = zv_max_zsize_show, 908 .store = zv_max_zsize_store, 909}; 910 911static struct kobj_attribute zcache_zv_max_mean_zsize_attr = { 912 .attr = { .name = "zv_max_mean_zsize", .mode = 0644 }, 913 .show = zv_max_mean_zsize_show, 914 .store = zv_max_mean_zsize_store, 915}; 916 917static struct kobj_attribute zcache_zv_page_count_policy_percent_attr = { 918 .attr = { .name = "zv_page_count_policy_percent", 919 .mode = 0644 }, 920 .show = zv_page_count_policy_percent_show, 921 .store = zv_page_count_policy_percent_store, 922}; 923#endif 924 925/* 926 * zcache core code starts here 927 */ 928 929/* useful stats not collected by cleancache or frontswap */ 930static unsigned long zcache_flush_total; 931static unsigned long zcache_flush_found; 932static unsigned long zcache_flobj_total; 933static unsigned long zcache_flobj_found; 934static unsigned long zcache_failed_eph_puts; 935static unsigned long zcache_failed_pers_puts; 936 937/* 938 * Tmem operations assume the poolid implies the invoking client. 939 * Zcache only has one client (the kernel itself): LOCAL_CLIENT. 940 * RAMster has each client numbered by cluster node, and a KVM version 941 * of zcache would have one client per guest and each client might 942 * have a poolid==N. 
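 *
 * For illustration: the cleancache and frontswap shims below always pass
 * LOCAL_CLIENT, e.g. zcache_get_pool_by_id(LOCAL_CLIENT, pool_id), which
 * resolves to the statically allocated zcache_host; a hypothetical
 * multi-client user (RAMster, KVM) would instead pass a client id in
 * 0..MAX_CLIENTS-1, which indexes the zcache_clients[] array.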
943 */ 944static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid) 945{ 946 struct tmem_pool *pool = NULL; 947 struct zcache_client *cli = NULL; 948 949 cli = get_zcache_client(cli_id); 950 if (!cli) 951 goto out; 952 953 atomic_inc(&cli->refcount); 954 pool = idr_find(&cli->tmem_pools, poolid); 955 if (pool != NULL) 956 atomic_inc(&pool->refcount); 957out: 958 return pool; 959} 960 961static void zcache_put_pool(struct tmem_pool *pool) 962{ 963 struct zcache_client *cli = NULL; 964 965 if (pool == NULL) 966 BUG(); 967 cli = pool->client; 968 atomic_dec(&pool->refcount); 969 atomic_dec(&cli->refcount); 970} 971 972int zcache_new_client(uint16_t cli_id) 973{ 974 struct zcache_client *cli; 975 int ret = -1; 976 977 cli = get_zcache_client(cli_id); 978 979 if (cli == NULL) 980 goto out; 981 if (cli->allocated) 982 goto out; 983 cli->allocated = 1; 984#ifdef CONFIG_FRONTSWAP 985 cli->zspool = zs_create_pool("zcache", ZCACHE_GFP_MASK); 986 if (cli->zspool == NULL) 987 goto out; 988 idr_init(&cli->tmem_pools); 989#endif 990 ret = 0; 991out: 992 return ret; 993} 994 995/* counters for debugging */ 996static unsigned long zcache_failed_get_free_pages; 997static unsigned long zcache_failed_alloc; 998static unsigned long zcache_put_to_flush; 999 1000/* 1001 * for now, used named slabs so can easily track usage; later can 1002 * either just use kmalloc, or perhaps add a slab-like allocator 1003 * to more carefully manage total memory utilization 1004 */ 1005static struct kmem_cache *zcache_objnode_cache; 1006static struct kmem_cache *zcache_obj_cache; 1007static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0); 1008static unsigned long zcache_curr_obj_count_max; 1009static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0); 1010static unsigned long zcache_curr_objnode_count_max; 1011 1012/* 1013 * to avoid memory allocation recursion (e.g. due to direct reclaim), we 1014 * preload all necessary data structures so the hostops callbacks never 1015 * actually do a malloc 1016 */ 1017struct zcache_preload { 1018 void *page; 1019 struct tmem_obj *obj; 1020 int nr; 1021 struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH]; 1022}; 1023static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, }; 1024 1025static int zcache_do_preload(struct tmem_pool *pool) 1026{ 1027 struct zcache_preload *kp; 1028 struct tmem_objnode *objnode; 1029 struct tmem_obj *obj; 1030 void *page; 1031 int ret = -ENOMEM; 1032 1033 if (unlikely(zcache_objnode_cache == NULL)) 1034 goto out; 1035 if (unlikely(zcache_obj_cache == NULL)) 1036 goto out; 1037 1038 /* IRQ has already been disabled. 
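 * The whole put path runs with local interrupts off (note the
 * BUG_ON(!irqs_disabled()) in zcache_put_page()), so the preallocations
 * below rely on ZCACHE_GFP_MASK never sleeping.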
*/ 1039 kp = &__get_cpu_var(zcache_preloads); 1040 while (kp->nr < ARRAY_SIZE(kp->objnodes)) { 1041 objnode = kmem_cache_alloc(zcache_objnode_cache, 1042 ZCACHE_GFP_MASK); 1043 if (unlikely(objnode == NULL)) { 1044 zcache_failed_alloc++; 1045 goto out; 1046 } 1047 1048 kp->objnodes[kp->nr++] = objnode; 1049 } 1050 1051 if (!kp->obj) { 1052 obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK); 1053 if (unlikely(obj == NULL)) { 1054 zcache_failed_alloc++; 1055 goto out; 1056 } 1057 kp->obj = obj; 1058 } 1059 1060 if (!kp->page) { 1061 page = (void *)__get_free_page(ZCACHE_GFP_MASK); 1062 if (unlikely(page == NULL)) { 1063 zcache_failed_get_free_pages++; 1064 goto out; 1065 } 1066 kp->page = page; 1067 } 1068 1069 ret = 0; 1070out: 1071 return ret; 1072} 1073 1074static void *zcache_get_free_page(void) 1075{ 1076 struct zcache_preload *kp; 1077 void *page; 1078 1079 kp = &__get_cpu_var(zcache_preloads); 1080 page = kp->page; 1081 BUG_ON(page == NULL); 1082 kp->page = NULL; 1083 return page; 1084} 1085 1086static void zcache_free_page(void *p) 1087{ 1088 free_page((unsigned long)p); 1089} 1090 1091/* 1092 * zcache implementation for tmem host ops 1093 */ 1094 1095static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool) 1096{ 1097 struct tmem_objnode *objnode = NULL; 1098 unsigned long count; 1099 struct zcache_preload *kp; 1100 1101 kp = &__get_cpu_var(zcache_preloads); 1102 if (kp->nr <= 0) 1103 goto out; 1104 objnode = kp->objnodes[kp->nr - 1]; 1105 BUG_ON(objnode == NULL); 1106 kp->objnodes[kp->nr - 1] = NULL; 1107 kp->nr--; 1108 count = atomic_inc_return(&zcache_curr_objnode_count); 1109 if (count > zcache_curr_objnode_count_max) 1110 zcache_curr_objnode_count_max = count; 1111out: 1112 return objnode; 1113} 1114 1115static void zcache_objnode_free(struct tmem_objnode *objnode, 1116 struct tmem_pool *pool) 1117{ 1118 atomic_dec(&zcache_curr_objnode_count); 1119 BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0); 1120 kmem_cache_free(zcache_objnode_cache, objnode); 1121} 1122 1123static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool) 1124{ 1125 struct tmem_obj *obj = NULL; 1126 unsigned long count; 1127 struct zcache_preload *kp; 1128 1129 kp = &__get_cpu_var(zcache_preloads); 1130 obj = kp->obj; 1131 BUG_ON(obj == NULL); 1132 kp->obj = NULL; 1133 count = atomic_inc_return(&zcache_curr_obj_count); 1134 if (count > zcache_curr_obj_count_max) 1135 zcache_curr_obj_count_max = count; 1136 return obj; 1137} 1138 1139static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool) 1140{ 1141 atomic_dec(&zcache_curr_obj_count); 1142 BUG_ON(atomic_read(&zcache_curr_obj_count) < 0); 1143 kmem_cache_free(zcache_obj_cache, obj); 1144} 1145 1146static struct tmem_hostops zcache_hostops = { 1147 .obj_alloc = zcache_obj_alloc, 1148 .obj_free = zcache_obj_free, 1149 .objnode_alloc = zcache_objnode_alloc, 1150 .objnode_free = zcache_objnode_free, 1151}; 1152 1153/* 1154 * zcache implementations for PAM page descriptor ops 1155 */ 1156 1157static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0); 1158static unsigned long zcache_curr_eph_pampd_count_max; 1159static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0); 1160static unsigned long zcache_curr_pers_pampd_count_max; 1161 1162/* forward reference */ 1163static int zcache_compress(struct page *from, void **out_va, unsigned *out_len); 1164 1165static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph, 1166 struct tmem_pool *pool, struct tmem_oid *oid, 1167 uint32_t index) 1168{ 1169 void 
*pampd = NULL, *cdata; 1170 unsigned clen; 1171 int ret; 1172 unsigned long count; 1173 struct page *page = (struct page *)(data); 1174 struct zcache_client *cli = pool->client; 1175 uint16_t client_id = get_client_id_from_client(cli); 1176 unsigned long zv_mean_zsize; 1177 unsigned long curr_pers_pampd_count; 1178 u64 total_zsize; 1179 1180 if (eph) { 1181 ret = zcache_compress(page, &cdata, &clen); 1182 if (ret == 0) 1183 goto out; 1184 if (clen == 0 || clen > zbud_max_buddy_size()) { 1185 zcache_compress_poor++; 1186 goto out; 1187 } 1188 pampd = (void *)zbud_create(client_id, pool->pool_id, oid, 1189 index, page, cdata, clen); 1190 if (pampd != NULL) { 1191 count = atomic_inc_return(&zcache_curr_eph_pampd_count); 1192 if (count > zcache_curr_eph_pampd_count_max) 1193 zcache_curr_eph_pampd_count_max = count; 1194 } 1195 } else { 1196 curr_pers_pampd_count = 1197 atomic_read(&zcache_curr_pers_pampd_count); 1198 if (curr_pers_pampd_count > 1199 (zv_page_count_policy_percent * totalram_pages) / 100) 1200 goto out; 1201 ret = zcache_compress(page, &cdata, &clen); 1202 if (ret == 0) 1203 goto out; 1204 /* reject if compression is too poor */ 1205 if (clen > zv_max_zsize) { 1206 zcache_compress_poor++; 1207 goto out; 1208 } 1209 /* reject if mean compression is too poor */ 1210 if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) { 1211 total_zsize = zs_get_total_size_bytes(cli->zspool); 1212 zv_mean_zsize = div_u64(total_zsize, 1213 curr_pers_pampd_count); 1214 if (zv_mean_zsize > zv_max_mean_zsize) { 1215 zcache_mean_compress_poor++; 1216 goto out; 1217 } 1218 } 1219 pampd = (void *)zv_create(cli->zspool, pool->pool_id, 1220 oid, index, cdata, clen); 1221 if (pampd == NULL) 1222 goto out; 1223 count = atomic_inc_return(&zcache_curr_pers_pampd_count); 1224 if (count > zcache_curr_pers_pampd_count_max) 1225 zcache_curr_pers_pampd_count_max = count; 1226 } 1227out: 1228 return pampd; 1229} 1230 1231/* 1232 * fill the pageframe corresponding to the struct page with the data 1233 * from the passed pampd 1234 */ 1235static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw, 1236 void *pampd, struct tmem_pool *pool, 1237 struct tmem_oid *oid, uint32_t index) 1238{ 1239 int ret = 0; 1240 1241 BUG_ON(is_ephemeral(pool)); 1242 zv_decompress((struct page *)(data), (unsigned long)pampd); 1243 return ret; 1244} 1245 1246/* 1247 * fill the pageframe corresponding to the struct page with the data 1248 * from the passed pampd 1249 */ 1250static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw, 1251 void *pampd, struct tmem_pool *pool, 1252 struct tmem_oid *oid, uint32_t index) 1253{ 1254 BUG_ON(!is_ephemeral(pool)); 1255 if (zbud_decompress((struct page *)(data), pampd) < 0) 1256 return -EINVAL; 1257 zbud_free_and_delist((struct zbud_hdr *)pampd); 1258 atomic_dec(&zcache_curr_eph_pampd_count); 1259 return 0; 1260} 1261 1262/* 1263 * free the pampd and remove it from any zcache lists 1264 * pampd must no longer be pointed to from any tmem data structures! 
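 *
 * Note that a pampd is overloaded here: for ephemeral pools it is really a
 * struct zbud_hdr *, while for persistent pools it is a zsmalloc handle
 * (an unsigned long) cast to a pointer; the casts below pick the right
 * interpretation based on is_ephemeral(pool).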
1265 */ 1266static void zcache_pampd_free(void *pampd, struct tmem_pool *pool, 1267 struct tmem_oid *oid, uint32_t index) 1268{ 1269 struct zcache_client *cli = pool->client; 1270 1271 if (is_ephemeral(pool)) { 1272 zbud_free_and_delist((struct zbud_hdr *)pampd); 1273 atomic_dec(&zcache_curr_eph_pampd_count); 1274 BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0); 1275 } else { 1276 zv_free(cli->zspool, (unsigned long)pampd); 1277 atomic_dec(&zcache_curr_pers_pampd_count); 1278 BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0); 1279 } 1280} 1281 1282static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj) 1283{ 1284} 1285 1286static void zcache_pampd_new_obj(struct tmem_obj *obj) 1287{ 1288} 1289 1290static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj) 1291{ 1292 return -1; 1293} 1294 1295static bool zcache_pampd_is_remote(void *pampd) 1296{ 1297 return 0; 1298} 1299 1300static struct tmem_pamops zcache_pamops = { 1301 .create = zcache_pampd_create, 1302 .get_data = zcache_pampd_get_data, 1303 .get_data_and_free = zcache_pampd_get_data_and_free, 1304 .free = zcache_pampd_free, 1305 .free_obj = zcache_pampd_free_obj, 1306 .new_obj = zcache_pampd_new_obj, 1307 .replace_in_obj = zcache_pampd_replace_in_obj, 1308 .is_remote = zcache_pampd_is_remote, 1309}; 1310 1311/* 1312 * zcache compression/decompression and related per-cpu stuff 1313 */ 1314 1315static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); 1316#define ZCACHE_DSTMEM_ORDER 1 1317 1318static int zcache_compress(struct page *from, void **out_va, unsigned *out_len) 1319{ 1320 int ret = 0; 1321 unsigned char *dmem = __get_cpu_var(zcache_dstmem); 1322 char *from_va; 1323 1324 BUG_ON(!irqs_disabled()); 1325 if (unlikely(dmem == NULL)) 1326 goto out; /* no buffer or no compressor so can't compress */ 1327 *out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER; 1328 from_va = kmap_atomic(from); 1329 mb(); 1330 ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem, 1331 out_len); 1332 BUG_ON(ret); 1333 *out_va = dmem; 1334 kunmap_atomic(from_va); 1335 ret = 1; 1336out: 1337 return ret; 1338} 1339 1340static int zcache_comp_cpu_up(int cpu) 1341{ 1342 struct crypto_comp *tfm; 1343 1344 tfm = crypto_alloc_comp(zcache_comp_name, 0, 0); 1345 if (IS_ERR(tfm)) 1346 return NOTIFY_BAD; 1347 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm; 1348 return NOTIFY_OK; 1349} 1350 1351static void zcache_comp_cpu_down(int cpu) 1352{ 1353 struct crypto_comp *tfm; 1354 1355 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu); 1356 crypto_free_comp(tfm); 1357 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL; 1358} 1359 1360static int zcache_cpu_notifier(struct notifier_block *nb, 1361 unsigned long action, void *pcpu) 1362{ 1363 int ret, cpu = (long)pcpu; 1364 struct zcache_preload *kp; 1365 1366 switch (action) { 1367 case CPU_UP_PREPARE: 1368 ret = zcache_comp_cpu_up(cpu); 1369 if (ret != NOTIFY_OK) { 1370 pr_err("zcache: can't allocate compressor transform\n"); 1371 return ret; 1372 } 1373 per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( 1374 GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER); 1375 break; 1376 case CPU_DEAD: 1377 case CPU_UP_CANCELED: 1378 zcache_comp_cpu_down(cpu); 1379 free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), 1380 ZCACHE_DSTMEM_ORDER); 1381 per_cpu(zcache_dstmem, cpu) = NULL; 1382 kp = &per_cpu(zcache_preloads, cpu); 1383 while (kp->nr) { 1384 kmem_cache_free(zcache_objnode_cache, 1385 kp->objnodes[kp->nr - 1]); 1386 kp->objnodes[kp->nr - 1] = NULL; 1387 kp->nr--; 1388 } 
1389 if (kp->obj) { 1390 kmem_cache_free(zcache_obj_cache, kp->obj); 1391 kp->obj = NULL; 1392 } 1393 if (kp->page) { 1394 free_page((unsigned long)kp->page); 1395 kp->page = NULL; 1396 } 1397 break; 1398 default: 1399 break; 1400 } 1401 return NOTIFY_OK; 1402} 1403 1404static struct notifier_block zcache_cpu_notifier_block = { 1405 .notifier_call = zcache_cpu_notifier 1406}; 1407 1408#ifdef CONFIG_SYSFS 1409#define ZCACHE_SYSFS_RO(_name) \ 1410 static ssize_t zcache_##_name##_show(struct kobject *kobj, \ 1411 struct kobj_attribute *attr, char *buf) \ 1412 { \ 1413 return sprintf(buf, "%lu\n", zcache_##_name); \ 1414 } \ 1415 static struct kobj_attribute zcache_##_name##_attr = { \ 1416 .attr = { .name = __stringify(_name), .mode = 0444 }, \ 1417 .show = zcache_##_name##_show, \ 1418 } 1419 1420#define ZCACHE_SYSFS_RO_ATOMIC(_name) \ 1421 static ssize_t zcache_##_name##_show(struct kobject *kobj, \ 1422 struct kobj_attribute *attr, char *buf) \ 1423 { \ 1424 return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \ 1425 } \ 1426 static struct kobj_attribute zcache_##_name##_attr = { \ 1427 .attr = { .name = __stringify(_name), .mode = 0444 }, \ 1428 .show = zcache_##_name##_show, \ 1429 } 1430 1431#define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \ 1432 static ssize_t zcache_##_name##_show(struct kobject *kobj, \ 1433 struct kobj_attribute *attr, char *buf) \ 1434 { \ 1435 return _func(buf); \ 1436 } \ 1437 static struct kobj_attribute zcache_##_name##_attr = { \ 1438 .attr = { .name = __stringify(_name), .mode = 0444 }, \ 1439 .show = zcache_##_name##_show, \ 1440 } 1441 1442ZCACHE_SYSFS_RO(curr_obj_count_max); 1443ZCACHE_SYSFS_RO(curr_objnode_count_max); 1444ZCACHE_SYSFS_RO(flush_total); 1445ZCACHE_SYSFS_RO(flush_found); 1446ZCACHE_SYSFS_RO(flobj_total); 1447ZCACHE_SYSFS_RO(flobj_found); 1448ZCACHE_SYSFS_RO(failed_eph_puts); 1449ZCACHE_SYSFS_RO(failed_pers_puts); 1450ZCACHE_SYSFS_RO(zbud_curr_zbytes); 1451ZCACHE_SYSFS_RO(zbud_cumul_zpages); 1452ZCACHE_SYSFS_RO(zbud_cumul_zbytes); 1453ZCACHE_SYSFS_RO(zbud_buddied_count); 1454ZCACHE_SYSFS_RO(zbpg_unused_list_count); 1455ZCACHE_SYSFS_RO(evicted_raw_pages); 1456ZCACHE_SYSFS_RO(evicted_unbuddied_pages); 1457ZCACHE_SYSFS_RO(evicted_buddied_pages); 1458ZCACHE_SYSFS_RO(failed_get_free_pages); 1459ZCACHE_SYSFS_RO(failed_alloc); 1460ZCACHE_SYSFS_RO(put_to_flush); 1461ZCACHE_SYSFS_RO(compress_poor); 1462ZCACHE_SYSFS_RO(mean_compress_poor); 1463ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages); 1464ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages); 1465ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count); 1466ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count); 1467ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts, 1468 zbud_show_unbuddied_list_counts); 1469ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts, 1470 zbud_show_cumul_chunk_counts); 1471ZCACHE_SYSFS_RO_CUSTOM(zv_curr_dist_counts, 1472 zv_curr_dist_counts_show); 1473ZCACHE_SYSFS_RO_CUSTOM(zv_cumul_dist_counts, 1474 zv_cumul_dist_counts_show); 1475 1476static struct attribute *zcache_attrs[] = { 1477 &zcache_curr_obj_count_attr.attr, 1478 &zcache_curr_obj_count_max_attr.attr, 1479 &zcache_curr_objnode_count_attr.attr, 1480 &zcache_curr_objnode_count_max_attr.attr, 1481 &zcache_flush_total_attr.attr, 1482 &zcache_flobj_total_attr.attr, 1483 &zcache_flush_found_attr.attr, 1484 &zcache_flobj_found_attr.attr, 1485 &zcache_failed_eph_puts_attr.attr, 1486 &zcache_failed_pers_puts_attr.attr, 1487 &zcache_compress_poor_attr.attr, 1488 &zcache_mean_compress_poor_attr.attr, 1489 &zcache_zbud_curr_raw_pages_attr.attr, 1490 
&zcache_zbud_curr_zpages_attr.attr, 1491 &zcache_zbud_curr_zbytes_attr.attr, 1492 &zcache_zbud_cumul_zpages_attr.attr, 1493 &zcache_zbud_cumul_zbytes_attr.attr, 1494 &zcache_zbud_buddied_count_attr.attr, 1495 &zcache_zbpg_unused_list_count_attr.attr, 1496 &zcache_evicted_raw_pages_attr.attr, 1497 &zcache_evicted_unbuddied_pages_attr.attr, 1498 &zcache_evicted_buddied_pages_attr.attr, 1499 &zcache_failed_get_free_pages_attr.attr, 1500 &zcache_failed_alloc_attr.attr, 1501 &zcache_put_to_flush_attr.attr, 1502 &zcache_zbud_unbuddied_list_counts_attr.attr, 1503 &zcache_zbud_cumul_chunk_counts_attr.attr, 1504 &zcache_zv_curr_dist_counts_attr.attr, 1505 &zcache_zv_cumul_dist_counts_attr.attr, 1506 &zcache_zv_max_zsize_attr.attr, 1507 &zcache_zv_max_mean_zsize_attr.attr, 1508 &zcache_zv_page_count_policy_percent_attr.attr, 1509 NULL, 1510}; 1511 1512static struct attribute_group zcache_attr_group = { 1513 .attrs = zcache_attrs, 1514 .name = "zcache", 1515}; 1516 1517#endif /* CONFIG_SYSFS */ 1518/* 1519 * When zcache is disabled ("frozen"), pools can be created and destroyed, 1520 * but all puts (and thus all other operations that require memory allocation) 1521 * must fail. If zcache is unfrozen, accepts puts, then frozen again, 1522 * data consistency requires all puts while frozen to be converted into 1523 * flushes. 1524 */ 1525static bool zcache_freeze; 1526 1527/* 1528 * zcache shrinker interface (only useful for ephemeral pages, so zbud only) 1529 */ 1530static int shrink_zcache_memory(struct shrinker *shrink, 1531 struct shrink_control *sc) 1532{ 1533 int ret = -1; 1534 int nr = sc->nr_to_scan; 1535 gfp_t gfp_mask = sc->gfp_mask; 1536 1537 if (nr >= 0) { 1538 if (!(gfp_mask & __GFP_FS)) 1539 /* does this case really need to be skipped? */ 1540 goto out; 1541 zbud_evict_pages(nr); 1542 } 1543 ret = (int)atomic_read(&zcache_zbud_curr_raw_pages); 1544out: 1545 return ret; 1546} 1547 1548static struct shrinker zcache_shrinker = { 1549 .shrink = shrink_zcache_memory, 1550 .seeks = DEFAULT_SEEKS, 1551}; 1552 1553/* 1554 * zcache shims between cleancache/frontswap ops and tmem 1555 */ 1556 1557static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp, 1558 uint32_t index, struct page *page) 1559{ 1560 struct tmem_pool *pool; 1561 int ret = -1; 1562 1563 BUG_ON(!irqs_disabled()); 1564 pool = zcache_get_pool_by_id(cli_id, pool_id); 1565 if (unlikely(pool == NULL)) 1566 goto out; 1567 if (!zcache_freeze && zcache_do_preload(pool) == 0) { 1568 /* preload does preempt_disable on success */ 1569 ret = tmem_put(pool, oidp, index, (char *)(page), 1570 PAGE_SIZE, 0, is_ephemeral(pool)); 1571 if (ret < 0) { 1572 if (is_ephemeral(pool)) 1573 zcache_failed_eph_puts++; 1574 else 1575 zcache_failed_pers_puts++; 1576 } 1577 } else { 1578 zcache_put_to_flush++; 1579 if (atomic_read(&pool->obj_count) > 0) 1580 /* the put fails whether the flush succeeds or not */ 1581 (void)tmem_flush_page(pool, oidp, index); 1582 } 1583 1584 zcache_put_pool(pool); 1585out: 1586 return ret; 1587} 1588 1589static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp, 1590 uint32_t index, struct page *page) 1591{ 1592 struct tmem_pool *pool; 1593 int ret = -1; 1594 unsigned long flags; 1595 size_t size = PAGE_SIZE; 1596 1597 local_irq_save(flags); 1598 pool = zcache_get_pool_by_id(cli_id, pool_id); 1599 if (likely(pool != NULL)) { 1600 if (atomic_read(&pool->obj_count) > 0) 1601 ret = tmem_get(pool, oidp, index, (char *)(page), 1602 &size, 0, is_ephemeral(pool)); 1603 zcache_put_pool(pool); 1604 } 1605 
local_irq_restore(flags); 1606 return ret; 1607} 1608 1609static int zcache_flush_page(int cli_id, int pool_id, 1610 struct tmem_oid *oidp, uint32_t index) 1611{ 1612 struct tmem_pool *pool; 1613 int ret = -1; 1614 unsigned long flags; 1615 1616 local_irq_save(flags); 1617 zcache_flush_total++; 1618 pool = zcache_get_pool_by_id(cli_id, pool_id); 1619 if (likely(pool != NULL)) { 1620 if (atomic_read(&pool->obj_count) > 0) 1621 ret = tmem_flush_page(pool, oidp, index); 1622 zcache_put_pool(pool); 1623 } 1624 if (ret >= 0) 1625 zcache_flush_found++; 1626 local_irq_restore(flags); 1627 return ret; 1628} 1629 1630static int zcache_flush_object(int cli_id, int pool_id, 1631 struct tmem_oid *oidp) 1632{ 1633 struct tmem_pool *pool; 1634 int ret = -1; 1635 unsigned long flags; 1636 1637 local_irq_save(flags); 1638 zcache_flobj_total++; 1639 pool = zcache_get_pool_by_id(cli_id, pool_id); 1640 if (likely(pool != NULL)) { 1641 if (atomic_read(&pool->obj_count) > 0) 1642 ret = tmem_flush_object(pool, oidp); 1643 zcache_put_pool(pool); 1644 } 1645 if (ret >= 0) 1646 zcache_flobj_found++; 1647 local_irq_restore(flags); 1648 return ret; 1649} 1650 1651static int zcache_destroy_pool(int cli_id, int pool_id) 1652{ 1653 struct tmem_pool *pool = NULL; 1654 struct zcache_client *cli; 1655 int ret = -1; 1656 1657 if (pool_id < 0) 1658 goto out; 1659 1660 cli = get_zcache_client(cli_id); 1661 if (cli == NULL) 1662 goto out; 1663 1664 atomic_inc(&cli->refcount); 1665 pool = idr_find(&cli->tmem_pools, pool_id); 1666 if (pool == NULL) 1667 goto out; 1668 idr_remove(&cli->tmem_pools, pool_id); 1669 /* wait for pool activity on other cpus to quiesce */ 1670 while (atomic_read(&pool->refcount) != 0) 1671 ; 1672 atomic_dec(&cli->refcount); 1673 local_bh_disable(); 1674 ret = tmem_destroy_pool(pool); 1675 local_bh_enable(); 1676 kfree(pool); 1677 pr_info("zcache: destroyed pool id=%d, cli_id=%d\n", 1678 pool_id, cli_id); 1679out: 1680 return ret; 1681} 1682 1683static int zcache_new_pool(uint16_t cli_id, uint32_t flags) 1684{ 1685 int poolid = -1; 1686 struct tmem_pool *pool; 1687 struct zcache_client *cli = NULL; 1688 int r; 1689 1690 cli = get_zcache_client(cli_id); 1691 if (cli == NULL) 1692 goto out; 1693 1694 atomic_inc(&cli->refcount); 1695 pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC); 1696 if (pool == NULL) { 1697 pr_info("zcache: pool creation failed: out of memory\n"); 1698 goto out; 1699 } 1700 1701 do { 1702 r = idr_pre_get(&cli->tmem_pools, GFP_ATOMIC); 1703 if (r != 1) { 1704 kfree(pool); 1705 pr_info("zcache: pool creation failed: out of memory\n"); 1706 goto out; 1707 } 1708 r = idr_get_new(&cli->tmem_pools, pool, &poolid); 1709 } while (r == -EAGAIN); 1710 if (r) { 1711 pr_info("zcache: pool creation failed: error %d\n", r); 1712 kfree(pool); 1713 goto out; 1714 } 1715 1716 atomic_set(&pool->refcount, 0); 1717 pool->client = cli; 1718 pool->pool_id = poolid; 1719 tmem_new_pool(pool, flags); 1720 pr_info("zcache: created %s tmem pool, id=%d, client=%d\n", 1721 flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", 1722 poolid, cli_id); 1723out: 1724 if (cli != NULL) 1725 atomic_dec(&cli->refcount); 1726 return poolid; 1727} 1728 1729/********** 1730 * Two kernel functionalities currently can be layered on top of tmem. 1731 * These are "cleancache" which is used as a second-chance cache for clean 1732 * page cache pages; and "frontswap" which is used for swap pages 1733 * to avoid writes to disk. 
A generic "shim" is provided here for each 1734 * to translate in-kernel semantics to zcache semantics. 1735 */ 1736 1737#ifdef CONFIG_CLEANCACHE 1738static void zcache_cleancache_put_page(int pool_id, 1739 struct cleancache_filekey key, 1740 pgoff_t index, struct page *page) 1741{ 1742 u32 ind = (u32) index; 1743 struct tmem_oid oid = *(struct tmem_oid *)&key; 1744 1745 if (likely(ind == index)) 1746 (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page); 1747} 1748 1749static int zcache_cleancache_get_page(int pool_id, 1750 struct cleancache_filekey key, 1751 pgoff_t index, struct page *page) 1752{ 1753 u32 ind = (u32) index; 1754 struct tmem_oid oid = *(struct tmem_oid *)&key; 1755 int ret = -1; 1756 1757 if (likely(ind == index)) 1758 ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page); 1759 return ret; 1760} 1761 1762static void zcache_cleancache_flush_page(int pool_id, 1763 struct cleancache_filekey key, 1764 pgoff_t index) 1765{ 1766 u32 ind = (u32) index; 1767 struct tmem_oid oid = *(struct tmem_oid *)&key; 1768 1769 if (likely(ind == index)) 1770 (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind); 1771} 1772 1773static void zcache_cleancache_flush_inode(int pool_id, 1774 struct cleancache_filekey key) 1775{ 1776 struct tmem_oid oid = *(struct tmem_oid *)&key; 1777 1778 (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid); 1779} 1780 1781static void zcache_cleancache_flush_fs(int pool_id) 1782{ 1783 if (pool_id >= 0) 1784 (void)zcache_destroy_pool(LOCAL_CLIENT, pool_id); 1785} 1786 1787static int zcache_cleancache_init_fs(size_t pagesize) 1788{ 1789 BUG_ON(sizeof(struct cleancache_filekey) != 1790 sizeof(struct tmem_oid)); 1791 BUG_ON(pagesize != PAGE_SIZE); 1792 return zcache_new_pool(LOCAL_CLIENT, 0); 1793} 1794 1795static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize) 1796{ 1797 /* shared pools are unsupported and map to private */ 1798 BUG_ON(sizeof(struct cleancache_filekey) != 1799 sizeof(struct tmem_oid)); 1800 BUG_ON(pagesize != PAGE_SIZE); 1801 return zcache_new_pool(LOCAL_CLIENT, 0); 1802} 1803 1804static struct cleancache_ops zcache_cleancache_ops = { 1805 .put_page = zcache_cleancache_put_page, 1806 .get_page = zcache_cleancache_get_page, 1807 .invalidate_page = zcache_cleancache_flush_page, 1808 .invalidate_inode = zcache_cleancache_flush_inode, 1809 .invalidate_fs = zcache_cleancache_flush_fs, 1810 .init_shared_fs = zcache_cleancache_init_shared_fs, 1811 .init_fs = zcache_cleancache_init_fs 1812}; 1813 1814struct cleancache_ops zcache_cleancache_register_ops(void) 1815{ 1816 struct cleancache_ops old_ops = 1817 cleancache_register_ops(&zcache_cleancache_ops); 1818 1819 return old_ops; 1820} 1821#endif 1822 1823#ifdef CONFIG_FRONTSWAP 1824/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ 1825static int zcache_frontswap_poolid = -1; 1826 1827/* 1828 * Swizzling increases objects per swaptype, increasing tmem concurrency 1829 * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS 1830 * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from 1831 * frontswap_load(), but has side-effects. Hence using 8. 
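 *
 * Worked example (illustrative, with SWIZ_BITS == 8): for swap type 1 and
 * page offset 0x12345, oswiz(1, 0x12345) yields oid.oid[0] == 0x145
 * ((1 << 8) | 0x45) and iswiz(0x12345) == 0x123, so each swaptype is
 * spread across 256 tmem objects and the index within an object is the
 * offset shifted right by SWIZ_BITS.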
1832 */ 1833#define SWIZ_BITS 8 1834#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) 1835#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) 1836#define iswiz(_ind) (_ind >> SWIZ_BITS) 1837 1838static inline struct tmem_oid oswiz(unsigned type, u32 ind) 1839{ 1840 struct tmem_oid oid = { .oid = { 0 } }; 1841 oid.oid[0] = _oswiz(type, ind); 1842 return oid; 1843} 1844 1845static int zcache_frontswap_store(unsigned type, pgoff_t offset, 1846 struct page *page) 1847{ 1848 u64 ind64 = (u64)offset; 1849 u32 ind = (u32)offset; 1850 struct tmem_oid oid = oswiz(type, ind); 1851 int ret = -1; 1852 unsigned long flags; 1853 1854 BUG_ON(!PageLocked(page)); 1855 if (likely(ind64 == ind)) { 1856 local_irq_save(flags); 1857 ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid, 1858 &oid, iswiz(ind), page); 1859 local_irq_restore(flags); 1860 } 1861 return ret; 1862} 1863 1864/* returns 0 if the page was successfully gotten from frontswap, -1 if 1865 * was not present (should never happen!) */ 1866static int zcache_frontswap_load(unsigned type, pgoff_t offset, 1867 struct page *page) 1868{ 1869 u64 ind64 = (u64)offset; 1870 u32 ind = (u32)offset; 1871 struct tmem_oid oid = oswiz(type, ind); 1872 int ret = -1; 1873 1874 BUG_ON(!PageLocked(page)); 1875 if (likely(ind64 == ind)) 1876 ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid, 1877 &oid, iswiz(ind), page); 1878 return ret; 1879} 1880 1881/* flush a single page from frontswap */ 1882static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset) 1883{ 1884 u64 ind64 = (u64)offset; 1885 u32 ind = (u32)offset; 1886 struct tmem_oid oid = oswiz(type, ind); 1887 1888 if (likely(ind64 == ind)) 1889 (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid, 1890 &oid, iswiz(ind)); 1891} 1892 1893/* flush all pages from the passed swaptype */ 1894static void zcache_frontswap_flush_area(unsigned type) 1895{ 1896 struct tmem_oid oid; 1897 int ind; 1898 1899 for (ind = SWIZ_MASK; ind >= 0; ind--) { 1900 oid = oswiz(type, ind); 1901 (void)zcache_flush_object(LOCAL_CLIENT, 1902 zcache_frontswap_poolid, &oid); 1903 } 1904} 1905 1906static void zcache_frontswap_init(unsigned ignored) 1907{ 1908 /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ 1909 if (zcache_frontswap_poolid < 0) 1910 zcache_frontswap_poolid = 1911 zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST); 1912} 1913 1914static struct frontswap_ops zcache_frontswap_ops = { 1915 .store = zcache_frontswap_store, 1916 .load = zcache_frontswap_load, 1917 .invalidate_page = zcache_frontswap_flush_page, 1918 .invalidate_area = zcache_frontswap_flush_area, 1919 .init = zcache_frontswap_init 1920}; 1921 1922struct frontswap_ops zcache_frontswap_register_ops(void) 1923{ 1924 struct frontswap_ops old_ops = 1925 frontswap_register_ops(&zcache_frontswap_ops); 1926 1927 return old_ops; 1928} 1929#endif 1930 1931/* 1932 * zcache initialization 1933 * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR 1934 * NOTHING HAPPENS! 
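 *
 * Example kernel command lines (illustrative; a non-default compressor is
 * used only if the kernel's crypto layer provides it):
 *
 *   zcache                  enable zcache with the default "lzo" compressor
 *   zcache=deflate          enable zcache using the "deflate" crypto compressor
 *   zcache nofrontswap      enable the cleancache side only
 *   zcache nocleancache     enable the frontswap side only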
1935 */ 1936 1937static int zcache_enabled; 1938 1939static int __init enable_zcache(char *s) 1940{ 1941 zcache_enabled = 1; 1942 return 1; 1943} 1944__setup("zcache", enable_zcache); 1945 1946/* allow independent dynamic disabling of cleancache and frontswap */ 1947 1948static int use_cleancache = 1; 1949 1950static int __init no_cleancache(char *s) 1951{ 1952 use_cleancache = 0; 1953 return 1; 1954} 1955 1956__setup("nocleancache", no_cleancache); 1957 1958static int use_frontswap = 1; 1959 1960static int __init no_frontswap(char *s) 1961{ 1962 use_frontswap = 0; 1963 return 1; 1964} 1965 1966__setup("nofrontswap", no_frontswap); 1967 1968static int __init enable_zcache_compressor(char *s) 1969{ 1970 strncpy(zcache_comp_name, s, ZCACHE_COMP_NAME_SZ); 1971 zcache_enabled = 1; 1972 return 1; 1973} 1974__setup("zcache=", enable_zcache_compressor); 1975 1976 1977static int __init zcache_comp_init(void) 1978{ 1979 int ret = 0; 1980 1981 /* check crypto algorithm */ 1982 if (*zcache_comp_name != '\0') { 1983 ret = crypto_has_comp(zcache_comp_name, 0, 0); 1984 if (!ret) 1985 pr_info("zcache: %s not supported\n", 1986 zcache_comp_name); 1987 } 1988 if (!ret) 1989 strcpy(zcache_comp_name, "lzo"); 1990 ret = crypto_has_comp(zcache_comp_name, 0, 0); 1991 if (!ret) { 1992 ret = 1; 1993 goto out; 1994 } 1995 pr_info("zcache: using %s compressor\n", zcache_comp_name); 1996 1997 /* alloc percpu transforms */ 1998 ret = 0; 1999 zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *); 2000 if (!zcache_comp_pcpu_tfms) 2001 ret = 1; 2002out: 2003 return ret; 2004} 2005 2006static int __init zcache_init(void) 2007{ 2008 int ret = 0; 2009 2010#ifdef CONFIG_SYSFS 2011 ret = sysfs_create_group(mm_kobj, &zcache_attr_group); 2012 if (ret) { 2013 pr_err("zcache: can't create sysfs\n"); 2014 goto out; 2015 } 2016#endif /* CONFIG_SYSFS */ 2017 2018 if (zcache_enabled) { 2019 unsigned int cpu; 2020 2021 tmem_register_hostops(&zcache_hostops); 2022 tmem_register_pamops(&zcache_pamops); 2023 ret = register_cpu_notifier(&zcache_cpu_notifier_block); 2024 if (ret) { 2025 pr_err("zcache: can't register cpu notifier\n"); 2026 goto out; 2027 } 2028 ret = zcache_comp_init(); 2029 if (ret) { 2030 pr_err("zcache: compressor initialization failed\n"); 2031 goto out; 2032 } 2033 for_each_online_cpu(cpu) { 2034 void *pcpu = (void *)(long)cpu; 2035 zcache_cpu_notifier(&zcache_cpu_notifier_block, 2036 CPU_UP_PREPARE, pcpu); 2037 } 2038 } 2039 zcache_objnode_cache = kmem_cache_create("zcache_objnode", 2040 sizeof(struct tmem_objnode), 0, 0, NULL); 2041 zcache_obj_cache = kmem_cache_create("zcache_obj", 2042 sizeof(struct tmem_obj), 0, 0, NULL); 2043 ret = zcache_new_client(LOCAL_CLIENT); 2044 if (ret) { 2045 pr_err("zcache: can't create client\n"); 2046 goto out; 2047 } 2048 2049#ifdef CONFIG_CLEANCACHE 2050 if (zcache_enabled && use_cleancache) { 2051 struct cleancache_ops old_ops; 2052 2053 zbud_init(); 2054 register_shrinker(&zcache_shrinker); 2055 old_ops = zcache_cleancache_register_ops(); 2056 pr_info("zcache: cleancache enabled using kernel " 2057 "transcendent memory and compression buddies\n"); 2058 if (old_ops.init_fs != NULL) 2059 pr_warning("zcache: cleancache_ops overridden"); 2060 } 2061#endif 2062#ifdef CONFIG_FRONTSWAP 2063 if (zcache_enabled && use_frontswap) { 2064 struct frontswap_ops old_ops; 2065 2066 old_ops = zcache_frontswap_register_ops(); 2067 pr_info("zcache: frontswap enabled using kernel " 2068 "transcendent memory and zsmalloc\n"); 2069 if (old_ops.init != NULL) 2070 pr_warning("zcache: frontswap_ops 
overridden"); 2071 } 2072#endif 2073out: 2074 return ret; 2075} 2076 2077module_init(zcache_init)