zcache.c — Linux kernel mirror (for testing), git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git, at v3.8-rc6 (1820 lines)
/*
 * zcache.c
 *
 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
 * Copyright (c) 2010,2011, Nitin Gupta
 *
 * Zcache provides an in-kernel "host implementation" for transcendent memory
 * ("tmem") and, thus indirectly, for cleancache and frontswap.  Zcache uses
 * lzo1x compression to improve density and an embedded allocator called
 * "zbud" which "buddies" two compressed pages semi-optimally in each physical
 * pageframe.  Zbud is integrally tied into tmem to allow pageframes to
 * be "reclaimed" efficiently.
 */

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/math64.h>
#include <linux/crypto.h>

#include <linux/cleancache.h>
#include <linux/frontswap.h>
#include "tmem.h"
#include "zcache.h"
#include "zbud.h"
#include "ramster.h"
#ifdef CONFIG_RAMSTER
static int ramster_enabled;
#else
#define ramster_enabled 0
#endif

#ifndef __PG_WAS_ACTIVE
static inline bool PageWasActive(struct page *page)
{
	return true;
}

static inline void SetPageWasActive(struct page *page)
{
}
#endif

#ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS
static bool frontswap_has_exclusive_gets __read_mostly = true;
#else
static bool frontswap_has_exclusive_gets __read_mostly;
static inline void frontswap_tmem_exclusive_gets(bool b)
{
}
#endif

static int zcache_enabled __read_mostly;
static int disable_cleancache __read_mostly;
static int disable_frontswap __read_mostly;
static int disable_frontswap_ignore_nonactive __read_mostly;
static int disable_cleancache_ignore_nonactive __read_mostly;
static char *namestr __read_mostly = "zcache";

#define ZCACHE_GFP_MASK \
	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)

MODULE_LICENSE("GPL");

/* crypto API for zcache */
#define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME
static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly;
static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly;

enum comp_op {
	ZCACHE_COMPOP_COMPRESS,
	ZCACHE_COMPOP_DECOMPRESS
};

static inline int zcache_comp_op(enum comp_op op,
				const u8 *src, unsigned int slen,
				u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret = -1;

	BUG_ON(!zcache_comp_pcpu_tfms);
	tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
	BUG_ON(!tfm);
	switch (op) {
	case ZCACHE_COMPOP_COMPRESS:
		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
		break;
	case ZCACHE_COMPOP_DECOMPRESS:
		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
		break;
	default:
		ret = -EINVAL;
	}
	put_cpu();
	return ret;
}

/*
 * policy parameters
 */

/*
 * byte count defining poor compression; pages with greater zsize will be
 * rejected
 */
static unsigned int zbud_max_zsize __read_mostly = (PAGE_SIZE / 8) * 7;
/*
 * byte count defining poor *mean* compression; pages with greater zsize
 * will be rejected until sufficient better-compressed pages are accepted
 * driving the mean below this threshold
 */
static unsigned int zbud_max_mean_zsize __read_mostly = (PAGE_SIZE / 8) * 5;
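/*
 * Worked example (assuming PAGE_SIZE == 4096): zbud_max_zsize is
 * (4096/8)*7 = 3584 bytes, so a zpage is rejected outright unless it
 * compresses to at most 7/8 of a pageframe; zbud_max_mean_zsize is
 * (4096/8)*5 = 2560 bytes, so once the running mean zsize drifts above
 * 5/8 of a pageframe, poorly-compressing pages are rejected until
 * enough well-compressed pages bring the mean back down.
 */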
/*
 * for now, use named slabs so we can easily track usage; later we can
 * either just use kmalloc, or perhaps add a slab-like allocator
 * to more carefully manage total memory utilization
 */
static struct kmem_cache *zcache_objnode_cache;
static struct kmem_cache *zcache_obj_cache;

static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };

/* we try to keep these statistics SMP-consistent */
static long zcache_obj_count;
static atomic_t zcache_obj_atomic = ATOMIC_INIT(0);
static long zcache_obj_count_max;
static long zcache_objnode_count;
static atomic_t zcache_objnode_atomic = ATOMIC_INIT(0);
static long zcache_objnode_count_max;
static u64 zcache_eph_zbytes;
static atomic_long_t zcache_eph_zbytes_atomic = ATOMIC_INIT(0);
static u64 zcache_eph_zbytes_max;
static u64 zcache_pers_zbytes;
static atomic_long_t zcache_pers_zbytes_atomic = ATOMIC_INIT(0);
static u64 zcache_pers_zbytes_max;
static long zcache_eph_pageframes;
static atomic_t zcache_eph_pageframes_atomic = ATOMIC_INIT(0);
static long zcache_eph_pageframes_max;
static long zcache_pers_pageframes;
static atomic_t zcache_pers_pageframes_atomic = ATOMIC_INIT(0);
static long zcache_pers_pageframes_max;
static long zcache_pageframes_alloced;
static atomic_t zcache_pageframes_alloced_atomic = ATOMIC_INIT(0);
static long zcache_pageframes_freed;
static atomic_t zcache_pageframes_freed_atomic = ATOMIC_INIT(0);
static long zcache_eph_zpages;
static atomic_t zcache_eph_zpages_atomic = ATOMIC_INIT(0);
static long zcache_eph_zpages_max;
static long zcache_pers_zpages;
static atomic_t zcache_pers_zpages_atomic = ATOMIC_INIT(0);
static long zcache_pers_zpages_max;

/* but for the rest of these, counting races are ok */
static unsigned long zcache_flush_total;
static unsigned long zcache_flush_found;
static unsigned long zcache_flobj_total;
static unsigned long zcache_flobj_found;
static unsigned long zcache_failed_eph_puts;
static unsigned long zcache_failed_pers_puts;
static unsigned long zcache_failed_getfreepages;
static unsigned long zcache_failed_alloc;
static unsigned long zcache_put_to_flush;
static unsigned long zcache_compress_poor;
static unsigned long zcache_mean_compress_poor;
static unsigned long zcache_eph_ate_tail;
static unsigned long zcache_eph_ate_tail_failed;
static unsigned long zcache_pers_ate_eph;
static unsigned long zcache_pers_ate_eph_failed;
static unsigned long zcache_evicted_eph_zpages;
static unsigned long zcache_evicted_eph_pageframes;
static unsigned long zcache_last_active_file_pageframes;
static unsigned long zcache_last_inactive_file_pageframes;
static unsigned long zcache_last_active_anon_pageframes;
static unsigned long zcache_last_inactive_anon_pageframes;
static unsigned long zcache_eph_nonactive_puts_ignored;
static unsigned long zcache_pers_nonactive_puts_ignored;

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#define zdfs	debugfs_create_size_t
#define zdfs64	debugfs_create_u64
static int zcache_debugfs_init(void)
{
	struct dentry *root = debugfs_create_dir("zcache", NULL);
	if (root == NULL)
		return -ENXIO;

	zdfs("obj_count", S_IRUGO, root, &zcache_obj_count);
	zdfs("obj_count_max", S_IRUGO, root, &zcache_obj_count_max);
	zdfs("objnode_count", S_IRUGO, root, &zcache_objnode_count);
	zdfs("objnode_count_max", S_IRUGO, root, &zcache_objnode_count_max);
	zdfs("flush_total", S_IRUGO, root, &zcache_flush_total);
	zdfs("flush_found", S_IRUGO, root, &zcache_flush_found);
	zdfs("flobj_total", S_IRUGO, root, &zcache_flobj_total);
	zdfs("flobj_found", S_IRUGO, root, &zcache_flobj_found);
	zdfs("failed_eph_puts", S_IRUGO, root, &zcache_failed_eph_puts);
	zdfs("failed_pers_puts", S_IRUGO, root, &zcache_failed_pers_puts);
	zdfs("failed_get_free_pages", S_IRUGO, root,
			&zcache_failed_getfreepages);
	zdfs("failed_alloc", S_IRUGO, root, &zcache_failed_alloc);
	zdfs("put_to_flush", S_IRUGO, root, &zcache_put_to_flush);
	zdfs("compress_poor", S_IRUGO, root, &zcache_compress_poor);
	zdfs("mean_compress_poor", S_IRUGO, root, &zcache_mean_compress_poor);
	zdfs("eph_ate_tail", S_IRUGO, root, &zcache_eph_ate_tail);
	zdfs("eph_ate_tail_failed", S_IRUGO, root, &zcache_eph_ate_tail_failed);
	zdfs("pers_ate_eph", S_IRUGO, root, &zcache_pers_ate_eph);
	zdfs("pers_ate_eph_failed", S_IRUGO, root, &zcache_pers_ate_eph_failed);
	zdfs("evicted_eph_zpages", S_IRUGO, root, &zcache_evicted_eph_zpages);
	zdfs("evicted_eph_pageframes", S_IRUGO, root,
			&zcache_evicted_eph_pageframes);
	zdfs("eph_pageframes", S_IRUGO, root, &zcache_eph_pageframes);
	zdfs("eph_pageframes_max", S_IRUGO, root, &zcache_eph_pageframes_max);
	zdfs("pers_pageframes", S_IRUGO, root, &zcache_pers_pageframes);
	zdfs("pers_pageframes_max", S_IRUGO, root, &zcache_pers_pageframes_max);
	zdfs("eph_zpages", S_IRUGO, root, &zcache_eph_zpages);
	zdfs("eph_zpages_max", S_IRUGO, root, &zcache_eph_zpages_max);
	zdfs("pers_zpages", S_IRUGO, root, &zcache_pers_zpages);
	zdfs("pers_zpages_max", S_IRUGO, root, &zcache_pers_zpages_max);
	zdfs("last_active_file_pageframes", S_IRUGO, root,
			&zcache_last_active_file_pageframes);
	zdfs("last_inactive_file_pageframes", S_IRUGO, root,
			&zcache_last_inactive_file_pageframes);
	zdfs("last_active_anon_pageframes", S_IRUGO, root,
			&zcache_last_active_anon_pageframes);
	zdfs("last_inactive_anon_pageframes", S_IRUGO, root,
			&zcache_last_inactive_anon_pageframes);
	zdfs("eph_nonactive_puts_ignored", S_IRUGO, root,
			&zcache_eph_nonactive_puts_ignored);
	zdfs("pers_nonactive_puts_ignored", S_IRUGO, root,
			&zcache_pers_nonactive_puts_ignored);
	zdfs64("eph_zbytes", S_IRUGO, root, &zcache_eph_zbytes);
	zdfs64("eph_zbytes_max", S_IRUGO, root, &zcache_eph_zbytes_max);
	zdfs64("pers_zbytes", S_IRUGO, root, &zcache_pers_zbytes);
	zdfs64("pers_zbytes_max", S_IRUGO, root, &zcache_pers_zbytes_max);
	return 0;
}
#undef zdfs
#undef zdfs64
#endif
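/*
 * Usage sketch (assuming debugfs is mounted at the conventional
 * /sys/kernel/debug): each counter above appears as a read-only file,
 * e.g. "cat /sys/kernel/debug/zcache/eph_pageframes" shows how many
 * pageframes currently hold compressed ephemeral (cleancache) data.
 */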
#define ZCACHE_DEBUG
#ifdef ZCACHE_DEBUG
/* developers can call this in case of ooms, e.g. to find memory leaks */
void zcache_dump(void)
{
	pr_info("zcache: obj_count=%lu\n", zcache_obj_count);
	pr_info("zcache: obj_count_max=%lu\n", zcache_obj_count_max);
	pr_info("zcache: objnode_count=%lu\n", zcache_objnode_count);
	pr_info("zcache: objnode_count_max=%lu\n", zcache_objnode_count_max);
	pr_info("zcache: flush_total=%lu\n", zcache_flush_total);
	pr_info("zcache: flush_found=%lu\n", zcache_flush_found);
	pr_info("zcache: flobj_total=%lu\n", zcache_flobj_total);
	pr_info("zcache: flobj_found=%lu\n", zcache_flobj_found);
	pr_info("zcache: failed_eph_puts=%lu\n", zcache_failed_eph_puts);
	pr_info("zcache: failed_pers_puts=%lu\n", zcache_failed_pers_puts);
	pr_info("zcache: failed_get_free_pages=%lu\n",
			zcache_failed_getfreepages);
	pr_info("zcache: failed_alloc=%lu\n", zcache_failed_alloc);
	pr_info("zcache: put_to_flush=%lu\n", zcache_put_to_flush);
	pr_info("zcache: compress_poor=%lu\n", zcache_compress_poor);
	pr_info("zcache: mean_compress_poor=%lu\n",
			zcache_mean_compress_poor);
	pr_info("zcache: eph_ate_tail=%lu\n", zcache_eph_ate_tail);
	pr_info("zcache: eph_ate_tail_failed=%lu\n",
			zcache_eph_ate_tail_failed);
	pr_info("zcache: pers_ate_eph=%lu\n", zcache_pers_ate_eph);
	pr_info("zcache: pers_ate_eph_failed=%lu\n",
			zcache_pers_ate_eph_failed);
	pr_info("zcache: evicted_eph_zpages=%lu\n", zcache_evicted_eph_zpages);
	pr_info("zcache: evicted_eph_pageframes=%lu\n",
			zcache_evicted_eph_pageframes);
	pr_info("zcache: eph_pageframes=%lu\n", zcache_eph_pageframes);
	pr_info("zcache: eph_pageframes_max=%lu\n", zcache_eph_pageframes_max);
	pr_info("zcache: pers_pageframes=%lu\n", zcache_pers_pageframes);
	pr_info("zcache: pers_pageframes_max=%lu\n",
			zcache_pers_pageframes_max);
	pr_info("zcache: eph_zpages=%lu\n", zcache_eph_zpages);
	pr_info("zcache: eph_zpages_max=%lu\n", zcache_eph_zpages_max);
	pr_info("zcache: pers_zpages=%lu\n", zcache_pers_zpages);
	pr_info("zcache: pers_zpages_max=%lu\n", zcache_pers_zpages_max);
	pr_info("zcache: eph_zbytes=%llu\n",
			(unsigned long long)zcache_eph_zbytes);
	pr_info("zcache: eph_zbytes_max=%llu\n",
			(unsigned long long)zcache_eph_zbytes_max);
	pr_info("zcache: pers_zbytes=%llu\n",
			(unsigned long long)zcache_pers_zbytes);
	pr_info("zcache: pers_zbytes_max=%llu\n",
			(unsigned long long)zcache_pers_zbytes_max);
}
#endif
/*
 * zcache core code starts here
 */

static struct zcache_client zcache_host;
static struct zcache_client zcache_clients[MAX_CLIENTS];

static inline bool is_local_client(struct zcache_client *cli)
{
	return cli == &zcache_host;
}

static struct zcache_client *zcache_get_client_by_id(uint16_t cli_id)
{
	struct zcache_client *cli = &zcache_host;

	if (cli_id != LOCAL_CLIENT) {
		if (cli_id >= MAX_CLIENTS)
			goto out;
		cli = &zcache_clients[cli_id];
	}
out:
	return cli;
}

/*
 * Tmem operations assume the poolid implies the invoking client.
 * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
 * RAMster has each client numbered by cluster node, and a KVM version
 * of zcache would have one client per guest and each client might
 * have a poolid==N.
 */
struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
{
	struct tmem_pool *pool = NULL;
	struct zcache_client *cli = NULL;

	cli = zcache_get_client_by_id(cli_id);
	if (cli == NULL)
		goto out;
	if (!is_local_client(cli))
		atomic_inc(&cli->refcount);
	if (poolid < MAX_POOLS_PER_CLIENT) {
		pool = cli->tmem_pools[poolid];
		if (pool != NULL)
			atomic_inc(&pool->refcount);
	}
out:
	return pool;
}

void zcache_put_pool(struct tmem_pool *pool)
{
	struct zcache_client *cli = NULL;

	if (pool == NULL)
		BUG();
	cli = pool->client;
	atomic_dec(&pool->refcount);
	if (!is_local_client(cli))
		atomic_dec(&cli->refcount);
}

int zcache_new_client(uint16_t cli_id)
{
	struct zcache_client *cli;
	int ret = -1;

	cli = zcache_get_client_by_id(cli_id);
	if (cli == NULL)
		goto out;
	if (cli->allocated)
		goto out;
	cli->allocated = 1;
	ret = 0;
out:
	return ret;
}

/*
 * zcache implementation for tmem host ops
 */

static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
{
	struct tmem_objnode *objnode = NULL;
	struct zcache_preload *kp;
	int i;

	kp = &__get_cpu_var(zcache_preloads);
	for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
		objnode = kp->objnodes[i];
		if (objnode != NULL) {
			kp->objnodes[i] = NULL;
			break;
		}
	}
	BUG_ON(objnode == NULL);
	zcache_objnode_count = atomic_inc_return(&zcache_objnode_atomic);
	if (zcache_objnode_count > zcache_objnode_count_max)
		zcache_objnode_count_max = zcache_objnode_count;
	return objnode;
}

static void zcache_objnode_free(struct tmem_objnode *objnode,
				struct tmem_pool *pool)
{
	zcache_objnode_count =
		atomic_dec_return(&zcache_objnode_atomic);
	BUG_ON(zcache_objnode_count < 0);
	kmem_cache_free(zcache_objnode_cache, objnode);
}

static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
{
	struct tmem_obj *obj = NULL;
	struct zcache_preload *kp;

	kp = &__get_cpu_var(zcache_preloads);
	obj = kp->obj;
	BUG_ON(obj == NULL);
	kp->obj = NULL;
	zcache_obj_count = atomic_inc_return(&zcache_obj_atomic);
	if (zcache_obj_count > zcache_obj_count_max)
		zcache_obj_count_max = zcache_obj_count;
	return obj;
}

static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
{
	zcache_obj_count =
		atomic_dec_return(&zcache_obj_atomic);
	BUG_ON(zcache_obj_count < 0);
	kmem_cache_free(zcache_obj_cache, obj);
}

static struct tmem_hostops zcache_hostops = {
	.obj_alloc = zcache_obj_alloc,
	.obj_free = zcache_obj_free,
	.objnode_alloc = zcache_objnode_alloc,
	.objnode_free = zcache_objnode_free,
};
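/*
 * Allocation contract for the hostops above: tmem invokes them with
 * interrupts disabled, so they must never sleep or allocate.  Instead,
 * zcache_pampd_create() (below) pre-fills the per-cpu zcache_preloads
 * stash with objnodes and an obj before calling into tmem; the
 * obj_alloc/objnode_alloc hostops simply pop from that stash and
 * BUG() if the preload was insufficient.
 */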
static struct page *zcache_alloc_page(void)
{
	struct page *page = alloc_page(ZCACHE_GFP_MASK);

	if (page != NULL)
		zcache_pageframes_alloced =
			atomic_inc_return(&zcache_pageframes_alloced_atomic);
	return page;
}

#ifdef FRONTSWAP_HAS_UNUSE
static void zcache_unacct_page(void)
{
	zcache_pageframes_freed =
		atomic_inc_return(&zcache_pageframes_freed_atomic);
}
#endif

static void zcache_free_page(struct page *page)
{
	long curr_pageframes;
	static long max_pageframes, min_pageframes;

	if (page == NULL)
		BUG();
	__free_page(page);
	zcache_pageframes_freed =
		atomic_inc_return(&zcache_pageframes_freed_atomic);
	curr_pageframes = zcache_pageframes_alloced -
			atomic_read(&zcache_pageframes_freed_atomic) -
			atomic_read(&zcache_eph_pageframes_atomic) -
			atomic_read(&zcache_pers_pageframes_atomic);
	if (curr_pageframes > max_pageframes)
		max_pageframes = curr_pageframes;
	if (curr_pageframes < min_pageframes)
		min_pageframes = curr_pageframes;
#ifdef ZCACHE_DEBUG
	if (curr_pageframes > 2L || curr_pageframes < -2L) {
		/* pr_info here */
	}
#endif
}

/*
 * zcache implementations for PAM page descriptor ops
 */

/* forward reference */
static void zcache_compress(struct page *from,
				void **out_va, unsigned *out_len);

static struct page *zcache_evict_eph_pageframe(void);

static void *zcache_pampd_eph_create(char *data, size_t size, bool raw,
					struct tmem_handle *th)
{
	void *pampd = NULL, *cdata = data;
	unsigned clen = size;
	struct page *page = (struct page *)(data), *newpage;

	if (!raw) {
		zcache_compress(page, &cdata, &clen);
		if (clen > zbud_max_buddy_size()) {
			zcache_compress_poor++;
			goto out;
		}
	} else {
		BUG_ON(clen > zbud_max_buddy_size());
	}

	/* look for space via an existing match first */
	pampd = (void *)zbud_match_prep(th, true, cdata, clen);
	if (pampd != NULL)
		goto got_pampd;

	/* no match, now we need to find (or free up) a full page */
	newpage = zcache_alloc_page();
	if (newpage != NULL)
		goto create_in_new_page;

	zcache_failed_getfreepages++;
	/* can't allocate a page, evict an ephemeral page via LRU */
	newpage = zcache_evict_eph_pageframe();
	if (newpage == NULL) {
		zcache_eph_ate_tail_failed++;
		goto out;
	}
	zcache_eph_ate_tail++;

create_in_new_page:
	pampd = (void *)zbud_create_prep(th, true, cdata, clen, newpage);
	BUG_ON(pampd == NULL);
	zcache_eph_pageframes =
		atomic_inc_return(&zcache_eph_pageframes_atomic);
	if (zcache_eph_pageframes > zcache_eph_pageframes_max)
		zcache_eph_pageframes_max = zcache_eph_pageframes;

got_pampd:
	zcache_eph_zbytes =
		atomic_long_add_return(clen, &zcache_eph_zbytes_atomic);
	if (zcache_eph_zbytes > zcache_eph_zbytes_max)
		zcache_eph_zbytes_max = zcache_eph_zbytes;
	zcache_eph_zpages = atomic_inc_return(&zcache_eph_zpages_atomic);
	if (zcache_eph_zpages > zcache_eph_zpages_max)
		zcache_eph_zpages_max = zcache_eph_zpages;
	if (ramster_enabled && raw)
		ramster_count_foreign_pages(true, 1);
out:
	return pampd;
}

static void *zcache_pampd_pers_create(char *data, size_t size, bool raw,
					struct tmem_handle *th)
{
	void *pampd = NULL, *cdata = data;
	unsigned clen = size;
	struct page *page = (struct page *)(data), *newpage;
	unsigned long zbud_mean_zsize;
	unsigned long curr_pers_zpages, total_zsize;

	if (data == NULL) {
		BUG_ON(!ramster_enabled);
		goto create_pampd;
	}
	curr_pers_zpages = zcache_pers_zpages;
/* FIXME CONFIG_RAMSTER... subtract atomic remote_pers_pages here? */
	if (!raw)
		zcache_compress(page, &cdata, &clen);
	/* reject if compression is too poor */
	if (clen > zbud_max_zsize) {
		zcache_compress_poor++;
		goto out;
	}
	/* reject if mean compression is too poor */
	if ((clen > zbud_max_mean_zsize) && (curr_pers_zpages > 0)) {
		total_zsize = zcache_pers_zbytes;
		if ((long)total_zsize < 0)
			total_zsize = 0;
		zbud_mean_zsize = div_u64(total_zsize,
					curr_pers_zpages);
		if (zbud_mean_zsize > zbud_max_mean_zsize) {
			zcache_mean_compress_poor++;
			goto out;
		}
	}

create_pampd:
	/* look for space via an existing match first */
	pampd = (void *)zbud_match_prep(th, false, cdata, clen);
	if (pampd != NULL)
		goto got_pampd;

	/* no match, now we need to find (or free up) a full page */
	newpage = zcache_alloc_page();
	if (newpage != NULL)
		goto create_in_new_page;
	/*
	 * FIXME do the following only if eph is oversized?
	 * if (zcache_eph_pageframes >
	 *	(global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE) +
	 *	global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE)))
	 */
	zcache_failed_getfreepages++;
	/* can't allocate a page, evict an ephemeral page via LRU */
	newpage = zcache_evict_eph_pageframe();
	if (newpage == NULL) {
		zcache_pers_ate_eph_failed++;
		goto out;
	}
	zcache_pers_ate_eph++;

create_in_new_page:
	pampd = (void *)zbud_create_prep(th, false, cdata, clen, newpage);
	BUG_ON(pampd == NULL);
	zcache_pers_pageframes =
		atomic_inc_return(&zcache_pers_pageframes_atomic);
	if (zcache_pers_pageframes > zcache_pers_pageframes_max)
		zcache_pers_pageframes_max = zcache_pers_pageframes;

got_pampd:
	zcache_pers_zpages = atomic_inc_return(&zcache_pers_zpages_atomic);
	if (zcache_pers_zpages > zcache_pers_zpages_max)
		zcache_pers_zpages_max = zcache_pers_zpages;
	zcache_pers_zbytes =
		atomic_long_add_return(clen, &zcache_pers_zbytes_atomic);
	if (zcache_pers_zbytes > zcache_pers_zbytes_max)
		zcache_pers_zbytes_max = zcache_pers_zbytes;
	if (ramster_enabled && raw)
		ramster_count_foreign_pages(false, 1);
out:
	return pampd;
}
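/*
 * Worked example of the mean-compression check above (assuming
 * PAGE_SIZE == 4096, so zbud_max_mean_zsize == 2560): a zpage that
 * compressed to clen == 3000 is only admitted if the current mean,
 * zcache_pers_zbytes / curr_pers_zpages, is still at or below 2560;
 * e.g. with 1000 persistent zpages totalling 2,400,000 bytes
 * (mean 2400) it is accepted, but at 2,700,000 bytes (mean 2700) it
 * is rejected and zcache_mean_compress_poor is bumped.
 */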
/*
 * This is called directly from zcache_put_page to pre-allocate space
 * to store a zpage.
 */
void *zcache_pampd_create(char *data, unsigned int size, bool raw,
				int eph, struct tmem_handle *th)
{
	void *pampd = NULL;
	struct zcache_preload *kp;
	struct tmem_objnode *objnode;
	struct tmem_obj *obj;
	int i;

	BUG_ON(!irqs_disabled());
	/* pre-allocate per-cpu metadata */
	BUG_ON(zcache_objnode_cache == NULL);
	BUG_ON(zcache_obj_cache == NULL);
	kp = &__get_cpu_var(zcache_preloads);
	for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
		objnode = kp->objnodes[i];
		if (objnode == NULL) {
			objnode = kmem_cache_alloc(zcache_objnode_cache,
							ZCACHE_GFP_MASK);
			if (unlikely(objnode == NULL)) {
				zcache_failed_alloc++;
				goto out;
			}
			kp->objnodes[i] = objnode;
		}
	}
	if (kp->obj == NULL) {
		obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
		kp->obj = obj;
	}
	if (unlikely(kp->obj == NULL)) {
		zcache_failed_alloc++;
		goto out;
	}
	/*
	 * ok, have all the metadata pre-allocated, now do the data
	 * but since how we allocate the data is dependent on ephemeral
	 * or persistent, we split the call here to different sub-functions
	 */
	if (eph)
		pampd = zcache_pampd_eph_create(data, size, raw, th);
	else
		pampd = zcache_pampd_pers_create(data, size, raw, th);
out:
	return pampd;
}

/*
 * This is a pamops called via tmem_put and is necessary to "finish"
 * a pampd creation.
 */
void zcache_pampd_create_finish(void *pampd, bool eph)
{
	zbud_create_finish((struct zbudref *)pampd, eph);
}

/*
 * This is passed as a function parameter to zbud_decompress so that
 * zbud need not be familiar with the details of crypto.  It assumes that
 * the bytes from_va and to_va through from_va+size-1 and to_va+size-1 are
 * kmapped.  It must be successful, else there is a logic bug somewhere.
 */
static void zcache_decompress(char *from_va, unsigned int size, char *to_va)
{
	int ret;
	unsigned int outlen = PAGE_SIZE;

	ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size,
				to_va, &outlen);
	BUG_ON(ret);
	BUG_ON(outlen != PAGE_SIZE);
}
/*
 * Decompress from the kernel va to a pageframe
 */
void zcache_decompress_to_page(char *from_va, unsigned int size,
				struct page *to_page)
{
	char *to_va = kmap_atomic(to_page);
	zcache_decompress(from_va, size, to_va);
	kunmap_atomic(to_va);
}

/*
 * fill the pageframe corresponding to the struct page with the data
 * from the passed pampd
 */
static int zcache_pampd_get_data(char *data, size_t *sizep, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret;
	bool eph = !is_persistent(pool);

	BUG_ON(preemptible());
	BUG_ON(eph);	/* fix later if shared pools get implemented */
	BUG_ON(pampd_is_remote(pampd));
	if (raw)
		ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
						sizep, eph);
	else {
		ret = zbud_decompress((struct page *)(data),
					(struct zbudref *)pampd, false,
					zcache_decompress);
		*sizep = PAGE_SIZE;
	}
	return ret;
}

/*
 * fill the pageframe corresponding to the struct page with the data
 * from the passed pampd, then free the pampd and adjust accounting
 */
static int zcache_pampd_get_data_and_free(char *data, size_t *sizep, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret;
	bool eph = !is_persistent(pool);
	struct page *page = NULL;
	unsigned int zsize, zpages;

	BUG_ON(preemptible());
	BUG_ON(pampd_is_remote(pampd));
	if (raw)
		ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
						sizep, eph);
	else {
		ret = zbud_decompress((struct page *)(data),
					(struct zbudref *)pampd, eph,
					zcache_decompress);
		*sizep = PAGE_SIZE;
	}
	page = zbud_free_and_delist((struct zbudref *)pampd, eph,
					&zsize, &zpages);
	if (eph) {
		if (page)
			zcache_eph_pageframes =
			    atomic_dec_return(&zcache_eph_pageframes_atomic);
		zcache_eph_zpages =
		    atomic_sub_return(zpages, &zcache_eph_zpages_atomic);
		zcache_eph_zbytes =
		    atomic_long_sub_return(zsize, &zcache_eph_zbytes_atomic);
	} else {
		if (page)
			zcache_pers_pageframes =
			    atomic_dec_return(&zcache_pers_pageframes_atomic);
		zcache_pers_zpages =
		    atomic_sub_return(zpages, &zcache_pers_zpages_atomic);
		zcache_pers_zbytes =
		    atomic_long_sub_return(zsize, &zcache_pers_zbytes_atomic);
	}
	if (!is_local_client(pool->client))
		ramster_count_foreign_pages(eph, -1);
	if (page)
		zcache_free_page(page);
	return ret;
}
/*
 * free the pampd and remove it from any zcache lists
 * pampd must no longer be pointed to from any tmem data structures!
 */
static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
				struct tmem_oid *oid, uint32_t index, bool acct)
{
	struct page *page = NULL;
	unsigned int zsize, zpages;

	BUG_ON(preemptible());
	if (pampd_is_remote(pampd)) {
		BUG_ON(!ramster_enabled);
		pampd = ramster_pampd_free(pampd, pool, oid, index, acct);
		if (pampd == NULL)
			return;
	}
	if (is_ephemeral(pool)) {
		page = zbud_free_and_delist((struct zbudref *)pampd,
						true, &zsize, &zpages);
		if (page)
			zcache_eph_pageframes =
			    atomic_dec_return(&zcache_eph_pageframes_atomic);
		zcache_eph_zpages =
		    atomic_sub_return(zpages, &zcache_eph_zpages_atomic);
		zcache_eph_zbytes =
		    atomic_long_sub_return(zsize, &zcache_eph_zbytes_atomic);
		/* FIXME CONFIG_RAMSTER... check acct parameter? */
	} else {
		page = zbud_free_and_delist((struct zbudref *)pampd,
						false, &zsize, &zpages);
		if (page)
			zcache_pers_pageframes =
			    atomic_dec_return(&zcache_pers_pageframes_atomic);
		zcache_pers_zpages =
		    atomic_sub_return(zpages, &zcache_pers_zpages_atomic);
		zcache_pers_zbytes =
		    atomic_long_sub_return(zsize, &zcache_pers_zbytes_atomic);
	}
	if (!is_local_client(pool->client))
		ramster_count_foreign_pages(is_ephemeral(pool), -1);
	if (page)
		zcache_free_page(page);
}

static struct tmem_pamops zcache_pamops = {
	.create_finish = zcache_pampd_create_finish,
	.get_data = zcache_pampd_get_data,
	.get_data_and_free = zcache_pampd_get_data_and_free,
	.free = zcache_pampd_free,
};
/*
 * zcache compression/decompression and related per-cpu stuff
 */

static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
#define ZCACHE_DSTMEM_ORDER 1

static void zcache_compress(struct page *from, void **out_va, unsigned *out_len)
{
	int ret;
	unsigned char *dmem = __get_cpu_var(zcache_dstmem);
	char *from_va;

	BUG_ON(!irqs_disabled());
	/* no buffer or no compressor so can't compress */
	BUG_ON(dmem == NULL);
	*out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER;
	from_va = kmap_atomic(from);
	mb();
	ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem,
				out_len);
	BUG_ON(ret);
	*out_va = dmem;
	kunmap_atomic(from_va);
}

static int zcache_comp_cpu_up(int cpu)
{
	struct crypto_comp *tfm;

	tfm = crypto_alloc_comp(zcache_comp_name, 0, 0);
	if (IS_ERR(tfm))
		return NOTIFY_BAD;
	*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
	return NOTIFY_OK;
}

static void zcache_comp_cpu_down(int cpu)
{
	struct crypto_comp *tfm;

	tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
	crypto_free_comp(tfm);
	*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
}

static int zcache_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	int ret, i, cpu = (long)pcpu;
	struct zcache_preload *kp;

	switch (action) {
	case CPU_UP_PREPARE:
		ret = zcache_comp_cpu_up(cpu);
		if (ret != NOTIFY_OK) {
			pr_err("%s: can't allocate compressor xform\n",
				namestr);
			return ret;
		}
		per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
			GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER);
		if (ramster_enabled)
			ramster_cpu_up(cpu);
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		zcache_comp_cpu_down(cpu);
		free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
			ZCACHE_DSTMEM_ORDER);
		per_cpu(zcache_dstmem, cpu) = NULL;
		kp = &per_cpu(zcache_preloads, cpu);
		for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
			if (kp->objnodes[i])
				kmem_cache_free(zcache_objnode_cache,
						kp->objnodes[i]);
		}
		if (kp->obj) {
			kmem_cache_free(zcache_obj_cache, kp->obj);
			kp->obj = NULL;
		}
		if (ramster_enabled)
			ramster_cpu_down(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block zcache_cpu_notifier_block = {
	.notifier_call = zcache_cpu_notifier
};

/*
 * The following code interacts with the zbud eviction and zbud
 * zombify code to access LRU pages
 */

static struct page *zcache_evict_eph_pageframe(void)
{
	struct page *page;
	unsigned int zsize = 0, zpages = 0;

	page = zbud_evict_pageframe_lru(&zsize, &zpages);
	if (page == NULL)
		goto out;
	zcache_eph_zbytes = atomic_long_sub_return(zsize,
					&zcache_eph_zbytes_atomic);
	zcache_eph_zpages = atomic_sub_return(zpages,
					&zcache_eph_zpages_atomic);
	zcache_evicted_eph_zpages++;
	zcache_eph_pageframes =
		atomic_dec_return(&zcache_eph_pageframes_atomic);
	zcache_evicted_eph_pageframes++;
out:
	return page;
}

#ifdef FRONTSWAP_HAS_UNUSE
static void unswiz(struct tmem_oid oid, u32 index,
			unsigned *type, pgoff_t *offset);

/*
 * Choose an LRU persistent pageframe and attempt to "unuse" it by
 * calling frontswap_unuse on both zpages.
 *
 * This is work-in-progress.
 */

static int zcache_frontswap_unuse(void)
{
	struct tmem_handle th[2];
	int ret = -ENOMEM;
	int nzbuds, unuse_ret;
	unsigned type;
	struct page *newpage1 = NULL, *newpage2 = NULL;
	struct page *evictpage1 = NULL, *evictpage2 = NULL;
	pgoff_t offset;

	newpage1 = alloc_page(ZCACHE_GFP_MASK);
	newpage2 = alloc_page(ZCACHE_GFP_MASK);
	if (newpage1 == NULL)
		evictpage1 = zcache_evict_eph_pageframe();
	if (newpage2 == NULL)
		evictpage2 = zcache_evict_eph_pageframe();
	if ((newpage1 == NULL && evictpage1 == NULL) ||
	    (newpage2 == NULL && evictpage2 == NULL))
		goto free_and_out;
	/* ok, we have two pages pre-allocated */
	nzbuds = zbud_make_zombie_lru(&th[0], NULL, NULL, false);
	if (nzbuds == 0) {
		ret = -ENOENT;
		goto free_and_out;
	}
	unswiz(th[0].oid, th[0].index, &type, &offset);
	unuse_ret = frontswap_unuse(type, offset,
				newpage1 != NULL ? newpage1 : evictpage1,
				ZCACHE_GFP_MASK);
	if (unuse_ret != 0)
		goto free_and_out;
	else if (evictpage1 != NULL)
		zcache_unacct_page();
	newpage1 = NULL;
	evictpage1 = NULL;
	if (nzbuds == 2) {
		unswiz(th[1].oid, th[1].index, &type, &offset);
		unuse_ret = frontswap_unuse(type, offset,
				newpage2 != NULL ? newpage2 : evictpage2,
				ZCACHE_GFP_MASK);
		if (unuse_ret != 0) {
			goto free_and_out;
		} else if (evictpage2 != NULL) {
			zcache_unacct_page();
		}
	}
	ret = 0;
	goto out;

free_and_out:
	if (newpage1 != NULL)
		__free_page(newpage1);
	if (newpage2 != NULL)
		__free_page(newpage2);
	if (evictpage1 != NULL)
		zcache_free_page(evictpage1);
	if (evictpage2 != NULL)
		zcache_free_page(evictpage2);
out:
	return ret;
}
#endif
/*
 * When zcache is disabled ("frozen"), pools can be created and destroyed,
 * but all puts (and thus all other operations that require memory allocation)
 * must fail.  If zcache is unfrozen, accepts puts, and is then frozen again,
 * data consistency requires that all puts made while frozen be converted
 * into flushes.
 */
static bool zcache_freeze;

/*
 * This zcache shrinker interface reduces the number of ephemeral pageframes
 * used by zcache to approximately the same as the total number of LRU_FILE
 * pageframes in use.
 */
static int shrink_zcache_memory(struct shrinker *shrink,
				struct shrink_control *sc)
{
	static bool in_progress;
	int ret = -1;
	int nr = sc->nr_to_scan;
	int nr_evict = 0;
	int nr_unuse = 0;
	struct page *page;
#ifdef FRONTSWAP_HAS_UNUSE
	int unuse_ret;
#endif

	if (nr <= 0)
		goto skip_evict;

	/* don't allow more than one eviction thread at a time */
	if (in_progress)
		goto skip_evict;

	in_progress = true;

	/* we are going to ignore nr, and target a different value */
	zcache_last_active_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
	zcache_last_inactive_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
	nr_evict = zcache_eph_pageframes -
		(zcache_last_active_file_pageframes +
		zcache_last_inactive_file_pageframes);
	while (nr_evict-- > 0) {
		page = zcache_evict_eph_pageframe();
		if (page == NULL)
			break;
		zcache_free_page(page);
	}

	zcache_last_active_anon_pageframes =
		global_page_state(NR_LRU_BASE + LRU_ACTIVE_ANON);
	zcache_last_inactive_anon_pageframes =
		global_page_state(NR_LRU_BASE + LRU_INACTIVE_ANON);
	nr_unuse = zcache_pers_pageframes -
		(zcache_last_active_anon_pageframes +
		zcache_last_inactive_anon_pageframes);
#ifdef FRONTSWAP_HAS_UNUSE
	/* rate limit for testing */
	if (nr_unuse > 32)
		nr_unuse = 32;
	while (nr_unuse-- > 0) {
		unuse_ret = zcache_frontswap_unuse();
		if (unuse_ret == -ENOMEM)
			break;
	}
#endif
	in_progress = false;

skip_evict:
	/* resample: has changed, but maybe not all the way yet */
	zcache_last_active_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
	zcache_last_inactive_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
	ret = zcache_eph_pageframes -
		(zcache_last_active_file_pageframes +
		zcache_last_inactive_file_pageframes);
	if (ret < 0)
		ret = 0;
	return ret;
}

static struct shrinker zcache_shrinker = {
	.shrink = shrink_zcache_memory,
	.seeks = DEFAULT_SEEKS,
};

/*
 * zcache shims between cleancache/frontswap ops and tmem
 */

/* FIXME rename these core routines to zcache_tmemput etc? */
int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
			uint32_t index, void *page,
			unsigned int size, bool raw, int ephemeral)
{
	struct tmem_pool *pool;
	struct tmem_handle th;
	int ret = -1;
	void *pampd = NULL;

	BUG_ON(!irqs_disabled());
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (unlikely(pool == NULL))
		goto out;
	if (!zcache_freeze) {
		ret = 0;
		th.client_id = cli_id;
		th.pool_id = pool_id;
		th.oid = *oidp;
		th.index = index;
		pampd = zcache_pampd_create((char *)page, size, raw,
						ephemeral, &th);
		if (pampd == NULL) {
			ret = -ENOMEM;
			if (ephemeral)
				zcache_failed_eph_puts++;
			else
				zcache_failed_pers_puts++;
		} else {
			if (ramster_enabled)
				ramster_do_preload_flnode(pool);
			ret = tmem_put(pool, oidp, index, 0, pampd);
			if (ret < 0)
				BUG();
		}
		zcache_put_pool(pool);
	} else {
		zcache_put_to_flush++;
		if (ramster_enabled)
			ramster_do_preload_flnode(pool);
		if (atomic_read(&pool->obj_count) > 0)
			/* the put fails whether the flush succeeds or not */
			(void)tmem_flush_page(pool, oidp, index);
		zcache_put_pool(pool);
	}
out:
	return ret;
}

int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
			uint32_t index, void *page,
			size_t *sizep, bool raw, int get_and_free)
{
	struct tmem_pool *pool;
	int ret = -1;
	bool eph;

	if (!raw) {
		BUG_ON(irqs_disabled());
		BUG_ON(in_softirq());
	}
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	eph = is_ephemeral(pool);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_get(pool, oidp, index, (char *)(page),
					sizep, raw, get_and_free);
		zcache_put_pool(pool);
	}
	WARN_ONCE((!is_ephemeral(pool) && (ret != 0)),
		"zcache_get fails on persistent pool, "
		"bad things are very likely to happen soon\n");
#ifdef RAMSTER_TESTING
	if (ret != 0 && ret != -1 && !(ret == -EINVAL && is_ephemeral(pool)))
		pr_err("TESTING zcache_get tmem_get returns ret=%d\n", ret);
#endif
	return ret;
}

int zcache_flush_page(int cli_id, int pool_id,
			struct tmem_oid *oidp, uint32_t index)
{
	struct tmem_pool *pool;
	int ret = -1;
	unsigned long flags;

	local_irq_save(flags);
	zcache_flush_total++;
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (ramster_enabled)
		ramster_do_preload_flnode(pool);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_flush_page(pool, oidp, index);
		zcache_put_pool(pool);
	}
	if (ret >= 0)
		zcache_flush_found++;
	local_irq_restore(flags);
	return ret;
}

int zcache_flush_object(int cli_id, int pool_id,
			struct tmem_oid *oidp)
{
	struct tmem_pool *pool;
	int ret = -1;
	unsigned long flags;

	local_irq_save(flags);
	zcache_flobj_total++;
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (ramster_enabled)
		ramster_do_preload_flnode(pool);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_flush_object(pool, oidp);
		zcache_put_pool(pool);
	}
	if (ret >= 0)
		zcache_flobj_found++;
	local_irq_restore(flags);
	return ret;
}

static int zcache_client_destroy_pool(int cli_id, int pool_id)
{
	struct tmem_pool *pool = NULL;
	struct zcache_client *cli = NULL;
	int ret = -1;

	if (pool_id < 0)
		goto out;
	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else if ((unsigned int)cli_id < MAX_CLIENTS)
		cli = &zcache_clients[cli_id];
	if (cli == NULL)
		goto out;
	atomic_inc(&cli->refcount);
	pool = cli->tmem_pools[pool_id];
	if (pool == NULL)
		goto out;
	cli->tmem_pools[pool_id] = NULL;
	/* wait for pool activity on other cpus to quiesce */
	while (atomic_read(&pool->refcount) != 0)
		;
	atomic_dec(&cli->refcount);
	local_bh_disable();
	ret = tmem_destroy_pool(pool);
	local_bh_enable();
	kfree(pool);
	if (cli_id == LOCAL_CLIENT)
		pr_info("%s: destroyed local pool id=%d\n", namestr, pool_id);
	else
		pr_info("%s: destroyed pool id=%d, client=%d\n",
			namestr, pool_id, cli_id);
out:
	return ret;
}

int zcache_new_pool(uint16_t cli_id, uint32_t flags)
{
	int poolid = -1;
	struct tmem_pool *pool;
	struct zcache_client *cli = NULL;

	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else if ((unsigned int)cli_id < MAX_CLIENTS)
		cli = &zcache_clients[cli_id];
	if (cli == NULL)
		goto out;
	atomic_inc(&cli->refcount);
	pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC);
	if (pool == NULL) {
		pr_info("%s: pool creation failed: out of memory\n", namestr);
		goto out;
	}

	for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
		if (cli->tmem_pools[poolid] == NULL)
			break;
	if (poolid >= MAX_POOLS_PER_CLIENT) {
		pr_info("%s: pool creation failed: max exceeded\n", namestr);
		kfree(pool);
		poolid = -1;
		goto out;
	}
	atomic_set(&pool->refcount, 0);
	pool->client = cli;
	pool->pool_id = poolid;
	tmem_new_pool(pool, flags);
	cli->tmem_pools[poolid] = pool;
	if (cli_id == LOCAL_CLIENT)
		pr_info("%s: created %s local tmem pool, id=%d\n", namestr,
			flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
			poolid);
	else
		pr_info("%s: created %s tmem pool, id=%d, client=%d\n", namestr,
			flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
			poolid, cli_id);
out:
	if (cli != NULL)
		atomic_dec(&cli->refcount);
	return poolid;
}

static int zcache_local_new_pool(uint32_t flags)
{
	return zcache_new_pool(LOCAL_CLIENT, flags);
}

int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph)
{
	struct tmem_pool *pool;
	struct zcache_client *cli = NULL;
	uint32_t flags = eph ? 0 : TMEM_POOL_PERSIST;
	int ret = -1;

	BUG_ON(!ramster_enabled);
	if (cli_id == LOCAL_CLIENT)
		goto out;
	if (pool_id >= MAX_POOLS_PER_CLIENT)
		goto out;
	if (cli_id >= MAX_CLIENTS)
		goto out;

	cli = &zcache_clients[cli_id];
	if ((eph && disable_cleancache) || (!eph && disable_frontswap)) {
		pr_err("zcache_autocreate_pool: pool type disabled\n");
		goto out;
	}
	if (!cli->allocated) {
		if (zcache_new_client(cli_id)) {
			pr_err("zcache_autocreate_pool: can't create client\n");
			goto out;
		}
		cli = &zcache_clients[cli_id];
	}
	atomic_inc(&cli->refcount);
	pool = cli->tmem_pools[pool_id];
	if (pool != NULL) {
		if (pool->persistent && eph) {
			pr_err("zcache_autocreate_pool: type mismatch\n");
			goto out;
		}
		ret = 0;
		goto out;
	}
	pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
	if (pool == NULL) {
		pr_info("%s: pool creation failed: out of memory\n", namestr);
		goto out;
	}
	atomic_set(&pool->refcount, 0);
	pool->client = cli;
	pool->pool_id = pool_id;
	tmem_new_pool(pool, flags);
	cli->tmem_pools[pool_id] = pool;
	pr_info("%s: AUTOcreated %s tmem poolid=%d, for remote client=%d\n",
		namestr, flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
		pool_id, cli_id);
	ret = 0;
out:
	if (cli != NULL)
		atomic_dec(&cli->refcount);
	return ret;
}

/**********
 * Two kernel functionalities currently can be layered on top of tmem.
 * These are "cleancache" which is used as a second-chance cache for clean
 * page cache pages; and "frontswap" which is used for swap pages
 * to avoid writes to disk.  A generic "shim" is provided here for each
 * to translate in-kernel semantics to zcache semantics.
 */
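/*
 * Call-flow sketch for the cleancache shim below (the frontswap path is
 * analogous): when the VM evicts a clean page-cache page, cleancache
 * calls zcache_cleancache_put_page(), which packs the inode key into a
 * tmem_oid and hands the page to zcache_put_page(); that compresses it
 * via zcache_pampd_create() and files it under (pool, oid, index) with
 * tmem_put().  A later page-cache miss retraces the same path through
 * zcache_cleancache_get_page()/zcache_get_page()/tmem_get().
 */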
static void zcache_cleancache_put_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (!disable_cleancache_ignore_nonactive && !PageWasActive(page)) {
		zcache_eph_nonactive_puts_ignored++;
		return;
	}
	if (likely(ind == index))
		(void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index,
					page, PAGE_SIZE, false, 1);
}

static int zcache_cleancache_get_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	size_t size;
	int ret = -1;

	if (likely(ind == index)) {
		ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index,
					page, &size, false, 0);
		BUG_ON(ret >= 0 && size != PAGE_SIZE);
		if (ret == 0)
			SetPageWasActive(page);
	}
	return ret;
}

static void zcache_cleancache_flush_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (likely(ind == index))
		(void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
}

static void zcache_cleancache_flush_inode(int pool_id,
					struct cleancache_filekey key)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	(void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
}

static void zcache_cleancache_flush_fs(int pool_id)
{
	if (pool_id >= 0)
		(void)zcache_client_destroy_pool(LOCAL_CLIENT, pool_id);
}

static int zcache_cleancache_init_fs(size_t pagesize)
{
	BUG_ON(sizeof(struct cleancache_filekey) !=
			sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_local_new_pool(0);
}

static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
{
	/* shared pools are unsupported and map to private */
	BUG_ON(sizeof(struct cleancache_filekey) !=
			sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_local_new_pool(0);
}

static struct cleancache_ops zcache_cleancache_ops = {
	.put_page = zcache_cleancache_put_page,
	.get_page = zcache_cleancache_get_page,
	.invalidate_page = zcache_cleancache_flush_page,
	.invalidate_inode = zcache_cleancache_flush_inode,
	.invalidate_fs = zcache_cleancache_flush_fs,
	.init_shared_fs = zcache_cleancache_init_shared_fs,
	.init_fs = zcache_cleancache_init_fs
};

struct cleancache_ops zcache_cleancache_register_ops(void)
{
	struct cleancache_ops old_ops =
		cleancache_register_ops(&zcache_cleancache_ops);

	return old_ops;
}

/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
static int zcache_frontswap_poolid __read_mostly = -1;

/*
 * Swizzling increases objects per swaptype, increasing tmem concurrency
 * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
 * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
 * frontswap_get_page(), but has side-effects.  Hence using 8.
 */
#define SWIZ_BITS		8
#define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
#define _oswiz(_type, _ind)	((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
#define iswiz(_ind)		(_ind >> SWIZ_BITS)

static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid oid = { .oid = { 0 } };
	oid.oid[0] = _oswiz(type, ind);
	return oid;
}

#ifdef FRONTSWAP_HAS_UNUSE
static void unswiz(struct tmem_oid oid, u32 index,
			unsigned *type, pgoff_t *offset)
{
	*type = (unsigned)(oid.oid[0] >> SWIZ_BITS);
	*offset = (pgoff_t)((index << SWIZ_BITS) |
			(oid.oid[0] & SWIZ_MASK));
}
#endif
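/*
 * Swizzling worked example (SWIZ_BITS == 8, so SWIZ_MASK == 0xff):
 * for type == 1, offset == 0x12345, the put path computes
 * oid.oid[0] = _oswiz(1, 0x12345) = (1 << 8) | 0x45 = 0x145 and
 * index = iswiz(0x12345) = 0x123.  unswiz() inverts this exactly:
 * type = 0x145 >> 8 = 1, offset = (0x123 << 8) | (0x145 & 0xff)
 * = 0x12345.  So each swaptype fans out across 256 tmem objects,
 * one per low byte of the swap offset.
 */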
static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
					struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	int ret = -1;
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	if (!disable_frontswap_ignore_nonactive && !PageWasActive(page)) {
		zcache_pers_nonactive_puts_ignored++;
		ret = -ERANGE;
		goto out;
	}
	if (likely(ind64 == ind)) {
		local_irq_save(flags);
		ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind),
					page, PAGE_SIZE, false, 0);
		local_irq_restore(flags);
	}
out:
	return ret;
}

/*
 * returns 0 if the page was successfully gotten from frontswap, -1 if
 * it was not present (should never happen!)
 */
static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
					struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	size_t size;
	int ret = -1, get_and_free;

	if (frontswap_has_exclusive_gets)
		get_and_free = 1;
	else
		get_and_free = -1;
	BUG_ON(!PageLocked(page));
	if (likely(ind64 == ind)) {
		ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind),
					page, &size, false, get_and_free);
		BUG_ON(ret >= 0 && size != PAGE_SIZE);
	}
	return ret;
}

/* flush a single page from frontswap */
static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);

	if (likely(ind64 == ind))
		(void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind));
}

/* flush all pages from the passed swaptype */
static void zcache_frontswap_flush_area(unsigned type)
{
	struct tmem_oid oid;
	int ind;

	for (ind = SWIZ_MASK; ind >= 0; ind--) {
		oid = oswiz(type, ind);
		(void)zcache_flush_object(LOCAL_CLIENT,
					zcache_frontswap_poolid, &oid);
	}
}

static void zcache_frontswap_init(unsigned ignored)
{
	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
	if (zcache_frontswap_poolid < 0)
		zcache_frontswap_poolid =
			zcache_local_new_pool(TMEM_POOL_PERSIST);
}

static struct frontswap_ops zcache_frontswap_ops = {
	.store = zcache_frontswap_put_page,
	.load = zcache_frontswap_get_page,
	.invalidate_page = zcache_frontswap_flush_page,
	.invalidate_area = zcache_frontswap_flush_area,
	.init = zcache_frontswap_init
};

struct frontswap_ops zcache_frontswap_register_ops(void)
{
	struct frontswap_ops old_ops =
		frontswap_register_ops(&zcache_frontswap_ops);

	return old_ops;
}
/*
 * zcache initialization
 * NOTE FOR NOW zcache or ramster MUST BE PROVIDED AS A KERNEL BOOT PARAMETER
 * OR NOTHING HAPPENS!
 */

static int __init enable_zcache(char *s)
{
	zcache_enabled = 1;
	return 1;
}
__setup("zcache", enable_zcache);

static int __init enable_ramster(char *s)
{
	zcache_enabled = 1;
#ifdef CONFIG_RAMSTER
	ramster_enabled = 1;
#endif
	return 1;
}
__setup("ramster", enable_ramster);

/* allow independent dynamic disabling of cleancache and frontswap */

static int __init no_cleancache(char *s)
{
	disable_cleancache = 1;
	return 1;
}

__setup("nocleancache", no_cleancache);

static int __init no_frontswap(char *s)
{
	disable_frontswap = 1;
	return 1;
}

__setup("nofrontswap", no_frontswap);

static int __init no_frontswap_exclusive_gets(char *s)
{
	frontswap_has_exclusive_gets = false;
	return 1;
}

__setup("nofrontswapexclusivegets", no_frontswap_exclusive_gets);

static int __init no_frontswap_ignore_nonactive(char *s)
{
	disable_frontswap_ignore_nonactive = 1;
	return 1;
}

__setup("nofrontswapignorenonactive", no_frontswap_ignore_nonactive);

static int __init no_cleancache_ignore_nonactive(char *s)
{
	disable_cleancache_ignore_nonactive = 1;
	return 1;
}

__setup("nocleancacheignorenonactive", no_cleancache_ignore_nonactive);

static int __init enable_zcache_compressor(char *s)
{
	strncpy(zcache_comp_name, s, ZCACHE_COMP_NAME_SZ);
	zcache_enabled = 1;
	return 1;
}
__setup("zcache=", enable_zcache_compressor);
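/*
 * Usage examples for the boot parameters above (added to the kernel
 * command line via the bootloader): "zcache" enables zcache with the
 * default lzo compressor; "zcache=deflate" selects another crypto
 * compressor by name, assuming it is compiled into the kernel;
 * "ramster" enables the clustered variant; and "nofrontswap" /
 * "nocleancache" disable one of the two data paths while leaving the
 * other active.
 */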
static int __init zcache_comp_init(void)
{
	int ret = 0;

	/* check crypto algorithm */
	if (*zcache_comp_name != '\0') {
		ret = crypto_has_comp(zcache_comp_name, 0, 0);
		if (!ret)
			pr_info("zcache: %s not supported\n",
					zcache_comp_name);
	}
	if (!ret)
		strcpy(zcache_comp_name, "lzo");
	ret = crypto_has_comp(zcache_comp_name, 0, 0);
	if (!ret) {
		ret = 1;
		goto out;
	}
	pr_info("zcache: using %s compressor\n", zcache_comp_name);

	/* alloc percpu transforms */
	ret = 0;
	zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	if (!zcache_comp_pcpu_tfms)
		ret = 1;
out:
	return ret;
}

static int __init zcache_init(void)
{
	int ret = 0;

	if (ramster_enabled) {
		namestr = "ramster";
		ramster_register_pamops(&zcache_pamops);
	}
#ifdef CONFIG_DEBUG_FS
	zcache_debugfs_init();
#endif
	if (zcache_enabled) {
		unsigned int cpu;

		tmem_register_hostops(&zcache_hostops);
		tmem_register_pamops(&zcache_pamops);
		ret = register_cpu_notifier(&zcache_cpu_notifier_block);
		if (ret) {
			pr_err("%s: can't register cpu notifier\n", namestr);
			goto out;
		}
		ret = zcache_comp_init();
		if (ret) {
			pr_err("%s: compressor initialization failed\n",
				namestr);
			goto out;
		}
		for_each_online_cpu(cpu) {
			void *pcpu = (void *)(long)cpu;
			zcache_cpu_notifier(&zcache_cpu_notifier_block,
				CPU_UP_PREPARE, pcpu);
		}
	}
	zcache_objnode_cache = kmem_cache_create("zcache_objnode",
				sizeof(struct tmem_objnode), 0, 0, NULL);
	zcache_obj_cache = kmem_cache_create("zcache_obj",
				sizeof(struct tmem_obj), 0, 0, NULL);
	ret = zcache_new_client(LOCAL_CLIENT);
	if (ret) {
		pr_err("%s: can't create client\n", namestr);
		goto out;
	}
	zbud_init();
	if (zcache_enabled && !disable_cleancache) {
		struct cleancache_ops old_ops;

		register_shrinker(&zcache_shrinker);
		old_ops = zcache_cleancache_register_ops();
		pr_info("%s: cleancache enabled using kernel transcendent "
			"memory and compression buddies\n", namestr);
#ifdef ZCACHE_DEBUG
		pr_info("%s: cleancache: ignorenonactive = %d\n",
			namestr, !disable_cleancache_ignore_nonactive);
#endif
		if (old_ops.init_fs != NULL)
			pr_warn("%s: cleancache_ops overridden\n", namestr);
	}
	if (zcache_enabled && !disable_frontswap) {
		struct frontswap_ops old_ops;

		old_ops = zcache_frontswap_register_ops();
		if (frontswap_has_exclusive_gets)
			frontswap_tmem_exclusive_gets(true);
		pr_info("%s: frontswap enabled using kernel transcendent "
			"memory and compression buddies\n", namestr);
#ifdef ZCACHE_DEBUG
		pr_info("%s: frontswap: excl gets = %d active only = %d\n",
			namestr, frontswap_has_exclusive_gets,
			!disable_frontswap_ignore_nonactive);
#endif
		if (old_ops.init != NULL)
			pr_warn("%s: frontswap_ops overridden\n", namestr);
	}
	if (ramster_enabled)
		ramster_init(!disable_cleancache, !disable_frontswap,
				frontswap_has_exclusive_gets);
out:
	return ret;
}

late_initcall(zcache_init);