/*
 * xvmalloc memory allocator
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "xvmalloc.h"
#include "xvmalloc_int.h"

static void stat_inc(u64 *value)
{
	*value = *value + 1;
}

static void stat_dec(u64 *value)
{
	*value = *value - 1;
}

static int test_flag(struct block_header *block, enum blockflags flag)
{
	return block->prev & BIT(flag);
}

static void set_flag(struct block_header *block, enum blockflags flag)
{
	block->prev |= BIT(flag);
}

static void clear_flag(struct block_header *block, enum blockflags flag)
{
	block->prev &= ~BIT(flag);
}

/*
 * Given <page, offset> pair, provide a dereferenceable pointer.
 * This is called from xv_malloc/xv_free path, so it
 * needs to be fast.
 */
static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
{
	unsigned char *base;

	base = kmap_atomic(page, type);
	return base + offset;
}

static void put_ptr_atomic(void *ptr, enum km_type type)
{
	kunmap_atomic(ptr, type);
}

static u32 get_blockprev(struct block_header *block)
{
	return block->prev & PREV_MASK;
}

static void set_blockprev(struct block_header *block, u16 new_offset)
{
	block->prev = new_offset | (block->prev & FLAGS_MASK);
}

static struct block_header *BLOCK_NEXT(struct block_header *block)
{
	return (struct block_header *)
		((char *)block + block->size + XV_ALIGN);
}

/*
 * Get index of free list containing blocks of maximum size
 * which is less than or equal to given size.
 */
static u32 get_index_for_insert(u32 size)
{
	if (unlikely(size > XV_MAX_ALLOC_SIZE))
		size = XV_MAX_ALLOC_SIZE;
	size &= ~FL_DELTA_MASK;
	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

/*
 * Get index of free list having blocks of size greater than
 * or equal to requested size.
 */
static u32 get_index(u32 size)
{
	if (unlikely(size < XV_MIN_ALLOC_SIZE))
		size = XV_MIN_ALLOC_SIZE;
	size = ALIGN(size, FL_DELTA);
	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

/**
 * find_block - find block of at least given size
 * @pool: memory pool to search from
 * @size: size of block required
 * @page: page containing required block
 * @offset: offset within the page where block is located.
 *
 * Searches two-level bitmap to locate block of at least
 * the given size. If such a block is found, it provides
 * <page, offset> to identify this block and returns index
 * in freelist where we found this block.
 * Otherwise, returns 0 and <page, offset> params are not touched.
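 *
 * Note: 0 is also a valid freelist index, so the caller distinguishes the
 * failure case by checking *page, which it pre-sets to NULL.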
 */
static u32 find_block(struct xv_pool *pool, u32 size,
			struct page **page, u32 *offset)
{
	ulong flbitmap, slbitmap;
	u32 flindex, slindex, slbitstart;

	/* There are no free blocks in this pool */
	if (!pool->flbitmap)
		return 0;

	/* Get freelist index corresponding to this size */
	slindex = get_index(size);
	slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
	slbitstart = slindex % BITS_PER_LONG;

	/*
	 * If freelist is not empty at this index, we found the
	 * block - head of this list. This is approximate best-fit match.
	 */
	if (test_bit(slbitstart, &slbitmap)) {
		*page = pool->freelist[slindex].page;
		*offset = pool->freelist[slindex].offset;
		return slindex;
	}

	/*
	 * No best-fit found. Search a bit further in bitmap for a free block.
	 * Second level bitmap consists of a series of 32-bit chunks. Search
	 * further in the chunk where we expected a best-fit, starting from
	 * index location found above.
	 */
	slbitstart++;
	slbitmap >>= slbitstart;

	/* Skip this search if we were already at end of this bitmap chunk */
	if ((slbitstart != BITS_PER_LONG) && slbitmap) {
		slindex += __ffs(slbitmap) + 1;
		*page = pool->freelist[slindex].page;
		*offset = pool->freelist[slindex].offset;
		return slindex;
	}

	/* Now do a full two-level bitmap search to find next nearest fit */
	flindex = slindex / BITS_PER_LONG;

	flbitmap = (pool->flbitmap) >> (flindex + 1);
	if (!flbitmap)
		return 0;

	flindex += __ffs(flbitmap) + 1;
	slbitmap = pool->slbitmap[flindex];
	slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
	*page = pool->freelist[slindex].page;
	*offset = pool->freelist[slindex].offset;

	return slindex;
}

/*
 * Insert block at <page, offset> in freelist of given pool.
 * freelist used depends on block size.
 */
static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
			struct block_header *block)
{
	u32 flindex, slindex;
	struct block_header *nextblock;

	slindex = get_index_for_insert(block->size);
	flindex = slindex / BITS_PER_LONG;

	block->link.prev_page = NULL;
	block->link.prev_offset = 0;
	block->link.next_page = pool->freelist[slindex].page;
	block->link.next_offset = pool->freelist[slindex].offset;
	pool->freelist[slindex].page = page;
	pool->freelist[slindex].offset = offset;

	if (block->link.next_page) {
		nextblock = get_ptr_atomic(block->link.next_page,
					block->link.next_offset, KM_USER1);
		nextblock->link.prev_page = page;
		nextblock->link.prev_offset = offset;
		put_ptr_atomic(nextblock, KM_USER1);
		/* If there was a next page then the free bits are set. */
		return;
	}

	__set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
	__set_bit(flindex, &pool->flbitmap);
}

/*
 * Remove block from freelist. Index 'slindex' identifies the freelist.
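 * The caller must hold pool->lock and pass 'block' already kmapped.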
 */
static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
			struct block_header *block, u32 slindex)
{
	u32 flindex = slindex / BITS_PER_LONG;
	struct block_header *tmpblock;

	if (block->link.prev_page) {
		tmpblock = get_ptr_atomic(block->link.prev_page,
				block->link.prev_offset, KM_USER1);
		tmpblock->link.next_page = block->link.next_page;
		tmpblock->link.next_offset = block->link.next_offset;
		put_ptr_atomic(tmpblock, KM_USER1);
	}

	if (block->link.next_page) {
		tmpblock = get_ptr_atomic(block->link.next_page,
				block->link.next_offset, KM_USER1);
		tmpblock->link.prev_page = block->link.prev_page;
		tmpblock->link.prev_offset = block->link.prev_offset;
		put_ptr_atomic(tmpblock, KM_USER1);
	}

	/* Is this block at the head of the freelist? */
	if (pool->freelist[slindex].page == page
	   && pool->freelist[slindex].offset == offset) {

		pool->freelist[slindex].page = block->link.next_page;
		pool->freelist[slindex].offset = block->link.next_offset;

		if (pool->freelist[slindex].page) {
			struct block_header *tmpblock;
			tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
					pool->freelist[slindex].offset,
					KM_USER1);
			tmpblock->link.prev_page = NULL;
			tmpblock->link.prev_offset = 0;
			put_ptr_atomic(tmpblock, KM_USER1);
		} else {
			/* This freelist bucket is empty */
			__clear_bit(slindex % BITS_PER_LONG,
					&pool->slbitmap[flindex]);
			if (!pool->slbitmap[flindex])
				__clear_bit(flindex, &pool->flbitmap);
		}
	}

	block->link.prev_page = NULL;
	block->link.prev_offset = 0;
	block->link.next_page = NULL;
	block->link.next_offset = 0;
}

/*
 * Allocate a page and add it to freelist of given pool.
 */
static int grow_pool(struct xv_pool *pool, gfp_t flags)
{
	struct page *page;
	struct block_header *block;

	page = alloc_page(flags);
	if (unlikely(!page))
		return -ENOMEM;

	stat_inc(&pool->total_pages);

	spin_lock(&pool->lock);
	block = get_ptr_atomic(page, 0, KM_USER0);

	block->size = PAGE_SIZE - XV_ALIGN;
	set_flag(block, BLOCK_FREE);
	clear_flag(block, PREV_FREE);
	set_blockprev(block, 0);

	insert_block(pool, page, 0, block);

	put_ptr_atomic(block, KM_USER0);
	spin_unlock(&pool->lock);

	return 0;
}

/*
 * Create a memory pool. Allocates freelist, bitmaps and other
 * per-pool metadata.
 */
struct xv_pool *xv_create_pool(void)
{
	u32 ovhd_size;
	struct xv_pool *pool;

	ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
	pool = kzalloc(ovhd_size, GFP_KERNEL);
	if (!pool)
		return NULL;

	spin_lock_init(&pool->lock);

	return pool;
}
EXPORT_SYMBOL_GPL(xv_create_pool);

void xv_destroy_pool(struct xv_pool *pool)
{
	kfree(pool);
}
EXPORT_SYMBOL_GPL(xv_destroy_pool);

/**
 * xv_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @page: page no. that holds the object
 * @offset: location of object within page
 *
 * On success, <page, offset> identifies block allocated
 * and 0 is returned. On failure, <page, offset> is set to
 * 0 and -ENOMEM is returned.
 *
 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
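 *
 * The returned offset points just past the block header, at the object
 * itself; the same <page, offset> pair is later passed to xv_free().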
 */
int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
		u32 *offset, gfp_t flags)
{
	int error;
	u32 index, tmpsize, origsize, tmpoffset;
	struct block_header *block, *tmpblock;

	*page = NULL;
	*offset = 0;
	origsize = size;

	if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
		return -ENOMEM;

	size = ALIGN(size, XV_ALIGN);

	spin_lock(&pool->lock);

	index = find_block(pool, size, page, offset);

	if (!*page) {
		spin_unlock(&pool->lock);
		if (flags & GFP_NOWAIT)
			return -ENOMEM;
		error = grow_pool(pool, flags);
		if (unlikely(error))
			return error;

		spin_lock(&pool->lock);
		index = find_block(pool, size, page, offset);
	}

	if (!*page) {
		spin_unlock(&pool->lock);
		return -ENOMEM;
	}

	block = get_ptr_atomic(*page, *offset, KM_USER0);

	remove_block(pool, *page, *offset, block, index);

	/* Split the block if required */
	tmpoffset = *offset + size + XV_ALIGN;
	tmpsize = block->size - size;
	tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
	if (tmpsize) {
		tmpblock->size = tmpsize - XV_ALIGN;
		set_flag(tmpblock, BLOCK_FREE);
		clear_flag(tmpblock, PREV_FREE);

		set_blockprev(tmpblock, *offset);
		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
			insert_block(pool, *page, tmpoffset, tmpblock);

		if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
			tmpblock = BLOCK_NEXT(tmpblock);
			set_blockprev(tmpblock, tmpoffset);
		}
	} else {
		/* This block is exact fit */
		if (tmpoffset != PAGE_SIZE)
			clear_flag(tmpblock, PREV_FREE);
	}

	block->size = origsize;
	clear_flag(block, BLOCK_FREE);

	put_ptr_atomic(block, KM_USER0);
	spin_unlock(&pool->lock);

	*offset += XV_ALIGN;

	return 0;
}
EXPORT_SYMBOL_GPL(xv_malloc);

/*
 * Free block identified with <page, offset>
 */
void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
{
	void *page_start;
	struct block_header *block, *tmpblock;

	offset -= XV_ALIGN;

	spin_lock(&pool->lock);

	page_start = get_ptr_atomic(page, 0, KM_USER0);
	block = (struct block_header *)((char *)page_start + offset);

	/* Catch double free bugs */
	BUG_ON(test_flag(block, BLOCK_FREE));

	block->size = ALIGN(block->size, XV_ALIGN);

	tmpblock = BLOCK_NEXT(block);
	if (offset + block->size + XV_ALIGN == PAGE_SIZE)
		tmpblock = NULL;

	/* Merge next block if it's free */
	if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
		/*
		 * Blocks smaller than XV_MIN_ALLOC_SIZE
		 * are not inserted in any free list.
		 */
		if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
			remove_block(pool, page,
				    offset + block->size + XV_ALIGN, tmpblock,
				    get_index_for_insert(tmpblock->size));
		}
		block->size += tmpblock->size + XV_ALIGN;
	}

	/* Merge previous block if it's free */
	if (test_flag(block, PREV_FREE)) {
		tmpblock = (struct block_header *)((char *)(page_start) +
						get_blockprev(block));
		offset = offset - tmpblock->size - XV_ALIGN;

		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
			remove_block(pool, page, offset, tmpblock,
				    get_index_for_insert(tmpblock->size));

		tmpblock->size += block->size + XV_ALIGN;
		block = tmpblock;
	}

	/* No used objects in this page. Free it. */
	if (block->size == PAGE_SIZE - XV_ALIGN) {
		put_ptr_atomic(page_start, KM_USER0);
		spin_unlock(&pool->lock);

		__free_page(page);
		stat_dec(&pool->total_pages);
		return;
	}

	set_flag(block, BLOCK_FREE);
	if (block->size >= XV_MIN_ALLOC_SIZE)
		insert_block(pool, page, offset, block);

	if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
		tmpblock = BLOCK_NEXT(block);
		set_flag(tmpblock, PREV_FREE);
		set_blockprev(tmpblock, offset);
	}

	put_ptr_atomic(page_start, KM_USER0);
	spin_unlock(&pool->lock);
}
EXPORT_SYMBOL_GPL(xv_free);

u32 xv_get_object_size(void *obj)
{
	struct block_header *blk;

	blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
	return blk->size;
}
EXPORT_SYMBOL_GPL(xv_get_object_size);

/*
 * Returns total memory used by allocator (userdata + metadata)
 */
u64 xv_get_total_size_bytes(struct xv_pool *pool)
{
	return pool->total_pages << PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(xv_get_total_size_bytes);
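
A minimal usage sketch of the exported API follows, modelled loosely on how the zram driver consumes it. Everything below is illustrative and not part of xvmalloc.c: the function demo_store, its parameters and the GFP_NOIO choice are assumptions made for the example. It shows that xv_malloc() hands back a <page, offset> pair which the caller maps with kmap_atomic() to obtain a usable pointer, and that the same pair is later passed to xv_free().

/*
 * Illustrative sketch only -- not part of xvmalloc.c. Assumes
 * <linux/highmem.h>, <linux/string.h> and "xvmalloc.h" are included.
 */
static int demo_store(struct xv_pool *pool, const void *src, u32 len)
{
	struct page *page;
	u32 offset;
	void *dst;
	int ret;

	/* Allocate; the object's location comes back as <page, offset>. */
	ret = xv_malloc(pool, len, &page, &offset, GFP_NOIO);
	if (ret)
		return ret;

	/* Map the page to get a dereferenceable pointer, copy, unmap. */
	dst = kmap_atomic(page, KM_USER0) + offset;
	memcpy(dst, src, len);
	kunmap_atomic(dst, KM_USER0);

	/* Release with the same <page, offset> pair. */
	xv_free(pool, page, offset);
	return 0;
}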