at v2.6.34-rc4 936 lines 24 kB view raw
1/* 2 * fs/logfs/segment.c - Handling the Object Store 3 * 4 * As should be obvious for Linux kernel code, license is GPLv2 5 * 6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> 7 * 8 * Object store or ostore makes up the complete device with exception of 9 * the superblock and journal areas. Apart from its own metadata it stores 10 * three kinds of objects: inodes, dentries and blocks, both data and indirect. 11 */ 12#include "logfs.h" 13#include <linux/slab.h> 14 15static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) 16{ 17 struct logfs_super *super = logfs_super(sb); 18 struct btree_head32 *head = &super->s_reserved_segments; 19 int err; 20 21 err = btree_insert32(head, segno, (void *)1, GFP_NOFS); 22 if (err) 23 return err; 24 logfs_super(sb)->s_bad_segments++; 25 /* FIXME: write to journal */ 26 return 0; 27} 28 29int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase) 30{ 31 struct logfs_super *super = logfs_super(sb); 32 33 super->s_gec++; 34 35 return super->s_devops->erase(sb, (u64)segno << super->s_segshift, 36 super->s_segsize, ensure_erase); 37} 38 39static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes) 40{ 41 s32 ofs; 42 43 logfs_open_area(area, bytes); 44 45 ofs = area->a_used_bytes; 46 area->a_used_bytes += bytes; 47 BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize); 48 49 return dev_ofs(area->a_sb, area->a_segno, ofs); 50} 51 52static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, 53 int use_filler) 54{ 55 struct logfs_super *super = logfs_super(sb); 56 struct address_space *mapping = super->s_mapping_inode->i_mapping; 57 filler_t *filler = super->s_devops->readpage; 58 struct page *page; 59 60 BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); 61 if (use_filler) 62 page = read_cache_page(mapping, index, filler, sb); 63 else { 64 page = find_or_create_page(mapping, index, GFP_NOFS); 65 unlock_page(page); 66 } 67 return page; 68} 69 70void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, 71 int use_filler) 72{ 73 pgoff_t index = ofs >> PAGE_SHIFT; 74 struct page *page; 75 long offset = ofs & (PAGE_SIZE-1); 76 long copylen; 77 78 /* Only logfs_wbuf_recover may use len==0 */ 79 BUG_ON(!len && !use_filler); 80 do { 81 copylen = min((ulong)len, PAGE_SIZE - offset); 82 83 page = get_mapping_page(area->a_sb, index, use_filler); 84 SetPageUptodate(page); 85 BUG_ON(!page); /* FIXME: reserve a pool */ 86 memcpy(page_address(page) + offset, buf, copylen); 87 SetPagePrivate(page); 88 page_cache_release(page); 89 90 buf += copylen; 91 len -= copylen; 92 offset = 0; 93 index++; 94 } while (len); 95} 96 97static void pad_partial_page(struct logfs_area *area) 98{ 99 struct super_block *sb = area->a_sb; 100 struct page *page; 101 u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); 102 pgoff_t index = ofs >> PAGE_SHIFT; 103 long offset = ofs & (PAGE_SIZE-1); 104 u32 len = PAGE_SIZE - offset; 105 106 if (len % PAGE_SIZE) { 107 page = get_mapping_page(sb, index, 0); 108 BUG_ON(!page); /* FIXME: reserve a pool */ 109 memset(page_address(page) + offset, 0xff, len); 110 SetPagePrivate(page); 111 page_cache_release(page); 112 } 113} 114 115static void pad_full_pages(struct logfs_area *area) 116{ 117 struct super_block *sb = area->a_sb; 118 struct logfs_super *super = logfs_super(sb); 119 u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); 120 u32 len = super->s_segsize - area->a_used_bytes; 121 pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; 122 pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; 123 struct page *page; 124 125 while (no_indizes) { 126 page = get_mapping_page(sb, index, 0); 127 BUG_ON(!page); /* FIXME: reserve a pool */ 128 SetPageUptodate(page); 129 memset(page_address(page), 0xff, PAGE_CACHE_SIZE); 130 SetPagePrivate(page); 131 page_cache_release(page); 132 index++; 133 no_indizes--; 134 } 135} 136 137/* 138 * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. 139 * Also make sure we allocate (and memset) all pages for final writeout. 140 */ 141static void pad_wbuf(struct logfs_area *area, int final) 142{ 143 pad_partial_page(area); 144 if (final) 145 pad_full_pages(area); 146} 147 148/* 149 * We have to be careful with the alias tree. Since lookup is done by bix, 150 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with 151 * indirect blocks. So always use it through accessor functions. 152 */ 153static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix, 154 level_t level) 155{ 156 struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; 157 pgoff_t index = logfs_pack_index(bix, level); 158 159 return btree_lookup128(head, ino, index); 160} 161 162static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix, 163 level_t level, void *val) 164{ 165 struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; 166 pgoff_t index = logfs_pack_index(bix, level); 167 168 return btree_insert128(head, ino, index, val, GFP_NOFS); 169} 170 171static int btree_write_alias(struct super_block *sb, struct logfs_block *block, 172 write_alias_t *write_one_alias) 173{ 174 struct object_alias_item *item; 175 int err; 176 177 list_for_each_entry(item, &block->item_list, list) { 178 err = write_alias_journal(sb, block->ino, block->bix, 179 block->level, item->child_no, item->val); 180 if (err) 181 return err; 182 } 183 return 0; 184} 185 186static gc_level_t btree_block_level(struct logfs_block *block) 187{ 188 return expand_level(block->ino, block->level); 189} 190 191static struct logfs_block_ops btree_block_ops = { 192 .write_block = btree_write_block, 193 .block_level = btree_block_level, 194 .free_block = __free_block, 195 .write_alias = btree_write_alias, 196}; 197 198int logfs_load_object_aliases(struct super_block *sb, 199 struct logfs_obj_alias *oa, int count) 200{ 201 struct logfs_super *super = logfs_super(sb); 202 struct logfs_block *block; 203 struct object_alias_item *item; 204 u64 ino, bix; 205 level_t level; 206 int i, err; 207 208 super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; 209 count /= sizeof(*oa); 210 for (i = 0; i < count; i++) { 211 item = mempool_alloc(super->s_alias_pool, GFP_NOFS); 212 if (!item) 213 return -ENOMEM; 214 memset(item, 0, sizeof(*item)); 215 216 super->s_no_object_aliases++; 217 item->val = oa[i].val; 218 item->child_no = be16_to_cpu(oa[i].child_no); 219 220 ino = be64_to_cpu(oa[i].ino); 221 bix = be64_to_cpu(oa[i].bix); 222 level = LEVEL(oa[i].level); 223 224 log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n", 225 ino, bix, level, item->child_no, 226 be64_to_cpu(item->val)); 227 block = alias_tree_lookup(sb, ino, bix, level); 228 if (!block) { 229 block = __alloc_block(sb, ino, bix, level); 230 block->ops = &btree_block_ops; 231 err = alias_tree_insert(sb, ino, bix, level, block); 232 BUG_ON(err); /* mempool empty */ 233 } 234 if (test_and_set_bit(item->child_no, block->alias_map)) { 235 printk(KERN_ERR"LogFS: Alias collision detected\n"); 236 return -EIO; 237 } 238 list_move_tail(&block->alias_list, &super->s_object_alias); 239 list_add(&item->list, &block->item_list); 240 } 241 return 0; 242} 243 244static void kill_alias(void *_block, unsigned long ignore0, 245 u64 ignore1, u64 ignore2, size_t ignore3) 246{ 247 struct logfs_block *block = _block; 248 struct super_block *sb = block->sb; 249 struct logfs_super *super = logfs_super(sb); 250 struct object_alias_item *item; 251 252 while (!list_empty(&block->item_list)) { 253 item = list_entry(block->item_list.next, typeof(*item), list); 254 list_del(&item->list); 255 mempool_free(item, super->s_alias_pool); 256 } 257 block->ops->free_block(sb, block); 258} 259 260static int obj_type(struct inode *inode, level_t level) 261{ 262 if (level == 0) { 263 if (S_ISDIR(inode->i_mode)) 264 return OBJ_DENTRY; 265 if (inode->i_ino == LOGFS_INO_MASTER) 266 return OBJ_INODE; 267 } 268 return OBJ_BLOCK; 269} 270 271static int obj_len(struct super_block *sb, int obj_type) 272{ 273 switch (obj_type) { 274 case OBJ_DENTRY: 275 return sizeof(struct logfs_disk_dentry); 276 case OBJ_INODE: 277 return sizeof(struct logfs_disk_inode); 278 case OBJ_BLOCK: 279 return sb->s_blocksize; 280 default: 281 BUG(); 282 } 283} 284 285static int __logfs_segment_write(struct inode *inode, void *buf, 286 struct logfs_shadow *shadow, int type, int len, int compr) 287{ 288 struct logfs_area *area; 289 struct super_block *sb = inode->i_sb; 290 s64 ofs; 291 struct logfs_object_header h; 292 int acc_len; 293 294 if (shadow->gc_level == 0) 295 acc_len = len; 296 else 297 acc_len = obj_len(sb, type); 298 299 area = get_area(sb, shadow->gc_level); 300 ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE); 301 LOGFS_BUG_ON(ofs <= 0, sb); 302 /* 303 * Order is important. logfs_get_free_bytes(), by modifying the 304 * segment file, may modify the content of the very page we're about 305 * to write now. Which is fine, as long as the calculated crc and 306 * written data still match. So do the modifications _before_ 307 * calculating the crc. 308 */ 309 310 h.len = cpu_to_be16(len); 311 h.type = type; 312 h.compr = compr; 313 h.ino = cpu_to_be64(inode->i_ino); 314 h.bix = cpu_to_be64(shadow->bix); 315 h.crc = logfs_crc32(&h, sizeof(h) - 4, 4); 316 h.data_crc = logfs_crc32(buf, len, 0); 317 318 logfs_buf_write(area, ofs, &h, sizeof(h)); 319 logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len); 320 321 shadow->new_ofs = ofs; 322 shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE; 323 324 return 0; 325} 326 327static s64 logfs_segment_write_compress(struct inode *inode, void *buf, 328 struct logfs_shadow *shadow, int type, int len) 329{ 330 struct super_block *sb = inode->i_sb; 331 void *compressor_buf = logfs_super(sb)->s_compressed_je; 332 ssize_t compr_len; 333 int ret; 334 335 mutex_lock(&logfs_super(sb)->s_journal_mutex); 336 compr_len = logfs_compress(buf, compressor_buf, len, len); 337 338 if (compr_len >= 0) { 339 ret = __logfs_segment_write(inode, compressor_buf, shadow, 340 type, compr_len, COMPR_ZLIB); 341 } else { 342 ret = __logfs_segment_write(inode, buf, shadow, type, len, 343 COMPR_NONE); 344 } 345 mutex_unlock(&logfs_super(sb)->s_journal_mutex); 346 return ret; 347} 348 349/** 350 * logfs_segment_write - write data block to object store 351 * @inode: inode containing data 352 * 353 * Returns an errno or zero. 354 */ 355int logfs_segment_write(struct inode *inode, struct page *page, 356 struct logfs_shadow *shadow) 357{ 358 struct super_block *sb = inode->i_sb; 359 struct logfs_super *super = logfs_super(sb); 360 int do_compress, type, len; 361 int ret; 362 void *buf; 363 364 super->s_flags |= LOGFS_SB_FLAG_DIRTY; 365 BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); 366 do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED; 367 if (shadow->gc_level != 0) { 368 /* temporarily disable compression for indirect blocks */ 369 do_compress = 0; 370 } 371 372 type = obj_type(inode, shrink_level(shadow->gc_level)); 373 len = obj_len(sb, type); 374 buf = kmap(page); 375 if (do_compress) 376 ret = logfs_segment_write_compress(inode, buf, shadow, type, 377 len); 378 else 379 ret = __logfs_segment_write(inode, buf, shadow, type, len, 380 COMPR_NONE); 381 kunmap(page); 382 383 log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n", 384 shadow->ino, shadow->bix, shadow->gc_level, 385 shadow->old_ofs, shadow->new_ofs, 386 shadow->old_len, shadow->new_len); 387 /* this BUG_ON did catch a locking bug. useful */ 388 BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1))); 389 return ret; 390} 391 392int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf) 393{ 394 pgoff_t index = ofs >> PAGE_SHIFT; 395 struct page *page; 396 long offset = ofs & (PAGE_SIZE-1); 397 long copylen; 398 399 while (len) { 400 copylen = min((ulong)len, PAGE_SIZE - offset); 401 402 page = get_mapping_page(sb, index, 1); 403 if (IS_ERR(page)) 404 return PTR_ERR(page); 405 memcpy(buf, page_address(page) + offset, copylen); 406 page_cache_release(page); 407 408 buf += copylen; 409 len -= copylen; 410 offset = 0; 411 index++; 412 } 413 return 0; 414} 415 416/* 417 * The "position" of indirect blocks is ambiguous. It can be the position 418 * of any data block somewhere behind this indirect block. So we need to 419 * normalize the positions through logfs_block_mask() before comparing. 420 */ 421static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level) 422{ 423 return (pos1 & logfs_block_mask(sb, level)) != 424 (pos2 & logfs_block_mask(sb, level)); 425} 426 427#if 0 428static int read_seg_header(struct super_block *sb, u64 ofs, 429 struct logfs_segment_header *sh) 430{ 431 __be32 crc; 432 int err; 433 434 err = wbuf_read(sb, ofs, sizeof(*sh), sh); 435 if (err) 436 return err; 437 crc = logfs_crc32(sh, sizeof(*sh), 4); 438 if (crc != sh->crc) { 439 printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " 440 "got %x\n", ofs, be32_to_cpu(sh->crc), 441 be32_to_cpu(crc)); 442 return -EIO; 443 } 444 return 0; 445} 446#endif 447 448static int read_obj_header(struct super_block *sb, u64 ofs, 449 struct logfs_object_header *oh) 450{ 451 __be32 crc; 452 int err; 453 454 err = wbuf_read(sb, ofs, sizeof(*oh), oh); 455 if (err) 456 return err; 457 crc = logfs_crc32(oh, sizeof(*oh) - 4, 4); 458 if (crc != oh->crc) { 459 printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " 460 "got %x\n", ofs, be32_to_cpu(oh->crc), 461 be32_to_cpu(crc)); 462 return -EIO; 463 } 464 return 0; 465} 466 467static void move_btree_to_page(struct inode *inode, struct page *page, 468 __be64 *data) 469{ 470 struct super_block *sb = inode->i_sb; 471 struct logfs_super *super = logfs_super(sb); 472 struct btree_head128 *head = &super->s_object_alias_tree; 473 struct logfs_block *block; 474 struct object_alias_item *item, *next; 475 476 if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS)) 477 return; 478 479 block = btree_remove128(head, inode->i_ino, page->index); 480 if (!block) 481 return; 482 483 log_blockmove("move_btree_to_page(%llx, %llx, %x)\n", 484 block->ino, block->bix, block->level); 485 list_for_each_entry_safe(item, next, &block->item_list, list) { 486 data[item->child_no] = item->val; 487 list_del(&item->list); 488 mempool_free(item, super->s_alias_pool); 489 } 490 block->page = page; 491 SetPagePrivate(page); 492 page->private = (unsigned long)block; 493 block->ops = &indirect_block_ops; 494 initialize_block_counters(page, block, data, 0); 495} 496 497/* 498 * This silences a false, yet annoying gcc warning. I hate it when my editor 499 * jumps into bitops.h each time I recompile this file. 500 * TODO: Complain to gcc folks about this and upgrade compiler. 501 */ 502static unsigned long fnb(const unsigned long *addr, 503 unsigned long size, unsigned long offset) 504{ 505 return find_next_bit(addr, size, offset); 506} 507 508void move_page_to_btree(struct page *page) 509{ 510 struct logfs_block *block = logfs_block(page); 511 struct super_block *sb = block->sb; 512 struct logfs_super *super = logfs_super(sb); 513 struct object_alias_item *item; 514 unsigned long pos; 515 __be64 *child; 516 int err; 517 518 if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) { 519 block->ops->free_block(sb, block); 520 return; 521 } 522 log_blockmove("move_page_to_btree(%llx, %llx, %x)\n", 523 block->ino, block->bix, block->level); 524 super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; 525 526 for (pos = 0; ; pos++) { 527 pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos); 528 if (pos >= LOGFS_BLOCK_FACTOR) 529 break; 530 531 item = mempool_alloc(super->s_alias_pool, GFP_NOFS); 532 BUG_ON(!item); /* mempool empty */ 533 memset(item, 0, sizeof(*item)); 534 535 child = kmap_atomic(page, KM_USER0); 536 item->val = child[pos]; 537 kunmap_atomic(child, KM_USER0); 538 item->child_no = pos; 539 list_add(&item->list, &block->item_list); 540 } 541 block->page = NULL; 542 ClearPagePrivate(page); 543 page->private = 0; 544 block->ops = &btree_block_ops; 545 err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, 546 block); 547 BUG_ON(err); /* mempool empty */ 548 ClearPageUptodate(page); 549} 550 551static int __logfs_segment_read(struct inode *inode, void *buf, 552 u64 ofs, u64 bix, level_t level) 553{ 554 struct super_block *sb = inode->i_sb; 555 void *compressor_buf = logfs_super(sb)->s_compressed_je; 556 struct logfs_object_header oh; 557 __be32 crc; 558 u16 len; 559 int err, block_len; 560 561 block_len = obj_len(sb, obj_type(inode, level)); 562 err = read_obj_header(sb, ofs, &oh); 563 if (err) 564 goto out_err; 565 566 err = -EIO; 567 if (be64_to_cpu(oh.ino) != inode->i_ino 568 || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) { 569 printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: " 570 "expected (%lx, %llx), got (%llx, %llx)\n", 571 ofs, inode->i_ino, bix, 572 be64_to_cpu(oh.ino), be64_to_cpu(oh.bix)); 573 goto out_err; 574 } 575 576 len = be16_to_cpu(oh.len); 577 578 switch (oh.compr) { 579 case COMPR_NONE: 580 err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf); 581 if (err) 582 goto out_err; 583 crc = logfs_crc32(buf, len, 0); 584 if (crc != oh.data_crc) { 585 printk(KERN_ERR"LOGFS: uncompressed data crc error at " 586 "%llx: expected %x, got %x\n", ofs, 587 be32_to_cpu(oh.data_crc), 588 be32_to_cpu(crc)); 589 goto out_err; 590 } 591 break; 592 case COMPR_ZLIB: 593 mutex_lock(&logfs_super(sb)->s_journal_mutex); 594 err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, 595 compressor_buf); 596 if (err) { 597 mutex_unlock(&logfs_super(sb)->s_journal_mutex); 598 goto out_err; 599 } 600 crc = logfs_crc32(compressor_buf, len, 0); 601 if (crc != oh.data_crc) { 602 printk(KERN_ERR"LOGFS: compressed data crc error at " 603 "%llx: expected %x, got %x\n", ofs, 604 be32_to_cpu(oh.data_crc), 605 be32_to_cpu(crc)); 606 mutex_unlock(&logfs_super(sb)->s_journal_mutex); 607 goto out_err; 608 } 609 err = logfs_uncompress(compressor_buf, buf, len, block_len); 610 mutex_unlock(&logfs_super(sb)->s_journal_mutex); 611 if (err) { 612 printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs); 613 goto out_err; 614 } 615 break; 616 default: 617 LOGFS_BUG(sb); 618 err = -EIO; 619 goto out_err; 620 } 621 return 0; 622 623out_err: 624 logfs_set_ro(sb); 625 printk(KERN_ERR"LOGFS: device is read-only now\n"); 626 LOGFS_BUG(sb); 627 return err; 628} 629 630/** 631 * logfs_segment_read - read data block from object store 632 * @inode: inode containing data 633 * @buf: data buffer 634 * @ofs: physical data offset 635 * @bix: block index 636 * @level: block level 637 * 638 * Returns 0 on success or a negative errno. 639 */ 640int logfs_segment_read(struct inode *inode, struct page *page, 641 u64 ofs, u64 bix, level_t level) 642{ 643 int err; 644 void *buf; 645 646 if (PageUptodate(page)) 647 return 0; 648 649 ofs &= ~LOGFS_FULLY_POPULATED; 650 651 buf = kmap(page); 652 err = __logfs_segment_read(inode, buf, ofs, bix, level); 653 if (!err) { 654 move_btree_to_page(inode, page, buf); 655 SetPageUptodate(page); 656 } 657 kunmap(page); 658 log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n", 659 inode->i_ino, bix, level, ofs, err); 660 return err; 661} 662 663int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) 664{ 665 struct super_block *sb = inode->i_sb; 666 struct logfs_super *super = logfs_super(sb); 667 struct logfs_object_header h; 668 u16 len; 669 int err; 670 671 super->s_flags |= LOGFS_SB_FLAG_DIRTY; 672 BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); 673 BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED); 674 if (!shadow->old_ofs) 675 return 0; 676 677 log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n", 678 shadow->ino, shadow->bix, shadow->gc_level, 679 shadow->old_ofs, shadow->new_ofs, 680 shadow->old_len, shadow->new_len); 681 err = read_obj_header(sb, shadow->old_ofs, &h); 682 LOGFS_BUG_ON(err, sb); 683 LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb); 684 LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix), 685 shrink_level(shadow->gc_level)), sb); 686 687 if (shadow->gc_level == 0) 688 len = be16_to_cpu(h.len); 689 else 690 len = obj_len(sb, h.type); 691 shadow->old_len = len + sizeof(h); 692 return 0; 693} 694 695void freeseg(struct super_block *sb, u32 segno) 696{ 697 struct logfs_super *super = logfs_super(sb); 698 struct address_space *mapping = super->s_mapping_inode->i_mapping; 699 struct page *page; 700 u64 ofs, start, end; 701 702 start = dev_ofs(sb, segno, 0); 703 end = dev_ofs(sb, segno + 1, 0); 704 for (ofs = start; ofs < end; ofs += PAGE_SIZE) { 705 page = find_get_page(mapping, ofs >> PAGE_SHIFT); 706 if (!page) 707 continue; 708 ClearPagePrivate(page); 709 page_cache_release(page); 710 } 711} 712 713int logfs_open_area(struct logfs_area *area, size_t bytes) 714{ 715 struct super_block *sb = area->a_sb; 716 struct logfs_super *super = logfs_super(sb); 717 int err, closed = 0; 718 719 if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize) 720 return 0; 721 722 if (area->a_is_open) { 723 u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); 724 u32 len = super->s_segsize - area->a_written_bytes; 725 726 log_gc("logfs_close_area(%x)\n", area->a_segno); 727 pad_wbuf(area, 1); 728 super->s_devops->writeseg(area->a_sb, ofs, len); 729 freeseg(sb, area->a_segno); 730 closed = 1; 731 } 732 733 area->a_used_bytes = 0; 734 area->a_written_bytes = 0; 735again: 736 area->a_ops->get_free_segment(area); 737 area->a_ops->get_erase_count(area); 738 739 log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level); 740 err = area->a_ops->erase_segment(area); 741 if (err) { 742 printk(KERN_WARNING "LogFS: Error erasing segment %x\n", 743 area->a_segno); 744 logfs_mark_segment_bad(sb, area->a_segno); 745 goto again; 746 } 747 area->a_is_open = 1; 748 return closed; 749} 750 751void logfs_sync_area(struct logfs_area *area) 752{ 753 struct super_block *sb = area->a_sb; 754 struct logfs_super *super = logfs_super(sb); 755 u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); 756 u32 len = (area->a_used_bytes - area->a_written_bytes); 757 758 if (super->s_writesize) 759 len &= ~(super->s_writesize - 1); 760 if (len == 0) 761 return; 762 pad_wbuf(area, 0); 763 super->s_devops->writeseg(sb, ofs, len); 764 area->a_written_bytes += len; 765} 766 767void logfs_sync_segments(struct super_block *sb) 768{ 769 struct logfs_super *super = logfs_super(sb); 770 int i; 771 772 for_each_area(i) 773 logfs_sync_area(super->s_area[i]); 774} 775 776/* 777 * Pick a free segment to be used for this area. Effectively takes a 778 * candidate from the free list (not really a candidate anymore). 779 */ 780static void ostore_get_free_segment(struct logfs_area *area) 781{ 782 struct super_block *sb = area->a_sb; 783 struct logfs_super *super = logfs_super(sb); 784 785 if (super->s_free_list.count == 0) { 786 printk(KERN_ERR"LOGFS: ran out of free segments\n"); 787 LOGFS_BUG(sb); 788 } 789 790 area->a_segno = get_best_cand(sb, &super->s_free_list, NULL); 791} 792 793static void ostore_get_erase_count(struct logfs_area *area) 794{ 795 struct logfs_segment_entry se; 796 u32 ec_level; 797 798 logfs_get_segment_entry(area->a_sb, area->a_segno, &se); 799 BUG_ON(se.ec_level == cpu_to_be32(BADSEG) || 800 se.valid == cpu_to_be32(RESERVED)); 801 802 ec_level = be32_to_cpu(se.ec_level); 803 area->a_erase_count = (ec_level >> 4) + 1; 804} 805 806static int ostore_erase_segment(struct logfs_area *area) 807{ 808 struct super_block *sb = area->a_sb; 809 struct logfs_segment_header sh; 810 u64 ofs; 811 int err; 812 813 err = logfs_erase_segment(sb, area->a_segno, 0); 814 if (err) 815 return err; 816 817 sh.pad = 0; 818 sh.type = SEG_OSTORE; 819 sh.level = (__force u8)area->a_level; 820 sh.segno = cpu_to_be32(area->a_segno); 821 sh.ec = cpu_to_be32(area->a_erase_count); 822 sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); 823 sh.crc = logfs_crc32(&sh, sizeof(sh), 4); 824 825 logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 826 area->a_level); 827 828 ofs = dev_ofs(sb, area->a_segno, 0); 829 area->a_used_bytes = sizeof(sh); 830 logfs_buf_write(area, ofs, &sh, sizeof(sh)); 831 return 0; 832} 833 834static const struct logfs_area_ops ostore_area_ops = { 835 .get_free_segment = ostore_get_free_segment, 836 .get_erase_count = ostore_get_erase_count, 837 .erase_segment = ostore_erase_segment, 838}; 839 840static void free_area(struct logfs_area *area) 841{ 842 if (area) 843 freeseg(area->a_sb, area->a_segno); 844 kfree(area); 845} 846 847static struct logfs_area *alloc_area(struct super_block *sb) 848{ 849 struct logfs_area *area; 850 851 area = kzalloc(sizeof(*area), GFP_KERNEL); 852 if (!area) 853 return NULL; 854 855 area->a_sb = sb; 856 return area; 857} 858 859static void map_invalidatepage(struct page *page, unsigned long l) 860{ 861 BUG(); 862} 863 864static int map_releasepage(struct page *page, gfp_t g) 865{ 866 /* Don't release these pages */ 867 return 0; 868} 869 870static const struct address_space_operations mapping_aops = { 871 .invalidatepage = map_invalidatepage, 872 .releasepage = map_releasepage, 873 .set_page_dirty = __set_page_dirty_nobuffers, 874}; 875 876int logfs_init_mapping(struct super_block *sb) 877{ 878 struct logfs_super *super = logfs_super(sb); 879 struct address_space *mapping; 880 struct inode *inode; 881 882 inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING); 883 if (IS_ERR(inode)) 884 return PTR_ERR(inode); 885 super->s_mapping_inode = inode; 886 mapping = inode->i_mapping; 887 mapping->a_ops = &mapping_aops; 888 /* Would it be possible to use __GFP_HIGHMEM as well? */ 889 mapping_set_gfp_mask(mapping, GFP_NOFS); 890 return 0; 891} 892 893int logfs_init_areas(struct super_block *sb) 894{ 895 struct logfs_super *super = logfs_super(sb); 896 int i = -1; 897 898 super->s_alias_pool = mempool_create_kmalloc_pool(600, 899 sizeof(struct object_alias_item)); 900 if (!super->s_alias_pool) 901 return -ENOMEM; 902 903 super->s_journal_area = alloc_area(sb); 904 if (!super->s_journal_area) 905 goto err; 906 907 for_each_area(i) { 908 super->s_area[i] = alloc_area(sb); 909 if (!super->s_area[i]) 910 goto err; 911 super->s_area[i]->a_level = GC_LEVEL(i); 912 super->s_area[i]->a_ops = &ostore_area_ops; 913 } 914 btree_init_mempool128(&super->s_object_alias_tree, 915 super->s_btree_pool); 916 return 0; 917 918err: 919 for (i--; i >= 0; i--) 920 free_area(super->s_area[i]); 921 free_area(super->s_journal_area); 922 mempool_destroy(super->s_alias_pool); 923 return -ENOMEM; 924} 925 926void logfs_cleanup_areas(struct super_block *sb) 927{ 928 struct logfs_super *super = logfs_super(sb); 929 int i; 930 931 btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); 932 for_each_area(i) 933 free_area(super->s_area[i]); 934 free_area(super->s_journal_area); 935 destroy_meta_inode(super->s_mapping_inode); 936}