fs/jbd/transaction.c at v2.6.30 (2225 lines, 67 kB), from the Linux kernel mirror: git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
/*
 * linux/fs/jbd/transaction.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Generic filesystem transaction handling code; part of the ext2fs
 * journaling system.
 *
 * This file manages transactions (compound commits managed by the
 * journaling code) and handles (individual atomic operations by the
 * filesystem).
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>

static void __journal_temp_unlink_buffer(struct journal_head *jh);

/*
 * get_transaction: obtain a new transaction_t object.
 *
 * Simply allocate and initialise a new transaction.  Create it in
 * RUNNING state and add it to the current journal (which should not
 * have an existing running transaction: we only make a new transaction
 * once we have started to commit the old one).
 *
 * Preconditions:
 *	The journal MUST be locked.  We don't perform atomic mallocs on the
 *	new transaction and we can't block without protecting against other
 *	processes trying to touch the journal while it is in transition.
 *
 * Called under j_state_lock
 */

static transaction_t *
get_transaction(journal_t *journal, transaction_t *transaction)
{
	transaction->t_journal = journal;
	transaction->t_state = T_RUNNING;
	transaction->t_start_time = ktime_get();
	transaction->t_tid = journal->j_transaction_sequence++;
	transaction->t_expires = jiffies + journal->j_commit_interval;
	spin_lock_init(&transaction->t_handle_lock);

	/* Set up the commit timer for the new transaction. */
	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
	add_timer(&journal->j_commit_timer);

	J_ASSERT(journal->j_running_transaction == NULL);
	journal->j_running_transaction = transaction;

	return transaction;
}

/*
 * Handle management.
 *
 * A handle_t is an object which represents a single atomic update to a
 * filesystem, and which tracks all of the modifications which form part
 * of that one update.
 */

/*
 * start_this_handle: Given a handle, deal with any locking or stalling
 * needed to make sure that there is enough journal space for the handle
 * to begin.  Attach the handle to a transaction and set up the
 * transaction's buffer credits.
 */

static int start_this_handle(journal_t *journal, handle_t *handle)
{
	transaction_t *transaction;
	int needed;
	int nblocks = handle->h_buffer_credits;
	transaction_t *new_transaction = NULL;
	int ret = 0;

	if (nblocks > journal->j_max_transaction_buffers) {
		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
		       current->comm, nblocks,
		       journal->j_max_transaction_buffers);
		ret = -ENOSPC;
		goto out;
	}

alloc_transaction:
	if (!journal->j_running_transaction) {
		new_transaction = kzalloc(sizeof(*new_transaction),
					  GFP_NOFS|__GFP_NOFAIL);
		if (!new_transaction) {
			ret = -ENOMEM;
			goto out;
		}
	}

	jbd_debug(3, "New handle %p going live.\n", handle);

repeat:

	/*
	 * We need to hold j_state_lock until t_updates has been incremented,
	 * for proper journal barrier handling
	 */
	spin_lock(&journal->j_state_lock);
repeat_locked:
	if (is_journal_aborted(journal) ||
	    (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
		spin_unlock(&journal->j_state_lock);
		ret = -EROFS;
		goto out;
	}

	/* Wait on the journal's transaction barrier if necessary */
	if (journal->j_barrier_count) {
		spin_unlock(&journal->j_state_lock);
		wait_event(journal->j_wait_transaction_locked,
				journal->j_barrier_count == 0);
		goto repeat;
	}

	if (!journal->j_running_transaction) {
		if (!new_transaction) {
			spin_unlock(&journal->j_state_lock);
			goto alloc_transaction;
		}
		get_transaction(journal, new_transaction);
		new_transaction = NULL;
	}

	transaction = journal->j_running_transaction;

	/*
	 * If the current transaction is locked down for commit, wait for the
	 * lock to be released.
	 */
	if (transaction->t_state == T_LOCKED) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_transaction_locked,
					&wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_transaction_locked, &wait);
		goto repeat;
	}

	/*
	 * If there is not enough space left in the log to write all potential
	 * buffers requested by this operation, we need to stall pending a log
	 * checkpoint to free some more log space.
	 */
	spin_lock(&transaction->t_handle_lock);
	needed = transaction->t_outstanding_credits + nblocks;

	if (needed > journal->j_max_transaction_buffers) {
		/*
		 * If the current transaction is already too large, then start
		 * to commit it: we can then go back and attach this handle to
		 * a new transaction.
		 */
		DEFINE_WAIT(wait);

		jbd_debug(2, "Handle %p starting new commit...\n", handle);
		spin_unlock(&transaction->t_handle_lock);
		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
				TASK_UNINTERRUPTIBLE);
		__log_start_commit(journal, transaction->t_tid);
		spin_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_transaction_locked, &wait);
		goto repeat;
	}

	/*
	 * The commit code assumes that it can get enough log space
	 * without forcing a checkpoint.  This is *critical* for
	 * correctness: a checkpoint of a buffer which is also
	 * associated with a committing transaction creates a deadlock,
	 * so commit simply cannot force through checkpoints.
	 *
	 * We must therefore ensure the necessary space in the journal
	 * *before* starting to dirty potentially checkpointed buffers
	 * in the new transaction.
	 *
	 * The worst part is, any transaction currently committing can
	 * reduce the free space arbitrarily.  Be careful to account for
	 * those buffers when checkpointing.
	 */

	/*
	 * @@@ AKPM: This seems rather over-defensive.  We're giving commit
	 * a _lot_ of headroom: 1/4 of the journal plus the size of
	 * the committing transaction.  Really, we only need to give it
	 * committing_transaction->t_outstanding_credits plus "enough" for
	 * the log control blocks.
	 * Also, this test is inconsistent with the matching one in
	 * journal_extend().
	 */
	if (__log_space_left(journal) < jbd_space_needed(journal)) {
		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
		spin_unlock(&transaction->t_handle_lock);
		__log_wait_for_space(journal);
		goto repeat_locked;
	}

	/* OK, account for the buffers that this operation expects to
	 * use and add the handle to the running transaction. */

	handle->h_transaction = transaction;
	transaction->t_outstanding_credits += nblocks;
	transaction->t_updates++;
	transaction->t_handle_count++;
	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
		  handle, nblocks, transaction->t_outstanding_credits,
		  __log_space_left(journal));
	spin_unlock(&transaction->t_handle_lock);
	spin_unlock(&journal->j_state_lock);
out:
	if (unlikely(new_transaction))		/* It's usually NULL */
		kfree(new_transaction);
	return ret;
}

static struct lock_class_key jbd_handle_key;

/* Allocate a new handle.  This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
	handle_t *handle = jbd_alloc_handle(GFP_NOFS);
	if (!handle)
		return NULL;
	memset(handle, 0, sizeof(*handle));
	handle->h_buffer_credits = nblocks;
	handle->h_ref = 1;

	lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0);

	return handle;
}

/**
 * handle_t *journal_start() - Obtain a new handle.
 * @journal: Journal to start transaction on.
 * @nblocks: number of block buffers we might modify
 *
 * We make sure that the transaction can guarantee at least nblocks of
 * modified buffers in the log.  We block until the log can guarantee
 * that much space.
 *
 * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
 * on failure.
 */
handle_t *journal_start(journal_t *journal, int nblocks)
{
	handle_t *handle = journal_current_handle();
	int err;

	if (!journal)
		return ERR_PTR(-EROFS);

	if (handle) {
		J_ASSERT(handle->h_transaction->t_journal == journal);
		handle->h_ref++;
		return handle;
	}

	handle = new_handle(nblocks);
	if (!handle)
		return ERR_PTR(-ENOMEM);

	current->journal_info = handle;

	err = start_this_handle(journal, handle);
	if (err < 0) {
		jbd_free_handle(handle);
		current->journal_info = NULL;
		handle = ERR_PTR(err);
		goto out;
	}

	lock_map_acquire(&handle->h_lockdep_map);

out:
	return handle;
}
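/*
 * Editor's illustration (not part of the original file): a minimal
 * sketch of the handle lifecycle as a caller such as ext3 might drive
 * it, assuming a single-block metadata update.  The function name and
 * buffer are hypothetical; the journal_* calls are the APIs defined in
 * this file (journal_get_write_access() and journal_dirty_metadata()
 * appear further down).  A caller that discovers mid-operation that it
 * needs more credits would use journal_extend()/journal_restart(),
 * documented next.
 */
#if 0	/* example only */
static int example_modify_one_block(journal_t *journal,
				    struct buffer_head *bh)
{
	handle_t *handle;
	int err;

	/* Reserve credit for one modified buffer; may block for log space. */
	handle = journal_start(journal, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* Declare intent to modify bh under this transaction. */
	err = journal_get_write_access(handle, bh);
	if (!err) {
		/* ... modify bh->b_data here ... */
		err = journal_dirty_metadata(handle, bh);
	}

	/* Give back unused credits; may kick off a commit. */
	journal_stop(handle);
	return err;
}
#endif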
/**
 * int journal_extend() - extend buffer credits.
 * @handle:  handle to 'extend'
 * @nblocks: nr blocks to try to extend by.
 *
 * Some transactions, such as large extends and truncates, can be done
 * atomically all at once or in several stages.  The operation requests
 * a credit for a number of buffer modifications in advance, but can
 * extend its credit if it needs more.
 *
 * journal_extend tries to give the running handle more buffer credits.
 * It does not guarantee the allocation - this is best-effort only.
 * The calling process MUST be able to deal cleanly with a failure to
 * extend here.
 *
 * Return 0 on success, non-zero on failure.
 *
 * return code < 0 implies an error
 * return code > 0 implies normal transaction-full status.
 */
int journal_extend(handle_t *handle, int nblocks)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int result;
	int wanted;

	result = -EIO;
	if (is_handle_aborted(handle))
		goto out;

	result = 1;

	spin_lock(&journal->j_state_lock);

	/* Don't extend a locked-down transaction! */
	if (handle->h_transaction->t_state != T_RUNNING) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction not running\n", handle, nblocks);
		goto error_out;
	}

	spin_lock(&transaction->t_handle_lock);
	wanted = transaction->t_outstanding_credits + nblocks;

	if (wanted > journal->j_max_transaction_buffers) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction too large\n", handle, nblocks);
		goto unlock;
	}

	if (wanted > __log_space_left(journal)) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "insufficient log space\n", handle, nblocks);
		goto unlock;
	}

	handle->h_buffer_credits += nblocks;
	transaction->t_outstanding_credits += nblocks;
	result = 0;

	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
unlock:
	spin_unlock(&transaction->t_handle_lock);
error_out:
	spin_unlock(&journal->j_state_lock);
out:
	return result;
}


/**
 * int journal_restart() - restart a handle.
 * @handle:  handle to restart
 * @nblocks: nr credits requested
 *
 * Restart a handle for a multi-transaction filesystem
 * operation.
 *
 * If the journal_extend() call above fails to grant new buffer credits
 * to a running handle, a call to journal_restart will commit the
 * handle's transaction so far and reattach the handle to a new
 * transaction capable of guaranteeing the requested number of
 * credits.
 */

int journal_restart(handle_t *handle, int nblocks)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int ret;

	/* If we've had an abort of any type, don't even think about
	 * actually doing the restart! */
	if (is_handle_aborted(handle))
		return 0;

	/*
	 * First unlink the handle from its current transaction, and start the
	 * commit on that.
	 */
	J_ASSERT(transaction->t_updates > 0);
	J_ASSERT(journal_current_handle() == handle);

	spin_lock(&journal->j_state_lock);
	spin_lock(&transaction->t_handle_lock);
	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	transaction->t_updates--;

	if (!transaction->t_updates)
		wake_up(&journal->j_wait_updates);
	spin_unlock(&transaction->t_handle_lock);

	jbd_debug(2, "restarting handle %p\n", handle);
	__log_start_commit(journal, transaction->t_tid);
	spin_unlock(&journal->j_state_lock);

	handle->h_buffer_credits = nblocks;
	ret = start_this_handle(journal, handle);
	return ret;
}


/**
 * void journal_lock_updates () - establish a transaction barrier.
 * @journal:  Journal to establish a barrier on.
 *
 * This locks out any further updates from being started, and blocks
 * until all existing updates have completed, returning only once the
 * journal is in a quiescent state with no updates running.
 *
 * The journal lock should not be held on entry.
 */
void journal_lock_updates(journal_t *journal)
{
	DEFINE_WAIT(wait);

	spin_lock(&journal->j_state_lock);
	++journal->j_barrier_count;

	/* Wait until there are no running updates */
	while (1) {
		transaction_t *transaction = journal->j_running_transaction;

		if (!transaction)
			break;

		spin_lock(&transaction->t_handle_lock);
		if (!transaction->t_updates) {
			spin_unlock(&transaction->t_handle_lock);
			break;
		}
		prepare_to_wait(&journal->j_wait_updates, &wait,
				TASK_UNINTERRUPTIBLE);
		spin_unlock(&transaction->t_handle_lock);
		spin_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_updates, &wait);
		spin_lock(&journal->j_state_lock);
	}
	spin_unlock(&journal->j_state_lock);

	/*
	 * We have now established a barrier against other normal updates, but
	 * we also need to barrier against other journal_lock_updates() calls
	 * to make sure that we serialise special journal-locked operations
	 * too.
	 */
	mutex_lock(&journal->j_barrier);
}

/**
 * void journal_unlock_updates (journal_t* journal) - release barrier
 * @journal:  Journal to release the barrier on.
 *
 * Release a transaction barrier obtained with journal_lock_updates().
 *
 * Should be called without the journal lock held.
 */
void journal_unlock_updates (journal_t *journal)
{
	J_ASSERT(journal->j_barrier_count != 0);

	mutex_unlock(&journal->j_barrier);
	spin_lock(&journal->j_state_lock);
	--journal->j_barrier_count;
	spin_unlock(&journal->j_state_lock);
	wake_up(&journal->j_wait_transaction_locked);
}
/*
 * Report any unexpected dirty buffers which turn up.  Normally those
 * indicate an error, but they can occur if the user is running (say)
 * tune2fs to modify the live filesystem, so we need the option of
 * continuing as gracefully as possible.
 *
 * The caller should already hold the journal lock and
 * j_list_lock spinlock: most callers will need those anyway
 * in order to probe the buffer's journaling state safely.
 */
static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
{
	int jlist;

	/* If this buffer is one which might reasonably be dirty
	 * --- ie. data, or not part of this journal --- then
	 * we're OK to leave it alone, but otherwise we need to
	 * move the dirty bit to the journal's own internal
	 * JBDDirty bit. */
	jlist = jh->b_jlist;

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		struct buffer_head *bh = jh2bh(jh);

		if (test_clear_buffer_dirty(bh))
			set_buffer_jbddirty(bh);
	}
}

/*
 * If the buffer is already part of the current transaction, then there
 * is nothing we need to do.  If it is already part of a prior
 * transaction which we are still committing to disk, then we need to
 * make sure that we do not overwrite the old copy: we do copy-out to
 * preserve the copy going to disk.  We also account the buffer against
 * the handle's metadata buffer credits (unless the buffer is already
 * part of the transaction, that is).
 *
 */
static int
do_get_write_access(handle_t *handle, struct journal_head *jh,
			int force_copy)
{
	struct buffer_head *bh;
	transaction_t *transaction;
	journal_t *journal;
	int error;
	char *frozen_buffer = NULL;
	int need_copy = 0;

	if (is_handle_aborted(handle))
		return -EROFS;

	transaction = handle->h_transaction;
	journal = transaction->t_journal;

	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);

	JBUFFER_TRACE(jh, "entry");
repeat:
	bh = jh2bh(jh);

	/* @@@ Need to check for errors here at some point. */

	lock_buffer(bh);
	jbd_lock_bh_state(bh);

	/* We now hold the buffer lock so it is safe to query the buffer
	 * state.  Is the buffer dirty?
	 *
	 * If so, there are two possibilities.  The buffer may be
	 * non-journaled, and undergoing a quite legitimate writeback.
	 * Otherwise, it is journaled, and we don't expect dirty buffers
	 * in that state (the buffers should be marked JBD_Dirty
	 * instead.)  So either the IO is being done under our own
	 * control and this is a bug, or it's a third party IO such as
	 * dump(8) (which may leave the buffer scheduled for read ---
	 * ie. locked but not dirty) or tune2fs (which may actually have
	 * the buffer dirtied, ugh.)  */

	if (buffer_dirty(bh)) {
		/*
		 * First question: is this buffer already part of the current
		 * transaction or the existing committing transaction?
		 */
		if (jh->b_transaction) {
			J_ASSERT_JH(jh,
				jh->b_transaction == transaction ||
				jh->b_transaction ==
					journal->j_committing_transaction);
			if (jh->b_next_transaction)
				J_ASSERT_JH(jh, jh->b_next_transaction ==
							transaction);
		}
		/*
		 * In any case we need to clean the dirty flag and we must
		 * do it under the buffer lock to be sure we don't race
		 * with running write-out.
		 */
		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
		jbd_unexpected_dirty_buffer(jh);
	}

	unlock_buffer(bh);

	error = -EROFS;
	if (is_handle_aborted(handle)) {
		jbd_unlock_bh_state(bh);
		goto out;
	}
	error = 0;

	/*
	 * The buffer is already part of this transaction if b_transaction or
	 * b_next_transaction points to it
	 */
	if (jh->b_transaction == transaction ||
	    jh->b_next_transaction == transaction)
		goto done;

	/*
	 * this is the first time this transaction is touching this buffer,
	 * reset the modified flag
	 */
	jh->b_modified = 0;

	/*
	 * If there is already a copy-out version of this buffer, then we don't
	 * need to make another one
	 */
	if (jh->b_frozen_data) {
		JBUFFER_TRACE(jh, "has frozen data");
		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
		jh->b_next_transaction = transaction;
		goto done;
	}

	/* Is there data here we need to preserve? */

	if (jh->b_transaction && jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "owned by older transaction");
		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);

		/* There is one case we have to be very careful about.
		 * If the committing transaction is currently writing
		 * this buffer out to disk and has NOT made a copy-out,
		 * then we cannot modify the buffer contents at all
		 * right now.  The essence of copy-out is that it is the
		 * extra copy, not the primary copy, which gets
		 * journaled.  If the primary copy is already going to
		 * disk then we cannot do copy-out here. */

		if (jh->b_jlist == BJ_Shadow) {
			DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
			wait_queue_head_t *wqh;

			wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);

			JBUFFER_TRACE(jh, "on shadow: sleep");
			jbd_unlock_bh_state(bh);
			/* commit wakes up all shadow buffers after IO */
			for ( ; ; ) {
				prepare_to_wait(wqh, &wait.wait,
						TASK_UNINTERRUPTIBLE);
				if (jh->b_jlist != BJ_Shadow)
					break;
				schedule();
			}
			finish_wait(wqh, &wait.wait);
			goto repeat;
		}

		/* Only do the copy if the currently-owning transaction
		 * still needs it.  If it is on the Forget list, the
		 * committing transaction is past that stage.  The
		 * buffer had better remain locked during the kmalloc,
		 * but that should be true --- we hold the journal lock
		 * still and the buffer is already on the BUF_JOURNAL
		 * list so won't be flushed.
		 *
		 * Subtle point, though: if this is a get_undo_access,
		 * then we will be relying on the frozen_data to contain
		 * the new value of the committed_data record after the
		 * transaction, so we HAVE to force the frozen_data copy
		 * in that case. */

		if (jh->b_jlist != BJ_Forget || force_copy) {
			JBUFFER_TRACE(jh, "generate frozen data");
			if (!frozen_buffer) {
				JBUFFER_TRACE(jh, "allocate memory for buffer");
				jbd_unlock_bh_state(bh);
				frozen_buffer =
					jbd_alloc(jh2bh(jh)->b_size,
							 GFP_NOFS);
				if (!frozen_buffer) {
					printk(KERN_EMERG
					       "%s: OOM for frozen_buffer\n",
					       __func__);
					JBUFFER_TRACE(jh, "oom!");
					error = -ENOMEM;
					jbd_lock_bh_state(bh);
					goto done;
				}
				goto repeat;
			}
			jh->b_frozen_data = frozen_buffer;
			frozen_buffer = NULL;
			need_copy = 1;
		}
		jh->b_next_transaction = transaction;
	}


	/*
	 * Finally, if the buffer is not journaled right now, we need to make
	 * sure it doesn't get written to disk before the caller actually
	 * commits the new data
	 */
	if (!jh->b_transaction) {
		JBUFFER_TRACE(jh, "no transaction");
		J_ASSERT_JH(jh, !jh->b_next_transaction);
		jh->b_transaction = transaction;
		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		spin_lock(&journal->j_list_lock);
		__journal_file_buffer(jh, transaction, BJ_Reserved);
		spin_unlock(&journal->j_list_lock);
	}

done:
	if (need_copy) {
		struct page *page;
		int offset;
		char *source;

		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
			    "Possible IO failure.\n");
		page = jh2bh(jh)->b_page;
		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
		source = kmap_atomic(page, KM_USER0);
		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
		kunmap_atomic(source, KM_USER0);
	}
	jbd_unlock_bh_state(bh);

	/*
	 * If we are about to journal a buffer, then any revoke pending on it
	 * is no longer valid
	 */
	journal_cancel_revoke(handle, jh);

out:
	if (unlikely(frozen_buffer))	/* It's usually NULL */
		jbd_free(frozen_buffer, bh->b_size);

	JBUFFER_TRACE(jh, "exit");
	return error;
}

/**
 * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
 * @handle: transaction to add buffer modifications to
 * @bh:     bh to be used for metadata writes
 *
 * Returns an error code or 0 on success.
 *
 * In full data journalling mode the buffer may be of type BJ_AsyncData,
 * because we're write()ing a buffer which is also part of a shared mapping.
 */

int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
{
	struct journal_head *jh = journal_add_journal_head(bh);
	int rc;

	/* We do not want to get caught playing with fields which the
	 * log thread also manipulates.  Make sure that the buffer
	 * completes any outstanding IO before proceeding. */
	rc = do_get_write_access(handle, jh, 0);
	journal_put_journal_head(jh);
	return rc;
}


/*
 * When the user wants to journal a newly created buffer_head
 * (ie. getblk() returned a new buffer and we are going to populate it
 * manually rather than reading off disk), then we need to keep the
 * buffer_head locked until it has been completely filled with new
 * data.  In this case, we should be able to make the assertion that
 * the bh is not already part of an existing transaction.
 *
 * The buffer should already be locked by the caller by this point.
 * There is no lock ranking violation: it was a newly created,
 * unlocked buffer beforehand. */

/**
 * int journal_get_create_access () - notify intent to use newly created bh
 * @handle: transaction to add the new buffer to
 * @bh: new buffer.
 *
 * Call this if you create a new bh.
 */
int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = journal_add_journal_head(bh);
	int err;

	jbd_debug(5, "journal_head %p\n", jh);
	err = -EROFS;
	if (is_handle_aborted(handle))
		goto out;
	err = 0;

	JBUFFER_TRACE(jh, "entry");
	/*
	 * The buffer may already belong to this transaction due to pre-zeroing
	 * in the filesystem's new_block code.  It may also be on the previous,
	 * committing transaction's lists, but it HAS to be in Forget state in
	 * that case: the transaction must have deleted the buffer for it to be
	 * reused here.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);
	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
		jh->b_transaction == NULL ||
		(jh->b_transaction == journal->j_committing_transaction &&
			  jh->b_jlist == BJ_Forget)));

	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));

	if (jh->b_transaction == NULL) {
		jh->b_transaction = transaction;

		/* first access by this transaction */
		jh->b_modified = 0;

		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		__journal_file_buffer(jh, transaction, BJ_Reserved);
	} else if (jh->b_transaction == journal->j_committing_transaction) {
		/* first access by this transaction */
		jh->b_modified = 0;

		JBUFFER_TRACE(jh, "set next transaction");
		jh->b_next_transaction = transaction;
	}
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);

	/*
	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
	 * blocks which contain freed but then revoked metadata.  We need
	 * to cancel the revoke in case we end up freeing it yet again
	 * and then reallocating it as data - this would cause a second
	 * revoke, which hits an assertion error.
	 */
	JBUFFER_TRACE(jh, "cancelling revoke");
	journal_cancel_revoke(handle, jh);
	journal_put_journal_head(jh);
out:
	return err;
}

/**
 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
 * @handle: transaction
 * @bh: buffer to undo
 *
 * Sometimes there is a need to distinguish between metadata which has
 * been committed to disk and that which has not.  The ext3fs code uses
 * this for freeing and allocating space, we have to make sure that we
 * do not reuse freed space until the deallocation has been committed,
 * since if we overwrote that space we would make the delete
 * un-rewindable in case of a crash.
 *
 * To deal with that, journal_get_undo_access requests write access to a
 * buffer for parts of non-rewindable operations such as delete
 * operations on the bitmaps.  The journaling code must keep a copy of
 * the buffer's contents prior to the undo_access call until such time
 * as we know that the buffer has definitely been committed to disk.
 *
 * We never need to know which transaction the committed data is part
 * of, buffers touched here are guaranteed to be dirtied later and so
 * will be committed to a new transaction in due course, at which point
 * we can discard the old committed data pointer.
 *
 * Returns error number or 0 on success.
 */
int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
{
	int err;
	struct journal_head *jh = journal_add_journal_head(bh);
	char *committed_data = NULL;

	JBUFFER_TRACE(jh, "entry");

	/*
	 * Do this first --- it can drop the journal lock, so we want to
	 * make sure that obtaining the committed_data is done
	 * atomically wrt. completion of any outstanding commits.
	 */
	err = do_get_write_access(handle, jh, 1);
	if (err)
		goto out;

repeat:
	if (!jh->b_committed_data) {
		committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
		if (!committed_data) {
			printk(KERN_EMERG "%s: No memory for committed data\n",
				__func__);
			err = -ENOMEM;
			goto out;
		}
	}

	jbd_lock_bh_state(bh);
	if (!jh->b_committed_data) {
		/* Copy out the current buffer contents into the
		 * preserved, committed copy. */
		JBUFFER_TRACE(jh, "generate b_committed data");
		if (!committed_data) {
			jbd_unlock_bh_state(bh);
			goto repeat;
		}

		jh->b_committed_data = committed_data;
		committed_data = NULL;
		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
	}
	jbd_unlock_bh_state(bh);
out:
	journal_put_journal_head(jh);
	if (unlikely(committed_data))
		jbd_free(committed_data, bh->b_size);
	return err;
}

/**
 * int journal_dirty_data() - mark a buffer as containing dirty data to be flushed
 * @handle: transaction
 * @bh: bufferhead to mark
 *
 * Description:
 * Mark a buffer as containing dirty data which needs to be flushed before
 * we can commit the current transaction.
 *
 * The buffer is placed on the transaction's data list and is marked as
 * belonging to the transaction.
 *
 * Returns error number or 0 on success.
 *
 * journal_dirty_data() can be called via page_launder->ext3_writepage
 * by kswapd.
 */
int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
{
	journal_t *journal = handle->h_transaction->t_journal;
	int need_brelse = 0;
	struct journal_head *jh;
	int ret = 0;

	if (is_handle_aborted(handle))
		return ret;

	jh = journal_add_journal_head(bh);
	JBUFFER_TRACE(jh, "entry");

	/*
	 * The buffer could *already* be dirty.  Writeout can start
	 * at any time.
	 */
	jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);

	/*
	 * What if the buffer is already part of a running transaction?
	 *
	 * There are two cases:
	 * 1) It is part of the current running transaction.  Refile it,
	 *    just in case we have allocated it as metadata, deallocated
	 *    it, then reallocated it as data.
	 * 2) It is part of the previous, still-committing transaction.
	 *    If all we want to do is to guarantee that the buffer will be
	 *    written to disk before this new transaction commits, then
	 *    being sure that the *previous* transaction has this same
	 *    property is sufficient for us!  Just leave it on its old
	 *    transaction.
	 *
	 * In case (2), the buffer must not already exist as metadata
	 * --- that would violate write ordering (a transaction is free
	 * to write its data at any point, even before the previous
	 * committing transaction has committed).  The caller must
	 * never, ever allow this to happen: there's nothing we can do
	 * about it in this layer.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	/* Now that we have bh_state locked, are we really still mapped? */
	if (!buffer_mapped(bh)) {
		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
		goto no_journal;
	}

	if (jh->b_transaction) {
		JBUFFER_TRACE(jh, "has transaction");
		if (jh->b_transaction != handle->h_transaction) {
			JBUFFER_TRACE(jh, "belongs to older transaction");
			J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);

			/* @@@ IS THIS TRUE  ? */
			/*
			 * Not any more.  Scenario: someone does a write()
			 * in data=journal mode.  The buffer's transaction has
			 * moved into commit.  Then someone does another
			 * write() to the file.  We do the frozen data copyout
			 * and set b_next_transaction to point to j_running_t.
			 * And while we're in that state, someone does a
			 * writepage() in an attempt to pageout the same area
			 * of the file via a shared mapping.  At present that
			 * calls journal_dirty_data(), and we get right here.
			 * It may be too late to journal the data.  Simply
			 * falling through to the next test will suffice: the
			 * data will be dirty and will be checkpointed.  The
			 * ordering comments in the next comment block still
			 * apply.
			 */
			//J_ASSERT_JH(jh, jh->b_next_transaction == NULL);

			/*
			 * If we're journalling data, and this buffer was
			 * subject to a write(), it could be metadata, forget
			 * or shadow against the committing transaction.  Now,
			 * someone has dirtied the same darn page via a mapping
			 * and it is being writepage()'d.
			 * We *could* just steal the page from commit, with some
			 * fancy locking there.  Instead, we just skip it -
			 * don't tie the page's buffers to the new transaction
			 * at all.
			 * Implication: if we crash before the writepage() data
			 * is written into the filesystem, recovery will replay
			 * the write() data.
			 */
			if (jh->b_jlist != BJ_None &&
					jh->b_jlist != BJ_SyncData &&
					jh->b_jlist != BJ_Locked) {
				JBUFFER_TRACE(jh, "Not stealing");
				goto no_journal;
			}

			/*
			 * This buffer may be undergoing writeout in commit.  We
			 * can't return from here and let the caller dirty it
			 * again because that can cause the write-out loop in
			 * commit to never terminate.
			 */
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				need_brelse = 1;
				sync_dirty_buffer(bh);
				jbd_lock_bh_state(bh);
				spin_lock(&journal->j_list_lock);
				/* Since we dropped the lock... */
				if (!buffer_mapped(bh)) {
					JBUFFER_TRACE(jh, "buffer got unmapped");
					goto no_journal;
				}
				/* The buffer may become locked again at any
				   time if it is redirtied */
			}

			/*
			 * We cannot remove the buffer with io error from the
			 * committing transaction, because otherwise it would
			 * miss the error and the commit would not abort.
			 */
			if (unlikely(!buffer_uptodate(bh))) {
				ret = -EIO;
				goto no_journal;
			}

			if (jh->b_transaction != NULL) {
				JBUFFER_TRACE(jh, "unfile from commit");
				__journal_temp_unlink_buffer(jh);
				/* It still points to the committing
				 * transaction; move it to this one so
				 * that the refile assert checks are
				 * happy. */
				jh->b_transaction = handle->h_transaction;
			}
			/* The buffer will be refiled below */

		}
		/*
		 * Special case --- the buffer might actually have been
		 * allocated and then immediately deallocated in the previous,
		 * committing transaction, so might still be left on that
		 * transaction's metadata lists.
		 */
		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
			JBUFFER_TRACE(jh, "not on correct data list: unfile");
			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
			__journal_temp_unlink_buffer(jh);
			jh->b_transaction = handle->h_transaction;
			JBUFFER_TRACE(jh, "file as data");
			__journal_file_buffer(jh, handle->h_transaction,
						BJ_SyncData);
		}
	} else {
		JBUFFER_TRACE(jh, "not on a transaction");
		__journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
	}
no_journal:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	if (need_brelse) {
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
	}
	JBUFFER_TRACE(jh, "exit");
	journal_put_journal_head(jh);
	return ret;
}

/**
 * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
 * @handle: transaction to add buffer to.
 * @bh: buffer to mark
 *
 * Mark dirty metadata which needs to be journaled as part of the current
 * transaction.
 *
 * The buffer is placed on the transaction's metadata list and is marked
 * as belonging to the transaction.
 *
 * Returns error number or 0 on success.
 *
 * Special care needs to be taken if the buffer already belongs to the
 * current committing transaction (in which case we should have frozen
 * data present for that commit).  In that case, we don't relink the
 * buffer: that only gets done when the old transaction finally
 * completes its commit.
 */
int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = bh2jh(bh);

	jbd_debug(5, "journal_head %p\n", jh);
	JBUFFER_TRACE(jh, "entry");
	if (is_handle_aborted(handle))
		goto out;

	jbd_lock_bh_state(bh);

	if (jh->b_modified == 0) {
		/*
		 * This buffer was modified and is becoming part of
		 * the transaction.  This needs to be done once per
		 * transaction.  -bzzz
		 */
		jh->b_modified = 1;
		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
		handle->h_buffer_credits--;
	}

	/*
	 * fastpath, to avoid expensive locking.  If this buffer is already
	 * on the running transaction's metadata list there is nothing to do.
	 * Nobody can take it off again because there is a handle open.
	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
	 * result in this test being false, so we go in and take the locks.
	 */
	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
		JBUFFER_TRACE(jh, "fastpath");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_running_transaction);
		goto out_unlock_bh;
	}

	set_buffer_jbddirty(bh);

	/*
	 * Metadata already on the current transaction list doesn't
	 * need to be filed.  Metadata on another transaction's list must
	 * be committing, and will be refiled once the commit completes:
	 * leave it alone for now.
	 */
	if (jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "already on other transaction");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);
		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
		/* And this case is illegal: we can't reuse another
		 * transaction's data buffer, ever. */
		goto out_unlock_bh;
	}

	/* That test should have eliminated the following case: */
	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);

	JBUFFER_TRACE(jh, "file as BJ_Metadata");
	spin_lock(&journal->j_list_lock);
	__journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
	spin_unlock(&journal->j_list_lock);
out_unlock_bh:
	jbd_unlock_bh_state(bh);
out:
	JBUFFER_TRACE(jh, "exit");
	return 0;
}

/*
 * journal_release_buffer: undo a get_write_access without any buffer
 * updates, if the update decided in the end that it didn't need access.
 *
 */
void
journal_release_buffer(handle_t *handle, struct buffer_head *bh)
{
	BUFFER_TRACE(bh, "entry");
}

/**
 * void journal_forget() - bforget() for potentially-journaled buffers.
 * @handle: transaction handle
 * @bh:     bh to 'forget'
 *
 * We can only do the bforget if there are no commits pending against the
 * buffer.  If the buffer is dirty in the current running transaction we
 * can safely unlink it.
 *
 * bh may not be a journalled buffer at all - it may be a non-JBD
 * buffer which came off the hashtable.  Check for this.
 *
 * Decrements bh->b_count by one.
 *
 * Allow this call even if the handle has aborted --- it may be part of
 * the caller's cleanup after an abort.
 */
int journal_forget (handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh;
	int drop_reserve = 0;
	int err = 0;
	int was_modified = 0;

	BUFFER_TRACE(bh, "entry");

	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	if (!buffer_jbd(bh))
		goto not_jbd;
	jh = bh2jh(bh);

	/* Critical error: attempting to delete a bitmap buffer, maybe?
	 * Don't do any jbd operations, and return an error. */
	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
			 "inconsistent data on disk")) {
		err = -EIO;
		goto not_jbd;
	}

	/* keep track of whether or not this transaction modified us */
	was_modified = jh->b_modified;

	/*
	 * The buffer's going from the transaction, we must drop
	 * all references -bzzz
	 */
	jh->b_modified = 0;

	if (jh->b_transaction == handle->h_transaction) {
		J_ASSERT_JH(jh, !jh->b_frozen_data);

		/* If we are forgetting a buffer which is already part
		 * of this transaction, then we can just drop it from
		 * the transaction immediately. */
		clear_buffer_dirty(bh);
		clear_buffer_jbddirty(bh);

		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");

		/*
		 * we only want to drop a reference if this transaction
		 * modified the buffer
		 */
		if (was_modified)
			drop_reserve = 1;

		/*
		 * We are no longer going to journal this buffer.
		 * However, the commit of this transaction is still
		 * important to the buffer: the delete that we are now
		 * processing might obsolete an old log entry, so by
		 * committing, we can satisfy the buffer's checkpoint.
		 *
		 * So, if we have a checkpoint on the buffer, we should
		 * now refile the buffer on our BJ_Forget list so that
		 * we know to remove the checkpoint after we commit.
		 */

		if (jh->b_cp_transaction) {
			__journal_temp_unlink_buffer(jh);
			__journal_file_buffer(jh, transaction, BJ_Forget);
		} else {
			__journal_unfile_buffer(jh);
			journal_remove_journal_head(bh);
			__brelse(bh);
			if (!buffer_jbd(bh)) {
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				__bforget(bh);
				goto drop;
			}
		}
	} else if (jh->b_transaction) {
		J_ASSERT_JH(jh, (jh->b_transaction ==
				 journal->j_committing_transaction));
		/* However, if the buffer is still owned by a prior
		 * (committing) transaction, we can't drop it yet... */
		JBUFFER_TRACE(jh, "belongs to older transaction");
		/* ... but we CAN drop it from the new transaction if we
		 * have also modified it since the original commit. */

		if (jh->b_next_transaction) {
			J_ASSERT(jh->b_next_transaction == transaction);
			jh->b_next_transaction = NULL;

			/*
			 * only drop a reference if this transaction modified
			 * the buffer
			 */
			if (was_modified)
				drop_reserve = 1;
		}
	}

not_jbd:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	__brelse(bh);
drop:
	if (drop_reserve) {
		/* no need to reserve log space for this block -bzzz */
		handle->h_buffer_credits++;
	}
	return err;
}
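/*
 * Editor's illustration (not part of the original file): a filesystem
 * freeing a metadata block it may have journaled calls journal_forget()
 * in place of bforget().  As documented above, the call consumes one
 * reference to bh (it decrements b_count), so the caller takes one
 * first.  The function name is hypothetical.
 */
#if 0	/* example only */
static void example_free_metadata_block(handle_t *handle,
					struct buffer_head *bh)
{
	get_bh(bh);			/* reference consumed below */
	journal_forget(handle, bh);	/* instead of bforget(bh) */
}
#endif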
/**
 * int journal_stop() - complete a transaction
 * @handle: transaction to complete.
 *
 * All done for a particular handle.
 *
 * There is not much action needed here.  We just return any remaining
 * buffer credits to the transaction and remove the handle.  The only
 * complication is that we need to start a commit operation if the
 * filesystem is marked for synchronous update.
 *
 * journal_stop itself will not usually return an error, but it may
 * do so in unusual circumstances.  In particular, expect it to
 * return -EIO if a journal_abort has been executed since the
 * transaction began.
 */
int journal_stop(handle_t *handle)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int err;
	pid_t pid;

	J_ASSERT(journal_current_handle() == handle);

	if (is_handle_aborted(handle))
		err = -EIO;
	else {
		J_ASSERT(transaction->t_updates > 0);
		err = 0;
	}

	if (--handle->h_ref > 0) {
		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
			  handle->h_ref);
		return err;
	}

	jbd_debug(4, "Handle %p going down\n", handle);

	/*
	 * Implement synchronous transaction batching.  If the handle
	 * was synchronous, don't force a commit immediately.  Let's
	 * yield and let another thread piggyback onto this transaction.
	 * Keep doing that while new threads continue to arrive.
	 * It doesn't cost much - we're about to run a commit and sleep
	 * on IO anyway.  Speeds up many-threaded, many-dir operations
	 * by 30x or more...
	 *
	 * We try and optimize the sleep time against what the underlying disk
	 * can do, instead of having a static sleep time.  This is useful for
	 * the case where our storage is so fast that it is better to go ahead
	 * and force a flush and wait for the transaction to be committed
	 * than it is to wait for an arbitrary amount of time for new writers
	 * to join the transaction.  We achieve this by measuring how long it
	 * takes to commit a transaction, and compare it with how long this
	 * transaction has been running, and if run time < commit time then we
	 * sleep for the delta and commit.  This greatly helps super fast disks
	 * that would see slowdowns as more threads started doing fsyncs.
	 *
	 * But don't do this if this process was the most recent one to
	 * perform a synchronous write.  We do this to detect the case where a
	 * single process is doing a stream of sync writes.  No point in
	 * waiting for joiners in that case.
	 */
	pid = current->pid;
	if (handle->h_sync && journal->j_last_sync_writer != pid) {
		u64 commit_time, trans_time;

		journal->j_last_sync_writer = pid;

		spin_lock(&journal->j_state_lock);
		commit_time = journal->j_average_commit_time;
		spin_unlock(&journal->j_state_lock);

		trans_time = ktime_to_ns(ktime_sub(ktime_get(),
						   transaction->t_start_time));

		commit_time = min_t(u64, commit_time,
				    1000*jiffies_to_usecs(1));

		if (trans_time < commit_time) {
			ktime_t expires = ktime_add_ns(ktime_get(),
						       commit_time);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
		}
	}

	if (handle->h_sync)
		transaction->t_synchronous_commit = 1;
	current->journal_info = NULL;
	spin_lock(&journal->j_state_lock);
	spin_lock(&transaction->t_handle_lock);
	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	transaction->t_updates--;
	if (!transaction->t_updates) {
		wake_up(&journal->j_wait_updates);
		if (journal->j_barrier_count)
			wake_up(&journal->j_wait_transaction_locked);
	}

	/*
	 * If the handle is marked SYNC, we need to set another commit
	 * going!  We also want to force a commit if the current
	 * transaction is occupying too much of the log, or if the
	 * transaction is too old now.
	 */
	if (handle->h_sync ||
			transaction->t_outstanding_credits >
				journal->j_max_transaction_buffers ||
			time_after_eq(jiffies, transaction->t_expires)) {
		/* Do this even for aborted journals: an abort still
		 * completes the commit thread, it just doesn't write
		 * anything to disk. */
		tid_t tid = transaction->t_tid;

		spin_unlock(&transaction->t_handle_lock);
		jbd_debug(2, "transaction too old, requesting commit for "
					"handle %p\n", handle);
		/* This is non-blocking */
		__log_start_commit(journal, transaction->t_tid);
		spin_unlock(&journal->j_state_lock);

		/*
		 * Special case: JFS_SYNC synchronous updates require us
		 * to wait for the commit to complete.
		 */
		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
			err = log_wait_commit(journal, tid);
	} else {
		spin_unlock(&transaction->t_handle_lock);
		spin_unlock(&journal->j_state_lock);
	}

	lock_map_release(&handle->h_lockdep_map);

	jbd_free_handle(handle);
	return err;
}

/**
 * int journal_force_commit() - force any uncommitted transactions
 * @journal: journal to force
 *
 * For synchronous operations: force any uncommitted transactions
 * to disk.  May seem kludgy, but it reuses all the handle batching
 * code in a very simple manner.
 */
int journal_force_commit(journal_t *journal)
{
	handle_t *handle;
	int ret;

	handle = journal_start(journal, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
	} else {
		handle->h_sync = 1;
		ret = journal_stop(handle);
	}
	return ret;
}

/*
 *
 * List management code snippets: various functions for manipulating the
 * transaction buffer lists.
 *
 */

/*
 * Append a buffer to a transaction list, given the transaction's list head
 * pointer.
 *
 * j_list_lock is held.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 */

static inline void
__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
{
	if (!*list) {
		jh->b_tnext = jh->b_tprev = jh;
		*list = jh;
	} else {
		/* Insert at the tail of the list to preserve order */
		struct journal_head *first = *list, *last = first->b_tprev;
		jh->b_tprev = last;
		jh->b_tnext = first;
		last->b_tnext = first->b_tprev = jh;
	}
}

/*
 * Remove a buffer from a transaction list, given the transaction's list
 * head pointer.
 *
 * Called with j_list_lock held, and the journal may not be locked.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 */

static inline void
__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
{
	if (*list == jh) {
		*list = jh->b_tnext;
		if (*list == jh)
			*list = NULL;
	}
	jh->b_tprev->b_tnext = jh->b_tnext;
	jh->b_tnext->b_tprev = jh->b_tprev;
}

/*
 * Remove a buffer from the appropriate transaction list.
 *
 * Note that this function can *change* the value of
 * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
 * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
 * is holding onto a copy of one of these pointers, it could go bad.
 * Generally the caller needs to re-read the pointer from the transaction_t.
 *
 * Called under j_list_lock.  The journal may not be locked.
 */
static void __journal_temp_unlink_buffer(struct journal_head *jh)
{
	struct journal_head **list = NULL;
	transaction_t *transaction;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	transaction = jh->b_transaction;
	if (transaction)
		assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	if (jh->b_jlist != BJ_None)
		J_ASSERT_JH(jh, transaction != NULL);

	switch (jh->b_jlist) {
	case BJ_None:
		return;
	case BJ_SyncData:
		list = &transaction->t_sync_datalist;
		break;
	case BJ_Metadata:
		transaction->t_nr_buffers--;
		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	case BJ_Locked:
		list = &transaction->t_locked_list;
		break;
	}

	__blist_del_buffer(list, jh);
	jh->b_jlist = BJ_None;
	if (test_clear_buffer_jbddirty(bh))
		mark_buffer_dirty(bh);	/* Expose it to the VM */
}

void __journal_unfile_buffer(struct journal_head *jh)
{
	__journal_temp_unlink_buffer(jh);
	jh->b_transaction = NULL;
}

void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&journal->j_list_lock);
	__journal_unfile_buffer(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}

/*
 * Called from journal_try_to_free_buffers().
 *
 * Called under jbd_lock_bh_state(bh)
 */
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
{
	struct journal_head *jh;

	jh = bh2jh(bh);

	if (buffer_locked(bh) || buffer_dirty(bh))
		goto out;

	if (jh->b_next_transaction != NULL)
		goto out;

	spin_lock(&journal->j_list_lock);
	if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
		if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
			/* A written-back ordered data buffer */
			JBUFFER_TRACE(jh, "release data");
			__journal_unfile_buffer(jh);
			journal_remove_journal_head(bh);
			__brelse(bh);
		}
	} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
		/* written-back checkpointed metadata buffer */
		if (jh->b_jlist == BJ_None) {
			JBUFFER_TRACE(jh, "remove from checkpoint list");
			__journal_remove_checkpoint(jh);
			journal_remove_journal_head(bh);
			__brelse(bh);
		}
	}
	spin_unlock(&journal->j_list_lock);
out:
	return;
}

/*
 * journal_try_to_free_buffers() could race with journal_commit_transaction().
 * The latter might still hold a count on buffers when inspecting
 * them on t_sync_datalist or t_locked_list.
 *
 * journal_try_to_free_buffers() will call this function to
 * wait for the current transaction to finish syncing data buffers, before
 * trying to free that buffer.
 *
 * Called with journal->j_state_lock held.
 */
static void journal_wait_for_transaction_sync_data(journal_t *journal)
{
	transaction_t *transaction = NULL;
	tid_t tid;

	spin_lock(&journal->j_state_lock);
	transaction = journal->j_committing_transaction;

	if (!transaction) {
		spin_unlock(&journal->j_state_lock);
		return;
	}

	tid = transaction->t_tid;
	spin_unlock(&journal->j_state_lock);
	log_wait_commit(journal, tid);
}
/**
 * int journal_try_to_free_buffers() - try to free page buffers.
 * @journal: journal for operation
 * @page: to try and free
 * @gfp_mask: we use the mask to detect how hard we should try to release
 * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for commit code to
 * release the buffers.
 *
 * For all the buffers on this page,
 * if they are fully written out ordered data, move them onto BUF_CLEAN
 * so try_to_free_buffers() can reap them.
 *
 * This function returns non-zero if we wish try_to_free_buffers()
 * to be called. We do this if the page is releasable by try_to_free_buffers().
 * We also do it if the page has locked or dirty buffers and the caller wants
 * us to perform sync or async writeout.
 *
 * This complicates JBD locking somewhat. We aren't protected by the
 * BKL here. We wish to remove the buffer from its committing or
 * running transaction's ->t_datalist via __journal_unfile_buffer.
 *
 * This may *change* the value of transaction_t->t_datalist, so anyone
 * who looks at t_datalist needs to lock against this function.
 *
 * Even worse, someone may be doing a journal_dirty_data on this
 * buffer. So we need to lock against that. journal_dirty_data()
 * will come out of the lock with the buffer dirty, which makes it
 * ineligible for release here.
 *
 * Who else is affected by this? hmm... Really the only contender
 * is do_get_write_access() - it could be looking at the buffer while
 * journal_try_to_free_buffer() is changing its state. But that
 * cannot happen because we never reallocate freed data as metadata
 * while the data is part of a transaction. Yes?
 *
 * Return 0 on failure, 1 on success
 */
int journal_try_to_free_buffers(journal_t *journal,
				struct page *page, gfp_t gfp_mask)
{
	struct buffer_head *head;
	struct buffer_head *bh;
	int ret = 0;

	J_ASSERT(PageLocked(page));

	head = page_buffers(page);
	bh = head;
	do {
		struct journal_head *jh;

		/*
		 * We take our own ref against the journal_head here to avoid
		 * having to add tons of locking around each instance of
		 * journal_remove_journal_head() and journal_put_journal_head().
		 */
		jh = journal_grab_journal_head(bh);
		if (!jh)
			continue;

		jbd_lock_bh_state(bh);
		__journal_try_to_free_buffer(journal, bh);
		journal_put_journal_head(jh);
		jbd_unlock_bh_state(bh);
		if (buffer_jbd(bh))
			goto busy;
	} while ((bh = bh->b_this_page) != head);

	ret = try_to_free_buffers(page);

	/*
	 * There are a number of places where journal_try_to_free_buffers()
	 * could race with journal_commit_transaction(); the latter still
	 * holds a reference to the buffers while processing them, so
	 * try_to_free_buffers() fails to free them. Some callers of
	 * releasepage() require that the page's buffers be dropped;
	 * others (such as generic_file_direct_IO()) treat a failure to
	 * free as an error.
	 *
	 * So, if the caller of try_to_release_page() wants the synchronous
	 * behaviour (i.e. make sure buffers are dropped upon return),
	 * let's wait for the current transaction to finish flushing its
	 * dirty data buffers, then try to free those buffers again.
	 */
	if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
		journal_wait_for_transaction_sync_data(journal);
		ret = try_to_free_buffers(page);
	}

busy:
	return ret;
}
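/*
 * Editor's illustrative sketch (not part of the original file): a
 * filesystem routes its address_space ->releasepage() here, loosely
 * modelled on ext3_releasepage(); EXT3_JOURNAL() maps an inode to its
 * journal_t.
 */
#if 0	/* example only, compiled out */
static int example_releasepage(struct page *page, gfp_t wait)
{
	journal_t *journal = EXT3_JOURNAL(page->mapping->host);

	WARN_ON(PageChecked(page));	/* journaled-data pages stay pinned */
	if (!page_has_buffers(page))
		return 0;
	return journal_try_to_free_buffers(journal, page, wait);
}
#endif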
/*
 * This buffer is no longer needed. If it is on an older transaction's
 * checkpoint list we need to record it on this transaction's forget list
 * to pin this buffer (and hence its checkpointing transaction) down until
 * this transaction commits. If the buffer isn't on a checkpoint list, we
 * release it.
 * Returns non-zero if JBD no longer has an interest in the buffer.
 *
 * Called under j_list_lock.
 *
 * Called under jbd_lock_bh_state(bh).
 */
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
	int may_free = 1;
	struct buffer_head *bh = jh2bh(jh);

	__journal_unfile_buffer(jh);

	if (jh->b_cp_transaction) {
		JBUFFER_TRACE(jh, "on running+cp transaction");
		__journal_file_buffer(jh, transaction, BJ_Forget);
		clear_buffer_jbddirty(bh);
		may_free = 0;
	} else {
		JBUFFER_TRACE(jh, "on running transaction");
		journal_remove_journal_head(bh);
		__brelse(bh);
	}
	return may_free;
}

/*
 * journal_invalidatepage
 *
 * This code is tricky. It has a number of cases to deal with.
 *
 * There are two invariants which this code relies on:
 *
 * i_size must be updated on disk before we start calling invalidatepage on the
 * data.
 *
 * This is done in ext3 by defining an ext3_setattr method which
 * updates i_size before truncate gets going. By maintaining this
 * invariant, we can be sure that it is safe to throw away any buffers
 * attached to the current transaction: once the transaction commits,
 * we know that the data will not be needed.
 *
 * Note however that we can *not* throw away data belonging to the
 * previous, committing transaction!
 *
 * Any disk blocks which *are* part of the previous, committing
 * transaction (and which therefore cannot be discarded immediately) are
 * not going to be reused in the new running transaction.
 *
 * The bitmap committed_data images guarantee this: any block which is
 * allocated in one transaction and removed in the next will be marked
 * as in-use in the committed_data bitmap, so cannot be reused until
 * the next transaction to delete the block commits. This means that
 * leaving committing buffers dirty is quite safe: the disk blocks
 * cannot be reallocated to a different file and so buffer aliasing is
 * not possible.
 *
 * The above applies mainly to ordered data mode. In writeback mode we
 * don't make guarantees about the order in which data hits disk --- in
 * particular we don't guarantee that new dirty data is flushed before
 * transaction commit --- so it is always safe just to discard data
 * immediately in that mode. --sct
 */
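/*
 * Editor's illustrative sketch (not part of the original file) of the
 * first invariant above: commit the new i_size through the journal
 * before invalidating any data pages. Loosely modelled on the
 * ext3_setattr() ordering; the credit count is illustrative.
 */
#if 0	/* example only, compiled out */
static int example_shrink_isize(struct inode *inode, loff_t new_size)
{
	handle_t *handle;
	int err;

	handle = ext3_journal_start(inode, 3);	/* 3 credits: illustrative */
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	i_size_write(inode, new_size);		/* update i_size first... */
	err = ext3_mark_inode_dirty(handle, inode);
	ext3_journal_stop(handle);
	/* ...only now is it safe to invalidate the truncated pages. */
	return err;
}
#endif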
/*
 * The journal_unmap_buffer helper function returns zero if the buffer
 * concerned remains pinned as an anonymous buffer belonging to an older
 * transaction.
 *
 * We're outside-transaction here. Either or both of j_running_transaction
 * and j_committing_transaction may be NULL.
 */
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
{
	transaction_t *transaction;
	struct journal_head *jh;
	int may_free = 1;
	int ret;

	BUFFER_TRACE(bh, "entry");

	/*
	 * It is safe to proceed here without the j_list_lock because the
	 * buffers cannot be stolen by try_to_free_buffers as long as we are
	 * holding the page lock. --sct
	 */

	if (!buffer_jbd(bh))
		goto zap_buffer_unlocked;

	spin_lock(&journal->j_state_lock);
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	jh = journal_grab_journal_head(bh);
	if (!jh)
		goto zap_buffer_no_jh;

	transaction = jh->b_transaction;
	if (transaction == NULL) {
		/* First case: not on any transaction. If it
		 * has no checkpoint link, then we can zap it:
		 * it's a writeback-mode buffer so we don't care
		 * if it hits disk safely. */
		if (!jh->b_cp_transaction) {
			JBUFFER_TRACE(jh, "not on any transaction: zap");
			goto zap_buffer;
		}

		if (!buffer_dirty(bh)) {
			/* bdflush has written it. We can drop it now */
			goto zap_buffer;
		}

		/* OK, it must be in the journal but still not
		 * written fully to disk: it's metadata or
		 * journaled data... */

		if (journal->j_running_transaction) {
			/* ... and once the current transaction has
			 * committed, the buffer won't be needed any
			 * longer. */
			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
			ret = __dispose_buffer(jh,
					journal->j_running_transaction);
			journal_put_journal_head(jh);
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			spin_unlock(&journal->j_state_lock);
			return ret;
		} else {
			/* There is no currently-running transaction. So the
			 * orphan record which we wrote for this file must have
			 * passed into commit. We must attach this buffer to
			 * the committing transaction, if it exists. */
			if (journal->j_committing_transaction) {
				JBUFFER_TRACE(jh, "give to committing trans");
				ret = __dispose_buffer(jh,
					journal->j_committing_transaction);
				journal_put_journal_head(jh);
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				spin_unlock(&journal->j_state_lock);
				return ret;
			} else {
				/* The orphan record's transaction has
				 * committed. We can cleanse this buffer */
				clear_buffer_jbddirty(bh);
				goto zap_buffer;
			}
		}
	} else if (transaction == journal->j_committing_transaction) {
		JBUFFER_TRACE(jh, "on committing transaction");
		if (jh->b_jlist == BJ_Locked) {
			/*
			 * The buffer is on the committing transaction's locked
			 * list. We have the buffer locked, so I/O has
			 * completed. So we can nail the buffer now.
			 */
			may_free = __dispose_buffer(jh, transaction);
			goto zap_buffer;
		}
		/*
		 * If it is committing, we simply cannot touch it. We
		 * can remove its next_transaction pointer from the
		 * running transaction if that is set, but nothing
		 * else.
		 */
		set_buffer_freed(bh);
		if (jh->b_next_transaction) {
			J_ASSERT(jh->b_next_transaction ==
					journal->j_running_transaction);
			jh->b_next_transaction = NULL;
		}
		journal_put_journal_head(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		spin_unlock(&journal->j_state_lock);
		return 0;
	} else {
		/* Good, the buffer belongs to the running transaction.
		 * We are writing our own transaction's data, not any
		 * previous one's, so it is safe to throw it away
		 * (remember that we expect the filesystem to have set
		 * i_size already for this truncate so recovery will not
		 * expose the disk blocks we are discarding here.) */
		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
		JBUFFER_TRACE(jh, "on running transaction");
		may_free = __dispose_buffer(jh, transaction);
	}

zap_buffer:
	journal_put_journal_head(jh);
zap_buffer_no_jh:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	spin_unlock(&journal->j_state_lock);
zap_buffer_unlocked:
	clear_buffer_dirty(bh);
	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	bh->b_bdev = NULL;
	return may_free;
}

/**
 * void journal_invalidatepage() - invalidate a journal page
 * @journal: journal to use for flush
 * @page: page to flush
 * @offset: start offset within the page of the range to invalidate
 *
 * Reap page buffers containing data after offset in page.
 */
void journal_invalidatepage(journal_t *journal,
		      struct page *page,
		      unsigned long offset)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	int may_free = 1;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	/* We will potentially be playing with lists other than just the
	 * data lists (especially for journaled data mode), so be
	 * cautious in our locking. */

	head = bh = page_buffers(page);
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		if (offset <= curr_off) {
			/* This block is wholly outside the truncation point */
			lock_buffer(bh);
			may_free &= journal_unmap_buffer(journal, bh);
			unlock_buffer(bh);
		}
		curr_off = next_off;
		bh = next;

	} while (bh != head);

	if (!offset) {
		if (may_free && try_to_free_buffers(page))
			J_ASSERT(!page_has_buffers(page));
	}
}
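/*
 * Editor's illustrative sketch (not part of the original file): the
 * address_space ->invalidatepage() hook forwarding here, loosely
 * modelled on ext3_invalidatepage().
 */
#if 0	/* example only, compiled out */
static void example_invalidatepage(struct page *page, unsigned long offset)
{
	journal_t *journal = EXT3_JOURNAL(page->mapping->host);

	/* A page being fully invalidated no longer needs checking */
	if (offset == 0)
		ClearPageChecked(page);
	journal_invalidatepage(journal, page, offset);
}
#endif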
/*
 * File a buffer on the given transaction list.
 */
void __journal_file_buffer(struct journal_head *jh,
			transaction_t *transaction, int jlist)
{
	struct journal_head **list = NULL;
	int was_dirty = 0;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_transaction == NULL);

	if (jh->b_transaction && jh->b_jlist == jlist)
		return;

	/* The following list of buffer states needs to be consistent
	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
	 * state. */

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		if (test_clear_buffer_dirty(bh) ||
		    test_clear_buffer_jbddirty(bh))
			was_dirty = 1;
	}

	if (jh->b_transaction)
		__journal_temp_unlink_buffer(jh);
	jh->b_transaction = transaction;

	switch (jlist) {
	case BJ_None:
		J_ASSERT_JH(jh, !jh->b_committed_data);
		J_ASSERT_JH(jh, !jh->b_frozen_data);
		return;
	case BJ_SyncData:
		list = &transaction->t_sync_datalist;
		break;
	case BJ_Metadata:
		transaction->t_nr_buffers++;
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	case BJ_Locked:
		list = &transaction->t_locked_list;
		break;
	}

	__blist_add_buffer(list, jh);
	jh->b_jlist = jlist;

	if (was_dirty)
		set_buffer_jbddirty(bh);
}

void journal_file_buffer(struct journal_head *jh,
		transaction_t *transaction, int jlist)
{
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&transaction->t_journal->j_list_lock);
	__journal_file_buffer(jh, transaction, jlist);
	spin_unlock(&transaction->t_journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}
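/*
 * Editor's illustrative sketch (not part of the original file): filing
 * a just-modified metadata buffer onto its handle's running
 * transaction, the way journal_dirty_metadata() ultimately does; fast
 * paths and error handling elided.
 */
#if 0	/* example only, compiled out */
static void example_file_metadata(handle_t *handle, struct journal_head *jh)
{
	transaction_t *transaction = handle->h_transaction;

	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&transaction->t_journal->j_list_lock);
	__journal_file_buffer(jh, transaction, BJ_Metadata);
	spin_unlock(&transaction->t_journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}
#endif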
/*
 * Remove a buffer from its current buffer list in preparation for
 * dropping it from its current transaction entirely. If the buffer has
 * already started to be used by a subsequent transaction, refile the
 * buffer on that transaction's metadata list.
 *
 * Called under journal->j_list_lock
 *
 * Called under jbd_lock_bh_state(jh2bh(jh))
 */
void __journal_refile_buffer(struct journal_head *jh)
{
	int was_dirty;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	if (jh->b_transaction)
		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);

	/* If the buffer is now unused, just drop it. */
	if (jh->b_next_transaction == NULL) {
		__journal_unfile_buffer(jh);
		return;
	}

	/*
	 * It has been modified by a later transaction: add it to the new
	 * transaction's metadata list.
	 */

	was_dirty = test_clear_buffer_jbddirty(bh);
	__journal_temp_unlink_buffer(jh);
	jh->b_transaction = jh->b_next_transaction;
	jh->b_next_transaction = NULL;
	__journal_file_buffer(jh, jh->b_transaction,
				jh->b_modified ? BJ_Metadata : BJ_Reserved);
	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

	if (was_dirty)
		set_buffer_jbddirty(bh);
}

/*
 * For the unlocked version of this call, also make sure that any
 * hanging journal_head is cleaned up if necessary.
 *
 * __journal_refile_buffer is usually called as part of a single locked
 * operation on a buffer_head, in which the caller is probably going to
 * be hooking the journal_head onto other lists. In that case it is up
 * to the caller to remove the journal_head if necessary. For the
 * unlocked journal_refile_buffer call, the caller isn't going to be
 * doing anything else to the buffer so we need to do the cleanup
 * ourselves to avoid a jh leak.
 *
 * *** The journal_head may be freed by this call! ***
 */
void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	__journal_refile_buffer(jh);
	jbd_unlock_bh_state(bh);
	journal_remove_journal_head(bh);

	spin_unlock(&journal->j_list_lock);
	__brelse(bh);
}
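/*
 * Editor's illustrative sketch (not part of the original file): because
 * journal_refile_buffer() may free the journal_head, a caller that
 * still needs the buffer_head afterwards must hold its own reference
 * across the call.
 */
#if 0	/* example only, compiled out */
static void example_refile_and_reuse(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	get_bh(bh);				/* our own bh reference */
	journal_refile_buffer(journal, jh);
	/* jh may be gone now; bh is still pinned by our reference */
	wait_on_buffer(bh);			/* e.g. keep using bh here */
	put_bh(bh);
}
#endif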