fs/jbd2/transaction.c at v2.6.26-rc9

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / fs / jbd2 / transaction.c
at v2.6.26-rc9 2143 lines 65 kB view raw
wrap content
   1/*
   2 * linux/fs/jbd2/transaction.c
   3 *
   4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
   5 *
   6 * Copyright 1998 Red Hat corp --- All Rights Reserved
   7 *
   8 * This file is part of the Linux kernel and is made available under
   9 * the terms of the GNU General Public License, version 2, or at your
  10 * option, any later version, incorporated herein by reference.
  11 *
  12 * Generic filesystem transaction handling code; part of the ext2fs
  13 * journaling system.
  14 *
  15 * This file manages transactions (compound commits managed by the
  16 * journaling code) and handles (individual atomic operations by the
  17 * filesystem).
  18 */
  19
  20#include <linux/time.h>
  21#include <linux/fs.h>
  22#include <linux/jbd2.h>
  23#include <linux/errno.h>
  24#include <linux/slab.h>
  25#include <linux/timer.h>
  26#include <linux/mm.h>
  27#include <linux/highmem.h>
  28
  29static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
  30
  31/*
  32 * jbd2_get_transaction: obtain a new transaction_t object.
  33 *
  34 * Simply allocate and initialise a new transaction.  Create it in
  35 * RUNNING state and add it to the current journal (which should not
  36 * have an existing running transaction: we only make a new transaction
  37 * once we have started to commit the old one).
  38 *
  39 * Preconditions:
  40 *	The journal MUST be locked.  We don't perform atomic mallocs on the
  41 *	new transaction	and we can't block without protecting against other
  42 *	processes trying to touch the journal while it is in transition.
  43 *
  44 * Called under j_state_lock
  45 */
  46
  47static transaction_t *
  48jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
  49{
  50	transaction->t_journal = journal;
  51	transaction->t_state = T_RUNNING;
  52	transaction->t_tid = journal->j_transaction_sequence++;
  53	transaction->t_expires = jiffies + journal->j_commit_interval;
  54	spin_lock_init(&transaction->t_handle_lock);
  55
  56	/* Set up the commit timer for the new transaction. */
  57	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
  58	add_timer(&journal->j_commit_timer);
  59
  60	J_ASSERT(journal->j_running_transaction == NULL);
  61	journal->j_running_transaction = transaction;
  62	transaction->t_max_wait = 0;
  63	transaction->t_start = jiffies;
  64
  65	return transaction;
  66}
  67
  68/*
  69 * Handle management.
  70 *
  71 * A handle_t is an object which represents a single atomic update to a
  72 * filesystem, and which tracks all of the modifications which form part
  73 * of that one update.
  74 */
  75
  76/*
  77 * start_this_handle: Given a handle, deal with any locking or stalling
  78 * needed to make sure that there is enough journal space for the handle
  79 * to begin.  Attach the handle to a transaction and set up the
  80 * transaction's buffer credits.
  81 */
  82
  83static int start_this_handle(journal_t *journal, handle_t *handle)
  84{
  85	transaction_t *transaction;
  86	int needed;
  87	int nblocks = handle->h_buffer_credits;
  88	transaction_t *new_transaction = NULL;
  89	int ret = 0;
  90	unsigned long ts = jiffies;
  91
  92	if (nblocks > journal->j_max_transaction_buffers) {
  93		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
  94		       current->comm, nblocks,
  95		       journal->j_max_transaction_buffers);
  96		ret = -ENOSPC;
  97		goto out;
  98	}
  99
 100alloc_transaction:
 101	if (!journal->j_running_transaction) {
 102		new_transaction = kzalloc(sizeof(*new_transaction),
 103						GFP_NOFS|__GFP_NOFAIL);
 104		if (!new_transaction) {
 105			ret = -ENOMEM;
 106			goto out;
 107		}
 108	}
 109
 110	jbd_debug(3, "New handle %p going live.\n", handle);
 111
 112repeat:
 113
 114	/*
 115	 * We need to hold j_state_lock until t_updates has been incremented,
 116	 * for proper journal barrier handling
 117	 */
 118	spin_lock(&journal->j_state_lock);
 119repeat_locked:
 120	if (is_journal_aborted(journal) ||
 121	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
 122		spin_unlock(&journal->j_state_lock);
 123		ret = -EROFS;
 124		goto out;
 125	}
 126
 127	/* Wait on the journal's transaction barrier if necessary */
 128	if (journal->j_barrier_count) {
 129		spin_unlock(&journal->j_state_lock);
 130		wait_event(journal->j_wait_transaction_locked,
 131				journal->j_barrier_count == 0);
 132		goto repeat;
 133	}
 134
 135	if (!journal->j_running_transaction) {
 136		if (!new_transaction) {
 137			spin_unlock(&journal->j_state_lock);
 138			goto alloc_transaction;
 139		}
 140		jbd2_get_transaction(journal, new_transaction);
 141		new_transaction = NULL;
 142	}
 143
 144	transaction = journal->j_running_transaction;
 145
 146	/*
 147	 * If the current transaction is locked down for commit, wait for the
 148	 * lock to be released.
 149	 */
 150	if (transaction->t_state == T_LOCKED) {
 151		DEFINE_WAIT(wait);
 152
 153		prepare_to_wait(&journal->j_wait_transaction_locked,
 154					&wait, TASK_UNINTERRUPTIBLE);
 155		spin_unlock(&journal->j_state_lock);
 156		schedule();
 157		finish_wait(&journal->j_wait_transaction_locked, &wait);
 158		goto repeat;
 159	}
 160
 161	/*
 162	 * If there is not enough space left in the log to write all potential
 163	 * buffers requested by this operation, we need to stall pending a log
 164	 * checkpoint to free some more log space.
 165	 */
 166	spin_lock(&transaction->t_handle_lock);
 167	needed = transaction->t_outstanding_credits + nblocks;
 168
 169	if (needed > journal->j_max_transaction_buffers) {
 170		/*
 171		 * If the current transaction is already too large, then start
 172		 * to commit it: we can then go back and attach this handle to
 173		 * a new transaction.
 174		 */
 175		DEFINE_WAIT(wait);
 176
 177		jbd_debug(2, "Handle %p starting new commit...\n", handle);
 178		spin_unlock(&transaction->t_handle_lock);
 179		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
 180				TASK_UNINTERRUPTIBLE);
 181		__jbd2_log_start_commit(journal, transaction->t_tid);
 182		spin_unlock(&journal->j_state_lock);
 183		schedule();
 184		finish_wait(&journal->j_wait_transaction_locked, &wait);
 185		goto repeat;
 186	}
 187
 188	/*
 189	 * The commit code assumes that it can get enough log space
 190	 * without forcing a checkpoint.  This is *critical* for
 191	 * correctness: a checkpoint of a buffer which is also
 192	 * associated with a committing transaction creates a deadlock,
 193	 * so commit simply cannot force through checkpoints.
 194	 *
 195	 * We must therefore ensure the necessary space in the journal
 196	 * *before* starting to dirty potentially checkpointed buffers
 197	 * in the new transaction.
 198	 *
 199	 * The worst part is, any transaction currently committing can
 200	 * reduce the free space arbitrarily.  Be careful to account for
 201	 * those buffers when checkpointing.
 202	 */
 203
 204	/*
 205	 * @@@ AKPM: This seems rather over-defensive.  We're giving commit
 206	 * a _lot_ of headroom: 1/4 of the journal plus the size of
 207	 * the committing transaction.  Really, we only need to give it
 208	 * committing_transaction->t_outstanding_credits plus "enough" for
 209	 * the log control blocks.
 210	 * Also, this test is inconsitent with the matching one in
 211	 * jbd2_journal_extend().
 212	 */
 213	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
 214		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
 215		spin_unlock(&transaction->t_handle_lock);
 216		__jbd2_log_wait_for_space(journal);
 217		goto repeat_locked;
 218	}
 219
 220	/* OK, account for the buffers that this operation expects to
 221	 * use and add the handle to the running transaction. */
 222
 223	if (time_after(transaction->t_start, ts)) {
 224		ts = jbd2_time_diff(ts, transaction->t_start);
 225		if (ts > transaction->t_max_wait)
 226			transaction->t_max_wait = ts;
 227	}
 228
 229	handle->h_transaction = transaction;
 230	transaction->t_outstanding_credits += nblocks;
 231	transaction->t_updates++;
 232	transaction->t_handle_count++;
 233	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
 234		  handle, nblocks, transaction->t_outstanding_credits,
 235		  __jbd2_log_space_left(journal));
 236	spin_unlock(&transaction->t_handle_lock);
 237	spin_unlock(&journal->j_state_lock);
 238out:
 239	if (unlikely(new_transaction))		/* It's usually NULL */
 240		kfree(new_transaction);
 241	return ret;
 242}
 243
 244static struct lock_class_key jbd2_handle_key;
 245
 246/* Allocate a new handle.  This should probably be in a slab... */
 247static handle_t *new_handle(int nblocks)
 248{
 249	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
 250	if (!handle)
 251		return NULL;
 252	memset(handle, 0, sizeof(*handle));
 253	handle->h_buffer_credits = nblocks;
 254	handle->h_ref = 1;
 255
 256	lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
 257						&jbd2_handle_key, 0);
 258
 259	return handle;
 260}
 261
 262/**
 263 * handle_t *jbd2_journal_start() - Obtain a new handle.
 264 * @journal: Journal to start transaction on.
 265 * @nblocks: number of block buffer we might modify
 266 *
 267 * We make sure that the transaction can guarantee at least nblocks of
 268 * modified buffers in the log.  We block until the log can guarantee
 269 * that much space.
 270 *
 271 * This function is visible to journal users (like ext3fs), so is not
 272 * called with the journal already locked.
 273 *
 274 * Return a pointer to a newly allocated handle, or NULL on failure
 275 */
 276handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 277{
 278	handle_t *handle = journal_current_handle();
 279	int err;
 280
 281	if (!journal)
 282		return ERR_PTR(-EROFS);
 283
 284	if (handle) {
 285		J_ASSERT(handle->h_transaction->t_journal == journal);
 286		handle->h_ref++;
 287		return handle;
 288	}
 289
 290	handle = new_handle(nblocks);
 291	if (!handle)
 292		return ERR_PTR(-ENOMEM);
 293
 294	current->journal_info = handle;
 295
 296	err = start_this_handle(journal, handle);
 297	if (err < 0) {
 298		jbd2_free_handle(handle);
 299		current->journal_info = NULL;
 300		handle = ERR_PTR(err);
 301		goto out;
 302	}
 303
 304	lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
 305out:
 306	return handle;
 307}
 308
 309/**
 310 * int jbd2_journal_extend() - extend buffer credits.
 311 * @handle:  handle to 'extend'
 312 * @nblocks: nr blocks to try to extend by.
 313 *
 314 * Some transactions, such as large extends and truncates, can be done
 315 * atomically all at once or in several stages.  The operation requests
 316 * a credit for a number of buffer modications in advance, but can
 317 * extend its credit if it needs more.
 318 *
 319 * jbd2_journal_extend tries to give the running handle more buffer credits.
 320 * It does not guarantee that allocation - this is a best-effort only.
 321 * The calling process MUST be able to deal cleanly with a failure to
 322 * extend here.
 323 *
 324 * Return 0 on success, non-zero on failure.
 325 *
 326 * return code < 0 implies an error
 327 * return code > 0 implies normal transaction-full status.
 328 */
 329int jbd2_journal_extend(handle_t *handle, int nblocks)
 330{
 331	transaction_t *transaction = handle->h_transaction;
 332	journal_t *journal = transaction->t_journal;
 333	int result;
 334	int wanted;
 335
 336	result = -EIO;
 337	if (is_handle_aborted(handle))
 338		goto out;
 339
 340	result = 1;
 341
 342	spin_lock(&journal->j_state_lock);
 343
 344	/* Don't extend a locked-down transaction! */
 345	if (handle->h_transaction->t_state != T_RUNNING) {
 346		jbd_debug(3, "denied handle %p %d blocks: "
 347			  "transaction not running\n", handle, nblocks);
 348		goto error_out;
 349	}
 350
 351	spin_lock(&transaction->t_handle_lock);
 352	wanted = transaction->t_outstanding_credits + nblocks;
 353
 354	if (wanted > journal->j_max_transaction_buffers) {
 355		jbd_debug(3, "denied handle %p %d blocks: "
 356			  "transaction too large\n", handle, nblocks);
 357		goto unlock;
 358	}
 359
 360	if (wanted > __jbd2_log_space_left(journal)) {
 361		jbd_debug(3, "denied handle %p %d blocks: "
 362			  "insufficient log space\n", handle, nblocks);
 363		goto unlock;
 364	}
 365
 366	handle->h_buffer_credits += nblocks;
 367	transaction->t_outstanding_credits += nblocks;
 368	result = 0;
 369
 370	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
 371unlock:
 372	spin_unlock(&transaction->t_handle_lock);
 373error_out:
 374	spin_unlock(&journal->j_state_lock);
 375out:
 376	return result;
 377}
 378
 379
 380/**
 381 * int jbd2_journal_restart() - restart a handle .
 382 * @handle:  handle to restart
 383 * @nblocks: nr credits requested
 384 *
 385 * Restart a handle for a multi-transaction filesystem
 386 * operation.
 387 *
 388 * If the jbd2_journal_extend() call above fails to grant new buffer credits
 389 * to a running handle, a call to jbd2_journal_restart will commit the
 390 * handle's transaction so far and reattach the handle to a new
 391 * transaction capabable of guaranteeing the requested number of
 392 * credits.
 393 */
 394
 395int jbd2_journal_restart(handle_t *handle, int nblocks)
 396{
 397	transaction_t *transaction = handle->h_transaction;
 398	journal_t *journal = transaction->t_journal;
 399	int ret;
 400
 401	/* If we've had an abort of any type, don't even think about
 402	 * actually doing the restart! */
 403	if (is_handle_aborted(handle))
 404		return 0;
 405
 406	/*
 407	 * First unlink the handle from its current transaction, and start the
 408	 * commit on that.
 409	 */
 410	J_ASSERT(transaction->t_updates > 0);
 411	J_ASSERT(journal_current_handle() == handle);
 412
 413	spin_lock(&journal->j_state_lock);
 414	spin_lock(&transaction->t_handle_lock);
 415	transaction->t_outstanding_credits -= handle->h_buffer_credits;
 416	transaction->t_updates--;
 417
 418	if (!transaction->t_updates)
 419		wake_up(&journal->j_wait_updates);
 420	spin_unlock(&transaction->t_handle_lock);
 421
 422	jbd_debug(2, "restarting handle %p\n", handle);
 423	__jbd2_log_start_commit(journal, transaction->t_tid);
 424	spin_unlock(&journal->j_state_lock);
 425
 426	handle->h_buffer_credits = nblocks;
 427	ret = start_this_handle(journal, handle);
 428	return ret;
 429}
 430
 431
 432/**
 433 * void jbd2_journal_lock_updates () - establish a transaction barrier.
 434 * @journal:  Journal to establish a barrier on.
 435 *
 436 * This locks out any further updates from being started, and blocks
 437 * until all existing updates have completed, returning only once the
 438 * journal is in a quiescent state with no updates running.
 439 *
 440 * The journal lock should not be held on entry.
 441 */
 442void jbd2_journal_lock_updates(journal_t *journal)
 443{
 444	DEFINE_WAIT(wait);
 445
 446	spin_lock(&journal->j_state_lock);
 447	++journal->j_barrier_count;
 448
 449	/* Wait until there are no running updates */
 450	while (1) {
 451		transaction_t *transaction = journal->j_running_transaction;
 452
 453		if (!transaction)
 454			break;
 455
 456		spin_lock(&transaction->t_handle_lock);
 457		if (!transaction->t_updates) {
 458			spin_unlock(&transaction->t_handle_lock);
 459			break;
 460		}
 461		prepare_to_wait(&journal->j_wait_updates, &wait,
 462				TASK_UNINTERRUPTIBLE);
 463		spin_unlock(&transaction->t_handle_lock);
 464		spin_unlock(&journal->j_state_lock);
 465		schedule();
 466		finish_wait(&journal->j_wait_updates, &wait);
 467		spin_lock(&journal->j_state_lock);
 468	}
 469	spin_unlock(&journal->j_state_lock);
 470
 471	/*
 472	 * We have now established a barrier against other normal updates, but
 473	 * we also need to barrier against other jbd2_journal_lock_updates() calls
 474	 * to make sure that we serialise special journal-locked operations
 475	 * too.
 476	 */
 477	mutex_lock(&journal->j_barrier);
 478}
 479
 480/**
 481 * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
 482 * @journal:  Journal to release the barrier on.
 483 *
 484 * Release a transaction barrier obtained with jbd2_journal_lock_updates().
 485 *
 486 * Should be called without the journal lock held.
 487 */
 488void jbd2_journal_unlock_updates (journal_t *journal)
 489{
 490	J_ASSERT(journal->j_barrier_count != 0);
 491
 492	mutex_unlock(&journal->j_barrier);
 493	spin_lock(&journal->j_state_lock);
 494	--journal->j_barrier_count;
 495	spin_unlock(&journal->j_state_lock);
 496	wake_up(&journal->j_wait_transaction_locked);
 497}
 498
 499/*
 500 * Report any unexpected dirty buffers which turn up.  Normally those
 501 * indicate an error, but they can occur if the user is running (say)
 502 * tune2fs to modify the live filesystem, so we need the option of
 503 * continuing as gracefully as possible.  #
 504 *
 505 * The caller should already hold the journal lock and
 506 * j_list_lock spinlock: most callers will need those anyway
 507 * in order to probe the buffer's journaling state safely.
 508 */
 509static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 510{
 511	int jlist;
 512
 513	/* If this buffer is one which might reasonably be dirty
 514	 * --- ie. data, or not part of this journal --- then
 515	 * we're OK to leave it alone, but otherwise we need to
 516	 * move the dirty bit to the journal's own internal
 517	 * JBDDirty bit. */
 518	jlist = jh->b_jlist;
 519
 520	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
 521	    jlist == BJ_Shadow || jlist == BJ_Forget) {
 522		struct buffer_head *bh = jh2bh(jh);
 523
 524		if (test_clear_buffer_dirty(bh))
 525			set_buffer_jbddirty(bh);
 526	}
 527}
 528
 529/*
 530 * If the buffer is already part of the current transaction, then there
 531 * is nothing we need to do.  If it is already part of a prior
 532 * transaction which we are still committing to disk, then we need to
 533 * make sure that we do not overwrite the old copy: we do copy-out to
 534 * preserve the copy going to disk.  We also account the buffer against
 535 * the handle's metadata buffer credits (unless the buffer is already
 536 * part of the transaction, that is).
 537 *
 538 */
 539static int
 540do_get_write_access(handle_t *handle, struct journal_head *jh,
 541			int force_copy)
 542{
 543	struct buffer_head *bh;
 544	transaction_t *transaction;
 545	journal_t *journal;
 546	int error;
 547	char *frozen_buffer = NULL;
 548	int need_copy = 0;
 549
 550	if (is_handle_aborted(handle))
 551		return -EROFS;
 552
 553	transaction = handle->h_transaction;
 554	journal = transaction->t_journal;
 555
 556	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
 557
 558	JBUFFER_TRACE(jh, "entry");
 559repeat:
 560	bh = jh2bh(jh);
 561
 562	/* @@@ Need to check for errors here at some point. */
 563
 564	lock_buffer(bh);
 565	jbd_lock_bh_state(bh);
 566
 567	/* We now hold the buffer lock so it is safe to query the buffer
 568	 * state.  Is the buffer dirty?
 569	 *
 570	 * If so, there are two possibilities.  The buffer may be
 571	 * non-journaled, and undergoing a quite legitimate writeback.
 572	 * Otherwise, it is journaled, and we don't expect dirty buffers
 573	 * in that state (the buffers should be marked JBD_Dirty
 574	 * instead.)  So either the IO is being done under our own
 575	 * control and this is a bug, or it's a third party IO such as
 576	 * dump(8) (which may leave the buffer scheduled for read ---
 577	 * ie. locked but not dirty) or tune2fs (which may actually have
 578	 * the buffer dirtied, ugh.)  */
 579
 580	if (buffer_dirty(bh)) {
 581		/*
 582		 * First question: is this buffer already part of the current
 583		 * transaction or the existing committing transaction?
 584		 */
 585		if (jh->b_transaction) {
 586			J_ASSERT_JH(jh,
 587				jh->b_transaction == transaction ||
 588				jh->b_transaction ==
 589					journal->j_committing_transaction);
 590			if (jh->b_next_transaction)
 591				J_ASSERT_JH(jh, jh->b_next_transaction ==
 592							transaction);
 593		}
 594		/*
 595		 * In any case we need to clean the dirty flag and we must
 596		 * do it under the buffer lock to be sure we don't race
 597		 * with running write-out.
 598		 */
 599		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
 600		jbd_unexpected_dirty_buffer(jh);
 601	}
 602
 603	unlock_buffer(bh);
 604
 605	error = -EROFS;
 606	if (is_handle_aborted(handle)) {
 607		jbd_unlock_bh_state(bh);
 608		goto out;
 609	}
 610	error = 0;
 611
 612	/*
 613	 * The buffer is already part of this transaction if b_transaction or
 614	 * b_next_transaction points to it
 615	 */
 616	if (jh->b_transaction == transaction ||
 617	    jh->b_next_transaction == transaction)
 618		goto done;
 619
 620	/*
 621	 * this is the first time this transaction is touching this buffer,
 622	 * reset the modified flag
 623	 */
 624       jh->b_modified = 0;
 625
 626	/*
 627	 * If there is already a copy-out version of this buffer, then we don't
 628	 * need to make another one
 629	 */
 630	if (jh->b_frozen_data) {
 631		JBUFFER_TRACE(jh, "has frozen data");
 632		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 633		jh->b_next_transaction = transaction;
 634		goto done;
 635	}
 636
 637	/* Is there data here we need to preserve? */
 638
 639	if (jh->b_transaction && jh->b_transaction != transaction) {
 640		JBUFFER_TRACE(jh, "owned by older transaction");
 641		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 642		J_ASSERT_JH(jh, jh->b_transaction ==
 643					journal->j_committing_transaction);
 644
 645		/* There is one case we have to be very careful about.
 646		 * If the committing transaction is currently writing
 647		 * this buffer out to disk and has NOT made a copy-out,
 648		 * then we cannot modify the buffer contents at all
 649		 * right now.  The essence of copy-out is that it is the
 650		 * extra copy, not the primary copy, which gets
 651		 * journaled.  If the primary copy is already going to
 652		 * disk then we cannot do copy-out here. */
 653
 654		if (jh->b_jlist == BJ_Shadow) {
 655			DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
 656			wait_queue_head_t *wqh;
 657
 658			wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);
 659
 660			JBUFFER_TRACE(jh, "on shadow: sleep");
 661			jbd_unlock_bh_state(bh);
 662			/* commit wakes up all shadow buffers after IO */
 663			for ( ; ; ) {
 664				prepare_to_wait(wqh, &wait.wait,
 665						TASK_UNINTERRUPTIBLE);
 666				if (jh->b_jlist != BJ_Shadow)
 667					break;
 668				schedule();
 669			}
 670			finish_wait(wqh, &wait.wait);
 671			goto repeat;
 672		}
 673
 674		/* Only do the copy if the currently-owning transaction
 675		 * still needs it.  If it is on the Forget list, the
 676		 * committing transaction is past that stage.  The
 677		 * buffer had better remain locked during the kmalloc,
 678		 * but that should be true --- we hold the journal lock
 679		 * still and the buffer is already on the BUF_JOURNAL
 680		 * list so won't be flushed.
 681		 *
 682		 * Subtle point, though: if this is a get_undo_access,
 683		 * then we will be relying on the frozen_data to contain
 684		 * the new value of the committed_data record after the
 685		 * transaction, so we HAVE to force the frozen_data copy
 686		 * in that case. */
 687
 688		if (jh->b_jlist != BJ_Forget || force_copy) {
 689			JBUFFER_TRACE(jh, "generate frozen data");
 690			if (!frozen_buffer) {
 691				JBUFFER_TRACE(jh, "allocate memory for buffer");
 692				jbd_unlock_bh_state(bh);
 693				frozen_buffer =
 694					jbd2_alloc(jh2bh(jh)->b_size,
 695							 GFP_NOFS);
 696				if (!frozen_buffer) {
 697					printk(KERN_EMERG
 698					       "%s: OOM for frozen_buffer\n",
 699					       __func__);
 700					JBUFFER_TRACE(jh, "oom!");
 701					error = -ENOMEM;
 702					jbd_lock_bh_state(bh);
 703					goto done;
 704				}
 705				goto repeat;
 706			}
 707			jh->b_frozen_data = frozen_buffer;
 708			frozen_buffer = NULL;
 709			need_copy = 1;
 710		}
 711		jh->b_next_transaction = transaction;
 712	}
 713
 714
 715	/*
 716	 * Finally, if the buffer is not journaled right now, we need to make
 717	 * sure it doesn't get written to disk before the caller actually
 718	 * commits the new data
 719	 */
 720	if (!jh->b_transaction) {
 721		JBUFFER_TRACE(jh, "no transaction");
 722		J_ASSERT_JH(jh, !jh->b_next_transaction);
 723		jh->b_transaction = transaction;
 724		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 725		spin_lock(&journal->j_list_lock);
 726		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 727		spin_unlock(&journal->j_list_lock);
 728	}
 729
 730done:
 731	if (need_copy) {
 732		struct page *page;
 733		int offset;
 734		char *source;
 735
 736		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
 737			    "Possible IO failure.\n");
 738		page = jh2bh(jh)->b_page;
 739		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
 740		source = kmap_atomic(page, KM_USER0);
 741		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 742		kunmap_atomic(source, KM_USER0);
 743	}
 744	jbd_unlock_bh_state(bh);
 745
 746	/*
 747	 * If we are about to journal a buffer, then any revoke pending on it is
 748	 * no longer valid
 749	 */
 750	jbd2_journal_cancel_revoke(handle, jh);
 751
 752out:
 753	if (unlikely(frozen_buffer))	/* It's usually NULL */
 754		jbd2_free(frozen_buffer, bh->b_size);
 755
 756	JBUFFER_TRACE(jh, "exit");
 757	return error;
 758}
 759
 760/**
 761 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
 762 * @handle: transaction to add buffer modifications to
 763 * @bh:     bh to be used for metadata writes
 764 * @credits: variable that will receive credits for the buffer
 765 *
 766 * Returns an error code or 0 on success.
 767 *
 768 * In full data journalling mode the buffer may be of type BJ_AsyncData,
 769 * because we're write()ing a buffer which is also part of a shared mapping.
 770 */
 771
 772int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 773{
 774	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 775	int rc;
 776
 777	/* We do not want to get caught playing with fields which the
 778	 * log thread also manipulates.  Make sure that the buffer
 779	 * completes any outstanding IO before proceeding. */
 780	rc = do_get_write_access(handle, jh, 0);
 781	jbd2_journal_put_journal_head(jh);
 782	return rc;
 783}
 784
 785
 786/*
 787 * When the user wants to journal a newly created buffer_head
 788 * (ie. getblk() returned a new buffer and we are going to populate it
 789 * manually rather than reading off disk), then we need to keep the
 790 * buffer_head locked until it has been completely filled with new
 791 * data.  In this case, we should be able to make the assertion that
 792 * the bh is not already part of an existing transaction.
 793 *
 794 * The buffer should already be locked by the caller by this point.
 795 * There is no lock ranking violation: it was a newly created,
 796 * unlocked buffer beforehand. */
 797
 798/**
 799 * int jbd2_journal_get_create_access () - notify intent to use newly created bh
 800 * @handle: transaction to new buffer to
 801 * @bh: new buffer.
 802 *
 803 * Call this if you create a new bh.
 804 */
 805int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 806{
 807	transaction_t *transaction = handle->h_transaction;
 808	journal_t *journal = transaction->t_journal;
 809	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 810	int err;
 811
 812	jbd_debug(5, "journal_head %p\n", jh);
 813	err = -EROFS;
 814	if (is_handle_aborted(handle))
 815		goto out;
 816	err = 0;
 817
 818	JBUFFER_TRACE(jh, "entry");
 819	/*
 820	 * The buffer may already belong to this transaction due to pre-zeroing
 821	 * in the filesystem's new_block code.  It may also be on the previous,
 822	 * committing transaction's lists, but it HAS to be in Forget state in
 823	 * that case: the transaction must have deleted the buffer for it to be
 824	 * reused here.
 825	 */
 826	jbd_lock_bh_state(bh);
 827	spin_lock(&journal->j_list_lock);
 828	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
 829		jh->b_transaction == NULL ||
 830		(jh->b_transaction == journal->j_committing_transaction &&
 831			  jh->b_jlist == BJ_Forget)));
 832
 833	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 834	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 835
 836	if (jh->b_transaction == NULL) {
 837		jh->b_transaction = transaction;
 838
 839		/* first access by this transaction */
 840		jh->b_modified = 0;
 841
 842		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 843		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
 844	} else if (jh->b_transaction == journal->j_committing_transaction) {
 845		/* first access by this transaction */
 846		jh->b_modified = 0;
 847
 848		JBUFFER_TRACE(jh, "set next transaction");
 849		jh->b_next_transaction = transaction;
 850	}
 851	spin_unlock(&journal->j_list_lock);
 852	jbd_unlock_bh_state(bh);
 853
 854	/*
 855	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
 856	 * blocks which contain freed but then revoked metadata.  We need
 857	 * to cancel the revoke in case we end up freeing it yet again
 858	 * and the reallocating as data - this would cause a second revoke,
 859	 * which hits an assertion error.
 860	 */
 861	JBUFFER_TRACE(jh, "cancelling revoke");
 862	jbd2_journal_cancel_revoke(handle, jh);
 863	jbd2_journal_put_journal_head(jh);
 864out:
 865	return err;
 866}
 867
 868/**
 869 * int jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
 870 *     non-rewindable consequences
 871 * @handle: transaction
 872 * @bh: buffer to undo
 873 * @credits: store the number of taken credits here (if not NULL)
 874 *
 875 * Sometimes there is a need to distinguish between metadata which has
 876 * been committed to disk and that which has not.  The ext3fs code uses
 877 * this for freeing and allocating space, we have to make sure that we
 878 * do not reuse freed space until the deallocation has been committed,
 879 * since if we overwrote that space we would make the delete
 880 * un-rewindable in case of a crash.
 881 *
 882 * To deal with that, jbd2_journal_get_undo_access requests write access to a
 883 * buffer for parts of non-rewindable operations such as delete
 884 * operations on the bitmaps.  The journaling code must keep a copy of
 885 * the buffer's contents prior to the undo_access call until such time
 886 * as we know that the buffer has definitely been committed to disk.
 887 *
 888 * We never need to know which transaction the committed data is part
 889 * of, buffers touched here are guaranteed to be dirtied later and so
 890 * will be committed to a new transaction in due course, at which point
 891 * we can discard the old committed data pointer.
 892 *
 893 * Returns error number or 0 on success.
 894 */
 895int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 896{
 897	int err;
 898	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 899	char *committed_data = NULL;
 900
 901	JBUFFER_TRACE(jh, "entry");
 902
 903	/*
 904	 * Do this first --- it can drop the journal lock, so we want to
 905	 * make sure that obtaining the committed_data is done
 906	 * atomically wrt. completion of any outstanding commits.
 907	 */
 908	err = do_get_write_access(handle, jh, 1);
 909	if (err)
 910		goto out;
 911
 912repeat:
 913	if (!jh->b_committed_data) {
 914		committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 915		if (!committed_data) {
 916			printk(KERN_EMERG "%s: No memory for committed data\n",
 917				__func__);
 918			err = -ENOMEM;
 919			goto out;
 920		}
 921	}
 922
 923	jbd_lock_bh_state(bh);
 924	if (!jh->b_committed_data) {
 925		/* Copy out the current buffer contents into the
 926		 * preserved, committed copy. */
 927		JBUFFER_TRACE(jh, "generate b_committed data");
 928		if (!committed_data) {
 929			jbd_unlock_bh_state(bh);
 930			goto repeat;
 931		}
 932
 933		jh->b_committed_data = committed_data;
 934		committed_data = NULL;
 935		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
 936	}
 937	jbd_unlock_bh_state(bh);
 938out:
 939	jbd2_journal_put_journal_head(jh);
 940	if (unlikely(committed_data))
 941		jbd2_free(committed_data, bh->b_size);
 942	return err;
 943}
 944
 945/**
 946 * int jbd2_journal_dirty_data() -  mark a buffer as containing dirty data which
 947 *                             needs to be flushed before we can commit the
 948 *                             current transaction.
 949 * @handle: transaction
 950 * @bh: bufferhead to mark
 951 *
 952 * The buffer is placed on the transaction's data list and is marked as
 953 * belonging to the transaction.
 954 *
 955 * Returns error number or 0 on success.
 956 *
 957 * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage
 958 * by kswapd.
 959 */
 960int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 961{
 962	journal_t *journal = handle->h_transaction->t_journal;
 963	int need_brelse = 0;
 964	struct journal_head *jh;
 965
 966	if (is_handle_aborted(handle))
 967		return 0;
 968
 969	jh = jbd2_journal_add_journal_head(bh);
 970	JBUFFER_TRACE(jh, "entry");
 971
 972	/*
 973	 * The buffer could *already* be dirty.  Writeout can start
 974	 * at any time.
 975	 */
 976	jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);
 977
 978	/*
 979	 * What if the buffer is already part of a running transaction?
 980	 *
 981	 * There are two cases:
 982	 * 1) It is part of the current running transaction.  Refile it,
 983	 *    just in case we have allocated it as metadata, deallocated
 984	 *    it, then reallocated it as data.
 985	 * 2) It is part of the previous, still-committing transaction.
 986	 *    If all we want to do is to guarantee that the buffer will be
 987	 *    written to disk before this new transaction commits, then
 988	 *    being sure that the *previous* transaction has this same
 989	 *    property is sufficient for us!  Just leave it on its old
 990	 *    transaction.
 991	 *
 992	 * In case (2), the buffer must not already exist as metadata
 993	 * --- that would violate write ordering (a transaction is free
 994	 * to write its data at any point, even before the previous
 995	 * committing transaction has committed).  The caller must
 996	 * never, ever allow this to happen: there's nothing we can do
 997	 * about it in this layer.
 998	 */
 999	jbd_lock_bh_state(bh);
1000	spin_lock(&journal->j_list_lock);
1001
1002	/* Now that we have bh_state locked, are we really still mapped? */
1003	if (!buffer_mapped(bh)) {
1004		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
1005		goto no_journal;
1006	}
1007
1008	if (jh->b_transaction) {
1009		JBUFFER_TRACE(jh, "has transaction");
1010		if (jh->b_transaction != handle->h_transaction) {
1011			JBUFFER_TRACE(jh, "belongs to older transaction");
1012			J_ASSERT_JH(jh, jh->b_transaction ==
1013					journal->j_committing_transaction);
1014
1015			/* @@@ IS THIS TRUE  ? */
1016			/*
1017			 * Not any more.  Scenario: someone does a write()
1018			 * in data=journal mode.  The buffer's transaction has
1019			 * moved into commit.  Then someone does another
1020			 * write() to the file.  We do the frozen data copyout
1021			 * and set b_next_transaction to point to j_running_t.
1022			 * And while we're in that state, someone does a
1023			 * writepage() in an attempt to pageout the same area
1024			 * of the file via a shared mapping.  At present that
1025			 * calls jbd2_journal_dirty_data(), and we get right here.
1026			 * It may be too late to journal the data.  Simply
1027			 * falling through to the next test will suffice: the
1028			 * data will be dirty and wil be checkpointed.  The
1029			 * ordering comments in the next comment block still
1030			 * apply.
1031			 */
1032			//J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1033
1034			/*
1035			 * If we're journalling data, and this buffer was
1036			 * subject to a write(), it could be metadata, forget
1037			 * or shadow against the committing transaction.  Now,
1038			 * someone has dirtied the same darn page via a mapping
1039			 * and it is being writepage()'d.
1040			 * We *could* just steal the page from commit, with some
1041			 * fancy locking there.  Instead, we just skip it -
1042			 * don't tie the page's buffers to the new transaction
1043			 * at all.
1044			 * Implication: if we crash before the writepage() data
1045			 * is written into the filesystem, recovery will replay
1046			 * the write() data.
1047			 */
1048			if (jh->b_jlist != BJ_None &&
1049					jh->b_jlist != BJ_SyncData &&
1050					jh->b_jlist != BJ_Locked) {
1051				JBUFFER_TRACE(jh, "Not stealing");
1052				goto no_journal;
1053			}
1054
1055			/*
1056			 * This buffer may be undergoing writeout in commit.  We
1057			 * can't return from here and let the caller dirty it
1058			 * again because that can cause the write-out loop in
1059			 * commit to never terminate.
1060			 */
1061			if (buffer_dirty(bh)) {
1062				get_bh(bh);
1063				spin_unlock(&journal->j_list_lock);
1064				jbd_unlock_bh_state(bh);
1065				need_brelse = 1;
1066				sync_dirty_buffer(bh);
1067				jbd_lock_bh_state(bh);
1068				spin_lock(&journal->j_list_lock);
1069				/* Since we dropped the lock... */
1070				if (!buffer_mapped(bh)) {
1071					JBUFFER_TRACE(jh, "buffer got unmapped");
1072					goto no_journal;
1073				}
1074				/* The buffer may become locked again at any
1075				   time if it is redirtied */
1076			}
1077
1078			/* journal_clean_data_list() may have got there first */
1079			if (jh->b_transaction != NULL) {
1080				JBUFFER_TRACE(jh, "unfile from commit");
1081				__jbd2_journal_temp_unlink_buffer(jh);
1082				/* It still points to the committing
1083				 * transaction; move it to this one so
1084				 * that the refile assert checks are
1085				 * happy. */
1086				jh->b_transaction = handle->h_transaction;
1087			}
1088			/* The buffer will be refiled below */
1089
1090		}
1091		/*
1092		 * Special case --- the buffer might actually have been
1093		 * allocated and then immediately deallocated in the previous,
1094		 * committing transaction, so might still be left on that
1095		 * transaction's metadata lists.
1096		 */
1097		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1098			JBUFFER_TRACE(jh, "not on correct data list: unfile");
1099			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1100			__jbd2_journal_temp_unlink_buffer(jh);
1101			jh->b_transaction = handle->h_transaction;
1102			JBUFFER_TRACE(jh, "file as data");
1103			__jbd2_journal_file_buffer(jh, handle->h_transaction,
1104						BJ_SyncData);
1105		}
1106	} else {
1107		JBUFFER_TRACE(jh, "not on a transaction");
1108		__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
1109	}
1110no_journal:
1111	spin_unlock(&journal->j_list_lock);
1112	jbd_unlock_bh_state(bh);
1113	if (need_brelse) {
1114		BUFFER_TRACE(bh, "brelse");
1115		__brelse(bh);
1116	}
1117	JBUFFER_TRACE(jh, "exit");
1118	jbd2_journal_put_journal_head(jh);
1119	return 0;
1120}
1121
1122/**
1123 * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
1124 * @handle: transaction to add buffer to.
1125 * @bh: buffer to mark
1126 *
1127 * mark dirty metadata which needs to be journaled as part of the current
1128 * transaction.
1129 *
1130 * The buffer is placed on the transaction's metadata list and is marked
1131 * as belonging to the transaction.
1132 *
1133 * Returns error number or 0 on success.
1134 *
1135 * Special care needs to be taken if the buffer already belongs to the
1136 * current committing transaction (in which case we should have frozen
1137 * data present for that commit).  In that case, we don't relink the
1138 * buffer: that only gets done when the old transaction finally
1139 * completes its commit.
1140 */
1141int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1142{
1143	transaction_t *transaction = handle->h_transaction;
1144	journal_t *journal = transaction->t_journal;
1145	struct journal_head *jh = bh2jh(bh);
1146
1147	jbd_debug(5, "journal_head %p\n", jh);
1148	JBUFFER_TRACE(jh, "entry");
1149	if (is_handle_aborted(handle))
1150		goto out;
1151
1152	jbd_lock_bh_state(bh);
1153
1154	if (jh->b_modified == 0) {
1155		/*
1156		 * This buffer's got modified and becoming part
1157		 * of the transaction. This needs to be done
1158		 * once a transaction -bzzz
1159		 */
1160		jh->b_modified = 1;
1161		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
1162		handle->h_buffer_credits--;
1163	}
1164
1165	/*
1166	 * fastpath, to avoid expensive locking.  If this buffer is already
1167	 * on the running transaction's metadata list there is nothing to do.
1168	 * Nobody can take it off again because there is a handle open.
1169	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
1170	 * result in this test being false, so we go in and take the locks.
1171	 */
1172	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
1173		JBUFFER_TRACE(jh, "fastpath");
1174		J_ASSERT_JH(jh, jh->b_transaction ==
1175					journal->j_running_transaction);
1176		goto out_unlock_bh;
1177	}
1178
1179	set_buffer_jbddirty(bh);
1180
1181	/*
1182	 * Metadata already on the current transaction list doesn't
1183	 * need to be filed.  Metadata on another transaction's list must
1184	 * be committing, and will be refiled once the commit completes:
1185	 * leave it alone for now.
1186	 */
1187	if (jh->b_transaction != transaction) {
1188		JBUFFER_TRACE(jh, "already on other transaction");
1189		J_ASSERT_JH(jh, jh->b_transaction ==
1190					journal->j_committing_transaction);
1191		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
1192		/* And this case is illegal: we can't reuse another
1193		 * transaction's data buffer, ever. */
1194		goto out_unlock_bh;
1195	}
1196
1197	/* That test should have eliminated the following case: */
1198	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
1199
1200	JBUFFER_TRACE(jh, "file as BJ_Metadata");
1201	spin_lock(&journal->j_list_lock);
1202	__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
1203	spin_unlock(&journal->j_list_lock);
1204out_unlock_bh:
1205	jbd_unlock_bh_state(bh);
1206out:
1207	JBUFFER_TRACE(jh, "exit");
1208	return 0;
1209}
1210
1211/*
1212 * jbd2_journal_release_buffer: undo a get_write_access without any buffer
1213 * updates, if the update decided in the end that it didn't need access.
1214 *
1215 */
1216void
1217jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
1218{
1219	BUFFER_TRACE(bh, "entry");
1220}
1221
1222/**
1223 * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
1224 * @handle: transaction handle
1225 * @bh:     bh to 'forget'
1226 *
1227 * We can only do the bforget if there are no commits pending against the
1228 * buffer.  If the buffer is dirty in the current running transaction we
1229 * can safely unlink it.
1230 *
1231 * bh may not be a journalled buffer at all - it may be a non-JBD
1232 * buffer which came off the hashtable.  Check for this.
1233 *
1234 * Decrements bh->b_count by one.
1235 *
1236 * Allow this call even if the handle has aborted --- it may be part of
1237 * the caller's cleanup after an abort.
1238 */
1239int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1240{
1241	transaction_t *transaction = handle->h_transaction;
1242	journal_t *journal = transaction->t_journal;
1243	struct journal_head *jh;
1244	int drop_reserve = 0;
1245	int err = 0;
1246	int was_modified = 0;
1247
1248	BUFFER_TRACE(bh, "entry");
1249
1250	jbd_lock_bh_state(bh);
1251	spin_lock(&journal->j_list_lock);
1252
1253	if (!buffer_jbd(bh))
1254		goto not_jbd;
1255	jh = bh2jh(bh);
1256
1257	/* Critical error: attempting to delete a bitmap buffer, maybe?
1258	 * Don't do any jbd operations, and return an error. */
1259	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
1260			 "inconsistent data on disk")) {
1261		err = -EIO;
1262		goto not_jbd;
1263	}
1264
1265	/* keep track of wether or not this transaction modified us */
1266	was_modified = jh->b_modified;
1267
1268	/*
1269	 * The buffer's going from the transaction, we must drop
1270	 * all references -bzzz
1271	 */
1272	jh->b_modified = 0;
1273
1274	if (jh->b_transaction == handle->h_transaction) {
1275		J_ASSERT_JH(jh, !jh->b_frozen_data);
1276
1277		/* If we are forgetting a buffer which is already part
1278		 * of this transaction, then we can just drop it from
1279		 * the transaction immediately. */
1280		clear_buffer_dirty(bh);
1281		clear_buffer_jbddirty(bh);
1282
1283		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1284
1285		/*
1286		 * we only want to drop a reference if this transaction
1287		 * modified the buffer
1288		 */
1289		if (was_modified)
1290			drop_reserve = 1;
1291
1292		/*
1293		 * We are no longer going to journal this buffer.
1294		 * However, the commit of this transaction is still
1295		 * important to the buffer: the delete that we are now
1296		 * processing might obsolete an old log entry, so by
1297		 * committing, we can satisfy the buffer's checkpoint.
1298		 *
1299		 * So, if we have a checkpoint on the buffer, we should
1300		 * now refile the buffer on our BJ_Forget list so that
1301		 * we know to remove the checkpoint after we commit.
1302		 */
1303
1304		if (jh->b_cp_transaction) {
1305			__jbd2_journal_temp_unlink_buffer(jh);
1306			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1307		} else {
1308			__jbd2_journal_unfile_buffer(jh);
1309			jbd2_journal_remove_journal_head(bh);
1310			__brelse(bh);
1311			if (!buffer_jbd(bh)) {
1312				spin_unlock(&journal->j_list_lock);
1313				jbd_unlock_bh_state(bh);
1314				__bforget(bh);
1315				goto drop;
1316			}
1317		}
1318	} else if (jh->b_transaction) {
1319		J_ASSERT_JH(jh, (jh->b_transaction ==
1320				 journal->j_committing_transaction));
1321		/* However, if the buffer is still owned by a prior
1322		 * (committing) transaction, we can't drop it yet... */
1323		JBUFFER_TRACE(jh, "belongs to older transaction");
1324		/* ... but we CAN drop it from the new transaction if we
1325		 * have also modified it since the original commit. */
1326
1327		if (jh->b_next_transaction) {
1328			J_ASSERT(jh->b_next_transaction == transaction);
1329			jh->b_next_transaction = NULL;
1330
1331			/*
1332			 * only drop a reference if this transaction modified
1333			 * the buffer
1334			 */
1335			if (was_modified)
1336				drop_reserve = 1;
1337		}
1338	}
1339
1340not_jbd:
1341	spin_unlock(&journal->j_list_lock);
1342	jbd_unlock_bh_state(bh);
1343	__brelse(bh);
1344drop:
1345	if (drop_reserve) {
1346		/* no need to reserve log space for this block -bzzz */
1347		handle->h_buffer_credits++;
1348	}
1349	return err;
1350}
1351
1352/**
1353 * int jbd2_journal_stop() - complete a transaction
1354 * @handle: tranaction to complete.
1355 *
1356 * All done for a particular handle.
1357 *
1358 * There is not much action needed here.  We just return any remaining
1359 * buffer credits to the transaction and remove the handle.  The only
1360 * complication is that we need to start a commit operation if the
1361 * filesystem is marked for synchronous update.
1362 *
1363 * jbd2_journal_stop itself will not usually return an error, but it may
1364 * do so in unusual circumstances.  In particular, expect it to
1365 * return -EIO if a jbd2_journal_abort has been executed since the
1366 * transaction began.
1367 */
1368int jbd2_journal_stop(handle_t *handle)
1369{
1370	transaction_t *transaction = handle->h_transaction;
1371	journal_t *journal = transaction->t_journal;
1372	int old_handle_count, err;
1373	pid_t pid;
1374
1375	J_ASSERT(journal_current_handle() == handle);
1376
1377	if (is_handle_aborted(handle))
1378		err = -EIO;
1379	else {
1380		J_ASSERT(transaction->t_updates > 0);
1381		err = 0;
1382	}
1383
1384	if (--handle->h_ref > 0) {
1385		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1386			  handle->h_ref);
1387		return err;
1388	}
1389
1390	jbd_debug(4, "Handle %p going down\n", handle);
1391
1392	/*
1393	 * Implement synchronous transaction batching.  If the handle
1394	 * was synchronous, don't force a commit immediately.  Let's
1395	 * yield and let another thread piggyback onto this transaction.
1396	 * Keep doing that while new threads continue to arrive.
1397	 * It doesn't cost much - we're about to run a commit and sleep
1398	 * on IO anyway.  Speeds up many-threaded, many-dir operations
1399	 * by 30x or more...
1400	 *
1401	 * But don't do this if this process was the most recent one to
1402	 * perform a synchronous write.  We do this to detect the case where a
1403	 * single process is doing a stream of sync writes.  No point in waiting
1404	 * for joiners in that case.
1405	 */
1406	pid = current->pid;
1407	if (handle->h_sync && journal->j_last_sync_writer != pid) {
1408		journal->j_last_sync_writer = pid;
1409		do {
1410			old_handle_count = transaction->t_handle_count;
1411			schedule_timeout_uninterruptible(1);
1412		} while (old_handle_count != transaction->t_handle_count);
1413	}
1414
1415	current->journal_info = NULL;
1416	spin_lock(&journal->j_state_lock);
1417	spin_lock(&transaction->t_handle_lock);
1418	transaction->t_outstanding_credits -= handle->h_buffer_credits;
1419	transaction->t_updates--;
1420	if (!transaction->t_updates) {
1421		wake_up(&journal->j_wait_updates);
1422		if (journal->j_barrier_count)
1423			wake_up(&journal->j_wait_transaction_locked);
1424	}
1425
1426	/*
1427	 * If the handle is marked SYNC, we need to set another commit
1428	 * going!  We also want to force a commit if the current
1429	 * transaction is occupying too much of the log, or if the
1430	 * transaction is too old now.
1431	 */
1432	if (handle->h_sync ||
1433			transaction->t_outstanding_credits >
1434				journal->j_max_transaction_buffers ||
1435			time_after_eq(jiffies, transaction->t_expires)) {
1436		/* Do this even for aborted journals: an abort still
1437		 * completes the commit thread, it just doesn't write
1438		 * anything to disk. */
1439		tid_t tid = transaction->t_tid;
1440
1441		spin_unlock(&transaction->t_handle_lock);
1442		jbd_debug(2, "transaction too old, requesting commit for "
1443					"handle %p\n", handle);
1444		/* This is non-blocking */
1445		__jbd2_log_start_commit(journal, transaction->t_tid);
1446		spin_unlock(&journal->j_state_lock);
1447
1448		/*
1449		 * Special case: JBD2_SYNC synchronous updates require us
1450		 * to wait for the commit to complete.
1451		 */
1452		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
1453			err = jbd2_log_wait_commit(journal, tid);
1454	} else {
1455		spin_unlock(&transaction->t_handle_lock);
1456		spin_unlock(&journal->j_state_lock);
1457	}
1458
1459	lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
1460
1461	jbd2_free_handle(handle);
1462	return err;
1463}
1464
1465/**
1466 * int jbd2_journal_force_commit() - force any uncommitted transactions
1467 * @journal: journal to force
1468 *
1469 * For synchronous operations: force any uncommitted transactions
1470 * to disk.  May seem kludgy, but it reuses all the handle batching
1471 * code in a very simple manner.
1472 */
1473int jbd2_journal_force_commit(journal_t *journal)
1474{
1475	handle_t *handle;
1476	int ret;
1477
1478	handle = jbd2_journal_start(journal, 1);
1479	if (IS_ERR(handle)) {
1480		ret = PTR_ERR(handle);
1481	} else {
1482		handle->h_sync = 1;
1483		ret = jbd2_journal_stop(handle);
1484	}
1485	return ret;
1486}
1487
1488/*
1489 *
1490 * List management code snippets: various functions for manipulating the
1491 * transaction buffer lists.
1492 *
1493 */
1494
1495/*
1496 * Append a buffer to a transaction list, given the transaction's list head
1497 * pointer.
1498 *
1499 * j_list_lock is held.
1500 *
1501 * jbd_lock_bh_state(jh2bh(jh)) is held.
1502 */
1503
1504static inline void
1505__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
1506{
1507	if (!*list) {
1508		jh->b_tnext = jh->b_tprev = jh;
1509		*list = jh;
1510	} else {
1511		/* Insert at the tail of the list to preserve order */
1512		struct journal_head *first = *list, *last = first->b_tprev;
1513		jh->b_tprev = last;
1514		jh->b_tnext = first;
1515		last->b_tnext = first->b_tprev = jh;
1516	}
1517}
1518
1519/*
1520 * Remove a buffer from a transaction list, given the transaction's list
1521 * head pointer.
1522 *
1523 * Called with j_list_lock held, and the journal may not be locked.
1524 *
1525 * jbd_lock_bh_state(jh2bh(jh)) is held.
1526 */
1527
1528static inline void
1529__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1530{
1531	if (*list == jh) {
1532		*list = jh->b_tnext;
1533		if (*list == jh)
1534			*list = NULL;
1535	}
1536	jh->b_tprev->b_tnext = jh->b_tnext;
1537	jh->b_tnext->b_tprev = jh->b_tprev;
1538}
1539
1540/*
1541 * Remove a buffer from the appropriate transaction list.
1542 *
1543 * Note that this function can *change* the value of
1544 * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
1545 * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
1546 * is holding onto a copy of one of thee pointers, it could go bad.
1547 * Generally the caller needs to re-read the pointer from the transaction_t.
1548 *
1549 * Called under j_list_lock.  The journal may not be locked.
1550 */
1551void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1552{
1553	struct journal_head **list = NULL;
1554	transaction_t *transaction;
1555	struct buffer_head *bh = jh2bh(jh);
1556
1557	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
1558	transaction = jh->b_transaction;
1559	if (transaction)
1560		assert_spin_locked(&transaction->t_journal->j_list_lock);
1561
1562	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
1563	if (jh->b_jlist != BJ_None)
1564		J_ASSERT_JH(jh, transaction != NULL);
1565
1566	switch (jh->b_jlist) {
1567	case BJ_None:
1568		return;
1569	case BJ_SyncData:
1570		list = &transaction->t_sync_datalist;
1571		break;
1572	case BJ_Metadata:
1573		transaction->t_nr_buffers--;
1574		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
1575		list = &transaction->t_buffers;
1576		break;
1577	case BJ_Forget:
1578		list = &transaction->t_forget;
1579		break;
1580	case BJ_IO:
1581		list = &transaction->t_iobuf_list;
1582		break;
1583	case BJ_Shadow:
1584		list = &transaction->t_shadow_list;
1585		break;
1586	case BJ_LogCtl:
1587		list = &transaction->t_log_list;
1588		break;
1589	case BJ_Reserved:
1590		list = &transaction->t_reserved_list;
1591		break;
1592	case BJ_Locked:
1593		list = &transaction->t_locked_list;
1594		break;
1595	}
1596
1597	__blist_del_buffer(list, jh);
1598	jh->b_jlist = BJ_None;
1599	if (test_clear_buffer_jbddirty(bh))
1600		mark_buffer_dirty(bh);	/* Expose it to the VM */
1601}
1602
1603void __jbd2_journal_unfile_buffer(struct journal_head *jh)
1604{
1605	__jbd2_journal_temp_unlink_buffer(jh);
1606	jh->b_transaction = NULL;
1607}
1608
1609void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1610{
1611	jbd_lock_bh_state(jh2bh(jh));
1612	spin_lock(&journal->j_list_lock);
1613	__jbd2_journal_unfile_buffer(jh);
1614	spin_unlock(&journal->j_list_lock);
1615	jbd_unlock_bh_state(jh2bh(jh));
1616}
1617
1618/*
1619 * Called from jbd2_journal_try_to_free_buffers().
1620 *
1621 * Called under jbd_lock_bh_state(bh)
1622 */
1623static void
1624__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1625{
1626	struct journal_head *jh;
1627
1628	jh = bh2jh(bh);
1629
1630	if (buffer_locked(bh) || buffer_dirty(bh))
1631		goto out;
1632
1633	if (jh->b_next_transaction != NULL)
1634		goto out;
1635
1636	spin_lock(&journal->j_list_lock);
1637	if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
1638		if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
1639			/* A written-back ordered data buffer */
1640			JBUFFER_TRACE(jh, "release data");
1641			__jbd2_journal_unfile_buffer(jh);
1642			jbd2_journal_remove_journal_head(bh);
1643			__brelse(bh);
1644		}
1645	} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1646		/* written-back checkpointed metadata buffer */
1647		if (jh->b_jlist == BJ_None) {
1648			JBUFFER_TRACE(jh, "remove from checkpoint list");
1649			__jbd2_journal_remove_checkpoint(jh);
1650			jbd2_journal_remove_journal_head(bh);
1651			__brelse(bh);
1652		}
1653	}
1654	spin_unlock(&journal->j_list_lock);
1655out:
1656	return;
1657}
1658
1659
1660/**
1661 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1662 * @journal: journal for operation
1663 * @page: to try and free
1664 * @unused_gfp_mask: unused
1665 *
1666 *
1667 * For all the buffers on this page,
1668 * if they are fully written out ordered data, move them onto BUF_CLEAN
1669 * so try_to_free_buffers() can reap them.
1670 *
1671 * This function returns non-zero if we wish try_to_free_buffers()
1672 * to be called. We do this if the page is releasable by try_to_free_buffers().
1673 * We also do it if the page has locked or dirty buffers and the caller wants
1674 * us to perform sync or async writeout.
1675 *
1676 * This complicates JBD locking somewhat.  We aren't protected by the
1677 * BKL here.  We wish to remove the buffer from its committing or
1678 * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
1679 *
1680 * This may *change* the value of transaction_t->t_datalist, so anyone
1681 * who looks at t_datalist needs to lock against this function.
1682 *
1683 * Even worse, someone may be doing a jbd2_journal_dirty_data on this
1684 * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
1685 * will come out of the lock with the buffer dirty, which makes it
1686 * ineligible for release here.
1687 *
1688 * Who else is affected by this?  hmm...  Really the only contender
1689 * is do_get_write_access() - it could be looking at the buffer while
1690 * journal_try_to_free_buffer() is changing its state.  But that
1691 * cannot happen because we never reallocate freed data as metadata
1692 * while the data is part of a transaction.  Yes?
1693 */
1694int jbd2_journal_try_to_free_buffers(journal_t *journal,
1695				struct page *page, gfp_t unused_gfp_mask)
1696{
1697	struct buffer_head *head;
1698	struct buffer_head *bh;
1699	int ret = 0;
1700
1701	J_ASSERT(PageLocked(page));
1702
1703	head = page_buffers(page);
1704	bh = head;
1705	do {
1706		struct journal_head *jh;
1707
1708		/*
1709		 * We take our own ref against the journal_head here to avoid
1710		 * having to add tons of locking around each instance of
1711		 * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head().
1712		 */
1713		jh = jbd2_journal_grab_journal_head(bh);
1714		if (!jh)
1715			continue;
1716
1717		jbd_lock_bh_state(bh);
1718		__journal_try_to_free_buffer(journal, bh);
1719		jbd2_journal_put_journal_head(jh);
1720		jbd_unlock_bh_state(bh);
1721		if (buffer_jbd(bh))
1722			goto busy;
1723	} while ((bh = bh->b_this_page) != head);
1724	ret = try_to_free_buffers(page);
1725busy:
1726	return ret;
1727}
1728
1729/*
1730 * This buffer is no longer needed.  If it is on an older transaction's
1731 * checkpoint list we need to record it on this transaction's forget list
1732 * to pin this buffer (and hence its checkpointing transaction) down until
1733 * this transaction commits.  If the buffer isn't on a checkpoint list, we
1734 * release it.
1735 * Returns non-zero if JBD no longer has an interest in the buffer.
1736 *
1737 * Called under j_list_lock.
1738 *
1739 * Called under jbd_lock_bh_state(bh).
1740 */
1741static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1742{
1743	int may_free = 1;
1744	struct buffer_head *bh = jh2bh(jh);
1745
1746	__jbd2_journal_unfile_buffer(jh);
1747
1748	if (jh->b_cp_transaction) {
1749		JBUFFER_TRACE(jh, "on running+cp transaction");
1750		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1751		clear_buffer_jbddirty(bh);
1752		may_free = 0;
1753	} else {
1754		JBUFFER_TRACE(jh, "on running transaction");
1755		jbd2_journal_remove_journal_head(bh);
1756		__brelse(bh);
1757	}
1758	return may_free;
1759}
1760
1761/*
1762 * jbd2_journal_invalidatepage
1763 *
1764 * This code is tricky.  It has a number of cases to deal with.
1765 *
1766 * There are two invariants which this code relies on:
1767 *
1768 * i_size must be updated on disk before we start calling invalidatepage on the
1769 * data.
1770 *
1771 *  This is done in ext3 by defining an ext3_setattr method which
1772 *  updates i_size before truncate gets going.  By maintaining this
1773 *  invariant, we can be sure that it is safe to throw away any buffers
1774 *  attached to the current transaction: once the transaction commits,
1775 *  we know that the data will not be needed.
1776 *
1777 *  Note however that we can *not* throw away data belonging to the
1778 *  previous, committing transaction!
1779 *
1780 * Any disk blocks which *are* part of the previous, committing
1781 * transaction (and which therefore cannot be discarded immediately) are
1782 * not going to be reused in the new running transaction
1783 *
1784 *  The bitmap committed_data images guarantee this: any block which is
1785 *  allocated in one transaction and removed in the next will be marked
1786 *  as in-use in the committed_data bitmap, so cannot be reused until
1787 *  the next transaction to delete the block commits.  This means that
1788 *  leaving committing buffers dirty is quite safe: the disk blocks
1789 *  cannot be reallocated to a different file and so buffer aliasing is
1790 *  not possible.
1791 *
1792 *
1793 * The above applies mainly to ordered data mode.  In writeback mode we
1794 * don't make guarantees about the order in which data hits disk --- in
1795 * particular we don't guarantee that new dirty data is flushed before
1796 * transaction commit --- so it is always safe just to discard data
1797 * immediately in that mode.  --sct
1798 */
1799
1800/*
1801 * The journal_unmap_buffer helper function returns zero if the buffer
1802 * concerned remains pinned as an anonymous buffer belonging to an older
1803 * transaction.
1804 *
1805 * We're outside-transaction here.  Either or both of j_running_transaction
1806 * and j_committing_transaction may be NULL.
1807 */
1808static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1809{
1810	transaction_t *transaction;
1811	struct journal_head *jh;
1812	int may_free = 1;
1813	int ret;
1814
1815	BUFFER_TRACE(bh, "entry");
1816
1817	/*
1818	 * It is safe to proceed here without the j_list_lock because the
1819	 * buffers cannot be stolen by try_to_free_buffers as long as we are
1820	 * holding the page lock. --sct
1821	 */
1822
1823	if (!buffer_jbd(bh))
1824		goto zap_buffer_unlocked;
1825
1826	spin_lock(&journal->j_state_lock);
1827	jbd_lock_bh_state(bh);
1828	spin_lock(&journal->j_list_lock);
1829
1830	jh = jbd2_journal_grab_journal_head(bh);
1831	if (!jh)
1832		goto zap_buffer_no_jh;
1833
1834	transaction = jh->b_transaction;
1835	if (transaction == NULL) {
1836		/* First case: not on any transaction.  If it
1837		 * has no checkpoint link, then we can zap it:
1838		 * it's a writeback-mode buffer so we don't care
1839		 * if it hits disk safely. */
1840		if (!jh->b_cp_transaction) {
1841			JBUFFER_TRACE(jh, "not on any transaction: zap");
1842			goto zap_buffer;
1843		}
1844
1845		if (!buffer_dirty(bh)) {
1846			/* bdflush has written it.  We can drop it now */
1847			goto zap_buffer;
1848		}
1849
1850		/* OK, it must be in the journal but still not
1851		 * written fully to disk: it's metadata or
1852		 * journaled data... */
1853
1854		if (journal->j_running_transaction) {
1855			/* ... and once the current transaction has
1856			 * committed, the buffer won't be needed any
1857			 * longer. */
1858			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1859			ret = __dispose_buffer(jh,
1860					journal->j_running_transaction);
1861			jbd2_journal_put_journal_head(jh);
1862			spin_unlock(&journal->j_list_lock);
1863			jbd_unlock_bh_state(bh);
1864			spin_unlock(&journal->j_state_lock);
1865			return ret;
1866		} else {
1867			/* There is no currently-running transaction. So the
1868			 * orphan record which we wrote for this file must have
1869			 * passed into commit.  We must attach this buffer to
1870			 * the committing transaction, if it exists. */
1871			if (journal->j_committing_transaction) {
1872				JBUFFER_TRACE(jh, "give to committing trans");
1873				ret = __dispose_buffer(jh,
1874					journal->j_committing_transaction);
1875				jbd2_journal_put_journal_head(jh);
1876				spin_unlock(&journal->j_list_lock);
1877				jbd_unlock_bh_state(bh);
1878				spin_unlock(&journal->j_state_lock);
1879				return ret;
1880			} else {
1881				/* The orphan record's transaction has
1882				 * committed.  We can cleanse this buffer */
1883				clear_buffer_jbddirty(bh);
1884				goto zap_buffer;
1885			}
1886		}
1887	} else if (transaction == journal->j_committing_transaction) {
1888		JBUFFER_TRACE(jh, "on committing transaction");
1889		if (jh->b_jlist == BJ_Locked) {
1890			/*
1891			 * The buffer is on the committing transaction's locked
1892			 * list.  We have the buffer locked, so I/O has
1893			 * completed.  So we can nail the buffer now.
1894			 */
1895			may_free = __dispose_buffer(jh, transaction);
1896			goto zap_buffer;
1897		}
1898		/*
1899		 * If it is committing, we simply cannot touch it.  We
1900		 * can remove it's next_transaction pointer from the
1901		 * running transaction if that is set, but nothing
1902		 * else. */
1903		set_buffer_freed(bh);
1904		if (jh->b_next_transaction) {
1905			J_ASSERT(jh->b_next_transaction ==
1906					journal->j_running_transaction);
1907			jh->b_next_transaction = NULL;
1908		}
1909		jbd2_journal_put_journal_head(jh);
1910		spin_unlock(&journal->j_list_lock);
1911		jbd_unlock_bh_state(bh);
1912		spin_unlock(&journal->j_state_lock);
1913		return 0;
1914	} else {
1915		/* Good, the buffer belongs to the running transaction.
1916		 * We are writing our own transaction's data, not any
1917		 * previous one's, so it is safe to throw it away
1918		 * (remember that we expect the filesystem to have set
1919		 * i_size already for this truncate so recovery will not
1920		 * expose the disk blocks we are discarding here.) */
1921		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
1922		JBUFFER_TRACE(jh, "on running transaction");
1923		may_free = __dispose_buffer(jh, transaction);
1924	}
1925
1926zap_buffer:
1927	jbd2_journal_put_journal_head(jh);
1928zap_buffer_no_jh:
1929	spin_unlock(&journal->j_list_lock);
1930	jbd_unlock_bh_state(bh);
1931	spin_unlock(&journal->j_state_lock);
1932zap_buffer_unlocked:
1933	clear_buffer_dirty(bh);
1934	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
1935	clear_buffer_mapped(bh);
1936	clear_buffer_req(bh);
1937	clear_buffer_new(bh);
1938	bh->b_bdev = NULL;
1939	return may_free;
1940}
1941
1942/**
1943 * void jbd2_journal_invalidatepage()
1944 * @journal: journal to use for flush...
1945 * @page:    page to flush
1946 * @offset:  length of page to invalidate.
1947 *
1948 * Reap page buffers containing data after offset in page.
1949 *
1950 */
1951void jbd2_journal_invalidatepage(journal_t *journal,
1952		      struct page *page,
1953		      unsigned long offset)
1954{
1955	struct buffer_head *head, *bh, *next;
1956	unsigned int curr_off = 0;
1957	int may_free = 1;
1958
1959	if (!PageLocked(page))
1960		BUG();
1961	if (!page_has_buffers(page))
1962		return;
1963
1964	/* We will potentially be playing with lists other than just the
1965	 * data lists (especially for journaled data mode), so be
1966	 * cautious in our locking. */
1967
1968	head = bh = page_buffers(page);
1969	do {
1970		unsigned int next_off = curr_off + bh->b_size;
1971		next = bh->b_this_page;
1972
1973		if (offset <= curr_off) {
1974			/* This block is wholly outside the truncation point */
1975			lock_buffer(bh);
1976			may_free &= journal_unmap_buffer(journal, bh);
1977			unlock_buffer(bh);
1978		}
1979		curr_off = next_off;
1980		bh = next;
1981
1982	} while (bh != head);
1983
1984	if (!offset) {
1985		if (may_free && try_to_free_buffers(page))
1986			J_ASSERT(!page_has_buffers(page));
1987	}
1988}
1989
1990/*
1991 * File a buffer on the given transaction list.
1992 */
1993void __jbd2_journal_file_buffer(struct journal_head *jh,
1994			transaction_t *transaction, int jlist)
1995{
1996	struct journal_head **list = NULL;
1997	int was_dirty = 0;
1998	struct buffer_head *bh = jh2bh(jh);
1999
2000	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2001	assert_spin_locked(&transaction->t_journal->j_list_lock);
2002
2003	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
2004	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
2005				jh->b_transaction == NULL);
2006
2007	if (jh->b_transaction && jh->b_jlist == jlist)
2008		return;
2009
2010	/* The following list of buffer states needs to be consistent
2011	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
2012	 * state. */
2013
2014	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
2015	    jlist == BJ_Shadow || jlist == BJ_Forget) {
2016		if (test_clear_buffer_dirty(bh) ||
2017		    test_clear_buffer_jbddirty(bh))
2018			was_dirty = 1;
2019	}
2020
2021	if (jh->b_transaction)
2022		__jbd2_journal_temp_unlink_buffer(jh);
2023	jh->b_transaction = transaction;
2024
2025	switch (jlist) {
2026	case BJ_None:
2027		J_ASSERT_JH(jh, !jh->b_committed_data);
2028		J_ASSERT_JH(jh, !jh->b_frozen_data);
2029		return;
2030	case BJ_SyncData:
2031		list = &transaction->t_sync_datalist;
2032		break;
2033	case BJ_Metadata:
2034		transaction->t_nr_buffers++;
2035		list = &transaction->t_buffers;
2036		break;
2037	case BJ_Forget:
2038		list = &transaction->t_forget;
2039		break;
2040	case BJ_IO:
2041		list = &transaction->t_iobuf_list;
2042		break;
2043	case BJ_Shadow:
2044		list = &transaction->t_shadow_list;
2045		break;
2046	case BJ_LogCtl:
2047		list = &transaction->t_log_list;
2048		break;
2049	case BJ_Reserved:
2050		list = &transaction->t_reserved_list;
2051		break;
2052	case BJ_Locked:
2053		list =  &transaction->t_locked_list;
2054		break;
2055	}
2056
2057	__blist_add_buffer(list, jh);
2058	jh->b_jlist = jlist;
2059
2060	if (was_dirty)
2061		set_buffer_jbddirty(bh);
2062}
2063
2064void jbd2_journal_file_buffer(struct journal_head *jh,
2065				transaction_t *transaction, int jlist)
2066{
2067	jbd_lock_bh_state(jh2bh(jh));
2068	spin_lock(&transaction->t_journal->j_list_lock);
2069	__jbd2_journal_file_buffer(jh, transaction, jlist);
2070	spin_unlock(&transaction->t_journal->j_list_lock);
2071	jbd_unlock_bh_state(jh2bh(jh));
2072}
2073
2074/*
2075 * Remove a buffer from its current buffer list in preparation for
2076 * dropping it from its current transaction entirely.  If the buffer has
2077 * already started to be used by a subsequent transaction, refile the
2078 * buffer on that transaction's metadata list.
2079 *
2080 * Called under journal->j_list_lock
2081 *
2082 * Called under jbd_lock_bh_state(jh2bh(jh))
2083 */
2084void __jbd2_journal_refile_buffer(struct journal_head *jh)
2085{
2086	int was_dirty;
2087	struct buffer_head *bh = jh2bh(jh);
2088
2089	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2090	if (jh->b_transaction)
2091		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
2092
2093	/* If the buffer is now unused, just drop it. */
2094	if (jh->b_next_transaction == NULL) {
2095		__jbd2_journal_unfile_buffer(jh);
2096		return;
2097	}
2098
2099	/*
2100	 * It has been modified by a later transaction: add it to the new
2101	 * transaction's metadata list.
2102	 */
2103
2104	was_dirty = test_clear_buffer_jbddirty(bh);
2105	__jbd2_journal_temp_unlink_buffer(jh);
2106	jh->b_transaction = jh->b_next_transaction;
2107	jh->b_next_transaction = NULL;
2108	__jbd2_journal_file_buffer(jh, jh->b_transaction,
2109				jh->b_modified ? BJ_Metadata : BJ_Reserved);
2110	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2111
2112	if (was_dirty)
2113		set_buffer_jbddirty(bh);
2114}
2115
2116/*
2117 * For the unlocked version of this call, also make sure that any
2118 * hanging journal_head is cleaned up if necessary.
2119 *
2120 * __jbd2_journal_refile_buffer is usually called as part of a single locked
2121 * operation on a buffer_head, in which the caller is probably going to
2122 * be hooking the journal_head onto other lists.  In that case it is up
2123 * to the caller to remove the journal_head if necessary.  For the
2124 * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
2125 * doing anything else to the buffer so we need to do the cleanup
2126 * ourselves to avoid a jh leak.
2127 *
2128 * *** The journal_head may be freed by this call! ***
2129 */
2130void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2131{
2132	struct buffer_head *bh = jh2bh(jh);
2133
2134	jbd_lock_bh_state(bh);
2135	spin_lock(&journal->j_list_lock);
2136
2137	__jbd2_journal_refile_buffer(jh);
2138	jbd_unlock_bh_state(bh);
2139	jbd2_journal_remove_journal_head(bh);
2140
2141	spin_unlock(&journal->j_list_lock);
2142	__brelse(bh);
2143}
Configure Feed

Configure Feed