Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfs: always rejoin held resources during defer roll

During testing of xfs/141 on a V4 filesystem, I observed some
inconsistent behavior with regards to resources that are held (i.e.
remain locked) across a defer roll. The transaction roll always gives
the defer roll function a new transaction, even if committing the old
transaction fails. However, the defer roll function only rejoins the
held resources if the transaction commit succeedied. This means that
callers of defer roll have to figure out whether the held resources are
attached to the transaction being passed back.

Worse yet, if the defer roll was part of a defer finish call, we have a
third possibility: the defer finish could pass back a dirty transaction
with dirty held resources and an error code.

The only sane way to handle all of these scenarios is to require that
the code that held the resource either cancel the transaction before
unlocking and releasing the resources, or use functions that detach
resources from a transaction properly (e.g. xfs_trans_brelse) if they
need to drop the reference before committing or cancelling the
transaction.

In order to make this so, change the defer roll code to join held
resources to the new transaction unconditionally and fix all the bhold
callers to release the held buffers correctly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>

+31 -37
+12 -23
fs/xfs/libxfs/xfs_attr.c
··· 224 224 */ 225 225 int 226 226 xfs_attr_set_args( 227 - struct xfs_da_args *args, 228 - struct xfs_buf **leaf_bp) 227 + struct xfs_da_args *args) 229 228 { 230 229 struct xfs_inode *dp = args->dp; 230 + struct xfs_buf *leaf_bp = NULL; 231 231 int error; 232 232 233 233 /* ··· 255 255 * It won't fit in the shortform, transform to a leaf block. 256 256 * GROT: another possible req'mt for a double-split btree op. 257 257 */ 258 - error = xfs_attr_shortform_to_leaf(args, leaf_bp); 258 + error = xfs_attr_shortform_to_leaf(args, &leaf_bp); 259 259 if (error) 260 260 return error; 261 261 ··· 263 263 * Prevent the leaf buffer from being unlocked so that a 264 264 * concurrent AIL push cannot grab the half-baked leaf 265 265 * buffer and run into problems with the write verifier. 266 + * Once we're done rolling the transaction we can release 267 + * the hold and add the attr to the leaf. 266 268 */ 267 - xfs_trans_bhold(args->trans, *leaf_bp); 268 - 269 + xfs_trans_bhold(args->trans, leaf_bp); 269 270 error = xfs_defer_finish(&args->trans); 270 - if (error) 271 + xfs_trans_bhold_release(args->trans, leaf_bp); 272 + if (error) { 273 + xfs_trans_brelse(args->trans, leaf_bp); 271 274 return error; 272 - 273 - /* 274 - * Commit the leaf transformation. We'll need another 275 - * (linked) transaction to add the new attribute to the 276 - * leaf. 277 - */ 278 - error = xfs_trans_roll_inode(&args->trans, dp); 279 - if (error) 280 - return error; 281 - xfs_trans_bjoin(args->trans, *leaf_bp); 282 - *leaf_bp = NULL; 275 + } 283 276 } 284 277 285 278 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) ··· 315 322 int flags) 316 323 { 317 324 struct xfs_mount *mp = dp->i_mount; 318 - struct xfs_buf *leaf_bp = NULL; 319 325 struct xfs_da_args args; 320 326 struct xfs_trans_res tres; 321 327 int rsvd = (flags & ATTR_ROOT) != 0; ··· 373 381 goto out_trans_cancel; 374 382 375 383 xfs_trans_ijoin(args.trans, dp, 0); 376 - error = xfs_attr_set_args(&args, &leaf_bp); 384 + error = xfs_attr_set_args(&args); 377 385 if (error) 378 - goto out_release_leaf; 386 + goto out_trans_cancel; 379 387 if (!args.trans) { 380 388 /* shortform attribute has already been committed */ 381 389 goto out_unlock; ··· 400 408 xfs_iunlock(dp, XFS_ILOCK_EXCL); 401 409 return error; 402 410 403 - out_release_leaf: 404 - if (leaf_bp) 405 - xfs_trans_brelse(args.trans, leaf_bp); 406 411 out_trans_cancel: 407 412 if (args.trans) 408 413 xfs_trans_cancel(args.trans);
+1 -1
fs/xfs/libxfs/xfs_attr.h
··· 140 140 unsigned char *value, int *valuelenp, int flags); 141 141 int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, 142 142 unsigned char *value, int valuelen, int flags); 143 - int xfs_attr_set_args(struct xfs_da_args *args, struct xfs_buf **leaf_bp); 143 + int xfs_attr_set_args(struct xfs_da_args *args); 144 144 int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); 145 145 int xfs_attr_remove_args(struct xfs_da_args *args); 146 146 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
+9 -5
fs/xfs/libxfs/xfs_defer.c
··· 274 274 275 275 trace_xfs_defer_trans_roll(tp, _RET_IP_); 276 276 277 - /* Roll the transaction. */ 277 + /* 278 + * Roll the transaction. Rolling always given a new transaction (even 279 + * if committing the old one fails!) to hand back to the caller, so we 280 + * join the held resources to the new transaction so that we always 281 + * return with the held resources joined to @tpp, no matter what 282 + * happened. 283 + */ 278 284 error = xfs_trans_roll(tpp); 279 285 tp = *tpp; 280 - if (error) { 281 - trace_xfs_defer_trans_roll_error(tp, error); 282 - return error; 283 - } 284 286 285 287 /* Rejoin the joined inodes. */ 286 288 for (i = 0; i < ipcount; i++) ··· 294 292 xfs_trans_bhold(tp, bplist[i]); 295 293 } 296 294 295 + if (error) 296 + trace_xfs_defer_trans_roll_error(tp, error); 297 297 return error; 298 298 } 299 299
+9 -8
fs/xfs/xfs_dquot.c
··· 277 277 278 278 /* 279 279 * Ensure that the given in-core dquot has a buffer on disk backing it, and 280 - * return the buffer. This is called when the bmapi finds a hole. 280 + * return the buffer locked and held. This is called when the bmapi finds a 281 + * hole. 281 282 */ 282 283 STATIC int 283 284 xfs_dquot_disk_alloc( ··· 356 355 * If everything succeeds, the caller of this function is returned a 357 356 * buffer that is locked and held to the transaction. The caller 358 357 * is responsible for unlocking any buffer passed back, either 359 - * manually or by committing the transaction. 358 + * manually or by committing the transaction. On error, the buffer is 359 + * released and not passed back. 360 360 */ 361 361 xfs_trans_bhold(tp, bp); 362 362 error = xfs_defer_finish(tpp); 363 - tp = *tpp; 364 363 if (error) { 365 - xfs_buf_relse(bp); 364 + xfs_trans_bhold_release(*tpp, bp); 365 + xfs_trans_brelse(*tpp, bp); 366 366 return error; 367 367 } 368 368 *bpp = bp; ··· 523 521 struct xfs_buf **bpp) 524 522 { 525 523 struct xfs_trans *tp; 526 - struct xfs_buf *bp; 527 524 int error; 528 525 529 526 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc, ··· 530 529 if (error) 531 530 goto err; 532 531 533 - error = xfs_dquot_disk_alloc(&tp, dqp, &bp); 532 + error = xfs_dquot_disk_alloc(&tp, dqp, bpp); 534 533 if (error) 535 534 goto err_cancel; 536 535 ··· 540 539 * Buffer was held to the transaction, so we have to unlock it 541 540 * manually here because we're not passing it back. 542 541 */ 543 - xfs_buf_relse(bp); 542 + xfs_buf_relse(*bpp); 543 + *bpp = NULL; 544 544 goto err; 545 545 } 546 - *bpp = bp; 547 546 return 0; 548 547 549 548 err_cancel: