Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfs: lobotomise xfs_trans_read_buf_map()

There's a case in that code where it checks for a buffer match in a
transaction where the buffer is not marked done. i.e. trying to
catch a buffer we have locked in the transaction but have not
completed IO on.

The only way we can find a buffer that has not had IO completed on
it is if it had readahead issued on it, but we never do readahead on
buffers that we have already joined into a transaction. Hence this
condition cannot occur, and buffers locked and joined into a
transaction should always be marked done and not under IO.

Remove this code and re-order xfs_trans_read_buf_map() to remove
duplicated IO dispatch and error handling code.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>

authored by

Dave Chinner and committed by
Dave Chinner
2d3d0c53 cdc9cec7

+32 -101
+32 -101
fs/xfs/xfs_trans_buf.c
··· 229 229 return bp; 230 230 } 231 231 232 - #ifdef DEBUG 233 - xfs_buftarg_t *xfs_error_target; 234 - int xfs_do_error; 235 - int xfs_req_num; 236 - int xfs_error_mod = 33; 237 - #endif 238 - 239 232 /* 240 233 * Get and lock the buffer for the caller if it is not already 241 234 * locked within the given transaction. If it has not yet been ··· 250 257 struct xfs_buf **bpp, 251 258 const struct xfs_buf_ops *ops) 252 259 { 253 - xfs_buf_t *bp; 254 - xfs_buf_log_item_t *bip; 260 + struct xfs_buf *bp = NULL; 261 + struct xfs_buf_log_item *bip; 255 262 int error; 256 263 257 264 *bpp = NULL; 258 - if (!tp) { 259 - bp = xfs_buf_read_map(target, map, nmaps, flags, ops); 260 - if (!bp) 261 - return (flags & XBF_TRYLOCK) ? 262 - -EAGAIN : -ENOMEM; 263 - 264 - if (bp->b_error) { 265 - error = bp->b_error; 266 - xfs_buf_ioerror_alert(bp, __func__); 267 - XFS_BUF_UNDONE(bp); 268 - xfs_buf_stale(bp); 269 - xfs_buf_relse(bp); 270 - 271 - /* bad CRC means corrupted metadata */ 272 - if (error == -EFSBADCRC) 273 - error = -EFSCORRUPTED; 274 - return error; 275 - } 276 - #ifdef DEBUG 277 - if (xfs_do_error) { 278 - if (xfs_error_target == target) { 279 - if (((xfs_req_num++) % xfs_error_mod) == 0) { 280 - xfs_buf_relse(bp); 281 - xfs_debug(mp, "Returning error!"); 282 - return -EIO; 283 - } 284 - } 285 - } 286 - #endif 287 - if (XFS_FORCED_SHUTDOWN(mp)) 288 - goto shutdown_abort; 289 - *bpp = bp; 290 - return 0; 291 - } 292 - 293 265 /* 294 266 * If we find the buffer in the cache with this transaction 295 267 * pointer in its b_fsprivate2 field, then we know we already ··· 263 305 * If the buffer is not yet read in, then we read it in, increment 264 306 * the lock recursion count, and return it to the caller. 265 307 */ 266 - bp = xfs_trans_buf_item_match(tp, target, map, nmaps); 267 - if (bp != NULL) { 308 + if (tp) 309 + bp = xfs_trans_buf_item_match(tp, target, map, nmaps); 310 + if (bp) { 268 311 ASSERT(xfs_buf_islocked(bp)); 269 312 ASSERT(bp->b_transp == tp); 270 313 ASSERT(bp->b_fspriv != NULL); 271 314 ASSERT(!bp->b_error); 272 - if (!(XFS_BUF_ISDONE(bp))) { 273 - trace_xfs_trans_read_buf_io(bp, _RET_IP_); 274 - ASSERT(!XFS_BUF_ISASYNC(bp)); 275 - ASSERT(bp->b_iodone == NULL); 276 - XFS_BUF_READ(bp); 277 - bp->b_ops = ops; 315 + ASSERT(bp->b_flags & XBF_DONE); 278 316 279 - error = xfs_buf_submit_wait(bp); 280 - if (error) { 281 - if (!XFS_FORCED_SHUTDOWN(mp)) 282 - xfs_buf_ioerror_alert(bp, __func__); 283 - xfs_buf_relse(bp); 284 - /* 285 - * We can gracefully recover from most read 286 - * errors. Ones we can't are those that happen 287 - * after the transaction's already dirty. 288 - */ 289 - if (tp->t_flags & XFS_TRANS_DIRTY) 290 - xfs_force_shutdown(tp->t_mountp, 291 - SHUTDOWN_META_IO_ERROR); 292 - /* bad CRC means corrupted metadata */ 293 - if (error == -EFSBADCRC) 294 - error = -EFSCORRUPTED; 295 - return error; 296 - } 297 - } 298 317 /* 299 318 * We never locked this buf ourselves, so we shouldn't 300 319 * brelse it either. Just get out. 301 320 */ 302 321 if (XFS_FORCED_SHUTDOWN(mp)) { 303 322 trace_xfs_trans_read_buf_shut(bp, _RET_IP_); 304 - *bpp = NULL; 305 323 return -EIO; 306 324 } 307 - 308 325 309 326 bip = bp->b_fspriv; 310 327 bip->bli_recur++; ··· 291 358 } 292 359 293 360 bp = xfs_buf_read_map(target, map, nmaps, flags, ops); 294 - if (bp == NULL) { 295 - *bpp = NULL; 296 - return (flags & XBF_TRYLOCK) ? 297 - 0 : -ENOMEM; 361 + if (!bp) { 362 + if (!(flags & XBF_TRYLOCK)) 363 + return -ENOMEM; 364 + return tp ? 0 : -EAGAIN; 298 365 } 366 + 367 + /* 368 + * If we've had a read error, then the contents of the buffer are 369 + * invalid and should not be used. To ensure that a followup read tries 370 + * to pull the buffer from disk again, we clear the XBF_DONE flag and 371 + * mark the buffer stale. This ensures that anyone who has a current 372 + * reference to the buffer will interpret it's contents correctly and 373 + * future cache lookups will also treat it as an empty, uninitialised 374 + * buffer. 375 + */ 299 376 if (bp->b_error) { 300 377 error = bp->b_error; 378 + if (!XFS_FORCED_SHUTDOWN(mp)) 379 + xfs_buf_ioerror_alert(bp, __func__); 380 + bp->b_flags &= ~XBF_DONE; 301 381 xfs_buf_stale(bp); 302 - XFS_BUF_DONE(bp); 303 - xfs_buf_ioerror_alert(bp, __func__); 304 - if (tp->t_flags & XFS_TRANS_DIRTY) 382 + 383 + if (tp && (tp->t_flags & XFS_TRANS_DIRTY)) 305 384 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); 306 385 xfs_buf_relse(bp); 307 386 ··· 322 377 error = -EFSCORRUPTED; 323 378 return error; 324 379 } 325 - #ifdef DEBUG 326 - if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) { 327 - if (xfs_error_target == target) { 328 - if (((xfs_req_num++) % xfs_error_mod) == 0) { 329 - xfs_force_shutdown(tp->t_mountp, 330 - SHUTDOWN_META_IO_ERROR); 331 - xfs_buf_relse(bp); 332 - xfs_debug(mp, "Returning trans error!"); 333 - return -EIO; 334 - } 335 - } 380 + 381 + if (XFS_FORCED_SHUTDOWN(mp)) { 382 + xfs_buf_relse(bp); 383 + trace_xfs_trans_read_buf_shut(bp, _RET_IP_); 384 + return -EIO; 336 385 } 337 - #endif 338 - if (XFS_FORCED_SHUTDOWN(mp)) 339 - goto shutdown_abort; 340 386 341 - _xfs_trans_bjoin(tp, bp, 1); 387 + if (tp) 388 + _xfs_trans_bjoin(tp, bp, 1); 342 389 trace_xfs_trans_read_buf(bp->b_fspriv); 343 - 344 390 *bpp = bp; 345 391 return 0; 346 392 347 - shutdown_abort: 348 - trace_xfs_trans_read_buf_shut(bp, _RET_IP_); 349 - xfs_buf_relse(bp); 350 - *bpp = NULL; 351 - return -EIO; 352 393 } 353 394 354 395 /*