Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.7 592 lines 16 kB view raw
1// SPDX-License-Identifier: GPL-2.0+ 2/* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6#include "xfs.h" 7#include "xfs_fs.h" 8#include "xfs_format.h" 9#include "xfs_log_format.h" 10#include "xfs_trans_resv.h" 11#include "xfs_bit.h" 12#include "xfs_shared.h" 13#include "xfs_mount.h" 14#include "xfs_defer.h" 15#include "xfs_trans.h" 16#include "xfs_trans_priv.h" 17#include "xfs_refcount_item.h" 18#include "xfs_log.h" 19#include "xfs_refcount.h" 20#include "xfs_error.h" 21 22kmem_zone_t *xfs_cui_zone; 23kmem_zone_t *xfs_cud_zone; 24 25static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) 26{ 27 return container_of(lip, struct xfs_cui_log_item, cui_item); 28} 29 30void 31xfs_cui_item_free( 32 struct xfs_cui_log_item *cuip) 33{ 34 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) 35 kmem_free(cuip); 36 else 37 kmem_cache_free(xfs_cui_zone, cuip); 38} 39 40/* 41 * Freeing the CUI requires that we remove it from the AIL if it has already 42 * been placed there. However, the CUI may not yet have been placed in the AIL 43 * when called by xfs_cui_release() from CUD processing due to the ordering of 44 * committed vs unpin operations in bulk insert operations. Hence the reference 45 * count to ensure only the last caller frees the CUI. 46 */ 47void 48xfs_cui_release( 49 struct xfs_cui_log_item *cuip) 50{ 51 ASSERT(atomic_read(&cuip->cui_refcount) > 0); 52 if (atomic_dec_and_test(&cuip->cui_refcount)) { 53 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); 54 xfs_cui_item_free(cuip); 55 } 56} 57 58 59STATIC void 60xfs_cui_item_size( 61 struct xfs_log_item *lip, 62 int *nvecs, 63 int *nbytes) 64{ 65 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 66 67 *nvecs += 1; 68 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); 69} 70 71/* 72 * This is called to fill in the vector of log iovecs for the 73 * given cui log item. 
We use only 1 iovec, and we point that 74 * at the cui_log_format structure embedded in the cui item. 75 * It is at this point that we assert that all of the extent 76 * slots in the cui item have been filled. 77 */ 78STATIC void 79xfs_cui_item_format( 80 struct xfs_log_item *lip, 81 struct xfs_log_vec *lv) 82{ 83 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 84 struct xfs_log_iovec *vecp = NULL; 85 86 ASSERT(atomic_read(&cuip->cui_next_extent) == 87 cuip->cui_format.cui_nextents); 88 89 cuip->cui_format.cui_type = XFS_LI_CUI; 90 cuip->cui_format.cui_size = 1; 91 92 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, 93 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); 94} 95 96/* 97 * The unpin operation is the last place an CUI is manipulated in the log. It is 98 * either inserted in the AIL or aborted in the event of a log I/O error. In 99 * either case, the CUI transaction has been successfully committed to make it 100 * this far. Therefore, we expect whoever committed the CUI to either construct 101 * and commit the CUD or drop the CUD's reference in the event of error. Simply 102 * drop the log's CUI reference now that the log is done with it. 103 */ 104STATIC void 105xfs_cui_item_unpin( 106 struct xfs_log_item *lip, 107 int remove) 108{ 109 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 110 111 xfs_cui_release(cuip); 112} 113 114/* 115 * The CUI has been either committed or aborted if the transaction has been 116 * cancelled. If the transaction was cancelled, an CUD isn't going to be 117 * constructed and thus we free the CUI here directly. 
118 */ 119STATIC void 120xfs_cui_item_release( 121 struct xfs_log_item *lip) 122{ 123 xfs_cui_release(CUI_ITEM(lip)); 124} 125 126static const struct xfs_item_ops xfs_cui_item_ops = { 127 .iop_size = xfs_cui_item_size, 128 .iop_format = xfs_cui_item_format, 129 .iop_unpin = xfs_cui_item_unpin, 130 .iop_release = xfs_cui_item_release, 131}; 132 133/* 134 * Allocate and initialize an cui item with the given number of extents. 135 */ 136struct xfs_cui_log_item * 137xfs_cui_init( 138 struct xfs_mount *mp, 139 uint nextents) 140 141{ 142 struct xfs_cui_log_item *cuip; 143 144 ASSERT(nextents > 0); 145 if (nextents > XFS_CUI_MAX_FAST_EXTENTS) 146 cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 147 0); 148 else 149 cuip = kmem_zone_zalloc(xfs_cui_zone, 0); 150 151 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); 152 cuip->cui_format.cui_nextents = nextents; 153 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; 154 atomic_set(&cuip->cui_next_extent, 0); 155 atomic_set(&cuip->cui_refcount, 2); 156 157 return cuip; 158} 159 160static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) 161{ 162 return container_of(lip, struct xfs_cud_log_item, cud_item); 163} 164 165STATIC void 166xfs_cud_item_size( 167 struct xfs_log_item *lip, 168 int *nvecs, 169 int *nbytes) 170{ 171 *nvecs += 1; 172 *nbytes += sizeof(struct xfs_cud_log_format); 173} 174 175/* 176 * This is called to fill in the vector of log iovecs for the 177 * given cud log item. We use only 1 iovec, and we point that 178 * at the cud_log_format structure embedded in the cud item. 179 * It is at this point that we assert that all of the extent 180 * slots in the cud item have been filled. 
181 */ 182STATIC void 183xfs_cud_item_format( 184 struct xfs_log_item *lip, 185 struct xfs_log_vec *lv) 186{ 187 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 188 struct xfs_log_iovec *vecp = NULL; 189 190 cudp->cud_format.cud_type = XFS_LI_CUD; 191 cudp->cud_format.cud_size = 1; 192 193 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, 194 sizeof(struct xfs_cud_log_format)); 195} 196 197/* 198 * The CUD is either committed or aborted if the transaction is cancelled. If 199 * the transaction is cancelled, drop our reference to the CUI and free the 200 * CUD. 201 */ 202STATIC void 203xfs_cud_item_release( 204 struct xfs_log_item *lip) 205{ 206 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 207 208 xfs_cui_release(cudp->cud_cuip); 209 kmem_cache_free(xfs_cud_zone, cudp); 210} 211 212static const struct xfs_item_ops xfs_cud_item_ops = { 213 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 214 .iop_size = xfs_cud_item_size, 215 .iop_format = xfs_cud_item_format, 216 .iop_release = xfs_cud_item_release, 217}; 218 219static struct xfs_cud_log_item * 220xfs_trans_get_cud( 221 struct xfs_trans *tp, 222 struct xfs_cui_log_item *cuip) 223{ 224 struct xfs_cud_log_item *cudp; 225 226 cudp = kmem_zone_zalloc(xfs_cud_zone, 0); 227 xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 228 &xfs_cud_item_ops); 229 cudp->cud_cuip = cuip; 230 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 231 232 xfs_trans_add_item(tp, &cudp->cud_item); 233 return cudp; 234} 235 236/* 237 * Finish an refcount update and log it to the CUD. Note that the 238 * transaction is marked dirty regardless of whether the refcount 239 * update succeeds or fails to support the CUI/CUD lifecycle rules. 
240 */ 241static int 242xfs_trans_log_finish_refcount_update( 243 struct xfs_trans *tp, 244 struct xfs_cud_log_item *cudp, 245 enum xfs_refcount_intent_type type, 246 xfs_fsblock_t startblock, 247 xfs_extlen_t blockcount, 248 xfs_fsblock_t *new_fsb, 249 xfs_extlen_t *new_len, 250 struct xfs_btree_cur **pcur) 251{ 252 int error; 253 254 error = xfs_refcount_finish_one(tp, type, startblock, 255 blockcount, new_fsb, new_len, pcur); 256 257 /* 258 * Mark the transaction dirty, even on error. This ensures the 259 * transaction is aborted, which: 260 * 261 * 1.) releases the CUI and frees the CUD 262 * 2.) shuts down the filesystem 263 */ 264 tp->t_flags |= XFS_TRANS_DIRTY; 265 set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); 266 267 return error; 268} 269 270/* Sort refcount intents by AG. */ 271static int 272xfs_refcount_update_diff_items( 273 void *priv, 274 struct list_head *a, 275 struct list_head *b) 276{ 277 struct xfs_mount *mp = priv; 278 struct xfs_refcount_intent *ra; 279 struct xfs_refcount_intent *rb; 280 281 ra = container_of(a, struct xfs_refcount_intent, ri_list); 282 rb = container_of(b, struct xfs_refcount_intent, ri_list); 283 return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - 284 XFS_FSB_TO_AGNO(mp, rb->ri_startblock); 285} 286 287/* Get an CUI. */ 288STATIC void * 289xfs_refcount_update_create_intent( 290 struct xfs_trans *tp, 291 unsigned int count) 292{ 293 struct xfs_cui_log_item *cuip; 294 295 ASSERT(tp != NULL); 296 ASSERT(count > 0); 297 298 cuip = xfs_cui_init(tp->t_mountp, count); 299 ASSERT(cuip != NULL); 300 301 /* 302 * Get a log_item_desc to point at the new item. 303 */ 304 xfs_trans_add_item(tp, &cuip->cui_item); 305 return cuip; 306} 307 308/* Set the phys extent flags for this reverse mapping. 
*/ 309static void 310xfs_trans_set_refcount_flags( 311 struct xfs_phys_extent *refc, 312 enum xfs_refcount_intent_type type) 313{ 314 refc->pe_flags = 0; 315 switch (type) { 316 case XFS_REFCOUNT_INCREASE: 317 case XFS_REFCOUNT_DECREASE: 318 case XFS_REFCOUNT_ALLOC_COW: 319 case XFS_REFCOUNT_FREE_COW: 320 refc->pe_flags |= type; 321 break; 322 default: 323 ASSERT(0); 324 } 325} 326 327/* Log refcount updates in the intent item. */ 328STATIC void 329xfs_refcount_update_log_item( 330 struct xfs_trans *tp, 331 void *intent, 332 struct list_head *item) 333{ 334 struct xfs_cui_log_item *cuip = intent; 335 struct xfs_refcount_intent *refc; 336 uint next_extent; 337 struct xfs_phys_extent *ext; 338 339 refc = container_of(item, struct xfs_refcount_intent, ri_list); 340 341 tp->t_flags |= XFS_TRANS_DIRTY; 342 set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); 343 344 /* 345 * atomic_inc_return gives us the value after the increment; 346 * we want to use it as an array index so we need to subtract 1 from 347 * it. 348 */ 349 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 350 ASSERT(next_extent < cuip->cui_format.cui_nextents); 351 ext = &cuip->cui_format.cui_extents[next_extent]; 352 ext->pe_startblock = refc->ri_startblock; 353 ext->pe_len = refc->ri_blockcount; 354 xfs_trans_set_refcount_flags(ext, refc->ri_type); 355} 356 357/* Get an CUD so we can process all the deferred refcount updates. */ 358STATIC void * 359xfs_refcount_update_create_done( 360 struct xfs_trans *tp, 361 void *intent, 362 unsigned int count) 363{ 364 return xfs_trans_get_cud(tp, intent); 365} 366 367/* Process a deferred refcount update. 
*/ 368STATIC int 369xfs_refcount_update_finish_item( 370 struct xfs_trans *tp, 371 struct list_head *item, 372 void *done_item, 373 void **state) 374{ 375 struct xfs_refcount_intent *refc; 376 xfs_fsblock_t new_fsb; 377 xfs_extlen_t new_aglen; 378 int error; 379 380 refc = container_of(item, struct xfs_refcount_intent, ri_list); 381 error = xfs_trans_log_finish_refcount_update(tp, done_item, 382 refc->ri_type, 383 refc->ri_startblock, 384 refc->ri_blockcount, 385 &new_fsb, &new_aglen, 386 (struct xfs_btree_cur **)state); 387 /* Did we run out of reservation? Requeue what we didn't finish. */ 388 if (!error && new_aglen > 0) { 389 ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || 390 refc->ri_type == XFS_REFCOUNT_DECREASE); 391 refc->ri_startblock = new_fsb; 392 refc->ri_blockcount = new_aglen; 393 return -EAGAIN; 394 } 395 kmem_free(refc); 396 return error; 397} 398 399/* Clean up after processing deferred refcounts. */ 400STATIC void 401xfs_refcount_update_finish_cleanup( 402 struct xfs_trans *tp, 403 void *state, 404 int error) 405{ 406 struct xfs_btree_cur *rcur = state; 407 408 xfs_refcount_finish_one_cleanup(tp, rcur, error); 409} 410 411/* Abort all pending CUIs. */ 412STATIC void 413xfs_refcount_update_abort_intent( 414 void *intent) 415{ 416 xfs_cui_release(intent); 417} 418 419/* Cancel a deferred refcount update. 
*/
STATIC void
xfs_refcount_update_cancel_item(
	struct list_head		*item)
{
	struct xfs_refcount_intent	*refc;

	/* The pending update is simply freed; nothing is logged. */
	refc = container_of(item, struct xfs_refcount_intent, ri_list);
	kmem_free(refc);
}

/* Deferred-operation callbacks for refcount updates. */
const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
	.max_items	= XFS_CUI_MAX_FAST_EXTENTS,
	.diff_items	= xfs_refcount_update_diff_items,
	.create_intent	= xfs_refcount_update_create_intent,
	.abort_intent	= xfs_refcount_update_abort_intent,
	.log_item	= xfs_refcount_update_log_item,
	.create_done	= xfs_refcount_update_create_done,
	.finish_item	= xfs_refcount_update_finish_item,
	.finish_cleanup = xfs_refcount_update_finish_cleanup,
	.cancel_item	= xfs_refcount_update_cancel_item,
};

/*
 * Process a refcount update intent item that was recovered from the log.
 * We need to update the refcountbt.
 *
 * Returns -EFSCORRUPTED if any extent recorded in the CUI fails validation
 * (the CUI is marked recovered and one reference to it is dropped), the
 * error from xfs_trans_alloc() if no transaction could be allocated, the
 * error from a failed refcount update (the transaction is cancelled), or
 * otherwise the result of committing the transaction.
 */
int
xfs_cui_recover(
	struct xfs_trans		*parent_tp,
	struct xfs_cui_log_item		*cuip)
{
	int				i;
	int				error = 0;
	unsigned int			refc_type;
	struct xfs_phys_extent		*refc;
	xfs_fsblock_t			startblock_fsb;
	bool				op_ok;
	struct xfs_cud_log_item		*cudp;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*rcur = NULL;
	enum xfs_refcount_intent_type	type;
	xfs_fsblock_t			new_fsb;
	xfs_extlen_t			new_len;
	struct xfs_bmbt_irec		irec;
	bool				requeue_only = false;
	struct xfs_mount		*mp = parent_tp->t_mountp;

	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));

	/*
	 * First check the validity of the extents described by the
	 * CUI.  If any are bad, then assume that all are bad and
	 * just toss the CUI.
	 */
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		startblock_fsb = XFS_BB_TO_FSB(mp,
				   XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
		/* Only the four known refcount operations are acceptable. */
		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			op_ok = true;
			break;
		default:
			op_ok = false;
			break;
		}
		/*
		 * Reject an unknown operation, a zero start block or length,
		 * a start block at or beyond sb_dblocks, a length of
		 * sb_agblocks or more, or any flag bits outside
		 * XFS_REFCOUNT_EXTENT_FLAGS.
		 */
		if (!op_ok || startblock_fsb == 0 ||
		    refc->pe_len == 0 ||
		    startblock_fsb >= mp->m_sb.sb_dblocks ||
		    refc->pe_len >= mp->m_sb.sb_agblocks ||
		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
			/*
			 * This will pull the CUI from the AIL and
			 * free the memory associated with it (once the
			 * last reference has been dropped).
			 */
			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
			xfs_cui_release(cuip);
			return -EFSCORRUPTED;
		}
	}

	/*
	 * Under normal operation, refcount updates are deferred, so we
	 * wouldn't be adding them directly to a transaction.  All
	 * refcount updates manage reservation usage internally and
	 * dynamically by deferring work that won't fit in the
	 * transaction.  Normally, any work that needs to be deferred
	 * gets attached to the same defer_ops that scheduled the
	 * refcount update.  However, we're in log recovery here, so
	 * we use the passed-in defer_ops to finish up any work that
	 * doesn't fit.  We need to reserve enough blocks to handle a
	 * full btree split on either end of the refcount range.
	 */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;
	/*
	 * Recovery stashes all deferred ops during intent processing and
	 * finishes them on completion. Transfer current dfops state to this
	 * transaction and transfer the result back before we return.
	 */
	xfs_defer_move(tp, parent_tp);
	cudp = xfs_trans_get_cud(tp, cuip);

	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
		switch (refc_type) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			type = refc_type;
			break;
		default:
			XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto abort_error;
		}
		/*
		 * Once an earlier extent has left work unfinished, the
		 * remaining extents are not processed here; each one is
		 * requeued whole instead.
		 */
		if (requeue_only) {
			new_fsb = refc->pe_startblock;
			new_len = refc->pe_len;
		} else
			error = xfs_trans_log_finish_refcount_update(tp, cudp,
				type, refc->pe_startblock, refc->pe_len,
				&new_fsb, &new_len, &rcur);
		if (error)
			goto abort_error;

		/* Requeue what we didn't finish. */
		if (new_len > 0) {
			irec.br_startblock = new_fsb;
			irec.br_blockcount = new_len;
			switch (type) {
			case XFS_REFCOUNT_INCREASE:
				xfs_refcount_increase_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_DECREASE:
				xfs_refcount_decrease_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_ALLOC_COW:
				xfs_refcount_alloc_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			case XFS_REFCOUNT_FREE_COW:
				xfs_refcount_free_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			default:
				ASSERT(0);
			}
			requeue_only = true;
		}
	}

	/*
	 * Success: release the cursor, mark the CUI recovered, hand the
	 * dfops state back to the parent transaction and commit.
	 */
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
	xfs_defer_move(parent_tp, tp);
	error = xfs_trans_commit(tp);
	return error;

abort_error:
	/*
	 * Failure: release the cursor with the error, hand the dfops state
	 * back to the parent transaction and cancel this one.
	 */
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	xfs_defer_move(parent_tp, tp);
	xfs_trans_cancel(tp);
	return error;
}