Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'xfs-for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs updates from Dave Chinner:
"There is nothing really major here - the only significant addition is
the per-mount operation statistics infrastructure. Otherwise there's
various ACL, xattr, DAX, AIO and logging fixes, and a smattering of
small cleanups and fixes elsewhere.

Summary:

- per-mount operational statistics in sysfs
- fixes for concurrent aio append write submission
- various logging fixes
- detection of zeroed logs and invalid log sequence numbers on v5 filesystems
- memory allocation failure message improvements
- a bunch of xattr/ACL fixes
- fdatasync optimisation
- miscellaneous other fixes and cleanups"

* tag 'xfs-for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (39 commits)
xfs: give all workqueues rescuer threads
xfs: fix log recovery op header validation assert
xfs: Fix error path in xfs_get_acl
xfs: optimise away log forces on timestamp updates for fdatasync
xfs: don't leak uuid table on rmmod
xfs: invalidate cached acl if set via ioctl
xfs: Plug memory leak in xfs_attrmulti_attr_set
xfs: Validate the length of on-disk ACLs
xfs: invalidate cached acl if set directly via xattr
xfs: xfs_filemap_pmd_fault treats read faults as write faults
xfs: add ->pfn_mkwrite support for DAX
xfs: DAX does not use IO completion callbacks
xfs: Don't use unwritten extents for DAX
xfs: introduce BMAPI_ZERO for allocating zeroed extents
xfs: fix inode size update overflow in xfs_map_direct()
xfs: clear PF_NOFREEZE for xfsaild kthread
xfs: fix an error code in xfs_fs_fill_super()
xfs: stats are no longer dependent on CONFIG_PROC_FS
xfs: simplify /proc teardown & error handling
xfs: per-filesystem stats counter implementation
...

+1002 -419
+5
fs/dax.c
··· 29 29 #include <linux/uio.h> 30 30 #include <linux/vmstat.h> 31 31 32 + /* 33 + * dax_clear_blocks() is called from within transaction context from XFS, 34 + * and hence this means the stack from this point must follow GFP_NOFS 35 + * semantics for all operations. 36 + */ 32 37 int dax_clear_blocks(struct inode *inode, sector_t block, long size) 33 38 { 34 39 struct block_device *bdev = inode->i_sb->s_bdev;
+1 -1
fs/xfs/Makefile
··· 84 84 xfs_message.o \ 85 85 xfs_mount.o \ 86 86 xfs_mru_cache.o \ 87 + xfs_stats.o \ 87 88 xfs_super.o \ 88 89 xfs_symlink.o \ 89 90 xfs_sysfs.o \ ··· 119 118 xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 120 119 121 120 xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 122 - xfs-$(CONFIG_PROC_FS) += xfs_stats.o 123 121 xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o 124 122 xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o 125 123 xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
+6 -4
fs/xfs/kmem.c
··· 55 55 return ptr; 56 56 if (!(++retries % 100)) 57 57 xfs_err(NULL, 58 - "possible memory allocation deadlock in %s (mode:0x%x)", 59 - __func__, lflags); 58 + "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)", 59 + current->comm, current->pid, 60 + (unsigned int)size, __func__, lflags); 60 61 congestion_wait(BLK_RW_ASYNC, HZ/50); 61 62 } while (1); 62 63 } ··· 121 120 return ptr; 122 121 if (!(++retries % 100)) 123 122 xfs_err(NULL, 124 - "possible memory allocation deadlock in %s (mode:0x%x)", 125 - __func__, lflags); 123 + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", 124 + current->comm, current->pid, 125 + __func__, lflags); 126 126 congestion_wait(BLK_RW_ASYNC, HZ/50); 127 127 } while (1); 128 128 }
+22 -8
fs/xfs/libxfs/xfs_alloc.c
··· 482 482 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 483 483 return false; 484 484 } 485 - return true; 485 + 486 + return xfs_log_check_lsn(mp, 487 + be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); 486 488 } 487 489 488 490 static void ··· 653 651 -((long)(args->len))); 654 652 } 655 653 656 - XFS_STATS_INC(xs_allocx); 657 - XFS_STATS_ADD(xs_allocb, args->len); 654 + XFS_STATS_INC(args->mp, xs_allocx); 655 + XFS_STATS_ADD(args->mp, xs_allocb, args->len); 658 656 return error; 659 657 } 660 658 ··· 1810 1808 1811 1809 if (!isfl) 1812 1810 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1813 - XFS_STATS_INC(xs_freex); 1814 - XFS_STATS_ADD(xs_freeb, len); 1811 + XFS_STATS_INC(mp, xs_freex); 1812 + XFS_STATS_ADD(mp, xs_freeb, len); 1815 1813 1816 1814 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1817 1815 ··· 2261 2259 { 2262 2260 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2263 2261 2264 - if (xfs_sb_version_hascrc(&mp->m_sb) && 2265 - !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2262 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 2263 + if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2266 2264 return false; 2265 + if (!xfs_log_check_lsn(mp, 2266 + be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2267 + return false; 2268 + } 2267 2269 2268 2270 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2269 2271 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && ··· 2509 2503 * Try near allocation first, then anywhere-in-ag after 2510 2504 * the first a.g. fails. 
2511 2505 */ 2512 - if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && 2506 + if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) && 2513 2507 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 2514 2508 args->fsbno = XFS_AGB_TO_FSB(mp, 2515 2509 ((mp->m_agfrotor / rotorstep) % ··· 2640 2634 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 2641 2635 args->len); 2642 2636 #endif 2637 + 2638 + /* Zero the extent if we were asked to do so */ 2639 + if (args->userdata & XFS_ALLOC_USERDATA_ZERO) { 2640 + error = xfs_zero_extent(args->ip, args->fsbno, args->len); 2641 + if (error) 2642 + goto error0; 2643 + } 2644 + 2643 2645 } 2644 2646 xfs_perag_put(args->pag); 2645 2647 return 0;
+5 -3
fs/xfs/libxfs/xfs_alloc.h
··· 101 101 struct xfs_mount *mp; /* file system mount point */ 102 102 struct xfs_buf *agbp; /* buffer for a.g. freelist header */ 103 103 struct xfs_perag *pag; /* per-ag struct for this agno */ 104 + struct xfs_inode *ip; /* for userdata zeroing method */ 104 105 xfs_fsblock_t fsbno; /* file system block number */ 105 106 xfs_agnumber_t agno; /* allocation group number */ 106 107 xfs_agblock_t agbno; /* allocation group-relative block # */ ··· 121 120 char wasdel; /* set if allocation was prev delayed */ 122 121 char wasfromfl; /* set if allocation is from freelist */ 123 122 char isfl; /* set if is freelist blocks - !acctg */ 124 - char userdata; /* set if this is user data */ 123 + char userdata; /* mask defining userdata treatment */ 125 124 xfs_fsblock_t firstblock; /* io first block allocated */ 126 125 } xfs_alloc_arg_t; 127 126 128 127 /* 129 128 * Defines for userdata 130 129 */ 131 - #define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ 132 - #define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ 130 + #define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ 131 + #define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ 132 + #define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ 133 133 134 134 xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, 135 135 struct xfs_perag *pag, xfs_extlen_t need);
+3 -3
fs/xfs/libxfs/xfs_attr.c
··· 125 125 uint lock_mode; 126 126 int error; 127 127 128 - XFS_STATS_INC(xs_attr_get); 128 + XFS_STATS_INC(ip->i_mount, xs_attr_get); 129 129 130 130 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 131 131 return -EIO; ··· 209 209 int rsvd = (flags & ATTR_ROOT) != 0; 210 210 int error, err2, committed, local; 211 211 212 - XFS_STATS_INC(xs_attr_set); 212 + XFS_STATS_INC(mp, xs_attr_set); 213 213 214 214 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 215 215 return -EIO; ··· 412 412 xfs_fsblock_t firstblock; 413 413 int error; 414 414 415 - XFS_STATS_INC(xs_attr_remove); 415 + XFS_STATS_INC(mp, xs_attr_remove); 416 416 417 417 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 418 418 return -EIO;
+3
fs/xfs/libxfs/xfs_attr_leaf.c
··· 41 41 #include "xfs_buf_item.h" 42 42 #include "xfs_cksum.h" 43 43 #include "xfs_dir2.h" 44 + #include "xfs_log.h" 44 45 45 46 46 47 /* ··· 266 265 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 267 266 return false; 268 267 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 268 + return false; 269 + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 269 270 return false; 270 271 } else { 271 272 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
+1 -1
fs/xfs/libxfs/xfs_attr_remote.c
··· 107 107 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 108 108 return false; 109 109 if (be32_to_cpu(rmt->rm_offset) + 110 - be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) 110 + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 111 111 return false; 112 112 if (rmt->rm_owner == 0) 113 113 return false;
+49 -16
fs/xfs/libxfs/xfs_bmap.c
··· 948 948 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 949 949 950 950 /* 951 - * Initialise the block and copy the data 951 + * Initialize the block, copy the data and log the remote buffer. 952 952 * 953 - * Note: init_fn must set the buffer log item type correctly! 953 + * The callout is responsible for logging because the remote format 954 + * might differ from the local format and thus we don't know how much to 955 + * log here. Note that init_fn must also set the buffer log item type 956 + * correctly. 954 957 */ 955 958 init_fn(tp, bp, ip, ifp); 956 959 957 - /* account for the change in fork size and log everything */ 958 - xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); 960 + /* account for the change in fork size */ 959 961 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 960 962 xfs_bmap_local_to_extents_empty(ip, whichfork); 961 963 flags |= XFS_ILOG_CORE; ··· 1437 1435 xfs_ifork_t *ifp; /* inode fork pointer */ 1438 1436 xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 1439 1437 1440 - XFS_STATS_INC(xs_look_exlist); 1438 + XFS_STATS_INC(ip->i_mount, xs_look_exlist); 1441 1439 ifp = XFS_IFORK_PTR(ip, fork); 1442 1440 1443 1441 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); ··· 1734 1732 ASSERT(!bma->cur || 1735 1733 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 1736 1734 1737 - XFS_STATS_INC(xs_add_exlist); 1735 + XFS_STATS_INC(mp, xs_add_exlist); 1738 1736 1739 1737 #define LEFT r[0] 1740 1738 #define RIGHT r[1] ··· 2288 2286 ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 2289 2287 ASSERT(!isnullstartblock(new->br_startblock)); 2290 2288 2291 - XFS_STATS_INC(xs_add_exlist); 2289 + XFS_STATS_INC(mp, xs_add_exlist); 2292 2290 2293 2291 #define LEFT r[0] 2294 2292 #define RIGHT r[1] ··· 2948 2946 ASSERT(!bma->cur || 2949 2947 !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 2950 2948 2951 - XFS_STATS_INC(xs_add_exlist); 2949 + XFS_STATS_INC(mp, xs_add_exlist); 2952 2950 2953 2951 
state = 0; 2954 2952 if (whichfork == XFS_ATTR_FORK) ··· 3802 3800 args.wasdel = ap->wasdel; 3803 3801 args.isfl = 0; 3804 3802 args.userdata = ap->userdata; 3805 - if ((error = xfs_alloc_vextent(&args))) 3803 + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) 3804 + args.ip = ap->ip; 3805 + 3806 + error = xfs_alloc_vextent(&args); 3807 + if (error) 3806 3808 return error; 3809 + 3807 3810 if (tryagain && args.fsbno == NULLFSBLOCK) { 3808 3811 /* 3809 3812 * Exact allocation failed. Now try with alignment ··· 4043 4036 if (XFS_FORCED_SHUTDOWN(mp)) 4044 4037 return -EIO; 4045 4038 4046 - XFS_STATS_INC(xs_blk_mapr); 4039 + XFS_STATS_INC(mp, xs_blk_mapr); 4047 4040 4048 4041 ifp = XFS_IFORK_PTR(ip, whichfork); 4049 4042 ··· 4228 4221 if (XFS_FORCED_SHUTDOWN(mp)) 4229 4222 return -EIO; 4230 4223 4231 - XFS_STATS_INC(xs_blk_mapw); 4224 + XFS_STATS_INC(mp, xs_blk_mapw); 4232 4225 4233 4226 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4234 4227 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); ··· 4307 4300 4308 4301 /* 4309 4302 * Indicate if this is the first user data in the file, or just any 4310 - * user data. 4303 + * user data. And if it is userdata, indicate whether it needs to 4304 + * be initialised to zero during allocation. 4311 4305 */ 4312 4306 if (!(bma->flags & XFS_BMAPI_METADATA)) { 4313 4307 bma->userdata = (bma->offset == 0) ? 4314 4308 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; 4309 + if (bma->flags & XFS_BMAPI_ZERO) 4310 + bma->userdata |= XFS_ALLOC_USERDATA_ZERO; 4315 4311 } 4316 4312 4317 4313 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; ··· 4429 4419 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4430 4420 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4431 4421 4422 + /* 4423 + * Before insertion into the bmbt, zero the range being converted 4424 + * if required. 
4425 + */ 4426 + if (flags & XFS_BMAPI_ZERO) { 4427 + error = xfs_zero_extent(bma->ip, mval->br_startblock, 4428 + mval->br_blockcount); 4429 + if (error) 4430 + return error; 4431 + } 4432 + 4432 4433 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4433 4434 &bma->cur, mval, bma->firstblock, bma->flist, 4434 4435 &tmp_logflags); ··· 4533 4512 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4534 4513 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4535 4514 4515 + /* zeroing is for currently only for data extents, not metadata */ 4516 + ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != 4517 + (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)); 4518 + /* 4519 + * we can allocate unwritten extents or pre-zero allocated blocks, 4520 + * but it makes no sense to do both at once. This would result in 4521 + * zeroing the unwritten extent twice, but it still being an 4522 + * unwritten extent.... 4523 + */ 4524 + ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != 4525 + (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); 4526 + 4536 4527 if (unlikely(XFS_TEST_ERROR( 4537 4528 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 4538 4529 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), ··· 4558 4525 4559 4526 ifp = XFS_IFORK_PTR(ip, whichfork); 4560 4527 4561 - XFS_STATS_INC(xs_blk_mapw); 4528 + XFS_STATS_INC(mp, xs_blk_mapw); 4562 4529 4563 4530 if (*firstblock == NULLFSBLOCK) { 4564 4531 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) ··· 4751 4718 xfs_filblks_t temp2; /* for indirect length calculations */ 4752 4719 int state = 0; 4753 4720 4754 - XFS_STATS_INC(xs_del_exlist); 4721 + mp = ip->i_mount; 4722 + XFS_STATS_INC(mp, xs_del_exlist); 4755 4723 4756 4724 if (whichfork == XFS_ATTR_FORK) 4757 4725 state |= BMAP_ATTRFORK; 4758 4726 4759 - mp = ip->i_mount; 4760 4727 ifp = XFS_IFORK_PTR(ip, whichfork); 4761 4728 ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / 4762 4729 (uint)sizeof(xfs_bmbt_rec_t))); ··· 5103 
5070 *done = 1; 5104 5071 return 0; 5105 5072 } 5106 - XFS_STATS_INC(xs_blk_unmap); 5073 + XFS_STATS_INC(mp, xs_blk_unmap); 5107 5074 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 5108 5075 start = bno; 5109 5076 bno = start + len - 1;
+11 -2
fs/xfs/libxfs/xfs_bmap.h
··· 52 52 xfs_extlen_t minleft; /* amount must be left after alloc */ 53 53 bool eof; /* set if allocating past last extent */ 54 54 bool wasdel; /* replacing a delayed allocation */ 55 - bool userdata;/* set if is user data */ 56 55 bool aeof; /* allocated space at eof */ 57 56 bool conv; /* overwriting unwritten extents */ 57 + char userdata;/* userdata mask */ 58 58 int flags; 59 59 }; 60 60 ··· 109 109 */ 110 110 #define XFS_BMAPI_CONVERT 0x040 111 111 112 + /* 113 + * allocate zeroed extents - this requires all newly allocated user data extents 114 + * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set. 115 + * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found 116 + * during the allocation range to zeroed written extents. 117 + */ 118 + #define XFS_BMAPI_ZERO 0x080 119 + 112 120 #define XFS_BMAPI_FLAGS \ 113 121 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 114 122 { XFS_BMAPI_METADATA, "METADATA" }, \ ··· 124 116 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 125 117 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 126 118 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 127 - { XFS_BMAPI_CONVERT, "CONVERT" } 119 + { XFS_BMAPI_CONVERT, "CONVERT" }, \ 120 + { XFS_BMAPI_ZERO, "ZERO" } 128 121 129 122 130 123 static inline int xfs_bmapi_aflag(int w)
+17 -4
fs/xfs/libxfs/xfs_btree.c
··· 32 32 #include "xfs_trace.h" 33 33 #include "xfs_cksum.h" 34 34 #include "xfs_alloc.h" 35 + #include "xfs_log.h" 35 36 36 37 /* 37 38 * Cursor allocation zone. ··· 223 222 * long-form btree header. 224 223 * 225 224 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 226 - * it into the buffer so recovery knows what the last modifcation was that made 225 + * it into the buffer so recovery knows what the last modification was that made 227 226 * it to disk. 228 227 */ 229 228 void ··· 244 243 xfs_btree_lblock_verify_crc( 245 244 struct xfs_buf *bp) 246 245 { 247 - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 246 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 247 + struct xfs_mount *mp = bp->b_target->bt_mount; 248 + 249 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 250 + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 251 + return false; 248 252 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 253 + } 249 254 250 255 return true; 251 256 } ··· 261 254 * short-form btree header. 262 255 * 263 256 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 264 - * it into the buffer so recovery knows what the last modifcation was that made 257 + * it into the buffer so recovery knows what the last modification was that made 265 258 * it to disk. 266 259 */ 267 260 void ··· 282 275 xfs_btree_sblock_verify_crc( 283 276 struct xfs_buf *bp) 284 277 { 285 - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 278 + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 279 + struct xfs_mount *mp = bp->b_target->bt_mount; 280 + 281 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 282 + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 283 + return false; 286 284 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 285 + } 287 286 288 287 return true; 289 288 }
+23 -16
fs/xfs/libxfs/xfs_btree.h
··· 84 84 /* 85 85 * Generic stats interface 86 86 */ 87 - #define __XFS_BTREE_STATS_INC(type, stat) \ 88 - XFS_STATS_INC(xs_ ## type ## _2_ ## stat) 89 - #define XFS_BTREE_STATS_INC(cur, stat) \ 87 + #define __XFS_BTREE_STATS_INC(mp, type, stat) \ 88 + XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat) 89 + #define XFS_BTREE_STATS_INC(cur, stat) \ 90 90 do { \ 91 + struct xfs_mount *__mp = cur->bc_mp; \ 91 92 switch (cur->bc_btnum) { \ 92 - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ 93 - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ 94 - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ 95 - case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ 96 - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ 93 + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \ 94 + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \ 95 + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ 96 + case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ 97 + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ 97 98 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 98 99 } \ 99 100 } while (0) 100 101 101 - #define __XFS_BTREE_STATS_ADD(type, stat, val) \ 102 - XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) 102 + #define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \ 103 + XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val) 103 104 #define XFS_BTREE_STATS_ADD(cur, stat, val) \ 104 105 do { \ 106 + struct xfs_mount *__mp = cur->bc_mp; \ 105 107 switch (cur->bc_btnum) { \ 106 - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ 107 - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ 108 - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ 109 - case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ 110 - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, 
val); break; \ 111 - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 108 + case XFS_BTNUM_BNO: \ 109 + __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \ 110 + case XFS_BTNUM_CNT: \ 111 + __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \ 112 + case XFS_BTNUM_BMAP: \ 113 + __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \ 114 + case XFS_BTNUM_INO: \ 115 + __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ 116 + case XFS_BTNUM_FINO: \ 117 + __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ 118 + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 112 119 } \ 113 120 } while (0) 114 121
+4
fs/xfs/libxfs/xfs_da_btree.c
··· 39 39 #include "xfs_trace.h" 40 40 #include "xfs_cksum.h" 41 41 #include "xfs_buf_item.h" 42 + #include "xfs_log.h" 42 43 43 44 /* 44 45 * xfs_da_btree.c ··· 150 149 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 151 150 return false; 152 151 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 152 + return false; 153 + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 153 154 return false; 154 155 } else { 155 156 if (ichdr.magic != XFS_DA_NODE_MAGIC) ··· 325 322 if (xfs_sb_version_hascrc(&mp->m_sb)) { 326 323 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 327 324 325 + memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr)); 328 326 ichdr.magic = XFS_DA3_NODE_MAGIC; 329 327 hdr3->info.blkno = cpu_to_be64(bp->b_bn); 330 328 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
+3 -3
fs/xfs/libxfs/xfs_dir2.c
··· 271 271 rval = xfs_dir_ino_validate(tp->t_mountp, inum); 272 272 if (rval) 273 273 return rval; 274 - XFS_STATS_INC(xs_dir_create); 274 + XFS_STATS_INC(dp->i_mount, xs_dir_create); 275 275 } 276 276 277 277 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); ··· 365 365 int lock_mode; 366 366 367 367 ASSERT(S_ISDIR(dp->i_d.di_mode)); 368 - XFS_STATS_INC(xs_dir_lookup); 368 + XFS_STATS_INC(dp->i_mount, xs_dir_lookup); 369 369 370 370 /* 371 371 * We need to use KM_NOFS here so that lockdep will not throw false ··· 444 444 int v; /* type-checking value */ 445 445 446 446 ASSERT(S_ISDIR(dp->i_d.di_mode)); 447 - XFS_STATS_INC(xs_dir_remove); 447 + XFS_STATS_INC(dp->i_mount, xs_dir_remove); 448 448 449 449 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 450 450 if (!args)
+3
fs/xfs/libxfs/xfs_dir2_block.c
··· 34 34 #include "xfs_error.h" 35 35 #include "xfs_trace.h" 36 36 #include "xfs_cksum.h" 37 + #include "xfs_log.h" 37 38 38 39 /* 39 40 * Local function prototypes. ··· 71 70 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 72 71 return false; 73 72 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 73 + return false; 74 + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 74 75 return false; 75 76 } else { 76 77 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
+3
fs/xfs/libxfs/xfs_dir2_data.c
··· 31 31 #include "xfs_trans.h" 32 32 #include "xfs_buf_item.h" 33 33 #include "xfs_cksum.h" 34 + #include "xfs_log.h" 34 35 35 36 /* 36 37 * Check the consistency of the data block. ··· 224 223 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 225 224 return false; 226 225 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 226 + return false; 227 + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 227 228 return false; 228 229 } else { 229 230 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
+3
fs/xfs/libxfs/xfs_dir2_leaf.c
··· 33 33 #include "xfs_trans.h" 34 34 #include "xfs_buf_item.h" 35 35 #include "xfs_cksum.h" 36 + #include "xfs_log.h" 36 37 37 38 /* 38 39 * Local function declarations. ··· 164 163 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) 165 164 return false; 166 165 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 166 + return false; 167 + if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) 167 168 return false; 168 169 } else { 169 170 if (leaf->hdr.info.magic != cpu_to_be16(magic))
+3
fs/xfs/libxfs/xfs_dir2_node.c
··· 33 33 #include "xfs_trans.h" 34 34 #include "xfs_buf_item.h" 35 35 #include "xfs_cksum.h" 36 + #include "xfs_log.h" 36 37 37 38 /* 38 39 * Function declarations. ··· 97 96 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 98 97 return false; 99 98 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 99 + return false; 100 + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 100 101 return false; 101 102 } else { 102 103 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
+15 -3
fs/xfs/libxfs/xfs_format.h
··· 60 60 #define XFS_SB_VERSION_MOREBITSBIT 0x8000 61 61 62 62 /* 63 + * The size of a single extended attribute on disk is limited by 64 + * the size of index values within the attribute entries themselves. 65 + * These are be16 fields, so we can only support attribute data 66 + * sizes up to 2^16 bytes in length. 67 + */ 68 + #define XFS_XATTR_SIZE_MAX (1 << 16) 69 + 70 + /* 63 71 * Supported feature bit list is just all bits in the versionnum field because 64 72 * we've used them all up and understand them all. Except, of course, for the 65 73 * shared superblock bit, which nobody knows what it does and so is unsupported. ··· 1491 1483 */ 1492 1484 #define XFS_ACL_MAX_ENTRIES(mp) \ 1493 1485 (xfs_sb_version_hascrc(&mp->m_sb) \ 1494 - ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ 1486 + ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ 1495 1487 sizeof(struct xfs_acl_entry) \ 1496 1488 : 25) 1497 1489 1498 - #define XFS_ACL_MAX_SIZE(mp) \ 1490 + #define XFS_ACL_SIZE(cnt) \ 1499 1491 (sizeof(struct xfs_acl) + \ 1500 - sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) 1492 + sizeof(struct xfs_acl_entry) * cnt) 1493 + 1494 + #define XFS_ACL_MAX_SIZE(mp) \ 1495 + XFS_ACL_SIZE(XFS_ACL_MAX_ENTRIES((mp))) 1496 + 1501 1497 1502 1498 /* On-disk XFS extended attribute names */ 1503 1499 #define SGI_ACL_FILE "SGI_ACL_FILE"
+10
fs/xfs/libxfs/xfs_fs.h
··· 490 490 #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ 491 491 492 492 /* 493 + * ioctl limits 494 + */ 495 + #ifdef XATTR_LIST_MAX 496 + # define XFS_XATTR_LIST_MAX XATTR_LIST_MAX 497 + #else 498 + # define XFS_XATTR_LIST_MAX 65536 499 + #endif 500 + 501 + 502 + /* 493 503 * ioctl commands that are used by Linux filesystems 494 504 */ 495 505 #define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
+8 -2
fs/xfs/libxfs/xfs_ialloc.c
··· 38 38 #include "xfs_icreate_item.h" 39 39 #include "xfs_icache.h" 40 40 #include "xfs_trace.h" 41 + #include "xfs_log.h" 41 42 42 43 43 44 /* ··· 2501 2500 struct xfs_mount *mp = bp->b_target->bt_mount; 2502 2501 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 2503 2502 2504 - if (xfs_sb_version_hascrc(&mp->m_sb) && 2505 - !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) 2503 + if (xfs_sb_version_hascrc(&mp->m_sb)) { 2504 + if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) 2506 2505 return false; 2506 + if (!xfs_log_check_lsn(mp, 2507 + be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) 2508 + return false; 2509 + } 2510 + 2507 2511 /* 2508 2512 * Validate the magic number of the agi block. 2509 2513 */
+10
fs/xfs/libxfs/xfs_sb.c
··· 35 35 #include "xfs_bmap_btree.h" 36 36 #include "xfs_alloc_btree.h" 37 37 #include "xfs_ialloc_btree.h" 38 + #include "xfs_log.h" 38 39 39 40 /* 40 41 * Physical superblock buffer manipulations. Shared with libxfs in userspace. ··· 164 163 "Filesystem can not be safely mounted by this kernel."); 165 164 return -EINVAL; 166 165 } 166 + } else if (xfs_sb_version_hascrc(sbp)) { 167 + /* 168 + * We can't read verify the sb LSN because the read verifier is 169 + * called before the log is allocated and processed. We know the 170 + * log is set up before write verifier (!check_version) calls, 171 + * so just check it here. 172 + */ 173 + if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) 174 + return -EFSCORRUPTED; 167 175 } 168 176 169 177 if (xfs_sb_version_has_pquotino(sbp)) {
+9 -5
fs/xfs/xfs_acl.c
··· 37 37 38 38 STATIC struct posix_acl * 39 39 xfs_acl_from_disk( 40 - struct xfs_acl *aclp, 41 - int max_entries) 40 + const struct xfs_acl *aclp, 41 + int len, 42 + int max_entries) 42 43 { 43 44 struct posix_acl_entry *acl_e; 44 45 struct posix_acl *acl; 45 - struct xfs_acl_entry *ace; 46 + const struct xfs_acl_entry *ace; 46 47 unsigned int count, i; 47 48 49 + if (len < sizeof(*aclp)) 50 + return ERR_PTR(-EFSCORRUPTED); 48 51 count = be32_to_cpu(aclp->acl_cnt); 49 - if (count > max_entries) 52 + if (count > max_entries || XFS_ACL_SIZE(count) != len) 50 53 return ERR_PTR(-EFSCORRUPTED); 51 54 52 55 acl = posix_acl_alloc(count, GFP_KERNEL); ··· 163 160 */ 164 161 if (error == -ENOATTR) 165 162 goto out_update_cache; 163 + acl = ERR_PTR(error); 166 164 goto out; 167 165 } 168 166 169 - acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); 167 + acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); 170 168 if (IS_ERR(acl)) 171 169 goto out; 172 170
+3 -1
fs/xfs/xfs_acl.h
··· 20 20 21 21 struct inode; 22 22 struct posix_acl; 23 - struct xfs_inode; 24 23 25 24 #ifdef CONFIG_XFS_POSIX_ACL 26 25 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); ··· 35 36 # define posix_acl_access_exists(inode) 0 36 37 # define posix_acl_default_exists(inode) 0 37 38 #endif /* CONFIG_XFS_POSIX_ACL */ 39 + 40 + extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags); 41 + 38 42 #endif /* __XFS_ACL_H__ */
+66 -53
fs/xfs/xfs_aops.c
··· 172 172 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 173 173 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 174 174 175 + /* we abort the update if there was an IO error */ 176 + if (ioend->io_error) { 177 + xfs_trans_cancel(tp); 178 + return ioend->io_error; 179 + } 180 + 175 181 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 176 182 } 177 183 ··· 218 212 ioend->io_error = -EIO; 219 213 goto done; 220 214 } 221 - if (ioend->io_error) 222 - goto done; 223 215 224 216 /* 225 217 * For unwritten extents we need to issue transactions to convert a 226 218 * range to normal written extens after the data I/O has finished. 219 + * Detecting and handling completion IO errors is done individually 220 + * for each case as different cleanup operations need to be performed 221 + * on error. 227 222 */ 228 223 if (ioend->io_type == XFS_IO_UNWRITTEN) { 224 + if (ioend->io_error) 225 + goto done; 229 226 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 230 227 ioend->io_size); 231 228 } else if (ioend->io_append_trans) { ··· 1259 1250 * the DIO. There is only going to be one reference to the ioend and its life 1260 1251 * cycle is constrained by the DIO completion code. hence we don't need 1261 1252 * reference counting here. 1253 + * 1254 + * Note that for DIO, an IO to the highest supported file block offset (i.e. 1255 + * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 1256 + * bit variable. Hence if we see this overflow, we have to assume that the IO is 1257 + * extending the file size. We won't know for sure until IO completion is run 1258 + * and the actual max write offset is communicated to the IO completion 1259 + * routine. 1260 + * 1261 + * For DAX page faults, we are preparing to never see unwritten extents here, 1262 + * nor should we ever extend the inode size. 
Hence we will soon have nothing to 1263 + * do here for this case, ensuring we don't have to provide an IO completion 1264 + * callback to free an ioend that we don't actually need for a fault into the 1265 + * page at offset (2^63 - 1FSB) bytes. 1262 1266 */ 1267 + 1263 1268 static void 1264 1269 xfs_map_direct( 1265 1270 struct inode *inode, 1266 1271 struct buffer_head *bh_result, 1267 1272 struct xfs_bmbt_irec *imap, 1268 - xfs_off_t offset) 1273 + xfs_off_t offset, 1274 + bool dax_fault) 1269 1275 { 1270 1276 struct xfs_ioend *ioend; 1271 1277 xfs_off_t size = bh_result->b_size; ··· 1292 1268 type = XFS_IO_OVERWRITE; 1293 1269 1294 1270 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); 1271 + 1272 + if (dax_fault) { 1273 + ASSERT(type == XFS_IO_OVERWRITE); 1274 + trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, 1275 + imap); 1276 + return; 1277 + } 1295 1278 1296 1279 if (bh_result->b_private) { 1297 1280 ioend = bh_result->b_private; ··· 1314 1283 ioend->io_size, ioend->io_type, 1315 1284 imap); 1316 1285 } else if (type == XFS_IO_UNWRITTEN || 1317 - offset + size > i_size_read(inode)) { 1286 + offset + size > i_size_read(inode) || 1287 + offset + size < 0) { 1318 1288 ioend = xfs_alloc_ioend(inode, type); 1319 1289 ioend->io_offset = offset; 1320 1290 ioend->io_size = size; ··· 1377 1345 sector_t iblock, 1378 1346 struct buffer_head *bh_result, 1379 1347 int create, 1380 - bool direct) 1348 + bool direct, 1349 + bool dax_fault) 1381 1350 { 1382 1351 struct xfs_inode *ip = XFS_I(inode); 1383 1352 struct xfs_mount *mp = ip->i_mount; ··· 1426 1393 if (error) 1427 1394 goto out_unlock; 1428 1395 1396 + /* for DAX, we convert unwritten extents directly */ 1429 1397 if (create && 1430 1398 (!nimaps || 1431 1399 (imap.br_startblock == HOLESTARTBLOCK || 1432 - imap.br_startblock == DELAYSTARTBLOCK))) { 1400 + imap.br_startblock == DELAYSTARTBLOCK) || 1401 + (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { 1433 1402 if (direct || 
xfs_get_extsz_hint(ip)) { 1434 1403 /* 1435 - * Drop the ilock in preparation for starting the block 1436 - * allocation transaction. It will be retaken 1437 - * exclusively inside xfs_iomap_write_direct for the 1438 - * actual allocation. 1404 + * xfs_iomap_write_direct() expects the shared lock. It 1405 + * is unlocked on return. 1439 1406 */ 1440 - xfs_iunlock(ip, lockmode); 1407 + if (lockmode == XFS_ILOCK_EXCL) 1408 + xfs_ilock_demote(ip, lockmode); 1409 + 1441 1410 error = xfs_iomap_write_direct(ip, offset, size, 1442 1411 &imap, nimaps); 1443 1412 if (error) ··· 1476 1441 goto out_unlock; 1477 1442 } 1478 1443 1444 + if (IS_DAX(inode) && create) { 1445 + ASSERT(!ISUNWRITTEN(&imap)); 1446 + /* zeroing is not needed at a higher layer */ 1447 + new = 0; 1448 + } 1449 + 1479 1450 /* trim mapping down to size requested */ 1480 1451 if (direct || size > (1 << inode->i_blkbits)) 1481 1452 xfs_map_trim_size(inode, iblock, bh_result, ··· 1499 1458 set_buffer_unwritten(bh_result); 1500 1459 /* direct IO needs special help */ 1501 1460 if (create && direct) 1502 - xfs_map_direct(inode, bh_result, &imap, offset); 1461 + xfs_map_direct(inode, bh_result, &imap, offset, 1462 + dax_fault); 1503 1463 } 1504 1464 1505 1465 /* ··· 1547 1505 struct buffer_head *bh_result, 1548 1506 int create) 1549 1507 { 1550 - return __xfs_get_blocks(inode, iblock, bh_result, create, false); 1508 + return __xfs_get_blocks(inode, iblock, bh_result, create, false, false); 1551 1509 } 1552 1510 1553 1511 int ··· 1557 1515 struct buffer_head *bh_result, 1558 1516 int create) 1559 1517 { 1560 - return __xfs_get_blocks(inode, iblock, bh_result, create, true); 1518 + return __xfs_get_blocks(inode, iblock, bh_result, create, true, false); 1519 + } 1520 + 1521 + int 1522 + xfs_get_blocks_dax_fault( 1523 + struct inode *inode, 1524 + sector_t iblock, 1525 + struct buffer_head *bh_result, 1526 + int create) 1527 + { 1528 + return __xfs_get_blocks(inode, iblock, bh_result, create, true, true); 1561 1529 
} 1562 1530 1563 1531 static void ··· 1665 1613 1666 1614 __xfs_end_io_direct_write(inode, ioend, offset, size); 1667 1615 } 1668 - 1669 - /* 1670 - * For DAX we need a mapping buffer callback for unwritten extent conversion 1671 - * when page faults allocate blocks and then zero them. Note that in this 1672 - * case the mapping indicated by the ioend may extend beyond EOF. We most 1673 - * definitely do not want to extend EOF here, so we trim back the ioend size to 1674 - * EOF. 1675 - */ 1676 - #ifdef CONFIG_FS_DAX 1677 - void 1678 - xfs_end_io_dax_write( 1679 - struct buffer_head *bh, 1680 - int uptodate) 1681 - { 1682 - struct xfs_ioend *ioend = bh->b_private; 1683 - struct inode *inode = ioend->io_inode; 1684 - ssize_t size = ioend->io_size; 1685 - 1686 - ASSERT(IS_DAX(ioend->io_inode)); 1687 - 1688 - /* if there was an error zeroing, then don't convert it */ 1689 - if (!uptodate) 1690 - ioend->io_error = -EIO; 1691 - 1692 - /* 1693 - * Trim update to EOF, so we don't extend EOF during unwritten extent 1694 - * conversion of partial EOF blocks. 1695 - */ 1696 - spin_lock(&XFS_I(inode)->i_flags_lock); 1697 - if (ioend->io_offset + size > i_size_read(inode)) 1698 - size = i_size_read(inode) - ioend->io_offset; 1699 - spin_unlock(&XFS_I(inode)->i_flags_lock); 1700 - 1701 - __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size); 1702 - 1703 - } 1704 - #else 1705 - void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { } 1706 - #endif 1707 1616 1708 1617 static inline ssize_t 1709 1618 xfs_vm_do_dio(
+2 -1
fs/xfs/xfs_aops.h
··· 58 58 struct buffer_head *map_bh, int create); 59 59 int xfs_get_blocks_direct(struct inode *inode, sector_t offset, 60 60 struct buffer_head *map_bh, int create); 61 - void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); 61 + int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset, 62 + struct buffer_head *map_bh, int create); 62 63 63 64 extern void xfs_count_page_state(struct page *, int *, int *); 64 65
+1 -1
fs/xfs/xfs_attr_list.c
··· 511 511 xfs_inode_t *dp = context->dp; 512 512 uint lock_mode; 513 513 514 - XFS_STATS_INC(xs_attr_list); 514 + XFS_STATS_INC(dp->i_mount, xs_attr_list); 515 515 516 516 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 517 517 return -EIO;
+37 -1
fs/xfs/xfs_bmap_util.c
··· 57 57 } 58 58 59 59 /* 60 + * Routine to zero an extent on disk allocated to the specific inode. 61 + * 62 + * The VFS functions take a linearised filesystem block offset, so we have to 63 + * convert the sparse xfs fsb to the right format first. 64 + * VFS types are real funky, too. 65 + */ 66 + int 67 + xfs_zero_extent( 68 + struct xfs_inode *ip, 69 + xfs_fsblock_t start_fsb, 70 + xfs_off_t count_fsb) 71 + { 72 + struct xfs_mount *mp = ip->i_mount; 73 + xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); 74 + sector_t block = XFS_BB_TO_FSBT(mp, sector); 75 + ssize_t size = XFS_FSB_TO_B(mp, count_fsb); 76 + 77 + if (IS_DAX(VFS_I(ip))) 78 + return dax_clear_blocks(VFS_I(ip), block, size); 79 + 80 + /* 81 + * let the block layer decide on the fastest method of 82 + * implementing the zeroing. 83 + */ 84 + return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); 85 + 86 + } 87 + 88 + /* 60 89 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 61 90 * caller. Frees all the extents that need freeing, which must be done 62 91 * last due to locking considerations. We never free any extents in ··· 258 229 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 259 230 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : 260 231 XFS_TRANS_DQ_RTBCOUNT, (long) ralen); 232 + 233 + /* Zero the extent if we were asked to do so */ 234 + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) { 235 + error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); 236 + if (error) 237 + return error; 238 + } 261 239 } else { 262 240 ap->length = 0; 263 241 } ··· 1063 1027 xfs_bmap_init(&free_list, &firstfsb); 1064 1028 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 1065 1029 allocatesize_fsb, alloc_type, &firstfsb, 1066 - 0, imapp, &nimaps, &free_list); 1030 + resblks, imapp, &nimaps, &free_list); 1067 1031 if (error) { 1068 1032 goto error0; 1069 1033 }
+11 -10
fs/xfs/xfs_buf.c
··· 201 201 atomic_set(&bp->b_pin_count, 0); 202 202 init_waitqueue_head(&bp->b_waiters); 203 203 204 - XFS_STATS_INC(xb_create); 204 + XFS_STATS_INC(target->bt_mount, xb_create); 205 205 trace_xfs_buf_init(bp, _RET_IP_); 206 206 207 207 return bp; ··· 354 354 */ 355 355 if (!(++retries % 100)) 356 356 xfs_err(NULL, 357 - "possible memory allocation deadlock in %s (mode:0x%x)", 357 + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", 358 + current->comm, current->pid, 358 359 __func__, gfp_mask); 359 360 360 - XFS_STATS_INC(xb_page_retries); 361 + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); 361 362 congestion_wait(BLK_RW_ASYNC, HZ/50); 362 363 goto retry; 363 364 } 364 365 365 - XFS_STATS_INC(xb_page_found); 366 + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); 366 367 367 368 nbytes = min_t(size_t, size, PAGE_SIZE - offset); 368 369 size -= nbytes; ··· 517 516 new_bp->b_pag = pag; 518 517 spin_unlock(&pag->pag_buf_lock); 519 518 } else { 520 - XFS_STATS_INC(xb_miss_locked); 519 + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); 521 520 spin_unlock(&pag->pag_buf_lock); 522 521 xfs_perag_put(pag); 523 522 } ··· 530 529 if (!xfs_buf_trylock(bp)) { 531 530 if (flags & XBF_TRYLOCK) { 532 531 xfs_buf_rele(bp); 533 - XFS_STATS_INC(xb_busy_locked); 532 + XFS_STATS_INC(btp->bt_mount, xb_busy_locked); 534 533 return NULL; 535 534 } 536 535 xfs_buf_lock(bp); 537 - XFS_STATS_INC(xb_get_locked_waited); 536 + XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); 538 537 } 539 538 540 539 /* ··· 550 549 } 551 550 552 551 trace_xfs_buf_find(bp, flags, _RET_IP_); 553 - XFS_STATS_INC(xb_get_locked); 552 + XFS_STATS_INC(btp->bt_mount, xb_get_locked); 554 553 return bp; 555 554 } 556 555 ··· 604 603 } 605 604 } 606 605 607 - XFS_STATS_INC(xb_get); 606 + XFS_STATS_INC(target->bt_mount, xb_get); 608 607 trace_xfs_buf_get(bp, flags, _RET_IP_); 609 608 return bp; 610 609 } ··· 644 643 trace_xfs_buf_read(bp, flags, _RET_IP_); 645 644 646 645 if 
(!XFS_BUF_ISDONE(bp)) { 647 - XFS_STATS_INC(xb_get_read); 646 + XFS_STATS_INC(target->bt_mount, xb_get_read); 648 647 bp->b_ops = ops; 649 648 _xfs_buf_read(bp, flags); 650 649 } else if (flags & XBF_ASYNC) {
+1 -1
fs/xfs/xfs_dir2_readdir.c
··· 666 666 return -EIO; 667 667 668 668 ASSERT(S_ISDIR(dp->i_d.di_mode)); 669 - XFS_STATS_INC(xs_dir_getdents); 669 + XFS_STATS_INC(dp->i_mount, xs_dir_getdents); 670 670 671 671 args.dp = dp; 672 672 args.geo = dp->i_mount->m_dir_geo;
+7 -7
fs/xfs/xfs_dquot.c
··· 75 75 ASSERT(list_empty(&dqp->q_lru)); 76 76 77 77 mutex_destroy(&dqp->q_qlock); 78 - kmem_zone_free(xfs_qm_dqzone, dqp); 79 78 80 - XFS_STATS_DEC(xs_qm_dquot); 79 + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); 80 + kmem_zone_free(xfs_qm_dqzone, dqp); 81 81 } 82 82 83 83 /* ··· 605 605 break; 606 606 } 607 607 608 - XFS_STATS_INC(xs_qm_dquot); 608 + XFS_STATS_INC(mp, xs_qm_dquot); 609 609 610 610 trace_xfs_dqread(dqp); 611 611 ··· 747 747 mutex_unlock(&qi->qi_tree_lock); 748 748 749 749 trace_xfs_dqget_hit(dqp); 750 - XFS_STATS_INC(xs_qm_dqcachehits); 750 + XFS_STATS_INC(mp, xs_qm_dqcachehits); 751 751 *O_dqpp = dqp; 752 752 return 0; 753 753 } 754 754 mutex_unlock(&qi->qi_tree_lock); 755 - XFS_STATS_INC(xs_qm_dqcachemisses); 755 + XFS_STATS_INC(mp, xs_qm_dqcachemisses); 756 756 757 757 /* 758 758 * Dquot cache miss. We don't want to keep the inode lock across ··· 806 806 mutex_unlock(&qi->qi_tree_lock); 807 807 trace_xfs_dqget_dup(dqp); 808 808 xfs_qm_dqdestroy(dqp); 809 - XFS_STATS_INC(xs_qm_dquot_dups); 809 + XFS_STATS_INC(mp, xs_qm_dquot_dups); 810 810 goto restart; 811 811 } 812 812 ··· 846 846 trace_xfs_dqput_free(dqp); 847 847 848 848 if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) 849 - XFS_STATS_INC(xs_qm_dquot_unused); 849 + XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused); 850 850 } 851 851 xfs_dqunlock(dqp); 852 852 }
+88 -26
fs/xfs/xfs_file.c
··· 242 242 } 243 243 244 244 /* 245 - * All metadata updates are logged, which means that we just have 246 - * to flush the log up to the latest LSN that touched the inode. 245 + * All metadata updates are logged, which means that we just have to 246 + * flush the log up to the latest LSN that touched the inode. If we have 247 + * concurrent fsync/fdatasync() calls, we need them to all block on the 248 + * log force before we clear the ili_fsync_fields field. This ensures 249 + * that we don't get a racing sync operation that does not wait for the 250 + * metadata to hit the journal before returning. If we race with 251 + * clearing the ili_fsync_fields, then all that will happen is the log 252 + * force will do nothing as the lsn will already be on disk. We can't 253 + * race with setting ili_fsync_fields because that is done under 254 + * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared 255 + * until after the ili_fsync_fields is cleared. 247 256 */ 248 257 xfs_ilock(ip, XFS_ILOCK_SHARED); 249 258 if (xfs_ipincount(ip)) { 250 259 if (!datasync || 251 - (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) 260 + (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) 252 261 lsn = ip->i_itemp->ili_last_lsn; 253 262 } 254 - xfs_iunlock(ip, XFS_ILOCK_SHARED); 255 263 256 - if (lsn) 264 + if (lsn) { 257 265 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 266 + ip->i_itemp->ili_fsync_fields = 0; 267 + } 268 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 258 269 259 270 /* 260 271 * If we only have a single device, and the log force about was ··· 298 287 xfs_fsize_t n; 299 288 loff_t pos = iocb->ki_pos; 300 289 301 - XFS_STATS_INC(xs_read_calls); 290 + XFS_STATS_INC(mp, xs_read_calls); 302 291 303 292 if (unlikely(iocb->ki_flags & IOCB_DIRECT)) 304 293 ioflags |= XFS_IO_ISDIRECT; ··· 376 365 377 366 ret = generic_file_read_iter(iocb, to); 378 367 if (ret > 0) 379 - XFS_STATS_ADD(xs_read_bytes, ret); 368 + XFS_STATS_ADD(mp, xs_read_bytes, ret); 
380 369 381 370 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 382 371 return ret; ··· 394 383 int ioflags = 0; 395 384 ssize_t ret; 396 385 397 - XFS_STATS_INC(xs_read_calls); 386 + XFS_STATS_INC(ip->i_mount, xs_read_calls); 398 387 399 388 if (infilp->f_mode & FMODE_NOCMTIME) 400 389 ioflags |= XFS_IO_INVIS; ··· 412 401 else 413 402 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 414 403 if (ret > 0) 415 - XFS_STATS_ADD(xs_read_bytes, ret); 404 + XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); 416 405 417 406 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 418 407 return ret; ··· 492 481 493 482 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 494 483 ASSERT(offset > isize); 484 + 485 + trace_xfs_zero_eof(ip, isize, offset - isize); 495 486 496 487 /* 497 488 * First handle zeroing the block on which isize resides. ··· 587 574 struct xfs_inode *ip = XFS_I(inode); 588 575 ssize_t error = 0; 589 576 size_t count = iov_iter_count(from); 577 + bool drained_dio = false; 590 578 591 579 restart: 592 580 error = generic_write_checks(iocb, from); ··· 625 611 bool zero = false; 626 612 627 613 spin_unlock(&ip->i_flags_lock); 628 - if (*iolock == XFS_IOLOCK_SHARED) { 629 - xfs_rw_iunlock(ip, *iolock); 630 - *iolock = XFS_IOLOCK_EXCL; 631 - xfs_rw_ilock(ip, *iolock); 632 - iov_iter_reexpand(from, count); 633 - 614 + if (!drained_dio) { 615 + if (*iolock == XFS_IOLOCK_SHARED) { 616 + xfs_rw_iunlock(ip, *iolock); 617 + *iolock = XFS_IOLOCK_EXCL; 618 + xfs_rw_ilock(ip, *iolock); 619 + iov_iter_reexpand(from, count); 620 + } 634 621 /* 635 622 * We now have an IO submission barrier in place, but 636 623 * AIO can do EOF updates during IO completion and hence ··· 641 626 * no-op. 
642 627 */ 643 628 inode_dio_wait(inode); 629 + drained_dio = true; 644 630 goto restart; 645 631 } 646 632 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); ··· 883 867 ssize_t ret; 884 868 size_t ocount = iov_iter_count(from); 885 869 886 - XFS_STATS_INC(xs_write_calls); 870 + XFS_STATS_INC(ip->i_mount, xs_write_calls); 887 871 888 872 if (ocount == 0) 889 873 return 0; ··· 899 883 if (ret > 0) { 900 884 ssize_t err; 901 885 902 - XFS_STATS_ADD(xs_write_bytes, ret); 886 + XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); 903 887 904 888 /* Handle various SYNC-type writes */ 905 889 err = generic_write_sync(file, iocb->ki_pos - ret, ret); ··· 1493 1477 * 1494 1478 * mmap_sem (MM) 1495 1479 * sb_start_pagefault(vfs, freeze) 1496 - * i_mmap_lock (XFS - truncate serialisation) 1480 + * i_mmaplock (XFS - truncate serialisation) 1497 1481 * page_lock (MM) 1498 1482 * i_lock (XFS - extent map serialisation) 1499 1483 */ ··· 1519 1503 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1520 1504 1521 1505 if (IS_DAX(inode)) { 1522 - ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, 1523 - xfs_end_io_dax_write); 1506 + ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL); 1524 1507 } else { 1525 1508 ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); 1526 1509 ret = block_page_mkwrite_return(ret); ··· 1553 1538 * changes to xfs_get_blocks_direct() to map unwritten extent 1554 1539 * ioend for conversion on read-only mappings. 1555 1540 */ 1556 - ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); 1541 + ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL); 1557 1542 } else 1558 1543 ret = filemap_fault(vma, vmf); 1559 1544 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ··· 1561 1546 return ret; 1562 1547 } 1563 1548 1549 + /* 1550 + * Similar to xfs_filemap_fault(), the DAX fault path can call into here on 1551 + * both read and write faults. Hence we need to handle both cases. 
There is no 1552 + * ->pmd_mkwrite callout for huge pages, so we have a single function here to 1553 + * handle both cases here. @flags carries the information on the type of fault 1554 + * occuring. 1555 + */ 1564 1556 STATIC int 1565 1557 xfs_filemap_pmd_fault( 1566 1558 struct vm_area_struct *vma, ··· 1584 1562 1585 1563 trace_xfs_filemap_pmd_fault(ip); 1586 1564 1587 - sb_start_pagefault(inode->i_sb); 1588 - file_update_time(vma->vm_file); 1565 + if (flags & FAULT_FLAG_WRITE) { 1566 + sb_start_pagefault(inode->i_sb); 1567 + file_update_time(vma->vm_file); 1568 + } 1569 + 1589 1570 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1590 - ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, 1591 - xfs_end_io_dax_write); 1571 + ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault, 1572 + NULL); 1592 1573 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1593 - sb_end_pagefault(inode->i_sb); 1574 + 1575 + if (flags & FAULT_FLAG_WRITE) 1576 + sb_end_pagefault(inode->i_sb); 1594 1577 1595 1578 return ret; 1579 + } 1580 + 1581 + /* 1582 + * pfn_mkwrite was originally inteneded to ensure we capture time stamp 1583 + * updates on write faults. In reality, it's need to serialise against 1584 + * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite() 1585 + * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault 1586 + * barrier in place. 
1587 + */ 1588 + static int 1589 + xfs_filemap_pfn_mkwrite( 1590 + struct vm_area_struct *vma, 1591 + struct vm_fault *vmf) 1592 + { 1593 + 1594 + struct inode *inode = file_inode(vma->vm_file); 1595 + struct xfs_inode *ip = XFS_I(inode); 1596 + int ret = VM_FAULT_NOPAGE; 1597 + loff_t size; 1598 + 1599 + trace_xfs_filemap_pfn_mkwrite(ip); 1600 + 1601 + sb_start_pagefault(inode->i_sb); 1602 + file_update_time(vma->vm_file); 1603 + 1604 + /* check if the faulting page hasn't raced with truncate */ 1605 + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 1606 + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; 1607 + if (vmf->pgoff >= size) 1608 + ret = VM_FAULT_SIGBUS; 1609 + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1610 + sb_end_pagefault(inode->i_sb); 1611 + return ret; 1612 + 1596 1613 } 1597 1614 1598 1615 static const struct vm_operations_struct xfs_file_vm_ops = { ··· 1639 1578 .pmd_fault = xfs_filemap_pmd_fault, 1640 1579 .map_pages = filemap_map_pages, 1641 1580 .page_mkwrite = xfs_filemap_page_mkwrite, 1581 + .pfn_mkwrite = xfs_filemap_pfn_mkwrite, 1642 1582 }; 1643 1583 1644 1584 STATIC int
+9 -9
fs/xfs/xfs_icache.c
··· 63 63 return NULL; 64 64 } 65 65 66 - XFS_STATS_INC(vn_active); 66 + XFS_STATS_INC(mp, vn_active); 67 67 ASSERT(atomic_read(&ip->i_pincount) == 0); 68 68 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 69 69 ASSERT(!xfs_isiflocked(ip)); ··· 129 129 /* asserts to verify all state is correct here */ 130 130 ASSERT(atomic_read(&ip->i_pincount) == 0); 131 131 ASSERT(!xfs_isiflocked(ip)); 132 - XFS_STATS_DEC(vn_active); 132 + XFS_STATS_DEC(ip->i_mount, vn_active); 133 133 134 134 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 135 135 } ··· 159 159 spin_lock(&ip->i_flags_lock); 160 160 if (ip->i_ino != ino) { 161 161 trace_xfs_iget_skip(ip); 162 - XFS_STATS_INC(xs_ig_frecycle); 162 + XFS_STATS_INC(mp, xs_ig_frecycle); 163 163 error = -EAGAIN; 164 164 goto out_error; 165 165 } ··· 177 177 */ 178 178 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 179 179 trace_xfs_iget_skip(ip); 180 - XFS_STATS_INC(xs_ig_frecycle); 180 + XFS_STATS_INC(mp, xs_ig_frecycle); 181 181 error = -EAGAIN; 182 182 goto out_error; 183 183 } ··· 259 259 xfs_ilock(ip, lock_flags); 260 260 261 261 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); 262 - XFS_STATS_INC(xs_ig_found); 262 + XFS_STATS_INC(mp, xs_ig_found); 263 263 264 264 return 0; 265 265 ··· 342 342 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 343 343 if (unlikely(error)) { 344 344 WARN_ON(error != -EEXIST); 345 - XFS_STATS_INC(xs_ig_dup); 345 + XFS_STATS_INC(mp, xs_ig_dup); 346 346 error = -EAGAIN; 347 347 goto out_preload_end; 348 348 } ··· 412 412 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 413 413 return -EINVAL; 414 414 415 - XFS_STATS_INC(xs_ig_attempts); 415 + XFS_STATS_INC(mp, xs_ig_attempts); 416 416 417 417 /* get the perag structure and ensure that it's inode capable */ 418 418 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); ··· 429 429 goto out_error_or_again; 430 430 } else { 431 431 rcu_read_unlock(); 432 - XFS_STATS_INC(xs_ig_missed); 432 + XFS_STATS_INC(mp, xs_ig_missed); 433 433 434 
434 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 435 435 flags, lock_flags); ··· 965 965 xfs_ifunlock(ip); 966 966 xfs_iunlock(ip, XFS_ILOCK_EXCL); 967 967 968 - XFS_STATS_INC(xs_ig_reclaims); 968 + XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); 969 969 /* 970 970 * Remove the inode from the per-AG radix tree. 971 971 *
+5 -3
fs/xfs/xfs_inode.c
··· 2365 2365 2366 2366 iip->ili_last_fields = iip->ili_fields; 2367 2367 iip->ili_fields = 0; 2368 + iip->ili_fsync_fields = 0; 2368 2369 iip->ili_logged = 1; 2369 2370 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2370 2371 &iip->ili_item.li_lsn); ··· 3272 3271 } 3273 3272 3274 3273 if (clcount) { 3275 - XFS_STATS_INC(xs_icluster_flushcnt); 3276 - XFS_STATS_ADD(xs_icluster_flushinode, clcount); 3274 + XFS_STATS_INC(mp, xs_icluster_flushcnt); 3275 + XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); 3277 3276 } 3278 3277 3279 3278 out_free: ··· 3346 3345 struct xfs_dinode *dip; 3347 3346 int error; 3348 3347 3349 - XFS_STATS_INC(xs_iflush_count); 3348 + XFS_STATS_INC(mp, xs_iflush_count); 3350 3349 3351 3350 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3352 3351 ASSERT(xfs_isiflocked(ip)); ··· 3561 3560 */ 3562 3561 iip->ili_last_fields = iip->ili_fields; 3563 3562 iip->ili_fields = 0; 3563 + iip->ili_fsync_fields = 0; 3564 3564 iip->ili_logged = 1; 3565 3565 3566 3566 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
+1
fs/xfs/xfs_inode_item.c
··· 719 719 * attempted. 720 720 */ 721 721 iip->ili_fields = 0; 722 + iip->ili_fsync_fields = 0; 722 723 } 723 724 /* 724 725 * Release the inode's flush lock since we're done with it.
+1
fs/xfs/xfs_inode_item.h
··· 34 34 unsigned short ili_logged; /* flushed logged data */ 35 35 unsigned int ili_last_fields; /* fields when flushed */ 36 36 unsigned int ili_fields; /* fields to be logged */ 37 + unsigned int ili_fsync_fields; /* logged since last fsync */ 37 38 } xfs_inode_log_item_t; 38 39 39 40 static inline int xfs_inode_clean(xfs_inode_t *ip)
+17 -6
fs/xfs/xfs_ioctl.c
··· 40 40 #include "xfs_symlink.h" 41 41 #include "xfs_trans.h" 42 42 #include "xfs_pnfs.h" 43 + #include "xfs_acl.h" 43 44 44 45 #include <linux/capability.h> 45 46 #include <linux/dcache.h> ··· 412 411 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) 413 412 return -EFAULT; 414 413 if (al_hreq.buflen < sizeof(struct attrlist) || 415 - al_hreq.buflen > XATTR_LIST_MAX) 414 + al_hreq.buflen > XFS_XATTR_LIST_MAX) 416 415 return -EINVAL; 417 416 418 417 /* ··· 456 455 unsigned char *kbuf; 457 456 int error = -EFAULT; 458 457 459 - if (*len > XATTR_SIZE_MAX) 458 + if (*len > XFS_XATTR_SIZE_MAX) 460 459 return -EINVAL; 461 460 kbuf = kmem_zalloc_large(*len, KM_SLEEP); 462 461 if (!kbuf) ··· 483 482 __uint32_t flags) 484 483 { 485 484 unsigned char *kbuf; 485 + int error; 486 486 487 487 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 488 488 return -EPERM; 489 - if (len > XATTR_SIZE_MAX) 489 + if (len > XFS_XATTR_SIZE_MAX) 490 490 return -EINVAL; 491 491 492 492 kbuf = memdup_user(ubuf, len); 493 493 if (IS_ERR(kbuf)) 494 494 return PTR_ERR(kbuf); 495 495 496 - return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 496 + error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 497 + if (!error) 498 + xfs_forget_acl(inode, name, flags); 499 + kfree(kbuf); 500 + return error; 497 501 } 498 502 499 503 int ··· 507 501 unsigned char *name, 508 502 __uint32_t flags) 509 503 { 504 + int error; 505 + 510 506 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 511 507 return -EPERM; 512 - return xfs_attr_remove(XFS_I(inode), name, flags); 508 + error = xfs_attr_remove(XFS_I(inode), name, flags); 509 + if (!error) 510 + xfs_forget_acl(inode, name, flags); 511 + return error; 513 512 } 514 513 515 514 STATIC int ··· 1039 1028 xfs_diflags_to_linux(ip); 1040 1029 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1041 1030 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1042 - XFS_STATS_INC(xs_ig_attrchg); 1031 + XFS_STATS_INC(mp, xs_ig_attrchg); 1043 1032 return 0; 
1044 1033 } 1045 1034
+1 -1
fs/xfs/xfs_ioctl32.c
··· 356 356 sizeof(compat_xfs_fsop_attrlist_handlereq_t))) 357 357 return -EFAULT; 358 358 if (al_hreq.buflen < sizeof(struct attrlist) || 359 - al_hreq.buflen > XATTR_LIST_MAX) 359 + al_hreq.buflen > XFS_XATTR_LIST_MAX) 360 360 return -EINVAL; 361 361 362 362 /*
+54 -16
fs/xfs/xfs_iomap.c
··· 131 131 uint qblocks, resblks, resrtextents; 132 132 int committed; 133 133 int error; 134 - 135 - error = xfs_qm_dqattach(ip, 0); 136 - if (error) 137 - return error; 134 + int lockmode; 135 + int bmapi_flags = XFS_BMAPI_PREALLOC; 138 136 139 137 rt = XFS_IS_REALTIME_INODE(ip); 140 138 extsz = xfs_get_extsz_hint(ip); 139 + lockmode = XFS_ILOCK_SHARED; /* locked by caller */ 140 + 141 + ASSERT(xfs_isilocked(ip, lockmode)); 141 142 142 143 offset_fsb = XFS_B_TO_FSBT(mp, offset); 143 144 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 144 145 if ((offset + count) > XFS_ISIZE(ip)) { 146 + /* 147 + * Assert that the in-core extent list is present since this can 148 + * call xfs_iread_extents() and we only have the ilock shared. 149 + * This should be safe because the lock was held around a bmapi 150 + * call in the caller and we only need it to access the in-core 151 + * list. 152 + */ 153 + ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & 154 + XFS_IFEXTENTS); 145 155 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); 146 156 if (error) 147 - return error; 157 + goto out_unlock; 148 158 } else { 149 159 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) 150 160 last_fsb = MIN(last_fsb, (xfs_fileoff_t) ··· 184 174 } 185 175 186 176 /* 177 + * Drop the shared lock acquired by the caller, attach the dquot if 178 + * necessary and move on to transaction setup. 179 + */ 180 + xfs_iunlock(ip, lockmode); 181 + error = xfs_qm_dqattach(ip, 0); 182 + if (error) 183 + return error; 184 + 185 + /* 187 186 * Allocate and setup the transaction 188 187 */ 189 188 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 189 + 190 + /* 191 + * For DAX, we do not allocate unwritten extents, but instead we zero 192 + * the block before we commit the transaction. 
Ideally we'd like to do 193 + * this outside the transaction context, but if we commit and then crash 194 + * we may not have zeroed the blocks and this will be exposed on 195 + * recovery of the allocation. Hence we must zero before commit. 196 + * Further, if we are mapping unwritten extents here, we need to zero 197 + * and convert them to written so that we don't need an unwritten extent 198 + * callback for DAX. This also means that we need to be able to dip into 199 + * the reserve block pool if there is no space left but we need to do 200 + * unwritten extent conversion. 201 + */ 202 + if (IS_DAX(VFS_I(ip))) { 203 + bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; 204 + tp->t_flags |= XFS_TRANS_RESERVE; 205 + } 190 206 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 191 207 resblks, resrtextents); 192 208 /* ··· 223 187 return error; 224 188 } 225 189 226 - xfs_ilock(ip, XFS_ILOCK_EXCL); 190 + lockmode = XFS_ILOCK_EXCL; 191 + xfs_ilock(ip, lockmode); 227 192 228 193 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); 229 194 if (error) ··· 239 202 xfs_bmap_init(&free_list, &firstfsb); 240 203 nimaps = 1; 241 204 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 242 - XFS_BMAPI_PREALLOC, &firstfsb, 0, 243 - imap, &nimaps, &free_list); 205 + bmapi_flags, &firstfsb, resblks, imap, 206 + &nimaps, &free_list); 244 207 if (error) 245 208 goto out_bmap_cancel; 246 209 ··· 250 213 error = xfs_bmap_finish(&tp, &free_list, &committed); 251 214 if (error) 252 215 goto out_bmap_cancel; 216 + 253 217 error = xfs_trans_commit(tp); 254 218 if (error) 255 219 goto out_unlock; ··· 267 229 error = xfs_alert_fsblock_zero(ip, imap); 268 230 269 231 out_unlock: 270 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 232 + xfs_iunlock(ip, lockmode); 271 233 return error; 272 234 273 235 out_bmap_cancel: ··· 708 670 count_fsb = imap->br_blockcount; 709 671 map_start_fsb = imap->br_startoff; 710 672 711 - XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); 673 
+ XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); 712 674 713 675 while (count_fsb != 0) { 714 676 /* ··· 788 750 * pointer that the caller gave to us. 789 751 */ 790 752 error = xfs_bmapi_write(tp, ip, map_start_fsb, 791 - count_fsb, 0, 792 - &first_block, 1, 793 - imap, &nimaps, &free_list); 753 + count_fsb, 0, &first_block, 754 + nres, imap, &nimaps, 755 + &free_list); 794 756 if (error) 795 757 goto trans_cancel; 796 758 ··· 815 777 if ((offset_fsb >= imap->br_startoff) && 816 778 (offset_fsb < (imap->br_startoff + 817 779 imap->br_blockcount))) { 818 - XFS_STATS_INC(xs_xstrat_quick); 780 + XFS_STATS_INC(mp, xs_xstrat_quick); 819 781 return 0; 820 782 } 821 783 ··· 904 866 xfs_bmap_init(&free_list, &firstfsb); 905 867 nimaps = 1; 906 868 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 907 - XFS_BMAPI_CONVERT, &firstfsb, 908 - 1, &imap, &nimaps, &free_list); 869 + XFS_BMAPI_CONVERT, &firstfsb, resblks, 870 + &imap, &nimaps, &free_list); 909 871 if (error) 910 872 goto error_on_bmapi_transaction; 911 873
+2 -2
fs/xfs/xfs_iops.c
··· 695 695 696 696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 697 697 698 - XFS_STATS_INC(xs_ig_attrchg); 698 + XFS_STATS_INC(mp, xs_ig_attrchg); 699 699 700 700 if (mp->m_flags & XFS_MOUNT_WSYNC) 701 701 xfs_trans_set_sync(tp); ··· 922 922 923 923 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 924 924 925 - XFS_STATS_INC(xs_ig_attrchg); 925 + XFS_STATS_INC(mp, xs_ig_attrchg); 926 926 927 927 if (mp->m_flags & XFS_MOUNT_WSYNC) 928 928 xfs_trans_set_sync(tp);
+7
fs/xfs/xfs_linux.h
··· 171 171 struct completion complete; 172 172 }; 173 173 174 + struct xstats { 175 + struct xfsstats __percpu *xs_stats; 176 + struct xfs_kobj xs_kobj; 177 + }; 178 + 179 + extern struct xstats xfsstats; 180 + 174 181 /* Kernel uid/gid conversion. These are used to convert to/from the on disk 175 182 * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. 176 183 * The conversion here is type only, the value will remain the same since we
+76 -17
fs/xfs/xfs_log.c
··· 268 268 __set_current_state(TASK_UNINTERRUPTIBLE); 269 269 spin_unlock(&head->lock); 270 270 271 - XFS_STATS_INC(xs_sleep_logspace); 271 + XFS_STATS_INC(log->l_mp, xs_sleep_logspace); 272 272 273 273 trace_xfs_log_grant_sleep(log, tic); 274 274 schedule(); ··· 379 379 if (XLOG_FORCED_SHUTDOWN(log)) 380 380 return -EIO; 381 381 382 - XFS_STATS_INC(xs_try_logspace); 382 + XFS_STATS_INC(mp, xs_try_logspace); 383 383 384 384 /* 385 385 * This is a new transaction on the ticket, so we need to change the ··· 448 448 if (XLOG_FORCED_SHUTDOWN(log)) 449 449 return -EIO; 450 450 451 - XFS_STATS_INC(xs_try_logspace); 451 + XFS_STATS_INC(mp, xs_try_logspace); 452 452 453 453 ASSERT(*ticp == NULL); 454 454 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, ··· 1768 1768 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1769 1769 int size; 1770 1770 1771 - XFS_STATS_INC(xs_log_writes); 1771 + XFS_STATS_INC(log->l_mp, xs_log_writes); 1772 1772 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1773 1773 1774 1774 /* Add for LR header */ ··· 1805 1805 bp = iclog->ic_bp; 1806 1806 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1807 1807 1808 - XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); 1808 + XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); 1809 1809 1810 1810 /* Do we need to split this write into 2 parts? */ 1811 1811 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { ··· 2422 2422 &partial_copy_len); 2423 2423 xlog_verify_dest_ptr(log, ptr); 2424 2424 2425 - /* copy region */ 2425 + /* 2426 + * Copy region. 2427 + * 2428 + * Unmount records just log an opheader, so can have 2429 + * empty payloads with no data region to copy. Hence we 2430 + * only copy the payload if the vector says it has data 2431 + * to copy. 
2432 + */ 2426 2433 ASSERT(copy_len >= 0); 2427 - memcpy(ptr, reg->i_addr + copy_off, copy_len); 2428 - xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); 2429 - 2434 + if (copy_len > 0) { 2435 + memcpy(ptr, reg->i_addr + copy_off, copy_len); 2436 + xlog_write_adv_cnt(&ptr, &len, &log_offset, 2437 + copy_len); 2438 + } 2430 2439 copy_len += start_rec_copy + sizeof(xlog_op_header_t); 2431 2440 record_cnt++; 2432 2441 data_cnt += contwr ? copy_len : 0; ··· 2922 2913 2923 2914 iclog = log->l_iclog; 2924 2915 if (iclog->ic_state != XLOG_STATE_ACTIVE) { 2925 - XFS_STATS_INC(xs_log_noiclogs); 2916 + XFS_STATS_INC(log->l_mp, xs_log_noiclogs); 2926 2917 2927 2918 /* Wait for log writes to have flushed */ 2928 2919 xlog_wait(&log->l_flush_wait, &log->l_icloglock); ··· 3174 3165 } 3175 3166 3176 3167 if (log->l_curr_block >= log->l_logBBsize) { 3168 + /* 3169 + * Rewind the current block before the cycle is bumped to make 3170 + * sure that the combined LSN never transiently moves forward 3171 + * when the log wraps to the next cycle. This is to support the 3172 + * unlocked sample of these fields from xlog_valid_lsn(). Most 3173 + * other cases should acquire l_icloglock. 
3174 + */ 3175 + log->l_curr_block -= log->l_logBBsize; 3176 + ASSERT(log->l_curr_block >= 0); 3177 + smp_wmb(); 3177 3178 log->l_curr_cycle++; 3178 3179 if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM) 3179 3180 log->l_curr_cycle++; 3180 - log->l_curr_block -= log->l_logBBsize; 3181 - ASSERT(log->l_curr_block >= 0); 3182 3181 } 3183 3182 ASSERT(iclog == log->l_iclog); 3184 3183 log->l_iclog = iclog->ic_next; ··· 3229 3212 struct xlog_in_core *iclog; 3230 3213 xfs_lsn_t lsn; 3231 3214 3232 - XFS_STATS_INC(xs_log_force); 3215 + XFS_STATS_INC(mp, xs_log_force); 3233 3216 3234 3217 xlog_cil_force(log); 3235 3218 ··· 3314 3297 spin_unlock(&log->l_icloglock); 3315 3298 return -EIO; 3316 3299 } 3317 - XFS_STATS_INC(xs_log_force_sleep); 3300 + XFS_STATS_INC(mp, xs_log_force_sleep); 3318 3301 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3319 3302 /* 3320 3303 * No need to grab the log lock here since we're ··· 3379 3362 3380 3363 ASSERT(lsn != 0); 3381 3364 3382 - XFS_STATS_INC(xs_log_force); 3365 + XFS_STATS_INC(mp, xs_log_force); 3383 3366 3384 3367 lsn = xlog_cil_force_lsn(log, lsn); 3385 3368 if (lsn == NULLCOMMITLSN) ··· 3428 3411 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { 3429 3412 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3430 3413 3431 - XFS_STATS_INC(xs_log_force_sleep); 3414 + XFS_STATS_INC(mp, xs_log_force_sleep); 3432 3415 3433 3416 xlog_wait(&iclog->ic_prev->ic_write_wait, 3434 3417 &log->l_icloglock); ··· 3458 3441 spin_unlock(&log->l_icloglock); 3459 3442 return -EIO; 3460 3443 } 3461 - XFS_STATS_INC(xs_log_force_sleep); 3444 + XFS_STATS_INC(mp, xs_log_force_sleep); 3462 3445 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3463 3446 /* 3464 3447 * No need to grab the log lock here since we're ··· 4040 4023 return 1; 4041 4024 } 4042 4025 4026 + /* 4027 + * Verify that an LSN stamped into a piece of metadata is valid. This is 4028 + * intended for use in read verifiers on v5 superblocks. 
4029 + */ 4030 + bool 4031 + xfs_log_check_lsn( 4032 + struct xfs_mount *mp, 4033 + xfs_lsn_t lsn) 4034 + { 4035 + struct xlog *log = mp->m_log; 4036 + bool valid; 4037 + 4038 + /* 4039 + * norecovery mode skips mount-time log processing and unconditionally 4040 + * resets the in-core LSN. We can't validate in this mode, but 4041 + * modifications are not allowed anyways so just return true. 4042 + */ 4043 + if (mp->m_flags & XFS_MOUNT_NORECOVERY) 4044 + return true; 4045 + 4046 + /* 4047 + * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is 4048 + * handled by recovery and thus safe to ignore here. 4049 + */ 4050 + if (lsn == NULLCOMMITLSN) 4051 + return true; 4052 + 4053 + valid = xlog_valid_lsn(mp->m_log, lsn); 4054 + 4055 + /* warn the user about what's gone wrong before verifier failure */ 4056 + if (!valid) { 4057 + spin_lock(&log->l_icloglock); 4058 + xfs_warn(mp, 4059 + "Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). " 4060 + "Please unmount and run xfs_repair (>= v4.3) to resolve.", 4061 + CYCLE_LSN(lsn), BLOCK_LSN(lsn), 4062 + log->l_curr_cycle, log->l_curr_block); 4063 + spin_unlock(&log->l_icloglock); 4064 + } 4065 + 4066 + return valid; 4067 + }
+1
fs/xfs/xfs_log.h
··· 181 181 void xfs_log_work_queue(struct xfs_mount *mp); 182 182 void xfs_log_worker(struct work_struct *work); 183 183 void xfs_log_quiesce(struct xfs_mount *mp); 184 + bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); 184 185 185 186 #endif /* __XFS_LOG_H__ */
+51
fs/xfs/xfs_log_priv.h
··· 560 560 remove_wait_queue(wq, &wait); 561 561 } 562 562 563 + /* 564 + * The LSN is valid so long as it is behind the current LSN. If it isn't, this 565 + * means that the next log record that includes this metadata could have a 566 + * smaller LSN. In turn, this means that the modification in the log would not 567 + * replay. 568 + */ 569 + static inline bool 570 + xlog_valid_lsn( 571 + struct xlog *log, 572 + xfs_lsn_t lsn) 573 + { 574 + int cur_cycle; 575 + int cur_block; 576 + bool valid = true; 577 + 578 + /* 579 + * First, sample the current lsn without locking to avoid added 580 + * contention from metadata I/O. The current cycle and block are updated 581 + * (in xlog_state_switch_iclogs()) and read here in a particular order 582 + * to avoid false negatives (e.g., thinking the metadata LSN is valid 583 + * when it is not). 584 + * 585 + * The current block is always rewound before the cycle is bumped in 586 + * xlog_state_switch_iclogs() to ensure the current LSN is never seen in 587 + * a transiently forward state. Instead, we can see the LSN in a 588 + * transiently behind state if we happen to race with a cycle wrap. 589 + */ 590 + cur_cycle = ACCESS_ONCE(log->l_curr_cycle); 591 + smp_rmb(); 592 + cur_block = ACCESS_ONCE(log->l_curr_block); 593 + 594 + if ((CYCLE_LSN(lsn) > cur_cycle) || 595 + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { 596 + /* 597 + * If the metadata LSN appears invalid, it's possible the check 598 + * above raced with a wrap to the next log cycle. Grab the lock 599 + * to check for sure. 600 + */ 601 + spin_lock(&log->l_icloglock); 602 + cur_cycle = log->l_curr_cycle; 603 + cur_block = log->l_curr_block; 604 + spin_unlock(&log->l_icloglock); 605 + 606 + if ((CYCLE_LSN(lsn) > cur_cycle) || 607 + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) 608 + valid = false; 609 + } 610 + 611 + return valid; 612 + } 613 + 563 614 #endif /* __XFS_LOG_PRIV_H__ */
+12 -2
fs/xfs/xfs_log_recover.c
··· 3431 3431 * previous record. Copy the rest of the header. 3432 3432 */ 3433 3433 if (list_empty(&trans->r_itemq)) { 3434 - ASSERT(len < sizeof(struct xfs_trans_header)); 3434 + ASSERT(len <= sizeof(struct xfs_trans_header)); 3435 3435 if (len > sizeof(struct xfs_trans_header)) { 3436 3436 xfs_warn(log->l_mp, "%s: bad header length", __func__); 3437 3437 return -EIO; ··· 4609 4609 int error; 4610 4610 4611 4611 /* find the tail of the log */ 4612 - if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) 4612 + error = xlog_find_tail(log, &head_blk, &tail_blk); 4613 + if (error) 4613 4614 return error; 4615 + 4616 + /* 4617 + * The superblock was read before the log was available and thus the LSN 4618 + * could not be verified. Check the superblock LSN against the current 4619 + * LSN now that it's known. 4620 + */ 4621 + if (xfs_sb_version_hascrc(&log->l_mp->m_sb) && 4622 + !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn)) 4623 + return -EINVAL; 4614 4624 4615 4625 if (tail_blk != head_blk) { 4616 4626 /* There used to be a comment here:
+7
fs/xfs/xfs_message.c
··· 17 17 18 18 #include "xfs.h" 19 19 #include "xfs_fs.h" 20 + #include "xfs_error.h" 20 21 #include "xfs_format.h" 21 22 #include "xfs_log_format.h" 22 23 #include "xfs_trans_resv.h" ··· 44 43 { \ 45 44 struct va_format vaf; \ 46 45 va_list args; \ 46 + int level; \ 47 47 \ 48 48 va_start(args, fmt); \ 49 49 \ ··· 53 51 \ 54 52 __xfs_printk(kern_level, mp, &vaf); \ 55 53 va_end(args); \ 54 + \ 55 + if (!kstrtoint(kern_level, 0, &level) && \ 56 + level <= LOGLEVEL_ERR && \ 57 + xfs_error_level >= XFS_ERRLEVEL_HIGH) \ 58 + xfs_stack_trace(); \ 56 59 } \ 57 60 58 61 define_xfs_printk_level(xfs_emerg, KERN_EMERG);
+20 -1
fs/xfs/xfs_mount.c
··· 47 47 static int xfs_uuid_table_size; 48 48 static uuid_t *xfs_uuid_table; 49 49 50 + void 51 + xfs_uuid_table_free(void) 52 + { 53 + if (xfs_uuid_table_size == 0) 54 + return; 55 + kmem_free(xfs_uuid_table); 56 + xfs_uuid_table = NULL; 57 + xfs_uuid_table_size = 0; 58 + } 59 + 50 60 /* 51 61 * See if the UUID is unique among mounted XFS filesystems. 52 62 * Mount fails if UUID is nil or a FS with the same UUID is already mounted. ··· 703 693 if (error) 704 694 goto out; 705 695 706 - error = xfs_uuid_mount(mp); 696 + error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype, 697 + &mp->m_kobj, "stats"); 707 698 if (error) 708 699 goto out_remove_sysfs; 700 + 701 + error = xfs_uuid_mount(mp); 702 + if (error) 703 + goto out_del_stats; 709 704 710 705 /* 711 706 * Set the minimum read and write sizes ··· 986 971 xfs_da_unmount(mp); 987 972 out_remove_uuid: 988 973 xfs_uuid_unmount(mp); 974 + out_del_stats: 975 + xfs_sysfs_del(&mp->m_stats.xs_kobj); 989 976 out_remove_sysfs: 990 977 xfs_sysfs_del(&mp->m_kobj); 991 978 out: ··· 1064 1047 xfs_warn(mp, "Unable to update superblock counters. " 1065 1048 "Freespace may not be correct on next mount."); 1066 1049 1050 + 1067 1051 xfs_log_unmount(mp); 1068 1052 xfs_da_unmount(mp); 1069 1053 xfs_uuid_unmount(mp); ··· 1074 1056 #endif 1075 1057 xfs_free_perag(mp); 1076 1058 1059 + xfs_sysfs_del(&mp->m_stats.xs_kobj); 1077 1060 xfs_sysfs_del(&mp->m_kobj); 1078 1061 } 1079 1062
+5
fs/xfs/xfs_mount.h
··· 127 127 int64_t m_low_space[XFS_LOWSP_MAX]; 128 128 /* low free space thresholds */ 129 129 struct xfs_kobj m_kobj; 130 + struct xstats m_stats; /* per-fs stats */ 130 131 131 132 struct workqueue_struct *m_buf_workqueue; 132 133 struct workqueue_struct *m_data_workqueue; ··· 313 312 int pagb_count; /* pagb slots in use */ 314 313 } xfs_perag_t; 315 314 315 + extern void xfs_uuid_table_free(void); 316 316 extern int xfs_log_sbcount(xfs_mount_t *); 317 317 extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); 318 318 extern int xfs_mountfs(xfs_mount_t *mp); ··· 337 335 extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 338 336 339 337 extern void xfs_set_low_space_thresholds(struct xfs_mount *); 338 + 339 + int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, 340 + xfs_off_t count_fsb); 340 341 341 342 #endif /* __XFS_MOUNT_H__ */
+5
fs/xfs/xfs_pnfs.c
··· 181 181 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 182 182 183 183 if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { 184 + /* 185 + * xfs_iomap_write_direct() expects to take ownership of 186 + * the shared ilock. 187 + */ 188 + xfs_ilock(ip, XFS_ILOCK_SHARED); 184 189 error = xfs_iomap_write_direct(ip, offset, length, 185 190 &imap, nimaps); 186 191 if (error)
+7 -7
fs/xfs/xfs_qm.c
··· 184 184 */ 185 185 ASSERT(!list_empty(&dqp->q_lru)); 186 186 list_lru_del(&qi->qi_lru, &dqp->q_lru); 187 - XFS_STATS_DEC(xs_qm_dquot_unused); 187 + XFS_STATS_DEC(mp, xs_qm_dquot_unused); 188 188 189 189 xfs_qm_dqdestroy(dqp); 190 190 return 0; ··· 448 448 */ 449 449 if (dqp->q_nrefs) { 450 450 xfs_dqunlock(dqp); 451 - XFS_STATS_INC(xs_qm_dqwants); 451 + XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants); 452 452 453 453 trace_xfs_dqreclaim_want(dqp); 454 454 list_lru_isolate(lru, &dqp->q_lru); 455 - XFS_STATS_DEC(xs_qm_dquot_unused); 455 + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); 456 456 return LRU_REMOVED; 457 457 } 458 458 ··· 496 496 497 497 ASSERT(dqp->q_nrefs == 0); 498 498 list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose); 499 - XFS_STATS_DEC(xs_qm_dquot_unused); 499 + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); 500 500 trace_xfs_dqreclaim_done(dqp); 501 - XFS_STATS_INC(xs_qm_dqreclaims); 501 + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims); 502 502 return LRU_REMOVED; 503 503 504 504 out_miss_busy: 505 505 trace_xfs_dqreclaim_busy(dqp); 506 - XFS_STATS_INC(xs_qm_dqreclaim_misses); 506 + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); 507 507 return LRU_SKIP; 508 508 509 509 out_unlock_dirty: 510 510 trace_xfs_dqreclaim_busy(dqp); 511 - XFS_STATS_INC(xs_qm_dqreclaim_misses); 511 + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); 512 512 xfs_dqunlock(dqp); 513 513 spin_lock(lru_lock); 514 514 return LRU_RETRY;
+45 -48
fs/xfs/xfs_stats.c
··· 18 18 #include "xfs.h" 19 19 #include <linux/proc_fs.h> 20 20 21 - DEFINE_PER_CPU(struct xfsstats, xfsstats); 21 + struct xstats xfsstats; 22 22 23 - static int counter_val(int idx) 23 + static int counter_val(struct xfsstats __percpu *stats, int idx) 24 24 { 25 25 int val = 0, cpu; 26 26 27 27 for_each_possible_cpu(cpu) 28 - val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); 28 + val += *(((__u32 *)per_cpu_ptr(stats, cpu) + idx)); 29 29 return val; 30 30 } 31 31 32 - static int xfs_stat_proc_show(struct seq_file *m, void *v) 32 + int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) 33 33 { 34 34 int i, j; 35 + int len = 0; 35 36 __uint64_t xs_xstrat_bytes = 0; 36 37 __uint64_t xs_write_bytes = 0; 37 38 __uint64_t xs_read_bytes = 0; ··· 66 65 }; 67 66 68 67 /* Loop over all stats groups */ 68 + 69 69 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { 70 - seq_printf(m, "%s", xstats[i].desc); 70 + len += snprintf(buf + len, PATH_MAX - len, "%s", 71 + xstats[i].desc); 71 72 /* inner loop does each group */ 72 73 for (; j < xstats[i].endpoint; j++) 73 - seq_printf(m, " %u", counter_val(j)); 74 - seq_putc(m, '\n'); 74 + len += snprintf(buf + len, PATH_MAX - len, " %u", 75 + counter_val(stats, j)); 76 + len += snprintf(buf + len, PATH_MAX - len, "\n"); 75 77 } 76 78 /* extra precision counters */ 77 79 for_each_possible_cpu(i) { 78 - xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; 79 - xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; 80 - xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; 80 + xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes; 81 + xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes; 82 + xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes; 81 83 } 82 84 83 - seq_printf(m, "xpc %Lu %Lu %Lu\n", 85 + len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", 84 86 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); 85 - seq_printf(m, "debug %u\n", 87 + len += snprintf(buf + len, PATH_MAX-len, "debug %u\n", 86 88 
#if defined(DEBUG) 87 89 1); 88 90 #else 89 91 0); 90 92 #endif 91 - return 0; 93 + 94 + return len; 92 95 } 93 96 94 - static int xfs_stat_proc_open(struct inode *inode, struct file *file) 97 + void xfs_stats_clearall(struct xfsstats __percpu *stats) 95 98 { 96 - return single_open(file, xfs_stat_proc_show, NULL); 97 - } 99 + int c; 100 + __uint32_t vn_active; 98 101 99 - static const struct file_operations xfs_stat_proc_fops = { 100 - .owner = THIS_MODULE, 101 - .open = xfs_stat_proc_open, 102 - .read = seq_read, 103 - .llseek = seq_lseek, 104 - .release = single_release, 105 - }; 102 + xfs_notice(NULL, "Clearing xfsstats"); 103 + for_each_possible_cpu(c) { 104 + preempt_disable(); 105 + /* save vn_active, it's a universal truth! */ 106 + vn_active = per_cpu_ptr(stats, c)->vn_active; 107 + memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); 108 + per_cpu_ptr(stats, c)->vn_active = vn_active; 109 + preempt_enable(); 110 + } 111 + } 106 112 107 113 /* legacy quota interfaces */ 108 114 #ifdef CONFIG_XFS_QUOTA ··· 117 109 { 118 110 /* maximum; incore; ratio free to inuse; freelist */ 119 111 seq_printf(m, "%d\t%d\t%d\t%u\n", 120 - 0, 121 - counter_val(XFSSTAT_END_XQMSTAT), 122 - 0, 123 - counter_val(XFSSTAT_END_XQMSTAT + 1)); 112 + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT), 113 + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1)); 124 114 return 0; 125 115 } 126 116 ··· 142 136 143 137 seq_printf(m, "qm"); 144 138 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) 145 - seq_printf(m, " %u", counter_val(j)); 139 + seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j)); 146 140 seq_putc(m, '\n'); 147 141 return 0; 148 142 } ··· 161 155 }; 162 156 #endif /* CONFIG_XFS_QUOTA */ 163 157 158 + #ifdef CONFIG_PROC_FS 164 159 int 165 160 xfs_init_procfs(void) 166 161 { 167 162 if (!proc_mkdir("fs/xfs", NULL)) 163 + return -ENOMEM; 164 + 165 + if (!proc_symlink("fs/xfs/stat", NULL, 166 + "/sys/fs/xfs/stats/stats")) 168 167 goto out; 169 168 170 - 
if (!proc_create("fs/xfs/stat", 0, NULL, 171 - &xfs_stat_proc_fops)) 172 - goto out_remove_xfs_dir; 173 169 #ifdef CONFIG_XFS_QUOTA 174 170 if (!proc_create("fs/xfs/xqmstat", 0, NULL, 175 171 &xqmstat_proc_fops)) 176 - goto out_remove_stat_file; 172 + goto out; 177 173 if (!proc_create("fs/xfs/xqm", 0, NULL, 178 174 &xqm_proc_fops)) 179 - goto out_remove_xqmstat_file; 175 + goto out; 180 176 #endif 181 177 return 0; 182 178 183 - #ifdef CONFIG_XFS_QUOTA 184 - out_remove_xqmstat_file: 185 - remove_proc_entry("fs/xfs/xqmstat", NULL); 186 - out_remove_stat_file: 187 - remove_proc_entry("fs/xfs/stat", NULL); 188 - #endif 189 - out_remove_xfs_dir: 190 - remove_proc_entry("fs/xfs", NULL); 191 - out: 179 + out: 180 + remove_proc_subtree("fs/xfs", NULL); 192 181 return -ENOMEM; 193 182 } 194 183 195 184 void 196 185 xfs_cleanup_procfs(void) 197 186 { 198 - #ifdef CONFIG_XFS_QUOTA 199 - remove_proc_entry("fs/xfs/xqm", NULL); 200 - remove_proc_entry("fs/xfs/xqmstat", NULL); 201 - #endif 202 - remove_proc_entry("fs/xfs/stat", NULL); 203 - remove_proc_entry("fs/xfs", NULL); 187 + remove_proc_subtree("fs/xfs", NULL); 204 188 } 189 + #endif /* CONFIG_PROC_FS */
+22 -14
fs/xfs/xfs_stats.h
··· 19 19 #define __XFS_STATS_H__ 20 20 21 21 22 - #if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) 23 - 24 22 #include <linux/percpu.h> 25 23 26 24 /* ··· 213 215 __uint64_t xs_read_bytes; 214 216 }; 215 217 216 - DECLARE_PER_CPU(struct xfsstats, xfsstats); 218 + int xfs_stats_format(struct xfsstats __percpu *stats, char *buf); 219 + void xfs_stats_clearall(struct xfsstats __percpu *stats); 220 + extern struct xstats xfsstats; 217 221 218 - /* 219 - * We don't disable preempt, not too worried about poking the 220 - * wrong CPU's stat for now (also aggregated before reporting). 221 - */ 222 - #define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) 223 - #define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) 224 - #define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) 222 + #define XFS_STATS_INC(mp, v) \ 223 + do { \ 224 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \ 225 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \ 226 + } while (0) 227 + 228 + #define XFS_STATS_DEC(mp, v) \ 229 + do { \ 230 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \ 231 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \ 232 + } while (0) 233 + 234 + #define XFS_STATS_ADD(mp, v, inc) \ 235 + do { \ 236 + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \ 237 + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \ 238 + } while (0) 239 + 240 + #if defined(CONFIG_PROC_FS) 225 241 226 242 extern int xfs_init_procfs(void); 227 243 extern void xfs_cleanup_procfs(void); 228 244 229 245 230 246 #else /* !CONFIG_PROC_FS */ 231 - 232 - # define XFS_STATS_INC(count) 233 - # define XFS_STATS_DEC(count) 234 - # define XFS_STATS_ADD(count, inc) 235 247 236 248 static inline int xfs_init_procfs(void) 237 249 {
+44 -13
fs/xfs/xfs_super.c
··· 838 838 goto out_destroy_unwritten; 839 839 840 840 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 841 - WQ_FREEZABLE, 0, mp->m_fsname); 841 + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); 842 842 if (!mp->m_reclaim_workqueue) 843 843 goto out_destroy_cil; 844 844 845 845 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", 846 - WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); 846 + WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, 847 + mp->m_fsname); 847 848 if (!mp->m_log_workqueue) 848 849 goto out_destroy_reclaim; 849 850 850 851 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", 851 - WQ_FREEZABLE, 0, mp->m_fsname); 852 + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); 852 853 if (!mp->m_eofblocks_workqueue) 853 854 goto out_destroy_log; 854 855 ··· 923 922 924 923 trace_xfs_destroy_inode(ip); 925 924 926 - XFS_STATS_INC(vn_reclaim); 925 + XFS_STATS_INC(ip->i_mount, vn_reclaim); 927 926 928 927 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 929 928 ··· 984 983 985 984 truncate_inode_pages_final(&inode->i_data); 986 985 clear_inode(inode); 987 - XFS_STATS_INC(vn_rele); 988 - XFS_STATS_INC(vn_remove); 986 + XFS_STATS_INC(ip->i_mount, vn_rele); 987 + XFS_STATS_INC(ip->i_mount, vn_remove); 989 988 990 989 xfs_inactive(ip); 991 990 } ··· 1475 1474 if (error) 1476 1475 goto out_destroy_workqueues; 1477 1476 1477 + /* Allocate stats memory before we do operations that might use it */ 1478 + mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); 1479 + if (!mp->m_stats.xs_stats) { 1480 + error = -ENOMEM; 1481 + goto out_destroy_counters; 1482 + } 1483 + 1478 1484 error = xfs_readsb(mp, flags); 1479 1485 if (error) 1480 - goto out_destroy_counters; 1486 + goto out_free_stats; 1481 1487 1482 1488 error = xfs_finish_flags(mp); 1483 1489 if (error) ··· 1553 1545 xfs_filestream_unmount(mp); 1554 1546 out_free_sb: 1555 1547 xfs_freesb(mp); 1548 + out_free_stats: 1549 + free_percpu(mp->m_stats.xs_stats); 1556 1550 out_destroy_counters: 
1557 1551 xfs_destroy_percpu_counters(mp); 1558 - out_destroy_workqueues: 1552 + out_destroy_workqueues: 1559 1553 xfs_destroy_mount_workqueues(mp); 1560 1554 out_close_devices: 1561 1555 xfs_close_devices(mp); ··· 1584 1574 xfs_unmountfs(mp); 1585 1575 1586 1576 xfs_freesb(mp); 1577 + free_percpu(mp->m_stats.xs_stats); 1587 1578 xfs_destroy_percpu_counters(mp); 1588 1579 xfs_destroy_mount_workqueues(mp); 1589 1580 xfs_close_devices(mp); ··· 1849 1838 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); 1850 1839 if (!xfs_kset) { 1851 1840 error = -ENOMEM; 1852 - goto out_sysctl_unregister;; 1841 + goto out_sysctl_unregister; 1853 1842 } 1843 + 1844 + xfsstats.xs_kobj.kobject.kset = xfs_kset; 1845 + 1846 + xfsstats.xs_stats = alloc_percpu(struct xfsstats); 1847 + if (!xfsstats.xs_stats) { 1848 + error = -ENOMEM; 1849 + goto out_kset_unregister; 1850 + } 1851 + 1852 + error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, 1853 + "stats"); 1854 + if (error) 1855 + goto out_free_stats; 1854 1856 1855 1857 #ifdef DEBUG 1856 1858 xfs_dbg_kobj.kobject.kset = xfs_kset; 1857 1859 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); 1858 1860 if (error) 1859 - goto out_kset_unregister; 1861 + goto out_remove_stats_kobj; 1860 1862 #endif 1861 1863 1862 1864 error = xfs_qm_init(); 1863 1865 if (error) 1864 - goto out_remove_kobj; 1866 + goto out_remove_dbg_kobj; 1865 1867 1866 1868 error = register_filesystem(&xfs_fs_type); 1867 1869 if (error) ··· 1883 1859 1884 1860 out_qm_exit: 1885 1861 xfs_qm_exit(); 1886 - out_remove_kobj: 1862 + out_remove_dbg_kobj: 1887 1863 #ifdef DEBUG 1888 1864 xfs_sysfs_del(&xfs_dbg_kobj); 1889 - out_kset_unregister: 1865 + out_remove_stats_kobj: 1890 1866 #endif 1867 + xfs_sysfs_del(&xfsstats.xs_kobj); 1868 + out_free_stats: 1869 + free_percpu(xfsstats.xs_stats); 1870 + out_kset_unregister: 1891 1871 kset_unregister(xfs_kset); 1892 1872 out_sysctl_unregister: 1893 1873 xfs_sysctl_unregister(); ··· 1917 1889 #ifdef 
DEBUG 1918 1890 xfs_sysfs_del(&xfs_dbg_kobj); 1919 1891 #endif 1892 + xfs_sysfs_del(&xfsstats.xs_kobj); 1893 + free_percpu(xfsstats.xs_stats); 1920 1894 kset_unregister(xfs_kset); 1921 1895 xfs_sysctl_unregister(); 1922 1896 xfs_cleanup_procfs(); ··· 1926 1896 xfs_mru_cache_uninit(); 1927 1897 xfs_destroy_workqueues(); 1928 1898 xfs_destroy_zones(); 1899 + xfs_uuid_table_free(); 1929 1900 } 1930 1901 1931 1902 module_init(init_xfs_fs);
+3 -12
fs/xfs/xfs_sysctl.c
··· 19 19 #include <linux/sysctl.h> 20 20 #include <linux/proc_fs.h> 21 21 #include "xfs_error.h" 22 + #include "xfs_stats.h" 22 23 23 24 static struct ctl_table_header *xfs_table_header; 24 25 ··· 32 31 size_t *lenp, 33 32 loff_t *ppos) 34 33 { 35 - int c, ret, *valp = ctl->data; 36 - __uint32_t vn_active; 34 + int ret, *valp = ctl->data; 37 35 38 36 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); 39 37 40 38 if (!ret && write && *valp) { 41 - xfs_notice(NULL, "Clearing xfsstats"); 42 - for_each_possible_cpu(c) { 43 - preempt_disable(); 44 - /* save vn_active, it's a universal truth! */ 45 - vn_active = per_cpu(xfsstats, c).vn_active; 46 - memset(&per_cpu(xfsstats, c), 0, 47 - sizeof(struct xfsstats)); 48 - per_cpu(xfsstats, c).vn_active = vn_active; 49 - preempt_enable(); 50 - } 39 + xfs_stats_clearall(xfsstats.xs_stats); 51 40 xfs_stats_clear = 0; 52 41 } 53 42
+116 -85
fs/xfs/xfs_sysfs.c
··· 21 21 #include "xfs_log_format.h" 22 22 #include "xfs_log.h" 23 23 #include "xfs_log_priv.h" 24 + #include "xfs_stats.h" 24 25 25 26 struct xfs_sysfs_attr { 26 27 struct attribute attr; 27 - ssize_t (*show)(char *buf, void *data); 28 - ssize_t (*store)(const char *buf, size_t count, void *data); 28 + ssize_t (*show)(struct kobject *kobject, char *buf); 29 + ssize_t (*store)(struct kobject *kobject, const char *buf, 30 + size_t count); 29 31 }; 30 32 31 33 static inline struct xfs_sysfs_attr * ··· 40 38 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) 41 39 #define XFS_SYSFS_ATTR_RO(name) \ 42 40 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) 41 + #define XFS_SYSFS_ATTR_WO(name) \ 42 + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name) 43 43 44 44 #define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr 45 45 ··· 55 51 .release = xfs_sysfs_release, 56 52 }; 57 53 54 + STATIC ssize_t 55 + xfs_sysfs_object_show( 56 + struct kobject *kobject, 57 + struct attribute *attr, 58 + char *buf) 59 + { 60 + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 61 + 62 + return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0; 63 + } 64 + 65 + STATIC ssize_t 66 + xfs_sysfs_object_store( 67 + struct kobject *kobject, 68 + struct attribute *attr, 69 + const char *buf, 70 + size_t count) 71 + { 72 + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 73 + 74 + return xfs_attr->store ? 
xfs_attr->store(kobject, buf, count) : 0; 75 + } 76 + 77 + static const struct sysfs_ops xfs_sysfs_ops = { 78 + .show = xfs_sysfs_object_show, 79 + .store = xfs_sysfs_object_store, 80 + }; 81 + 58 82 #ifdef DEBUG 59 83 /* debug */ 60 84 61 85 STATIC ssize_t 62 86 log_recovery_delay_store( 87 + struct kobject *kobject, 63 88 const char *buf, 64 - size_t count, 65 - void *data) 89 + size_t count) 66 90 { 67 91 int ret; 68 92 int val; ··· 109 77 110 78 STATIC ssize_t 111 79 log_recovery_delay_show( 112 - char *buf, 113 - void *data) 80 + struct kobject *kobject, 81 + char *buf) 114 82 { 115 83 return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); 116 84 } ··· 121 89 NULL, 122 90 }; 123 91 124 - STATIC ssize_t 125 - xfs_dbg_show( 126 - struct kobject *kobject, 127 - struct attribute *attr, 128 - char *buf) 129 - { 130 - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 131 - 132 - return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; 133 - } 134 - 135 - STATIC ssize_t 136 - xfs_dbg_store( 137 - struct kobject *kobject, 138 - struct attribute *attr, 139 - const char *buf, 140 - size_t count) 141 - { 142 - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 143 - 144 - return xfs_attr->store ? 
xfs_attr->store(buf, count, NULL) : 0; 145 - } 146 - 147 - static struct sysfs_ops xfs_dbg_ops = { 148 - .show = xfs_dbg_show, 149 - .store = xfs_dbg_store, 150 - }; 151 - 152 92 struct kobj_type xfs_dbg_ktype = { 153 93 .release = xfs_sysfs_release, 154 - .sysfs_ops = &xfs_dbg_ops, 94 + .sysfs_ops = &xfs_sysfs_ops, 155 95 .default_attrs = xfs_dbg_attrs, 156 96 }; 157 97 158 98 #endif /* DEBUG */ 159 99 100 + /* stats */ 101 + 102 + static inline struct xstats * 103 + to_xstats(struct kobject *kobject) 104 + { 105 + struct xfs_kobj *kobj = to_kobj(kobject); 106 + 107 + return container_of(kobj, struct xstats, xs_kobj); 108 + } 109 + 110 + STATIC ssize_t 111 + stats_show( 112 + struct kobject *kobject, 113 + char *buf) 114 + { 115 + struct xstats *stats = to_xstats(kobject); 116 + 117 + return xfs_stats_format(stats->xs_stats, buf); 118 + } 119 + XFS_SYSFS_ATTR_RO(stats); 120 + 121 + STATIC ssize_t 122 + stats_clear_store( 123 + struct kobject *kobject, 124 + const char *buf, 125 + size_t count) 126 + { 127 + int ret; 128 + int val; 129 + struct xstats *stats = to_xstats(kobject); 130 + 131 + ret = kstrtoint(buf, 0, &val); 132 + if (ret) 133 + return ret; 134 + 135 + if (val != 1) 136 + return -EINVAL; 137 + 138 + xfs_stats_clearall(stats->xs_stats); 139 + return count; 140 + } 141 + XFS_SYSFS_ATTR_WO(stats_clear); 142 + 143 + static struct attribute *xfs_stats_attrs[] = { 144 + ATTR_LIST(stats), 145 + ATTR_LIST(stats_clear), 146 + NULL, 147 + }; 148 + 149 + struct kobj_type xfs_stats_ktype = { 150 + .release = xfs_sysfs_release, 151 + .sysfs_ops = &xfs_sysfs_ops, 152 + .default_attrs = xfs_stats_attrs, 153 + }; 154 + 160 155 /* xlog */ 156 + 157 + static inline struct xlog * 158 + to_xlog(struct kobject *kobject) 159 + { 160 + struct xfs_kobj *kobj = to_kobj(kobject); 161 + 162 + return container_of(kobj, struct xlog, l_kobj); 163 + } 161 164 162 165 STATIC ssize_t 163 166 log_head_lsn_show( 164 - char *buf, 165 - void *data) 167 + struct kobject *kobject, 168 + 
char *buf) 166 169 { 167 - struct xlog *log = data; 168 170 int cycle; 169 171 int block; 172 + struct xlog *log = to_xlog(kobject); 170 173 171 174 spin_lock(&log->l_icloglock); 172 175 cycle = log->l_curr_cycle; ··· 214 147 215 148 STATIC ssize_t 216 149 log_tail_lsn_show( 217 - char *buf, 218 - void *data) 150 + struct kobject *kobject, 151 + char *buf) 219 152 { 220 - struct xlog *log = data; 221 153 int cycle; 222 154 int block; 155 + struct xlog *log = to_xlog(kobject); 223 156 224 157 xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); 225 158 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); ··· 228 161 229 162 STATIC ssize_t 230 163 reserve_grant_head_show( 231 - char *buf, 232 - void *data) 164 + struct kobject *kobject, 165 + char *buf) 166 + 233 167 { 234 - struct xlog *log = data; 235 168 int cycle; 236 169 int bytes; 170 + struct xlog *log = to_xlog(kobject); 237 171 238 172 xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); 239 173 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); ··· 243 175 244 176 STATIC ssize_t 245 177 write_grant_head_show( 246 - char *buf, 247 - void *data) 178 + struct kobject *kobject, 179 + char *buf) 248 180 { 249 - struct xlog *log = data; 250 181 int cycle; 251 182 int bytes; 183 + struct xlog *log = to_xlog(kobject); 252 184 253 185 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); 254 186 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); ··· 263 195 NULL, 264 196 }; 265 197 266 - static inline struct xlog * 267 - to_xlog(struct kobject *kobject) 268 - { 269 - struct xfs_kobj *kobj = to_kobj(kobject); 270 - return container_of(kobj, struct xlog, l_kobj); 271 - } 272 - 273 - STATIC ssize_t 274 - xfs_log_show( 275 - struct kobject *kobject, 276 - struct attribute *attr, 277 - char *buf) 278 - { 279 - struct xlog *log = to_xlog(kobject); 280 - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 281 - 282 - return xfs_attr->show ? 
xfs_attr->show(buf, log) : 0; 283 - } 284 - 285 - STATIC ssize_t 286 - xfs_log_store( 287 - struct kobject *kobject, 288 - struct attribute *attr, 289 - const char *buf, 290 - size_t count) 291 - { 292 - struct xlog *log = to_xlog(kobject); 293 - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 294 - 295 - return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0; 296 - } 297 - 298 - static struct sysfs_ops xfs_log_ops = { 299 - .show = xfs_log_show, 300 - .store = xfs_log_store, 301 - }; 302 - 303 198 struct kobj_type xfs_log_ktype = { 304 199 .release = xfs_sysfs_release, 305 - .sysfs_ops = &xfs_log_ops, 200 + .sysfs_ops = &xfs_sysfs_ops, 306 201 .default_attrs = xfs_log_attrs, 307 202 };
+1
fs/xfs/xfs_sysfs.h
··· 22 22 extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ 23 23 extern struct kobj_type xfs_dbg_ktype; /* debug */ 24 24 extern struct kobj_type xfs_log_ktype; /* xlog */ 25 + extern struct kobj_type xfs_stats_ktype; /* stats */ 25 26 26 27 static inline struct xfs_kobj * 27 28 to_kobj(struct kobject *kobject)
+2
fs/xfs/xfs_trace.h
··· 689 689 DEFINE_INODE_EVENT(xfs_filemap_fault); 690 690 DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); 691 691 DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); 692 + DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); 692 693 693 694 DECLARE_EVENT_CLASS(xfs_iref_class, 694 695 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), ··· 1313 1312 DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 1314 1313 DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); 1315 1314 DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); 1315 + DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); 1316 1316 1317 1317 DECLARE_EVENT_CLASS(xfs_itrunc_class, 1318 1318 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
+3 -3
fs/xfs/xfs_trans.c
··· 930 930 */ 931 931 if (sync) { 932 932 error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); 933 - XFS_STATS_INC(xs_trans_sync); 933 + XFS_STATS_INC(mp, xs_trans_sync); 934 934 } else { 935 - XFS_STATS_INC(xs_trans_async); 935 + XFS_STATS_INC(mp, xs_trans_async); 936 936 } 937 937 938 938 return error; ··· 955 955 xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); 956 956 xfs_trans_free(tp); 957 957 958 - XFS_STATS_INC(xs_trans_empty); 958 + XFS_STATS_INC(mp, xs_trans_empty); 959 959 return error; 960 960 } 961 961
+7 -6
fs/xfs/xfs_trans_ail.c
··· 349 349 xfs_ail_min_lsn(ailp))) { 350 350 ailp->xa_log_flush = 0; 351 351 352 - XFS_STATS_INC(xs_push_ail_flush); 352 + XFS_STATS_INC(mp, xs_push_ail_flush); 353 353 xfs_log_force(mp, XFS_LOG_SYNC); 354 354 } 355 355 ··· 371 371 goto out_done; 372 372 } 373 373 374 - XFS_STATS_INC(xs_push_ail); 374 + XFS_STATS_INC(mp, xs_push_ail); 375 375 376 376 lsn = lip->li_lsn; 377 377 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { ··· 385 385 lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list); 386 386 switch (lock_result) { 387 387 case XFS_ITEM_SUCCESS: 388 - XFS_STATS_INC(xs_push_ail_success); 388 + XFS_STATS_INC(mp, xs_push_ail_success); 389 389 trace_xfs_ail_push(lip); 390 390 391 391 ailp->xa_last_pushed_lsn = lsn; ··· 403 403 * re-try the flushing relatively soon if most of the 404 404 * AIL is beeing flushed. 405 405 */ 406 - XFS_STATS_INC(xs_push_ail_flushing); 406 + XFS_STATS_INC(mp, xs_push_ail_flushing); 407 407 trace_xfs_ail_flushing(lip); 408 408 409 409 flushing++; ··· 411 411 break; 412 412 413 413 case XFS_ITEM_PINNED: 414 - XFS_STATS_INC(xs_push_ail_pinned); 414 + XFS_STATS_INC(mp, xs_push_ail_pinned); 415 415 trace_xfs_ail_pinned(lip); 416 416 417 417 stuck++; 418 418 ailp->xa_log_flush++; 419 419 break; 420 420 case XFS_ITEM_LOCKED: 421 - XFS_STATS_INC(xs_push_ail_locked); 421 + XFS_STATS_INC(mp, xs_push_ail_locked); 422 422 trace_xfs_ail_locked(lip); 423 423 424 424 stuck++; ··· 497 497 long tout = 0; /* milliseconds */ 498 498 499 499 current->flags |= PF_MEMALLOC; 500 + set_freezable(); 500 501 501 502 while (!kthread_should_stop()) { 502 503 if (tout && tout <= 20)
+9
fs/xfs/xfs_trans_inode.c
··· 108 108 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 109 109 110 110 /* 111 + * Record the specific change for fdatasync optimisation. This 112 + * allows fdatasync to skip log forces for inodes that are only 113 + * timestamp dirty. We do this before the change count so that 114 + * the core being logged in this case does not impact on fdatasync 115 + * behaviour. 116 + */ 117 + ip->i_itemp->ili_fsync_fields |= flags; 118 + 119 + /* 111 120 * First time we log the inode in a transaction, bump the inode change 112 121 * counter if it is configured for this to occur. We don't use 113 122 * inode_inc_version() because there is no need for extra locking around
+29 -2
fs/xfs/xfs_xattr.c
··· 53 53 return asize; 54 54 } 55 55 56 + void 57 + xfs_forget_acl( 58 + struct inode *inode, 59 + const char *name, 60 + int xflags) 61 + { 62 + /* 63 + * Invalidate any cached ACLs if the user has bypassed the ACL 64 + * interface. We don't validate the content whatsoever so it is caller 65 + * responsibility to provide data in valid format and ensure i_mode is 66 + * consistent. 67 + */ 68 + if (xflags & ATTR_ROOT) { 69 + #ifdef CONFIG_XFS_POSIX_ACL 70 + if (!strcmp(name, SGI_ACL_FILE)) 71 + forget_cached_acl(inode, ACL_TYPE_ACCESS); 72 + else if (!strcmp(name, SGI_ACL_DEFAULT)) 73 + forget_cached_acl(inode, ACL_TYPE_DEFAULT); 74 + #endif 75 + } 76 + } 77 + 56 78 static int 57 79 xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, 58 80 size_t size, int flags, int xflags) 59 81 { 60 - struct xfs_inode *ip = XFS_I(d_inode(dentry)); 82 + struct xfs_inode *ip = XFS_I(d_inode(dentry)); 83 + int error; 61 84 62 85 if (strcmp(name, "") == 0) 63 86 return -EINVAL; ··· 93 70 94 71 if (!value) 95 72 return xfs_attr_remove(ip, (unsigned char *)name, xflags); 96 - return xfs_attr_set(ip, (unsigned char *)name, 73 + error = xfs_attr_set(ip, (unsigned char *)name, 97 74 (void *)value, size, xflags); 75 + if (!error) 76 + xfs_forget_acl(d_inode(dentry), name, xflags); 77 + 78 + return error; 98 79 } 99 80 100 81 static const struct xattr_handler xfs_xattr_user_handler = {