xfs: always use iget in bulkstat

The non-coherent bulkstat versions that look directly at the inode
buffers cause various problems with performance optimizations that
make increased use of just logging inodes. This patch makes bulkstat
always use iget, which should be fast enough for normal use with the
radix-tree based inode cache introduced a while ago.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>

authored by Christoph Hellwig and committed by Dave Chinner 7dce11db 1817176a

+61 -284
+3 -4
fs/xfs/linux-2.6/xfs_ioctl.c
··· 679 679 error = xfs_bulkstat_single(mp, &inlast, 680 680 bulkreq.ubuffer, &done); 681 681 else /* XFS_IOC_FSBULKSTAT */ 682 - error = xfs_bulkstat(mp, &inlast, &count, 683 - (bulkstat_one_pf)xfs_bulkstat_one, NULL, 684 - sizeof(xfs_bstat_t), bulkreq.ubuffer, 685 - BULKSTAT_FG_QUICK, &done); 682 + error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, 683 + sizeof(xfs_bstat_t), bulkreq.ubuffer, 684 + &done); 686 685 687 686 if (error) 688 687 return -error;
+4 -8
fs/xfs/linux-2.6/xfs_ioctl32.c
··· 237 237 xfs_ino_t ino, /* inode number to get data for */ 238 238 void __user *buffer, /* buffer to place output in */ 239 239 int ubsize, /* size of buffer */ 240 - void *private_data, /* my private data */ 241 240 xfs_daddr_t bno, /* starting bno of inode cluster */ 242 241 int *ubused, /* bytes used by me */ 243 - void *dibuff, /* on-disk inode buffer */ 244 242 int *stat) /* BULKSTAT_RV_... */ 245 243 { 246 244 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 247 245 xfs_bulkstat_one_fmt_compat, bno, 248 - ubused, dibuff, stat); 246 + ubused, stat); 249 247 } 250 248 251 249 /* copied from xfs_ioctl.c */ ··· 296 298 int res; 297 299 298 300 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 299 - sizeof(compat_xfs_bstat_t), 300 - NULL, 0, NULL, NULL, &res); 301 + sizeof(compat_xfs_bstat_t), 0, NULL, &res); 301 302 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 302 303 error = xfs_bulkstat(mp, &inlast, &count, 303 - xfs_bulkstat_one_compat, NULL, 304 - sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, 305 - BULKSTAT_FG_QUICK, &done); 304 + xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), 305 + bulkreq.ubuffer, &done); 306 306 } else 307 307 error = XFS_ERROR(EINVAL); 308 308 if (error)
+5 -6
fs/xfs/quota/xfs_qm.c
··· 1632 1632 xfs_ino_t ino, /* inode number to get data for */ 1633 1633 void __user *buffer, /* not used */ 1634 1634 int ubsize, /* not used */ 1635 - void *private_data, /* not used */ 1636 1635 xfs_daddr_t bno, /* starting block of inode cluster */ 1637 1636 int *ubused, /* not used */ 1638 - void *dip, /* on-disk inode pointer (not used) */ 1639 1637 int *res) /* result code value */ 1640 1638 { 1641 1639 xfs_inode_t *ip; ··· 1794 1796 * Iterate thru all the inodes in the file system, 1795 1797 * adjusting the corresponding dquot counters in core. 1796 1798 */ 1797 - if ((error = xfs_bulkstat(mp, &lastino, &count, 1798 - xfs_qm_dqusage_adjust, NULL, 1799 - structsz, NULL, BULKSTAT_FG_IGET, &done))) 1799 + error = xfs_bulkstat(mp, &lastino, &count, 1800 + xfs_qm_dqusage_adjust, 1801 + structsz, NULL, &done); 1802 + if (error) 1800 1803 break; 1801 1804 1802 - } while (! done); 1805 + } while (!done); 1803 1806 1804 1807 /* 1805 1808 * We've made all the changes that we need to make incore.
+7 -9
fs/xfs/quota/xfs_qm_syscalls.c
··· 1109 1109 xfs_ino_t ino, /* inode number to get data for */ 1110 1110 void __user *buffer, /* not used */ 1111 1111 int ubsize, /* not used */ 1112 - void *private_data, /* not used */ 1113 1112 xfs_daddr_t bno, /* starting block of inode cluster */ 1114 1113 int *ubused, /* not used */ 1115 - void *dip, /* not used */ 1116 1114 int *res) /* bulkstat result code */ 1117 1115 { 1118 1116 xfs_inode_t *ip; ··· 1203 1205 * Iterate thru all the inodes in the file system, 1204 1206 * adjusting the corresponding dquot counters 1205 1207 */ 1206 - if ((error = xfs_bulkstat(mp, &lastino, &count, 1207 - xfs_qm_internalqcheck_adjust, NULL, 1208 - 0, NULL, BULKSTAT_FG_IGET, &done))) { 1208 + error = xfs_bulkstat(mp, &lastino, &count, 1209 + xfs_qm_internalqcheck_adjust, 1210 + 0, NULL, &done); 1211 + if (error) { 1212 + cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); 1209 1213 break; 1210 1214 } 1211 - } while (! done); 1212 - if (error) { 1213 - cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); 1214 - } 1215 + } while (!done); 1216 + 1215 1217 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1216 1218 for (i = 0; i < qmtest_hashmask; i++) { 1217 1219 xfs_dqtest_t *d, *n;
+42 -243
fs/xfs/xfs_itable.c
··· 49 49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); 50 50 } 51 51 52 - STATIC int 53 - xfs_bulkstat_one_iget( 54 - xfs_mount_t *mp, /* mount point for filesystem */ 55 - xfs_ino_t ino, /* inode number to get data for */ 56 - xfs_daddr_t bno, /* starting bno of inode cluster */ 57 - xfs_bstat_t *buf, /* return buffer */ 58 - int *stat) /* BULKSTAT_RV_... */ 52 + /* 53 + * Return stat information for one inode. 54 + * Return 0 if ok, else errno. 55 + */ 56 + int 57 + xfs_bulkstat_one_int( 58 + struct xfs_mount *mp, /* mount point for filesystem */ 59 + xfs_ino_t ino, /* inode to get data for */ 60 + void __user *buffer, /* buffer to place output in */ 61 + int ubsize, /* size of buffer */ 62 + bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ 63 + xfs_daddr_t bno, /* starting bno of cluster */ 64 + int *ubused, /* bytes used by me */ 65 + int *stat) /* BULKSTAT_RV_... */ 59 66 { 60 - xfs_icdinode_t *dic; /* dinode core info pointer */ 61 - xfs_inode_t *ip; /* incore inode pointer */ 62 - struct inode *inode; 63 - int error; 67 + struct xfs_icdinode *dic; /* dinode core info pointer */ 68 + struct xfs_inode *ip; /* incore inode pointer */ 69 + struct inode *inode; 70 + struct xfs_bstat *buf; /* return buffer */ 71 + int error = 0; /* error value */ 72 + 73 + *stat = BULKSTAT_RV_NOTHING; 74 + 75 + if (!buffer || xfs_internal_inum(mp, ino)) 76 + return XFS_ERROR(EINVAL); 77 + 78 + buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); 79 + if (!buf) 80 + return XFS_ERROR(ENOMEM); 64 81 65 82 error = xfs_iget(mp, NULL, ino, 66 83 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); 67 84 if (error) { 68 85 *stat = BULKSTAT_RV_NOTHING; 69 - return error; 86 + goto out_free; 70 87 } 71 88 72 89 ASSERT(ip != NULL); ··· 144 127 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 145 128 break; 146 129 } 147 - 148 130 xfs_iput(ip, XFS_ILOCK_SHARED); 131 + 132 + error = formatter(buffer, ubsize, ubused, buf); 133 + 134 + if (!error) 135 + *stat = 
BULKSTAT_RV_DIDONE; 136 + 137 + out_free: 138 + kmem_free(buf); 149 139 return error; 150 - } 151 - 152 - STATIC void 153 - xfs_bulkstat_one_dinode( 154 - xfs_mount_t *mp, /* mount point for filesystem */ 155 - xfs_ino_t ino, /* inode number to get data for */ 156 - xfs_dinode_t *dic, /* dinode inode pointer */ 157 - xfs_bstat_t *buf) /* return buffer */ 158 - { 159 - /* 160 - * The inode format changed when we moved the link count and 161 - * made it 32 bits long. If this is an old format inode, 162 - * convert it in memory to look like a new one. If it gets 163 - * flushed to disk we will convert back before flushing or 164 - * logging it. We zero out the new projid field and the old link 165 - * count field. We'll handle clearing the pad field (the remains 166 - * of the old uuid field) when we actually convert the inode to 167 - * the new format. We don't change the version number so that we 168 - * can distinguish this from a real new format inode. 169 - */ 170 - if (dic->di_version == 1) { 171 - buf->bs_nlink = be16_to_cpu(dic->di_onlink); 172 - buf->bs_projid = 0; 173 - } else { 174 - buf->bs_nlink = be32_to_cpu(dic->di_nlink); 175 - buf->bs_projid = be16_to_cpu(dic->di_projid); 176 - } 177 - 178 - buf->bs_ino = ino; 179 - buf->bs_mode = be16_to_cpu(dic->di_mode); 180 - buf->bs_uid = be32_to_cpu(dic->di_uid); 181 - buf->bs_gid = be32_to_cpu(dic->di_gid); 182 - buf->bs_size = be64_to_cpu(dic->di_size); 183 - buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec); 184 - buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec); 185 - buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec); 186 - buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); 187 - buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); 188 - buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); 189 - buf->bs_xflags = xfs_dic2xflags(dic); 190 - buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; 191 - buf->bs_extents = be32_to_cpu(dic->di_nextents); 192 - 
buf->bs_gen = be32_to_cpu(dic->di_gen); 193 - memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); 194 - buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); 195 - buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); 196 - buf->bs_aextents = be16_to_cpu(dic->di_anextents); 197 - buf->bs_forkoff = XFS_DFORK_BOFF(dic); 198 - 199 - switch (dic->di_format) { 200 - case XFS_DINODE_FMT_DEV: 201 - buf->bs_rdev = xfs_dinode_get_rdev(dic); 202 - buf->bs_blksize = BLKDEV_IOSIZE; 203 - buf->bs_blocks = 0; 204 - break; 205 - case XFS_DINODE_FMT_LOCAL: 206 - case XFS_DINODE_FMT_UUID: 207 - buf->bs_rdev = 0; 208 - buf->bs_blksize = mp->m_sb.sb_blocksize; 209 - buf->bs_blocks = 0; 210 - break; 211 - case XFS_DINODE_FMT_EXTENTS: 212 - case XFS_DINODE_FMT_BTREE: 213 - buf->bs_rdev = 0; 214 - buf->bs_blksize = mp->m_sb.sb_blocksize; 215 - buf->bs_blocks = be64_to_cpu(dic->di_nblocks); 216 - break; 217 - } 218 140 } 219 141 220 142 /* Return 0 on success or positive error */ ··· 173 217 return 0; 174 218 } 175 219 176 - /* 177 - * Return stat information for one inode. 178 - * Return 0 if ok, else errno. 179 - */ 180 - int /* error status */ 181 - xfs_bulkstat_one_int( 182 - xfs_mount_t *mp, /* mount point for filesystem */ 183 - xfs_ino_t ino, /* inode number to get data for */ 184 - void __user *buffer, /* buffer to place output in */ 185 - int ubsize, /* size of buffer */ 186 - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ 187 - xfs_daddr_t bno, /* starting bno of inode cluster */ 188 - int *ubused, /* bytes used by me */ 189 - void *dibuff, /* on-disk inode buffer */ 190 - int *stat) /* BULKSTAT_RV_... 
*/ 191 - { 192 - xfs_bstat_t *buf; /* return buffer */ 193 - int error = 0; /* error value */ 194 - xfs_dinode_t *dip; /* dinode inode pointer */ 195 - 196 - dip = (xfs_dinode_t *)dibuff; 197 - *stat = BULKSTAT_RV_NOTHING; 198 - 199 - if (!buffer || xfs_internal_inum(mp, ino)) 200 - return XFS_ERROR(EINVAL); 201 - 202 - buf = kmem_alloc(sizeof(*buf), KM_SLEEP); 203 - 204 - if (dip == NULL) { 205 - /* We're not being passed a pointer to a dinode. This happens 206 - * if BULKSTAT_FG_IGET is selected. Do the iget. 207 - */ 208 - error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat); 209 - if (error) 210 - goto out_free; 211 - } else { 212 - xfs_bulkstat_one_dinode(mp, ino, dip, buf); 213 - } 214 - 215 - error = formatter(buffer, ubsize, ubused, buf); 216 - if (error) 217 - goto out_free; 218 - 219 - *stat = BULKSTAT_RV_DIDONE; 220 - 221 - out_free: 222 - kmem_free(buf); 223 - return error; 224 - } 225 - 226 220 int 227 221 xfs_bulkstat_one( 228 222 xfs_mount_t *mp, /* mount point for filesystem */ 229 223 xfs_ino_t ino, /* inode number to get data for */ 230 224 void __user *buffer, /* buffer to place output in */ 231 225 int ubsize, /* size of buffer */ 232 - void *private_data, /* my private data */ 233 226 xfs_daddr_t bno, /* starting bno of inode cluster */ 234 227 int *ubused, /* bytes used by me */ 235 - void *dibuff, /* on-disk inode buffer */ 236 228 int *stat) /* BULKSTAT_RV_... */ 237 229 { 238 230 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 239 231 xfs_bulkstat_one_fmt, bno, 240 - ubused, dibuff, stat); 241 - } 242 - 243 - /* 244 - * Test to see whether we can use the ondisk inode directly, based 245 - * on the given bulkstat flags, filling in dipp accordingly. 246 - * Returns zero if the inode is dodgey. 
247 - */ 248 - STATIC int 249 - xfs_bulkstat_use_dinode( 250 - xfs_mount_t *mp, 251 - int flags, 252 - xfs_buf_t *bp, 253 - int clustidx, 254 - xfs_dinode_t **dipp) 255 - { 256 - xfs_dinode_t *dip; 257 - unsigned int aformat; 258 - 259 - *dipp = NULL; 260 - if (!bp || (flags & BULKSTAT_FG_IGET)) 261 - return 1; 262 - dip = (xfs_dinode_t *) 263 - xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); 264 - /* 265 - * Check the buffer containing the on-disk inode for di_mode == 0. 266 - * This is to prevent xfs_bulkstat from picking up just reclaimed 267 - * inodes that have their in-core state initialized but not flushed 268 - * to disk yet. This is a temporary hack that would require a proper 269 - * fix in the future. 270 - */ 271 - if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || 272 - !XFS_DINODE_GOOD_VERSION(dip->di_version) || 273 - !dip->di_mode) 274 - return 0; 275 - if (flags & BULKSTAT_FG_QUICK) { 276 - *dipp = dip; 277 - return 1; 278 - } 279 - /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ 280 - aformat = dip->di_aformat; 281 - if ((XFS_DFORK_Q(dip) == 0) || 282 - (aformat == XFS_DINODE_FMT_LOCAL) || 283 - (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { 284 - *dipp = dip; 285 - return 1; 286 - } 287 - return 1; 232 + ubused, stat); 288 233 } 289 234 290 235 #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) ··· 199 342 xfs_ino_t *lastinop, /* last inode returned */ 200 343 int *ubcountp, /* size of buffer/count returned */ 201 344 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 202 - void *private_data,/* private data for formatter */ 203 345 size_t statstruct_size, /* sizeof struct filling */ 204 346 char __user *ubuffer, /* buffer with inode stats */ 205 - int flags, /* defined in xfs_itable.h */ 206 347 int *done) /* 1 if there are more stats to get */ 207 348 { 208 349 xfs_agblock_t agbno=0;/* allocation group block number */ ··· 235 380 int ubelem; /* spaces used in user's buffer 
*/ 236 381 int ubused; /* bytes used by formatter */ 237 382 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 238 - xfs_dinode_t *dip; /* ptr into bp for specific inode */ 239 383 240 384 /* 241 385 * Get the last inode value, see if there's nothing to do. 242 386 */ 243 387 ino = (xfs_ino_t)*lastinop; 244 388 lastino = ino; 245 - dip = NULL; 246 389 agno = XFS_INO_TO_AGNO(mp, ino); 247 390 agino = XFS_INO_TO_AGINO(mp, ino); 248 391 if (agno >= mp->m_sb.sb_agcount || ··· 465 612 irbp->ir_startino) + 466 613 ((chunkidx & nimask) >> 467 614 mp->m_sb.sb_inopblog); 468 - 469 - if (flags & (BULKSTAT_FG_QUICK | 470 - BULKSTAT_FG_INLINE)) { 471 - int offset; 472 - 473 - ino = XFS_AGINO_TO_INO(mp, agno, 474 - agino); 475 - bno = XFS_AGB_TO_DADDR(mp, agno, 476 - agbno); 477 - 478 - /* 479 - * Get the inode cluster buffer 480 - */ 481 - if (bp) 482 - xfs_buf_relse(bp); 483 - 484 - error = xfs_inotobp(mp, NULL, ino, &dip, 485 - &bp, &offset, 486 - XFS_IGET_BULKSTAT); 487 - 488 - if (!error) 489 - clustidx = offset / mp->m_sb.sb_inodesize; 490 - if (XFS_TEST_ERROR(error != 0, 491 - mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, 492 - XFS_RANDOM_BULKSTAT_READ_CHUNK)) { 493 - bp = NULL; 494 - ubleft = 0; 495 - rval = error; 496 - break; 497 - } 498 - } 499 615 } 500 616 ino = XFS_AGINO_TO_INO(mp, agno, agino); 501 617 bno = XFS_AGB_TO_DADDR(mp, agno, agbno); ··· 480 658 * when the chunk is used up. 481 659 */ 482 660 irbp->ir_freecount++; 483 - if (!xfs_bulkstat_use_dinode(mp, flags, bp, 484 - clustidx, &dip)) { 485 - lastino = ino; 486 - continue; 487 - } 488 - /* 489 - * If we need to do an iget, cannot hold bp. 490 - * Drop it, until starting the next cluster. 491 - */ 492 - if ((flags & BULKSTAT_FG_INLINE) && !dip) { 493 - if (bp) 494 - xfs_buf_relse(bp); 495 - bp = NULL; 496 - } 497 661 498 662 /* 499 663 * Get the inode and fill in a single buffer. 500 - * BULKSTAT_FG_QUICK uses dip to fill it in. 501 - * BULKSTAT_FG_IGET uses igets. 
502 - * BULKSTAT_FG_INLINE uses dip if we have an 503 - * inline attr fork, else igets. 504 - * See: xfs_bulkstat_one & xfs_dm_bulkstat_one. 505 - * This is also used to count inodes/blks, etc 506 - * in xfs_qm_quotacheck. 507 664 */ 508 665 ubused = statstruct_size; 509 - error = formatter(mp, ino, ubufp, 510 - ubleft, private_data, 511 - bno, &ubused, dip, &fmterror); 666 + error = formatter(mp, ino, ubufp, ubleft, bno, 667 + &ubused, &fmterror); 512 668 if (fmterror == BULKSTAT_RV_NOTHING) { 513 669 if (error && error != ENOENT && 514 670 error != EINVAL) { ··· 579 779 580 780 ino = (xfs_ino_t)*lastinop; 581 781 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 582 - NULL, 0, NULL, NULL, &res); 782 + 0, NULL, &res); 583 783 if (error) { 584 784 /* 585 785 * Special case way failed, do it the "long" way ··· 588 788 (*lastinop)--; 589 789 count = 1; 590 790 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, 591 - NULL, sizeof(xfs_bstat_t), buffer, 592 - BULKSTAT_FG_IGET, done)) 791 + sizeof(xfs_bstat_t), buffer, done)) 593 792 return error; 594 793 if (count == 0 || (xfs_ino_t)*lastinop != ino) 595 794 return error == EFSCORRUPTED ?
-14
fs/xfs/xfs_itable.h
··· 27 27 xfs_ino_t ino, 28 28 void __user *buffer, 29 29 int ubsize, 30 - void *private_data, 31 30 xfs_daddr_t bno, 32 31 int *ubused, 33 - void *dip, 34 32 int *stat); 35 33 36 34 /* ··· 39 41 #define BULKSTAT_RV_GIVEUP 2 40 42 41 43 /* 42 - * Values for bulkstat flag argument. 43 - */ 44 - #define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ 45 - #define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ 46 - #define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */ 47 - 48 - /* 49 44 * Return stat information in bulk (by-inode) for the filesystem. 50 45 */ 51 46 int /* error status */ ··· 47 56 xfs_ino_t *lastino, /* last inode returned */ 48 57 int *count, /* size of buffer/count returned */ 49 58 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 50 - void *private_data, /* private data for formatter */ 51 59 size_t statstruct_size,/* sizeof struct that we're filling */ 52 60 char __user *ubuffer,/* buffer with inode stats */ 53 - int flags, /* flag to control access method */ 54 61 int *done); /* 1 if there are more stats to get */ 55 62 56 63 int ··· 73 84 bulkstat_one_fmt_pf formatter, 74 85 xfs_daddr_t bno, 75 86 int *ubused, 76 - void *dibuff, 77 87 int *stat); 78 88 79 89 int ··· 81 93 xfs_ino_t ino, 82 94 void __user *buffer, 83 95 int ubsize, 84 - void *private_data, 85 96 xfs_daddr_t bno, 86 97 int *ubused, 87 - void *dibuff, 88 98 int *stat); 89 99 90 100 typedef int (*inumbers_fmt_pf)(