xfs: introduce a per-ag inode iterator

Given that we walk the per-ag inode lists so often, it makes sense to
introduce an iterator for this.

Convert the sync and reclaim code to use this new iterator; the quota code
will follow in the next patch.

Also change xfs_reclaim_inode to return -EAGAIN instead of 1 for an inode
already under reclaim. This simplifies the AG iterator and doesn't
matter for the only other caller.

[hch: merged the lookup and execute callbacks back into one to get the
pag_ici_lock locking correct and simplify the code flow]

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
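
For reference, the shape of the new API: xfs_inode_ag_iterator() walks
every initialised AG and calls an execute callback on each in-core inode
it finds. The callback is entered with pag->pag_ici_lock read-held and
must drop that lock itself; returning EAGAIN makes the walker skip the
inode and restart the AG walk after a short delay. A minimal sketch of a
callback follows (xfs_example_execute is hypothetical, for illustration
only; the real callbacks in this patch are xfs_sync_inode_data,
xfs_sync_inode_attr and xfs_reclaim_inode_now):

/*
 * Hypothetical execute callback for xfs_inode_ag_iterator().
 * Entered with pag->pag_ici_lock read-held; xfs_sync_inode_valid()
 * drops that lock while checking the inode is usable and taking a
 * reference, which must be dropped again with IRELE() when done.
 */
STATIC int
xfs_example_execute(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			flags)
{
	int			error;

	error = xfs_sync_inode_valid(ip, pag);	/* drops pag_ici_lock */
	if (error)
		return error;

	/* ... work on the inode; return EAGAIN to have the walk retried ... */

	IRELE(ip);
	return 0;
}

	error = xfs_inode_ag_iterator(mp, xfs_example_execute, 0, XFS_ICI_NO_TAG);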

Authored by Dave Chinner, committed by Christoph Hellwig
75f3cb13 abc10647

+152 -166

fs/xfs/linux-2.6/xfs_sync.c (+150 -166)
···
 #include <linux/freezer.h>


+STATIC xfs_inode_t *
+xfs_inode_ag_lookup(
+	struct xfs_mount	*mp,
+	struct xfs_perag	*pag,
+	uint32_t		*first_index,
+	int			tag)
+{
+	int			nr_found;
+	struct xfs_inode	*ip;
+
+	/*
+	 * use a gang lookup to find the next inode in the tree
+	 * as the tree is sparse and a gang lookup walks to find
+	 * the number of objects requested.
+	 */
+	read_lock(&pag->pag_ici_lock);
+	if (tag == XFS_ICI_NO_TAG) {
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+				(void **)&ip, *first_index, 1);
+	} else {
+		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+				(void **)&ip, *first_index, 1, tag);
+	}
+	if (!nr_found)
+		goto unlock;
+
+	/*
+	 * Update the index for the next lookup. Catch overflows
+	 * into the next AG range which can occur if we have inodes
+	 * in the last block of the AG and we are currently
+	 * pointing to the last inode.
+	 */
+	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+		goto unlock;
+
+	return ip;
+
+unlock:
+	read_unlock(&pag->pag_ici_lock);
+	return NULL;
+}
+
+STATIC int
+xfs_inode_ag_walk(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		ag,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags,
+	int			tag)
+{
+	struct xfs_perag	*pag = &mp->m_perag[ag];
+	uint32_t		first_index;
+	int			last_error = 0;
+	int			skipped;
+
+restart:
+	skipped = 0;
+	first_index = 0;
+	do {
+		int		error = 0;
+		xfs_inode_t	*ip;
+
+		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+		if (!ip)
+			break;
+
+		error = execute(ip, pag, flags);
+		if (error == EAGAIN) {
+			skipped++;
+			continue;
+		}
+		if (error)
+			last_error = error;
+		/*
+		 * bail out if the filesystem is corrupted.
+		 */
+		if (error == EFSCORRUPTED)
+			break;
+
+	} while (1);
+
+	if (skipped) {
+		delay(1);
+		goto restart;
+	}
+
+	xfs_put_perag(mp, pag);
+	return last_error;
+}
+
+STATIC int
+xfs_inode_ag_iterator(
+	struct xfs_mount	*mp,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags,
+	int			tag)
+{
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+
+	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+		if (!mp->m_perag[ag].pag_ici_init)
+			continue;
+		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+		if (error) {
+			last_error = error;
+			if (error == EFSCORRUPTED)
+				break;
+		}
+	}
+	return XFS_ERROR(last_error);
+}
+
 /* must be called with pag_ici_lock held and releases it */
 STATIC int
 xfs_sync_inode_valid(
···
 STATIC int
 xfs_sync_inode_data(
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	int			flags)
 {
 	struct inode		*inode = VFS_I(ip);
 	struct address_space *mapping = inode->i_mapping;
 	int			error = 0;
+
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;

 	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		goto out_wait;
···
 out_wait:
 	if (flags & SYNC_IOWAIT)
 		xfs_ioend_wait(ip);
+	IRELE(ip);
 	return error;
 }

 STATIC int
 xfs_sync_inode_attr(
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	int			flags)
 {
 	int			error = 0;
+
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;

 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	if (xfs_inode_clean(ip))
···
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	IRELE(ip);
 	return error;
-}
-
-/*
- * Sync all the inodes in the given AG according to the
- * direction given by the flags.
- */
-STATIC int
-xfs_sync_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	int		flags)
-{
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		nr_found;
-	uint32_t	first_index = 0;
-	int		error = 0;
-	int		last_error = 0;
-
-	do {
-		xfs_inode_t	*ip = NULL;
-
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-				(void**)&ip, first_index, 1);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		error = xfs_sync_inode_valid(ip, pag);
-		if (error) {
-			if (error == EFSCORRUPTED)
-				return 0;
-			continue;
-		}
-
-		/*
-		 * If we have to flush data or wait for I/O completion
-		 * we need to hold the iolock.
-		 */
-		if (flags & SYNC_DELWRI)
-			error = xfs_sync_inode_data(ip, flags);
-
-		if (flags & SYNC_ATTR)
-			error = xfs_sync_inode_attr(ip, flags);
-
-		IRELE(ip);
-
-		if (error)
-			last_error = error;
-		/*
-		 * bail out if the filesystem is corrupted.
-		 */
-		if (error == EFSCORRUPTED)
-			return XFS_ERROR(error);
-
-	} while (nr_found);
-
-	return last_error;
 }

 int
···
 	xfs_mount_t	*mp,
 	int		flags)
 {
-	int		error;
-	int		last_error;
-	int		i;
+	int		error = 0;
 	int		lflags = XFS_LOG_FORCE;

 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return 0;
-	error = 0;
-	last_error = 0;

 	if (flags & SYNC_WAIT)
 		lflags |= XFS_LOG_SYNC;

-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		error = xfs_sync_inodes_ag(mp, i, flags);
-		if (error)
-			last_error = error;
-		if (error == EFSCORRUPTED)
-			break;
-	}
 	if (flags & SYNC_DELWRI)
-		xfs_log_force(mp, 0, lflags);
+		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, XFS_ICI_NO_TAG);

-	return XFS_ERROR(last_error);
+	if (flags & SYNC_ATTR)
+		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, XFS_ICI_NO_TAG);
+
+	if (!error && (flags & SYNC_DELWRI))
+		xfs_log_force(mp, 0, lflags);
+	return XFS_ERROR(error);
 }

 STATIC int
···
 			xfs_ifunlock(ip);
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
-		return 1;
+		return -EAGAIN;
 	}
 	__xfs_iflags_set(ip, XFS_IRECLAIM);
 	spin_unlock(&ip->i_flags_lock);
···
 	xfs_put_perag(mp, pag);
 }

-
-STATIC void
-xfs_reclaim_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	int		mode)
+STATIC int
+xfs_reclaim_inode_now(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
 {
-	xfs_inode_t	*ip = NULL;
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		nr_found;
-	uint32_t	first_index;
-	int		skipped;
-
-restart:
-	first_index = 0;
-	skipped = 0;
-	do {
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
-					(void**)&ip, first_index, 1,
-					XFS_ICI_RECLAIM_TAG);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/* ignore if already under reclaim */
-		if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
-
+	/* ignore if already under reclaim */
+	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
 		read_unlock(&pag->pag_ici_lock);
-
-		/*
-		 * hmmm - this is an inode already in reclaim. Do
-		 * we even bother catching it here?
-		 */
-		if (xfs_reclaim_inode(ip, 0, mode))
-			skipped++;
-	} while (nr_found);
-
-	if (skipped) {
-		delay(1);
-		goto restart;
+		return 0;
 	}
-	return;
+	read_unlock(&pag->pag_ici_lock);

+	return xfs_reclaim_inode(ip, 0, flags);
 }

 int
···
 	xfs_mount_t	*mp,
 	int		mode)
 {
-	int		i;
-
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		xfs_reclaim_inodes_ag(mp, i, mode);
-	}
-	return 0;
+	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
+					XFS_ICI_RECLAIM_TAG);
 }
-
-
fs/xfs/xfs_ag.h (+2)
···
 /*
  * tags for inode radix tree
  */
+#define XFS_ICI_NO_TAG		(-1)	/* special flag for an untagged lookup
+					   in xfs_inode_ag_iterator */
 #define XFS_ICI_RECLAIM_TAG	0	/* inode is to be reclaimed */

 #define	XFS_AG_MAXLEVELS(mp)	((mp)->m_ag_maxlevels)
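
XFS_ICI_NO_TAG lets the same walker serve both tagged and untagged
lookups; radix tree tags are small non-negative indices, so (-1) cannot
collide with a real tag. The two uses in this patch select the behaviour:

	/* sync: untagged walk over every in-core inode in each AG */
	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
					XFS_ICI_NO_TAG);

	/* reclaim: walk only the inodes tagged for reclaim */
	error = xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
					XFS_ICI_RECLAIM_TAG);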