Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw

Pull GFS2 updates from Steven Whitehouse:
"The main feature this time is the new Orlov allocator and the patches
leading up to it which allow us to allocate new inodes from their own
allocation context, rather than borrowing that of their parent
directory. It is this change which then allows us to choose a
different location for subdirectories when required. This works
exactly as per the ext3 implementation from the users point of view.

In addition to that, we've got a speed up in gfs2_rbm_from_block()
from Bob Peterson, three locking related improvements from Dave
Teigland plus a selection of smaller bug fixes and clean ups."

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw:
GFS2: Set gl_object during inode create
GFS2: add error check while allocating new inodes
GFS2: don't reference inode's glock during block allocation trace
GFS2: remove redundant lvb pointer
GFS2: only use lvb on glocks that need it
GFS2: skip dlm_unlock calls in unmount
GFS2: Fix one RG corner case
GFS2: Eliminate redundant buffer_head manipulation in gfs2_unlink_inode
GFS2: Use dirty_inode in gfs2_dir_add
GFS2: Fix truncation of journaled data files
GFS2: Add Orlov allocator
GFS2: Use proper allocation context for new inodes
GFS2: Add test for resource group congestion status
GFS2: Rename glops go_xmote_th to go_sync
GFS2: Speed up gfs2_rbm_from_block
GFS2: Review bug traps in glops.c

+389 -203
+1 -1
fs/gfs2/aops.c
··· 643 643 goto out_unlock; 644 644 645 645 requested = data_blocks + ind_blocks; 646 - error = gfs2_inplace_reserve(ip, requested); 646 + error = gfs2_inplace_reserve(ip, requested, 0); 647 647 if (error) 648 648 goto out_qunlock; 649 649 }
+50 -4
fs/gfs2/bmap.c
··· 991 991 return err; 992 992 } 993 993 994 + /** 995 + * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files 996 + * @inode: The inode being truncated 997 + * @oldsize: The original (larger) size 998 + * @newsize: The new smaller size 999 + * 1000 + * With jdata files, we have to journal a revoke for each block which is 1001 + * truncated. As a result, we need to split this into separate transactions 1002 + * if the number of pages being truncated gets too large. 1003 + */ 1004 + 1005 + #define GFS2_JTRUNC_REVOKES 8192 1006 + 1007 + static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) 1008 + { 1009 + struct gfs2_sbd *sdp = GFS2_SB(inode); 1010 + u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; 1011 + u64 chunk; 1012 + int error; 1013 + 1014 + while (oldsize != newsize) { 1015 + chunk = oldsize - newsize; 1016 + if (chunk > max_chunk) 1017 + chunk = max_chunk; 1018 + truncate_pagecache(inode, oldsize, oldsize - chunk); 1019 + oldsize -= chunk; 1020 + gfs2_trans_end(sdp); 1021 + error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 1022 + if (error) 1023 + return error; 1024 + } 1025 + 1026 + return 0; 1027 + } 1028 + 994 1029 static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) 995 1030 { 996 1031 struct gfs2_inode *ip = GFS2_I(inode); ··· 1035 1000 int journaled = gfs2_is_jdata(ip); 1036 1001 int error; 1037 1002 1038 - error = gfs2_trans_begin(sdp, 1039 - RES_DINODE + (journaled ? 
RES_JDATA : 0), 0); 1003 + if (journaled) 1004 + error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); 1005 + else 1006 + error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1040 1007 if (error) 1041 1008 return error; 1042 1009 ··· 1063 1026 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1064 1027 gfs2_dinode_out(ip, dibh->b_data); 1065 1028 1066 - truncate_pagecache(inode, oldsize, newsize); 1029 + if (journaled) 1030 + error = gfs2_journaled_truncate(inode, oldsize, newsize); 1031 + else 1032 + truncate_pagecache(inode, oldsize, newsize); 1033 + 1034 + if (error) { 1035 + brelse(dibh); 1036 + return error; 1037 + } 1038 + 1067 1039 out_brelse: 1068 1040 brelse(dibh); 1069 1041 out: ··· 1224 1178 if (error) 1225 1179 return error; 1226 1180 1227 - error = gfs2_inplace_reserve(ip, 1); 1181 + error = gfs2_inplace_reserve(ip, 1, 0); 1228 1182 if (error) 1229 1183 goto do_grow_qunlock; 1230 1184 unstuff = 1;
+1 -6
fs/gfs2/dir.c
··· 1676 1676 be16_add_cpu(&leaf->lf_entries, 1); 1677 1677 } 1678 1678 brelse(bh); 1679 - error = gfs2_meta_inode_buffer(ip, &bh); 1680 - if (error) 1681 - break; 1682 - gfs2_trans_add_bh(ip->i_gl, bh, 1); 1683 1679 ip->i_entries++; 1684 1680 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1685 1681 if (S_ISDIR(nip->i_inode.i_mode)) 1686 1682 inc_nlink(&ip->i_inode); 1687 - gfs2_dinode_out(ip, bh->b_data); 1688 - brelse(bh); 1683 + mark_inode_dirty(inode); 1689 1684 error = 0; 1690 1685 break; 1691 1686 }
+2 -2
fs/gfs2/file.c
··· 432 432 if (ret) 433 433 goto out_unlock; 434 434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 435 - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 435 + ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); 436 436 if (ret) 437 437 goto out_quota_unlock; 438 438 ··· 825 825 retry: 826 826 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 827 827 828 - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 828 + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); 829 829 if (error) { 830 830 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 831 831 bytes >>= 1;
+26 -14
fs/gfs2/glock.c
··· 55 55 56 56 typedef void (*glock_examiner) (struct gfs2_glock * gl); 57 57 58 - static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 59 - #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) 60 58 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 61 59 62 60 static struct dentry *gfs2_root; ··· 105 107 { 106 108 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); 107 109 108 - if (gl->gl_ops->go_flags & GLOF_ASPACE) 110 + if (gl->gl_ops->go_flags & GLOF_ASPACE) { 109 111 kmem_cache_free(gfs2_glock_aspace_cachep, gl); 110 - else 112 + } else { 113 + kfree(gl->gl_lksb.sb_lvbptr); 111 114 kmem_cache_free(gfs2_glock_cachep, gl); 115 + } 112 116 } 113 117 114 118 void gfs2_glock_free(struct gfs2_glock *gl) ··· 537 537 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) 538 538 clear_bit(GLF_BLOCKING, &gl->gl_flags); 539 539 spin_unlock(&gl->gl_spin); 540 - if (glops->go_xmote_th) 541 - glops->go_xmote_th(gl); 540 + if (glops->go_sync) 541 + glops->go_sync(gl); 542 542 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) 543 543 glops->go_inval(gl, target == LM_ST_DEFERRED ? 
0 : DIO_METADATA); 544 544 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); ··· 547 547 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 548 548 /* lock_dlm */ 549 549 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 550 - GLOCK_BUG_ON(gl, ret); 550 + if (ret) { 551 + printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); 552 + GLOCK_BUG_ON(gl, 1); 553 + } 551 554 } else { /* lock_nolock */ 552 555 finish_xmote(gl, target); 553 556 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) ··· 739 736 if (!gl) 740 737 return -ENOMEM; 741 738 739 + memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 740 + 741 + if (glops->go_flags & GLOF_LVB) { 742 + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); 743 + if (!gl->gl_lksb.sb_lvbptr) { 744 + kmem_cache_free(cachep, gl); 745 + return -ENOMEM; 746 + } 747 + } 748 + 742 749 atomic_inc(&sdp->sd_glock_disposal); 743 750 gl->gl_sbd = sdp; 744 751 gl->gl_flags = 0; ··· 766 753 preempt_enable(); 767 754 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; 768 755 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; 769 - memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 770 - memset(gl->gl_lvb, 0, 32 * sizeof(char)); 771 - gl->gl_lksb.sb_lvbptr = gl->gl_lvb; 772 756 gl->gl_tchange = jiffies; 773 757 gl->gl_object = NULL; 774 758 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; ··· 787 777 tmp = search_bucket(hash, sdp, &name); 788 778 if (tmp) { 789 779 spin_unlock_bucket(hash); 780 + kfree(gl->gl_lksb.sb_lvbptr); 790 781 kmem_cache_free(cachep, gl); 791 782 atomic_dec(&sdp->sd_glock_disposal); 792 783 gl = tmp; ··· 1024 1013 printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); 1025 1014 printk(KERN_ERR "lock type: %d req lock state : %d\n", 1026 1015 gh->gh_gl->gl_name.ln_type, gh->gh_state); 1027 - __dump_glock(NULL, gl); 1016 + gfs2_dump_glock(NULL, gl); 1028 1017 BUG(); 1029 1018 } 1030 1019 ··· 1519 1508 { 1520 1509 int ret; 1521 1510 spin_lock(&gl->gl_spin); 1522 - ret = __dump_glock(seq, gl); 1511 + ret = gfs2_dump_glock(seq, 
gl); 1523 1512 spin_unlock(&gl->gl_spin); 1524 1513 return ret; 1525 1514 } ··· 1539 1528 1540 1529 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1541 1530 { 1531 + set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 1542 1532 glock_hash_walk(clear_glock, sdp); 1543 1533 flush_workqueue(glock_workqueue); 1544 1534 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); ··· 1667 1655 } 1668 1656 1669 1657 /** 1670 - * __dump_glock - print information about a glock 1658 + * gfs2_dump_glock - print information about a glock 1671 1659 * @seq: The seq_file struct 1672 1660 * @gl: the glock 1673 1661 * ··· 1684 1672 * Returns: 0 on success, -ENOBUFS when we run out of space 1685 1673 */ 1686 1674 1687 - static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) 1675 + int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) 1688 1676 { 1689 1677 const struct gfs2_glock_operations *glops = gl->gl_ops; 1690 1678 unsigned long long dtime;
+27 -27
fs/gfs2/glock.h
··· 178 178 return NULL; 179 179 } 180 180 181 - int gfs2_glock_get(struct gfs2_sbd *sdp, 182 - u64 number, const struct gfs2_glock_operations *glops, 183 - int create, struct gfs2_glock **glp); 184 - void gfs2_glock_hold(struct gfs2_glock *gl); 185 - void gfs2_glock_put_nolock(struct gfs2_glock *gl); 186 - void gfs2_glock_put(struct gfs2_glock *gl); 187 - void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 188 - struct gfs2_holder *gh); 189 - void gfs2_holder_reinit(unsigned int state, unsigned flags, 190 - struct gfs2_holder *gh); 191 - void gfs2_holder_uninit(struct gfs2_holder *gh); 192 - int gfs2_glock_nq(struct gfs2_holder *gh); 193 - int gfs2_glock_poll(struct gfs2_holder *gh); 194 - int gfs2_glock_wait(struct gfs2_holder *gh); 195 - void gfs2_glock_dq(struct gfs2_holder *gh); 196 - void gfs2_glock_dq_wait(struct gfs2_holder *gh); 197 - 198 - void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 199 - int gfs2_glock_nq_num(struct gfs2_sbd *sdp, 200 - u64 number, const struct gfs2_glock_operations *glops, 201 - unsigned int state, int flags, struct gfs2_holder *gh); 202 - 203 - int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 204 - void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 205 - void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 206 - 207 - __printf(2, 3) 181 + extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, 182 + const struct gfs2_glock_operations *glops, 183 + int create, struct gfs2_glock **glp); 184 + extern void gfs2_glock_hold(struct gfs2_glock *gl); 185 + extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); 186 + extern void gfs2_glock_put(struct gfs2_glock *gl); 187 + extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, 188 + unsigned flags, struct gfs2_holder *gh); 189 + extern void gfs2_holder_reinit(unsigned int state, unsigned flags, 190 + struct gfs2_holder *gh); 191 + extern void gfs2_holder_uninit(struct gfs2_holder 
*gh); 192 + extern int gfs2_glock_nq(struct gfs2_holder *gh); 193 + extern int gfs2_glock_poll(struct gfs2_holder *gh); 194 + extern int gfs2_glock_wait(struct gfs2_holder *gh); 195 + extern void gfs2_glock_dq(struct gfs2_holder *gh); 196 + extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); 197 + extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 198 + extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, 199 + const struct gfs2_glock_operations *glops, 200 + unsigned int state, int flags, 201 + struct gfs2_holder *gh); 202 + extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 203 + extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 204 + extern void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 205 + extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 206 + #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) 207 + extern __printf(2, 3) 208 208 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); 209 209 210 210 /**
+10 -9
fs/gfs2/glops.c
··· 74 74 75 75 gfs2_trans_add_revoke(sdp, bd); 76 76 } 77 - BUG_ON(!fsync && atomic_read(&gl->gl_ail_count)); 77 + GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); 78 78 spin_unlock(&sdp->sd_ail_lock); 79 79 gfs2_log_unlock(sdp); 80 80 } ··· 96 96 tr.tr_ip = (unsigned long)__builtin_return_address(0); 97 97 sb_start_intwrite(sdp->sd_vfs); 98 98 gfs2_log_reserve(sdp, tr.tr_reserved); 99 - BUG_ON(current->journal_info); 99 + WARN_ON_ONCE(current->journal_info); 100 100 current->journal_info = &tr; 101 101 102 102 __gfs2_ail_flush(gl, 0); ··· 139 139 140 140 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 141 141 return; 142 - BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); 142 + GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 143 143 144 144 gfs2_log_flush(gl->gl_sbd, gl); 145 145 filemap_fdatawrite(metamapping); ··· 168 168 { 169 169 struct address_space *mapping = gfs2_glock2aspace(gl); 170 170 171 - BUG_ON(!(flags & DIO_METADATA)); 171 + WARN_ON_ONCE(!(flags & DIO_METADATA)); 172 172 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 173 173 truncate_inode_pages(mapping, 0); 174 174 ··· 197 197 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 198 198 return; 199 199 200 - BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); 200 + GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 201 201 202 202 gfs2_log_flush(gl->gl_sbd, gl); 203 203 filemap_fdatawrite(metamapping); ··· 536 536 }; 537 537 538 538 const struct gfs2_glock_operations gfs2_inode_glops = { 539 - .go_xmote_th = inode_go_sync, 539 + .go_sync = inode_go_sync, 540 540 .go_inval = inode_go_inval, 541 541 .go_demote_ok = inode_go_demote_ok, 542 542 .go_lock = inode_go_lock, ··· 546 546 }; 547 547 548 548 const struct gfs2_glock_operations gfs2_rgrp_glops = { 549 - .go_xmote_th = rgrp_go_sync, 549 + .go_sync = rgrp_go_sync, 550 550 .go_inval = rgrp_go_inval, 551 551 .go_lock = gfs2_rgrp_go_lock, 552 552 .go_unlock = gfs2_rgrp_go_unlock, 553 553 .go_dump = gfs2_rgrp_dump, 554 554 .go_type = 
LM_TYPE_RGRP, 555 - .go_flags = GLOF_ASPACE, 555 + .go_flags = GLOF_ASPACE | GLOF_LVB, 556 556 }; 557 557 558 558 const struct gfs2_glock_operations gfs2_trans_glops = { 559 - .go_xmote_th = trans_go_sync, 559 + .go_sync = trans_go_sync, 560 560 .go_xmote_bh = trans_go_xmote_bh, 561 561 .go_demote_ok = trans_go_demote_ok, 562 562 .go_type = LM_TYPE_NONDISK, ··· 577 577 578 578 const struct gfs2_glock_operations gfs2_quota_glops = { 579 579 .go_type = LM_TYPE_QUOTA, 580 + .go_flags = GLOF_LVB, 580 581 }; 581 582 582 583 const struct gfs2_glock_operations gfs2_journal_glops = {
+4 -2
fs/gfs2/incore.h
··· 205 205 206 206 207 207 struct gfs2_glock_operations { 208 - void (*go_xmote_th) (struct gfs2_glock *gl); 208 + void (*go_sync) (struct gfs2_glock *gl); 209 209 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); 210 210 void (*go_inval) (struct gfs2_glock *gl, int flags); 211 211 int (*go_demote_ok) (const struct gfs2_glock *gl); ··· 216 216 const int go_type; 217 217 const unsigned long go_flags; 218 218 #define GLOF_ASPACE 1 219 + #define GLOF_LVB 2 219 220 }; 220 221 221 222 enum { ··· 322 321 ktime_t gl_dstamp; 323 322 struct gfs2_lkstats gl_stats; 324 323 struct dlm_lksb gl_lksb; 325 - char gl_lvb[32]; 326 324 unsigned long gl_tchange; 327 325 void *gl_object; 328 326 ··· 539 539 SDF_DEMOTE = 5, 540 540 SDF_NOJOURNALID = 6, 541 541 SDF_RORECOVERY = 7, /* read only recovery */ 542 + SDF_SKIP_DLM_UNLOCK = 8, 542 543 }; 543 544 544 545 #define GFS2_FSNAME_LEN 256 ··· 622 621 u32 sd_hash_bsize_shift; 623 622 u32 sd_hash_ptrs; /* Number of pointers in a hash block */ 624 623 u32 sd_qc_per_block; 624 + u32 sd_blocks_per_bitmap; 625 625 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ 626 626 u32 sd_max_height; /* Max height of a file's metadata tree */ 627 627 u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
+118 -109
fs/gfs2/inode.c
··· 364 364 return 0; 365 365 } 366 366 367 - static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode, 368 - unsigned int *uid, unsigned int *gid) 367 + static void munge_mode_uid_gid(const struct gfs2_inode *dip, 368 + struct inode *inode) 369 369 { 370 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 371 371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 372 - if (S_ISDIR(*mode)) 373 - *mode |= S_ISUID; 372 + if (S_ISDIR(inode->i_mode)) 373 + inode->i_mode |= S_ISUID; 374 374 else if (dip->i_inode.i_uid != current_fsuid()) 375 - *mode &= ~07111; 376 - *uid = dip->i_inode.i_uid; 375 + inode->i_mode &= ~07111; 376 + inode->i_uid = dip->i_inode.i_uid; 377 377 } else 378 - *uid = current_fsuid(); 378 + inode->i_uid = current_fsuid(); 379 379 380 380 if (dip->i_inode.i_mode & S_ISGID) { 381 - if (S_ISDIR(*mode)) 382 - *mode |= S_ISGID; 383 - *gid = dip->i_inode.i_gid; 381 + if (S_ISDIR(inode->i_mode)) 382 + inode->i_mode |= S_ISGID; 383 + inode->i_gid = dip->i_inode.i_gid; 384 384 } else 385 - *gid = current_fsgid(); 385 + inode->i_gid = current_fsgid(); 386 386 } 387 387 388 - static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 388 + static int alloc_dinode(struct gfs2_inode *ip, u32 flags) 389 389 { 390 - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 390 + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 391 391 int error; 392 392 int dblocks = 1; 393 393 394 - error = gfs2_inplace_reserve(dip, RES_DINODE); 394 + error = gfs2_inplace_reserve(ip, RES_DINODE, flags); 395 395 if (error) 396 396 goto out; 397 397 ··· 399 399 if (error) 400 400 goto out_ipreserv; 401 401 402 - error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation); 402 + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); 403 + ip->i_no_formal_ino = ip->i_generation; 404 + ip->i_inode.i_ino = ip->i_no_addr; 405 + ip->i_goal = ip->i_no_addr; 403 406 404 407 gfs2_trans_end(sdp); 405 408 406 409 out_ipreserv: 407 - 
gfs2_inplace_release(dip); 410 + gfs2_inplace_release(ip); 408 411 out: 409 412 return error; 410 413 } ··· 432 429 /** 433 430 * init_dinode - Fill in a new dinode structure 434 431 * @dip: The directory this inode is being created in 435 - * @gl: The glock covering the new inode 436 - * @inum: The inode number 437 - * @mode: The file permissions 438 - * @uid: The uid of the new inode 439 - * @gid: The gid of the new inode 440 - * @generation: The generation number of the new inode 441 - * @dev: The device number (if a device node) 432 + * @ip: The inode 442 433 * @symname: The symlink destination (if a symlink) 443 - * @size: The inode size (ignored for directories) 444 434 * @bhp: The buffer head (returned to caller) 445 435 * 446 436 */ 447 437 448 - static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449 - const struct gfs2_inum_host *inum, umode_t mode, 450 - unsigned int uid, unsigned int gid, 451 - const u64 *generation, dev_t dev, const char *symname, 452 - unsigned size, struct buffer_head **bhp) 438 + static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, 439 + const char *symname, struct buffer_head **bhp) 453 440 { 454 441 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 455 442 struct gfs2_dinode *di; 456 443 struct buffer_head *dibh; 457 444 struct timespec tv = CURRENT_TIME; 458 445 459 - dibh = gfs2_meta_new(gl, inum->no_addr); 460 - gfs2_trans_add_bh(gl, dibh, 1); 446 + dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 447 + gfs2_trans_add_bh(ip->i_gl, dibh, 1); 461 448 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 462 449 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 463 450 di = (struct gfs2_dinode *)dibh->b_data; 464 451 465 - di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 466 - di->di_num.no_addr = cpu_to_be64(inum->no_addr); 467 - di->di_mode = cpu_to_be32(mode); 468 - di->di_uid = cpu_to_be32(uid); 469 - di->di_gid = cpu_to_be32(gid); 452 + di->di_num.no_formal_ino = 
cpu_to_be64(ip->i_no_formal_ino); 453 + di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 454 + di->di_mode = cpu_to_be32(ip->i_inode.i_mode); 455 + di->di_uid = cpu_to_be32(ip->i_inode.i_uid); 456 + di->di_gid = cpu_to_be32(ip->i_inode.i_gid); 470 457 di->di_nlink = 0; 471 - di->di_size = cpu_to_be64(size); 458 + di->di_size = cpu_to_be64(ip->i_inode.i_size); 472 459 di->di_blocks = cpu_to_be64(1); 473 460 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 474 - di->di_major = cpu_to_be32(MAJOR(dev)); 475 - di->di_minor = cpu_to_be32(MINOR(dev)); 476 - di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 477 - di->di_generation = cpu_to_be64(*generation); 461 + di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev)); 462 + di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev)); 463 + di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr); 464 + di->di_generation = cpu_to_be64(ip->i_generation); 478 465 di->di_flags = 0; 479 466 di->__pad1 = 0; 480 - di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 467 + di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? 
GFS2_FORMAT_DE : 0); 481 468 di->di_height = 0; 482 469 di->__pad2 = 0; 483 470 di->__pad3 = 0; ··· 480 487 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 481 488 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 482 489 483 - switch(mode & S_IFMT) { 490 + switch(ip->i_inode.i_mode & S_IFMT) { 484 491 case S_IFREG: 485 492 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || 486 493 gfs2_tune_get(sdp, gt_new_files_jdata)) ··· 495 502 gfs2_init_dir(dibh, dip); 496 503 break; 497 504 case S_IFLNK: 498 - memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); 505 + memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, ip->i_inode.i_size); 499 506 break; 500 507 } 501 508 ··· 504 511 *bhp = dibh; 505 512 } 506 513 507 - static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 508 - umode_t mode, const struct gfs2_inum_host *inum, 509 - const u64 *generation, dev_t dev, const char *symname, 510 - unsigned int size, struct buffer_head **bhp) 514 + static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, 515 + const char *symname, struct buffer_head **bhp) 511 516 { 517 + struct inode *inode = &ip->i_inode; 512 518 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 513 - unsigned int uid, gid; 514 519 int error; 515 520 516 - munge_mode_uid_gid(dip, &mode, &uid, &gid); 517 521 error = gfs2_rindex_update(sdp); 518 522 if (error) 519 523 return error; 520 524 521 - error = gfs2_quota_lock(dip, uid, gid); 525 + error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid); 522 526 if (error) 523 527 return error; 524 528 525 - error = gfs2_quota_check(dip, uid, gid); 529 + error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid); 526 530 if (error) 527 531 goto out_quota; 528 532 ··· 527 537 if (error) 528 538 goto out_quota; 529 539 530 - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp); 531 - gfs2_quota_change(dip, +1, uid, gid); 540 + init_dinode(dip, ip, symname, bhp); 541 + gfs2_quota_change(dip, +1, 
inode->i_uid, inode->i_gid); 532 542 gfs2_trans_end(sdp); 533 543 534 544 out_quota: ··· 560 570 if (error) 561 571 goto fail_quota_locks; 562 572 563 - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 573 + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); 564 574 if (error) 565 575 goto fail_quota_locks; 566 576 ··· 647 657 struct inode *inode = NULL; 648 658 struct gfs2_inode *dip = GFS2_I(dir), *ip; 649 659 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 650 - struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 660 + struct gfs2_glock *io_gl; 651 661 int error; 652 - u64 generation; 653 662 struct buffer_head *bh = NULL; 663 + u32 aflags = 0; 654 664 655 665 if (!name->len || name->len > GFS2_FNAMESIZE) 656 666 return -ENAMETOOLONG; 657 667 658 - /* We need a reservation to allocate the new dinode block. The 659 - directory ip temporarily points to the reservation, but this is 660 - being done to get a set of contiguous blocks for the new dinode. 661 - Since this is a create, we don't have a sizehint yet, so it will 662 - have to use the minimum reservation size. 
*/ 663 668 error = gfs2_rs_alloc(dip); 664 669 if (error) 665 670 return error; ··· 673 688 if (error) 674 689 goto fail_gunlock; 675 690 676 - error = alloc_dinode(dip, &inum.no_addr, &generation); 677 - if (error) 678 - goto fail_gunlock; 679 - inum.no_formal_ino = generation; 680 - 681 - error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 682 - LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 683 - if (error) 684 - goto fail_gunlock; 685 - 686 - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh); 687 - if (error) 688 - goto fail_gunlock2; 689 - 690 - inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 691 - inum.no_formal_ino, 0); 692 - if (IS_ERR(inode)) 693 - goto fail_gunlock2; 694 - 691 + inode = new_inode(sdp->sd_vfs); 692 + if (!inode) { 693 + gfs2_glock_dq_uninit(ghs); 694 + return -ENOMEM; 695 + } 695 696 ip = GFS2_I(inode); 696 - error = gfs2_inode_refresh(ip); 697 - if (error) 698 - goto fail_gunlock2; 699 - 700 697 error = gfs2_rs_alloc(ip); 701 698 if (error) 699 + goto fail_free_inode; 700 + 701 + set_bit(GIF_INVALID, &ip->i_flags); 702 + inode->i_mode = mode; 703 + inode->i_rdev = dev; 704 + inode->i_size = size; 705 + munge_mode_uid_gid(dip, inode); 706 + ip->i_goal = dip->i_goal; 707 + 708 + if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) || 709 + (dip->i_diskflags & GFS2_DIF_TOPDIR)) 710 + aflags |= GFS2_AF_ORLOV; 711 + 712 + error = alloc_dinode(ip, aflags); 713 + if (error) 714 + goto fail_free_inode; 715 + 716 + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 717 + if (error) 718 + goto fail_free_inode; 719 + 720 + ip->i_gl->gl_object = ip; 721 + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 722 + if (error) 723 + goto fail_free_inode; 724 + 725 + error = make_dinode(dip, ip, symname, &bh); 726 + if (error) 702 727 goto fail_gunlock2; 728 + 729 + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 730 + if 
(error) 731 + goto fail_gunlock2; 732 + 733 + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 734 + if (error) 735 + goto fail_gunlock2; 736 + 737 + ip->i_iopen_gh.gh_gl->gl_object = ip; 738 + gfs2_glock_put(io_gl); 739 + gfs2_set_iop(inode); 740 + insert_inode_hash(inode); 741 + 742 + error = gfs2_inode_refresh(ip); 743 + if (error) 744 + goto fail_gunlock3; 703 745 704 746 error = gfs2_acl_create(dip, inode); 705 747 if (error) 706 - goto fail_gunlock2; 748 + goto fail_gunlock3; 707 749 708 750 error = gfs2_security_init(dip, ip, name); 709 751 if (error) 710 - goto fail_gunlock2; 752 + goto fail_gunlock3; 711 753 712 754 error = link_dinode(dip, name, ip); 713 755 if (error) 714 - goto fail_gunlock2; 756 + goto fail_gunlock3; 715 757 716 758 if (bh) 717 759 brelse(bh); ··· 751 739 d_instantiate(dentry, inode); 752 740 return 0; 753 741 742 + fail_gunlock3: 743 + gfs2_glock_dq_uninit(ghs + 1); 744 + if (ip->i_gl) 745 + gfs2_glock_put(ip->i_gl); 746 + goto fail_gunlock; 747 + 754 748 fail_gunlock2: 755 749 gfs2_glock_dq_uninit(ghs + 1); 750 + fail_free_inode: 751 + if (ip->i_gl) 752 + gfs2_glock_put(ip->i_gl); 753 + gfs2_rs_delete(ip); 754 + free_inode_nonrcu(inode); 755 + inode = NULL; 756 756 fail_gunlock: 757 757 gfs2_glock_dq_uninit(ghs); 758 758 if (inode && !IS_ERR(inode)) { ··· 772 748 iput(inode); 773 749 } 774 750 fail: 775 - gfs2_rs_delete(dip); 776 751 if (bh) 777 752 brelse(bh); 778 753 return error; ··· 907 884 if (error) 908 885 goto out_gunlock; 909 886 910 - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 887 + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); 911 888 if (error) 912 889 goto out_gunlock_q; 913 890 ··· 1000 977 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it 1001 978 * @dip: The parent directory 1002 979 * @name: The name of the entry in the parent directory 1003 - * @bh: The inode buffer for the inode to be removed 1004 980 * @inode: The inode to be removed 1005 
981 * 1006 982 * Called with all the locks and in a transaction. This will only be ··· 1009 987 */ 1010 988 1011 989 static int gfs2_unlink_inode(struct gfs2_inode *dip, 1012 - const struct dentry *dentry, 1013 - struct buffer_head *bh) 990 + const struct dentry *dentry) 1014 991 { 1015 992 struct inode *inode = dentry->d_inode; 1016 993 struct gfs2_inode *ip = GFS2_I(inode); ··· 1049 1028 struct gfs2_sbd *sdp = GFS2_SB(dir); 1050 1029 struct inode *inode = dentry->d_inode; 1051 1030 struct gfs2_inode *ip = GFS2_I(inode); 1052 - struct buffer_head *bh; 1053 1031 struct gfs2_holder ghs[3]; 1054 1032 struct gfs2_rgrpd *rgd; 1055 1033 int error; ··· 1097 1077 1098 1078 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); 1099 1079 if (error) 1100 - goto out_gunlock; 1101 - 1102 - error = gfs2_meta_inode_buffer(ip, &bh); 1103 - if (error) 1104 1080 goto out_end_trans; 1105 1081 1106 - error = gfs2_unlink_inode(dip, dentry, bh); 1107 - brelse(bh); 1082 + error = gfs2_unlink_inode(dip, dentry); 1108 1083 1109 1084 out_end_trans: 1110 1085 gfs2_trans_end(sdp); ··· 1380 1365 if (error) 1381 1366 goto out_gunlock; 1382 1367 1383 - error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); 1368 + error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); 1384 1369 if (error) 1385 1370 goto out_gunlock_q; 1386 1371 ··· 1399 1384 1400 1385 /* Remove the target file, if it exists */ 1401 1386 1402 - if (nip) { 1403 - struct buffer_head *bh; 1404 - error = gfs2_meta_inode_buffer(nip, &bh); 1405 - if (error) 1406 - goto out_end_trans; 1407 - error = gfs2_unlink_inode(ndip, ndentry, bh); 1408 - brelse(bh); 1409 - } 1387 + if (nip) 1388 + error = gfs2_unlink_inode(ndip, ndentry); 1410 1389 1411 1390 if (dir_rename) { 1412 1391 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
+15 -5
fs/gfs2/lock_dlm.c
··· 120 120 gfs2_update_reply_times(gl); 121 121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 122 122 123 - if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) 124 - memset(gl->gl_lvb, 0, GDLM_LVB_SIZE); 123 + if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr) 124 + memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); 125 125 126 126 switch (gl->gl_lksb.sb_status) { 127 127 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ ··· 203 203 static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, 204 204 const int req) 205 205 { 206 - u32 lkf = DLM_LKF_VALBLK; 207 - u32 lkid = gl->gl_lksb.sb_lkid; 206 + u32 lkf = 0; 207 + 208 + if (gl->gl_lksb.sb_lvbptr) 209 + lkf |= DLM_LKF_VALBLK; 208 210 209 211 if (gfs_flags & LM_FLAG_TRY) 210 212 lkf |= DLM_LKF_NOQUEUE; ··· 230 228 BUG(); 231 229 } 232 230 233 - if (lkid != 0) { 231 + if (gl->gl_lksb.sb_lkid != 0) { 234 232 lkf |= DLM_LKF_CONVERT; 235 233 if (test_bit(GLF_BLOCKING, &gl->gl_flags)) 236 234 lkf |= DLM_LKF_QUECVT; ··· 291 289 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); 292 290 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); 293 291 gfs2_update_request_times(gl); 292 + 293 + /* don't want to skip dlm_unlock writing the lvb when lock is ex */ 294 + if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && 295 + gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { 296 + gfs2_glock_free(gl); 297 + return; 298 + } 299 + 294 300 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 295 301 NULL, gl); 296 302 if (error) {
+3
fs/gfs2/ops_fstype.c
··· 278 278 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - 279 279 sizeof(struct gfs2_meta_header)) / 280 280 sizeof(struct gfs2_quota_change); 281 + sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize - 282 + sizeof(struct gfs2_meta_header)) 283 + * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */ 281 284 282 285 /* Compute maximum reservation required to add an entry to a directory */ 283 286
+5 -5
fs/gfs2/quota.c
··· 816 816 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 817 817 818 818 reserved = 1 + (nalloc * (data_blocks + ind_blocks)); 819 - error = gfs2_inplace_reserve(ip, reserved); 819 + error = gfs2_inplace_reserve(ip, reserved, 0); 820 820 if (error) 821 821 goto out_alloc; 822 822 ··· 869 869 if (error < 0) 870 870 return error; 871 871 872 - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 872 + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 873 873 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); 874 874 qlvb->__pad = 0; 875 875 qlvb->qb_limit = q.qu_limit; ··· 893 893 if (error) 894 894 return error; 895 895 896 - qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 896 + qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 897 897 898 898 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 899 899 gfs2_glock_dq_uninit(q_gh); ··· 1506 1506 if (error) 1507 1507 goto out; 1508 1508 1509 - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 1509 + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 1510 1510 fdq->d_version = FS_DQUOT_VERSION; 1511 1511 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1512 1512 fdq->d_id = from_kqid(&init_user_ns, qid); ··· 1605 1605 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 1606 1606 &data_blocks, &ind_blocks); 1607 1607 blocks = 1 + data_blocks + ind_blocks; 1608 - error = gfs2_inplace_reserve(ip, blocks); 1608 + error = gfs2_inplace_reserve(ip, blocks, 0); 1609 1609 if (error) 1610 1610 goto out_i; 1611 1611 blocks += gfs2_rg_blocks(ip, blocks);
+123 -16
fs/gfs2/rgrp.c
··· 16 16 #include <linux/prefetch.h> 17 17 #include <linux/blkdev.h> 18 18 #include <linux/rbtree.h> 19 + #include <linux/random.h> 19 20 20 21 #include "gfs2.h" 21 22 #include "incore.h" ··· 252 251 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) 253 252 { 254 253 u64 rblock = block - rbm->rgd->rd_data0; 255 - u32 goal = (u32)rblock; 256 - int x; 254 + u32 x; 257 255 258 256 if (WARN_ON_ONCE(rblock > UINT_MAX)) 259 257 return -EINVAL; 260 258 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) 261 259 return -E2BIG; 262 260 263 - for (x = 0; x < rbm->rgd->rd_length; x++) { 264 - rbm->bi = rbm->rgd->rd_bits + x; 265 - if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { 266 - rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); 267 - break; 268 - } 269 - } 261 + rbm->bi = rbm->rgd->rd_bits; 262 + rbm->offset = (u32)(rblock); 263 + /* Check if the block is within the first block */ 264 + if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) 265 + return 0; 270 266 267 + /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ 268 + rbm->offset += (sizeof(struct gfs2_rgrp) - 269 + sizeof(struct gfs2_meta_header)) * GFS2_NBBY; 270 + x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; 271 + rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; 272 + rbm->bi += x; 271 273 return 0; 272 274 } 273 275 ··· 879 875 goto fail; 880 876 881 877 rgd->rd_gl->gl_object = rgd; 882 - rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; 878 + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; 883 879 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 884 880 if (rgd->rd_data > sdp->sd_max_rg_data) 885 881 sdp->sd_max_rg_data = rgd->rd_data; ··· 1682 1678 return; 1683 1679 } 1684 1680 1681 + /** 1682 + * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested 1683 + * @rgd: The rgrp in question 1684 + * @loops: An indication of how picky we can be (0=very, 1=less so) 1685 + * 1686 + * 
This function uses the recently added glock statistics in order to 1687 + * figure out whether a particular resource group is suffering from 1688 + * contention from multiple nodes. This is done purely on the basis 1689 + * of timings, since this is the only data we have to work with and 1690 + * our aim here is to reject a resource group which is highly contended 1691 + * but (very important) not to do this too often in order to ensure that 1692 + * we do not land up introducing fragmentation by changing resource 1693 + * groups when not actually required. 1694 + * 1695 + * The calculation is fairly simple, we want to know whether the SRTTB 1696 + * (i.e. smoothed round trip time for blocking operations) to acquire 1697 + * the lock for this rgrp's glock is significantly greater than the 1698 + * time taken for resource groups on average. We introduce a margin in 1699 + * the form of the variable @var which is computed as the sum of the two 1700 + * respective variances, and multiplied by a factor depending on @loops 1701 + * and whether we have a lot of data to base the decision on. This is 1702 + * then tested against the square difference of the means in order to 1703 + * decide whether the result is statistically significant or not. 
1704 + * 1705 + * Returns: A boolean verdict on the congestion status 1706 + */ 1707 + 1708 + static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) 1709 + { 1710 + const struct gfs2_glock *gl = rgd->rd_gl; 1711 + const struct gfs2_sbd *sdp = gl->gl_sbd; 1712 + struct gfs2_lkstats *st; 1713 + s64 r_dcount, l_dcount; 1714 + s64 r_srttb, l_srttb; 1715 + s64 srttb_diff; 1716 + s64 sqr_diff; 1717 + s64 var; 1718 + 1719 + preempt_disable(); 1720 + st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; 1721 + r_srttb = st->stats[GFS2_LKS_SRTTB]; 1722 + r_dcount = st->stats[GFS2_LKS_DCOUNT]; 1723 + var = st->stats[GFS2_LKS_SRTTVARB] + 1724 + gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; 1725 + preempt_enable(); 1726 + 1727 + l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; 1728 + l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; 1729 + 1730 + if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) 1731 + return false; 1732 + 1733 + srttb_diff = r_srttb - l_srttb; 1734 + sqr_diff = srttb_diff * srttb_diff; 1735 + 1736 + var *= 2; 1737 + if (l_dcount < 8 || r_dcount < 8) 1738 + var *= 2; 1739 + if (loops == 1) 1740 + var *= 2; 1741 + 1742 + return ((srttb_diff < 0) && (sqr_diff > var)); 1743 + } 1744 + 1745 + /** 1746 + * gfs2_rgrp_used_recently 1747 + * @rs: The block reservation with the rgrp to test 1748 + * @msecs: The time limit in milliseconds 1749 + * 1750 + * Returns: True if the rgrp glock has been used within the time limit 1751 + */ 1752 + static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, 1753 + u64 msecs) 1754 + { 1755 + u64 tdiff; 1756 + 1757 + tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), 1758 + rs->rs_rbm.rgd->rd_gl->gl_dstamp)); 1759 + 1760 + return tdiff > (msecs * 1000 * 1000); 1761 + } 1762 + 1763 + static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) 1764 + { 1765 + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1766 + u32 skip; 1767 + 1768 + get_random_bytes(&skip, sizeof(skip)); 1769 + return skip % 
sdp->sd_rgrps; 1770 + } 1771 + 1685 1772 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1686 1773 { 1687 1774 struct gfs2_rgrpd *rgd = *pos; 1775 + struct gfs2_sbd *sdp = rgd->rd_sbd; 1688 1776 1689 1777 rgd = gfs2_rgrpd_get_next(rgd); 1690 1778 if (rgd == NULL) 1691 - rgd = gfs2_rgrpd_get_next(NULL); 1779 + rgd = gfs2_rgrpd_get_first(sdp); 1692 1780 *pos = rgd; 1693 1781 if (rgd != begin) /* If we didn't wrap */ 1694 1782 return true; ··· 1795 1699 * Returns: errno 1796 1700 */ 1797 1701 1798 - int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1702 + int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) 1799 1703 { 1800 1704 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1801 1705 struct gfs2_rgrpd *begin = NULL; 1802 1706 struct gfs2_blkreserv *rs = ip->i_res; 1803 - int error = 0, rg_locked, flags = LM_FLAG_TRY; 1707 + int error = 0, rg_locked, flags = 0; 1804 1708 u64 last_unlinked = NO_BLOCK; 1805 1709 int loops = 0; 1710 + u32 skip = 0; 1806 1711 1807 1712 if (sdp->sd_args.ar_rgrplvb) 1808 1713 flags |= GL_SKIP; ··· 1817 1720 } else { 1818 1721 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1819 1722 } 1723 + if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) 1724 + skip = gfs2_orlov_skip(ip); 1820 1725 if (rs->rs_rbm.rgd == NULL) 1821 1726 return -EBADSLT; 1822 1727 ··· 1827 1728 1828 1729 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1829 1730 rg_locked = 0; 1731 + if (skip && skip--) 1732 + goto next_rgrp; 1733 + if (!gfs2_rs_active(rs) && (loops < 2) && 1734 + gfs2_rgrp_used_recently(rs, 1000) && 1735 + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) 1736 + goto next_rgrp; 1830 1737 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, 1831 1738 LM_ST_EXCLUSIVE, flags, 1832 1739 &rs->rs_rgd_gh); 1833 - if (error == GLR_TRYFAILED) 1834 - goto next_rgrp; 1835 1740 if (unlikely(error)) 1836 1741 return error; 1742 + if (!gfs2_rs_active(rs) && (loops < 2) && 
1743 + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) 1744 + goto skip_rgrp; 1837 1745 if (sdp->sd_args.ar_rgrplvb) { 1838 1746 error = update_rgrp_lvb(rs->rs_rbm.rgd); 1839 1747 if (unlikely(error)) { ··· 1887 1781 /* Find the next rgrp, and continue looking */ 1888 1782 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1889 1783 continue; 1784 + if (skip) 1785 + continue; 1890 1786 1891 1787 /* If we've scanned all the rgrps, but found no free blocks 1892 1788 * then this checks for some less likely conditions before 1893 1789 * trying again. 1894 1790 */ 1895 - flags &= ~LM_FLAG_TRY; 1896 1791 loops++; 1897 1792 /* Check that fs hasn't grown if writing to rindex */ 1898 1793 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
+2 -1
fs/gfs2/rgrp.h
··· 39 39 40 40 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 41 41 42 - extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); 42 + #define GFS2_AF_ORLOV 1 43 + extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); 43 44 extern void gfs2_inplace_release(struct gfs2_inode *ip); 44 45 45 46 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
+1 -1
fs/gfs2/trace_gfs2.h
··· 486 486 ), 487 487 488 488 TP_fast_assign( 489 - __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; 489 + __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev; 490 490 __entry->start = block; 491 491 __entry->inum = ip->i_no_addr; 492 492 __entry->len = len;
+1 -1
fs/gfs2/xattr.c
··· 734 734 if (error) 735 735 return error; 736 736 737 - error = gfs2_inplace_reserve(ip, blks); 737 + error = gfs2_inplace_reserve(ip, blks, 0); 738 738 if (error) 739 739 goto out_gunlock_q; 740 740