Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw

Pull GFS2 updates from Steven Whitehouse:
"The main feature this time is the new Orlov allocator and the patches
leading up to it which allow us to allocate new inodes from their own
allocation context, rather than borrowing that of their parent
directory. It is this change which then allows us to choose a
different location for subdirectories when required. This works
exactly as per the ext3 implementation from the users point of view.

In addition to that, we've got a speed up in gfs2_rbm_from_block()
from Bob Peterson, three locking related improvements from Dave
Teigland plus a selection of smaller bug fixes and clean ups."

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw:
GFS2: Set gl_object during inode create
GFS2: add error check while allocating new inodes
GFS2: don't reference inode's glock during block allocation trace
GFS2: remove redundant lvb pointer
GFS2: only use lvb on glocks that need it
GFS2: skip dlm_unlock calls in unmount
GFS2: Fix one RG corner case
GFS2: Eliminate redundant buffer_head manipulation in gfs2_unlink_inode
GFS2: Use dirty_inode in gfs2_dir_add
GFS2: Fix truncation of journaled data files
GFS2: Add Orlov allocator
GFS2: Use proper allocation context for new inodes
GFS2: Add test for resource group congestion status
GFS2: Rename glops go_xmote_th to go_sync
GFS2: Speed up gfs2_rbm_from_block
GFS2: Review bug traps in glops.c

+389 -203
+1 -1
fs/gfs2/aops.c
··· 643 643 goto out_unlock; 644 644 645 645 requested = data_blocks + ind_blocks; 646 - error = gfs2_inplace_reserve(ip, requested); 646 + error = gfs2_inplace_reserve(ip, requested, 0); 647 647 if (error) 648 648 goto out_qunlock; 649 649 }
+50 -4
fs/gfs2/bmap.c
··· 991 991 return err; 992 992 } 993 993 994 + /** 995 + * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files 996 + * @inode: The inode being truncated 997 + * @oldsize: The original (larger) size 998 + * @newsize: The new smaller size 999 + * 1000 + * With jdata files, we have to journal a revoke for each block which is 1001 + * truncated. As a result, we need to split this into separate transactions 1002 + * if the number of pages being truncated gets too large. 1003 + */ 1004 + 1005 + #define GFS2_JTRUNC_REVOKES 8192 1006 + 1007 + static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) 1008 + { 1009 + struct gfs2_sbd *sdp = GFS2_SB(inode); 1010 + u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; 1011 + u64 chunk; 1012 + int error; 1013 + 1014 + while (oldsize != newsize) { 1015 + chunk = oldsize - newsize; 1016 + if (chunk > max_chunk) 1017 + chunk = max_chunk; 1018 + truncate_pagecache(inode, oldsize, oldsize - chunk); 1019 + oldsize -= chunk; 1020 + gfs2_trans_end(sdp); 1021 + error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 1022 + if (error) 1023 + return error; 1024 + } 1025 + 1026 + return 0; 1027 + } 1028 + 994 1029 static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) 995 1030 { 996 1031 struct gfs2_inode *ip = GFS2_I(inode); ··· 1035 1000 int journaled = gfs2_is_jdata(ip); 1036 1001 int error; 1037 1002 1038 - error = gfs2_trans_begin(sdp, 1039 - RES_DINODE + (journaled ? 
RES_JDATA : 0), 0); 1003 + if (journaled) 1004 + error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); 1005 + else 1006 + error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1040 1007 if (error) 1041 1008 return error; 1042 1009 ··· 1063 1026 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1064 1027 gfs2_dinode_out(ip, dibh->b_data); 1065 1028 1066 - truncate_pagecache(inode, oldsize, newsize); 1029 + if (journaled) 1030 + error = gfs2_journaled_truncate(inode, oldsize, newsize); 1031 + else 1032 + truncate_pagecache(inode, oldsize, newsize); 1033 + 1034 + if (error) { 1035 + brelse(dibh); 1036 + return error; 1037 + } 1038 + 1067 1039 out_brelse: 1068 1040 brelse(dibh); 1069 1041 out: ··· 1224 1178 if (error) 1225 1179 return error; 1226 1180 1227 - error = gfs2_inplace_reserve(ip, 1); 1181 + error = gfs2_inplace_reserve(ip, 1, 0); 1228 1182 if (error) 1229 1183 goto do_grow_qunlock; 1230 1184 unstuff = 1;
+1 -6
fs/gfs2/dir.c
··· 1676 1676 be16_add_cpu(&leaf->lf_entries, 1); 1677 1677 } 1678 1678 brelse(bh); 1679 - error = gfs2_meta_inode_buffer(ip, &bh); 1680 - if (error) 1681 - break; 1682 - gfs2_trans_add_bh(ip->i_gl, bh, 1); 1683 1679 ip->i_entries++; 1684 1680 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1685 1681 if (S_ISDIR(nip->i_inode.i_mode)) 1686 1682 inc_nlink(&ip->i_inode); 1687 - gfs2_dinode_out(ip, bh->b_data); 1688 - brelse(bh); 1683 + mark_inode_dirty(inode); 1689 1684 error = 0; 1690 1685 break; 1691 1686 }
+2 -2
fs/gfs2/file.c
··· 432 432 if (ret) 433 433 goto out_unlock; 434 434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 435 - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 435 + ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); 436 436 if (ret) 437 437 goto out_quota_unlock; 438 438 ··· 825 825 retry: 826 826 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 827 827 828 - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 828 + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); 829 829 if (error) { 830 830 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 831 831 bytes >>= 1;
+26 -14
fs/gfs2/glock.c
··· 55 55 56 56 typedef void (*glock_examiner) (struct gfs2_glock * gl); 57 57 58 - static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 59 - #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) 60 58 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 61 59 62 60 static struct dentry *gfs2_root; ··· 105 107 { 106 108 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); 107 109 108 - if (gl->gl_ops->go_flags & GLOF_ASPACE) 110 + if (gl->gl_ops->go_flags & GLOF_ASPACE) { 109 111 kmem_cache_free(gfs2_glock_aspace_cachep, gl); 110 - else 112 + } else { 113 + kfree(gl->gl_lksb.sb_lvbptr); 111 114 kmem_cache_free(gfs2_glock_cachep, gl); 115 + } 112 116 } 113 117 114 118 void gfs2_glock_free(struct gfs2_glock *gl) ··· 537 537 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) 538 538 clear_bit(GLF_BLOCKING, &gl->gl_flags); 539 539 spin_unlock(&gl->gl_spin); 540 - if (glops->go_xmote_th) 541 - glops->go_xmote_th(gl); 540 + if (glops->go_sync) 541 + glops->go_sync(gl); 542 542 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) 543 543 glops->go_inval(gl, target == LM_ST_DEFERRED ? 
0 : DIO_METADATA); 544 544 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); ··· 547 547 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 548 548 /* lock_dlm */ 549 549 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 550 - GLOCK_BUG_ON(gl, ret); 550 + if (ret) { 551 + printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); 552 + GLOCK_BUG_ON(gl, 1); 553 + } 551 554 } else { /* lock_nolock */ 552 555 finish_xmote(gl, target); 553 556 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) ··· 739 736 if (!gl) 740 737 return -ENOMEM; 741 738 739 + memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 740 + 741 + if (glops->go_flags & GLOF_LVB) { 742 + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); 743 + if (!gl->gl_lksb.sb_lvbptr) { 744 + kmem_cache_free(cachep, gl); 745 + return -ENOMEM; 746 + } 747 + } 748 + 742 749 atomic_inc(&sdp->sd_glock_disposal); 743 750 gl->gl_sbd = sdp; 744 751 gl->gl_flags = 0; ··· 766 753 preempt_enable(); 767 754 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; 768 755 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; 769 - memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 770 - memset(gl->gl_lvb, 0, 32 * sizeof(char)); 771 - gl->gl_lksb.sb_lvbptr = gl->gl_lvb; 772 756 gl->gl_tchange = jiffies; 773 757 gl->gl_object = NULL; 774 758 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; ··· 787 777 tmp = search_bucket(hash, sdp, &name); 788 778 if (tmp) { 789 779 spin_unlock_bucket(hash); 780 + kfree(gl->gl_lksb.sb_lvbptr); 790 781 kmem_cache_free(cachep, gl); 791 782 atomic_dec(&sdp->sd_glock_disposal); 792 783 gl = tmp; ··· 1024 1013 printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); 1025 1014 printk(KERN_ERR "lock type: %d req lock state : %d\n", 1026 1015 gh->gh_gl->gl_name.ln_type, gh->gh_state); 1027 - __dump_glock(NULL, gl); 1016 + gfs2_dump_glock(NULL, gl); 1028 1017 BUG(); 1029 1018 } 1030 1019 ··· 1519 1508 { 1520 1509 int ret; 1521 1510 spin_lock(&gl->gl_spin); 1522 - ret = __dump_glock(seq, gl); 1511 + ret = gfs2_dump_glock(seq, 
gl); 1523 1512 spin_unlock(&gl->gl_spin); 1524 1513 return ret; 1525 1514 } ··· 1539 1528 1540 1529 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1541 1530 { 1531 + set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 1542 1532 glock_hash_walk(clear_glock, sdp); 1543 1533 flush_workqueue(glock_workqueue); 1544 1534 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); ··· 1667 1655 } 1668 1656 1669 1657 /** 1670 - * __dump_glock - print information about a glock 1658 + * gfs2_dump_glock - print information about a glock 1671 1659 * @seq: The seq_file struct 1672 1660 * @gl: the glock 1673 1661 * ··· 1684 1672 * Returns: 0 on success, -ENOBUFS when we run out of space 1685 1673 */ 1686 1674 1687 - static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) 1675 + int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) 1688 1676 { 1689 1677 const struct gfs2_glock_operations *glops = gl->gl_ops; 1690 1678 unsigned long long dtime;
+27 -27
fs/gfs2/glock.h
··· 178 178 return NULL; 179 179 } 180 180 181 - int gfs2_glock_get(struct gfs2_sbd *sdp, 182 - u64 number, const struct gfs2_glock_operations *glops, 183 - int create, struct gfs2_glock **glp); 184 - void gfs2_glock_hold(struct gfs2_glock *gl); 185 - void gfs2_glock_put_nolock(struct gfs2_glock *gl); 186 - void gfs2_glock_put(struct gfs2_glock *gl); 187 - void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 188 - struct gfs2_holder *gh); 189 - void gfs2_holder_reinit(unsigned int state, unsigned flags, 190 - struct gfs2_holder *gh); 191 - void gfs2_holder_uninit(struct gfs2_holder *gh); 192 - int gfs2_glock_nq(struct gfs2_holder *gh); 193 - int gfs2_glock_poll(struct gfs2_holder *gh); 194 - int gfs2_glock_wait(struct gfs2_holder *gh); 195 - void gfs2_glock_dq(struct gfs2_holder *gh); 196 - void gfs2_glock_dq_wait(struct gfs2_holder *gh); 197 - 198 - void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 199 - int gfs2_glock_nq_num(struct gfs2_sbd *sdp, 200 - u64 number, const struct gfs2_glock_operations *glops, 201 - unsigned int state, int flags, struct gfs2_holder *gh); 202 - 203 - int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 204 - void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 205 - void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 206 - 207 - __printf(2, 3) 181 + extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, 182 + const struct gfs2_glock_operations *glops, 183 + int create, struct gfs2_glock **glp); 184 + extern void gfs2_glock_hold(struct gfs2_glock *gl); 185 + extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); 186 + extern void gfs2_glock_put(struct gfs2_glock *gl); 187 + extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, 188 + unsigned flags, struct gfs2_holder *gh); 189 + extern void gfs2_holder_reinit(unsigned int state, unsigned flags, 190 + struct gfs2_holder *gh); 191 + extern void gfs2_holder_uninit(struct gfs2_holder 
*gh); 192 + extern int gfs2_glock_nq(struct gfs2_holder *gh); 193 + extern int gfs2_glock_poll(struct gfs2_holder *gh); 194 + extern int gfs2_glock_wait(struct gfs2_holder *gh); 195 + extern void gfs2_glock_dq(struct gfs2_holder *gh); 196 + extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); 197 + extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 198 + extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, 199 + const struct gfs2_glock_operations *glops, 200 + unsigned int state, int flags, 201 + struct gfs2_holder *gh); 202 + extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 203 + extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 204 + extern void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 205 + extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 206 + #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) 207 + extern __printf(2, 3) 208 208 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); 209 209 210 210 /**
+10 -9
fs/gfs2/glops.c
··· 74 74 75 75 gfs2_trans_add_revoke(sdp, bd); 76 76 } 77 - BUG_ON(!fsync && atomic_read(&gl->gl_ail_count)); 77 + GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); 78 78 spin_unlock(&sdp->sd_ail_lock); 79 79 gfs2_log_unlock(sdp); 80 80 } ··· 96 96 tr.tr_ip = (unsigned long)__builtin_return_address(0); 97 97 sb_start_intwrite(sdp->sd_vfs); 98 98 gfs2_log_reserve(sdp, tr.tr_reserved); 99 - BUG_ON(current->journal_info); 99 + WARN_ON_ONCE(current->journal_info); 100 100 current->journal_info = &tr; 101 101 102 102 __gfs2_ail_flush(gl, 0); ··· 139 139 140 140 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 141 141 return; 142 - BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); 142 + GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 143 143 144 144 gfs2_log_flush(gl->gl_sbd, gl); 145 145 filemap_fdatawrite(metamapping); ··· 168 168 { 169 169 struct address_space *mapping = gfs2_glock2aspace(gl); 170 170 171 - BUG_ON(!(flags & DIO_METADATA)); 171 + WARN_ON_ONCE(!(flags & DIO_METADATA)); 172 172 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 173 173 truncate_inode_pages(mapping, 0); 174 174 ··· 197 197 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 198 198 return; 199 199 200 - BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); 200 + GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 201 201 202 202 gfs2_log_flush(gl->gl_sbd, gl); 203 203 filemap_fdatawrite(metamapping); ··· 536 536 }; 537 537 538 538 const struct gfs2_glock_operations gfs2_inode_glops = { 539 - .go_xmote_th = inode_go_sync, 539 + .go_sync = inode_go_sync, 540 540 .go_inval = inode_go_inval, 541 541 .go_demote_ok = inode_go_demote_ok, 542 542 .go_lock = inode_go_lock, ··· 546 546 }; 547 547 548 548 const struct gfs2_glock_operations gfs2_rgrp_glops = { 549 - .go_xmote_th = rgrp_go_sync, 549 + .go_sync = rgrp_go_sync, 550 550 .go_inval = rgrp_go_inval, 551 551 .go_lock = gfs2_rgrp_go_lock, 552 552 .go_unlock = gfs2_rgrp_go_unlock, 553 553 .go_dump = gfs2_rgrp_dump, 554 554 .go_type = 
LM_TYPE_RGRP, 555 - .go_flags = GLOF_ASPACE, 555 + .go_flags = GLOF_ASPACE | GLOF_LVB, 556 556 }; 557 557 558 558 const struct gfs2_glock_operations gfs2_trans_glops = { 559 - .go_xmote_th = trans_go_sync, 559 + .go_sync = trans_go_sync, 560 560 .go_xmote_bh = trans_go_xmote_bh, 561 561 .go_demote_ok = trans_go_demote_ok, 562 562 .go_type = LM_TYPE_NONDISK, ··· 577 577 578 578 const struct gfs2_glock_operations gfs2_quota_glops = { 579 579 .go_type = LM_TYPE_QUOTA, 580 + .go_flags = GLOF_LVB, 580 581 }; 581 582 582 583 const struct gfs2_glock_operations gfs2_journal_glops = {
+4 -2
fs/gfs2/incore.h
··· 205 205 206 206 207 207 struct gfs2_glock_operations { 208 - void (*go_xmote_th) (struct gfs2_glock *gl); 208 + void (*go_sync) (struct gfs2_glock *gl); 209 209 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); 210 210 void (*go_inval) (struct gfs2_glock *gl, int flags); 211 211 int (*go_demote_ok) (const struct gfs2_glock *gl); ··· 216 216 const int go_type; 217 217 const unsigned long go_flags; 218 218 #define GLOF_ASPACE 1 219 + #define GLOF_LVB 2 219 220 }; 220 221 221 222 enum { ··· 322 321 ktime_t gl_dstamp; 323 322 struct gfs2_lkstats gl_stats; 324 323 struct dlm_lksb gl_lksb; 325 - char gl_lvb[32]; 326 324 unsigned long gl_tchange; 327 325 void *gl_object; 328 326 ··· 539 539 SDF_DEMOTE = 5, 540 540 SDF_NOJOURNALID = 6, 541 541 SDF_RORECOVERY = 7, /* read only recovery */ 542 + SDF_SKIP_DLM_UNLOCK = 8, 542 543 }; 543 544 544 545 #define GFS2_FSNAME_LEN 256 ··· 622 621 u32 sd_hash_bsize_shift; 623 622 u32 sd_hash_ptrs; /* Number of pointers in a hash block */ 624 623 u32 sd_qc_per_block; 624 + u32 sd_blocks_per_bitmap; 625 625 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ 626 626 u32 sd_max_height; /* Max height of a file's metadata tree */ 627 627 u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
+118 -109
fs/gfs2/inode.c
··· 364 364 return 0; 365 365 } 366 366 367 - static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode, 368 - unsigned int *uid, unsigned int *gid) 367 + static void munge_mode_uid_gid(const struct gfs2_inode *dip, 368 + struct inode *inode) 369 369 { 370 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 371 371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 372 - if (S_ISDIR(*mode)) 373 - *mode |= S_ISUID; 372 + if (S_ISDIR(inode->i_mode)) 373 + inode->i_mode |= S_ISUID; 374 374 else if (dip->i_inode.i_uid != current_fsuid()) 375 - *mode &= ~07111; 376 - *uid = dip->i_inode.i_uid; 375 + inode->i_mode &= ~07111; 376 + inode->i_uid = dip->i_inode.i_uid; 377 377 } else 378 - *uid = current_fsuid(); 378 + inode->i_uid = current_fsuid(); 379 379 380 380 if (dip->i_inode.i_mode & S_ISGID) { 381 - if (S_ISDIR(*mode)) 382 - *mode |= S_ISGID; 383 - *gid = dip->i_inode.i_gid; 381 + if (S_ISDIR(inode->i_mode)) 382 + inode->i_mode |= S_ISGID; 383 + inode->i_gid = dip->i_inode.i_gid; 384 384 } else 385 - *gid = current_fsgid(); 385 + inode->i_gid = current_fsgid(); 386 386 } 387 387 388 - static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 388 + static int alloc_dinode(struct gfs2_inode *ip, u32 flags) 389 389 { 390 - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 390 + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 391 391 int error; 392 392 int dblocks = 1; 393 393 394 - error = gfs2_inplace_reserve(dip, RES_DINODE); 394 + error = gfs2_inplace_reserve(ip, RES_DINODE, flags); 395 395 if (error) 396 396 goto out; 397 397 ··· 399 399 if (error) 400 400 goto out_ipreserv; 401 401 402 - error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation); 402 + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); 403 + ip->i_no_formal_ino = ip->i_generation; 404 + ip->i_inode.i_ino = ip->i_no_addr; 405 + ip->i_goal = ip->i_no_addr; 403 406 404 407 gfs2_trans_end(sdp); 405 408 406 409 out_ipreserv: 407 - 
gfs2_inplace_release(dip); 410 + gfs2_inplace_release(ip); 408 411 out: 409 412 return error; 410 413 } ··· 432 429 /** 433 430 * init_dinode - Fill in a new dinode structure 434 431 * @dip: The directory this inode is being created in 435 - * @gl: The glock covering the new inode 436 - * @inum: The inode number 437 - * @mode: The file permissions 438 - * @uid: The uid of the new inode 439 - * @gid: The gid of the new inode 440 - * @generation: The generation number of the new inode 441 - * @dev: The device number (if a device node) 432 + * @ip: The inode 442 433 * @symname: The symlink destination (if a symlink) 443 - * @size: The inode size (ignored for directories) 444 434 * @bhp: The buffer head (returned to caller) 445 435 * 446 436 */ 447 437 448 - static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449 - const struct gfs2_inum_host *inum, umode_t mode, 450 - unsigned int uid, unsigned int gid, 451 - const u64 *generation, dev_t dev, const char *symname, 452 - unsigned size, struct buffer_head **bhp) 438 + static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, 439 + const char *symname, struct buffer_head **bhp) 453 440 { 454 441 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 455 442 struct gfs2_dinode *di; 456 443 struct buffer_head *dibh; 457 444 struct timespec tv = CURRENT_TIME; 458 445 459 - dibh = gfs2_meta_new(gl, inum->no_addr); 460 - gfs2_trans_add_bh(gl, dibh, 1); 446 + dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 447 + gfs2_trans_add_bh(ip->i_gl, dibh, 1); 461 448 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 462 449 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 463 450 di = (struct gfs2_dinode *)dibh->b_data; 464 451 465 - di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 466 - di->di_num.no_addr = cpu_to_be64(inum->no_addr); 467 - di->di_mode = cpu_to_be32(mode); 468 - di->di_uid = cpu_to_be32(uid); 469 - di->di_gid = cpu_to_be32(gid); 452 + di->di_num.no_formal_ino = 
cpu_to_be64(ip->i_no_formal_ino); 453 + di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 454 + di->di_mode = cpu_to_be32(ip->i_inode.i_mode); 455 + di->di_uid = cpu_to_be32(ip->i_inode.i_uid); 456 + di->di_gid = cpu_to_be32(ip->i_inode.i_gid); 470 457 di->di_nlink = 0; 471 - di->di_size = cpu_to_be64(size); 458 + di->di_size = cpu_to_be64(ip->i_inode.i_size); 472 459 di->di_blocks = cpu_to_be64(1); 473 460 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 474 - di->di_major = cpu_to_be32(MAJOR(dev)); 475 - di->di_minor = cpu_to_be32(MINOR(dev)); 476 - di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 477 - di->di_generation = cpu_to_be64(*generation); 461 + di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev)); 462 + di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev)); 463 + di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr); 464 + di->di_generation = cpu_to_be64(ip->i_generation); 478 465 di->di_flags = 0; 479 466 di->__pad1 = 0; 480 - di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 467 + di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? 
GFS2_FORMAT_DE : 0); 481 468 di->di_height = 0; 482 469 di->__pad2 = 0; 483 470 di->__pad3 = 0; ··· 480 487 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 481 488 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 482 489 483 - switch(mode & S_IFMT) { 490 + switch(ip->i_inode.i_mode & S_IFMT) { 484 491 case S_IFREG: 485 492 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || 486 493 gfs2_tune_get(sdp, gt_new_files_jdata)) ··· 495 502 gfs2_init_dir(dibh, dip); 496 503 break; 497 504 case S_IFLNK: 498 - memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); 505 + memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, ip->i_inode.i_size); 499 506 break; 500 507 } 501 508 ··· 504 511 *bhp = dibh; 505 512 } 506 513 507 - static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 508 - umode_t mode, const struct gfs2_inum_host *inum, 509 - const u64 *generation, dev_t dev, const char *symname, 510 - unsigned int size, struct buffer_head **bhp) 514 + static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, 515 + const char *symname, struct buffer_head **bhp) 511 516 { 517 + struct inode *inode = &ip->i_inode; 512 518 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 513 - unsigned int uid, gid; 514 519 int error; 515 520 516 - munge_mode_uid_gid(dip, &mode, &uid, &gid); 517 521 error = gfs2_rindex_update(sdp); 518 522 if (error) 519 523 return error; 520 524 521 - error = gfs2_quota_lock(dip, uid, gid); 525 + error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid); 522 526 if (error) 523 527 return error; 524 528 525 - error = gfs2_quota_check(dip, uid, gid); 529 + error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid); 526 530 if (error) 527 531 goto out_quota; 528 532 ··· 527 537 if (error) 528 538 goto out_quota; 529 539 530 - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp); 531 - gfs2_quota_change(dip, +1, uid, gid); 540 + init_dinode(dip, ip, symname, bhp); 541 + gfs2_quota_change(dip, +1, 
inode->i_uid, inode->i_gid); 532 542 gfs2_trans_end(sdp); 533 543 534 544 out_quota: ··· 560 570 if (error) 561 571 goto fail_quota_locks; 562 572 563 - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 573 + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); 564 574 if (error) 565 575 goto fail_quota_locks; 566 576 ··· 647 657 struct inode *inode = NULL; 648 658 struct gfs2_inode *dip = GFS2_I(dir), *ip; 649 659 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 650 - struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 660 + struct gfs2_glock *io_gl; 651 661 int error; 652 - u64 generation; 653 662 struct buffer_head *bh = NULL; 663 + u32 aflags = 0; 654 664 655 665 if (!name->len || name->len > GFS2_FNAMESIZE) 656 666 return -ENAMETOOLONG; 657 667 658 - /* We need a reservation to allocate the new dinode block. The 659 - directory ip temporarily points to the reservation, but this is 660 - being done to get a set of contiguous blocks for the new dinode. 661 - Since this is a create, we don't have a sizehint yet, so it will 662 - have to use the minimum reservation size. 
*/ 663 668 error = gfs2_rs_alloc(dip); 664 669 if (error) 665 670 return error; ··· 673 688 if (error) 674 689 goto fail_gunlock; 675 690 676 - error = alloc_dinode(dip, &inum.no_addr, &generation); 677 - if (error) 678 - goto fail_gunlock; 679 - inum.no_formal_ino = generation; 680 - 681 - error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 682 - LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 683 - if (error) 684 - goto fail_gunlock; 685 - 686 - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh); 687 - if (error) 688 - goto fail_gunlock2; 689 - 690 - inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 691 - inum.no_formal_ino, 0); 692 - if (IS_ERR(inode)) 693 - goto fail_gunlock2; 694 - 691 + inode = new_inode(sdp->sd_vfs); 692 + if (!inode) { 693 + gfs2_glock_dq_uninit(ghs); 694 + return -ENOMEM; 695 + } 695 696 ip = GFS2_I(inode); 696 - error = gfs2_inode_refresh(ip); 697 - if (error) 698 - goto fail_gunlock2; 699 - 700 697 error = gfs2_rs_alloc(ip); 701 698 if (error) 699 + goto fail_free_inode; 700 + 701 + set_bit(GIF_INVALID, &ip->i_flags); 702 + inode->i_mode = mode; 703 + inode->i_rdev = dev; 704 + inode->i_size = size; 705 + munge_mode_uid_gid(dip, inode); 706 + ip->i_goal = dip->i_goal; 707 + 708 + if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) || 709 + (dip->i_diskflags & GFS2_DIF_TOPDIR)) 710 + aflags |= GFS2_AF_ORLOV; 711 + 712 + error = alloc_dinode(ip, aflags); 713 + if (error) 714 + goto fail_free_inode; 715 + 716 + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 717 + if (error) 718 + goto fail_free_inode; 719 + 720 + ip->i_gl->gl_object = ip; 721 + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 722 + if (error) 723 + goto fail_free_inode; 724 + 725 + error = make_dinode(dip, ip, symname, &bh); 726 + if (error) 702 727 goto fail_gunlock2; 728 + 729 + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 730 + if 
(error) 731 + goto fail_gunlock2; 732 + 733 + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 734 + if (error) 735 + goto fail_gunlock2; 736 + 737 + ip->i_iopen_gh.gh_gl->gl_object = ip; 738 + gfs2_glock_put(io_gl); 739 + gfs2_set_iop(inode); 740 + insert_inode_hash(inode); 741 + 742 + error = gfs2_inode_refresh(ip); 743 + if (error) 744 + goto fail_gunlock3; 703 745 704 746 error = gfs2_acl_create(dip, inode); 705 747 if (error) 706 - goto fail_gunlock2; 748 + goto fail_gunlock3; 707 749 708 750 error = gfs2_security_init(dip, ip, name); 709 751 if (error) 710 - goto fail_gunlock2; 752 + goto fail_gunlock3; 711 753 712 754 error = link_dinode(dip, name, ip); 713 755 if (error) 714 - goto fail_gunlock2; 756 + goto fail_gunlock3; 715 757 716 758 if (bh) 717 759 brelse(bh); ··· 751 739 d_instantiate(dentry, inode); 752 740 return 0; 753 741 742 + fail_gunlock3: 743 + gfs2_glock_dq_uninit(ghs + 1); 744 + if (ip->i_gl) 745 + gfs2_glock_put(ip->i_gl); 746 + goto fail_gunlock; 747 + 754 748 fail_gunlock2: 755 749 gfs2_glock_dq_uninit(ghs + 1); 750 + fail_free_inode: 751 + if (ip->i_gl) 752 + gfs2_glock_put(ip->i_gl); 753 + gfs2_rs_delete(ip); 754 + free_inode_nonrcu(inode); 755 + inode = NULL; 756 756 fail_gunlock: 757 757 gfs2_glock_dq_uninit(ghs); 758 758 if (inode && !IS_ERR(inode)) { ··· 772 748 iput(inode); 773 749 } 774 750 fail: 775 - gfs2_rs_delete(dip); 776 751 if (bh) 777 752 brelse(bh); 778 753 return error; ··· 907 884 if (error) 908 885 goto out_gunlock; 909 886 910 - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 887 + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); 911 888 if (error) 912 889 goto out_gunlock_q; 913 890 ··· 1000 977 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it 1001 978 * @dip: The parent directory 1002 979 * @name: The name of the entry in the parent directory 1003 - * @bh: The inode buffer for the inode to be removed 1004 980 * @inode: The inode to be removed 1005 
981 * 1006 982 * Called with all the locks and in a transaction. This will only be ··· 1009 987 */ 1010 988 1011 989 static int gfs2_unlink_inode(struct gfs2_inode *dip, 1012 - const struct dentry *dentry, 1013 - struct buffer_head *bh) 990 + const struct dentry *dentry) 1014 991 { 1015 992 struct inode *inode = dentry->d_inode; 1016 993 struct gfs2_inode *ip = GFS2_I(inode); ··· 1049 1028 struct gfs2_sbd *sdp = GFS2_SB(dir); 1050 1029 struct inode *inode = dentry->d_inode; 1051 1030 struct gfs2_inode *ip = GFS2_I(inode); 1052 - struct buffer_head *bh; 1053 1031 struct gfs2_holder ghs[3]; 1054 1032 struct gfs2_rgrpd *rgd; 1055 1033 int error; ··· 1097 1077 1098 1078 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); 1099 1079 if (error) 1100 - goto out_gunlock; 1101 - 1102 - error = gfs2_meta_inode_buffer(ip, &bh); 1103 - if (error) 1104 1080 goto out_end_trans; 1105 1081 1106 - error = gfs2_unlink_inode(dip, dentry, bh); 1107 - brelse(bh); 1082 + error = gfs2_unlink_inode(dip, dentry); 1108 1083 1109 1084 out_end_trans: 1110 1085 gfs2_trans_end(sdp); ··· 1380 1365 if (error) 1381 1366 goto out_gunlock; 1382 1367 1383 - error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); 1368 + error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); 1384 1369 if (error) 1385 1370 goto out_gunlock_q; 1386 1371 ··· 1399 1384 1400 1385 /* Remove the target file, if it exists */ 1401 1386 1402 - if (nip) { 1403 - struct buffer_head *bh; 1404 - error = gfs2_meta_inode_buffer(nip, &bh); 1405 - if (error) 1406 - goto out_end_trans; 1407 - error = gfs2_unlink_inode(ndip, ndentry, bh); 1408 - brelse(bh); 1409 - } 1387 + if (nip) 1388 + error = gfs2_unlink_inode(ndip, ndentry); 1410 1389 1411 1390 if (dir_rename) { 1412 1391 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
+15 -5
fs/gfs2/lock_dlm.c
··· 120 120 gfs2_update_reply_times(gl); 121 121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 122 122 123 - if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) 124 - memset(gl->gl_lvb, 0, GDLM_LVB_SIZE); 123 + if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr) 124 + memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); 125 125 126 126 switch (gl->gl_lksb.sb_status) { 127 127 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ ··· 203 203 static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, 204 204 const int req) 205 205 { 206 - u32 lkf = DLM_LKF_VALBLK; 207 - u32 lkid = gl->gl_lksb.sb_lkid; 206 + u32 lkf = 0; 207 + 208 + if (gl->gl_lksb.sb_lvbptr) 209 + lkf |= DLM_LKF_VALBLK; 208 210 209 211 if (gfs_flags & LM_FLAG_TRY) 210 212 lkf |= DLM_LKF_NOQUEUE; ··· 230 228 BUG(); 231 229 } 232 230 233 - if (lkid != 0) { 231 + if (gl->gl_lksb.sb_lkid != 0) { 234 232 lkf |= DLM_LKF_CONVERT; 235 233 if (test_bit(GLF_BLOCKING, &gl->gl_flags)) 236 234 lkf |= DLM_LKF_QUECVT; ··· 291 289 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); 292 290 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); 293 291 gfs2_update_request_times(gl); 292 + 293 + /* don't want to skip dlm_unlock writing the lvb when lock is ex */ 294 + if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && 295 + gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { 296 + gfs2_glock_free(gl); 297 + return; 298 + } 299 + 294 300 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 295 301 NULL, gl); 296 302 if (error) {
+3
fs/gfs2/ops_fstype.c
··· 278 278 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - 279 279 sizeof(struct gfs2_meta_header)) / 280 280 sizeof(struct gfs2_quota_change); 281 + sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize - 282 + sizeof(struct gfs2_meta_header)) 283 + * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */ 281 284 282 285 /* Compute maximum reservation required to add an entry to a directory */ 283 286
+5 -5
fs/gfs2/quota.c
··· 816 816 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 817 817 818 818 reserved = 1 + (nalloc * (data_blocks + ind_blocks)); 819 - error = gfs2_inplace_reserve(ip, reserved); 819 + error = gfs2_inplace_reserve(ip, reserved, 0); 820 820 if (error) 821 821 goto out_alloc; 822 822 ··· 869 869 if (error < 0) 870 870 return error; 871 871 872 - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 872 + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 873 873 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); 874 874 qlvb->__pad = 0; 875 875 qlvb->qb_limit = q.qu_limit; ··· 893 893 if (error) 894 894 return error; 895 895 896 - qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 896 + qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 897 897 898 898 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 899 899 gfs2_glock_dq_uninit(q_gh); ··· 1506 1506 if (error) 1507 1507 goto out; 1508 1508 1509 - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 1509 + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 1510 1510 fdq->d_version = FS_DQUOT_VERSION; 1511 1511 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1512 1512 fdq->d_id = from_kqid(&init_user_ns, qid); ··· 1605 1605 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 1606 1606 &data_blocks, &ind_blocks); 1607 1607 blocks = 1 + data_blocks + ind_blocks; 1608 - error = gfs2_inplace_reserve(ip, blocks); 1608 + error = gfs2_inplace_reserve(ip, blocks, 0); 1609 1609 if (error) 1610 1610 goto out_i; 1611 1611 blocks += gfs2_rg_blocks(ip, blocks);
+123 -16
fs/gfs2/rgrp.c
··· 16 16 #include <linux/prefetch.h> 17 17 #include <linux/blkdev.h> 18 18 #include <linux/rbtree.h> 19 + #include <linux/random.h> 19 20 20 21 #include "gfs2.h" 21 22 #include "incore.h" ··· 252 251 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) 253 252 { 254 253 u64 rblock = block - rbm->rgd->rd_data0; 255 - u32 goal = (u32)rblock; 256 - int x; 254 + u32 x; 257 255 258 256 if (WARN_ON_ONCE(rblock > UINT_MAX)) 259 257 return -EINVAL; 260 258 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) 261 259 return -E2BIG; 262 260 263 - for (x = 0; x < rbm->rgd->rd_length; x++) { 264 - rbm->bi = rbm->rgd->rd_bits + x; 265 - if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { 266 - rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); 267 - break; 268 - } 269 - } 261 + rbm->bi = rbm->rgd->rd_bits; 262 + rbm->offset = (u32)(rblock); 263 + /* Check if the block is within the first block */ 264 + if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) 265 + return 0; 270 266 267 + /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ 268 + rbm->offset += (sizeof(struct gfs2_rgrp) - 269 + sizeof(struct gfs2_meta_header)) * GFS2_NBBY; 270 + x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; 271 + rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; 272 + rbm->bi += x; 271 273 return 0; 272 274 } 273 275 ··· 879 875 goto fail; 880 876 881 877 rgd->rd_gl->gl_object = rgd; 882 - rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; 878 + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; 883 879 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 884 880 if (rgd->rd_data > sdp->sd_max_rg_data) 885 881 sdp->sd_max_rg_data = rgd->rd_data; ··· 1682 1678 return; 1683 1679 } 1684 1680 1681 + /** 1682 + * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested 1683 + * @rgd: The rgrp in question 1684 + * @loops: An indication of how picky we can be (0=very, 1=less so) 1685 + * 1686 + * 
This function uses the recently added glock statistics in order to 1687 + * figure out whether a particular resource group is suffering from 1688 + * contention from multiple nodes. This is done purely on the basis 1689 + * of timings, since this is the only data we have to work with and 1690 + * our aim here is to reject a resource group which is highly contended 1691 + * but (very important) not to do this too often in order to ensure that 1692 + * we do not land up introducing fragmentation by changing resource 1693 + * groups when not actually required. 1694 + * 1695 + * The calculation is fairly simple, we want to know whether the SRTTB 1696 + * (i.e. smoothed round trip time for blocking operations) to acquire 1697 + * the lock for this rgrp's glock is significantly greater than the 1698 + * time taken for resource groups on average. We introduce a margin in 1699 + * the form of the variable @var which is computed as the sum of the two 1700 + * respective variances, and multiplied by a factor depending on @loops 1701 + * and whether we have a lot of data to base the decision on. This is 1702 + * then tested against the square difference of the means in order to 1703 + * decide whether the result is statistically significant or not. 
1704 + * 1705 + * Returns: A boolean verdict on the congestion status 1706 + */ 1707 + 1708 + static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) 1709 + { 1710 + const struct gfs2_glock *gl = rgd->rd_gl; 1711 + const struct gfs2_sbd *sdp = gl->gl_sbd; 1712 + struct gfs2_lkstats *st; 1713 + s64 r_dcount, l_dcount; 1714 + s64 r_srttb, l_srttb; 1715 + s64 srttb_diff; 1716 + s64 sqr_diff; 1717 + s64 var; 1718 + 1719 + preempt_disable(); 1720 + st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; 1721 + r_srttb = st->stats[GFS2_LKS_SRTTB]; 1722 + r_dcount = st->stats[GFS2_LKS_DCOUNT]; 1723 + var = st->stats[GFS2_LKS_SRTTVARB] + 1724 + gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; 1725 + preempt_enable(); 1726 + 1727 + l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; 1728 + l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; 1729 + 1730 + if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) 1731 + return false; 1732 + 1733 + srttb_diff = r_srttb - l_srttb; 1734 + sqr_diff = srttb_diff * srttb_diff; 1735 + 1736 + var *= 2; 1737 + if (l_dcount < 8 || r_dcount < 8) 1738 + var *= 2; 1739 + if (loops == 1) 1740 + var *= 2; 1741 + 1742 + return ((srttb_diff < 0) && (sqr_diff > var)); 1743 + } 1744 + 1745 + /** 1746 + * gfs2_rgrp_used_recently 1747 + * @rs: The block reservation with the rgrp to test 1748 + * @msecs: The time limit in milliseconds 1749 + * 1750 + * Returns: True if the rgrp glock has been used within the time limit 1751 + */ 1752 + static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, 1753 + u64 msecs) 1754 + { 1755 + u64 tdiff; 1756 + 1757 + tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), 1758 + rs->rs_rbm.rgd->rd_gl->gl_dstamp)); 1759 + 1760 + return tdiff > (msecs * 1000 * 1000); 1761 + } 1762 + 1763 + static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) 1764 + { 1765 + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1766 + u32 skip; 1767 + 1768 + get_random_bytes(&skip, sizeof(skip)); 1769 + return skip % 
sdp->sd_rgrps; 1770 + } 1771 + 1685 1772 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1686 1773 { 1687 1774 struct gfs2_rgrpd *rgd = *pos; 1775 + struct gfs2_sbd *sdp = rgd->rd_sbd; 1688 1776 1689 1777 rgd = gfs2_rgrpd_get_next(rgd); 1690 1778 if (rgd == NULL) 1691 - rgd = gfs2_rgrpd_get_next(NULL); 1779 + rgd = gfs2_rgrpd_get_first(sdp); 1692 1780 *pos = rgd; 1693 1781 if (rgd != begin) /* If we didn't wrap */ 1694 1782 return true; ··· 1795 1699 * Returns: errno 1796 1700 */ 1797 1701 1798 - int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1702 + int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) 1799 1703 { 1800 1704 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1801 1705 struct gfs2_rgrpd *begin = NULL; 1802 1706 struct gfs2_blkreserv *rs = ip->i_res; 1803 - int error = 0, rg_locked, flags = LM_FLAG_TRY; 1707 + int error = 0, rg_locked, flags = 0; 1804 1708 u64 last_unlinked = NO_BLOCK; 1805 1709 int loops = 0; 1710 + u32 skip = 0; 1806 1711 1807 1712 if (sdp->sd_args.ar_rgrplvb) 1808 1713 flags |= GL_SKIP; ··· 1817 1720 } else { 1818 1721 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1819 1722 } 1723 + if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) 1724 + skip = gfs2_orlov_skip(ip); 1820 1725 if (rs->rs_rbm.rgd == NULL) 1821 1726 return -EBADSLT; 1822 1727 ··· 1827 1728 1828 1729 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1829 1730 rg_locked = 0; 1731 + if (skip && skip--) 1732 + goto next_rgrp; 1733 + if (!gfs2_rs_active(rs) && (loops < 2) && 1734 + gfs2_rgrp_used_recently(rs, 1000) && 1735 + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) 1736 + goto next_rgrp; 1830 1737 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, 1831 1738 LM_ST_EXCLUSIVE, flags, 1832 1739 &rs->rs_rgd_gh); 1833 - if (error == GLR_TRYFAILED) 1834 - goto next_rgrp; 1835 1740 if (unlikely(error)) 1836 1741 return error; 1742 + if (!gfs2_rs_active(rs) && (loops < 2) && 
1743 + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) 1744 + goto skip_rgrp; 1837 1745 if (sdp->sd_args.ar_rgrplvb) { 1838 1746 error = update_rgrp_lvb(rs->rs_rbm.rgd); 1839 1747 if (unlikely(error)) { ··· 1887 1781 /* Find the next rgrp, and continue looking */ 1888 1782 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1889 1783 continue; 1784 + if (skip) 1785 + continue; 1890 1786 1891 1787 /* If we've scanned all the rgrps, but found no free blocks 1892 1788 * then this checks for some less likely conditions before 1893 1789 * trying again. 1894 1790 */ 1895 - flags &= ~LM_FLAG_TRY; 1896 1791 loops++; 1897 1792 /* Check that fs hasn't grown if writing to rindex */ 1898 1793 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
+2 -1
fs/gfs2/rgrp.h
··· 39 39 40 40 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 41 41 42 - extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); 42 + #define GFS2_AF_ORLOV 1 43 + extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); 43 44 extern void gfs2_inplace_release(struct gfs2_inode *ip); 44 45 45 46 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
+1 -1
fs/gfs2/trace_gfs2.h
··· 486 486 ), 487 487 488 488 TP_fast_assign( 489 - __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; 489 + __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev; 490 490 __entry->start = block; 491 491 __entry->inum = ip->i_no_addr; 492 492 __entry->len = len;
+1 -1
fs/gfs2/xattr.c
··· 734 734 if (error) 735 735 return error; 736 736 737 - error = gfs2_inplace_reserve(ip, blks); 737 + error = gfs2_inplace_reserve(ip, blks, 0); 738 738 if (error) 739 739 goto out_gunlock_q; 740 740