Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

GFS2: Fix AIL flush issue during fsync

Unfortunately, it is not enough to just ignore locked buffers during
the AIL flush from fsync. We need to be able to ignore all buffers
which are locked, dirty or pinned at this stage as they might have
been added subsequent to the log flush earlier in the fsync function.

In addition, this means that we no longer need to rely on i_mutex to
keep out writes during fsync, so we can, as a side-effect, remove
that protection too.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Tested-By: Abhijith Das <adas@redhat.com>

+20 -24
+2 -6
fs/gfs2/file.c
@@ -593,16 +593,12 @@
 		sync_state &= ~I_DIRTY_SYNC;
 
 	if (sync_state) {
-		mutex_lock(&inode->i_mutex);
 		ret = sync_inode_metadata(inode, 1);
-		if (ret) {
-			mutex_unlock(&inode->i_mutex);
+		if (ret)
 			return ret;
-		}
 		if (gfs2_is_jdata(ip))
 			filemap_write_and_wait(mapping);
-		gfs2_ail_flush(ip->i_gl);
-		mutex_unlock(&inode->i_mutex);
+		gfs2_ail_flush(ip->i_gl, 1);
 	}
 
 	if (mapping->nrpages)
+16 -16
fs/gfs2/glops.c
@@ -42,41 +42,41 @@
 /**
  * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
  * @gl: the glock
+ * @fsync: set when called from fsync (not all buffers will be clean)
  *
  * None of the buffers should be dirty, locked, or pinned.
  */
 
-static void __gfs2_ail_flush(struct gfs2_glock *gl, unsigned long b_state)
+static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct list_head *head = &gl->gl_ail_list;
-	struct gfs2_bufdata *bd;
+	struct gfs2_bufdata *bd, *tmp;
 	struct buffer_head *bh;
+	const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);
 	sector_t blocknr;
 
+	gfs2_log_lock(sdp);
 	spin_lock(&sdp->sd_ail_lock);
-	while (!list_empty(head)) {
-		bd = list_entry(head->next, struct gfs2_bufdata,
-				bd_ail_gl_list);
+	list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
 		bh = bd->bd_bh;
-		blocknr = bh->b_blocknr;
-		if (bh->b_state & b_state)
+		if (bh->b_state & b_state) {
+			if (fsync)
+				continue;
 			gfs2_ail_error(gl, bh);
+		}
+		blocknr = bh->b_blocknr;
 		bh->b_private = NULL;
 		gfs2_remove_from_ail(bd); /* drops ref on bh */
-		spin_unlock(&sdp->sd_ail_lock);
 
 		bd->bd_bh = NULL;
 		bd->bd_blkno = blocknr;
 
-		gfs2_log_lock(sdp);
 		gfs2_trans_add_revoke(sdp, bd);
-		gfs2_log_unlock(sdp);
-
-		spin_lock(&sdp->sd_ail_lock);
 	}
-	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
+	BUG_ON(!fsync && atomic_read(&gl->gl_ail_count));
 	spin_unlock(&sdp->sd_ail_lock);
+	gfs2_log_unlock(sdp);
 }
 
 
@@ -99,13 +99,13 @@
 	BUG_ON(current->journal_info);
 	current->journal_info = &tr;
 
-	__gfs2_ail_flush(gl, (1ul << BH_Dirty)|(1ul << BH_Pinned)|(1ul << BH_Lock));
+	__gfs2_ail_flush(gl, 0);
 
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
 }
 
-void gfs2_ail_flush(struct gfs2_glock *gl)
+void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	unsigned int revokes = atomic_read(&gl->gl_ail_count);
@@ -117,7 +117,7 @@
 	ret = gfs2_trans_begin(sdp, 0, revokes);
 	if (ret)
 		return;
-	__gfs2_ail_flush(gl, (1ul << BH_Dirty)|(1ul << BH_Pinned));
+	__gfs2_ail_flush(gl, fsync);
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
 }
+1 -1
fs/gfs2/glops.h
@@ -23,6 +23,6 @@
 extern const struct gfs2_glock_operations gfs2_journal_glops;
 extern const struct gfs2_glock_operations *gfs2_glops_list[];
 
-extern void gfs2_ail_flush(struct gfs2_glock *gl);
+extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync);
 
 #endif /* __GLOPS_DOT_H__ */
+1 -1
fs/gfs2/super.c
@@ -1533,7 +1533,7 @@
 out_truncate:
 	gfs2_log_flush(sdp, ip->i_gl);
 	write_inode_now(inode, 1);
-	gfs2_ail_flush(ip->i_gl);
+	gfs2_ail_flush(ip->i_gl, 0);
 
 	/* Case 2 starts here */
 	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);