Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

GFS2: high time to take some time over atime

Until now, we've used the same scheme as GFS1 for atime. This has failed
since atime is a per-vfsmnt flag, not a per-fs flag, and as such the
"noatime" flag was not getting passed down to the filesystems. This
patch removes all the "special casing" around atime updates and we
simply use the VFS's atime code.

The net result is that GFS2 will now support all the same atime-related
mount options as any other filesystem on a per-vfsmnt basis. We do lose
the "lazy atime" updates, but we gain "relatime". We could add lazy
atime to the VFS at a later date, if there is still a requirement for
that variant - I suspect relatime will be enough.

Also we lose about 100 lines of code after this patch has been applied,
and I have a suspicion that it will speed things up a bit, even when
atime is "on". So it seems like a nice clean up as well.

From a user perspective, everything stays the same except the loss of
the per-fs atime quantum tweakable (which ought to be per-vfsmnt at the
very least, and to be honest I don't think anybody ever used it) and that
a number of options which were ignored before now work correctly.

Please let me know if you've got any comments. I'm pushing this out
early so that you can all see what my plans are.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

+64 -166
-2
fs/gfs2/glock.c
··· 1580 1580 *p++ = 'a'; 1581 1581 if (flags & GL_EXACT) 1582 1582 *p++ = 'E'; 1583 - if (flags & GL_ATIME) 1584 - *p++ = 'a'; 1585 1583 if (flags & GL_NOCACHE) 1586 1584 *p++ = 'c'; 1587 1585 if (test_bit(HIF_HOLDER, &iflags))
-1
fs/gfs2/glock.h
··· 24 24 #define GL_ASYNC 0x00000040 25 25 #define GL_EXACT 0x00000080 26 26 #define GL_SKIP 0x00000100 27 - #define GL_ATIME 0x00000200 28 27 #define GL_NOCACHE 0x00000400 29 28 30 29 #define GLR_TRYFAILED 13
-2
fs/gfs2/incore.h
··· 420 420 unsigned int gt_quota_scale_den; /* Denominator */ 421 421 unsigned int gt_quota_cache_secs; 422 422 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ 423 - unsigned int gt_atime_quantum; /* Min secs between atime updates */ 424 423 unsigned int gt_new_files_jdata; 425 424 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 426 425 unsigned int gt_stall_secs; /* Detects trouble! */ ··· 432 433 SDF_JOURNAL_CHECKED = 0, 433 434 SDF_JOURNAL_LIVE = 1, 434 435 SDF_SHUTDOWN = 2, 435 - SDF_NOATIME = 3, 436 436 }; 437 437 438 438 #define GFS2_FSNAME_LEN 256
+8 -99
fs/gfs2/inode.c
··· 18 18 #include <linux/crc32.h> 19 19 #include <linux/lm_interface.h> 20 20 #include <linux/security.h> 21 + #include <linux/time.h> 21 22 22 23 #include "gfs2.h" 23 24 #include "incore.h" ··· 250 249 { 251 250 struct gfs2_dinode_host *di = &ip->i_di; 252 251 const struct gfs2_dinode *str = buf; 252 + struct timespec atime; 253 253 u16 height, depth; 254 254 255 255 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) ··· 277 275 di->di_size = be64_to_cpu(str->di_size); 278 276 i_size_write(&ip->i_inode, di->di_size); 279 277 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 280 - ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); 281 - ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 278 + atime.tv_sec = be64_to_cpu(str->di_atime); 279 + atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 280 + if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 281 + ip->i_inode.i_atime = atime; 282 282 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 283 283 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 284 284 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); ··· 1161 1157 unsigned int x; 1162 1158 int error; 1163 1159 1164 - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); 1165 - error = gfs2_glock_nq_atime(&i_gh); 1160 + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 1161 + error = gfs2_glock_nq(&i_gh); 1166 1162 if (error) { 1167 1163 gfs2_holder_uninit(&i_gh); 1168 1164 return error; ··· 1194 1190 brelse(dibh); 1195 1191 out: 1196 1192 gfs2_glock_dq_uninit(&i_gh); 1197 - return error; 1198 - } 1199 - 1200 - /** 1201 - * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and 1202 - * conditionally update the inode's atime 1203 - * @gh: the holder to acquire 1204 - * 1205 - * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap 1206 - * Update if the difference between the current time and the inode's current 1207 - * atime is greater than an interval 
specified at mount. 1208 - * 1209 - * Returns: errno 1210 - */ 1211 - 1212 - int gfs2_glock_nq_atime(struct gfs2_holder *gh) 1213 - { 1214 - struct gfs2_glock *gl = gh->gh_gl; 1215 - struct gfs2_sbd *sdp = gl->gl_sbd; 1216 - struct gfs2_inode *ip = gl->gl_object; 1217 - s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); 1218 - unsigned int state; 1219 - int flags; 1220 - int error; 1221 - struct timespec tv = CURRENT_TIME; 1222 - 1223 - if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || 1224 - gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || 1225 - gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) 1226 - return -EINVAL; 1227 - 1228 - state = gh->gh_state; 1229 - flags = gh->gh_flags; 1230 - 1231 - error = gfs2_glock_nq(gh); 1232 - if (error) 1233 - return error; 1234 - 1235 - if (test_bit(SDF_NOATIME, &sdp->sd_flags) || 1236 - (sdp->sd_vfs->s_flags & MS_RDONLY)) 1237 - return 0; 1238 - 1239 - if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { 1240 - gfs2_glock_dq(gh); 1241 - gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, 1242 - gh); 1243 - error = gfs2_glock_nq(gh); 1244 - if (error) 1245 - return error; 1246 - 1247 - /* Verify that atime hasn't been updated while we were 1248 - trying to get exclusive lock. 
*/ 1249 - 1250 - tv = CURRENT_TIME; 1251 - if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { 1252 - struct buffer_head *dibh; 1253 - struct gfs2_dinode *di; 1254 - 1255 - error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1256 - if (error == -EROFS) 1257 - return 0; 1258 - if (error) 1259 - goto fail; 1260 - 1261 - error = gfs2_meta_inode_buffer(ip, &dibh); 1262 - if (error) 1263 - goto fail_end_trans; 1264 - 1265 - ip->i_inode.i_atime = tv; 1266 - 1267 - gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1268 - di = (struct gfs2_dinode *)dibh->b_data; 1269 - di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1270 - di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1271 - brelse(dibh); 1272 - 1273 - gfs2_trans_end(sdp); 1274 - } 1275 - 1276 - /* If someone else has asked for the glock, 1277 - unlock and let them have it. Then reacquire 1278 - in the original state. */ 1279 - if (gfs2_glock_is_blocking(gl)) { 1280 - gfs2_glock_dq(gh); 1281 - gfs2_holder_reinit(state, flags, gh); 1282 - return gfs2_glock_nq(gh); 1283 - } 1284 - } 1285 - 1286 - return 0; 1287 - 1288 - fail_end_trans: 1289 - gfs2_trans_end(sdp); 1290 - fail: 1291 - gfs2_glock_dq(gh); 1292 1193 return error; 1293 1194 } 1294 1195
-1
fs/gfs2/inode.h
··· 92 92 const struct gfs2_inode *ip); 93 93 int gfs2_permission(struct inode *inode, int mask); 94 94 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); 95 - int gfs2_glock_nq_atime(struct gfs2_holder *gh); 96 95 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 97 96 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 98 97 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+8 -8
fs/gfs2/ops_address.c
··· 512 512 int error; 513 513 514 514 unlock_page(page); 515 - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 516 - error = gfs2_glock_nq_atime(&gh); 515 + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 516 + error = gfs2_glock_nq(&gh); 517 517 if (unlikely(error)) 518 518 goto out; 519 519 error = AOP_TRUNCATED_PAGE; ··· 594 594 struct gfs2_holder gh; 595 595 int ret; 596 596 597 - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 598 - ret = gfs2_glock_nq_atime(&gh); 597 + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 598 + ret = gfs2_glock_nq(&gh); 599 599 if (unlikely(ret)) 600 600 goto out_uninit; 601 601 if (!gfs2_is_stuffed(ip)) ··· 636 636 unsigned to = from + len; 637 637 struct page *page; 638 638 639 - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); 640 - error = gfs2_glock_nq_atime(&ip->i_gh); 639 + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 640 + error = gfs2_glock_nq(&ip->i_gh); 641 641 if (unlikely(error)) 642 642 goto out_uninit; 643 643 ··· 1000 1000 * unfortunately have the option of only flushing a range like 1001 1001 * the VFS does. 1002 1002 */ 1003 - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); 1004 - rv = gfs2_glock_nq_atime(&gh); 1003 + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); 1004 + rv = gfs2_glock_nq(&gh); 1005 1005 if (rv) 1006 1006 return rv; 1007 1007 rv = gfs2_ok_for_dio(ip, rw, offset);
+8 -8
fs/gfs2/ops_file.c
··· 89 89 u64 offset = file->f_pos; 90 90 int error; 91 91 92 - gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); 93 - error = gfs2_glock_nq_atime(&d_gh); 92 + gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 93 + error = gfs2_glock_nq(&d_gh); 94 94 if (error) { 95 95 gfs2_holder_uninit(&d_gh); 96 96 return error; ··· 153 153 int error; 154 154 u32 fsflags; 155 155 156 - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 157 - error = gfs2_glock_nq_atime(&gh); 156 + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 157 + error = gfs2_glock_nq(&gh); 158 158 if (error) 159 159 return error; 160 160 ··· 351 351 struct gfs2_alloc *al; 352 352 int ret; 353 353 354 - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); 355 - ret = gfs2_glock_nq_atime(&gh); 354 + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 355 + ret = gfs2_glock_nq(&gh); 356 356 if (ret) 357 357 goto out; 358 358 ··· 434 434 struct gfs2_holder i_gh; 435 435 int error; 436 436 437 - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); 438 - error = gfs2_glock_nq_atime(&i_gh); 437 + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 438 + error = gfs2_glock_nq(&i_gh); 439 439 if (error) { 440 440 gfs2_holder_uninit(&i_gh); 441 441 return error;
+5 -18
fs/gfs2/ops_fstype.c
··· 70 70 gt->gt_quota_scale_den = 1; 71 71 gt->gt_quota_cache_secs = 300; 72 72 gt->gt_quota_quantum = 60; 73 - gt->gt_atime_quantum = 3600; 74 73 gt->gt_new_files_jdata = 0; 75 74 gt->gt_max_readahead = 1 << 18; 76 75 gt->gt_stall_secs = 600; ··· 134 135 return sdp; 135 136 } 136 137 137 - static void init_vfs(struct super_block *sb, unsigned noatime) 138 - { 139 - struct gfs2_sbd *sdp = sb->s_fs_info; 140 - 141 - sb->s_magic = GFS2_MAGIC; 142 - sb->s_op = &gfs2_super_ops; 143 - sb->s_export_op = &gfs2_export_ops; 144 - sb->s_time_gran = 1; 145 - sb->s_maxbytes = MAX_LFS_FILESIZE; 146 - 147 - if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) 148 - set_bit(noatime, &sdp->sd_flags); 149 - 150 - /* Don't let the VFS update atimes. GFS2 handles this itself. */ 151 - sb->s_flags |= MS_NOATIME | MS_NODIRATIME; 152 - } 153 138 154 139 /** 155 140 * gfs2_check_sb - Check superblock ··· 1083 1100 goto fail; 1084 1101 } 1085 1102 1086 - init_vfs(sb, SDF_NOATIME); 1103 + sb->s_magic = GFS2_MAGIC; 1104 + sb->s_op = &gfs2_super_ops; 1105 + sb->s_export_op = &gfs2_export_ops; 1106 + sb->s_time_gran = 1; 1107 + sb->s_maxbytes = MAX_LFS_FILESIZE; 1087 1108 1088 1109 /* Set up the buffer cache and fill in some fake block size values 1089 1110 to allow us to read-in the on-disk superblock. */
+35 -16
fs/gfs2/ops_super.c
··· 20 20 #include <linux/gfs2_ondisk.h> 21 21 #include <linux/crc32.h> 22 22 #include <linux/lm_interface.h> 23 + #include <linux/time.h> 23 24 24 25 #include "gfs2.h" 25 26 #include "incore.h" ··· 39 38 #include "dir.h" 40 39 #include "eattr.h" 41 40 #include "bmap.h" 41 + #include "meta_io.h" 42 42 43 43 /** 44 44 * gfs2_write_inode - Make sure the inode is stable on the disk ··· 52 50 static int gfs2_write_inode(struct inode *inode, int sync) 53 51 { 54 52 struct gfs2_inode *ip = GFS2_I(inode); 53 + struct gfs2_sbd *sdp = GFS2_SB(inode); 54 + struct gfs2_holder gh; 55 + struct buffer_head *bh; 56 + struct timespec atime; 57 + struct gfs2_dinode *di; 58 + int ret = 0; 55 59 56 - /* Check this is a "normal" inode */ 57 - if (test_bit(GIF_USER, &ip->i_flags)) { 58 - if (current->flags & PF_MEMALLOC) 59 - return 0; 60 - if (sync) 61 - gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 60 + /* Check this is a "normal" inode, etc */ 61 + if (!test_bit(GIF_USER, &ip->i_flags) || 62 + (current->flags & PF_MEMALLOC)) 63 + return 0; 64 + ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 65 + if (ret) 66 + goto do_flush; 67 + ret = gfs2_trans_begin(sdp, RES_DINODE, 0); 68 + if (ret) 69 + goto do_unlock; 70 + ret = gfs2_meta_inode_buffer(ip, &bh); 71 + if (ret == 0) { 72 + di = (struct gfs2_dinode *)bh->b_data; 73 + atime.tv_sec = be64_to_cpu(di->di_atime); 74 + atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); 75 + if (timespec_compare(&inode->i_atime, &atime) > 0) { 76 + gfs2_trans_add_bh(ip->i_gl, bh, 1); 77 + gfs2_dinode_out(ip, bh->b_data); 78 + } 79 + brelse(bh); 62 80 } 63 - 64 - return 0; 81 + gfs2_trans_end(sdp); 82 + do_unlock: 83 + gfs2_glock_dq_uninit(&gh); 84 + do_flush: 85 + if (sync != 0) 86 + gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 87 + return ret; 65 88 } 66 89 67 90 /** ··· 323 296 error = gfs2_make_fs_rw(sdp); 324 297 } 325 298 } 326 - 327 - if (*flags & (MS_NOATIME | MS_NODIRATIME)) 328 - set_bit(SDF_NOATIME, &sdp->sd_flags); 329 - else 330 - 
clear_bit(SDF_NOATIME, &sdp->sd_flags); 331 - 332 - /* Don't let the VFS update atimes. GFS2 handles this itself. */ 333 - *flags |= MS_NOATIME | MS_NODIRATIME; 334 299 335 300 return error; 336 301 }
-11
fs/gfs2/sys.c
··· 269 269 ARGS_ATTR(suiddir, "%d\n"); 270 270 ARGS_ATTR(data, "%d\n"); 271 271 272 - /* one oddball doesn't fit the macro mold */ 273 - static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf) 274 - { 275 - return snprintf(buf, PAGE_SIZE, "%d\n", 276 - !!test_bit(SDF_NOATIME, &sdp->sd_flags)); 277 - } 278 - static struct args_attr args_attr_noatime = __ATTR_RO(noatime); 279 - 280 272 static struct attribute *args_attrs[] = { 281 273 &args_attr_lockproto.attr, 282 274 &args_attr_locktable.attr, ··· 284 292 &args_attr_quota.attr, 285 293 &args_attr_suiddir.attr, 286 294 &args_attr_data.attr, 287 - &args_attr_noatime.attr, 288 295 NULL, 289 296 }; 290 297 ··· 398 407 TUNE_ATTR(log_flush_secs, 0); 399 408 TUNE_ATTR(quota_warn_period, 0); 400 409 TUNE_ATTR(quota_quantum, 0); 401 - TUNE_ATTR(atime_quantum, 0); 402 410 TUNE_ATTR(max_readahead, 0); 403 411 TUNE_ATTR(complain_secs, 0); 404 412 TUNE_ATTR(statfs_slow, 0); ··· 417 427 &tune_attr_log_flush_secs.attr, 418 428 &tune_attr_quota_warn_period.attr, 419 429 &tune_attr_quota_quantum.attr, 420 - &tune_attr_atime_quantum.attr, 421 430 &tune_attr_max_readahead.attr, 422 431 &tune_attr_complain_secs.attr, 423 432 &tune_attr_statfs_slow.attr,