Merge branch 'master' into for-linus · tjh.dev/kernel@e09d399

+28 -10

fs/xfs/linux-2.6/xfs_aops.c

··· 186 186 } 187 187 188 188 /* 189 + * If the end of the current ioend is beyond the current EOF, 190 + * return the new EOF value, otherwise zero. 191 + */ 192 + STATIC xfs_fsize_t 193 + xfs_ioend_new_eof( 194 + xfs_ioend_t *ioend) 195 + { 196 + xfs_inode_t *ip = XFS_I(ioend->io_inode); 197 + xfs_fsize_t isize; 198 + xfs_fsize_t bsize; 199 + 200 + bsize = ioend->io_offset + ioend->io_size; 201 + isize = MAX(ip->i_size, ip->i_new_size); 202 + isize = MIN(isize, bsize); 203 + return isize > ip->i_d.di_size ? isize : 0; 204 + } 205 + 206 + /* 189 207 * Update on-disk file size now that data has been written to disk. 190 208 * The current in-memory file size is i_size. If a write is beyond 191 209 * eof i_new_size will be the intended file size until i_size is 192 210 * updated. If this write does not extend all the way to the valid 193 211 * file size then restrict this update to the end of the write. 194 212 */ 213 + 195 214 STATIC void 196 215 xfs_setfilesize( 197 216 xfs_ioend_t *ioend) 198 217 { 199 218 xfs_inode_t *ip = XFS_I(ioend->io_inode); 200 219 xfs_fsize_t isize; 201 - xfs_fsize_t bsize; 202 220 203 221 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); 204 222 ASSERT(ioend->io_type != IOMAP_READ); ··· 224 206 if (unlikely(ioend->io_error)) 225 207 return; 226 208 227 - bsize = ioend->io_offset + ioend->io_size; 228 - 229 209 xfs_ilock(ip, XFS_ILOCK_EXCL); 230 - 231 - isize = MAX(ip->i_size, ip->i_new_size); 232 - isize = MIN(isize, bsize); 233 - 234 - if (ip->i_d.di_size < isize) { 210 + isize = xfs_ioend_new_eof(ioend); 211 + if (isize) { 235 212 ip->i_d.di_size = isize; 236 - ip->i_update_core = 1; 237 213 xfs_mark_inode_dirty_sync(ip); 238 214 } 239 215 ··· 416 404 struct bio *bio) 417 405 { 418 406 atomic_inc(&ioend->io_remaining); 419 - 420 407 bio->bi_private = ioend; 421 408 bio->bi_end_io = xfs_end_bio; 409 + 410 + /* 411 + * If the I/O is beyond EOF we mark the inode dirty immediately 412 + * but don't update the inode size until I/O completion. 413 + */ 414 + if (xfs_ioend_new_eof(ioend)) 415 + xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); 422 416 423 417 submit_bio(WRITE, bio); 424 418 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));

+1 -8

fs/xfs/linux-2.6/xfs_file.c

··· 176 176 struct dentry *dentry, 177 177 int datasync) 178 178 { 179 - struct inode *inode = dentry->d_inode; 180 - struct xfs_inode *ip = XFS_I(inode); 181 - int error; 182 - 183 - /* capture size updates in I/O completion before writing the inode. */ 184 - error = filemap_fdatawait(inode->i_mapping); 185 - if (error) 186 - return error; 179 + struct xfs_inode *ip = XFS_I(dentry->d_inode); 187 180 188 181 xfs_iflags_clear(ip, XFS_ITRUNCATED); 189 182 return -xfs_fsync(ip);

+15 -26

fs/xfs/linux-2.6/xfs_iops.c

··· 57 57 #include <linux/fiemap.h> 58 58 59 59 /* 60 - * Bring the atime in the XFS inode uptodate. 61 - * Used before logging the inode to disk or when the Linux inode goes away. 60 + * Bring the timestamps in the XFS inode uptodate. 61 + * 62 + * Used before writing the inode to disk. 62 63 */ 63 64 void 64 - xfs_synchronize_atime( 65 + xfs_synchronize_times( 65 66 xfs_inode_t *ip) 66 67 { 67 68 struct inode *inode = VFS_I(ip); 68 69 69 - if (!(inode->i_state & I_CLEAR)) { 70 - ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 71 - ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; 72 - } 70 + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 71 + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; 72 + ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; 73 + ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; 74 + ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; 75 + ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; 73 76 } 74 77 75 78 /* ··· 109 106 if ((flags & XFS_ICHGTIME_MOD) && 110 107 !timespec_equal(&inode->i_mtime, &tv)) { 111 108 inode->i_mtime = tv; 112 - ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 113 - ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 114 109 sync_it = 1; 115 110 } 116 111 if ((flags & XFS_ICHGTIME_CHG) && 117 112 !timespec_equal(&inode->i_ctime, &tv)) { 118 113 inode->i_ctime = tv; 119 - ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; 120 - ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; 121 114 sync_it = 1; 122 115 } 123 116 124 117 /* 125 - * We update the i_update_core field _after_ changing 126 - * the timestamps in order to coordinate properly with 127 - * xfs_iflush() so that we don't lose timestamp updates. 128 - * This keeps us from having to hold the inode lock 129 - * while doing this. We use the SYNCHRONIZE macro to 130 - * ensure that the compiler does not reorder the update 131 - * of i_update_core above the timestamp updates above. 118 + * Update complete - now make sure everyone knows that the inode 119 + * is dirty. 132 120 */ 133 - if (sync_it) { 134 - SYNCHRONIZE(); 135 - ip->i_update_core = 1; 121 + if (sync_it) 136 122 xfs_mark_inode_dirty_sync(ip); 137 - } 138 123 } 139 124 140 125 /* ··· 497 506 stat->gid = ip->i_d.di_gid; 498 507 stat->ino = ip->i_ino; 499 508 stat->atime = inode->i_atime; 500 - stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; 501 - stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 502 - stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec; 503 - stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 509 + stat->mtime = inode->i_mtime; 510 + stat->ctime = inode->i_ctime; 504 511 stat->blocks = 505 512 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); 506 513

+1 -1

fs/xfs/linux-2.6/xfs_lrw.c

··· 667 667 xip->i_new_size = new_size; 668 668 669 669 if (likely(!(ioflags & IO_INVIS))) 670 - xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 670 + file_update_time(file); 671 671 672 672 /* 673 673 * If the offset is beyond the size of the file, we have a couple

+41 -18

fs/xfs/linux-2.6/xfs_super.c

··· 977 977 } 978 978 979 979 /* 980 + * Dirty the XFS inode when mark_inode_dirty_sync() is called so that 981 + * we catch unlogged VFS level updates to the inode. Care must be taken 982 + * here - the transaction code calls mark_inode_dirty_sync() to mark the 983 + * VFS inode dirty in a transaction and clears the i_update_core field; 984 + * it must clear the field after calling mark_inode_dirty_sync() to 985 + * correctly indicate that the dirty state has been propagated into the 986 + * inode log item. 987 + * 988 + * We need the barrier() to maintain correct ordering between unlogged 989 + * updates and the transaction commit code that clears the i_update_core 990 + * field. This requires all updates to be completed before marking the 991 + * inode dirty. 992 + */ 993 + STATIC void 994 + xfs_fs_dirty_inode( 995 + struct inode *inode) 996 + { 997 + barrier(); 998 + XFS_I(inode)->i_update_core = 1; 999 + } 1000 + 1001 + /* 980 1002 * Attempt to flush the inode, this will actually fail 981 1003 * if the inode is pinned, but we dirty the inode again 982 1004 * at the point when it is unpinned after a log write, ··· 1148 1126 } 1149 1127 1150 1128 STATIC int 1151 - xfs_fs_sync_super( 1129 + xfs_fs_sync_fs( 1152 1130 struct super_block *sb, 1153 1131 int wait) 1154 1132 { ··· 1156 1134 int error; 1157 1135 1158 1136 /* 1159 - * Treat a sync operation like a freeze. This is to work 1160 - * around a race in sync_inodes() which works in two phases 1161 - * - an asynchronous flush, which can write out an inode 1162 - * without waiting for file size updates to complete, and a 1163 - * synchronous flush, which wont do anything because the 1164 - * async flush removed the inode's dirty flag. Also 1165 - * sync_inodes() will not see any files that just have 1166 - * outstanding transactions to be flushed because we don't 1167 - * dirty the Linux inode until after the transaction I/O 1168 - * completes. 1137 + * Not much we can do for the first async pass. Writing out the 1138 + * superblock would be counter-productive as we are going to redirty 1139 + * when writing out other data and metadata (and writing out a single 1140 + * block is quite fast anyway). 1141 + * 1142 + * Try to asynchronously kick off quota syncing at least. 1169 1143 */ 1170 - if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) 1171 - error = xfs_quiesce_data(mp); 1172 - else 1173 - error = xfs_sync_fsdata(mp, 0); 1144 + if (!wait) { 1145 + xfs_qm_sync(mp, SYNC_TRYLOCK); 1146 + return 0; 1147 + } 1174 1148 1175 - if (unlikely(laptop_mode)) { 1149 + error = xfs_quiesce_data(mp); 1150 + if (error) 1151 + return -error; 1152 + 1153 + if (laptop_mode) { 1176 1154 int prev_sync_seq = mp->m_sync_seq; 1177 1155 1178 1156 /* ··· 1191 1169 mp->m_sync_seq != prev_sync_seq); 1192 1170 } 1193 1171 1194 - return -error; 1172 + return 0; 1195 1173 } 1196 1174 1197 1175 STATIC int ··· 1561 1539 static struct super_operations xfs_super_operations = { 1562 1540 .alloc_inode = xfs_fs_alloc_inode, 1563 1541 .destroy_inode = xfs_fs_destroy_inode, 1542 + .dirty_inode = xfs_fs_dirty_inode, 1564 1543 .write_inode = xfs_fs_write_inode, 1565 1544 .clear_inode = xfs_fs_clear_inode, 1566 1545 .put_super = xfs_fs_put_super, 1567 - .sync_fs = xfs_fs_sync_super, 1546 + .sync_fs = xfs_fs_sync_fs, 1568 1547 .freeze_fs = xfs_fs_freeze, 1569 1548 .statfs = xfs_fs_statfs, 1570 1549 .remount_fs = xfs_fs_remount,

+26 -10

fs/xfs/linux-2.6/xfs_sync.c

··· 309 309 STATIC int 310 310 xfs_commit_dummy_trans( 311 311 struct xfs_mount *mp, 312 - uint log_flags) 312 + uint flags) 313 313 { 314 314 struct xfs_inode *ip = mp->m_rootip; 315 315 struct xfs_trans *tp; 316 316 int error; 317 + int log_flags = XFS_LOG_FORCE; 318 + 319 + if (flags & SYNC_WAIT) 320 + log_flags |= XFS_LOG_SYNC; 317 321 318 322 /* 319 323 * Put a dummy transaction in the log to tell recovery ··· 335 331 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 336 332 xfs_trans_ihold(tp, ip); 337 333 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 338 - /* XXX(hch): ignoring the error here.. */ 339 334 error = xfs_trans_commit(tp, 0); 340 - 341 335 xfs_iunlock(ip, XFS_ILOCK_EXCL); 342 336 337 + /* the log force ensures this transaction is pushed to disk */ 343 338 xfs_log_force(mp, 0, log_flags); 344 - return 0; 339 + return error; 345 340 } 346 341 347 342 int ··· 388 385 else 389 386 XFS_BUF_ASYNC(bp); 390 387 391 - return xfs_bwrite(mp, bp); 388 + error = xfs_bwrite(mp, bp); 389 + if (error) 390 + return error; 391 + 392 + /* 393 + * If this is a data integrity sync make sure all pending buffers 394 + * are flushed out for the log coverage check below. 395 + */ 396 + if (flags & SYNC_WAIT) 397 + xfs_flush_buftarg(mp->m_ddev_targp, 1); 398 + 399 + if (xfs_log_need_covered(mp)) 400 + error = xfs_commit_dummy_trans(mp, flags); 401 + return error; 392 402 393 403 out_brelse: 394 404 xfs_buf_relse(bp); ··· 435 419 /* push non-blocking */ 436 420 xfs_sync_data(mp, 0); 437 421 xfs_qm_sync(mp, SYNC_TRYLOCK); 438 - xfs_filestream_flush(mp); 439 422 440 - /* push and block */ 423 + /* push and block till complete */ 441 424 xfs_sync_data(mp, SYNC_WAIT); 442 425 xfs_qm_sync(mp, SYNC_WAIT); 443 426 427 + /* drop inode references pinned by filestreams */ 428 + xfs_filestream_flush(mp); 429 + 444 430 /* write superblock and hoover up shutdown errors */ 445 - error = xfs_sync_fsdata(mp, 0); 431 + error = xfs_sync_fsdata(mp, SYNC_WAIT); 446 432 447 433 /* flush data-only devices */ 448 434 if (mp->m_rtdev_targp) ··· 588 570 /* dgc: errors ignored here */ 589 571 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 590 572 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); 591 - if (xfs_log_need_covered(mp)) 592 - error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); 593 573 } 594 574 mp->m_sync_seq++; 595 575 wake_up(&mp->m_wait_single_sync_task);

+4 -4

fs/xfs/xfs_dfrag.c

··· 206 206 * process that the file was not changed out from 207 207 * under it. 208 208 */ 209 - if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || 210 - (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || 211 - (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || 212 - (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { 209 + if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || 210 + (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || 211 + (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || 212 + (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { 213 213 error = XFS_ERROR(EBUSY); 214 214 goto out_unlock; 215 215 }

+3 -1

fs/xfs/xfs_dir2_leaf.c

··· 854 854 */ 855 855 ra_want = howmany(bufsize + mp->m_dirblksize, 856 856 mp->m_sb.sb_blocksize) - 1; 857 + ASSERT(ra_want >= 0); 857 858 858 859 /* 859 860 * If we don't have as many as we want, and we haven't ··· 1089 1088 */ 1090 1089 ptr += length; 1091 1090 curoff += length; 1092 - bufsize -= length; 1091 + /* bufsize may have just been a guess; don't go negative */ 1092 + bufsize = bufsize > length ? bufsize - length : 0; 1093 1093 } 1094 1094 1095 1095 /*

+2 -2

fs/xfs/xfs_inode.c

··· 3068 3068 SYNCHRONIZE(); 3069 3069 3070 3070 /* 3071 - * Make sure to get the latest atime from the Linux inode. 3071 + * Make sure to get the latest timestamps from the Linux inode. 3072 3072 */ 3073 - xfs_synchronize_atime(ip); 3073 + xfs_synchronize_times(ip); 3074 3074 3075 3075 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, 3076 3076 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {

+1 -1

fs/xfs/xfs_inode.h

··· 504 504 void xfs_lock_inodes(xfs_inode_t **, int, uint); 505 505 void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 506 506 507 - void xfs_synchronize_atime(xfs_inode_t *); 507 + void xfs_synchronize_times(xfs_inode_t *); 508 508 void xfs_mark_inode_dirty_sync(xfs_inode_t *); 509 509 510 510 #if defined(XFS_INODE_TRACE)

+11 -7

fs/xfs/xfs_inode_item.c

··· 232 232 nvecs = 1; 233 233 234 234 /* 235 + * Make sure the linux inode is dirty. We do this before 236 + * clearing i_update_core as the VFS will call back into 237 + * XFS here and set i_update_core, so we need to dirty the 238 + * inode first so that the ordering of i_update_core and 239 + * unlogged modifications still works as described below. 240 + */ 241 + xfs_mark_inode_dirty_sync(ip); 242 + 243 + /* 235 244 * Clear i_update_core if the timestamps (or any other 236 245 * non-transactional modification) need flushing/logging 237 246 * and we're about to log them with the rest of the core. ··· 272 263 } 273 264 274 265 /* 275 - * Make sure to get the latest atime from the Linux inode. 266 + * Make sure to get the latest timestamps from the Linux inode. 276 267 */ 277 - xfs_synchronize_atime(ip); 278 - 279 - /* 280 - * make sure the linux inode is dirty 281 - */ 282 - xfs_mark_inode_dirty_sync(ip); 268 + xfs_synchronize_times(ip); 283 269 284 270 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 285 271 vecp->i_len = sizeof(struct xfs_icdinode);

+13 -8

fs/xfs/xfs_itable.c

··· 59 59 { 60 60 xfs_icdinode_t *dic; /* dinode core info pointer */ 61 61 xfs_inode_t *ip; /* incore inode pointer */ 62 + struct inode *inode; 62 63 int error; 63 64 64 65 error = xfs_iget(mp, NULL, ino, ··· 73 72 ASSERT(ip->i_imap.im_blkno != 0); 74 73 75 74 dic = &ip->i_d; 75 + inode = VFS_I(ip); 76 76 77 77 /* xfs_iget returns the following without needing 78 78 * further change. ··· 85 83 buf->bs_uid = dic->di_uid; 86 84 buf->bs_gid = dic->di_gid; 87 85 buf->bs_size = dic->di_size; 86 + 88 87 /* 89 - * We are reading the atime from the Linux inode because the 90 - * dinode might not be uptodate. 88 + * We need to read the timestamps from the Linux inode because 89 + * the VFS keeps writing directly into the inode structure instead 90 + * of telling us about the updates. 91 91 */ 92 - buf->bs_atime.tv_sec = VFS_I(ip)->i_atime.tv_sec; 93 - buf->bs_atime.tv_nsec = VFS_I(ip)->i_atime.tv_nsec; 94 - buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; 95 - buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; 96 - buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; 97 - buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec; 92 + buf->bs_atime.tv_sec = inode->i_atime.tv_sec; 93 + buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; 94 + buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; 95 + buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; 96 + buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; 97 + buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; 98 + 98 99 buf->bs_xflags = xfs_ip2xflags(ip); 99 100 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 100 101 buf->bs_extents = dic->di_nextents;

-6

fs/xfs/xfs_vnodeops.c

··· 2476 2476 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 2477 2477 2478 2478 /* 2479 - * Make sure the atime in the XFS inode is correct before freeing the 2480 - * Linux inode. 2481 - */ 2482 - xfs_synchronize_atime(ip); 2483 - 2484 - /* 2485 2479 * If we have nothing to flush with this inode then complete the 2486 2480 * teardown now, otherwise break the link between the xfs inode and the 2487 2481 * linux inode and clean up the xfs inode later. This avoids flushing