Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

9p: use inode->i_lock to protect i_size_write() under 32-bit

Use inode->i_lock to protect i_size_write(), else i_size_read() in
generic_fillattr() may loop infinitely in read_seqcount_begin() when
multiple processes invoke v9fs_vfs_getattr() or v9fs_vfs_getattr_dotl()
simultaneously under 32-bit SMP environment, and a soft lockup will be
triggered as show below:

watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [stat:2217]
Modules linked in:
CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
Hardware name: Generic DT based system
PC is at generic_fillattr+0x104/0x108
LR is at 0xec497f00
pc : [<802b8898>] lr : [<ec497f00>] psr: 200c0013
sp : ec497e20 ip : ed608030 fp : ec497e3c
r10: 00000000 r9 : ec497f00 r8 : ed608030
r7 : ec497ebc r6 : ec497f00 r5 : ee5c1550 r4 : ee005780
r3 : 0000052d r2 : 00000000 r1 : ec497f00 r0 : ed608030
Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
Control: 10c5387d Table: ac48006a DAC: 00000051
CPU: 5 PID: 2217 Comm: stat Not tainted 5.0.0-rc1-00005-g7f702faf5a9e #4
Hardware name: Generic DT based system
Backtrace:
[<8010d974>] (dump_backtrace) from [<8010dc88>] (show_stack+0x20/0x24)
[<8010dc68>] (show_stack) from [<80a1d194>] (dump_stack+0xb0/0xdc)
[<80a1d0e4>] (dump_stack) from [<80109f34>] (show_regs+0x1c/0x20)
[<80109f18>] (show_regs) from [<801d0a80>] (watchdog_timer_fn+0x280/0x2f8)
[<801d0800>] (watchdog_timer_fn) from [<80198658>] (__hrtimer_run_queues+0x18c/0x380)
[<801984cc>] (__hrtimer_run_queues) from [<80198e60>] (hrtimer_run_queues+0xb8/0xf0)
[<80198da8>] (hrtimer_run_queues) from [<801973e8>] (run_local_timers+0x28/0x64)
[<801973c0>] (run_local_timers) from [<80197460>] (update_process_times+0x3c/0x6c)
[<80197424>] (update_process_times) from [<801ab2b8>] (tick_nohz_handler+0xe0/0x1bc)
[<801ab1d8>] (tick_nohz_handler) from [<80843050>] (arch_timer_handler_virt+0x38/0x48)
[<80843018>] (arch_timer_handler_virt) from [<80180a64>] (handle_percpu_devid_irq+0x8c/0x240)
[<801809d8>] (handle_percpu_devid_irq) from [<8017ac20>] (generic_handle_irq+0x34/0x44)
[<8017abec>] (generic_handle_irq) from [<8017b344>] (__handle_domain_irq+0x6c/0xc4)
[<8017b2d8>] (__handle_domain_irq) from [<801022e0>] (gic_handle_irq+0x4c/0x88)
[<80102294>] (gic_handle_irq) from [<80101a30>] (__irq_svc+0x70/0x98)
[<802b8794>] (generic_fillattr) from [<8056b284>] (v9fs_vfs_getattr_dotl+0x74/0xa4)
[<8056b210>] (v9fs_vfs_getattr_dotl) from [<802b8904>] (vfs_getattr_nosec+0x68/0x7c)
[<802b889c>] (vfs_getattr_nosec) from [<802b895c>] (vfs_getattr+0x44/0x48)
[<802b8918>] (vfs_getattr) from [<802b8a74>] (vfs_statx+0x9c/0xec)
[<802b89d8>] (vfs_statx) from [<802b9428>] (sys_lstat64+0x48/0x78)
[<802b93e0>] (sys_lstat64) from [<80101000>] (ret_fast_syscall+0x0/0x28)

[dominique.martinet@cea.fr: updated comment to not refer to a function
in another subsystem]
Link: http://lkml.kernel.org/r/20190124063514.8571-2-houtao1@huawei.com
Cc: stable@vger.kernel.org
Fixes: 7549ae3e81cc ("9p: Use the i_size_[read, write]() macros instead of using inode->i_size directly.")
Reported-by: Xing Gaopeng <xingaopeng@huawei.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>

authored by

Hou Tao and committed by
Dominique Martinet
5e3cc1ee 3bbe8b1a

+53 -30
+21 -2
fs/9p/v9fs_vfs.h
··· 40 40 */ 41 41 #define P9_LOCK_TIMEOUT (30*HZ) 42 42 43 + /* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */ 44 + #define V9FS_STAT2INODE_KEEP_ISIZE 1 45 + 43 46 extern struct file_system_type v9fs_fs_type; 44 47 extern const struct address_space_operations v9fs_addr_operations; 45 48 extern const struct file_operations v9fs_file_operations; ··· 64 61 struct inode *inode, umode_t mode, dev_t); 65 62 void v9fs_evict_inode(struct inode *inode); 66 63 ino_t v9fs_qid2ino(struct p9_qid *qid); 67 - void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 68 - void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); 64 + void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, 65 + struct super_block *sb, unsigned int flags); 66 + void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, 67 + unsigned int flags); 69 68 int v9fs_dir_release(struct inode *inode, struct file *filp); 70 69 int v9fs_file_open(struct inode *inode, struct file *file); 71 70 void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); ··· 88 83 } 89 84 90 85 int v9fs_open_to_dotl_flags(int flags); 86 + 87 + static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size) 88 + { 89 + /* 90 + * 32-bit need the lock, concurrent updates could break the 91 + * sequences and make i_size_read() loop forever. 92 + * 64-bit updates are atomic and can skip the locking. 93 + */ 94 + if (sizeof(i_size) > sizeof(long)) 95 + spin_lock(&inode->i_lock); 96 + i_size_write(inode, i_size); 97 + if (sizeof(i_size) > sizeof(long)) 98 + spin_unlock(&inode->i_lock); 99 + } 91 100 #endif
+5 -1
fs/9p/vfs_file.c
··· 446 446 i_size = i_size_read(inode); 447 447 if (iocb->ki_pos > i_size) { 448 448 inode_add_bytes(inode, iocb->ki_pos - i_size); 449 - i_size_write(inode, iocb->ki_pos); 449 + /* 450 + * Need to serialize against i_size_write() in 451 + * v9fs_stat2inode() 452 + */ 453 + v9fs_i_size_write(inode, iocb->ki_pos); 450 454 } 451 455 return retval; 452 456 }
+11 -12
fs/9p/vfs_inode.c
··· 538 538 if (retval) 539 539 goto error; 540 540 541 - v9fs_stat2inode(st, inode, sb); 541 + v9fs_stat2inode(st, inode, sb, 0); 542 542 v9fs_cache_inode_get_cookie(inode); 543 543 unlock_new_inode(inode); 544 544 return inode; ··· 1092 1092 if (IS_ERR(st)) 1093 1093 return PTR_ERR(st); 1094 1094 1095 - v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb); 1095 + v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0); 1096 1096 generic_fillattr(d_inode(dentry), stat); 1097 1097 1098 1098 p9stat_free(st); ··· 1170 1170 * @stat: Plan 9 metadata (mistat) structure 1171 1171 * @inode: inode to populate 1172 1172 * @sb: superblock of filesystem 1173 + * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) 1173 1174 * 1174 1175 */ 1175 1176 1176 1177 void 1177 1178 v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, 1178 - struct super_block *sb) 1179 + struct super_block *sb, unsigned int flags) 1179 1180 { 1180 1181 umode_t mode; 1181 1182 char ext[32]; ··· 1217 1216 mode = p9mode2perm(v9ses, stat); 1218 1217 mode |= inode->i_mode & ~S_IALLUGO; 1219 1218 inode->i_mode = mode; 1220 - i_size_write(inode, stat->length); 1221 1219 1220 + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) 1221 + v9fs_i_size_write(inode, stat->length); 1222 1222 /* not real number of blocks, but 512 byte ones ... */ 1223 - inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 1223 + inode->i_blocks = (stat->length + 512 - 1) >> 9; 1224 1224 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; 1225 1225 } 1226 1226 ··· 1418 1416 { 1419 1417 int umode; 1420 1418 dev_t rdev; 1421 - loff_t i_size; 1422 1419 struct p9_wstat *st; 1423 1420 struct v9fs_session_info *v9ses; 1421 + unsigned int flags; 1424 1422 1425 1423 v9ses = v9fs_inode2v9ses(inode); 1426 1424 st = p9_client_stat(fid); ··· 1433 1431 if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) 1434 1432 goto out; 1435 1433 1436 - spin_lock(&inode->i_lock); 1437 1434 /* 1438 1435 * We don't want to refresh inode->i_size, 1439 1436 * because we may have cached data 1440 1437 */ 1441 - i_size = inode->i_size; 1442 - v9fs_stat2inode(st, inode, inode->i_sb); 1443 - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 1444 - inode->i_size = i_size; 1445 - spin_unlock(&inode->i_lock); 1438 + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? 1439 + V9FS_STAT2INODE_KEEP_ISIZE : 0; 1440 + v9fs_stat2inode(st, inode, inode->i_sb, flags); 1446 1441 out: 1447 1442 p9stat_free(st); 1448 1443 kfree(st);
+14 -13
fs/9p/vfs_inode_dotl.c
··· 143 143 if (retval) 144 144 goto error; 145 145 146 - v9fs_stat2inode_dotl(st, inode); 146 + v9fs_stat2inode_dotl(st, inode, 0); 147 147 v9fs_cache_inode_get_cookie(inode); 148 148 retval = v9fs_get_acl(inode, fid); 149 149 if (retval) ··· 496 496 if (IS_ERR(st)) 497 497 return PTR_ERR(st); 498 498 499 - v9fs_stat2inode_dotl(st, d_inode(dentry)); 499 + v9fs_stat2inode_dotl(st, d_inode(dentry), 0); 500 500 generic_fillattr(d_inode(dentry), stat); 501 501 /* Change block size to what the server returned */ 502 502 stat->blksize = st->st_blksize; ··· 607 607 * v9fs_stat2inode_dotl - populate an inode structure with stat info 608 608 * @stat: stat structure 609 609 * @inode: inode to populate 610 + * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) 610 611 * 611 612 */ 612 613 613 614 void 614 - v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) 615 + v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, 616 + unsigned int flags) 615 617 { 616 618 umode_t mode; 617 619 struct v9fs_inode *v9inode = V9FS_I(inode); ··· 633 631 mode |= inode->i_mode & ~S_IALLUGO; 634 632 inode->i_mode = mode; 635 633 636 - i_size_write(inode, stat->st_size); 634 + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) 635 + v9fs_i_size_write(inode, stat->st_size); 637 636 inode->i_blocks = stat->st_blocks; 638 637 } else { 639 638 if (stat->st_result_mask & P9_STATS_ATIME) { ··· 664 661 } 665 662 if (stat->st_result_mask & P9_STATS_RDEV) 666 663 inode->i_rdev = new_decode_dev(stat->st_rdev); 667 - if (stat->st_result_mask & P9_STATS_SIZE) 668 - i_size_write(inode, stat->st_size); 664 + if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && 665 + stat->st_result_mask & P9_STATS_SIZE) 666 + v9fs_i_size_write(inode, stat->st_size); 669 667 if (stat->st_result_mask & P9_STATS_BLOCKS) 670 668 inode->i_blocks = stat->st_blocks; 671 669 } ··· 932 928 933 929 int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) 934 930 { 935 - loff_t i_size; 936 931 struct p9_stat_dotl *st; 937 932 struct v9fs_session_info *v9ses; 933 + unsigned int flags; 938 934 939 935 v9ses = v9fs_inode2v9ses(inode); 940 936 st = p9_client_getattr_dotl(fid, P9_STATS_ALL); ··· 946 942 if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) 947 943 goto out; 948 944 949 - spin_lock(&inode->i_lock); 950 945 /* 951 946 * We don't want to refresh inode->i_size, 952 947 * because we may have cached data 953 948 */ 954 - i_size = inode->i_size; 955 - v9fs_stat2inode_dotl(st, inode); 956 - if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 957 - inode->i_size = i_size; 958 - spin_unlock(&inode->i_lock); 949 + flags = (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) ? 950 + V9FS_STAT2INODE_KEEP_ISIZE : 0; 951 + v9fs_stat2inode_dotl(st, inode, flags); 959 952 out: 960 953 kfree(st); 961 954 return 0;
+2 -2
fs/9p/vfs_super.c
··· 172 172 goto release_sb; 173 173 } 174 174 d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); 175 - v9fs_stat2inode_dotl(st, d_inode(root)); 175 + v9fs_stat2inode_dotl(st, d_inode(root), 0); 176 176 kfree(st); 177 177 } else { 178 178 struct p9_wstat *st = NULL; ··· 183 183 } 184 184 185 185 d_inode(root)->i_ino = v9fs_qid2ino(&st->qid); 186 - v9fs_stat2inode(st, d_inode(root), sb); 186 + v9fs_stat2inode(st, d_inode(root), sb, 0); 187 187 188 188 p9stat_free(st); 189 189 kfree(st);