Merge git://oss.sgi.com:8090/xfs/linux-2.6

* git://oss.sgi.com:8090/xfs/linux-2.6:
[XFS] Don't do I/O beyond eof when unreserving space
[XFS] Fix use-after-free with buffers
[XFS] Prevent lockdep false positives when locking two inodes.
[XFS] Fix barrier status change detection.
[XFS] Prevent direct I/O from mapping extents beyond eof
[XFS] Fix regression introduced by remount fixup
[XFS] Move memory allocations for log tracing out of the critical path

+119 -47
+4
fs/xfs/linux-2.6/xfs_aops.c
··· 1338 1338 offset = (xfs_off_t)iblock << inode->i_blkbits; 1339 1339 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1340 1340 size = bh_result->b_size; 1341 + 1342 + if (!create && direct && offset >= i_size_read(inode)) 1343 + return 0; 1344 + 1341 1345 error = xfs_iomap(XFS_I(inode), offset, size, 1342 1346 create ? flags : BMAPI_READ, &iomap, &niomap); 1343 1347 if (error)
+20
fs/xfs/linux-2.6/xfs_super.c
··· 1302 1302 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1303 1303 break; 1304 1304 default: 1305 + /* 1306 + * Logically we would return an error here to prevent 1307 + * users from believing they might have changed 1308 + * mount options using remount which can't be changed. 1309 + * 1310 + * But unfortunately mount(8) adds all options from 1311 + * mtab and fstab to the mount arguments in some cases 1312 + * so we can't blindly reject options, but have to 1313 + * check for each specified option if it actually 1314 + * differs from the currently set option and only 1315 + * reject it if that's the case. 1316 + * 1317 + * Until that is implemented we return success for 1318 + * every remount request, and silently ignore all 1319 + * options that we can't actually change. 1320 + */ 1321 + #if 0 1305 1322 printk(KERN_INFO 1306 1323 "XFS: mount option \"%s\" not supported for remount\n", p); 1307 1324 return -EINVAL; 1325 + #else 1326 + return 0; 1327 + #endif 1308 1328 } 1309 1329 } 1310 1330
+20 -24
fs/xfs/xfs_buf_item.c
··· 732 732 bip->bli_item.li_ops = &xfs_buf_item_ops; 733 733 bip->bli_item.li_mountp = mp; 734 734 bip->bli_buf = bp; 735 + xfs_buf_hold(bp); 735 736 bip->bli_format.blf_type = XFS_LI_BUF; 736 737 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); 737 738 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); ··· 868 867 return (bip->bli_flags & XFS_BLI_DIRTY); 869 868 } 870 869 870 + STATIC void 871 + xfs_buf_item_free( 872 + xfs_buf_log_item_t *bip) 873 + { 874 + #ifdef XFS_TRANS_DEBUG 875 + kmem_free(bip->bli_orig); 876 + kmem_free(bip->bli_logged); 877 + #endif /* XFS_TRANS_DEBUG */ 878 + 879 + #ifdef XFS_BLI_TRACE 880 + ktrace_free(bip->bli_trace); 881 + #endif 882 + kmem_zone_free(xfs_buf_item_zone, bip); 883 + } 884 + 871 885 /* 872 886 * This is called when the buf log item is no longer needed. It should 873 887 * free the buf log item associated with the given buffer and clear ··· 903 887 (XFS_BUF_IODONE_FUNC(bp) != NULL)) { 904 888 XFS_BUF_CLR_IODONE_FUNC(bp); 905 889 } 906 - 907 - #ifdef XFS_TRANS_DEBUG 908 - kmem_free(bip->bli_orig); 909 - bip->bli_orig = NULL; 910 - kmem_free(bip->bli_logged); 911 - bip->bli_logged = NULL; 912 - #endif /* XFS_TRANS_DEBUG */ 913 - 914 - #ifdef XFS_BLI_TRACE 915 - ktrace_free(bip->bli_trace); 916 - #endif 917 - kmem_zone_free(xfs_buf_item_zone, bip); 890 + xfs_buf_rele(bp); 891 + xfs_buf_item_free(bip); 918 892 } 919 893 920 894 ··· 1126 1120 1127 1121 ASSERT(bip->bli_buf == bp); 1128 1122 1123 + xfs_buf_rele(bp); 1129 1124 mp = bip->bli_item.li_mountp; 1130 1125 1131 1126 /* ··· 1143 1136 * xfs_trans_delete_ail() drops the AIL lock. 
1144 1137 */ 1145 1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 1146 - 1147 - #ifdef XFS_TRANS_DEBUG 1148 - kmem_free(bip->bli_orig); 1149 - bip->bli_orig = NULL; 1150 - kmem_free(bip->bli_logged); 1151 - bip->bli_logged = NULL; 1152 - #endif /* XFS_TRANS_DEBUG */ 1153 - 1154 - #ifdef XFS_BLI_TRACE 1155 - ktrace_free(bip->bli_trace); 1156 - #endif 1157 - kmem_zone_free(xfs_buf_item_zone, bip); 1139 + xfs_buf_item_free(bip); 1158 1140 } 1159 1141 1160 1142 #if defined(XFS_BLI_TRACE)
+8 -1
fs/xfs/xfs_dfrag.c
··· 149 149 150 150 sbp = &sxp->sx_stat; 151 151 152 - xfs_lock_two_inodes(ip, tip, lock_flags); 152 + /* 153 + * we have to do two separate lock calls here to keep lockdep 154 + * happy. If we try to get all the locks in one call, lock will 155 + * report false positives when we drop the ILOCK and regain them 156 + * below. 157 + */ 158 + xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 159 + xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 153 160 locked = 1; 154 161 155 162 /* Verify that both files have the same format */
+41 -21
fs/xfs/xfs_log.c
··· 124 124 STATIC int xlog_iclogs_empty(xlog_t *log); 125 125 126 126 #if defined(XFS_LOG_TRACE) 127 + 128 + #define XLOG_TRACE_LOGGRANT_SIZE 2048 129 + #define XLOG_TRACE_ICLOG_SIZE 256 130 + 131 + void 132 + xlog_trace_loggrant_alloc(xlog_t *log) 133 + { 134 + log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); 135 + } 136 + 137 + void 138 + xlog_trace_loggrant_dealloc(xlog_t *log) 139 + { 140 + ktrace_free(log->l_grant_trace); 141 + } 142 + 127 143 void 128 144 xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) 129 145 { 130 146 unsigned long cnts; 131 147 132 - if (!log->l_grant_trace) { 133 - log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); 134 - if (!log->l_grant_trace) 135 - return; 136 - } 137 148 /* ticket counts are 1 byte each */ 138 149 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; 139 150 ··· 168 157 } 169 158 170 159 void 160 + xlog_trace_iclog_alloc(xlog_in_core_t *iclog) 161 + { 162 + iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); 163 + } 164 + 165 + void 166 + xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) 167 + { 168 + ktrace_free(iclog->ic_trace); 169 + } 170 + 171 + void 171 172 xlog_trace_iclog(xlog_in_core_t *iclog, uint state) 172 173 { 173 - if (!iclog->ic_trace) 174 - iclog->ic_trace = ktrace_alloc(256, KM_NOFS); 175 174 ktrace_enter(iclog->ic_trace, 176 175 (void *)((unsigned long)state), 177 176 (void *)((unsigned long)current_pid()), ··· 191 170 (void *)NULL, (void *)NULL); 192 171 } 193 172 #else 173 + 174 + #define xlog_trace_loggrant_alloc(log) 175 + #define xlog_trace_loggrant_dealloc(log) 194 176 #define xlog_trace_loggrant(log,tic,string) 177 + 178 + #define xlog_trace_iclog_alloc(iclog) 179 + #define xlog_trace_iclog_dealloc(iclog) 195 180 #define xlog_trace_iclog(iclog,state) 181 + 196 182 #endif /* XFS_LOG_TRACE */ 197 183 198 184 ··· 1037 1009 * layer, it means the underlying device no longer supports 1038 1010 * barrier I/O. 
Warn loudly and turn off barriers. 1039 1011 */ 1040 - if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { 1012 + if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) { 1041 1013 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; 1042 1014 xfs_fs_cmn_err(CE_WARN, l->l_mp, 1043 1015 "xlog_iodone: Barriers are no longer supported" ··· 1259 1231 spin_lock_init(&log->l_grant_lock); 1260 1232 sv_init(&log->l_flush_wait, 0, "flush_wait"); 1261 1233 1234 + xlog_trace_loggrant_alloc(log); 1262 1235 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1263 1236 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1264 1237 ··· 1313 1284 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1314 1285 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1315 1286 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1287 + 1288 + xlog_trace_iclog_alloc(iclog); 1316 1289 1317 1290 iclogp = &iclog->ic_next; 1318 1291 } ··· 1596 1565 sv_destroy(&iclog->ic_force_wait); 1597 1566 sv_destroy(&iclog->ic_write_wait); 1598 1567 xfs_buf_free(iclog->ic_bp); 1599 - #ifdef XFS_LOG_TRACE 1600 - if (iclog->ic_trace != NULL) { 1601 - ktrace_free(iclog->ic_trace); 1602 - } 1603 - #endif 1568 + xlog_trace_iclog_dealloc(iclog); 1604 1569 next_iclog = iclog->ic_next; 1605 1570 kmem_free(iclog); 1606 1571 iclog = next_iclog; ··· 1605 1578 spinlock_destroy(&log->l_grant_lock); 1606 1579 1607 1580 xfs_buf_free(log->l_xbuf); 1608 - #ifdef XFS_LOG_TRACE 1609 - if (log->l_trace != NULL) { 1610 - ktrace_free(log->l_trace); 1611 - } 1612 - if (log->l_grant_trace != NULL) { 1613 - ktrace_free(log->l_grant_trace); 1614 - } 1615 - #endif 1581 + xlog_trace_loggrant_dealloc(log); 1616 1582 log->l_mp->m_log = NULL; 1617 1583 kmem_free(log); 1618 1584 } /* xlog_dealloc_log */
-1
fs/xfs/xfs_log_priv.h
··· 448 448 int l_grant_write_bytes; 449 449 450 450 #ifdef XFS_LOG_TRACE 451 - struct ktrace *l_trace; 452 451 struct ktrace *l_grant_trace; 453 452 #endif 454 453
+26
fs/xfs/xfs_vnodeops.c
··· 1838 1838 #endif 1839 1839 } 1840 1840 1841 + /* 1842 + * xfs_lock_two_inodes() can only be used to lock one type of lock 1843 + * at a time - the iolock or the ilock, but not both at once. If 1844 + * we lock both at once, lockdep will report false positives saying 1845 + * we have violated locking orders. 1846 + */ 1841 1847 void 1842 1848 xfs_lock_two_inodes( 1843 1849 xfs_inode_t *ip0, ··· 1854 1848 int attempts = 0; 1855 1849 xfs_log_item_t *lp; 1856 1850 1851 + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 1852 + ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 1857 1853 ASSERT(ip0->i_ino != ip1->i_ino); 1858 1854 1859 1855 if (ip0->i_ino > ip1->i_ino) { ··· 3160 3152 /* 3161 3153 * Zero file bytes between startoff and endoff inclusive. 3162 3154 * The iolock is held exclusive and no blocks are buffered. 3155 + * 3156 + * This function is used by xfs_free_file_space() to zero 3157 + * partial blocks when the range to free is not block aligned. 3158 + * When unreserving space with boundaries that are not block 3159 + * aligned we round up the start and round down the end 3160 + * boundaries and then use this function to zero the parts of 3161 + * the blocks that got dropped during the rounding. 3163 3162 */ 3164 3163 STATIC int 3165 3164 xfs_zero_remaining_bytes( ··· 3182 3167 xfs_mount_t *mp = ip->i_mount; 3183 3168 int nimap; 3184 3169 int error = 0; 3170 + 3171 + /* 3172 + * Avoid doing I/O beyond eof - it's not necessary 3173 + * since nothing can read beyond eof. The space will 3174 + * be zeroed when the file is extended anyway. 3175 + */ 3176 + if (startoff >= ip->i_size) 3177 + return 0; 3178 + 3179 + if (endoff > ip->i_size) 3180 + endoff = ip->i_size; 3185 3181 3186 3182 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 3187 3183 XFS_IS_REALTIME_INODE(ip) ?