Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (21 commits)
xfs: return inode fork offset in bulkstat for fsr
xfs: Increase the default size of the reserved blocks pool
xfs: truncate delalloc extents when IO fails in writeback
xfs: check for more work before sleeping in xfssyncd
xfs: Fix a build warning in xfs_aops.c
xfs: fix locking for inode cache radix tree tag updates
xfs: remove xfs_ipin/xfs_iunpin
xfs: cleanup xfs_iunpin_wait/xfs_iunpin_nowait
xfs: kill xfs_lrw.h
xfs: factor common xfs_trans_bjoin code
xfs: stop passing opaque handles to xfs_log.c routines
xfs: split xfs_bmap_btalloc
xfs: fix xfs_fsblock_t tracing
xfs: fix inode pincount check in fsync
xfs: Non-blocking inode locking in IO completion
xfs: implement optimized fdatasync
xfs: remove wrapper for the fsync file operation
xfs: remove wrappers for read/write file operations
xfs: merge xfs_lrw.c into xfs_file.c
xfs: fix dquota trace format
...

+1352 -1492
-1
fs/xfs/Makefile
···
 	xfs_globals.o \
 	xfs_ioctl.o \
 	xfs_iops.o \
-	xfs_lrw.o \
 	xfs_super.o \
 	xfs_sync.o \
 	xfs_xattr.o)
+178 -55
fs/xfs/linux-2.6/xfs_aops.c
··· 39 #include "xfs_iomap.h" 40 #include "xfs_vnodeops.h" 41 #include "xfs_trace.h" 42 #include <linux/mpage.h> 43 #include <linux/pagevec.h> 44 #include <linux/writeback.h> ··· 164 } 165 166 /* 167 - * Update on-disk file size now that data has been written to disk. 168 - * The current in-memory file size is i_size. If a write is beyond 169 - * eof i_new_size will be the intended file size until i_size is 170 - * updated. If this write does not extend all the way to the valid 171 - * file size then restrict this update to the end of the write. 172 */ 173 - 174 - STATIC void 175 xfs_setfilesize( 176 xfs_ioend_t *ioend) 177 { ··· 185 ASSERT(ioend->io_type != IOMAP_READ); 186 187 if (unlikely(ioend->io_error)) 188 - return; 189 190 - xfs_ilock(ip, XFS_ILOCK_EXCL); 191 isize = xfs_ioend_new_eof(ioend); 192 if (isize) { 193 ip->i_d.di_size = isize; 194 - xfs_mark_inode_dirty_sync(ip); 195 } 196 197 xfs_iunlock(ip, XFS_ILOCK_EXCL); 198 - } 199 - 200 - /* 201 - * IO write completion. 202 - */ 203 - STATIC void 204 - xfs_end_io( 205 - struct work_struct *work) 206 - { 207 - xfs_ioend_t *ioend = 208 - container_of(work, xfs_ioend_t, io_work); 209 - struct xfs_inode *ip = XFS_I(ioend->io_inode); 210 - 211 - /* 212 - * For unwritten extents we need to issue transactions to convert a 213 - * range to normal written extens after the data I/O has finished. 214 - */ 215 - if (ioend->io_type == IOMAP_UNWRITTEN && 216 - likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { 217 - int error; 218 - 219 - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 220 - ioend->io_size); 221 - if (error) 222 - ioend->io_error = error; 223 - } 224 - 225 - /* 226 - * We might have to update the on-disk file size after extending 227 - * writes. 228 - */ 229 - if (ioend->io_type != IOMAP_READ) 230 - xfs_setfilesize(ioend); 231 - xfs_destroy_ioend(ioend); 232 } 233 234 /* ··· 219 if (wait) 220 flush_workqueue(wq); 221 } 222 } 223 224 /* ··· 361 * but don't update the inode size until I/O completion. 362 */ 363 if (xfs_ioend_new_eof(ioend)) 364 - xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); 365 366 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 367 WRITE_SYNC_PLUG : WRITE, bio); ··· 894 } 895 } 896 897 /* 898 * Calling this without startio set means we are being asked to make a dirty 899 * page ready for freeing it's buffers. When called with startio set then ··· 1257 */ 1258 if (err != -EAGAIN) { 1259 if (!unmapped) 1260 - block_invalidatepage(page, 0); 1261 ClearPageUptodate(page); 1262 } 1263 return err; ··· 1665 unsigned nr_pages) 1666 { 1667 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1668 - } 1669 - 1670 - STATIC void 1671 - xfs_vm_invalidatepage( 1672 - struct page *page, 1673 - unsigned long offset) 1674 - { 1675 - trace_xfs_invalidatepage(page->mapping->host, page, offset); 1676 - block_invalidatepage(page, offset); 1677 } 1678 1679 const struct address_space_operations xfs_address_space_operations = {
··· 39 #include "xfs_iomap.h" 40 #include "xfs_vnodeops.h" 41 #include "xfs_trace.h" 42 + #include "xfs_bmap.h" 43 #include <linux/mpage.h> 44 #include <linux/pagevec.h> 45 #include <linux/writeback.h> ··· 163 } 164 165 /* 166 + * Update on-disk file size now that data has been written to disk. The 167 + * current in-memory file size is i_size. If a write is beyond eof i_new_size 168 + * will be the intended file size until i_size is updated. If this write does 169 + * not extend all the way to the valid file size then restrict this update to 170 + * the end of the write. 171 + * 172 + * This function does not block as blocking on the inode lock in IO completion 173 + * can lead to IO completion order dependency deadlocks.. If it can't get the 174 + * inode ilock it will return EAGAIN. Callers must handle this. 175 */ 176 + STATIC int 177 xfs_setfilesize( 178 xfs_ioend_t *ioend) 179 { ··· 181 ASSERT(ioend->io_type != IOMAP_READ); 182 183 if (unlikely(ioend->io_error)) 184 + return 0; 185 186 + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) 187 + return EAGAIN; 188 + 189 isize = xfs_ioend_new_eof(ioend); 190 if (isize) { 191 ip->i_d.di_size = isize; 192 + xfs_mark_inode_dirty(ip); 193 } 194 195 xfs_iunlock(ip, XFS_ILOCK_EXCL); 196 + return 0; 197 } 198 199 /* ··· 246 if (wait) 247 flush_workqueue(wq); 248 } 249 + } 250 + 251 + /* 252 + * IO write completion. 253 + */ 254 + STATIC void 255 + xfs_end_io( 256 + struct work_struct *work) 257 + { 258 + xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); 259 + struct xfs_inode *ip = XFS_I(ioend->io_inode); 260 + int error = 0; 261 + 262 + /* 263 + * For unwritten extents we need to issue transactions to convert a 264 + * range to normal written extens after the data I/O has finished. 265 + */ 266 + if (ioend->io_type == IOMAP_UNWRITTEN && 267 + likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { 268 + 269 + error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 270 + ioend->io_size); 271 + if (error) 272 + ioend->io_error = error; 273 + } 274 + 275 + /* 276 + * We might have to update the on-disk file size after extending 277 + * writes. 278 + */ 279 + if (ioend->io_type != IOMAP_READ) { 280 + error = xfs_setfilesize(ioend); 281 + ASSERT(!error || error == EAGAIN); 282 + } 283 + 284 + /* 285 + * If we didn't complete processing of the ioend, requeue it to the 286 + * tail of the workqueue for another attempt later. Otherwise destroy 287 + * it. 288 + */ 289 + if (error == EAGAIN) { 290 + atomic_inc(&ioend->io_remaining); 291 + xfs_finish_ioend(ioend, 0); 292 + /* ensure we don't spin on blocked ioends */ 293 + delay(1); 294 + } else 295 + xfs_destroy_ioend(ioend); 296 } 297 298 /* ··· 341 * but don't update the inode size until I/O completion. 342 */ 343 if (xfs_ioend_new_eof(ioend)) 344 + xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); 345 346 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 347 WRITE_SYNC_PLUG : WRITE, bio); ··· 874 } 875 } 876 877 + STATIC void 878 + xfs_vm_invalidatepage( 879 + struct page *page, 880 + unsigned long offset) 881 + { 882 + trace_xfs_invalidatepage(page->mapping->host, page, offset); 883 + block_invalidatepage(page, offset); 884 + } 885 + 886 + /* 887 + * If the page has delalloc buffers on it, we need to punch them out before we 888 + * invalidate the page. 
If we don't, we leave a stale delalloc mapping on the 889 + * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read 890 + * is done on that same region - the delalloc extent is returned when none is 891 + * supposed to be there. 892 + * 893 + * We prevent this by truncating away the delalloc regions on the page before 894 + * invalidating it. Because they are delalloc, we can do this without needing a 895 + * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this 896 + * truncation without a transaction as there is no space left for block 897 + * reservation (typically why we see a ENOSPC in writeback). 898 + * 899 + * This is not a performance critical path, so for now just do the punching a 900 + * buffer head at a time. 901 + */ 902 + STATIC void 903 + xfs_aops_discard_page( 904 + struct page *page) 905 + { 906 + struct inode *inode = page->mapping->host; 907 + struct xfs_inode *ip = XFS_I(inode); 908 + struct buffer_head *bh, *head; 909 + loff_t offset = page_offset(page); 910 + ssize_t len = 1 << inode->i_blkbits; 911 + 912 + if (!xfs_is_delayed_page(page, IOMAP_DELAY)) 913 + goto out_invalidate; 914 + 915 + xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 916 + "page discard on page %p, inode 0x%llx, offset %llu.", 917 + page, ip->i_ino, offset); 918 + 919 + xfs_ilock(ip, XFS_ILOCK_EXCL); 920 + bh = head = page_buffers(page); 921 + do { 922 + int done; 923 + xfs_fileoff_t offset_fsb; 924 + xfs_bmbt_irec_t imap; 925 + int nimaps = 1; 926 + int error; 927 + xfs_fsblock_t firstblock; 928 + xfs_bmap_free_t flist; 929 + 930 + if (!buffer_delay(bh)) 931 + goto next_buffer; 932 + 933 + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 934 + 935 + /* 936 + * Map the range first and check that it is a delalloc extent 937 + * before trying to unmap the range. Otherwise we will be 938 + * trying to remove a real extent (which requires a 939 + * transaction) or a hole, which is probably a bad idea... 940 + */ 941 + error = xfs_bmapi(NULL, ip, offset_fsb, 1, 942 + XFS_BMAPI_ENTIRE, NULL, 0, &imap, 943 + &nimaps, NULL, NULL); 944 + 945 + if (error) { 946 + /* something screwed, just bail */ 947 + xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 948 + "page discard failed delalloc mapping lookup."); 949 + break; 950 + } 951 + if (!nimaps) { 952 + /* nothing there */ 953 + goto next_buffer; 954 + } 955 + if (imap.br_startblock != DELAYSTARTBLOCK) { 956 + /* been converted, ignore */ 957 + goto next_buffer; 958 + } 959 + WARN_ON(imap.br_blockcount == 0); 960 + 961 + /* 962 + * Note: while we initialise the firstblock/flist pair, they 963 + * should never be used because blocks should never be 964 + * allocated or freed for a delalloc extent and hence we need 965 + * don't cancel or finish them after the xfs_bunmapi() call. 966 + */ 967 + xfs_bmap_init(&flist, &firstblock); 968 + error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, 969 + &flist, NULL, &done); 970 + 971 + ASSERT(!flist.xbf_count && !flist.xbf_first); 972 + if (error) { 973 + /* something screwed, just bail */ 974 + xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 975 + "page discard unable to remove delalloc mapping."); 976 + break; 977 + } 978 + next_buffer: 979 + offset += len; 980 + 981 + } while ((bh = bh->b_this_page) != head); 982 + 983 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 984 + out_invalidate: 985 + xfs_vm_invalidatepage(page, 0); 986 + return; 987 + } 988 + 989 /* 990 * Calling this without startio set means we are being asked to make a dirty 991 * page ready for freeing it's buffers. 
When called with startio set then ··· 1125 */ 1126 if (err != -EAGAIN) { 1127 if (!unmapped) 1128 + xfs_aops_discard_page(page); 1129 ClearPageUptodate(page); 1130 } 1131 return err; ··· 1533 unsigned nr_pages) 1534 { 1535 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1536 } 1537 1538 const struct address_space_operations xfs_address_space_operations = {
+805 -49
fs/xfs/linux-2.6/xfs_file.c
··· 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_bit.h" 20 #include "xfs_log.h" 21 #include "xfs_inum.h" ··· 35 #include "xfs_dir2_sf.h" 36 #include "xfs_dinode.h" 37 #include "xfs_inode.h" 38 #include "xfs_error.h" 39 #include "xfs_rw.h" 40 #include "xfs_vnodeops.h" 41 #include "xfs_da_btree.h" 42 #include "xfs_ioctl.h" 43 44 #include <linux/dcache.h> 45 46 static const struct vm_operations_struct xfs_file_vm_ops; 47 48 - STATIC ssize_t 49 - xfs_file_aio_read( 50 - struct kiocb *iocb, 51 - const struct iovec *iov, 52 - unsigned long nr_segs, 53 - loff_t pos) 54 { 55 - struct file *file = iocb->ki_filp; 56 - int ioflags = 0; 57 58 - BUG_ON(iocb->ki_pos != pos); 59 - if (unlikely(file->f_flags & O_DIRECT)) 60 - ioflags |= IO_ISDIRECT; 61 - if (file->f_mode & FMODE_NOCMTIME) 62 - ioflags |= IO_INVIS; 63 - return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, 64 - nr_segs, &iocb->ki_pos, ioflags); 65 } 66 67 STATIC ssize_t 68 - xfs_file_aio_write( 69 struct kiocb *iocb, 70 - const struct iovec *iov, 71 unsigned long nr_segs, 72 loff_t pos) 73 { 74 struct file *file = iocb->ki_filp; 75 int ioflags = 0; 76 77 BUG_ON(iocb->ki_pos != pos); 78 if (unlikely(file->f_flags & O_DIRECT)) 79 ioflags |= IO_ISDIRECT; 80 if (file->f_mode & FMODE_NOCMTIME) 81 ioflags |= IO_INVIS; 82 - return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, 83 - &iocb->ki_pos, ioflags); 84 } 85 86 STATIC ssize_t ··· 315 struct file *infilp, 316 loff_t *ppos, 317 struct pipe_inode_info *pipe, 318 - size_t len, 319 unsigned int flags) 320 { 321 int ioflags = 0; 322 323 if (infilp->f_mode & FMODE_NOCMTIME) 324 ioflags |= IO_INVIS; 325 326 - return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), 327 - infilp, ppos, pipe, len, flags, ioflags); 328 } 329 330 STATIC ssize_t ··· 360 struct pipe_inode_info *pipe, 361 struct file *outfilp, 362 loff_t *ppos, 363 - size_t len, 364 unsigned int flags) 365 { 366 int ioflags = 0; 367 368 if (outfilp->f_mode & FMODE_NOCMTIME) 369 ioflags |= IO_INVIS; 370 371 - return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), 372 - pipe, outfilp, ppos, len, flags, ioflags); 373 } 374 375 STATIC int ··· 938 return -xfs_release(XFS_I(inode)); 939 } 940 941 - /* 942 - * We ignore the datasync flag here because a datasync is effectively 943 - * identical to an fsync. That is, datasync implies that we need to write 944 - * only the metadata needed to be able to access the data that is written 945 - * if we crash after the call completes. Hence if we are writing beyond 946 - * EOF we have to log the inode size change as well, which makes it a 947 - * full fsync. If we don't write beyond EOF, the inode core will be 948 - * clean in memory and so we don't need to log the inode, just like 949 - * fsync. 950 - */ 951 - STATIC int 952 - xfs_file_fsync( 953 - struct file *file, 954 - struct dentry *dentry, 955 - int datasync) 956 - { 957 - struct xfs_inode *ip = XFS_I(dentry->d_inode); 958 - 959 - xfs_iflags_clear(ip, XFS_ITRUNCATED); 960 - return -xfs_fsync(ip); 961 - } 962 - 963 STATIC int 964 xfs_file_readdir( 965 struct file *filp, ··· 959 * 960 * Try to give it an estimate that's good enough, maybe at some 961 * point we can change the ->readdir prototype to include the 962 - * buffer size. 963 */ 964 - bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size); 965 966 error = xfs_readdir(ip, dirent, bufsize, 967 (xfs_off_t *)&filp->f_pos, filldir);
··· 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 + #include "xfs_fs.h" 20 #include "xfs_bit.h" 21 #include "xfs_log.h" 22 #include "xfs_inum.h" ··· 34 #include "xfs_dir2_sf.h" 35 #include "xfs_dinode.h" 36 #include "xfs_inode.h" 37 + #include "xfs_inode_item.h" 38 + #include "xfs_bmap.h" 39 #include "xfs_error.h" 40 #include "xfs_rw.h" 41 #include "xfs_vnodeops.h" 42 #include "xfs_da_btree.h" 43 #include "xfs_ioctl.h" 44 + #include "xfs_trace.h" 45 46 #include <linux/dcache.h> 47 48 static const struct vm_operations_struct xfs_file_vm_ops; 49 50 + /* 51 + * xfs_iozero 52 + * 53 + * xfs_iozero clears the specified range of buffer supplied, 54 + * and marks all the affected blocks as valid and modified. If 55 + * an affected block is not allocated, it will be allocated. If 56 + * an affected block is not completely overwritten, and is not 57 + * valid before the operation, it will be read from disk before 58 + * being partially zeroed. 59 + */ 60 + STATIC int 61 + xfs_iozero( 62 + struct xfs_inode *ip, /* inode */ 63 + loff_t pos, /* offset in file */ 64 + size_t count) /* size of data to zero */ 65 { 66 + struct page *page; 67 + struct address_space *mapping; 68 + int status; 69 70 + mapping = VFS_I(ip)->i_mapping; 71 + do { 72 + unsigned offset, bytes; 73 + void *fsdata; 74 + 75 + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 76 + bytes = PAGE_CACHE_SIZE - offset; 77 + if (bytes > count) 78 + bytes = count; 79 + 80 + status = pagecache_write_begin(NULL, mapping, pos, bytes, 81 + AOP_FLAG_UNINTERRUPTIBLE, 82 + &page, &fsdata); 83 + if (status) 84 + break; 85 + 86 + zero_user(page, offset, bytes); 87 + 88 + status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, 89 + page, fsdata); 90 + WARN_ON(status <= 0); /* can't return less than zero! */ 91 + pos += bytes; 92 + count -= bytes; 93 + status = 0; 94 + } while (count); 95 + 96 + return (-status); 97 + } 98 + 99 + STATIC int 100 + xfs_file_fsync( 101 + struct file *file, 102 + struct dentry *dentry, 103 + int datasync) 104 + { 105 + struct xfs_inode *ip = XFS_I(dentry->d_inode); 106 + struct xfs_trans *tp; 107 + int error = 0; 108 + int log_flushed = 0; 109 + 110 + xfs_itrace_entry(ip); 111 + 112 + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 113 + return -XFS_ERROR(EIO); 114 + 115 + xfs_iflags_clear(ip, XFS_ITRUNCATED); 116 + 117 + /* 118 + * We always need to make sure that the required inode state is safe on 119 + * disk. The inode might be clean but we still might need to force the 120 + * log because of committed transactions that haven't hit the disk yet. 121 + * Likewise, there could be unflushed non-transactional changes to the 122 + * inode core that have to go to disk and this requires us to issue 123 + * a synchronous transaction to capture these changes correctly. 124 + * 125 + * This code relies on the assumption that if the i_update_core field 126 + * of the inode is clear and the inode is unpinned then it is clean 127 + * and no action is required. 128 + */ 129 + xfs_ilock(ip, XFS_ILOCK_SHARED); 130 + 131 + /* 132 + * First check if the VFS inode is marked dirty. All the dirtying 133 + * of non-transactional updates no goes through mark_inode_dirty*, 134 + * which allows us to distinguish beteeen pure timestamp updates 135 + * and i_size updates which need to be caught for fdatasync. 136 + * After that also theck for the dirty state in the XFS inode, which 137 + * might gets cleared when the inode gets written out via the AIL 138 + * or xfs_iflush_cluster. 
139 + */ 140 + if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) || 141 + ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) && 142 + ip->i_update_core) { 143 + /* 144 + * Kick off a transaction to log the inode core to get the 145 + * updates. The sync transaction will also force the log. 146 + */ 147 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 148 + tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 149 + error = xfs_trans_reserve(tp, 0, 150 + XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); 151 + if (error) { 152 + xfs_trans_cancel(tp, 0); 153 + return -error; 154 + } 155 + xfs_ilock(ip, XFS_ILOCK_EXCL); 156 + 157 + /* 158 + * Note - it's possible that we might have pushed ourselves out 159 + * of the way during trans_reserve which would flush the inode. 160 + * But there's no guarantee that the inode buffer has actually 161 + * gone out yet (it's delwri). Plus the buffer could be pinned 162 + * anyway if it's part of an inode in another recent 163 + * transaction. So we play it safe and fire off the 164 + * transaction anyway. 165 + */ 166 + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 167 + xfs_trans_ihold(tp, ip); 168 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 169 + xfs_trans_set_sync(tp); 170 + error = _xfs_trans_commit(tp, 0, &log_flushed); 171 + 172 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 173 + } else { 174 + /* 175 + * Timestamps/size haven't changed since last inode flush or 176 + * inode transaction commit. That means either nothing got 177 + * written or a transaction committed which caught the updates. 178 + * If the latter happened and the transaction hasn't hit the 179 + * disk yet, the inode will be still be pinned. If it is, 180 + * force the log. 181 + */ 182 + if (xfs_ipincount(ip)) { 183 + error = _xfs_log_force_lsn(ip->i_mount, 184 + ip->i_itemp->ili_last_lsn, 185 + XFS_LOG_SYNC, &log_flushed); 186 + } 187 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 188 + } 189 + 190 + if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { 191 + /* 192 + * If the log write didn't issue an ordered tag we need 193 + * to flush the disk cache for the data device now. 194 + */ 195 + if (!log_flushed) 196 + xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 197 + 198 + /* 199 + * If this inode is on the RT dev we need to flush that 200 + * cache as well. 201 + */ 202 + if (XFS_IS_REALTIME_INODE(ip)) 203 + xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); 204 + } 205 + 206 + return -error; 207 } 208 209 STATIC ssize_t 210 + xfs_file_aio_read( 211 struct kiocb *iocb, 212 + const struct iovec *iovp, 213 unsigned long nr_segs, 214 loff_t pos) 215 { 216 struct file *file = iocb->ki_filp; 217 + struct inode *inode = file->f_mapping->host; 218 + struct xfs_inode *ip = XFS_I(inode); 219 + struct xfs_mount *mp = ip->i_mount; 220 + size_t size = 0; 221 + ssize_t ret = 0; 222 int ioflags = 0; 223 + xfs_fsize_t n; 224 + unsigned long seg; 225 + 226 + XFS_STATS_INC(xs_read_calls); 227 228 BUG_ON(iocb->ki_pos != pos); 229 + 230 if (unlikely(file->f_flags & O_DIRECT)) 231 ioflags |= IO_ISDIRECT; 232 if (file->f_mode & FMODE_NOCMTIME) 233 ioflags |= IO_INVIS; 234 + 235 + /* START copy & waste from filemap.c */ 236 + for (seg = 0; seg < nr_segs; seg++) { 237 + const struct iovec *iv = &iovp[seg]; 238 + 239 + /* 240 + * If any segment has a negative length, or the cumulative 241 + * length ever wraps negative then return -EINVAL. 
242 + */ 243 + size += iv->iov_len; 244 + if (unlikely((ssize_t)(size|iv->iov_len) < 0)) 245 + return XFS_ERROR(-EINVAL); 246 + } 247 + /* END copy & waste from filemap.c */ 248 + 249 + if (unlikely(ioflags & IO_ISDIRECT)) { 250 + xfs_buftarg_t *target = 251 + XFS_IS_REALTIME_INODE(ip) ? 252 + mp->m_rtdev_targp : mp->m_ddev_targp; 253 + if ((iocb->ki_pos & target->bt_smask) || 254 + (size & target->bt_smask)) { 255 + if (iocb->ki_pos == ip->i_size) 256 + return 0; 257 + return -XFS_ERROR(EINVAL); 258 + } 259 + } 260 + 261 + n = XFS_MAXIOFFSET(mp) - iocb->ki_pos; 262 + if (n <= 0 || size == 0) 263 + return 0; 264 + 265 + if (n < size) 266 + size = n; 267 + 268 + if (XFS_FORCED_SHUTDOWN(mp)) 269 + return -EIO; 270 + 271 + if (unlikely(ioflags & IO_ISDIRECT)) 272 + mutex_lock(&inode->i_mutex); 273 + xfs_ilock(ip, XFS_IOLOCK_SHARED); 274 + 275 + if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 276 + int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); 277 + int iolock = XFS_IOLOCK_SHARED; 278 + 279 + ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size, 280 + dmflags, &iolock); 281 + if (ret) { 282 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 283 + if (unlikely(ioflags & IO_ISDIRECT)) 284 + mutex_unlock(&inode->i_mutex); 285 + return ret; 286 + } 287 + } 288 + 289 + if (unlikely(ioflags & IO_ISDIRECT)) { 290 + if (inode->i_mapping->nrpages) { 291 + ret = -xfs_flushinval_pages(ip, 292 + (iocb->ki_pos & PAGE_CACHE_MASK), 293 + -1, FI_REMAPF_LOCKED); 294 + } 295 + mutex_unlock(&inode->i_mutex); 296 + if (ret) { 297 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 298 + return ret; 299 + } 300 + } 301 + 302 + trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); 303 + 304 + ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); 305 + if (ret > 0) 306 + XFS_STATS_ADD(xs_read_bytes, ret); 307 + 308 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 309 + return ret; 310 } 311 312 STATIC ssize_t ··· 87 struct file *infilp, 88 loff_t *ppos, 89 struct pipe_inode_info *pipe, 90 + size_t count, 91 unsigned int flags) 92 { 93 + struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); 94 + struct xfs_mount *mp = ip->i_mount; 95 int ioflags = 0; 96 + ssize_t ret; 97 + 98 + XFS_STATS_INC(xs_read_calls); 99 100 if (infilp->f_mode & FMODE_NOCMTIME) 101 ioflags |= IO_INVIS; 102 103 + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 104 + return -EIO; 105 + 106 + xfs_ilock(ip, XFS_IOLOCK_SHARED); 107 + 108 + if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 109 + int iolock = XFS_IOLOCK_SHARED; 110 + int error; 111 + 112 + error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, 113 + FILP_DELAY_FLAG(infilp), &iolock); 114 + if (error) { 115 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 116 + return -error; 117 + } 118 + } 119 + 120 + trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 121 + 122 + ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 123 + if (ret > 0) 124 + XFS_STATS_ADD(xs_read_bytes, ret); 125 + 126 + xfs_iunlock(ip, XFS_IOLOCK_SHARED); 127 + return ret; 128 } 129 130 STATIC ssize_t ··· 104 struct pipe_inode_info *pipe, 105 struct file *outfilp, 106 loff_t *ppos, 107 + size_t count, 108 unsigned int flags) 109 { 110 + struct inode *inode = outfilp->f_mapping->host; 111 + struct xfs_inode *ip = XFS_I(inode); 112 + struct xfs_mount *mp = ip->i_mount; 113 + xfs_fsize_t isize, new_size; 114 int ioflags = 0; 115 + ssize_t ret; 116 + 117 + XFS_STATS_INC(xs_write_calls); 118 119 if (outfilp->f_mode & FMODE_NOCMTIME) 120 ioflags |= IO_INVIS; 121 122 + if 
(XFS_FORCED_SHUTDOWN(ip->i_mount)) 123 + return -EIO; 124 + 125 + xfs_ilock(ip, XFS_IOLOCK_EXCL); 126 + 127 + if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { 128 + int iolock = XFS_IOLOCK_EXCL; 129 + int error; 130 + 131 + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, 132 + FILP_DELAY_FLAG(outfilp), &iolock); 133 + if (error) { 134 + xfs_iunlock(ip, XFS_IOLOCK_EXCL); 135 + return -error; 136 + } 137 + } 138 + 139 + new_size = *ppos + count; 140 + 141 + xfs_ilock(ip, XFS_ILOCK_EXCL); 142 + if (new_size > ip->i_size) 143 + ip->i_new_size = new_size; 144 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 145 + 146 + trace_xfs_file_splice_write(ip, count, *ppos, ioflags); 147 + 148 + ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); 149 + if (ret > 0) 150 + XFS_STATS_ADD(xs_write_bytes, ret); 151 + 152 + isize = i_size_read(inode); 153 + if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) 154 + *ppos = isize; 155 + 156 + if (*ppos > ip->i_size) { 157 + xfs_ilock(ip, XFS_ILOCK_EXCL); 158 + if (*ppos > ip->i_size) 159 + ip->i_size = *ppos; 160 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 161 + } 162 + 163 + if (ip->i_new_size) { 164 + xfs_ilock(ip, XFS_ILOCK_EXCL); 165 + ip->i_new_size = 0; 166 + if (ip->i_d.di_size > ip->i_size) 167 + ip->i_d.di_size = ip->i_size; 168 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 169 + } 170 + xfs_iunlock(ip, XFS_IOLOCK_EXCL); 171 + return ret; 172 + } 173 + 174 + /* 175 + * This routine is called to handle zeroing any space in the last 176 + * block of the file that is beyond the EOF. We do this since the 177 + * size is being increased without writing anything to that block 178 + * and we don't want anyone to read the garbage on the disk. 179 + */ 180 + STATIC int /* error (positive) */ 181 + xfs_zero_last_block( 182 + xfs_inode_t *ip, 183 + xfs_fsize_t offset, 184 + xfs_fsize_t isize) 185 + { 186 + xfs_fileoff_t last_fsb; 187 + xfs_mount_t *mp = ip->i_mount; 188 + int nimaps; 189 + int zero_offset; 190 + int zero_len; 191 + int error = 0; 192 + xfs_bmbt_irec_t imap; 193 + 194 + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 195 + 196 + zero_offset = XFS_B_FSB_OFFSET(mp, isize); 197 + if (zero_offset == 0) { 198 + /* 199 + * There are no extra bytes in the last block on disk to 200 + * zero, so return. 201 + */ 202 + return 0; 203 + } 204 + 205 + last_fsb = XFS_B_TO_FSBT(mp, isize); 206 + nimaps = 1; 207 + error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, 208 + &nimaps, NULL, NULL); 209 + if (error) { 210 + return error; 211 + } 212 + ASSERT(nimaps > 0); 213 + /* 214 + * If the block underlying isize is just a hole, then there 215 + * is nothing to zero. 216 + */ 217 + if (imap.br_startblock == HOLESTARTBLOCK) { 218 + return 0; 219 + } 220 + /* 221 + * Zero the part of the last block beyond the EOF, and write it 222 + * out sync. We need to drop the ilock while we do this so we 223 + * don't deadlock when the buffer cache calls back to us. 224 + */ 225 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 226 + 227 + zero_len = mp->m_sb.sb_blocksize - zero_offset; 228 + if (isize + zero_len > offset) 229 + zero_len = offset - isize; 230 + error = xfs_iozero(ip, isize, zero_len); 231 + 232 + xfs_ilock(ip, XFS_ILOCK_EXCL); 233 + ASSERT(error >= 0); 234 + return error; 235 + } 236 + 237 + /* 238 + * Zero any on disk space between the current EOF and the new, 239 + * larger EOF. This handles the normal case of zeroing the remainder 240 + * of the last block in the file and the unusual case of zeroing blocks 241 + * out beyond the size of the file. 
This second case only happens 242 + * with fixed size extents and when the system crashes before the inode 243 + * size was updated but after blocks were allocated. If fill is set, 244 + * then any holes in the range are filled and zeroed. If not, the holes 245 + * are left alone as holes. 246 + */ 247 + 248 + int /* error (positive) */ 249 + xfs_zero_eof( 250 + xfs_inode_t *ip, 251 + xfs_off_t offset, /* starting I/O offset */ 252 + xfs_fsize_t isize) /* current inode size */ 253 + { 254 + xfs_mount_t *mp = ip->i_mount; 255 + xfs_fileoff_t start_zero_fsb; 256 + xfs_fileoff_t end_zero_fsb; 257 + xfs_fileoff_t zero_count_fsb; 258 + xfs_fileoff_t last_fsb; 259 + xfs_fileoff_t zero_off; 260 + xfs_fsize_t zero_len; 261 + int nimaps; 262 + int error = 0; 263 + xfs_bmbt_irec_t imap; 264 + 265 + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 266 + ASSERT(offset > isize); 267 + 268 + /* 269 + * First handle zeroing the block on which isize resides. 270 + * We only zero a part of that block so it is handled specially. 271 + */ 272 + error = xfs_zero_last_block(ip, offset, isize); 273 + if (error) { 274 + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 275 + return error; 276 + } 277 + 278 + /* 279 + * Calculate the range between the new size and the old 280 + * where blocks needing to be zeroed may exist. To get the 281 + * block where the last byte in the file currently resides, 282 + * we need to subtract one from the size and truncate back 283 + * to a block boundary. We subtract 1 in case the size is 284 + * exactly on a block boundary. 285 + */ 286 + last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; 287 + start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); 288 + end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); 289 + ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); 290 + if (last_fsb == end_zero_fsb) { 291 + /* 292 + * The size was only incremented on its last block. 293 + * We took care of that above, so just return. 294 + */ 295 + return 0; 296 + } 297 + 298 + ASSERT(start_zero_fsb <= end_zero_fsb); 299 + while (start_zero_fsb <= end_zero_fsb) { 300 + nimaps = 1; 301 + zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 302 + error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 303 + 0, NULL, 0, &imap, &nimaps, NULL, NULL); 304 + if (error) { 305 + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 306 + return error; 307 + } 308 + ASSERT(nimaps > 0); 309 + 310 + if (imap.br_state == XFS_EXT_UNWRITTEN || 311 + imap.br_startblock == HOLESTARTBLOCK) { 312 + /* 313 + * This loop handles initializing pages that were 314 + * partially initialized by the code below this 315 + * loop. It basically zeroes the part of the page 316 + * that sits on a hole and sets the page as P_HOLE 317 + * and calls remapf if it is a mapped file. 318 + */ 319 + start_zero_fsb = imap.br_startoff + imap.br_blockcount; 320 + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 321 + continue; 322 + } 323 + 324 + /* 325 + * There are blocks we need to zero. 326 + * Drop the inode lock while we're doing the I/O. 327 + * We'll still have the iolock to protect us. 
328 + */ 329 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 330 + 331 + zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); 332 + zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); 333 + 334 + if ((zero_off + zero_len) > offset) 335 + zero_len = offset - zero_off; 336 + 337 + error = xfs_iozero(ip, zero_off, zero_len); 338 + if (error) { 339 + goto out_lock; 340 + } 341 + 342 + start_zero_fsb = imap.br_startoff + imap.br_blockcount; 343 + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 344 + 345 + xfs_ilock(ip, XFS_ILOCK_EXCL); 346 + } 347 + 348 + return 0; 349 + 350 + out_lock: 351 + xfs_ilock(ip, XFS_ILOCK_EXCL); 352 + ASSERT(error >= 0); 353 + return error; 354 + } 355 + 356 + STATIC ssize_t 357 + xfs_file_aio_write( 358 + struct kiocb *iocb, 359 + const struct iovec *iovp, 360 + unsigned long nr_segs, 361 + loff_t pos) 362 + { 363 + struct file *file = iocb->ki_filp; 364 + struct address_space *mapping = file->f_mapping; 365 + struct inode *inode = mapping->host; 366 + struct xfs_inode *ip = XFS_I(inode); 367 + struct xfs_mount *mp = ip->i_mount; 368 + ssize_t ret = 0, error = 0; 369 + int ioflags = 0; 370 + xfs_fsize_t isize, new_size; 371 + int iolock; 372 + int eventsent = 0; 373 + size_t ocount = 0, count; 374 + int need_i_mutex; 375 + 376 + XFS_STATS_INC(xs_write_calls); 377 + 378 + BUG_ON(iocb->ki_pos != pos); 379 + 380 + if (unlikely(file->f_flags & O_DIRECT)) 381 + ioflags |= IO_ISDIRECT; 382 + if (file->f_mode & FMODE_NOCMTIME) 383 + ioflags |= IO_INVIS; 384 + 385 + error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); 386 + if (error) 387 + return error; 388 + 389 + count = ocount; 390 + if (count == 0) 391 + return 0; 392 + 393 + xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); 394 + 395 + if (XFS_FORCED_SHUTDOWN(mp)) 396 + return -EIO; 397 + 398 + relock: 399 + if (ioflags & IO_ISDIRECT) { 400 + iolock = XFS_IOLOCK_SHARED; 401 + need_i_mutex = 0; 402 + } else { 403 + iolock = XFS_IOLOCK_EXCL; 404 + need_i_mutex = 1; 405 + mutex_lock(&inode->i_mutex); 406 + } 407 + 408 + xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); 409 + 410 + start: 411 + error = -generic_write_checks(file, &pos, &count, 412 + S_ISBLK(inode->i_mode)); 413 + if (error) { 414 + xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); 415 + goto out_unlock_mutex; 416 + } 417 + 418 + if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && 419 + !(ioflags & IO_INVIS) && !eventsent)) { 420 + int dmflags = FILP_DELAY_FLAG(file); 421 + 422 + if (need_i_mutex) 423 + dmflags |= DM_FLAGS_IMUX; 424 + 425 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 426 + error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip, 427 + pos, count, dmflags, &iolock); 428 + if (error) { 429 + goto out_unlock_internal; 430 + } 431 + xfs_ilock(ip, XFS_ILOCK_EXCL); 432 + eventsent = 1; 433 + 434 + /* 435 + * The iolock was dropped and reacquired in XFS_SEND_DATA 436 + * so we have to recheck the size when appending. 437 + * We will only "goto start;" once, since having sent the 438 + * event prevents another call to XFS_SEND_DATA, which is 439 + * what allows the size to change in the first place. 440 + */ 441 + if ((file->f_flags & O_APPEND) && pos != ip->i_size) 442 + goto start; 443 + } 444 + 445 + if (ioflags & IO_ISDIRECT) { 446 + xfs_buftarg_t *target = 447 + XFS_IS_REALTIME_INODE(ip) ? 
448 + mp->m_rtdev_targp : mp->m_ddev_targp; 449 + 450 + if ((pos & target->bt_smask) || (count & target->bt_smask)) { 451 + xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); 452 + return XFS_ERROR(-EINVAL); 453 + } 454 + 455 + if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { 456 + xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); 457 + iolock = XFS_IOLOCK_EXCL; 458 + need_i_mutex = 1; 459 + mutex_lock(&inode->i_mutex); 460 + xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); 461 + goto start; 462 + } 463 + } 464 + 465 + new_size = pos + count; 466 + if (new_size > ip->i_size) 467 + ip->i_new_size = new_size; 468 + 469 + if (likely(!(ioflags & IO_INVIS))) 470 + file_update_time(file); 471 + 472 + /* 473 + * If the offset is beyond the size of the file, we have a couple 474 + * of things to do. First, if there is already space allocated 475 + * we need to either create holes or zero the disk or ... 476 + * 477 + * If there is a page where the previous size lands, we need 478 + * to zero it out up to the new size. 479 + */ 480 + 481 + if (pos > ip->i_size) { 482 + error = xfs_zero_eof(ip, pos, ip->i_size); 483 + if (error) { 484 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 485 + goto out_unlock_internal; 486 + } 487 + } 488 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 489 + 490 + /* 491 + * If we're writing the file then make sure to clear the 492 + * setuid and setgid bits if the process is not being run 493 + * by root. This keeps people from modifying setuid and 494 + * setgid binaries. 495 + */ 496 + error = -file_remove_suid(file); 497 + if (unlikely(error)) 498 + goto out_unlock_internal; 499 + 500 + /* We can write back this queue in page reclaim */ 501 + current->backing_dev_info = mapping->backing_dev_info; 502 + 503 + if ((ioflags & IO_ISDIRECT)) { 504 + if (mapping->nrpages) { 505 + WARN_ON(need_i_mutex == 0); 506 + error = xfs_flushinval_pages(ip, 507 + (pos & PAGE_CACHE_MASK), 508 + -1, FI_REMAPF_LOCKED); 509 + if (error) 510 + goto out_unlock_internal; 511 + } 512 + 513 + if (need_i_mutex) { 514 + /* demote the lock now the cached pages are gone */ 515 + xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); 516 + mutex_unlock(&inode->i_mutex); 517 + 518 + iolock = XFS_IOLOCK_SHARED; 519 + need_i_mutex = 0; 520 + } 521 + 522 + trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); 523 + ret = generic_file_direct_write(iocb, iovp, 524 + &nr_segs, pos, &iocb->ki_pos, count, ocount); 525 + 526 + /* 527 + * direct-io write to a hole: fall through to buffered I/O 528 + * for completing the rest of the request. 
529 + */ 530 + if (ret >= 0 && ret != count) { 531 + XFS_STATS_ADD(xs_write_bytes, ret); 532 + 533 + pos += ret; 534 + count -= ret; 535 + 536 + ioflags &= ~IO_ISDIRECT; 537 + xfs_iunlock(ip, iolock); 538 + goto relock; 539 + } 540 + } else { 541 + int enospc = 0; 542 + ssize_t ret2 = 0; 543 + 544 + write_retry: 545 + trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); 546 + ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, 547 + pos, &iocb->ki_pos, count, ret); 548 + /* 549 + * if we just got an ENOSPC, flush the inode now we 550 + * aren't holding any page locks and retry *once* 551 + */ 552 + if (ret2 == -ENOSPC && !enospc) { 553 + error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); 554 + if (error) 555 + goto out_unlock_internal; 556 + enospc = 1; 557 + goto write_retry; 558 + } 559 + ret = ret2; 560 + } 561 + 562 + current->backing_dev_info = NULL; 563 + 564 + isize = i_size_read(inode); 565 + if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) 566 + iocb->ki_pos = isize; 567 + 568 + if (iocb->ki_pos > ip->i_size) { 569 + xfs_ilock(ip, XFS_ILOCK_EXCL); 570 + if (iocb->ki_pos > ip->i_size) 571 + ip->i_size = iocb->ki_pos; 572 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 573 + } 574 + 575 + if (ret == -ENOSPC && 576 + DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { 577 + xfs_iunlock(ip, iolock); 578 + if (need_i_mutex) 579 + mutex_unlock(&inode->i_mutex); 580 + error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip, 581 + DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL, 582 + 0, 0, 0); /* Delay flag intentionally unused */ 583 + if (need_i_mutex) 584 + mutex_lock(&inode->i_mutex); 585 + xfs_ilock(ip, iolock); 586 + if (error) 587 + goto out_unlock_internal; 588 + goto start; 589 + } 590 + 591 + error = -ret; 592 + if (ret <= 0) 593 + goto out_unlock_internal; 594 + 595 + XFS_STATS_ADD(xs_write_bytes, ret); 596 + 597 + /* Handle various SYNC-type writes */ 598 + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { 599 + loff_t end = pos + ret - 1; 600 + int error2; 601 + 602 + xfs_iunlock(ip, iolock); 603 + if (need_i_mutex) 604 + mutex_unlock(&inode->i_mutex); 605 + 606 + error2 = filemap_write_and_wait_range(mapping, pos, end); 607 + if (!error) 608 + error = error2; 609 + if (need_i_mutex) 610 + mutex_lock(&inode->i_mutex); 611 + xfs_ilock(ip, iolock); 612 + 613 + error2 = -xfs_file_fsync(file, file->f_path.dentry, 614 + (file->f_flags & __O_SYNC) ? 0 : 1); 615 + if (!error) 616 + error = error2; 617 + } 618 + 619 + out_unlock_internal: 620 + if (ip->i_new_size) { 621 + xfs_ilock(ip, XFS_ILOCK_EXCL); 622 + ip->i_new_size = 0; 623 + /* 624 + * If this was a direct or synchronous I/O that failed (such 625 + * as ENOSPC) then part of the I/O may have been written to 626 + * disk before the error occured. In this case the on-disk 627 + * file size may have been adjusted beyond the in-memory file 628 + * size and now needs to be truncated back. 629 + */ 630 + if (ip->i_d.di_size > ip->i_size) 631 + ip->i_d.di_size = ip->i_size; 632 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 633 + } 634 + xfs_iunlock(ip, iolock); 635 + out_unlock_mutex: 636 + if (need_i_mutex) 637 + mutex_unlock(&inode->i_mutex); 638 + return -error; 639 } 640 641 STATIC int ··· 160 return -xfs_release(XFS_I(inode)); 161 } 162 163 STATIC int 164 xfs_file_readdir( 165 struct file *filp, ··· 203 * 204 * Try to give it an estimate that's good enough, maybe at some 205 * point we can change the ->readdir prototype to include the 206 + * buffer size. For now we use the current glibc buffer size. 
207 */ 208 + bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); 209 210 error = xfs_readdir(ip, dirent, bufsize, 211 (xfs_off_t *)&filp->f_pos, filldir);
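The new xfs_file_fsync() above is where the "implement optimized fdatasync" commit lands: a synchronous inode-core transaction is only issued when the VFS dirty state requires it; otherwise a still-pinned inode just gets a log force via _xfs_log_force_lsn(). The condition it tests reduces to the sketch below. This is a restatement of the predicate visible in the diff, with boolean parameters standing in for the i_state flag tests, not additional XFS code.

    #include <stdbool.h>

    /*
     * Distilled restatement of the condition in xfs_file_fsync() above
     * (illustrative only; the real code tests inode->i_state bits directly).
     */
    static bool fsync_needs_inode_log(bool dirty_datasync,	/* I_DIRTY_DATASYNC set  */
    				  bool dirty_sync,	/* I_DIRTY_SYNC set      */
    				  bool datasync,	/* fdatasync() requested */
    				  bool update_core)	/* ip->i_update_core     */
    {
    	/*
    	 * Size changes (I_DIRTY_DATASYNC) always require logging the inode
    	 * core; timestamp-only changes (I_DIRTY_SYNC) only matter for a full
    	 * fsync().  Either way, nothing is logged unless the XFS inode core
    	 * is actually dirty.
    	 */
    	return (dirty_datasync || (dirty_sync && !datasync)) && update_core;
    }

When the predicate is false, the else branch in the diff forces the log up to ili_last_lsn only if the inode is still pinned, which is what makes a timestamp-only fdatasync() cheap.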
+10
fs/xfs/linux-2.6/xfs_iops.c
···
 	mark_inode_dirty_sync(inode);
 }
 
+void
+xfs_mark_inode_dirty(
+	xfs_inode_t	*ip)
+{
+	struct inode	*inode = VFS_I(ip);
+
+	if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+		mark_inode_dirty(inode);
+}
+
 /*
  * Change the requested timestamp in the given inode.
  * We don't lock across timestamp updates, and we don't log them but
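The helper added here pairs with the xfs_aops.c change earlier in this merge, which switches the size-update paths from xfs_mark_inode_dirty_sync() to xfs_mark_inode_dirty(). Marking the VFS inode fully dirty is what lets the new fdatasync logic in xfs_file_fsync() catch i_size updates via I_DIRTY_DATASYNC, while the I_WILL_FREE/I_FREEING/I_CLEAR check avoids redirtying an inode the VFS is already tearing down. The call site in the new xfs_setfilesize() above (ip->i_d.di_size = isize; xfs_mark_inode_dirty(ip);) shows the intended use.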
-1
fs/xfs/linux-2.6/xfs_linux.h
···
 #include <xfs_super.h>
 #include <xfs_globals.h>
 #include <xfs_fs_subr.h>
-#include <xfs_lrw.h>
 #include <xfs_buf.h>
 
 /*
-796
fs/xfs/linux-2.6/xfs_lrw.c
··· 1 - /* 2 - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 3 - * All Rights Reserved. 4 - * 5 - * This program is free software; you can redistribute it and/or 6 - * modify it under the terms of the GNU General Public License as 7 - * published by the Free Software Foundation. 8 - * 9 - * This program is distributed in the hope that it would be useful, 10 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 - * GNU General Public License for more details. 13 - * 14 - * You should have received a copy of the GNU General Public License 15 - * along with this program; if not, write the Free Software Foundation, 16 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 - */ 18 - #include "xfs.h" 19 - #include "xfs_fs.h" 20 - #include "xfs_bit.h" 21 - #include "xfs_log.h" 22 - #include "xfs_inum.h" 23 - #include "xfs_trans.h" 24 - #include "xfs_sb.h" 25 - #include "xfs_ag.h" 26 - #include "xfs_dir2.h" 27 - #include "xfs_alloc.h" 28 - #include "xfs_dmapi.h" 29 - #include "xfs_quota.h" 30 - #include "xfs_mount.h" 31 - #include "xfs_bmap_btree.h" 32 - #include "xfs_alloc_btree.h" 33 - #include "xfs_ialloc_btree.h" 34 - #include "xfs_dir2_sf.h" 35 - #include "xfs_attr_sf.h" 36 - #include "xfs_dinode.h" 37 - #include "xfs_inode.h" 38 - #include "xfs_bmap.h" 39 - #include "xfs_btree.h" 40 - #include "xfs_ialloc.h" 41 - #include "xfs_rtalloc.h" 42 - #include "xfs_error.h" 43 - #include "xfs_itable.h" 44 - #include "xfs_rw.h" 45 - #include "xfs_attr.h" 46 - #include "xfs_inode_item.h" 47 - #include "xfs_buf_item.h" 48 - #include "xfs_utils.h" 49 - #include "xfs_iomap.h" 50 - #include "xfs_vnodeops.h" 51 - #include "xfs_trace.h" 52 - 53 - #include <linux/capability.h> 54 - #include <linux/writeback.h> 55 - 56 - 57 - /* 58 - * xfs_iozero 59 - * 60 - * xfs_iozero clears the specified range of buffer supplied, 61 - * and marks all the affected blocks as valid and modified. If 62 - * an affected block is not allocated, it will be allocated. If 63 - * an affected block is not completely overwritten, and is not 64 - * valid before the operation, it will be read from disk before 65 - * being partially zeroed. 66 - */ 67 - STATIC int 68 - xfs_iozero( 69 - struct xfs_inode *ip, /* inode */ 70 - loff_t pos, /* offset in file */ 71 - size_t count) /* size of data to zero */ 72 - { 73 - struct page *page; 74 - struct address_space *mapping; 75 - int status; 76 - 77 - mapping = VFS_I(ip)->i_mapping; 78 - do { 79 - unsigned offset, bytes; 80 - void *fsdata; 81 - 82 - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 83 - bytes = PAGE_CACHE_SIZE - offset; 84 - if (bytes > count) 85 - bytes = count; 86 - 87 - status = pagecache_write_begin(NULL, mapping, pos, bytes, 88 - AOP_FLAG_UNINTERRUPTIBLE, 89 - &page, &fsdata); 90 - if (status) 91 - break; 92 - 93 - zero_user(page, offset, bytes); 94 - 95 - status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, 96 - page, fsdata); 97 - WARN_ON(status <= 0); /* can't return less than zero! 
*/ 98 - pos += bytes; 99 - count -= bytes; 100 - status = 0; 101 - } while (count); 102 - 103 - return (-status); 104 - } 105 - 106 - ssize_t /* bytes read, or (-) error */ 107 - xfs_read( 108 - xfs_inode_t *ip, 109 - struct kiocb *iocb, 110 - const struct iovec *iovp, 111 - unsigned int segs, 112 - loff_t *offset, 113 - int ioflags) 114 - { 115 - struct file *file = iocb->ki_filp; 116 - struct inode *inode = file->f_mapping->host; 117 - xfs_mount_t *mp = ip->i_mount; 118 - size_t size = 0; 119 - ssize_t ret = 0; 120 - xfs_fsize_t n; 121 - unsigned long seg; 122 - 123 - 124 - XFS_STATS_INC(xs_read_calls); 125 - 126 - /* START copy & waste from filemap.c */ 127 - for (seg = 0; seg < segs; seg++) { 128 - const struct iovec *iv = &iovp[seg]; 129 - 130 - /* 131 - * If any segment has a negative length, or the cumulative 132 - * length ever wraps negative then return -EINVAL. 133 - */ 134 - size += iv->iov_len; 135 - if (unlikely((ssize_t)(size|iv->iov_len) < 0)) 136 - return XFS_ERROR(-EINVAL); 137 - } 138 - /* END copy & waste from filemap.c */ 139 - 140 - if (unlikely(ioflags & IO_ISDIRECT)) { 141 - xfs_buftarg_t *target = 142 - XFS_IS_REALTIME_INODE(ip) ? 143 - mp->m_rtdev_targp : mp->m_ddev_targp; 144 - if ((*offset & target->bt_smask) || 145 - (size & target->bt_smask)) { 146 - if (*offset == ip->i_size) { 147 - return (0); 148 - } 149 - return -XFS_ERROR(EINVAL); 150 - } 151 - } 152 - 153 - n = XFS_MAXIOFFSET(mp) - *offset; 154 - if ((n <= 0) || (size == 0)) 155 - return 0; 156 - 157 - if (n < size) 158 - size = n; 159 - 160 - if (XFS_FORCED_SHUTDOWN(mp)) 161 - return -EIO; 162 - 163 - if (unlikely(ioflags & IO_ISDIRECT)) 164 - mutex_lock(&inode->i_mutex); 165 - xfs_ilock(ip, XFS_IOLOCK_SHARED); 166 - 167 - if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 168 - int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); 169 - int iolock = XFS_IOLOCK_SHARED; 170 - 171 - ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size, 172 - dmflags, &iolock); 173 - if (ret) { 174 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 175 - if (unlikely(ioflags & IO_ISDIRECT)) 176 - mutex_unlock(&inode->i_mutex); 177 - return ret; 178 - } 179 - } 180 - 181 - if (unlikely(ioflags & IO_ISDIRECT)) { 182 - if (inode->i_mapping->nrpages) 183 - ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), 184 - -1, FI_REMAPF_LOCKED); 185 - mutex_unlock(&inode->i_mutex); 186 - if (ret) { 187 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 188 - return ret; 189 - } 190 - } 191 - 192 - trace_xfs_file_read(ip, size, *offset, ioflags); 193 - 194 - iocb->ki_pos = *offset; 195 - ret = generic_file_aio_read(iocb, iovp, segs, *offset); 196 - if (ret > 0) 197 - XFS_STATS_ADD(xs_read_bytes, ret); 198 - 199 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 200 - return ret; 201 - } 202 - 203 - ssize_t 204 - xfs_splice_read( 205 - xfs_inode_t *ip, 206 - struct file *infilp, 207 - loff_t *ppos, 208 - struct pipe_inode_info *pipe, 209 - size_t count, 210 - int flags, 211 - int ioflags) 212 - { 213 - xfs_mount_t *mp = ip->i_mount; 214 - ssize_t ret; 215 - 216 - XFS_STATS_INC(xs_read_calls); 217 - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 218 - return -EIO; 219 - 220 - xfs_ilock(ip, XFS_IOLOCK_SHARED); 221 - 222 - if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { 223 - int iolock = XFS_IOLOCK_SHARED; 224 - int error; 225 - 226 - error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, 227 - FILP_DELAY_FLAG(infilp), &iolock); 228 - if (error) { 229 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 230 - return -error; 231 - } 232 - } 
233 - 234 - trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 235 - 236 - ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 237 - if (ret > 0) 238 - XFS_STATS_ADD(xs_read_bytes, ret); 239 - 240 - xfs_iunlock(ip, XFS_IOLOCK_SHARED); 241 - return ret; 242 - } 243 - 244 - ssize_t 245 - xfs_splice_write( 246 - xfs_inode_t *ip, 247 - struct pipe_inode_info *pipe, 248 - struct file *outfilp, 249 - loff_t *ppos, 250 - size_t count, 251 - int flags, 252 - int ioflags) 253 - { 254 - xfs_mount_t *mp = ip->i_mount; 255 - ssize_t ret; 256 - struct inode *inode = outfilp->f_mapping->host; 257 - xfs_fsize_t isize, new_size; 258 - 259 - XFS_STATS_INC(xs_write_calls); 260 - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 261 - return -EIO; 262 - 263 - xfs_ilock(ip, XFS_IOLOCK_EXCL); 264 - 265 - if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { 266 - int iolock = XFS_IOLOCK_EXCL; 267 - int error; 268 - 269 - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, 270 - FILP_DELAY_FLAG(outfilp), &iolock); 271 - if (error) { 272 - xfs_iunlock(ip, XFS_IOLOCK_EXCL); 273 - return -error; 274 - } 275 - } 276 - 277 - new_size = *ppos + count; 278 - 279 - xfs_ilock(ip, XFS_ILOCK_EXCL); 280 - if (new_size > ip->i_size) 281 - ip->i_new_size = new_size; 282 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 283 - 284 - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); 285 - 286 - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); 287 - if (ret > 0) 288 - XFS_STATS_ADD(xs_write_bytes, ret); 289 - 290 - isize = i_size_read(inode); 291 - if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) 292 - *ppos = isize; 293 - 294 - if (*ppos > ip->i_size) { 295 - xfs_ilock(ip, XFS_ILOCK_EXCL); 296 - if (*ppos > ip->i_size) 297 - ip->i_size = *ppos; 298 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 299 - } 300 - 301 - if (ip->i_new_size) { 302 - xfs_ilock(ip, XFS_ILOCK_EXCL); 303 - ip->i_new_size = 0; 304 - if (ip->i_d.di_size > ip->i_size) 305 - ip->i_d.di_size = ip->i_size; 306 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 307 - } 308 - xfs_iunlock(ip, XFS_IOLOCK_EXCL); 309 - return ret; 310 - } 311 - 312 - /* 313 - * This routine is called to handle zeroing any space in the last 314 - * block of the file that is beyond the EOF. We do this since the 315 - * size is being increased without writing anything to that block 316 - * and we don't want anyone to read the garbage on the disk. 317 - */ 318 - STATIC int /* error (positive) */ 319 - xfs_zero_last_block( 320 - xfs_inode_t *ip, 321 - xfs_fsize_t offset, 322 - xfs_fsize_t isize) 323 - { 324 - xfs_fileoff_t last_fsb; 325 - xfs_mount_t *mp = ip->i_mount; 326 - int nimaps; 327 - int zero_offset; 328 - int zero_len; 329 - int error = 0; 330 - xfs_bmbt_irec_t imap; 331 - 332 - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 333 - 334 - zero_offset = XFS_B_FSB_OFFSET(mp, isize); 335 - if (zero_offset == 0) { 336 - /* 337 - * There are no extra bytes in the last block on disk to 338 - * zero, so return. 339 - */ 340 - return 0; 341 - } 342 - 343 - last_fsb = XFS_B_TO_FSBT(mp, isize); 344 - nimaps = 1; 345 - error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, 346 - &nimaps, NULL, NULL); 347 - if (error) { 348 - return error; 349 - } 350 - ASSERT(nimaps > 0); 351 - /* 352 - * If the block underlying isize is just a hole, then there 353 - * is nothing to zero. 354 - */ 355 - if (imap.br_startblock == HOLESTARTBLOCK) { 356 - return 0; 357 - } 358 - /* 359 - * Zero the part of the last block beyond the EOF, and write it 360 - * out sync. 
We need to drop the ilock while we do this so we 361 - * don't deadlock when the buffer cache calls back to us. 362 - */ 363 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 364 - 365 - zero_len = mp->m_sb.sb_blocksize - zero_offset; 366 - if (isize + zero_len > offset) 367 - zero_len = offset - isize; 368 - error = xfs_iozero(ip, isize, zero_len); 369 - 370 - xfs_ilock(ip, XFS_ILOCK_EXCL); 371 - ASSERT(error >= 0); 372 - return error; 373 - } 374 - 375 - /* 376 - * Zero any on disk space between the current EOF and the new, 377 - * larger EOF. This handles the normal case of zeroing the remainder 378 - * of the last block in the file and the unusual case of zeroing blocks 379 - * out beyond the size of the file. This second case only happens 380 - * with fixed size extents and when the system crashes before the inode 381 - * size was updated but after blocks were allocated. If fill is set, 382 - * then any holes in the range are filled and zeroed. If not, the holes 383 - * are left alone as holes. 384 - */ 385 - 386 - int /* error (positive) */ 387 - xfs_zero_eof( 388 - xfs_inode_t *ip, 389 - xfs_off_t offset, /* starting I/O offset */ 390 - xfs_fsize_t isize) /* current inode size */ 391 - { 392 - xfs_mount_t *mp = ip->i_mount; 393 - xfs_fileoff_t start_zero_fsb; 394 - xfs_fileoff_t end_zero_fsb; 395 - xfs_fileoff_t zero_count_fsb; 396 - xfs_fileoff_t last_fsb; 397 - xfs_fileoff_t zero_off; 398 - xfs_fsize_t zero_len; 399 - int nimaps; 400 - int error = 0; 401 - xfs_bmbt_irec_t imap; 402 - 403 - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 404 - ASSERT(offset > isize); 405 - 406 - /* 407 - * First handle zeroing the block on which isize resides. 408 - * We only zero a part of that block so it is handled specially. 409 - */ 410 - error = xfs_zero_last_block(ip, offset, isize); 411 - if (error) { 412 - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 413 - return error; 414 - } 415 - 416 - /* 417 - * Calculate the range between the new size and the old 418 - * where blocks needing to be zeroed may exist. To get the 419 - * block where the last byte in the file currently resides, 420 - * we need to subtract one from the size and truncate back 421 - * to a block boundary. We subtract 1 in case the size is 422 - * exactly on a block boundary. 423 - */ 424 - last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; 425 - start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); 426 - end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); 427 - ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); 428 - if (last_fsb == end_zero_fsb) { 429 - /* 430 - * The size was only incremented on its last block. 431 - * We took care of that above, so just return. 432 - */ 433 - return 0; 434 - } 435 - 436 - ASSERT(start_zero_fsb <= end_zero_fsb); 437 - while (start_zero_fsb <= end_zero_fsb) { 438 - nimaps = 1; 439 - zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 440 - error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 441 - 0, NULL, 0, &imap, &nimaps, NULL, NULL); 442 - if (error) { 443 - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 444 - return error; 445 - } 446 - ASSERT(nimaps > 0); 447 - 448 - if (imap.br_state == XFS_EXT_UNWRITTEN || 449 - imap.br_startblock == HOLESTARTBLOCK) { 450 - /* 451 - * This loop handles initializing pages that were 452 - * partially initialized by the code below this 453 - * loop. It basically zeroes the part of the page 454 - * that sits on a hole and sets the page as P_HOLE 455 - * and calls remapf if it is a mapped file. 
456 - */ 457 - start_zero_fsb = imap.br_startoff + imap.br_blockcount; 458 - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 459 - continue; 460 - } 461 - 462 - /* 463 - * There are blocks we need to zero. 464 - * Drop the inode lock while we're doing the I/O. 465 - * We'll still have the iolock to protect us. 466 - */ 467 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 468 - 469 - zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); 470 - zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); 471 - 472 - if ((zero_off + zero_len) > offset) 473 - zero_len = offset - zero_off; 474 - 475 - error = xfs_iozero(ip, zero_off, zero_len); 476 - if (error) { 477 - goto out_lock; 478 - } 479 - 480 - start_zero_fsb = imap.br_startoff + imap.br_blockcount; 481 - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 482 - 483 - xfs_ilock(ip, XFS_ILOCK_EXCL); 484 - } 485 - 486 - return 0; 487 - 488 - out_lock: 489 - xfs_ilock(ip, XFS_ILOCK_EXCL); 490 - ASSERT(error >= 0); 491 - return error; 492 - } 493 - 494 - ssize_t /* bytes written, or (-) error */ 495 - xfs_write( 496 - struct xfs_inode *xip, 497 - struct kiocb *iocb, 498 - const struct iovec *iovp, 499 - unsigned int nsegs, 500 - loff_t *offset, 501 - int ioflags) 502 - { 503 - struct file *file = iocb->ki_filp; 504 - struct address_space *mapping = file->f_mapping; 505 - struct inode *inode = mapping->host; 506 - unsigned long segs = nsegs; 507 - xfs_mount_t *mp; 508 - ssize_t ret = 0, error = 0; 509 - xfs_fsize_t isize, new_size; 510 - int iolock; 511 - int eventsent = 0; 512 - size_t ocount = 0, count; 513 - loff_t pos; 514 - int need_i_mutex; 515 - 516 - XFS_STATS_INC(xs_write_calls); 517 - 518 - error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ); 519 - if (error) 520 - return error; 521 - 522 - count = ocount; 523 - pos = *offset; 524 - 525 - if (count == 0) 526 - return 0; 527 - 528 - mp = xip->i_mount; 529 - 530 - xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); 531 - 532 - if (XFS_FORCED_SHUTDOWN(mp)) 533 - return -EIO; 534 - 535 - relock: 536 - if (ioflags & IO_ISDIRECT) { 537 - iolock = XFS_IOLOCK_SHARED; 538 - need_i_mutex = 0; 539 - } else { 540 - iolock = XFS_IOLOCK_EXCL; 541 - need_i_mutex = 1; 542 - mutex_lock(&inode->i_mutex); 543 - } 544 - 545 - xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); 546 - 547 - start: 548 - error = -generic_write_checks(file, &pos, &count, 549 - S_ISBLK(inode->i_mode)); 550 - if (error) { 551 - xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); 552 - goto out_unlock_mutex; 553 - } 554 - 555 - if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) && 556 - !(ioflags & IO_INVIS) && !eventsent)) { 557 - int dmflags = FILP_DELAY_FLAG(file); 558 - 559 - if (need_i_mutex) 560 - dmflags |= DM_FLAGS_IMUX; 561 - 562 - xfs_iunlock(xip, XFS_ILOCK_EXCL); 563 - error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, 564 - pos, count, dmflags, &iolock); 565 - if (error) { 566 - goto out_unlock_internal; 567 - } 568 - xfs_ilock(xip, XFS_ILOCK_EXCL); 569 - eventsent = 1; 570 - 571 - /* 572 - * The iolock was dropped and reacquired in XFS_SEND_DATA 573 - * so we have to recheck the size when appending. 574 - * We will only "goto start;" once, since having sent the 575 - * event prevents another call to XFS_SEND_DATA, which is 576 - * what allows the size to change in the first place. 577 - */ 578 - if ((file->f_flags & O_APPEND) && pos != xip->i_size) 579 - goto start; 580 - } 581 - 582 - if (ioflags & IO_ISDIRECT) { 583 - xfs_buftarg_t *target = 584 - XFS_IS_REALTIME_INODE(xip) ? 
585 - mp->m_rtdev_targp : mp->m_ddev_targp; 586 - 587 - if ((pos & target->bt_smask) || (count & target->bt_smask)) { 588 - xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); 589 - return XFS_ERROR(-EINVAL); 590 - } 591 - 592 - if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { 593 - xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); 594 - iolock = XFS_IOLOCK_EXCL; 595 - need_i_mutex = 1; 596 - mutex_lock(&inode->i_mutex); 597 - xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); 598 - goto start; 599 - } 600 - } 601 - 602 - new_size = pos + count; 603 - if (new_size > xip->i_size) 604 - xip->i_new_size = new_size; 605 - 606 - if (likely(!(ioflags & IO_INVIS))) 607 - file_update_time(file); 608 - 609 - /* 610 - * If the offset is beyond the size of the file, we have a couple 611 - * of things to do. First, if there is already space allocated 612 - * we need to either create holes or zero the disk or ... 613 - * 614 - * If there is a page where the previous size lands, we need 615 - * to zero it out up to the new size. 616 - */ 617 - 618 - if (pos > xip->i_size) { 619 - error = xfs_zero_eof(xip, pos, xip->i_size); 620 - if (error) { 621 - xfs_iunlock(xip, XFS_ILOCK_EXCL); 622 - goto out_unlock_internal; 623 - } 624 - } 625 - xfs_iunlock(xip, XFS_ILOCK_EXCL); 626 - 627 - /* 628 - * If we're writing the file then make sure to clear the 629 - * setuid and setgid bits if the process is not being run 630 - * by root. This keeps people from modifying setuid and 631 - * setgid binaries. 632 - */ 633 - error = -file_remove_suid(file); 634 - if (unlikely(error)) 635 - goto out_unlock_internal; 636 - 637 - /* We can write back this queue in page reclaim */ 638 - current->backing_dev_info = mapping->backing_dev_info; 639 - 640 - if ((ioflags & IO_ISDIRECT)) { 641 - if (mapping->nrpages) { 642 - WARN_ON(need_i_mutex == 0); 643 - error = xfs_flushinval_pages(xip, 644 - (pos & PAGE_CACHE_MASK), 645 - -1, FI_REMAPF_LOCKED); 646 - if (error) 647 - goto out_unlock_internal; 648 - } 649 - 650 - if (need_i_mutex) { 651 - /* demote the lock now the cached pages are gone */ 652 - xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); 653 - mutex_unlock(&inode->i_mutex); 654 - 655 - iolock = XFS_IOLOCK_SHARED; 656 - need_i_mutex = 0; 657 - } 658 - 659 - trace_xfs_file_direct_write(xip, count, *offset, ioflags); 660 - ret = generic_file_direct_write(iocb, iovp, 661 - &segs, pos, offset, count, ocount); 662 - 663 - /* 664 - * direct-io write to a hole: fall through to buffered I/O 665 - * for completing the rest of the request. 
666 - */ 667 - if (ret >= 0 && ret != count) { 668 - XFS_STATS_ADD(xs_write_bytes, ret); 669 - 670 - pos += ret; 671 - count -= ret; 672 - 673 - ioflags &= ~IO_ISDIRECT; 674 - xfs_iunlock(xip, iolock); 675 - goto relock; 676 - } 677 - } else { 678 - int enospc = 0; 679 - ssize_t ret2 = 0; 680 - 681 - write_retry: 682 - trace_xfs_file_buffered_write(xip, count, *offset, ioflags); 683 - ret2 = generic_file_buffered_write(iocb, iovp, segs, 684 - pos, offset, count, ret); 685 - /* 686 - * if we just got an ENOSPC, flush the inode now we 687 - * aren't holding any page locks and retry *once* 688 - */ 689 - if (ret2 == -ENOSPC && !enospc) { 690 - error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); 691 - if (error) 692 - goto out_unlock_internal; 693 - enospc = 1; 694 - goto write_retry; 695 - } 696 - ret = ret2; 697 - } 698 - 699 - current->backing_dev_info = NULL; 700 - 701 - isize = i_size_read(inode); 702 - if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) 703 - *offset = isize; 704 - 705 - if (*offset > xip->i_size) { 706 - xfs_ilock(xip, XFS_ILOCK_EXCL); 707 - if (*offset > xip->i_size) 708 - xip->i_size = *offset; 709 - xfs_iunlock(xip, XFS_ILOCK_EXCL); 710 - } 711 - 712 - if (ret == -ENOSPC && 713 - DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { 714 - xfs_iunlock(xip, iolock); 715 - if (need_i_mutex) 716 - mutex_unlock(&inode->i_mutex); 717 - error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, 718 - DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, 719 - 0, 0, 0); /* Delay flag intentionally unused */ 720 - if (need_i_mutex) 721 - mutex_lock(&inode->i_mutex); 722 - xfs_ilock(xip, iolock); 723 - if (error) 724 - goto out_unlock_internal; 725 - goto start; 726 - } 727 - 728 - error = -ret; 729 - if (ret <= 0) 730 - goto out_unlock_internal; 731 - 732 - XFS_STATS_ADD(xs_write_bytes, ret); 733 - 734 - /* Handle various SYNC-type writes */ 735 - if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { 736 - loff_t end = pos + ret - 1; 737 - int error2; 738 - 739 - xfs_iunlock(xip, iolock); 740 - if (need_i_mutex) 741 - mutex_unlock(&inode->i_mutex); 742 - 743 - error2 = filemap_write_and_wait_range(mapping, pos, end); 744 - if (!error) 745 - error = error2; 746 - if (need_i_mutex) 747 - mutex_lock(&inode->i_mutex); 748 - xfs_ilock(xip, iolock); 749 - 750 - error2 = xfs_fsync(xip); 751 - if (!error) 752 - error = error2; 753 - } 754 - 755 - out_unlock_internal: 756 - if (xip->i_new_size) { 757 - xfs_ilock(xip, XFS_ILOCK_EXCL); 758 - xip->i_new_size = 0; 759 - /* 760 - * If this was a direct or synchronous I/O that failed (such 761 - * as ENOSPC) then part of the I/O may have been written to 762 - * disk before the error occured. In this case the on-disk 763 - * file size may have been adjusted beyond the in-memory file 764 - * size and now needs to be truncated back. 765 - */ 766 - if (xip->i_d.di_size > xip->i_size) 767 - xip->i_d.di_size = xip->i_size; 768 - xfs_iunlock(xip, XFS_ILOCK_EXCL); 769 - } 770 - xfs_iunlock(xip, iolock); 771 - out_unlock_mutex: 772 - if (need_i_mutex) 773 - mutex_unlock(&inode->i_mutex); 774 - return -error; 775 - } 776 - 777 - /* 778 - * If the underlying (data/log/rt) device is readonly, there are some 779 - * operations that cannot proceed. 
780 - */ 781 - int 782 - xfs_dev_is_read_only( 783 - xfs_mount_t *mp, 784 - char *message) 785 - { 786 - if (xfs_readonly_buftarg(mp->m_ddev_targp) || 787 - xfs_readonly_buftarg(mp->m_logdev_targp) || 788 - (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { 789 - cmn_err(CE_NOTE, 790 - "XFS: %s required on read-only device.", message); 791 - cmn_err(CE_NOTE, 792 - "XFS: write access unavailable, cannot proceed."); 793 - return EROFS; 794 - } 795 - return 0; 796 - }
···
-29
fs/xfs/linux-2.6/xfs_lrw.h
··· 1 - /* 2 - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 3 - * All Rights Reserved. 4 - * 5 - * This program is free software; you can redistribute it and/or 6 - * modify it under the terms of the GNU General Public License as 7 - * published by the Free Software Foundation. 8 - * 9 - * This program is distributed in the hope that it would be useful, 10 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 - * GNU General Public License for more details. 13 - * 14 - * You should have received a copy of the GNU General Public License 15 - * along with this program; if not, write the Free Software Foundation, 16 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 - */ 18 - #ifndef __XFS_LRW_H__ 19 - #define __XFS_LRW_H__ 20 - 21 - struct xfs_mount; 22 - struct xfs_inode; 23 - struct xfs_buf; 24 - 25 - extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 26 - 27 - extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); 28 - 29 - #endif /* __XFS_LRW_H__ */
···
+5 -5
fs/xfs/linux-2.6/xfs_sync.c
··· 607 set_freezable(); 608 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); 609 for (;;) { 610 - timeleft = schedule_timeout_interruptible(timeleft); 611 /* swsusp */ 612 try_to_freeze(); 613 if (kthread_should_stop() && list_empty(&mp->m_sync_list)) ··· 628 list_add_tail(&mp->m_sync_work.w_list, 629 &mp->m_sync_list); 630 } 631 - list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) 632 - list_move(&work->w_list, &tmp); 633 spin_unlock(&mp->m_sync_lock); 634 635 list_for_each_entry_safe(work, n, &tmp, w_list) { ··· 688 struct xfs_perag *pag; 689 690 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 691 - read_lock(&pag->pag_ici_lock); 692 spin_lock(&ip->i_flags_lock); 693 __xfs_inode_set_reclaim_tag(pag, ip); 694 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 695 spin_unlock(&ip->i_flags_lock); 696 - read_unlock(&pag->pag_ici_lock); 697 xfs_perag_put(pag); 698 } 699
··· 607 set_freezable(); 608 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); 609 for (;;) { 610 + if (list_empty(&mp->m_sync_list)) 611 + timeleft = schedule_timeout_interruptible(timeleft); 612 /* swsusp */ 613 try_to_freeze(); 614 if (kthread_should_stop() && list_empty(&mp->m_sync_list)) ··· 627 list_add_tail(&mp->m_sync_work.w_list, 628 &mp->m_sync_list); 629 } 630 + list_splice_init(&mp->m_sync_list, &tmp); 631 spin_unlock(&mp->m_sync_lock); 632 633 list_for_each_entry_safe(work, n, &tmp, w_list) { ··· 688 struct xfs_perag *pag; 689 690 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 691 + write_lock(&pag->pag_ici_lock); 692 spin_lock(&ip->i_flags_lock); 693 __xfs_inode_set_reclaim_tag(pag, ip); 694 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 695 spin_unlock(&ip->i_flags_lock); 696 + write_unlock(&pag->pag_ici_lock); 697 xfs_perag_put(pag); 698 } 699
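The xfssyncd change above does two things: the thread only parks in schedule_timeout_interruptible() when its work list is empty, and it drains the whole pending list with a single list_splice_init() instead of moving entries one at a time. A minimal user-space sketch of that drain-then-process shape (illustrative only; plain pthreads and a hand-rolled singly linked list, not the kernel's list_head API):

#include <pthread.h>
#include <stddef.h>

struct work {
	struct work *next;
	void (*fn)(struct work *);
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct work *queue;	/* pending work, protected by queue_lock */

static void run_pending(void)
{
	struct work *todo, *w;

	/* Grab the whole queue in one step while holding the lock ... */
	pthread_mutex_lock(&queue_lock);
	todo = queue;
	queue = NULL;
	pthread_mutex_unlock(&queue_lock);

	/* ... then process the now-private list with the lock dropped. */
	while ((w = todo) != NULL) {
		todo = w->next;
		w->fn(w);
	}
}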
-16
fs/xfs/linux-2.6/xfs_trace.c
··· 52 #include "quota/xfs_dquot.h" 53 54 /* 55 - * Format fsblock number into a static buffer & return it. 56 - */ 57 - STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno) 58 - { 59 - static char rval[50]; 60 - 61 - if (bno == NULLFSBLOCK) 62 - sprintf(rval, "NULLFSBLOCK"); 63 - else if (isnullstartblock(bno)) 64 - sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno)); 65 - else 66 - sprintf(rval, "%lld", (xfs_dfsbno_t)bno); 67 - return rval; 68 - } 69 - 70 - /* 71 * We include this last to have the helpers above available for the trace 72 * event implementations. 73 */
··· 52 #include "quota/xfs_dquot.h" 53 54 /* 55 * We include this last to have the helpers above available for the trace 56 * event implementations. 57 */
+11 -11
fs/xfs/linux-2.6/xfs_trace.h
··· 197 __entry->caller_ip = caller_ip; 198 ), 199 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 200 - "offset %lld block %s count %lld flag %d caller %pf", 201 MAJOR(__entry->dev), MINOR(__entry->dev), 202 __entry->ino, 203 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 204 (long)__entry->idx, 205 __entry->startoff, 206 - xfs_fmtfsblock(__entry->startblock), 207 __entry->blockcount, 208 __entry->state, 209 (char *)__entry->caller_ip) ··· 241 __entry->caller_ip = caller_ip; 242 ), 243 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 244 - "offset %lld block %s count %lld flag %d caller %pf", 245 MAJOR(__entry->dev), MINOR(__entry->dev), 246 __entry->ino, 247 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 248 (long)__entry->idx, 249 __entry->startoff, 250 - xfs_fmtfsblock(__entry->startblock), 251 __entry->blockcount, 252 __entry->state, 253 (char *)__entry->caller_ip) ··· 593 TP_ARGS(dqp), 594 TP_STRUCT__entry( 595 __field(dev_t, dev) 596 - __field(__be32, id) 597 __field(unsigned, flags) 598 __field(unsigned, nrefs) 599 __field(unsigned long long, res_bcount) ··· 606 ), \ 607 TP_fast_assign( 608 __entry->dev = dqp->q_mount->m_super->s_dev; 609 - __entry->id = dqp->q_core.d_id; 610 __entry->flags = dqp->dq_flags; 611 __entry->nrefs = dqp->q_nrefs; 612 __entry->res_bcount = dqp->q_res_bcount; ··· 622 be64_to_cpu(dqp->q_core.d_ino_softlimit); 623 ), 624 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " 625 - "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " 626 - "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", 627 MAJOR(__entry->dev), MINOR(__entry->dev), 628 - be32_to_cpu(__entry->id), 629 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), 630 __entry->nrefs, 631 __entry->res_bcount, ··· 881 ), \ 882 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 883 "offset 0x%llx count %zd flags %s " \ 884 - "startoff 0x%llx startblock %s blockcount 0x%llx", \ 885 MAJOR(__entry->dev), MINOR(__entry->dev), \ 886 __entry->ino, \ 887 __entry->size, \ ··· 890 __entry->count, \ 891 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 892 __entry->startoff, \ 893 - xfs_fmtfsblock(__entry->startblock), \ 894 __entry->blockcount) \ 895 ) 896 DEFINE_IOMAP_EVENT(xfs_iomap_enter);
··· 197 __entry->caller_ip = caller_ip; 198 ), 199 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 200 + "offset %lld block %lld count %lld flag %d caller %pf", 201 MAJOR(__entry->dev), MINOR(__entry->dev), 202 __entry->ino, 203 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 204 (long)__entry->idx, 205 __entry->startoff, 206 + (__int64_t)__entry->startblock, 207 __entry->blockcount, 208 __entry->state, 209 (char *)__entry->caller_ip) ··· 241 __entry->caller_ip = caller_ip; 242 ), 243 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 244 + "offset %lld block %lld count %lld flag %d caller %pf", 245 MAJOR(__entry->dev), MINOR(__entry->dev), 246 __entry->ino, 247 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 248 (long)__entry->idx, 249 __entry->startoff, 250 + (__int64_t)__entry->startblock, 251 __entry->blockcount, 252 __entry->state, 253 (char *)__entry->caller_ip) ··· 593 TP_ARGS(dqp), 594 TP_STRUCT__entry( 595 __field(dev_t, dev) 596 + __field(u32, id) 597 __field(unsigned, flags) 598 __field(unsigned, nrefs) 599 __field(unsigned long long, res_bcount) ··· 606 ), \ 607 TP_fast_assign( 608 __entry->dev = dqp->q_mount->m_super->s_dev; 609 + __entry->id = be32_to_cpu(dqp->q_core.d_id); 610 __entry->flags = dqp->dq_flags; 611 __entry->nrefs = dqp->q_nrefs; 612 __entry->res_bcount = dqp->q_res_bcount; ··· 622 be64_to_cpu(dqp->q_core.d_ino_softlimit); 623 ), 624 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " 625 + "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx " 626 + "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]", 627 MAJOR(__entry->dev), MINOR(__entry->dev), 628 + __entry->id, 629 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), 630 __entry->nrefs, 631 __entry->res_bcount, ··· 881 ), \ 882 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 883 "offset 0x%llx count %zd flags %s " \ 884 + "startoff 0x%llx startblock %lld blockcount 0x%llx", \ 885 MAJOR(__entry->dev), MINOR(__entry->dev), \ 886 __entry->ino, \ 887 __entry->size, \ ··· 890 __entry->count, \ 891 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 892 __entry->startoff, \ 893 + (__int64_t)__entry->startblock, \ 894 __entry->blockcount) \ 895 ) 896 DEFINE_IOMAP_EVENT(xfs_iomap_enter);
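The tracing fix above removes xfs_fmtfsblock(), which formatted block numbers into a single static buffer, and instead records the raw startblock in the trace entry and prints it with %lld. Returning a pointer to one static buffer is unsafe whenever the string is consumed later or more than once per statement; a small stand-alone illustration of the failure mode (ordinary printf(), not the tracepoint machinery):

#include <stdio.h>

/* Mimics the shape of the removed helper: every call reuses one static buffer. */
static char *fmtblock(long long bno)
{
	static char buf[32];

	snprintf(buf, sizeof(buf), "%lld", bno);
	return buf;
}

int main(void)
{
	/*
	 * Both arguments point at the same buffer, so whichever call runs
	 * second wins and the same value is printed twice.
	 */
	printf("static buffer: a=%s b=%s\n", fmtblock(1), fmtblock(2));

	/*
	 * Storing the raw value and formatting it in place (the approach the
	 * trace events switch to) has no such aliasing problem.
	 */
	printf("raw value:     a=%lld b=%lld\n", 1LL, 2LL);
	return 0;
}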
+119 -101
fs/xfs/xfs_bmap.c
··· 2550 } 2551 2552 STATIC int 2553 xfs_bmap_btalloc( 2554 xfs_bmalloca_t *ap) /* bmap alloc argument struct */ 2555 { 2556 xfs_mount_t *mp; /* mount point structure */ 2557 xfs_alloctype_t atype = 0; /* type for allocation routines */ 2558 xfs_extlen_t align; /* minimum allocation alignment */ 2559 - xfs_agnumber_t ag; 2560 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 2561 - xfs_agnumber_t startag; 2562 xfs_alloc_arg_t args; 2563 xfs_extlen_t blen; 2564 xfs_extlen_t nextminlen = 0; 2565 - xfs_perag_t *pag; 2566 int nullfb; /* true if ap->firstblock isn't set */ 2567 int isaligned; 2568 - int notinit; 2569 int tryagain; 2570 int error; 2571 ··· 2724 args.firstblock = ap->firstblock; 2725 blen = 0; 2726 if (nullfb) { 2727 - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) 2728 - args.type = XFS_ALLOCTYPE_NEAR_BNO; 2729 - else 2730 - args.type = XFS_ALLOCTYPE_START_BNO; 2731 - args.total = ap->total; 2732 - 2733 - /* 2734 - * Search for an allocation group with a single extent 2735 - * large enough for the request. 2736 - * 2737 - * If one isn't found, then adjust the minimum allocation 2738 - * size to the largest space found. 2739 - */ 2740 - startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); 2741 - if (startag == NULLAGNUMBER) 2742 - startag = ag = 0; 2743 - notinit = 0; 2744 - pag = xfs_perag_get(mp, ag); 2745 - while (blen < ap->alen) { 2746 - if (!pag->pagf_init && 2747 - (error = xfs_alloc_pagf_init(mp, args.tp, 2748 - ag, XFS_ALLOC_FLAG_TRYLOCK))) { 2749 - xfs_perag_put(pag); 2750 - return error; 2751 - } 2752 - /* 2753 - * See xfs_alloc_fix_freelist... 2754 - */ 2755 - if (pag->pagf_init) { 2756 - xfs_extlen_t longest; 2757 - longest = xfs_alloc_longest_free_extent(mp, pag); 2758 - if (blen < longest) 2759 - blen = longest; 2760 - } else 2761 - notinit = 1; 2762 - 2763 - if (xfs_inode_is_filestream(ap->ip)) { 2764 - if (blen >= ap->alen) 2765 - break; 2766 - 2767 - if (ap->userdata) { 2768 - /* 2769 - * If startag is an invalid AG, we've 2770 - * come here once before and 2771 - * xfs_filestream_new_ag picked the 2772 - * best currently available. 2773 - * 2774 - * Don't continue looping, since we 2775 - * could loop forever. 2776 - */ 2777 - if (startag == NULLAGNUMBER) 2778 - break; 2779 - 2780 - error = xfs_filestream_new_ag(ap, &ag); 2781 - xfs_perag_put(pag); 2782 - if (error) 2783 - return error; 2784 - 2785 - /* loop again to set 'blen'*/ 2786 - startag = NULLAGNUMBER; 2787 - pag = xfs_perag_get(mp, ag); 2788 - continue; 2789 - } 2790 - } 2791 - if (++ag == mp->m_sb.sb_agcount) 2792 - ag = 0; 2793 - if (ag == startag) 2794 - break; 2795 - xfs_perag_put(pag); 2796 - pag = xfs_perag_get(mp, ag); 2797 - } 2798 - xfs_perag_put(pag); 2799 - /* 2800 - * Since the above loop did a BUF_TRYLOCK, it is 2801 - * possible that there is space for this request. 2802 - */ 2803 - if (notinit || blen < ap->minlen) 2804 - args.minlen = ap->minlen; 2805 - /* 2806 - * If the best seen length is less than the request 2807 - * length, use the best as the minimum. 2808 - */ 2809 - else if (blen < ap->alen) 2810 - args.minlen = blen; 2811 - /* 2812 - * Otherwise we've seen an extent as big as alen, 2813 - * use that as the minimum. 2814 - */ 2815 - else 2816 - args.minlen = ap->alen; 2817 - 2818 - /* 2819 - * set the failure fallback case to look in the selected 2820 - * AG as the stream may have moved. 
2821 - */ 2822 - if (xfs_inode_is_filestream(ap->ip)) 2823 - ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0); 2824 } else if (ap->low) { 2825 if (xfs_inode_is_filestream(ap->ip)) 2826 args.type = XFS_ALLOCTYPE_FIRST_AG;
··· 2550 } 2551 2552 STATIC int 2553 + xfs_bmap_btalloc_nullfb( 2554 + struct xfs_bmalloca *ap, 2555 + struct xfs_alloc_arg *args, 2556 + xfs_extlen_t *blen) 2557 + { 2558 + struct xfs_mount *mp = ap->ip->i_mount; 2559 + struct xfs_perag *pag; 2560 + xfs_agnumber_t ag, startag; 2561 + int notinit = 0; 2562 + int error; 2563 + 2564 + if (ap->userdata && xfs_inode_is_filestream(ap->ip)) 2565 + args->type = XFS_ALLOCTYPE_NEAR_BNO; 2566 + else 2567 + args->type = XFS_ALLOCTYPE_START_BNO; 2568 + args->total = ap->total; 2569 + 2570 + /* 2571 + * Search for an allocation group with a single extent large enough 2572 + * for the request. If one isn't found, then adjust the minimum 2573 + * allocation size to the largest space found. 2574 + */ 2575 + startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); 2576 + if (startag == NULLAGNUMBER) 2577 + startag = ag = 0; 2578 + 2579 + pag = xfs_perag_get(mp, ag); 2580 + while (*blen < ap->alen) { 2581 + if (!pag->pagf_init) { 2582 + error = xfs_alloc_pagf_init(mp, args->tp, ag, 2583 + XFS_ALLOC_FLAG_TRYLOCK); 2584 + if (error) { 2585 + xfs_perag_put(pag); 2586 + return error; 2587 + } 2588 + } 2589 + 2590 + /* 2591 + * See xfs_alloc_fix_freelist... 2592 + */ 2593 + if (pag->pagf_init) { 2594 + xfs_extlen_t longest; 2595 + longest = xfs_alloc_longest_free_extent(mp, pag); 2596 + if (*blen < longest) 2597 + *blen = longest; 2598 + } else 2599 + notinit = 1; 2600 + 2601 + if (xfs_inode_is_filestream(ap->ip)) { 2602 + if (*blen >= ap->alen) 2603 + break; 2604 + 2605 + if (ap->userdata) { 2606 + /* 2607 + * If startag is an invalid AG, we've 2608 + * come here once before and 2609 + * xfs_filestream_new_ag picked the 2610 + * best currently available. 2611 + * 2612 + * Don't continue looping, since we 2613 + * could loop forever. 2614 + */ 2615 + if (startag == NULLAGNUMBER) 2616 + break; 2617 + 2618 + error = xfs_filestream_new_ag(ap, &ag); 2619 + xfs_perag_put(pag); 2620 + if (error) 2621 + return error; 2622 + 2623 + /* loop again to set 'blen'*/ 2624 + startag = NULLAGNUMBER; 2625 + pag = xfs_perag_get(mp, ag); 2626 + continue; 2627 + } 2628 + } 2629 + if (++ag == mp->m_sb.sb_agcount) 2630 + ag = 0; 2631 + if (ag == startag) 2632 + break; 2633 + xfs_perag_put(pag); 2634 + pag = xfs_perag_get(mp, ag); 2635 + } 2636 + xfs_perag_put(pag); 2637 + 2638 + /* 2639 + * Since the above loop did a BUF_TRYLOCK, it is 2640 + * possible that there is space for this request. 2641 + */ 2642 + if (notinit || *blen < ap->minlen) 2643 + args->minlen = ap->minlen; 2644 + /* 2645 + * If the best seen length is less than the request 2646 + * length, use the best as the minimum. 2647 + */ 2648 + else if (*blen < ap->alen) 2649 + args->minlen = *blen; 2650 + /* 2651 + * Otherwise we've seen an extent as big as alen, 2652 + * use that as the minimum. 2653 + */ 2654 + else 2655 + args->minlen = ap->alen; 2656 + 2657 + /* 2658 + * set the failure fallback case to look in the selected 2659 + * AG as the stream may have moved. 
2660 + */ 2661 + if (xfs_inode_is_filestream(ap->ip)) 2662 + ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); 2663 + 2664 + return 0; 2665 + } 2666 + 2667 + STATIC int 2668 xfs_bmap_btalloc( 2669 xfs_bmalloca_t *ap) /* bmap alloc argument struct */ 2670 { 2671 xfs_mount_t *mp; /* mount point structure */ 2672 xfs_alloctype_t atype = 0; /* type for allocation routines */ 2673 xfs_extlen_t align; /* minimum allocation alignment */ 2674 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 2675 + xfs_agnumber_t ag; 2676 xfs_alloc_arg_t args; 2677 xfs_extlen_t blen; 2678 xfs_extlen_t nextminlen = 0; 2679 int nullfb; /* true if ap->firstblock isn't set */ 2680 int isaligned; 2681 int tryagain; 2682 int error; 2683 ··· 2612 args.firstblock = ap->firstblock; 2613 blen = 0; 2614 if (nullfb) { 2615 + error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); 2616 + if (error) 2617 + return error; 2618 } else if (ap->low) { 2619 if (xfs_inode_is_filestream(ap->ip)) 2620 args.type = XFS_ALLOCTYPE_FIRST_AG;
+2 -1
fs/xfs/xfs_fs.h
··· 292 __s32 bs_extents; /* number of extents */ 293 __u32 bs_gen; /* generation count */ 294 __u16 bs_projid; /* project id */ 295 - unsigned char bs_pad[14]; /* pad space, unused */ 296 __u32 bs_dmevmask; /* DMIG event mask */ 297 __u16 bs_dmstate; /* DMIG state info */ 298 __u16 bs_aextents; /* attribute number of extents */
··· 292 __s32 bs_extents; /* number of extents */ 293 __u32 bs_gen; /* generation count */ 294 __u16 bs_projid; /* project id */ 295 + __u16 bs_forkoff; /* inode fork offset in bytes */ 296 + unsigned char bs_pad[12]; /* pad space, unused */ 297 __u32 bs_dmevmask; /* DMIG event mask */ 298 __u16 bs_dmstate; /* DMIG state info */ 299 __u16 bs_aextents; /* attribute number of extents */
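bs_forkoff is carved out of the existing pad area, so the bulkstat structure keeps its size and the fields that follow keep their offsets. A tiny compile-time check of that accounting, using hypothetical stand-in types rather than the real xfs_bstat layout:

#include <stdint.h>

struct old_tail {
	uint16_t projid;
	unsigned char pad[14];
};

struct new_tail {
	uint16_t projid;
	uint16_t forkoff;	/* new field, taken out of the pad */
	unsigned char pad[12];
};

/* Both tails are 16 bytes, so the ioctl layout size is unchanged. */
_Static_assert(sizeof(struct old_tail) == sizeof(struct new_tail),
	       "carving a field out of the pad must not change the size");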
+13 -6
fs/xfs/xfs_iget.c
··· 190 trace_xfs_iget_reclaim(ip); 191 192 /* 193 - * We need to set XFS_INEW atomically with clearing the 194 - * reclaimable tag so that we do have an indicator of the 195 - * inode still being initialized. 196 */ 197 - ip->i_flags |= XFS_INEW; 198 - ip->i_flags &= ~XFS_IRECLAIMABLE; 199 - __xfs_inode_clear_reclaim_tag(mp, pag, ip); 200 201 spin_unlock(&ip->i_flags_lock); 202 read_unlock(&pag->pag_ici_lock); ··· 215 trace_xfs_iget_reclaim(ip); 216 goto out_error; 217 } 218 inode->i_state = I_NEW; 219 } else { 220 /* If the VFS inode is being torn down, pause and try again. */ 221 if (!igrab(inode)) {
··· 190 trace_xfs_iget_reclaim(ip); 191 192 /* 193 + * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode 194 + * from stomping over us while we recycle the inode. We can't 195 + * clear the radix tree reclaimable tag yet as it requires 196 + * pag_ici_lock to be held exclusive. 197 */ 198 + ip->i_flags |= XFS_IRECLAIM; 199 200 spin_unlock(&ip->i_flags_lock); 201 read_unlock(&pag->pag_ici_lock); ··· 216 trace_xfs_iget_reclaim(ip); 217 goto out_error; 218 } 219 + 220 + write_lock(&pag->pag_ici_lock); 221 + spin_lock(&ip->i_flags_lock); 222 + ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); 223 + ip->i_flags |= XFS_INEW; 224 + __xfs_inode_clear_reclaim_tag(mp, pag, ip); 225 inode->i_state = I_NEW; 226 + spin_unlock(&ip->i_flags_lock); 227 + write_unlock(&pag->pag_ici_lock); 228 } else { 229 /* If the VFS inode is being torn down, pause and try again. */ 230 if (!igrab(inode)) {
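The locking fix above works around the fact that a read-held pag_ici_lock cannot be upgraded in place: the inode is first flagged XFS_IRECLAIM under the read lock so reclaim leaves it alone, and only after re-initialisation is the lock retaken in write mode to clear the radix tree reclaimable tag. The shape of that dance as a user-space sketch with hypothetical names (pthread rwlock/mutex standing in for the kernel primitives):

#include <pthread.h>

#define OBJ_RECLAIMABLE	0x1
#define OBJ_RECLAIM	0x2	/* being recycled; reclaim must keep away */
#define OBJ_NEW		0x4

struct obj {
	pthread_mutex_t flags_lock;
	unsigned int flags;
};

static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;

static void recycle(struct obj *o)
{
	/* Found under the read lock: mark it busy so nobody reclaims it. */
	pthread_rwlock_rdlock(&tree_lock);
	pthread_mutex_lock(&o->flags_lock);
	o->flags |= OBJ_RECLAIM;
	pthread_mutex_unlock(&o->flags_lock);
	pthread_rwlock_unlock(&tree_lock);

	/* ... re-initialise the object with no locks held ... */

	/* Tag updates need the lock exclusively, so retake it for write. */
	pthread_rwlock_wrlock(&tree_lock);
	pthread_mutex_lock(&o->flags_lock);
	o->flags &= ~(OBJ_RECLAIMABLE | OBJ_RECLAIM);
	o->flags |= OBJ_NEW;
	pthread_mutex_unlock(&o->flags_lock);
	pthread_rwlock_unlock(&tree_lock);
}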
+12 -56
fs/xfs/xfs_inode.c
··· 2439 } 2440 2441 /* 2442 - * Increment the pin count of the given buffer. 2443 - * This value is protected by ipinlock spinlock in the mount structure. 2444 */ 2445 - void 2446 - xfs_ipin( 2447 - xfs_inode_t *ip) 2448 { 2449 - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2450 - 2451 - atomic_inc(&ip->i_pincount); 2452 - } 2453 - 2454 - /* 2455 - * Decrement the pin count of the given inode, and wake up 2456 - * anyone in xfs_iwait_unpin() if the count goes to 0. The 2457 - * inode must have been previously pinned with a call to xfs_ipin(). 2458 - */ 2459 - void 2460 - xfs_iunpin( 2461 - xfs_inode_t *ip) 2462 - { 2463 - ASSERT(atomic_read(&ip->i_pincount) > 0); 2464 - 2465 - if (atomic_dec_and_test(&ip->i_pincount)) 2466 - wake_up(&ip->i_ipin_wait); 2467 - } 2468 - 2469 - /* 2470 - * This is called to unpin an inode. It can be directed to wait or to return 2471 - * immediately without waiting for the inode to be unpinned. The caller must 2472 - * have the inode locked in at least shared mode so that the buffer cannot be 2473 - * subsequently pinned once someone is waiting for it to be unpinned. 2474 - */ 2475 - STATIC void 2476 - __xfs_iunpin_wait( 2477 - xfs_inode_t *ip, 2478 - int wait) 2479 - { 2480 - xfs_inode_log_item_t *iip = ip->i_itemp; 2481 - 2482 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2483 - if (atomic_read(&ip->i_pincount) == 0) 2484 - return; 2485 2486 /* Give the log a push to start the unpinning I/O */ 2487 - if (iip && iip->ili_last_lsn) 2488 - xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0); 2489 - else 2490 - xfs_log_force(ip->i_mount, 0); 2491 2492 - if (wait) 2493 - wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2494 } 2495 2496 void 2497 xfs_iunpin_wait( 2498 - xfs_inode_t *ip) 2499 { 2500 - __xfs_iunpin_wait(ip, 1); 2501 } 2502 - 2503 - static inline void 2504 - xfs_iunpin_nowait( 2505 - xfs_inode_t *ip) 2506 - { 2507 - __xfs_iunpin_wait(ip, 0); 2508 - } 2509 - 2510 2511 /* 2512 * xfs_iextents_copy()
··· 2439 } 2440 2441 /* 2442 + * This is called to unpin an inode. The caller must have the inode locked 2443 + * in at least shared mode so that the buffer cannot be subsequently pinned 2444 + * once someone is waiting for it to be unpinned. 2445 */ 2446 + static void 2447 + xfs_iunpin_nowait( 2448 + struct xfs_inode *ip) 2449 { 2450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2451 2452 /* Give the log a push to start the unpinning I/O */ 2453 + xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2454 2455 } 2456 2457 void 2458 xfs_iunpin_wait( 2459 + struct xfs_inode *ip) 2460 { 2461 + if (xfs_ipincount(ip)) { 2462 + xfs_iunpin_nowait(ip); 2463 + wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0)); 2464 + } 2465 } 2466 2467 /* 2468 * xfs_iextents_copy()
+1 -2
fs/xfs/xfs_inode.h
··· 471 int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 472 473 void xfs_iext_realloc(xfs_inode_t *, int, int); 474 - void xfs_ipin(xfs_inode_t *); 475 - void xfs_iunpin(xfs_inode_t *); 476 void xfs_iunpin_wait(xfs_inode_t *); 477 int xfs_iflush(xfs_inode_t *, uint); 478 void xfs_ichgtime(xfs_inode_t *, int); ··· 478 void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 479 480 void xfs_synchronize_times(xfs_inode_t *); 481 void xfs_mark_inode_dirty_sync(xfs_inode_t *); 482 483 #define IHOLD(ip) \
··· 471 int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 472 473 void xfs_iext_realloc(xfs_inode_t *, int, int); 474 void xfs_iunpin_wait(xfs_inode_t *); 475 int xfs_iflush(xfs_inode_t *, uint); 476 void xfs_ichgtime(xfs_inode_t *, int); ··· 480 void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 481 482 void xfs_synchronize_times(xfs_inode_t *); 483 + void xfs_mark_inode_dirty(xfs_inode_t *); 484 void xfs_mark_inode_dirty_sync(xfs_inode_t *); 485 486 #define IHOLD(ip) \
+11 -7
fs/xfs/xfs_inode_item.c
··· 535 536 /* 537 * This is called to pin the inode associated with the inode log 538 - * item in memory so it cannot be written out. Do this by calling 539 - * xfs_ipin() to bump the pin count in the inode while holding the 540 - * inode pin lock. 541 */ 542 STATIC void 543 xfs_inode_item_pin( 544 xfs_inode_log_item_t *iip) 545 { 546 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 547 - xfs_ipin(iip->ili_inode); 548 } 549 550 551 /* 552 * This is called to unpin the inode associated with the inode log 553 * item which was previously pinned with a call to xfs_inode_item_pin(). 554 - * Just call xfs_iunpin() on the inode to do this. 555 */ 556 /* ARGSUSED */ 557 STATIC void ··· 559 xfs_inode_log_item_t *iip, 560 int stale) 561 { 562 - xfs_iunpin(iip->ili_inode); 563 } 564 565 /* ARGSUSED */ ··· 572 xfs_inode_log_item_t *iip, 573 xfs_trans_t *tp) 574 { 575 - xfs_iunpin(iip->ili_inode); 576 } 577 578 /*
··· 535 536 /* 537 * This is called to pin the inode associated with the inode log 538 + * item in memory so it cannot be written out. 539 */ 540 STATIC void 541 xfs_inode_item_pin( 542 xfs_inode_log_item_t *iip) 543 { 544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 545 + 546 + atomic_inc(&iip->ili_inode->i_pincount); 547 } 548 549 550 /* 551 * This is called to unpin the inode associated with the inode log 552 * item which was previously pinned with a call to xfs_inode_item_pin(). 553 + * 554 + * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. 555 */ 556 /* ARGSUSED */ 557 STATIC void ··· 559 xfs_inode_log_item_t *iip, 560 int stale) 561 { 562 + struct xfs_inode *ip = iip->ili_inode; 563 + 564 + ASSERT(atomic_read(&ip->i_pincount) > 0); 565 + if (atomic_dec_and_test(&ip->i_pincount)) 566 + wake_up(&ip->i_ipin_wait); 567 } 568 569 /* ARGSUSED */ ··· 568 xfs_inode_log_item_t *iip, 569 xfs_trans_t *tp) 570 { 571 + xfs_inode_item_unpin(iip, 0); 572 } 573 574 /*
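With xfs_ipin()/xfs_iunpin() gone, the pin count is bumped directly in xfs_inode_item_pin(), the unpin side wakes waiters when the count reaches zero, and xfs_iunpin_wait() pushes the log and sleeps until that happens. A user-space analogue of that last-unpin-wakes-the-waiter pairing (C11 atomics plus a condition variable; illustrative only):

#include <pthread.h>
#include <stdatomic.h>

static atomic_int pincount;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t unpinned = PTHREAD_COND_INITIALIZER;

void pin(void)
{
	atomic_fetch_add(&pincount, 1);
}

void unpin(void)
{
	/* fetch_sub returns the old value: 1 means we just hit zero. */
	if (atomic_fetch_sub(&pincount, 1) == 1) {
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&unpinned);
		pthread_mutex_unlock(&lock);
	}
}

void unpin_wait(void)
{
	/* Recheck the predicate under the lock, so no wakeup is lost. */
	pthread_mutex_lock(&lock);
	while (atomic_load(&pincount) != 0)
		pthread_cond_wait(&unpinned, &lock);
	pthread_mutex_unlock(&lock);
}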
+2
fs/xfs/xfs_itable.c
··· 106 buf->bs_dmevmask = dic->di_dmevmask; 107 buf->bs_dmstate = dic->di_dmstate; 108 buf->bs_aextents = dic->di_anextents; 109 110 switch (dic->di_format) { 111 case XFS_DINODE_FMT_DEV: ··· 177 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); 178 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); 179 buf->bs_aextents = be16_to_cpu(dic->di_anextents); 180 181 switch (dic->di_format) { 182 case XFS_DINODE_FMT_DEV:
··· 106 buf->bs_dmevmask = dic->di_dmevmask; 107 buf->bs_dmstate = dic->di_dmstate; 108 buf->bs_aextents = dic->di_anextents; 109 + buf->bs_forkoff = XFS_IFORK_BOFF(ip); 110 111 switch (dic->di_format) { 112 case XFS_DINODE_FMT_DEV: ··· 176 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); 177 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); 178 buf->bs_aextents = be16_to_cpu(dic->di_anextents); 179 + buf->bs_forkoff = XFS_DFORK_BOFF(dic); 180 181 switch (dic->di_format) { 182 case XFS_DINODE_FMT_DEV:
+52 -54
fs/xfs/xfs_log.c
··· 60 STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 61 STATIC void xlog_dealloc_log(xlog_t *log); 62 STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], 63 - int nentries, xfs_log_ticket_t tic, 64 xfs_lsn_t *start_lsn, 65 xlog_in_core_t **commit_iclog, 66 uint flags); ··· 243 * out when the next write occurs. 244 */ 245 xfs_lsn_t 246 - xfs_log_done(xfs_mount_t *mp, 247 - xfs_log_ticket_t xtic, 248 - void **iclog, 249 - uint flags) 250 { 251 - xlog_t *log = mp->m_log; 252 - xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic; 253 - xfs_lsn_t lsn = 0; 254 255 if (XLOG_FORCED_SHUTDOWN(log) || 256 /* ··· 258 * If we get an error, just continue and give back the log ticket. 259 */ 260 (((ticket->t_flags & XLOG_TIC_INITED) == 0) && 261 - (xlog_commit_record(mp, ticket, 262 - (xlog_in_core_t **)iclog, &lsn)))) { 263 lsn = (xfs_lsn_t) -1; 264 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { 265 flags |= XFS_LOG_REL_PERM_RESERV; ··· 288 } 289 290 return lsn; 291 - } /* xfs_log_done */ 292 293 /* 294 * Attaches a new iclog I/O completion callback routine during ··· 297 * executing the callback at an appropriate time. 298 */ 299 int 300 - xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ 301 - void *iclog_hndl, /* iclog to hang callback off */ 302 - xfs_log_callback_t *cb) 303 { 304 - xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; 305 int abortflg; 306 307 spin_lock(&iclog->ic_callback_lock); ··· 315 } 316 spin_unlock(&iclog->ic_callback_lock); 317 return abortflg; 318 - } /* xfs_log_notify */ 319 320 int 321 - xfs_log_release_iclog(xfs_mount_t *mp, 322 - void *iclog_hndl) 323 { 324 - xlog_t *log = mp->m_log; 325 - xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; 326 - 327 - if (xlog_state_release_iclog(log, iclog)) { 328 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 329 return EIO; 330 } ··· 341 * reservation, we prevent over allocation problems. 342 */ 343 int 344 - xfs_log_reserve(xfs_mount_t *mp, 345 - int unit_bytes, 346 - int cnt, 347 - xfs_log_ticket_t *ticket, 348 - __uint8_t client, 349 - uint flags, 350 - uint t_type) 351 { 352 - xlog_t *log = mp->m_log; 353 - xlog_ticket_t *internal_ticket; 354 - int retval = 0; 355 356 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 357 ASSERT((flags & XFS_LOG_NOSLEEP) == 0); ··· 365 366 if (*ticket != NULL) { 367 ASSERT(flags & XFS_LOG_PERM_RESERV); 368 - internal_ticket = (xlog_ticket_t *)*ticket; 369 370 trace_xfs_log_reserve(log, internal_ticket); 371 ··· 517 xlog_in_core_t *first_iclog; 518 #endif 519 xfs_log_iovec_t reg[1]; 520 - xfs_log_ticket_t tic = NULL; 521 xfs_lsn_t lsn; 522 int error; 523 ··· 654 * transaction occur with one call to xfs_log_write(). 655 */ 656 int 657 - xfs_log_write(xfs_mount_t * mp, 658 - xfs_log_iovec_t reg[], 659 - int nentries, 660 - xfs_log_ticket_t tic, 661 - xfs_lsn_t *start_lsn) 662 { 663 - int error; 664 - xlog_t *log = mp->m_log; 665 666 if (XLOG_FORCED_SHUTDOWN(log)) 667 return XFS_ERROR(EIO); 668 669 - if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { 670 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 671 - } 672 return error; 673 - } /* xfs_log_write */ 674 - 675 676 void 677 xfs_log_move_tail(xfs_mount_t *mp, ··· 1640 * bytes have been written out. 
1641 */ 1642 STATIC int 1643 - xlog_write(xfs_mount_t * mp, 1644 - xfs_log_iovec_t reg[], 1645 - int nentries, 1646 - xfs_log_ticket_t tic, 1647 - xfs_lsn_t *start_lsn, 1648 - xlog_in_core_t **commit_iclog, 1649 - uint flags) 1650 { 1651 xlog_t *log = mp->m_log; 1652 - xlog_ticket_t *ticket = (xlog_ticket_t *)tic; 1653 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ 1654 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1655 __psint_t ptr; /* copy address into data region */ ··· 1763 default: 1764 xfs_fs_cmn_err(CE_WARN, mp, 1765 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1766 - logop_head->oh_clientid, tic); 1767 return XFS_ERROR(EIO); 1768 } 1769
··· 60 STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 61 STATIC void xlog_dealloc_log(xlog_t *log); 62 STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], 63 + int nentries, struct xlog_ticket *tic, 64 xfs_lsn_t *start_lsn, 65 xlog_in_core_t **commit_iclog, 66 uint flags); ··· 243 * out when the next write occurs. 244 */ 245 xfs_lsn_t 246 + xfs_log_done( 247 + struct xfs_mount *mp, 248 + struct xlog_ticket *ticket, 249 + struct xlog_in_core **iclog, 250 + uint flags) 251 { 252 + struct log *log = mp->m_log; 253 + xfs_lsn_t lsn = 0; 254 255 if (XLOG_FORCED_SHUTDOWN(log) || 256 /* ··· 258 * If we get an error, just continue and give back the log ticket. 259 */ 260 (((ticket->t_flags & XLOG_TIC_INITED) == 0) && 261 + (xlog_commit_record(mp, ticket, iclog, &lsn)))) { 262 lsn = (xfs_lsn_t) -1; 263 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { 264 flags |= XFS_LOG_REL_PERM_RESERV; ··· 289 } 290 291 return lsn; 292 + } 293 294 /* 295 * Attaches a new iclog I/O completion callback routine during ··· 298 * executing the callback at an appropriate time. 299 */ 300 int 301 + xfs_log_notify( 302 + struct xfs_mount *mp, 303 + struct xlog_in_core *iclog, 304 + xfs_log_callback_t *cb) 305 { 306 int abortflg; 307 308 spin_lock(&iclog->ic_callback_lock); ··· 316 } 317 spin_unlock(&iclog->ic_callback_lock); 318 return abortflg; 319 + } 320 321 int 322 + xfs_log_release_iclog( 323 + struct xfs_mount *mp, 324 + struct xlog_in_core *iclog) 325 { 326 + if (xlog_state_release_iclog(mp->m_log, iclog)) { 327 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 328 return EIO; 329 } ··· 344 * reservation, we prevent over allocation problems. 345 */ 346 int 347 + xfs_log_reserve( 348 + struct xfs_mount *mp, 349 + int unit_bytes, 350 + int cnt, 351 + struct xlog_ticket **ticket, 352 + __uint8_t client, 353 + uint flags, 354 + uint t_type) 355 { 356 + struct log *log = mp->m_log; 357 + struct xlog_ticket *internal_ticket; 358 + int retval = 0; 359 360 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 361 ASSERT((flags & XFS_LOG_NOSLEEP) == 0); ··· 367 368 if (*ticket != NULL) { 369 ASSERT(flags & XFS_LOG_PERM_RESERV); 370 + internal_ticket = *ticket; 371 372 trace_xfs_log_reserve(log, internal_ticket); 373 ··· 519 xlog_in_core_t *first_iclog; 520 #endif 521 xfs_log_iovec_t reg[1]; 522 + xlog_ticket_t *tic = NULL; 523 xfs_lsn_t lsn; 524 int error; 525 ··· 656 * transaction occur with one call to xfs_log_write(). 657 */ 658 int 659 + xfs_log_write( 660 + struct xfs_mount *mp, 661 + struct xfs_log_iovec reg[], 662 + int nentries, 663 + struct xlog_ticket *tic, 664 + xfs_lsn_t *start_lsn) 665 { 666 + struct log *log = mp->m_log; 667 + int error; 668 669 if (XLOG_FORCED_SHUTDOWN(log)) 670 return XFS_ERROR(EIO); 671 672 + error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0); 673 + if (error) 674 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 675 return error; 676 + } 677 678 void 679 xfs_log_move_tail(xfs_mount_t *mp, ··· 1642 * bytes have been written out. 
1643 */ 1644 STATIC int 1645 + xlog_write( 1646 + struct xfs_mount *mp, 1647 + struct xfs_log_iovec reg[], 1648 + int nentries, 1649 + struct xlog_ticket *ticket, 1650 + xfs_lsn_t *start_lsn, 1651 + struct xlog_in_core **commit_iclog, 1652 + uint flags) 1653 { 1654 xlog_t *log = mp->m_log; 1655 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ 1656 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1657 __psint_t ptr; /* copy address into data region */ ··· 1765 default: 1766 xfs_fs_cmn_err(CE_WARN, mp, 1767 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1768 + logop_head->oh_clientid, ticket); 1769 return XFS_ERROR(EIO); 1770 } 1771
+8 -8
fs/xfs/xfs_log.h
··· 110 uint i_type; /* type of region */ 111 } xfs_log_iovec_t; 112 113 - typedef void* xfs_log_ticket_t; 114 - 115 /* 116 * Structure used to pass callback function and the function's argument 117 * to the log manager. ··· 124 #ifdef __KERNEL__ 125 /* Log manager interfaces */ 126 struct xfs_mount; 127 struct xlog_ticket; 128 xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 129 - xfs_log_ticket_t ticket, 130 - void **iclog, 131 uint flags); 132 int _xfs_log_force(struct xfs_mount *mp, 133 uint flags, ··· 151 void xfs_log_move_tail(struct xfs_mount *mp, 152 xfs_lsn_t tail_lsn); 153 int xfs_log_notify(struct xfs_mount *mp, 154 - void *iclog, 155 xfs_log_callback_t *callback_entry); 156 int xfs_log_release_iclog(struct xfs_mount *mp, 157 - void *iclog_hndl); 158 int xfs_log_reserve(struct xfs_mount *mp, 159 int length, 160 int count, 161 - xfs_log_ticket_t *ticket, 162 __uint8_t clientid, 163 uint flags, 164 uint t_type); 165 int xfs_log_write(struct xfs_mount *mp, 166 xfs_log_iovec_t region[], 167 int nentries, 168 - xfs_log_ticket_t ticket, 169 xfs_lsn_t *start_lsn); 170 int xfs_log_unmount_write(struct xfs_mount *mp); 171 void xfs_log_unmount(struct xfs_mount *mp);
··· 110 uint i_type; /* type of region */ 111 } xfs_log_iovec_t; 112 113 /* 114 * Structure used to pass callback function and the function's argument 115 * to the log manager. ··· 126 #ifdef __KERNEL__ 127 /* Log manager interfaces */ 128 struct xfs_mount; 129 + struct xlog_in_core; 130 struct xlog_ticket; 131 + 132 xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 133 + struct xlog_ticket *ticket, 134 + struct xlog_in_core **iclog, 135 uint flags); 136 int _xfs_log_force(struct xfs_mount *mp, 137 uint flags, ··· 151 void xfs_log_move_tail(struct xfs_mount *mp, 152 xfs_lsn_t tail_lsn); 153 int xfs_log_notify(struct xfs_mount *mp, 154 + struct xlog_in_core *iclog, 155 xfs_log_callback_t *callback_entry); 156 int xfs_log_release_iclog(struct xfs_mount *mp, 157 + struct xlog_in_core *iclog); 158 int xfs_log_reserve(struct xfs_mount *mp, 159 int length, 160 int count, 161 + struct xlog_ticket **ticket, 162 __uint8_t clientid, 163 uint flags, 164 uint t_type); 165 int xfs_log_write(struct xfs_mount *mp, 166 xfs_log_iovec_t region[], 167 int nentries, 168 + struct xlog_ticket *ticket, 169 xfs_lsn_t *start_lsn); 170 int xfs_log_unmount_write(struct xfs_mount *mp); 171 void xfs_log_unmount(struct xfs_mount *mp);
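Replacing the "typedef void *xfs_log_ticket_t" handle, and the void * iclog arguments, with pointers to forward-declared structs keeps the types opaque to callers while letting the compiler reject mixed-up arguments. A small illustration with hypothetical function names:

struct xlog_ticket;	/* opaque: the definition stays in the log code */
struct xlog_in_core;

int log_write(struct xlog_ticket *ticket);
int log_release_iclog(struct xlog_in_core *iclog);

void caller(struct xlog_ticket *ticket, struct xlog_in_core *iclog)
{
	log_write(ticket);		/* fine */
	log_release_iclog(iclog);	/* fine */
	/*
	 * log_release_iclog(ticket) is now a compile-time type error;
	 * with void * handles it compiled without complaint.
	 */
}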
+51 -22
fs/xfs/xfs_mount.c
··· 1097 __uint64_t resblks; 1098 1099 /* 1100 - * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. 1101 - * This may drive us straight to ENOSPC on mount, but that implies 1102 - * we were already there on the last unmount. Warn if this occurs. 1103 */ 1104 resblks = mp->m_sb.sb_dblocks; 1105 do_div(resblks, 20); 1106 - resblks = min_t(__uint64_t, resblks, 1024); 1107 return resblks; 1108 } 1109 ··· 1419 * when at ENOSPC. This is needed for operations like create with 1420 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations 1421 * are not allowed to use this reserved space. 1422 */ 1423 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 1424 resblks = xfs_default_resblks(mp); ··· 1730 lcounter += rem; 1731 } 1732 } else { /* Taking blocks away */ 1733 - 1734 lcounter += delta; 1735 - 1736 - /* 1737 - * If were out of blocks, use any available reserved blocks if 1738 - * were allowed to. 1739 - */ 1740 - 1741 - if (lcounter < 0) { 1742 - if (rsvd) { 1743 - lcounter = (long long)mp->m_resblks_avail + delta; 1744 - if (lcounter < 0) { 1745 - return XFS_ERROR(ENOSPC); 1746 - } 1747 - mp->m_resblks_avail = lcounter; 1748 - return 0; 1749 - } else { /* not reserved */ 1750 - return XFS_ERROR(ENOSPC); 1751 - } 1752 } 1753 } 1754 1755 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); ··· 2061 return error; 2062 } 2063 2064 2065 #ifdef HAVE_PERCPU_SB 2066 /*
··· 1097 __uint64_t resblks; 1098 1099 /* 1100 + * We default to 5% or 8192 fsbs of space reserved, whichever is 1101 + * smaller. This is intended to cover concurrent allocation 1102 + * transactions when we initially hit enospc. These each require a 4 1103 + * block reservation. Hence by default we cover roughly 2000 concurrent 1104 + * allocation reservations. 1105 */ 1106 resblks = mp->m_sb.sb_dblocks; 1107 do_div(resblks, 20); 1108 + resblks = min_t(__uint64_t, resblks, 8192); 1109 return resblks; 1110 } 1111 ··· 1417 * when at ENOSPC. This is needed for operations like create with 1418 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations 1419 * are not allowed to use this reserved space. 1420 + * 1421 + * This may drive us straight to ENOSPC on mount, but that implies 1422 + * we were already there on the last unmount. Warn if this occurs. 1423 */ 1424 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 1425 resblks = xfs_default_resblks(mp); ··· 1725 lcounter += rem; 1726 } 1727 } else { /* Taking blocks away */ 1728 lcounter += delta; 1729 + if (lcounter >= 0) { 1730 + mp->m_sb.sb_fdblocks = lcounter + 1731 + XFS_ALLOC_SET_ASIDE(mp); 1732 + return 0; 1733 } 1734 + 1735 + /* 1736 + * We are out of blocks, use any available reserved 1737 + * blocks if were allowed to. 1738 + */ 1739 + if (!rsvd) 1740 + return XFS_ERROR(ENOSPC); 1741 + 1742 + lcounter = (long long)mp->m_resblks_avail + delta; 1743 + if (lcounter >= 0) { 1744 + mp->m_resblks_avail = lcounter; 1745 + return 0; 1746 + } 1747 + printk_once(KERN_WARNING 1748 + "Filesystem \"%s\": reserve blocks depleted! " 1749 + "Consider increasing reserve pool size.", 1750 + mp->m_fsname); 1751 + return XFS_ERROR(ENOSPC); 1752 } 1753 1754 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); ··· 2052 return error; 2053 } 2054 2055 + /* 2056 + * If the underlying (data/log/rt) device is readonly, there are some 2057 + * operations that cannot proceed. 2058 + */ 2059 + int 2060 + xfs_dev_is_read_only( 2061 + struct xfs_mount *mp, 2062 + char *message) 2063 + { 2064 + if (xfs_readonly_buftarg(mp->m_ddev_targp) || 2065 + xfs_readonly_buftarg(mp->m_logdev_targp) || 2066 + (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { 2067 + cmn_err(CE_NOTE, 2068 + "XFS: %s required on read-only device.", message); 2069 + cmn_err(CE_NOTE, 2070 + "XFS: write access unavailable, cannot proceed."); 2071 + return EROFS; 2072 + } 2073 + return 0; 2074 + } 2075 2076 #ifdef HAVE_PERCPU_SB 2077 /*
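The new default reserve pool is min(5% of the data blocks, 8192 blocks); at the roughly 4 blocks per allocation transaction mentioned in the comment, the 8192-block cap covers about 2000 concurrent reservations. A quick stand-alone check of that sizing rule (illustrative, mirroring the shape of xfs_default_resblks()):

#include <stdio.h>
#include <stdint.h>

static uint64_t default_resblks(uint64_t dblocks)
{
	uint64_t resblks = dblocks / 20;	/* 5% of the data blocks */

	return resblks < 8192 ? resblks : 8192;	/* capped at 8192 blocks */
}

int main(void)
{
	/* Small filesystem (102400 x 4k blocks, 400 MiB): the 5% term governs. */
	printf("102400 blocks    -> reserve %llu\n",
	       (unsigned long long)default_resblks(102400));

	/*
	 * Large filesystem (1 TiB of 4k blocks): the cap governs, and
	 * 8192 / 4 blocks per transaction is about 2048 reservations.
	 */
	printf("268435456 blocks -> reserve %llu\n",
	       (unsigned long long)default_resblks(268435456ULL));
	return 0;
}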
+2
fs/xfs/xfs_mount.h
··· 436 extern int xfs_fs_writable(xfs_mount_t *); 437 extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); 438 439 extern int xfs_dmops_get(struct xfs_mount *); 440 extern void xfs_dmops_put(struct xfs_mount *); 441
··· 436 extern int xfs_fs_writable(xfs_mount_t *); 437 extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); 438 439 + extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 440 + 441 extern int xfs_dmops_get(struct xfs_mount *); 442 extern void xfs_dmops_put(struct xfs_mount *); 443
+1 -1
fs/xfs/xfs_trans.c
··· 796 int sync; 797 #define XFS_TRANS_LOGVEC_COUNT 16 798 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; 799 - void *commit_iclog; 800 int shutdown; 801 802 commit_lsn = -1;
··· 796 int sync; 797 #define XFS_TRANS_LOGVEC_COUNT 16 798 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; 799 + struct xlog_in_core *commit_iclog; 800 int shutdown; 801 802 commit_lsn = -1;
+1 -1
fs/xfs/xfs_trans.h
··· 910 unsigned int t_blk_res_used; /* # of resvd blocks used */ 911 unsigned int t_rtx_res; /* # of rt extents resvd */ 912 unsigned int t_rtx_res_used; /* # of resvd rt extents used */ 913 - xfs_log_ticket_t t_ticket; /* log mgr ticket */ 914 xfs_lsn_t t_lsn; /* log seq num of start of 915 * transaction. */ 916 xfs_lsn_t t_commit_lsn; /* log seq num of end of
··· 910 unsigned int t_blk_res_used; /* # of resvd blocks used */ 911 unsigned int t_rtx_res; /* # of rt extents resvd */ 912 unsigned int t_rtx_res_used; /* # of resvd rt extents used */ 913 + struct xlog_ticket *t_ticket; /* log mgr ticket */ 914 xfs_lsn_t t_lsn; /* log seq num of start of 915 * transaction. */ 916 xfs_lsn_t t_commit_lsn; /* log seq num of end of
+66 -150
fs/xfs/xfs_trans_buf.c
··· 46 STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *, 47 xfs_daddr_t, int); 48 49 50 /* 51 * Get and lock the buffer for the caller if it is not already ··· 191 192 ASSERT(!XFS_BUF_GETERROR(bp)); 193 194 - /* 195 - * The xfs_buf_log_item pointer is stored in b_fsprivate. If 196 - * it doesn't have one yet, then allocate one and initialize it. 197 - * The checks to see if one is there are in xfs_buf_item_init(). 198 - */ 199 - xfs_buf_item_init(bp, tp->t_mountp); 200 - 201 - /* 202 - * Set the recursion count for the buffer within this transaction 203 - * to 0. 204 - */ 205 - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 206 - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 207 - ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 208 - ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 209 - bip->bli_recur = 0; 210 - 211 - /* 212 - * Take a reference for this transaction on the buf item. 213 - */ 214 - atomic_inc(&bip->bli_refcount); 215 - 216 - /* 217 - * Get a log_item_desc to point at the new item. 218 - */ 219 - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip); 220 - 221 - /* 222 - * Initialize b_fsprivate2 so we can find it with incore_match() 223 - * above. 224 - */ 225 - XFS_BUF_SET_FSPRIVATE2(bp, tp); 226 - 227 - trace_xfs_trans_get_buf(bip); 228 return (bp); 229 } 230 ··· 237 } 238 239 bp = xfs_getsb(mp, flags); 240 - if (bp == NULL) { 241 return NULL; 242 - } 243 244 - /* 245 - * The xfs_buf_log_item pointer is stored in b_fsprivate. If 246 - * it doesn't have one yet, then allocate one and initialize it. 247 - * The checks to see if one is there are in xfs_buf_item_init(). 248 - */ 249 - xfs_buf_item_init(bp, mp); 250 - 251 - /* 252 - * Set the recursion count for the buffer within this transaction 253 - * to 0. 254 - */ 255 - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 256 - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 257 - ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 258 - ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 259 - bip->bli_recur = 0; 260 - 261 - /* 262 - * Take a reference for this transaction on the buf item. 263 - */ 264 - atomic_inc(&bip->bli_refcount); 265 - 266 - /* 267 - * Get a log_item_desc to point at the new item. 268 - */ 269 - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip); 270 - 271 - /* 272 - * Initialize b_fsprivate2 so we can find it with incore_match() 273 - * above. 274 - */ 275 - XFS_BUF_SET_FSPRIVATE2(bp, tp); 276 - 277 - trace_xfs_trans_getsb(bip); 278 return (bp); 279 } 280 ··· 419 if (XFS_FORCED_SHUTDOWN(mp)) 420 goto shutdown_abort; 421 422 - /* 423 - * The xfs_buf_log_item pointer is stored in b_fsprivate. If 424 - * it doesn't have one yet, then allocate one and initialize it. 425 - * The checks to see if one is there are in xfs_buf_item_init(). 426 - */ 427 - xfs_buf_item_init(bp, tp->t_mountp); 428 429 - /* 430 - * Set the recursion count for the buffer within this transaction 431 - * to 0. 432 - */ 433 - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 434 - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 435 - ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 436 - ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 437 - bip->bli_recur = 0; 438 - 439 - /* 440 - * Take a reference for this transaction on the buf item. 441 - */ 442 - atomic_inc(&bip->bli_refcount); 443 - 444 - /* 445 - * Get a log_item_desc to point at the new item. 446 - */ 447 - (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip); 448 - 449 - /* 450 - * Initialize b_fsprivate2 so we can find it with incore_match() 451 - * above. 
452 - */ 453 - XFS_BUF_SET_FSPRIVATE2(bp, tp); 454 - 455 - trace_xfs_trans_read_buf(bip); 456 *bpp = bp; 457 return 0; 458 ··· 583 584 xfs_buf_relse(bp); 585 return; 586 - } 587 - 588 - /* 589 - * Add the locked buffer to the transaction. 590 - * The buffer must be locked, and it cannot be associated with any 591 - * transaction. 592 - * 593 - * If the buffer does not yet have a buf log item associated with it, 594 - * then allocate one for it. Then add the buf item to the transaction. 595 - */ 596 - void 597 - xfs_trans_bjoin(xfs_trans_t *tp, 598 - xfs_buf_t *bp) 599 - { 600 - xfs_buf_log_item_t *bip; 601 - 602 - ASSERT(XFS_BUF_ISBUSY(bp)); 603 - ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 604 - 605 - /* 606 - * The xfs_buf_log_item pointer is stored in b_fsprivate. If 607 - * it doesn't have one yet, then allocate one and initialize it. 608 - * The checks to see if one is there are in xfs_buf_item_init(). 609 - */ 610 - xfs_buf_item_init(bp, tp->t_mountp); 611 - bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 612 - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 613 - ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 614 - ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 615 - 616 - /* 617 - * Take a reference for this transaction on the buf item. 618 - */ 619 - atomic_inc(&bip->bli_refcount); 620 - 621 - /* 622 - * Get a log_item_desc to point at the new item. 623 - */ 624 - (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip); 625 - 626 - /* 627 - * Initialize b_fsprivate2 so we can find it with incore_match() 628 - * in xfs_trans_get_buf() and friends above. 629 - */ 630 - XFS_BUF_SET_FSPRIVATE2(bp, tp); 631 - 632 - trace_xfs_trans_bjoin(bip); 633 } 634 635 /*
··· 46 STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *, 47 xfs_daddr_t, int); 48 49 + /* 50 + * Add the locked buffer to the transaction. 51 + * 52 + * The buffer must be locked, and it cannot be associated with any 53 + * transaction. 54 + * 55 + * If the buffer does not yet have a buf log item associated with it, 56 + * then allocate one for it. Then add the buf item to the transaction. 57 + */ 58 + STATIC void 59 + _xfs_trans_bjoin( 60 + struct xfs_trans *tp, 61 + struct xfs_buf *bp, 62 + int reset_recur) 63 + { 64 + struct xfs_buf_log_item *bip; 65 + 66 + ASSERT(XFS_BUF_ISBUSY(bp)); 67 + ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 68 + 69 + /* 70 + * The xfs_buf_log_item pointer is stored in b_fsprivate. If 71 + * it doesn't have one yet, then allocate one and initialize it. 72 + * The checks to see if one is there are in xfs_buf_item_init(). 73 + */ 74 + xfs_buf_item_init(bp, tp->t_mountp); 75 + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 76 + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 77 + ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 78 + ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 79 + if (reset_recur) 80 + bip->bli_recur = 0; 81 + 82 + /* 83 + * Take a reference for this transaction on the buf item. 84 + */ 85 + atomic_inc(&bip->bli_refcount); 86 + 87 + /* 88 + * Get a log_item_desc to point at the new item. 89 + */ 90 + (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip); 91 + 92 + /* 93 + * Initialize b_fsprivate2 so we can find it with incore_match() 94 + * in xfs_trans_get_buf() and friends above. 95 + */ 96 + XFS_BUF_SET_FSPRIVATE2(bp, tp); 97 + 98 + } 99 + 100 + void 101 + xfs_trans_bjoin( 102 + struct xfs_trans *tp, 103 + struct xfs_buf *bp) 104 + { 105 + _xfs_trans_bjoin(tp, bp, 0); 106 + trace_xfs_trans_bjoin(bp->b_fspriv); 107 + } 108 109 /* 110 * Get and lock the buffer for the caller if it is not already ··· 132 133 ASSERT(!XFS_BUF_GETERROR(bp)); 134 135 + _xfs_trans_bjoin(tp, bp, 1); 136 + trace_xfs_trans_get_buf(bp->b_fspriv); 137 return (bp); 138 } 139 ··· 210 } 211 212 bp = xfs_getsb(mp, flags); 213 + if (bp == NULL) 214 return NULL; 215 216 + _xfs_trans_bjoin(tp, bp, 1); 217 + trace_xfs_trans_getsb(bp->b_fspriv); 218 return (bp); 219 } 220 ··· 425 if (XFS_FORCED_SHUTDOWN(mp)) 426 goto shutdown_abort; 427 428 + _xfs_trans_bjoin(tp, bp, 1); 429 + trace_xfs_trans_read_buf(bp->b_fspriv); 430 431 *bpp = bp; 432 return 0; 433 ··· 620 621 xfs_buf_relse(bp); 622 return; 623 } 624 625 /*
-107
fs/xfs/xfs_vnodeops.c
··· 584 } 585 586 /* 587 - * xfs_fsync 588 - * 589 - * This is called to sync the inode and its data out to disk. We need to hold 590 - * the I/O lock while flushing the data, and the inode lock while flushing the 591 - * inode. The inode lock CANNOT be held while flushing the data, so acquire 592 - * after we're done with that. 593 - */ 594 - int 595 - xfs_fsync( 596 - xfs_inode_t *ip) 597 - { 598 - xfs_trans_t *tp; 599 - int error = 0; 600 - int log_flushed = 0; 601 - 602 - xfs_itrace_entry(ip); 603 - 604 - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 605 - return XFS_ERROR(EIO); 606 - 607 - /* 608 - * We always need to make sure that the required inode state is safe on 609 - * disk. The inode might be clean but we still might need to force the 610 - * log because of committed transactions that haven't hit the disk yet. 611 - * Likewise, there could be unflushed non-transactional changes to the 612 - * inode core that have to go to disk and this requires us to issue 613 - * a synchronous transaction to capture these changes correctly. 614 - * 615 - * This code relies on the assumption that if the update_* fields 616 - * of the inode are clear and the inode is unpinned then it is clean 617 - * and no action is required. 618 - */ 619 - xfs_ilock(ip, XFS_ILOCK_SHARED); 620 - 621 - if (!ip->i_update_core) { 622 - /* 623 - * Timestamps/size haven't changed since last inode flush or 624 - * inode transaction commit. That means either nothing got 625 - * written or a transaction committed which caught the updates. 626 - * If the latter happened and the transaction hasn't hit the 627 - * disk yet, the inode will be still be pinned. If it is, 628 - * force the log. 629 - */ 630 - xfs_iunlock(ip, XFS_ILOCK_SHARED); 631 - if (xfs_ipincount(ip)) { 632 - if (ip->i_itemp->ili_last_lsn) { 633 - error = _xfs_log_force_lsn(ip->i_mount, 634 - ip->i_itemp->ili_last_lsn, 635 - XFS_LOG_SYNC, &log_flushed); 636 - } else { 637 - error = _xfs_log_force(ip->i_mount, 638 - XFS_LOG_SYNC, &log_flushed); 639 - } 640 - } 641 - } else { 642 - /* 643 - * Kick off a transaction to log the inode core to get the 644 - * updates. The sync transaction will also force the log. 645 - */ 646 - xfs_iunlock(ip, XFS_ILOCK_SHARED); 647 - tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 648 - error = xfs_trans_reserve(tp, 0, 649 - XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); 650 - if (error) { 651 - xfs_trans_cancel(tp, 0); 652 - return error; 653 - } 654 - xfs_ilock(ip, XFS_ILOCK_EXCL); 655 - 656 - /* 657 - * Note - it's possible that we might have pushed ourselves out 658 - * of the way during trans_reserve which would flush the inode. 659 - * But there's no guarantee that the inode buffer has actually 660 - * gone out yet (it's delwri). Plus the buffer could be pinned 661 - * anyway if it's part of an inode in another recent 662 - * transaction. So we play it safe and fire off the 663 - * transaction anyway. 664 - */ 665 - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 666 - xfs_trans_ihold(tp, ip); 667 - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 668 - xfs_trans_set_sync(tp); 669 - error = _xfs_trans_commit(tp, 0, &log_flushed); 670 - 671 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 672 - } 673 - 674 - if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { 675 - /* 676 - * If the log write didn't issue an ordered tag we need 677 - * to flush the disk cache for the data device now. 
678 - */ 679 - if (!log_flushed) 680 - xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 681 - 682 - /* 683 - * If this inode is on the RT dev we need to flush that 684 - * cache as well. 685 - */ 686 - if (XFS_IS_REALTIME_INODE(ip)) 687 - xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); 688 - } 689 - 690 - return error; 691 - } 692 - 693 - /* 694 * Flags for xfs_free_eofblocks 695 */ 696 #define XFS_FREE_EOF_TRYLOCK (1<<0)
··· 584 } 585 586 /* 587 * Flags for xfs_free_eofblocks 588 */ 589 #define XFS_FREE_EOF_TRYLOCK (1<<0)
+2 -13
fs/xfs/xfs_vnodeops.h
··· 21 #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ 22 23 int xfs_readlink(struct xfs_inode *ip, char *link); 24 - int xfs_fsync(struct xfs_inode *ip); 25 int xfs_release(struct xfs_inode *ip); 26 int xfs_inactive(struct xfs_inode *ip); 27 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, ··· 49 int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); 50 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, 51 int flags, struct attrlist_cursor_kern *cursor); 52 - ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, 53 - const struct iovec *iovp, unsigned int segs, 54 - loff_t *offset, int ioflags); 55 - ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp, 56 - loff_t *ppos, struct pipe_inode_info *pipe, size_t count, 57 - int flags, int ioflags); 58 - ssize_t xfs_splice_write(struct xfs_inode *ip, 59 - struct pipe_inode_info *pipe, struct file *outfilp, 60 - loff_t *ppos, size_t count, int flags, int ioflags); 61 - ssize_t xfs_write(struct xfs_inode *xip, struct kiocb *iocb, 62 - const struct iovec *iovp, unsigned int nsegs, 63 - loff_t *offset, int ioflags); 64 int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, 65 int flags, struct xfs_iomap *iomapp, int *niomaps); 66 void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, ··· 58 int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first, 59 xfs_off_t last, uint64_t flags, int fiopt); 60 int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last); 61 62 #endif /* _XFS_VNODEOPS_H */
··· 21 #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ 22 23 int xfs_readlink(struct xfs_inode *ip, char *link); 24 int xfs_release(struct xfs_inode *ip); 25 int xfs_inactive(struct xfs_inode *ip); 26 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, ··· 50 int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); 51 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, 52 int flags, struct attrlist_cursor_kern *cursor); 53 int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, 54 int flags, struct xfs_iomap *iomapp, int *niomaps); 55 void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, ··· 71 int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first, 72 xfs_off_t last, uint64_t flags, int fiopt); 73 int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last); 74 + 75 + int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); 76 77 #endif /* _XFS_VNODEOPS_H */