Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull ext3 & udf fixes from Jan Kara:
"Shortlog pretty much says it all.

The interesting bits are UDF support for direct IO and ext3 fix for a
long standing oops in data=journal mode."

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
jbd: Fix assertion failure in commit code due to lacking transaction credits
UDF: Add support for O_DIRECT
ext3: Replace 0 with NULL for pointer in super.c file
udf: add writepages support for udf
ext3: don't clear orphan list on ro mount with errors
reiserfs: Make reiserfs_xattr_handlers static

+135 -50
+5 -3
fs/ext3/super.c
··· 980 980 * Initialize args struct so we know whether arg was 981 981 * found; some options take optional arguments. 982 982 */ 983 - args[0].to = args[0].from = 0; 983 + args[0].to = args[0].from = NULL; 984 984 token = match_token(p, tokens, args); 985 985 switch (token) { 986 986 case Opt_bsd_df: ··· 1484 1484 } 1485 1485 1486 1486 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1487 - if (es->s_last_orphan) 1487 + /* don't clear list on RO mount w/ errors */ 1488 + if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { 1488 1489 jbd_debug(1, "Errors on filesystem, " 1489 1490 "clearing orphan list.\n"); 1490 - es->s_last_orphan = 0; 1491 + es->s_last_orphan = 0; 1492 + } 1491 1493 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1492 1494 return; 1493 1495 }
+34 -11
fs/jbd/commit.c
··· 86 86 static void release_data_buffer(struct buffer_head *bh) 87 87 { 88 88 if (buffer_freed(bh)) { 89 + WARN_ON_ONCE(buffer_dirty(bh)); 89 90 clear_buffer_freed(bh); 91 + clear_buffer_mapped(bh); 92 + clear_buffer_new(bh); 93 + clear_buffer_req(bh); 94 + bh->b_bdev = NULL; 90 95 release_buffer_page(bh); 91 96 } else 92 97 put_bh(bh); ··· 871 866 * there's no point in keeping a checkpoint record for 872 867 * it. */ 873 868 874 - /* A buffer which has been freed while still being 875 - * journaled by a previous transaction may end up still 876 - * being dirty here, but we want to avoid writing back 877 - * that buffer in the future after the "add to orphan" 878 - * operation been committed, That's not only a performance 879 - * gain, it also stops aliasing problems if the buffer is 880 - * left behind for writeback and gets reallocated for another 881 - * use in a different page. */ 882 - if (buffer_freed(bh) && !jh->b_next_transaction) { 883 - clear_buffer_freed(bh); 884 - clear_buffer_jbddirty(bh); 869 + /* 870 + * A buffer which has been freed while still being journaled by 871 + * a previous transaction. 872 + */ 873 + if (buffer_freed(bh)) { 874 + /* 875 + * If the running transaction is the one containing 876 + * "add to orphan" operation (b_next_transaction != 877 + * NULL), we have to wait for that transaction to 878 + * commit before we can really get rid of the buffer. 879 + * So just clear b_modified to not confuse transaction 880 + * credit accounting and refile the buffer to 881 + * BJ_Forget of the running transaction. If the just 882 + * committed transaction contains "add to orphan" 883 + * operation, we can completely invalidate the buffer 884 + * now. We are rather throughout in that since the 885 + * buffer may be still accessible when blocksize < 886 + * pagesize and it is attached to the last partial 887 + * page. 888 + */ 889 + jh->b_modified = 0; 890 + if (!jh->b_next_transaction) { 891 + clear_buffer_freed(bh); 892 + clear_buffer_jbddirty(bh); 893 + clear_buffer_mapped(bh); 894 + clear_buffer_new(bh); 895 + clear_buffer_req(bh); 896 + bh->b_bdev = NULL; 897 + } 885 898 } 886 899 887 900 if (buffer_jbddirty(bh)) {
+44 -20
fs/jbd/transaction.c
··· 1843 1843 * We're outside-transaction here. Either or both of j_running_transaction 1844 1844 * and j_committing_transaction may be NULL. 1845 1845 */ 1846 - static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) 1846 + static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, 1847 + int partial_page) 1847 1848 { 1848 1849 transaction_t *transaction; 1849 1850 struct journal_head *jh; 1850 1851 int may_free = 1; 1851 - int ret; 1852 1852 1853 1853 BUFFER_TRACE(bh, "entry"); 1854 1854 1855 + retry: 1855 1856 /* 1856 1857 * It is safe to proceed here without the j_list_lock because the 1857 1858 * buffers cannot be stolen by try_to_free_buffers as long as we are ··· 1880 1879 * clear the buffer dirty bit at latest at the moment when the 1881 1880 * transaction marking the buffer as freed in the filesystem 1882 1881 * structures is committed because from that moment on the 1883 - * buffer can be reallocated and used by a different page. 1882 + * block can be reallocated and used by a different page. 1884 1883 * Since the block hasn't been freed yet but the inode has 1885 1884 * already been added to orphan list, it is safe for us to add 1886 1885 * the buffer to BJ_Forget list of the newest transaction. 1886 + * 1887 + * Also we have to clear buffer_mapped flag of a truncated buffer 1888 + * because the buffer_head may be attached to the page straddling 1889 + * i_size (can happen only when blocksize < pagesize) and thus the 1890 + * buffer_head can be reused when the file is extended again. So we end 1891 + * up keeping around invalidated buffers attached to transactions' 1892 + * BJ_Forget list just to stop checkpointing code from cleaning up 1893 + * the transaction this buffer was modified in. 1887 1894 */ 1888 1895 transaction = jh->b_transaction; 1889 1896 if (transaction == NULL) { ··· 1918 1909 * committed, the buffer won't be needed any 1919 1910 * longer. */ 1920 1911 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 1921 - ret = __dispose_buffer(jh, 1912 + may_free = __dispose_buffer(jh, 1922 1913 journal->j_running_transaction); 1923 - journal_put_journal_head(jh); 1924 - spin_unlock(&journal->j_list_lock); 1925 - jbd_unlock_bh_state(bh); 1926 - spin_unlock(&journal->j_state_lock); 1927 - return ret; 1914 + goto zap_buffer; 1928 1915 } else { 1929 1916 /* There is no currently-running transaction. So the 1930 1917 * orphan record which we wrote for this file must have ··· 1928 1923 * the committing transaction, if it exists. */ 1929 1924 if (journal->j_committing_transaction) { 1930 1925 JBUFFER_TRACE(jh, "give to committing trans"); 1931 - ret = __dispose_buffer(jh, 1926 + may_free = __dispose_buffer(jh, 1932 1927 journal->j_committing_transaction); 1933 - journal_put_journal_head(jh); 1934 - spin_unlock(&journal->j_list_lock); 1935 - jbd_unlock_bh_state(bh); 1936 - spin_unlock(&journal->j_state_lock); 1937 - return ret; 1928 + goto zap_buffer; 1938 1929 } else { 1939 1930 /* The orphan record's transaction has 1940 1931 * committed. We can cleanse this buffer */ ··· 1951 1950 } 1952 1951 /* 1953 1952 * The buffer is committing, we simply cannot touch 1954 - * it. So we just set j_next_transaction to the 1955 - * running transaction (if there is one) and mark 1956 - * buffer as freed so that commit code knows it should 1957 - * clear dirty bits when it is done with the buffer. 1953 + * it. If the page is straddling i_size we have to wait 1954 + * for commit and try again. 1955 + */ 1956 + if (partial_page) { 1957 + tid_t tid = journal->j_committing_transaction->t_tid; 1958 + 1959 + journal_put_journal_head(jh); 1960 + spin_unlock(&journal->j_list_lock); 1961 + jbd_unlock_bh_state(bh); 1962 + spin_unlock(&journal->j_state_lock); 1963 + log_wait_commit(journal, tid); 1964 + goto retry; 1965 + } 1966 + /* 1967 + * OK, buffer won't be reachable after truncate. We just set 1968 + * j_next_transaction to the running transaction (if there is 1969 + * one) and mark buffer as freed so that commit code knows it 1970 + * should clear dirty bits when it is done with the buffer. 1958 1971 */ 1959 1972 set_buffer_freed(bh); 1960 1973 if (journal->j_running_transaction && buffer_jbddirty(bh)) ··· 1991 1976 } 1992 1977 1993 1978 zap_buffer: 1979 + /* 1980 + * This is tricky. Although the buffer is truncated, it may be reused 1981 + * if blocksize < pagesize and it is attached to the page straddling 1982 + * EOF. Since the buffer might have been added to BJ_Forget list of the 1983 + * running transaction, journal_get_write_access() won't clear 1984 + * b_modified and credit accounting gets confused. So clear b_modified 1985 + * here. */ 1986 + jh->b_modified = 0; 1994 1987 journal_put_journal_head(jh); 1995 1988 zap_buffer_no_jh: 1996 1989 spin_unlock(&journal->j_list_lock); ··· 2047 2024 if (offset <= curr_off) { 2048 2025 /* This block is wholly outside the truncation point */ 2049 2026 lock_buffer(bh); 2050 - may_free &= journal_unmap_buffer(journal, bh); 2027 + may_free &= journal_unmap_buffer(journal, bh, 2028 + offset > 0); 2051 2029 unlock_buffer(bh); 2052 2030 } 2053 2031 curr_off = next_off;
+1 -1
fs/reiserfs/xattr.c
··· 896 896 #endif 897 897 898 898 /* Actual operations that are exported to VFS-land */ 899 - const struct xattr_handler *reiserfs_xattr_handlers[] = { 899 + static const struct xattr_handler *reiserfs_xattr_handlers[] = { 900 900 #ifdef CONFIG_REISERFS_FS_XATTR 901 901 &reiserfs_xattr_user_handler, 902 902 &reiserfs_xattr_trusted_handler,
+9
fs/udf/file.c
··· 118 118 return simple_write_end(file, mapping, pos, len, copied, page, fsdata); 119 119 } 120 120 121 + static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, 122 + const struct iovec *iov, 123 + loff_t offset, unsigned long nr_segs) 124 + { 125 + /* Fallback to buffered I/O. */ 126 + return 0; 127 + } 128 + 121 129 const struct address_space_operations udf_adinicb_aops = { 122 130 .readpage = udf_adinicb_readpage, 123 131 .writepage = udf_adinicb_writepage, 124 132 .write_begin = udf_adinicb_write_begin, 125 133 .write_end = udf_adinicb_write_end, 134 + .direct_IO = udf_adinicb_direct_IO, 126 135 }; 127 136 128 137 static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+42 -15
fs/udf/inode.c
··· 95 95 } 96 96 } 97 97 98 + static void udf_write_failed(struct address_space *mapping, loff_t to) 99 + { 100 + struct inode *inode = mapping->host; 101 + struct udf_inode_info *iinfo = UDF_I(inode); 102 + loff_t isize = inode->i_size; 103 + 104 + if (to > isize) { 105 + truncate_pagecache(inode, to, isize); 106 + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 107 + down_write(&iinfo->i_data_sem); 108 + udf_truncate_extents(inode); 109 + up_write(&iinfo->i_data_sem); 110 + } 111 + } 112 + } 113 + 98 114 static int udf_writepage(struct page *page, struct writeback_control *wbc) 99 115 { 100 116 return block_write_full_page(page, udf_get_block, wbc); 117 + } 118 + 119 + static int udf_writepages(struct address_space *mapping, 120 + struct writeback_control *wbc) 121 + { 122 + return mpage_writepages(mapping, wbc, udf_get_block); 101 123 } 102 125 static int udf_readpage(struct file *file, struct page *page) ··· 140 118 int ret; 141 119 142 120 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); 143 - if (unlikely(ret)) { 144 - struct inode *inode = mapping->host; 145 - struct udf_inode_info *iinfo = UDF_I(inode); 146 - loff_t isize = inode->i_size; 121 + if (unlikely(ret)) 122 + udf_write_failed(mapping, pos + len); 123 + return ret; 124 + } 147 125 148 - if (pos + len > isize) { 149 - truncate_pagecache(inode, pos + len, isize); 150 - if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 151 - down_write(&iinfo->i_data_sem); 152 - udf_truncate_extents(inode); 153 - up_write(&iinfo->i_data_sem); 154 - } 155 - } 156 - } 126 + static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, 127 + const struct iovec *iov, 128 + loff_t offset, unsigned long nr_segs) 129 + { 130 + struct file *file = iocb->ki_filp; 131 + struct address_space *mapping = file->f_mapping; 132 + struct inode *inode = mapping->host; 133 + ssize_t ret; 157 134 135 + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 136 + udf_get_block); 137 + if (unlikely(ret < 0 && (rw & WRITE))) 138 + udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); 158 139 return ret; 159 140 } 160 141 ··· 170 145 .readpage = udf_readpage, 171 146 .readpages = udf_readpages, 172 147 .writepage = udf_writepage, 173 - .write_begin = udf_write_begin, 174 - .write_end = generic_write_end, 148 + .writepages = udf_writepages, 149 + .write_begin = udf_write_begin, 150 + .write_end = generic_write_end, 151 + .direct_IO = udf_direct_IO, 175 153 .bmap = udf_bmap, 176 154 }; 177 154