Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs/buffer.c: make block-size be per-page and protected by the page lock

This makes buffer-size handling a per-page property, so we no longer
have to worry much about locking when the buffer size changes: once a
page has buffers, the block size is taken from the buffer heads
themselves (bh->b_size), which are protected by the page lock. If a page
doesn't have buffers yet, we still need to read the block size from the
inode, but we do that exactly once with ACCESS_ONCE(), so that even if
the size is changing concurrently, we work with a single consistent value.

This doesn't convert all functions: many of the buffer functions are
used only by filesystems, for which the buffer size is fixed at mount
time. Those callers don't have the same consistency issues that raw
device access can have.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+48 -31
+48 -31
fs/buffer.c
··· 1553 1553 EXPORT_SYMBOL(unmap_underlying_metadata); 1554 1554 1555 1555 /* 1556 + * Size is a power-of-two in the range 512..PAGE_SIZE, 1557 + * and the case we care about most is PAGE_SIZE. 1558 + * 1559 + * So this *could* possibly be written with those 1560 + * constraints in mind (relevant mostly if some 1561 + * architecture has a slow bit-scan instruction) 1562 + */ 1563 + static inline int block_size_bits(unsigned int blocksize) 1564 + { 1565 + return ilog2(blocksize); 1566 + } 1567 + 1568 + static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state) 1569 + { 1570 + BUG_ON(!PageLocked(page)); 1571 + 1572 + if (!page_has_buffers(page)) 1573 + create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); 1574 + return page_buffers(page); 1575 + } 1576 + 1577 + /* 1556 1578 * NOTE! All mapped/uptodate combinations are valid: 1557 1579 * 1558 1580 * Mapped Uptodate Meaning ··· 1611 1589 sector_t block; 1612 1590 sector_t last_block; 1613 1591 struct buffer_head *bh, *head; 1614 - const unsigned blocksize = 1 << inode->i_blkbits; 1592 + unsigned int blocksize, bbits; 1615 1593 int nr_underway = 0; 1616 1594 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? 1617 1595 WRITE_SYNC : WRITE); 1618 1596 1619 - BUG_ON(!PageLocked(page)); 1620 - 1621 - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; 1622 - 1623 - if (!page_has_buffers(page)) { 1624 - create_empty_buffers(page, blocksize, 1597 + head = create_page_buffers(page, inode, 1625 1598 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1626 - } 1627 1599 1628 1600 /* 1629 1601 * Be very careful. We have no exclusion from __set_page_dirty_buffers ··· 1629 1613 * handle that here by just cleaning them. 
1630 1614 */ 1631 1615 1632 - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 1633 - head = page_buffers(page); 1634 1616 bh = head; 1617 + blocksize = bh->b_size; 1618 + bbits = block_size_bits(blocksize); 1619 + 1620 + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); 1621 + last_block = (i_size_read(inode) - 1) >> bbits; 1635 1622 1636 1623 /* 1637 1624 * Get all the dirty buffers mapped to disk addresses and ··· 1825 1806 BUG_ON(to > PAGE_CACHE_SIZE); 1826 1807 BUG_ON(from > to); 1827 1808 1828 - blocksize = 1 << inode->i_blkbits; 1829 - if (!page_has_buffers(page)) 1830 - create_empty_buffers(page, blocksize, 0); 1831 - head = page_buffers(page); 1809 + head = create_page_buffers(page, inode, 0); 1810 + blocksize = head->b_size; 1811 + bbits = block_size_bits(blocksize); 1832 1812 1833 - bbits = inode->i_blkbits; 1834 1813 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); 1835 1814 1836 1815 for(bh = head, block_start = 0; bh != head || !block_start; ··· 1898 1881 unsigned blocksize; 1899 1882 struct buffer_head *bh, *head; 1900 1883 1901 - blocksize = 1 << inode->i_blkbits; 1884 + bh = head = page_buffers(page); 1885 + blocksize = bh->b_size; 1902 1886 1903 - for(bh = head = page_buffers(page), block_start = 0; 1904 - bh != head || !block_start; 1905 - block_start=block_end, bh = bh->b_this_page) { 1887 + block_start = 0; 1888 + do { 1906 1889 block_end = block_start + blocksize; 1907 1890 if (block_end <= from || block_start >= to) { 1908 1891 if (!buffer_uptodate(bh)) ··· 1912 1895 mark_buffer_dirty(bh); 1913 1896 } 1914 1897 clear_buffer_new(bh); 1915 - } 1898 + 1899 + block_start = block_end; 1900 + bh = bh->b_this_page; 1901 + } while (bh != head); 1916 1902 1917 1903 /* 1918 1904 * If this is a partial write which happened to make all buffers ··· 2040 2020 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, 2041 2021 unsigned long from) 2042 2022 { 2043 - struct inode *inode = 
page->mapping->host; 2044 2023 unsigned block_start, block_end, blocksize; 2045 2024 unsigned to; 2046 2025 struct buffer_head *bh, *head; ··· 2048 2029 if (!page_has_buffers(page)) 2049 2030 return 0; 2050 2031 2051 - blocksize = 1 << inode->i_blkbits; 2032 + head = page_buffers(page); 2033 + blocksize = head->b_size; 2052 2034 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); 2053 2035 to = from + to; 2054 2036 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) 2055 2037 return 0; 2056 2038 2057 - head = page_buffers(page); 2058 2039 bh = head; 2059 2040 block_start = 0; 2060 2041 do { ··· 2087 2068 struct inode *inode = page->mapping->host; 2088 2069 sector_t iblock, lblock; 2089 2070 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 2090 - unsigned int blocksize; 2071 + unsigned int blocksize, bbits; 2091 2072 int nr, i; 2092 2073 int fully_mapped = 1; 2093 2074 2094 - BUG_ON(!PageLocked(page)); 2095 - blocksize = 1 << inode->i_blkbits; 2096 - if (!page_has_buffers(page)) 2097 - create_empty_buffers(page, blocksize, 0); 2098 - head = page_buffers(page); 2075 + head = create_page_buffers(page, inode, 0); 2076 + blocksize = head->b_size; 2077 + bbits = block_size_bits(blocksize); 2099 2078 2100 - iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 2101 - lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; 2079 + iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); 2080 + lblock = (i_size_read(inode)+blocksize-1) >> bbits; 2102 2081 bh = head; 2103 2082 nr = 0; 2104 2083 i = 0;