Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] shmem: restore superblock info

To improve shmem scalability, we allowed tmpfs instances which don't need
their blocks or inodes limited to skip counting them, and to skip allocating
any sbinfo. That was okay when the only use for the sbinfo was accounting
blocks and inodes; but since then a couple of unrelated projects extending
tmpfs want to store other data in the sbinfo. Whether either extension
reaches mainline is beside the point: I'm guilty of a bad design decision,
and should restore sbinfo to make any such future extensions easier.

So, once again allocate a shmem_sb_info for every shmem/tmpfs instance, and
now let max_blocks 0 indicate unlimited blocks, and max_inodes 0 unlimited
inodes. Brent Casavant verified (many months ago) that this does not
perceptibly impact the scalability (since the unlimited sbinfo cacheline is
repeatedly accessed but only once dirtied).

And merge shmem_set_size into its sole caller shmem_remount_fs.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Hugh Dickins and committed by
Linus Torvalds
0edd73b3 65ed0b33

+73 -76
+3 -3
Documentation/filesystems/tmpfs.txt
··· 71 71 to limit this tmpfs instance to that percentage of your physical RAM: 72 72 the default, when neither size nor nr_blocks is specified, is size=50% 73 73 74 - If both nr_blocks (or size) and nr_inodes are set to 0, neither blocks 75 - nor inodes will be limited in that instance. It is generally unwise to 74 + If nr_blocks=0 (or size=0), blocks will not be limited in that instance; 75 + if nr_inodes=0, inodes will not be limited. It is generally unwise to 76 76 mount with such options, since it allows any user with write access to 77 77 use up all the memory on the machine; but enhances the scalability of 78 78 that instance in a system with many cpus making intensive use of it. ··· 97 97 Author: 98 98 Christoph Rohland <cr@sap.com>, 1.12.01 99 99 Updated: 100 - Hugh Dickins <hugh@veritas.com>, 01 September 2004 100 + Hugh Dickins <hugh@veritas.com>, 13 March 2005
+70 -73
mm/shmem.c
··· 6 6 * 2000-2001 Christoph Rohland 7 7 * 2000-2001 SAP AG 8 8 * 2002 Red Hat Inc. 9 - * Copyright (C) 2002-2004 Hugh Dickins. 10 - * Copyright (C) 2002-2004 VERITAS Software Corporation. 9 + * Copyright (C) 2002-2005 Hugh Dickins. 10 + * Copyright (C) 2002-2005 VERITAS Software Corporation. 11 11 * Copyright (C) 2004 Andi Kleen, SuSE Labs 12 12 * 13 13 * Extended attribute support for tmpfs: ··· 194 194 static void shmem_free_blocks(struct inode *inode, long pages) 195 195 { 196 196 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 197 - if (sbinfo) { 197 + if (sbinfo->max_blocks) { 198 198 spin_lock(&sbinfo->stat_lock); 199 199 sbinfo->free_blocks += pages; 200 200 inode->i_blocks -= pages*BLOCKS_PER_PAGE; ··· 357 357 * page (and perhaps indirect index pages) yet to allocate: 358 358 * a waste to allocate index if we cannot allocate data. 359 359 */ 360 - if (sbinfo) { 360 + if (sbinfo->max_blocks) { 361 361 spin_lock(&sbinfo->stat_lock); 362 362 if (sbinfo->free_blocks <= 1) { 363 363 spin_unlock(&sbinfo->stat_lock); ··· 677 677 spin_unlock(&shmem_swaplist_lock); 678 678 } 679 679 } 680 - if (sbinfo) { 681 - BUG_ON(inode->i_blocks); 680 + BUG_ON(inode->i_blocks); 681 + if (sbinfo->max_inodes) { 682 682 spin_lock(&sbinfo->stat_lock); 683 683 sbinfo->free_inodes++; 684 684 spin_unlock(&sbinfo->stat_lock); ··· 1080 1080 } else { 1081 1081 shmem_swp_unmap(entry); 1082 1082 sbinfo = SHMEM_SB(inode->i_sb); 1083 - if (sbinfo) { 1083 + if (sbinfo->max_blocks) { 1084 1084 spin_lock(&sbinfo->stat_lock); 1085 1085 if (sbinfo->free_blocks == 0 || 1086 1086 shmem_acct_block(info->flags)) { ··· 1269 1269 struct shmem_inode_info *info; 1270 1270 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 1271 1271 1272 - if (sbinfo) { 1272 + if (sbinfo->max_inodes) { 1273 1273 spin_lock(&sbinfo->stat_lock); 1274 1274 if (!sbinfo->free_inodes) { 1275 1275 spin_unlock(&sbinfo->stat_lock); ··· 1319 1319 mpol_shared_policy_init(&info->policy); 1320 1320 break; 1321 1321 } 1322 - } else if 
(sbinfo) { 1322 + } else if (sbinfo->max_inodes) { 1323 1323 spin_lock(&sbinfo->stat_lock); 1324 1324 sbinfo->free_inodes++; 1325 1325 spin_unlock(&sbinfo->stat_lock); ··· 1328 1328 } 1329 1329 1330 1330 #ifdef CONFIG_TMPFS 1331 - 1332 - static int shmem_set_size(struct shmem_sb_info *sbinfo, 1333 - unsigned long max_blocks, unsigned long max_inodes) 1334 - { 1335 - int error; 1336 - unsigned long blocks, inodes; 1337 - 1338 - spin_lock(&sbinfo->stat_lock); 1339 - blocks = sbinfo->max_blocks - sbinfo->free_blocks; 1340 - inodes = sbinfo->max_inodes - sbinfo->free_inodes; 1341 - error = -EINVAL; 1342 - if (max_blocks < blocks) 1343 - goto out; 1344 - if (max_inodes < inodes) 1345 - goto out; 1346 - error = 0; 1347 - sbinfo->max_blocks = max_blocks; 1348 - sbinfo->free_blocks = max_blocks - blocks; 1349 - sbinfo->max_inodes = max_inodes; 1350 - sbinfo->free_inodes = max_inodes - inodes; 1351 - out: 1352 - spin_unlock(&sbinfo->stat_lock); 1353 - return error; 1354 - } 1355 - 1356 1331 static struct inode_operations shmem_symlink_inode_operations; 1357 1332 static struct inode_operations shmem_symlink_inline_operations; 1358 1333 ··· 1582 1607 buf->f_type = TMPFS_MAGIC; 1583 1608 buf->f_bsize = PAGE_CACHE_SIZE; 1584 1609 buf->f_namelen = NAME_MAX; 1585 - if (sbinfo) { 1586 - spin_lock(&sbinfo->stat_lock); 1610 + spin_lock(&sbinfo->stat_lock); 1611 + if (sbinfo->max_blocks) { 1587 1612 buf->f_blocks = sbinfo->max_blocks; 1588 1613 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 1614 + } 1615 + if (sbinfo->max_inodes) { 1589 1616 buf->f_files = sbinfo->max_inodes; 1590 1617 buf->f_ffree = sbinfo->free_inodes; 1591 - spin_unlock(&sbinfo->stat_lock); 1592 1618 } 1593 1619 /* else leave those fields 0 like simple_statfs */ 1620 + spin_unlock(&sbinfo->stat_lock); 1594 1621 return 0; 1595 1622 } 1596 1623 ··· 1649 1672 * but each new link needs a new dentry, pinning lowmem, and 1650 1673 * tmpfs dentries cannot be pruned until they are unlinked. 
1651 1674 */ 1652 - if (sbinfo) { 1675 + if (sbinfo->max_inodes) { 1653 1676 spin_lock(&sbinfo->stat_lock); 1654 1677 if (!sbinfo->free_inodes) { 1655 1678 spin_unlock(&sbinfo->stat_lock); ··· 1674 1697 1675 1698 if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) { 1676 1699 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 1677 - if (sbinfo) { 1700 + if (sbinfo->max_inodes) { 1678 1701 spin_lock(&sbinfo->stat_lock); 1679 1702 sbinfo->free_inodes++; 1680 1703 spin_unlock(&sbinfo->stat_lock); ··· 1898 1921 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) 1899 1922 { 1900 1923 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 1901 - unsigned long max_blocks = 0; 1902 - unsigned long max_inodes = 0; 1924 + unsigned long max_blocks = sbinfo->max_blocks; 1925 + unsigned long max_inodes = sbinfo->max_inodes; 1926 + unsigned long blocks; 1927 + unsigned long inodes; 1928 + int error = -EINVAL; 1903 1929 1904 - if (sbinfo) { 1905 - max_blocks = sbinfo->max_blocks; 1906 - max_inodes = sbinfo->max_inodes; 1907 - } 1908 - if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes)) 1909 - return -EINVAL; 1910 - /* Keep it simple: disallow limited <-> unlimited remount */ 1911 - if ((max_blocks || max_inodes) == !sbinfo) 1912 - return -EINVAL; 1913 - /* But allow the pointless unlimited -> unlimited remount */ 1914 - if (!sbinfo) 1915 - return 0; 1916 - return shmem_set_size(sbinfo, max_blocks, max_inodes); 1930 + if (shmem_parse_options(data, NULL, NULL, NULL, 1931 + &max_blocks, &max_inodes)) 1932 + return error; 1933 + 1934 + spin_lock(&sbinfo->stat_lock); 1935 + blocks = sbinfo->max_blocks - sbinfo->free_blocks; 1936 + inodes = sbinfo->max_inodes - sbinfo->free_inodes; 1937 + if (max_blocks < blocks) 1938 + goto out; 1939 + if (max_inodes < inodes) 1940 + goto out; 1941 + /* 1942 + * Those tests also disallow limited->unlimited while any are in 1943 + * use, so i_blocks will always be zero when max_blocks is zero; 1944 + * but we 
must separately disallow unlimited->limited, because 1945 + * in that case we have no record of how much is already in use. 1946 + */ 1947 + if (max_blocks && !sbinfo->max_blocks) 1948 + goto out; 1949 + if (max_inodes && !sbinfo->max_inodes) 1950 + goto out; 1951 + 1952 + error = 0; 1953 + sbinfo->max_blocks = max_blocks; 1954 + sbinfo->free_blocks = max_blocks - blocks; 1955 + sbinfo->max_inodes = max_inodes; 1956 + sbinfo->free_inodes = max_inodes - inodes; 1957 + out: 1958 + spin_unlock(&sbinfo->stat_lock); 1959 + return error; 1917 1960 } 1918 1961 #endif 1919 1962 ··· 1958 1961 uid_t uid = current->fsuid; 1959 1962 gid_t gid = current->fsgid; 1960 1963 int err = -ENOMEM; 1961 - 1962 - #ifdef CONFIG_TMPFS 1964 + struct shmem_sb_info *sbinfo; 1963 1965 unsigned long blocks = 0; 1964 1966 unsigned long inodes = 0; 1965 1967 1968 + #ifdef CONFIG_TMPFS 1966 1969 /* 1967 1970 * Per default we only allow half of the physical ram per 1968 1971 * tmpfs instance, limiting inodes to one per page of lowmem; ··· 1973 1976 inodes = totalram_pages - totalhigh_pages; 1974 1977 if (inodes > blocks) 1975 1978 inodes = blocks; 1976 - 1977 - if (shmem_parse_options(data, &mode, 1978 - &uid, &gid, &blocks, &inodes)) 1979 + if (shmem_parse_options(data, &mode, &uid, &gid, 1980 + &blocks, &inodes)) 1979 1981 return -EINVAL; 1980 1982 } 1981 - 1982 - if (blocks || inodes) { 1983 - struct shmem_sb_info *sbinfo; 1984 - sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL); 1985 - if (!sbinfo) 1986 - return -ENOMEM; 1987 - sb->s_fs_info = sbinfo; 1988 - spin_lock_init(&sbinfo->stat_lock); 1989 - sbinfo->max_blocks = blocks; 1990 - sbinfo->free_blocks = blocks; 1991 - sbinfo->max_inodes = inodes; 1992 - sbinfo->free_inodes = inodes; 1993 - } 1994 - sb->s_xattr = shmem_xattr_handlers; 1995 1983 #else 1996 1984 sb->s_flags |= MS_NOUSER; 1997 1985 #endif 1998 1986 1987 + /* Round up to L1_CACHE_BYTES to resist false sharing */ 1988 + sbinfo = kmalloc(max((int)sizeof(struct 
shmem_sb_info), 1989 + L1_CACHE_BYTES), GFP_KERNEL); 1990 + if (!sbinfo) 1991 + return -ENOMEM; 1992 + 1993 + spin_lock_init(&sbinfo->stat_lock); 1994 + sbinfo->max_blocks = blocks; 1995 + sbinfo->free_blocks = blocks; 1996 + sbinfo->max_inodes = inodes; 1997 + sbinfo->free_inodes = inodes; 1998 + 1999 + sb->s_fs_info = sbinfo; 1999 2000 sb->s_maxbytes = SHMEM_MAX_BYTES; 2000 2001 sb->s_blocksize = PAGE_CACHE_SIZE; 2001 2002 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 2002 2003 sb->s_magic = TMPFS_MAGIC; 2003 2004 sb->s_op = &shmem_ops; 2005 + sb->s_xattr = shmem_xattr_handlers; 2006 + 2004 2007 inode = shmem_get_inode(sb, S_IFDIR | mode, 0); 2005 2008 if (!inode) 2006 2009 goto failed;