Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

shmem: add support to ignore swap

When experimenting with shmem, having the option to avoid swap becomes a
useful mechanism. One of the *raves* about brd over shmem is that you can
avoid swap, but that's not really a good reason to use brd if we can
instead use shmem. brd has its own good reasons to exist, but the fact
that "tmpfs" doesn't let you avoid swap is not a great reason to avoid
tmpfs if we can easily add support for it.

This does not add support for reconfiguring incompatible options on
remount, but if we really wanted to we could add support for that.

To avoid swap we use mapping_set_unevictable() upon inode creation, and
put a WARN_ON_ONCE() stop-gap on writepages() for reclaim.

Link: https://lkml.kernel.org/r/20230309230545.2930737-7-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Acked-by: Christian Brauner <brauner@kernel.org>
Tested-by: Xin Hao <xhao@linux.alibaba.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Cc: Adam Manzanares <a.manzanares@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Luis Chamberlain and committed by
Andrew Morton
2c6efe9c d0f5a854

+36 -4
+6 -3
Documentation/filesystems/tmpfs.rst
··· 13 13 14 14 tmpfs puts everything into the kernel internal caches and grows and 15 15 shrinks to accommodate the files it contains and is able to swap 16 - unneeded pages out to swap space, and supports THP. 16 + unneeded pages out to swap space, if swap was enabled for the tmpfs 17 + mount. tmpfs also supports THP. 17 18 18 19 tmpfs extends ramfs with a few userspace configurable options listed and 19 20 explained further below, some of which can be reconfigured dynamically on the ··· 34 33 Contrary to brd ramdisks, tmpfs has its own filesystem, it does not rely on the 35 34 block layer at all. 36 35 37 - Since tmpfs lives completely in the page cache and on swap, all tmpfs 38 - pages will be shown as "Shmem" in /proc/meminfo and "Shared" in 36 + Since tmpfs lives completely in the page cache and optionally on swap, 37 + all tmpfs pages will be shown as "Shmem" in /proc/meminfo and "Shared" in 39 38 free(1). Notice that these counters also include shared memory 40 39 (shmem, see ipcs(1)). The most reliable way to get the count is 41 40 using df(1) and du(1). ··· 84 83 is half of the number of your physical RAM pages, or (on a 85 84 machine with highmem) the number of lowmem RAM pages, 86 85 whichever is the lower. 86 + noswap Disables swap. Remounts must respect the original settings. 87 + By default swap is enabled. 87 88 ========= ============================================================ 88 89 89 90 These parameters accept a suffix k, m or g for kilo, mega and giga and
+2
Documentation/mm/unevictable-lru.rst
··· 42 42 43 43 * Those owned by ramfs. 44 44 45 + * Those owned by tmpfs with the noswap mount option. 46 + 45 47 * Those mapped into SHM_LOCK'd shared memory regions. 46 48 47 49 * Those mapped into VM_LOCKED [mlock()ed] VMAs.
+1
include/linux/shmem_fs.h
··· 45 45 kuid_t uid; /* Mount uid for root directory */ 46 46 kgid_t gid; /* Mount gid for root directory */ 47 47 bool full_inums; /* If i_ino should be uint or ino_t */ 48 + bool noswap; /* ignores VM reclaim / swap requests */ 48 49 ino_t next_ino; /* The next per-sb inode number to use */ 49 50 ino_t __percpu *ino_batch; /* The next per-cpu inode number to use */ 50 51 struct mempolicy *mpol; /* default memory policy for mappings */
+27 -1
mm/shmem.c
··· 116 116 bool full_inums; 117 117 int huge; 118 118 int seen; 119 + bool noswap; 119 120 #define SHMEM_SEEN_BLOCKS 1 120 121 #define SHMEM_SEEN_INODES 2 121 122 #define SHMEM_SEEN_HUGE 4 122 123 #define SHMEM_SEEN_INUMS 8 124 + #define SHMEM_SEEN_NOSWAP 16 123 125 }; 124 126 125 127 #ifdef CONFIG_TMPFS ··· 1336 1334 struct address_space *mapping = folio->mapping; 1337 1335 struct inode *inode = mapping->host; 1338 1336 struct shmem_inode_info *info = SHMEM_I(inode); 1337 + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 1339 1338 swp_entry_t swap; 1340 1339 pgoff_t index; 1341 1340 ··· 1350 1347 if (WARN_ON_ONCE(!wbc->for_reclaim)) 1351 1348 goto redirty; 1352 1349 1353 - if (WARN_ON_ONCE(info->flags & VM_LOCKED)) 1350 + if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap)) 1354 1351 goto redirty; 1355 1352 1356 1353 if (!total_swap_pages) ··· 2375 2372 shmem_set_inode_flags(inode, info->fsflags); 2376 2373 INIT_LIST_HEAD(&info->shrinklist); 2377 2374 INIT_LIST_HEAD(&info->swaplist); 2375 + if (sbinfo->noswap) 2376 + mapping_set_unevictable(inode->i_mapping); 2378 2377 simple_xattrs_init(&info->xattrs); 2379 2378 cache_no_acl(inode); 2380 2379 mapping_set_large_folios(inode->i_mapping); ··· 3464 3459 Opt_uid, 3465 3460 Opt_inode32, 3466 3461 Opt_inode64, 3462 + Opt_noswap, 3467 3463 }; 3468 3464 3469 3465 static const struct constant_table shmem_param_enums_huge[] = { ··· 3486 3480 fsparam_u32 ("uid", Opt_uid), 3487 3481 fsparam_flag ("inode32", Opt_inode32), 3488 3482 fsparam_flag ("inode64", Opt_inode64), 3483 + fsparam_flag ("noswap", Opt_noswap), 3489 3484 {} 3490 3485 }; 3491 3486 ··· 3569 3562 } 3570 3563 ctx->full_inums = true; 3571 3564 ctx->seen |= SHMEM_SEEN_INUMS; 3565 + break; 3566 + case Opt_noswap: 3567 + ctx->noswap = true; 3568 + ctx->seen |= SHMEM_SEEN_NOSWAP; 3572 3569 break; 3573 3570 } 3574 3571 return 0; ··· 3672 3661 err = "Current inum too high to switch to 32-bit inums"; 3673 3662 goto out; 3674 3663 } 3664 + if ((ctx->seen 
& SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { 3665 + err = "Cannot disable swap on remount"; 3666 + goto out; 3667 + } 3668 + if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { 3669 + err = "Cannot enable swap on remount if it was disabled on first mount"; 3670 + goto out; 3671 + } 3675 3672 3676 3673 if (ctx->seen & SHMEM_SEEN_HUGE) 3677 3674 sbinfo->huge = ctx->huge; ··· 3700 3681 sbinfo->mpol = ctx->mpol; /* transfers initial ref */ 3701 3682 ctx->mpol = NULL; 3702 3683 } 3684 + 3685 + if (ctx->noswap) 3686 + sbinfo->noswap = true; 3687 + 3703 3688 raw_spin_unlock(&sbinfo->stat_lock); 3704 3689 mpol_put(mpol); 3705 3690 return 0; ··· 3758 3735 seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); 3759 3736 #endif 3760 3737 shmem_show_mpol(seq, sbinfo->mpol); 3738 + if (sbinfo->noswap) 3739 + seq_printf(seq, ",noswap"); 3761 3740 return 0; 3762 3741 } 3763 3742 ··· 3803 3778 ctx->inodes = shmem_default_max_inodes(); 3804 3779 if (!(ctx->seen & SHMEM_SEEN_INUMS)) 3805 3780 ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); 3781 + sbinfo->noswap = ctx->noswap; 3806 3782 } else { 3807 3783 sb->s_flags |= SB_NOUSER; 3808 3784 }