fs/nfsd/filecache.c at master · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / fs / nfsd / filecache.c
at master 1430 lines 39 kB view raw
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * The NFSD open file cache.
   4 *
   5 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
   6 *
   7 * An nfsd_file object is a per-file collection of open state that binds
   8 * together:
   9 *   - a struct file *
  10 *   - a user credential
  11 *   - a network namespace
  12 *   - a read-ahead context
  13 *   - monitoring for writeback errors
  14 *
  15 * nfsd_file objects are reference-counted. Consumers acquire a new
  16 * object via the nfsd_file_acquire API. They manage their interest in
  17 * the acquired object, and hence the object's reference count, via
  18 * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
  19 * object:
  20 *
  21 *  * non-garbage-collected: When a consumer wants to precisely control
  22 *    the lifetime of a file's open state, it acquires a non-garbage-
  23 *    collected nfsd_file. The final nfsd_file_put releases the open
  24 *    state immediately.
  25 *
  26 *  * garbage-collected: When a consumer does not control the lifetime
  27 *    of open state, it acquires a garbage-collected nfsd_file. The
  28 *    final nfsd_file_put allows the open state to linger for a period
  29 *    during which it may be re-used.
  30 */
  31
  32#include <linux/hash.h>
  33#include <linux/slab.h>
  34#include <linux/file.h>
  35#include <linux/pagemap.h>
  36#include <linux/sched.h>
  37#include <linux/list_lru.h>
  38#include <linux/fsnotify_backend.h>
  39#include <linux/fsnotify.h>
  40#include <linux/seq_file.h>
  41#include <linux/rhashtable.h>
  42#include <linux/nfslocalio.h>
  43
  44#include "vfs.h"
  45#include "nfsd.h"
  46#include "nfsfh.h"
  47#include "netns.h"
  48#include "filecache.h"
  49#include "trace.h"
  50
  51#define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)
  52
  53#define NFSD_FILE_CACHE_UP		     (0)
  54
  55/* We only care about NFSD_MAY_READ/WRITE for this cache */
  56#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO)
  57
  58static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
  59static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
  60static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations);
  61static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
  62static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
  63static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
  64
  65struct nfsd_fcache_disposal {
  66	spinlock_t lock;
  67	struct list_head freeme;
  68};
  69
  70static struct kmem_cache		*nfsd_file_slab;
  71static struct kmem_cache		*nfsd_file_mark_slab;
  72static struct list_lru			nfsd_file_lru;
  73static unsigned long			nfsd_file_flags;
  74static struct fsnotify_group		*nfsd_file_fsnotify_group;
  75static struct delayed_work		nfsd_filecache_laundrette;
  76static struct rhltable			nfsd_file_rhltable
  77						____cacheline_aligned_in_smp;
  78
  79static bool
  80nfsd_match_cred(const struct cred *c1, const struct cred *c2)
  81{
  82	int i;
  83
  84	if (!uid_eq(c1->fsuid, c2->fsuid))
  85		return false;
  86	if (!gid_eq(c1->fsgid, c2->fsgid))
  87		return false;
  88	if (c1->group_info == NULL || c2->group_info == NULL)
  89		return c1->group_info == c2->group_info;
  90	if (c1->group_info->ngroups != c2->group_info->ngroups)
  91		return false;
  92	for (i = 0; i < c1->group_info->ngroups; i++) {
  93		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
  94			return false;
  95	}
  96	return true;
  97}
  98
  99static const struct rhashtable_params nfsd_file_rhash_params = {
 100	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
 101	.key_offset		= offsetof(struct nfsd_file, nf_inode),
 102	.head_offset		= offsetof(struct nfsd_file, nf_rlist),
 103
 104	/*
 105	 * Start with a single page hash table to reduce resizing churn
 106	 * on light workloads.
 107	 */
 108	.min_size		= 256,
 109	.automatic_shrinking	= true,
 110};
 111
 112static void
 113nfsd_file_schedule_laundrette(void)
 114{
 115	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
 116		queue_delayed_work(system_dfl_wq, &nfsd_filecache_laundrette,
 117				   NFSD_LAUNDRETTE_DELAY);
 118}
 119
 120static void
 121nfsd_file_slab_free(struct rcu_head *rcu)
 122{
 123	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
 124
 125	put_cred(nf->nf_cred);
 126	kmem_cache_free(nfsd_file_slab, nf);
 127}
 128
 129static void
 130nfsd_file_mark_free(struct fsnotify_mark *mark)
 131{
 132	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
 133						  nfm_mark);
 134
 135	kmem_cache_free(nfsd_file_mark_slab, nfm);
 136}
 137
 138static struct nfsd_file_mark *
 139nfsd_file_mark_get(struct nfsd_file_mark *nfm)
 140{
 141	if (!refcount_inc_not_zero(&nfm->nfm_ref))
 142		return NULL;
 143	return nfm;
 144}
 145
 146static void
 147nfsd_file_mark_put(struct nfsd_file_mark *nfm)
 148{
 149	if (refcount_dec_and_test(&nfm->nfm_ref)) {
 150		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
 151		fsnotify_put_mark(&nfm->nfm_mark);
 152	}
 153}
 154
 155static struct nfsd_file_mark *
 156nfsd_file_mark_find_or_create(struct inode *inode)
 157{
 158	int			err;
 159	struct fsnotify_mark	*mark;
 160	struct nfsd_file_mark	*nfm = NULL, *new;
 161
 162	do {
 163		fsnotify_group_lock(nfsd_file_fsnotify_group);
 164		mark = fsnotify_find_inode_mark(inode,
 165						nfsd_file_fsnotify_group);
 166		if (mark) {
 167			nfm = nfsd_file_mark_get(container_of(mark,
 168						 struct nfsd_file_mark,
 169						 nfm_mark));
 170			fsnotify_group_unlock(nfsd_file_fsnotify_group);
 171			if (nfm) {
 172				fsnotify_put_mark(mark);
 173				break;
 174			}
 175			/* Avoid soft lockup race with nfsd_file_mark_put() */
 176			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
 177			fsnotify_put_mark(mark);
 178		} else {
 179			fsnotify_group_unlock(nfsd_file_fsnotify_group);
 180		}
 181
 182		/* allocate a new nfm */
 183		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
 184		if (!new)
 185			return NULL;
 186		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
 187		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
 188		refcount_set(&new->nfm_ref, 1);
 189
 190		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
 191
 192		/*
 193		 * If the add was successful, then return the object.
 194		 * Otherwise, we need to put the reference we hold on the
 195		 * nfm_mark. The fsnotify code will take a reference and put
 196		 * it on failure, so we can't just free it directly. It's also
 197		 * not safe to call fsnotify_destroy_mark on it as the
 198		 * mark->group will be NULL. Thus, we can't let the nfm_ref
 199		 * counter drive the destruction at this point.
 200		 */
 201		if (likely(!err))
 202			nfm = new;
 203		else
 204			fsnotify_put_mark(&new->nfm_mark);
 205	} while (unlikely(err == -EEXIST));
 206
 207	return nfm;
 208}
 209
 210static struct nfsd_file *
 211nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
 212		bool want_gc)
 213{
 214	struct nfsd_file *nf;
 215
 216	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
 217	if (unlikely(!nf))
 218		return NULL;
 219
 220	this_cpu_inc(nfsd_file_allocations);
 221	INIT_LIST_HEAD(&nf->nf_lru);
 222	INIT_LIST_HEAD(&nf->nf_gc);
 223	nf->nf_birthtime = ktime_get();
 224	nf->nf_file = NULL;
 225	nf->nf_cred = get_current_cred();
 226	nf->nf_net = net;
 227	nf->nf_flags = want_gc ?
 228		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
 229		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
 230	nf->nf_inode = inode;
 231	refcount_set(&nf->nf_ref, 1);
 232	nf->nf_may = need;
 233	nf->nf_mark = NULL;
 234	nf->nf_dio_mem_align = 0;
 235	nf->nf_dio_offset_align = 0;
 236	nf->nf_dio_read_offset_align = 0;
 237	return nf;
 238}
 239
 240/**
 241 * nfsd_file_check_write_error - check for writeback errors on a file
 242 * @nf: nfsd_file to check for writeback errors
 243 *
 244 * Check whether a nfsd_file has an unseen error. Reset the write
 245 * verifier if so.
 246 */
 247static void
 248nfsd_file_check_write_error(struct nfsd_file *nf)
 249{
 250	struct file *file = nf->nf_file;
 251
 252	if ((file->f_mode & FMODE_WRITE) &&
 253	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
 254		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
 255}
 256
 257static void
 258nfsd_file_hash_remove(struct nfsd_file *nf)
 259{
 260	trace_nfsd_file_unhash(nf);
 261	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
 262			nfsd_file_rhash_params);
 263}
 264
 265static bool
 266nfsd_file_unhash(struct nfsd_file *nf)
 267{
 268	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
 269		nfsd_file_hash_remove(nf);
 270		return true;
 271	}
 272	return false;
 273}
 274
 275static void
 276nfsd_file_free(struct nfsd_file *nf)
 277{
 278	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
 279
 280	trace_nfsd_file_free(nf);
 281
 282	this_cpu_inc(nfsd_file_releases);
 283	this_cpu_add(nfsd_file_total_age, age);
 284
 285	nfsd_file_unhash(nf);
 286	if (nf->nf_mark)
 287		nfsd_file_mark_put(nf->nf_mark);
 288	if (nf->nf_file) {
 289		nfsd_file_check_write_error(nf);
 290		nfsd_filp_close(nf->nf_file);
 291	}
 292
 293	/*
 294	 * If this item is still linked via nf_lru, that's a bug.
 295	 * WARN and leak it to preserve system stability.
 296	 */
 297	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
 298		return;
 299
 300	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
 301}
 302
 303static bool
 304nfsd_file_check_writeback(struct nfsd_file *nf)
 305{
 306	struct file *file = nf->nf_file;
 307	struct address_space *mapping;
 308
 309	/* File not open for write? */
 310	if (!(file->f_mode & FMODE_WRITE))
 311		return false;
 312
 313	/*
 314	 * Some filesystems (e.g. NFS) flush all dirty data on close.
 315	 * On others, there is no need to wait for writeback.
 316	 */
 317	if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
 318		return false;
 319
 320	mapping = file->f_mapping;
 321	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
 322		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
 323}
 324
 325static void nfsd_file_lru_add(struct nfsd_file *nf)
 326{
 327	refcount_inc(&nf->nf_ref);
 328	if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru))
 329		trace_nfsd_file_lru_add(nf);
 330	else
 331		WARN_ON(1);
 332	nfsd_file_schedule_laundrette();
 333}
 334
 335static bool nfsd_file_lru_remove(struct nfsd_file *nf)
 336{
 337	if (list_lru_del_obj(&nfsd_file_lru, &nf->nf_lru)) {
 338		trace_nfsd_file_lru_del(nf);
 339		return true;
 340	}
 341	return false;
 342}
 343
 344struct nfsd_file *
 345nfsd_file_get(struct nfsd_file *nf)
 346{
 347	if (nf && refcount_inc_not_zero(&nf->nf_ref))
 348		return nf;
 349	return NULL;
 350}
 351
 352/**
 353 * nfsd_file_put - put the reference to a nfsd_file
 354 * @nf: nfsd_file of which to put the reference
 355 *
 356 * Put a reference to a nfsd_file. In the non-GC case, we just put the
 357 * reference immediately. In the GC case, if the reference would be
 358 * the last one, the put it on the LRU instead to be cleaned up later.
 359 */
 360void
 361nfsd_file_put(struct nfsd_file *nf)
 362{
 363	might_sleep();
 364	trace_nfsd_file_put(nf);
 365
 366	if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
 367	    test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
 368		set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
 369		set_bit(NFSD_FILE_RECENT, &nf->nf_flags);
 370	}
 371
 372	if (refcount_dec_and_test(&nf->nf_ref))
 373		nfsd_file_free(nf);
 374}
 375
 376/**
 377 * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller
 378 * @pnf: nfsd_file of which to put the reference
 379 *
 380 * First save the associated net to return to caller, then put
 381 * the reference of the nfsd_file.
 382 */
 383struct net *
 384nfsd_file_put_local(struct nfsd_file __rcu **pnf)
 385{
 386	struct nfsd_file *nf;
 387	struct net *net = NULL;
 388
 389	nf = unrcu_pointer(xchg(pnf, NULL));
 390	if (nf) {
 391		net = nf->nf_net;
 392		nfsd_file_put(nf);
 393	}
 394	return net;
 395}
 396
 397/**
 398 * nfsd_file_file - get the backing file of an nfsd_file
 399 * @nf: nfsd_file of which to access the backing file.
 400 *
 401 * Return backing file for @nf.
 402 */
 403struct file *
 404nfsd_file_file(struct nfsd_file *nf)
 405{
 406	return nf->nf_file;
 407}
 408
 409static void
 410nfsd_file_dispose_list(struct list_head *dispose)
 411{
 412	struct nfsd_file *nf;
 413
 414	while (!list_empty(dispose)) {
 415		nf = list_first_entry(dispose, struct nfsd_file, nf_gc);
 416		list_del_init(&nf->nf_gc);
 417		nfsd_file_free(nf);
 418	}
 419}
 420
 421/**
 422 * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
 423 * @dispose: list of nfsd_files to be disposed
 424 *
 425 * Transfers each file to the "freeme" list for its nfsd_net, to eventually
 426 * be disposed of by the per-net garbage collector.
 427 */
 428static void
 429nfsd_file_dispose_list_delayed(struct list_head *dispose)
 430{
 431	while(!list_empty(dispose)) {
 432		struct nfsd_file *nf = list_first_entry(dispose,
 433						struct nfsd_file, nf_gc);
 434		struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
 435		struct nfsd_fcache_disposal *l = nn->fcache_disposal;
 436		struct svc_serv *serv;
 437
 438		spin_lock(&l->lock);
 439		list_move_tail(&nf->nf_gc, &l->freeme);
 440		spin_unlock(&l->lock);
 441
 442		/*
 443		 * The filecache laundrette is shut down after the
 444		 * nn->nfsd_serv pointer is cleared, but before the
 445		 * svc_serv is freed.
 446		 */
 447		serv = nn->nfsd_serv;
 448		if (serv)
 449			svc_wake_up(serv);
 450	}
 451}
 452
 453/**
 454 * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed.
 455 * @nn: nfsd_net in which to find files to be disposed.
 456 *
 457 * When files held open for nfsv3 are removed from the filecache, whether
 458 * due to memory pressure or garbage collection, they are queued to
 459 * a per-net-ns queue.  This function completes the disposal, either
 460 * directly or by waking another nfsd thread to help with the work.
 461 */
 462void nfsd_file_net_dispose(struct nfsd_net *nn)
 463{
 464	struct nfsd_fcache_disposal *l = nn->fcache_disposal;
 465
 466	if (!list_empty(&l->freeme)) {
 467		LIST_HEAD(dispose);
 468		int i;
 469
 470		spin_lock(&l->lock);
 471		for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
 472			list_move(l->freeme.next, &dispose);
 473		spin_unlock(&l->lock);
 474		if (!list_empty(&l->freeme))
 475			/* Wake up another thread to share the work
 476			 * *before* doing any actual disposing.
 477			 */
 478			svc_wake_up(nn->nfsd_serv);
 479		nfsd_file_dispose_list(&dispose);
 480	}
 481}
 482
 483/**
 484 * nfsd_file_lru_cb - Examine an entry on the LRU list
 485 * @item: LRU entry to examine
 486 * @lru: controlling LRU
 487 * @arg: dispose list
 488 *
 489 * Return values:
 490 *   %LRU_REMOVED: @item was removed from the LRU
 491 *   %LRU_ROTATE: @item is to be moved to the LRU tail
 492 *   %LRU_SKIP: @item cannot be evicted
 493 */
 494static enum lru_status
 495nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
 496		 void *arg)
 497{
 498	struct list_head *head = arg;
 499	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
 500
 501	/* We should only be dealing with GC entries here */
 502	WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));
 503
 504	/*
 505	 * Don't throw out files that are still undergoing I/O or
 506	 * that have uncleared errors pending.
 507	 */
 508	if (nfsd_file_check_writeback(nf)) {
 509		trace_nfsd_file_gc_writeback(nf);
 510		return LRU_SKIP;
 511	}
 512
 513	/* If it was recently added to the list, skip it */
 514	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
 515		trace_nfsd_file_gc_referenced(nf);
 516		return LRU_ROTATE;
 517	}
 518
 519	/*
 520	 * Put the reference held on behalf of the LRU if it is the last
 521	 * reference, else rotate.
 522	 */
 523	if (!refcount_dec_if_one(&nf->nf_ref)) {
 524		trace_nfsd_file_gc_in_use(nf);
 525		return LRU_ROTATE;
 526	}
 527
 528	/* Refcount went to zero. Unhash it and queue it to the dispose list */
 529	nfsd_file_unhash(nf);
 530	list_lru_isolate(lru, &nf->nf_lru);
 531	list_add(&nf->nf_gc, head);
 532	this_cpu_inc(nfsd_file_evictions);
 533	trace_nfsd_file_gc_disposed(nf);
 534	return LRU_REMOVED;
 535}
 536
 537static enum lru_status
 538nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru,
 539		 void *arg)
 540{
 541	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
 542
 543	if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) {
 544		/*
 545		 * "REFERENCED" really means "should be at the end of the
 546		 * LRU. As we are putting it there we can clear the flag.
 547		 */
 548		clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
 549		trace_nfsd_file_gc_aged(nf);
 550		return LRU_ROTATE;
 551	}
 552	return nfsd_file_lru_cb(item, lru, arg);
 553}
 554
 555/* If the shrinker runs between calls to list_lru_walk_node() in
 556 * nfsd_file_gc(), the "remaining" count will be wrong.  This could
 557 * result in premature freeing of some files.  This may not matter much
 558 * but is easy to fix with this spinlock which temporarily disables
 559 * the shrinker.
 560 */
 561static DEFINE_SPINLOCK(nfsd_gc_lock);
 562static void
 563nfsd_file_gc(void)
 564{
 565	unsigned long ret = 0;
 566	LIST_HEAD(dispose);
 567	int nid;
 568
 569	spin_lock(&nfsd_gc_lock);
 570	for_each_node_state(nid, N_NORMAL_MEMORY) {
 571		unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid);
 572
 573		while (remaining > 0) {
 574			unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH);
 575
 576			remaining -= nr;
 577			ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb,
 578						  &dispose, &nr);
 579			if (nr)
 580				/* walk aborted early */
 581				remaining = 0;
 582		}
 583	}
 584	spin_unlock(&nfsd_gc_lock);
 585	trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
 586	nfsd_file_dispose_list_delayed(&dispose);
 587}
 588
 589static void
 590nfsd_file_gc_worker(struct work_struct *work)
 591{
 592	if (list_lru_count(&nfsd_file_lru))
 593		nfsd_file_gc();
 594	nfsd_file_schedule_laundrette();
 595}
 596
 597static unsigned long
 598nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
 599{
 600	return list_lru_count(&nfsd_file_lru);
 601}
 602
 603static unsigned long
 604nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
 605{
 606	LIST_HEAD(dispose);
 607	unsigned long ret;
 608
 609	if (!spin_trylock(&nfsd_gc_lock))
 610		return SHRINK_STOP;
 611
 612	ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
 613				   nfsd_file_lru_cb, &dispose);
 614	spin_unlock(&nfsd_gc_lock);
 615	trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
 616	nfsd_file_dispose_list_delayed(&dispose);
 617	return ret;
 618}
 619
 620static struct shrinker *nfsd_file_shrinker;
 621
 622/**
 623 * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
 624 * @nf: nfsd_file to attempt to queue
 625 * @dispose: private list to queue successfully-put objects
 626 *
 627 * Unhash an nfsd_file, try to get a reference to it, and then put that
 628 * reference. If it's the last reference, queue it to the dispose list.
 629 */
 630static void
 631nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
 632	__must_hold(RCU)
 633{
 634	int decrement = 1;
 635
 636	/* If we raced with someone else unhashing, ignore it */
 637	if (!nfsd_file_unhash(nf))
 638		return;
 639
 640	/* If we can't get a reference, ignore it */
 641	if (!nfsd_file_get(nf))
 642		return;
 643
 644	/* Extra decrement if we remove from the LRU */
 645	if (nfsd_file_lru_remove(nf))
 646		++decrement;
 647
 648	/* If refcount goes to 0, then put on the dispose list */
 649	if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
 650		list_add(&nf->nf_gc, dispose);
 651		trace_nfsd_file_closing(nf);
 652	}
 653}
 654
 655/**
 656 * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
 657 * @inode:   inode on which to close out nfsd_files
 658 * @dispose: list on which to gather nfsd_files to close out
 659 *
 660 * An nfsd_file represents a struct file being held open on behalf of nfsd.
 661 * An open file however can block other activity (such as leases), or cause
 662 * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
 663 *
 664 * This function is intended to find open nfsd_files when this sort of
 665 * conflicting access occurs and then attempt to close those files out.
 666 *
 667 * Populates the dispose list with entries that have already had their
 668 * refcounts go to zero. The actual free of an nfsd_file can be expensive,
 669 * so we leave it up to the caller whether it wants to wait or not.
 670 */
 671static void
 672nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
 673{
 674	struct rhlist_head *tmp, *list;
 675	struct nfsd_file *nf;
 676
 677	rcu_read_lock();
 678	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
 679			       nfsd_file_rhash_params);
 680	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
 681		if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
 682			continue;
 683		nfsd_file_cond_queue(nf, dispose);
 684	}
 685	rcu_read_unlock();
 686}
 687
 688/**
 689 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 690 * @inode: inode of the file to attempt to remove
 691 *
 692 * Close out any open nfsd_files that can be reaped for @inode. The
 693 * actual freeing is deferred to the dispose_list_delayed infrastructure.
 694 *
 695 * This is used by the fsnotify callbacks and setlease notifier.
 696 */
 697static void
 698nfsd_file_close_inode(struct inode *inode)
 699{
 700	LIST_HEAD(dispose);
 701
 702	nfsd_file_queue_for_close(inode, &dispose);
 703	nfsd_file_dispose_list_delayed(&dispose);
 704}
 705
 706/**
 707 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 708 * @inode: inode of the file to attempt to remove
 709 *
 710 * Close out any open nfsd_files that can be reaped for @inode. The
 711 * nfsd_files are closed out synchronously.
 712 *
 713 * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames
 714 * when reexporting NFS.
 715 */
 716void
 717nfsd_file_close_inode_sync(struct inode *inode)
 718{
 719	LIST_HEAD(dispose);
 720
 721	trace_nfsd_file_close(inode);
 722
 723	nfsd_file_queue_for_close(inode, &dispose);
 724	nfsd_file_dispose_list(&dispose);
 725}
 726
 727static int
 728nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
 729			    void *data)
 730{
 731	struct file_lease *fl = data;
 732
 733	/* Only close files for F_SETLEASE leases */
 734	if (fl->c.flc_flags & FL_LEASE)
 735		nfsd_file_close_inode(file_inode(fl->c.flc_file));
 736	return 0;
 737}
 738
 739static struct notifier_block nfsd_file_lease_notifier = {
 740	.notifier_call = nfsd_file_lease_notifier_call,
 741};
 742
 743static int
 744nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
 745				struct inode *inode, struct inode *dir,
 746				const struct qstr *name, u32 cookie)
 747{
 748	if (WARN_ON_ONCE(!inode))
 749		return 0;
 750
 751	trace_nfsd_file_fsnotify_handle_event(inode, mask);
 752
 753	/* Should be no marks on non-regular files */
 754	if (!S_ISREG(inode->i_mode)) {
 755		WARN_ON_ONCE(1);
 756		return 0;
 757	}
 758
 759	/* don't close files if this was not the last link */
 760	if (mask & FS_ATTRIB) {
 761		if (inode->i_nlink)
 762			return 0;
 763	}
 764
 765	nfsd_file_close_inode(inode);
 766	return 0;
 767}
 768
 769
 770static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
 771	.handle_inode_event = nfsd_file_fsnotify_handle_event,
 772	.free_mark = nfsd_file_mark_free,
 773};
 774
 775int
 776nfsd_file_cache_init(void)
 777{
 778	int ret;
 779
 780	lockdep_assert_held(&nfsd_mutex);
 781	if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
 782		return 0;
 783
 784	ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
 785	if (ret)
 786		goto out;
 787
 788	ret = -ENOMEM;
 789	nfsd_file_slab = KMEM_CACHE(nfsd_file, 0);
 790	if (!nfsd_file_slab) {
 791		pr_err("nfsd: unable to create nfsd_file_slab\n");
 792		goto out_err;
 793	}
 794
 795	nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0);
 796	if (!nfsd_file_mark_slab) {
 797		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
 798		goto out_err;
 799	}
 800
 801	ret = list_lru_init(&nfsd_file_lru);
 802	if (ret) {
 803		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
 804		goto out_err;
 805	}
 806
 807	nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache");
 808	if (!nfsd_file_shrinker) {
 809		ret = -ENOMEM;
 810		pr_err("nfsd: failed to allocate nfsd_file_shrinker\n");
 811		goto out_lru;
 812	}
 813
 814	nfsd_file_shrinker->count_objects = nfsd_file_lru_count;
 815	nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan;
 816	nfsd_file_shrinker->seeks = 1;
 817
 818	shrinker_register(nfsd_file_shrinker);
 819
 820	ret = lease_register_notifier(&nfsd_file_lease_notifier);
 821	if (ret) {
 822		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
 823		goto out_shrinker;
 824	}
 825
 826	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
 827							0);
 828	if (IS_ERR(nfsd_file_fsnotify_group)) {
 829		pr_err("nfsd: unable to create fsnotify group: %ld\n",
 830			PTR_ERR(nfsd_file_fsnotify_group));
 831		ret = PTR_ERR(nfsd_file_fsnotify_group);
 832		nfsd_file_fsnotify_group = NULL;
 833		goto out_notifier;
 834	}
 835
 836	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
 837out:
 838	if (ret)
 839		clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags);
 840	return ret;
 841out_notifier:
 842	lease_unregister_notifier(&nfsd_file_lease_notifier);
 843out_shrinker:
 844	shrinker_free(nfsd_file_shrinker);
 845out_lru:
 846	list_lru_destroy(&nfsd_file_lru);
 847out_err:
 848	kmem_cache_destroy(nfsd_file_slab);
 849	nfsd_file_slab = NULL;
 850	kmem_cache_destroy(nfsd_file_mark_slab);
 851	nfsd_file_mark_slab = NULL;
 852	rhltable_destroy(&nfsd_file_rhltable);
 853	goto out;
 854}
 855
 856/**
 857 * __nfsd_file_cache_purge: clean out the cache for shutdown
 858 * @net: net-namespace to shut down the cache (may be NULL)
 859 *
 860 * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
 861 * then close out everything. Called when an nfsd instance is being shut down,
 862 * and when the exports table is flushed.
 863 */
 864static void
 865__nfsd_file_cache_purge(struct net *net)
 866{
 867	struct rhashtable_iter iter;
 868	struct nfsd_file *nf;
 869	LIST_HEAD(dispose);
 870
 871#if IS_ENABLED(CONFIG_NFS_LOCALIO)
 872	if (net) {
 873		struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 874		nfs_localio_invalidate_clients(&nn->local_clients,
 875					       &nn->local_clients_lock);
 876	}
 877#endif
 878
 879	rhltable_walk_enter(&nfsd_file_rhltable, &iter);
 880	do {
 881		rhashtable_walk_start(&iter);
 882
 883		nf = rhashtable_walk_next(&iter);
 884		while (!IS_ERR_OR_NULL(nf)) {
 885			if (!net || nf->nf_net == net)
 886				nfsd_file_cond_queue(nf, &dispose);
 887			nf = rhashtable_walk_next(&iter);
 888		}
 889
 890		rhashtable_walk_stop(&iter);
 891	} while (nf == ERR_PTR(-EAGAIN));
 892	rhashtable_walk_exit(&iter);
 893
 894	nfsd_file_dispose_list(&dispose);
 895}
 896
 897static struct nfsd_fcache_disposal *
 898nfsd_alloc_fcache_disposal(void)
 899{
 900	struct nfsd_fcache_disposal *l;
 901
 902	l = kmalloc(sizeof(*l), GFP_KERNEL);
 903	if (!l)
 904		return NULL;
 905	spin_lock_init(&l->lock);
 906	INIT_LIST_HEAD(&l->freeme);
 907	return l;
 908}
 909
 910static void
 911nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
 912{
 913	nfsd_file_dispose_list(&l->freeme);
 914	kfree(l);
 915}
 916
 917static void
 918nfsd_free_fcache_disposal_net(struct net *net)
 919{
 920	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 921	struct nfsd_fcache_disposal *l = nn->fcache_disposal;
 922
 923	nfsd_free_fcache_disposal(l);
 924}
 925
 926int
 927nfsd_file_cache_start_net(struct net *net)
 928{
 929	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 930
 931	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
 932	return nn->fcache_disposal ? 0 : -ENOMEM;
 933}
 934
 935/**
 936 * nfsd_file_cache_purge - Remove all cache items associated with @net
 937 * @net: target net namespace
 938 *
 939 */
 940void
 941nfsd_file_cache_purge(struct net *net)
 942{
 943	lockdep_assert_held(&nfsd_mutex);
 944	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
 945		__nfsd_file_cache_purge(net);
 946}
 947
 948void
 949nfsd_file_cache_shutdown_net(struct net *net)
 950{
 951	nfsd_file_cache_purge(net);
 952	nfsd_free_fcache_disposal_net(net);
 953}
 954
 955void
 956nfsd_file_cache_shutdown(void)
 957{
 958	int i;
 959
 960	lockdep_assert_held(&nfsd_mutex);
 961	if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
 962		return;
 963
 964	lease_unregister_notifier(&nfsd_file_lease_notifier);
 965	shrinker_free(nfsd_file_shrinker);
 966	/*
 967	 * make sure all callers of nfsd_file_lru_cb are done before
 968	 * calling nfsd_file_cache_purge
 969	 */
 970	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
 971	__nfsd_file_cache_purge(NULL);
 972	list_lru_destroy(&nfsd_file_lru);
 973	rcu_barrier();
 974	fsnotify_put_group(nfsd_file_fsnotify_group);
 975	nfsd_file_fsnotify_group = NULL;
 976	kmem_cache_destroy(nfsd_file_slab);
 977	nfsd_file_slab = NULL;
 978	fsnotify_wait_marks_destroyed();
 979	kmem_cache_destroy(nfsd_file_mark_slab);
 980	nfsd_file_mark_slab = NULL;
 981	rhltable_destroy(&nfsd_file_rhltable);
 982
 983	for_each_possible_cpu(i) {
 984		per_cpu(nfsd_file_cache_hits, i) = 0;
 985		per_cpu(nfsd_file_acquisitions, i) = 0;
 986		per_cpu(nfsd_file_allocations, i) = 0;
 987		per_cpu(nfsd_file_releases, i) = 0;
 988		per_cpu(nfsd_file_total_age, i) = 0;
 989		per_cpu(nfsd_file_evictions, i) = 0;
 990	}
 991}
 992
 993static struct nfsd_file *
 994nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
 995			struct inode *inode, unsigned char need,
 996			bool want_gc)
 997{
 998	struct rhlist_head *tmp, *list;
 999	struct nfsd_file *nf;
1000
1001	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
1002			       nfsd_file_rhash_params);
1003	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
1004		if (nf->nf_may != need)
1005			continue;
1006		if (nf->nf_net != net)
1007			continue;
1008		if (!nfsd_match_cred(nf->nf_cred, cred))
1009			continue;
1010		if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
1011			continue;
1012		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
1013			continue;
1014
1015		if (!nfsd_file_get(nf))
1016			continue;
1017		return nf;
1018	}
1019	return NULL;
1020}
1021
1022/**
1023 * nfsd_file_is_cached - are there any cached open files for this inode?
1024 * @inode: inode to check
1025 *
1026 * The lookup matches inodes in all net namespaces and is atomic wrt
1027 * nfsd_file_acquire().
1028 *
1029 * Return values:
1030 *   %true: filecache contains at least one file matching this inode
1031 *   %false: filecache contains no files matching this inode
1032 */
1033bool
1034nfsd_file_is_cached(struct inode *inode)
1035{
1036	struct rhlist_head *tmp, *list;
1037	struct nfsd_file *nf;
1038	bool ret = false;
1039
1040	rcu_read_lock();
1041	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
1042			       nfsd_file_rhash_params);
1043	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
1044		if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
1045			ret = true;
1046			break;
1047		}
1048	rcu_read_unlock();
1049
1050	trace_nfsd_file_is_cached(inode, (int)ret);
1051	return ret;
1052}
1053
1054static __be32
1055nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf)
1056{
1057	struct inode *inode = file_inode(nf->nf_file);
1058	struct kstat stat;
1059	__be32 status;
1060
1061	/* Currently only need to get DIO alignment info for regular files */
1062	if (!S_ISREG(inode->i_mode))
1063		return nfs_ok;
1064
1065	status = fh_getattr(fhp, &stat);
1066	if (status != nfs_ok)
1067		return status;
1068
1069	trace_nfsd_file_get_dio_attrs(inode, &stat);
1070
1071	if (stat.result_mask & STATX_DIOALIGN) {
1072		nf->nf_dio_mem_align = stat.dio_mem_align;
1073		nf->nf_dio_offset_align = stat.dio_offset_align;
1074	}
1075	if (stat.result_mask & STATX_DIO_READ_ALIGN)
1076		nf->nf_dio_read_offset_align = stat.dio_read_offset_align;
1077	else
1078		nf->nf_dio_read_offset_align = nf->nf_dio_offset_align;
1079
1080	return nfs_ok;
1081}
1082
1083static __be32
1084nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net,
1085		     struct svc_cred *cred,
1086		     struct auth_domain *client,
1087		     struct svc_fh *fhp,
1088		     unsigned int may_flags, struct file *file,
1089		     umode_t type, bool want_gc, struct nfsd_file **pnf)
1090{
1091	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
1092	struct nfsd_file *new, *nf;
1093	bool stale_retry = true;
1094	bool open_retry = true;
1095	struct inode *inode;
1096	__be32 status;
1097	int ret;
1098
1099retry:
1100	if (rqstp)
1101		status = fh_verify(rqstp, fhp, type,
1102				   may_flags|NFSD_MAY_OWNER_OVERRIDE);
1103	else
1104		status = fh_verify_local(net, cred, client, fhp, type,
1105					 may_flags|NFSD_MAY_OWNER_OVERRIDE);
1106
1107	if (status != nfs_ok)
1108		return status;
1109	inode = d_inode(fhp->fh_dentry);
1110
1111	rcu_read_lock();
1112	nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
1113	rcu_read_unlock();
1114
1115	if (nf)
1116		goto wait_for_construction;
1117
1118	new = nfsd_file_alloc(net, inode, need, want_gc);
1119	if (!new) {
1120		status = nfserr_jukebox;
1121		goto out;
1122	}
1123
1124	rcu_read_lock();
1125	spin_lock(&inode->i_lock);
1126	nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
1127	if (unlikely(nf)) {
1128		spin_unlock(&inode->i_lock);
1129		rcu_read_unlock();
1130		nfsd_file_free(new);
1131		goto wait_for_construction;
1132	}
1133	nf = new;
1134	ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
1135			      nfsd_file_rhash_params);
1136	spin_unlock(&inode->i_lock);
1137	rcu_read_unlock();
1138	if (likely(ret == 0))
1139		goto open_file;
1140
1141	trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
1142	status = nfserr_jukebox;
1143	goto construction_err;
1144
1145wait_for_construction:
1146	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
1147
1148	/* Did construction of this file fail? */
1149	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
1150		trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
1151		if (!open_retry) {
1152			status = nfserr_jukebox;
1153			goto construction_err;
1154		}
1155		nfsd_file_put(nf);
1156		open_retry = false;
1157		fh_put(fhp);
1158		goto retry;
1159	}
1160	this_cpu_inc(nfsd_file_cache_hits);
1161
1162	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
1163	if (status != nfs_ok) {
1164		nfsd_file_put(nf);
1165		nf = NULL;
1166	}
1167
1168out:
1169	if (status == nfs_ok) {
1170		this_cpu_inc(nfsd_file_acquisitions);
1171		nfsd_file_check_write_error(nf);
1172		*pnf = nf;
1173	}
1174	trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
1175	return status;
1176
1177open_file:
1178	trace_nfsd_file_alloc(nf);
1179
1180	if (type == S_IFREG)
1181		nf->nf_mark = nfsd_file_mark_find_or_create(inode);
1182
1183	if (type != S_IFREG || nf->nf_mark) {
1184		if (file) {
1185			get_file(file);
1186			nf->nf_file = file;
1187			status = nfs_ok;
1188			trace_nfsd_file_opened(nf, status);
1189		} else {
1190			ret = nfsd_open_verified(fhp, type, may_flags, &nf->nf_file);
1191			if (ret == -EOPENSTALE && stale_retry) {
1192				stale_retry = false;
1193				nfsd_file_unhash(nf);
1194				clear_and_wake_up_bit(NFSD_FILE_PENDING,
1195						      &nf->nf_flags);
1196				if (refcount_dec_and_test(&nf->nf_ref))
1197					nfsd_file_free(nf);
1198				nf = NULL;
1199				fh_put(fhp);
1200				goto retry;
1201			}
1202			status = nfserrno(ret);
1203			trace_nfsd_file_open(nf, status);
1204			if (status == nfs_ok)
1205				status = nfsd_file_get_dio_attrs(fhp, nf);
1206		}
1207	} else
1208		status = nfserr_jukebox;
1209	/*
1210	 * If construction failed, or we raced with a call to unlink()
1211	 * then unhash.
1212	 */
1213	if (status != nfs_ok || inode->i_nlink == 0)
1214		nfsd_file_unhash(nf);
1215	else if (want_gc)
1216		nfsd_file_lru_add(nf);
1217
1218	clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
1219	if (status == nfs_ok)
1220		goto out;
1221
1222construction_err:
1223	if (refcount_dec_and_test(&nf->nf_ref))
1224		nfsd_file_free(nf);
1225	nf = NULL;
1226	goto out;
1227}
1228
1229/**
1230 * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file
1231 * @rqstp: the RPC transaction being executed
1232 * @fhp: the NFS filehandle of the file to be opened
1233 * @may_flags: NFSD_MAY_ settings for the file
1234 * @pnf: OUT: new or found "struct nfsd_file" object
1235 *
1236 * The nfsd_file object returned by this API is reference-counted
1237 * and garbage-collected. The object is retained for a few
1238 * seconds after the final nfsd_file_put() in case the caller
1239 * wants to re-use it.
1240 *
1241 * Return values:
1242 *   %nfs_ok - @pnf points to an nfsd_file with its reference
1243 *   count boosted.
1244 *
1245 * On error, an nfsstat value in network byte order is returned.
1246 */
1247__be32
1248nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
1249		     unsigned int may_flags, struct nfsd_file **pnf)
1250{
1251	return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
1252				    fhp, may_flags, NULL, S_IFREG, true, pnf);
1253}
1254
1255/**
1256 * nfsd_file_acquire - Get a struct nfsd_file with an open file
1257 * @rqstp: the RPC transaction being executed
1258 * @fhp: the NFS filehandle of the file to be opened
1259 * @may_flags: NFSD_MAY_ settings for the file
1260 * @pnf: OUT: new or found "struct nfsd_file" object
1261 *
1262 * The nfsd_file_object returned by this API is reference-counted
1263 * but not garbage-collected. The object is unhashed after the
1264 * final nfsd_file_put().
1265 *
1266 * Return values:
1267 *   %nfs_ok - @pnf points to an nfsd_file with its reference
1268 *   count boosted.
1269 *
1270 * On error, an nfsstat value in network byte order is returned.
1271 */
1272__be32
1273nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
1274		  unsigned int may_flags, struct nfsd_file **pnf)
1275{
1276	return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
1277				    fhp, may_flags, NULL, S_IFREG, false, pnf);
1278}
1279
1280/**
1281 * nfsd_file_acquire_local - Get a struct nfsd_file with an open file for localio
1282 * @net: The network namespace in which to perform a lookup
1283 * @cred: the user credential with which to validate access
1284 * @client: the auth_domain for LOCALIO lookup
1285 * @fhp: the NFS filehandle of the file to be opened
1286 * @may_flags: NFSD_MAY_ settings for the file
1287 * @pnf: OUT: new or found "struct nfsd_file" object
1288 *
1289 * This file lookup interface provide access to a file given the
1290 * filehandle and credential.  No connection-based authorisation
1291 * is performed and in that way it is quite different to other
1292 * file access mediated by nfsd.  It allows a kernel module such as the NFS
1293 * client to reach across network and filesystem namespaces to access
1294 * a file.  The security implications of this should be carefully
1295 * considered before use.
1296 *
1297 * The nfsd_file_object returned by this API is reference-counted
1298 * but not garbage-collected. The object is unhashed after the
1299 * final nfsd_file_put().
1300 *
1301 * Return values:
1302 *   %nfs_ok - @pnf points to an nfsd_file with its reference
1303 *   count boosted.
1304 *
1305 * On error, an nfsstat value in network byte order is returned.
1306 */
1307__be32
1308nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
1309			struct auth_domain *client, struct svc_fh *fhp,
1310			unsigned int may_flags, struct nfsd_file **pnf)
1311{
1312	/*
1313	 * Save creds before calling nfsd_file_do_acquire() (which calls
1314	 * nfsd_setuser). Important because caller (LOCALIO) is from
1315	 * client context.
1316	 */
1317	const struct cred *save_cred = get_current_cred();
1318	__be32 beres;
1319
1320	beres = nfsd_file_do_acquire(NULL, net, cred, client, fhp, may_flags,
1321				     NULL, S_IFREG, false, pnf);
1322	put_cred(revert_creds(save_cred));
1323	return beres;
1324}
1325
1326/**
1327 * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
1328 * @rqstp: the RPC transaction being executed
1329 * @fhp: the NFS filehandle of the file just created
1330 * @may_flags: NFSD_MAY_ settings for the file
1331 * @file: cached, already-open file (may be NULL)
1332 * @pnf: OUT: new or found "struct nfsd_file" object
1333 *
1334 * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
1335 * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
1336 * opening a new one.
1337 *
1338 * Return values:
1339 *   %nfs_ok - @pnf points to an nfsd_file with its reference
1340 *   count boosted.
1341 *
1342 * On error, an nfsstat value in network byte order is returned.
1343 */
1344__be32
1345nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
1346			 unsigned int may_flags, struct file *file,
1347			 struct nfsd_file **pnf)
1348{
1349	return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
1350				    fhp, may_flags, file, S_IFREG, false, pnf);
1351}
1352
1353/**
1354 * nfsd_file_acquire_dir - Get a struct nfsd_file with an open directory
1355 * @rqstp: the RPC transaction being executed
1356 * @fhp: the NFS filehandle of the file to be opened
1357 * @pnf: OUT: new or found "struct nfsd_file" object
1358 *
1359 * The nfsd_file_object returned by this API is reference-counted
1360 * but not garbage-collected. The object is unhashed after the
1361 * final nfsd_file_put(). This opens directories only, and only
1362 * in O_RDONLY mode.
1363 *
1364 * Return values:
1365 *   %nfs_ok - @pnf points to an nfsd_file with its reference
1366 *   count boosted.
1367 *
1368 * On error, an nfsstat value in network byte order is returned.
1369 */
1370__be32
1371nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp,
1372		      struct nfsd_file **pnf)
1373{
1374	return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, fhp,
1375				    NFSD_MAY_READ|NFSD_MAY_64BIT_COOKIE,
1376				    NULL, S_IFDIR, false, pnf);
1377}
1378
1379/*
1380 * Note that fields may be added, removed or reordered in the future. Programs
1381 * scraping this file for info should test the labels to ensure they're
1382 * getting the correct field.
1383 */
1384int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
1385{
1386	unsigned long allocations = 0, releases = 0, evictions = 0;
1387	unsigned long hits = 0, acquisitions = 0;
1388	unsigned int i, count = 0, buckets = 0;
1389	unsigned long lru = 0, total_age = 0;
1390
1391	/* Serialize with server shutdown */
1392	mutex_lock(&nfsd_mutex);
1393	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
1394		struct bucket_table *tbl;
1395		struct rhashtable *ht;
1396
1397		lru = list_lru_count(&nfsd_file_lru);
1398
1399		rcu_read_lock();
1400		ht = &nfsd_file_rhltable.ht;
1401		count = atomic_read(&ht->nelems);
1402		tbl = rht_dereference_rcu(ht->tbl, ht);
1403		buckets = tbl->size;
1404		rcu_read_unlock();
1405	}
1406	mutex_unlock(&nfsd_mutex);
1407
1408	for_each_possible_cpu(i) {
1409		hits += per_cpu(nfsd_file_cache_hits, i);
1410		acquisitions += per_cpu(nfsd_file_acquisitions, i);
1411		allocations += per_cpu(nfsd_file_allocations, i);
1412		releases += per_cpu(nfsd_file_releases, i);
1413		total_age += per_cpu(nfsd_file_total_age, i);
1414		evictions += per_cpu(nfsd_file_evictions, i);
1415	}
1416
1417	seq_printf(m, "total inodes:  %u\n", count);
1418	seq_printf(m, "hash buckets:  %u\n", buckets);
1419	seq_printf(m, "lru entries:   %lu\n", lru);
1420	seq_printf(m, "cache hits:    %lu\n", hits);
1421	seq_printf(m, "acquisitions:  %lu\n", acquisitions);
1422	seq_printf(m, "allocations:   %lu\n", allocations);
1423	seq_printf(m, "releases:      %lu\n", releases);
1424	seq_printf(m, "evictions:     %lu\n", evictions);
1425	if (releases)
1426		seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
1427	else
1428		seq_printf(m, "mean age (ms): -\n");
1429	return 0;
1430}