fs/nfs/dir.c at v6.19 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / fs / nfs / dir.c
at v6.19 3431 lines 91 kB view raw
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  linux/fs/nfs/dir.c
   4 *
   5 *  Copyright (C) 1992  Rick Sladkey
   6 *
   7 *  nfs directory handling functions
   8 *
   9 * 10 Apr 1996	Added silly rename for unlink	--okir
  10 * 28 Sep 1996	Improved directory cache --okir
  11 * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de 
  12 *              Re-implemented silly rename for unlink, newly implemented
  13 *              silly rename for nfs_rename() following the suggestions
  14 *              of Olaf Kirch (okir) found in this file.
  15 *              Following Linus comments on my original hack, this version
  16 *              depends only on the dcache stuff and doesn't touch the inode
  17 *              layer (iput() and friends).
  18 *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
  19 */
  20
  21#include <linux/compat.h>
  22#include <linux/module.h>
  23#include <linux/time.h>
  24#include <linux/errno.h>
  25#include <linux/stat.h>
  26#include <linux/fcntl.h>
  27#include <linux/string.h>
  28#include <linux/kernel.h>
  29#include <linux/slab.h>
  30#include <linux/mm.h>
  31#include <linux/sunrpc/clnt.h>
  32#include <linux/nfs_fs.h>
  33#include <linux/nfs_mount.h>
  34#include <linux/pagemap.h>
  35#include <linux/pagevec.h>
  36#include <linux/namei.h>
  37#include <linux/mount.h>
  38#include <linux/swap.h>
  39#include <linux/sched.h>
  40#include <linux/kmemleak.h>
  41#include <linux/xattr.h>
  42#include <linux/hash.h>
  43
  44#include "delegation.h"
  45#include "iostat.h"
  46#include "internal.h"
  47#include "fscache.h"
  48
  49#include "nfstrace.h"
  50
  51/* #define NFS_DEBUG_VERBOSE 1 */
  52
/*
 * Forward declarations: the directory file operations referenced by
 * nfs_dir_operations, the folio teardown hook referenced by nfs_dir_aops,
 * and nfs_do_create().
 */
static int nfs_opendir(struct inode *, struct file *);
static int nfs_closedir(struct inode *, struct file *);
static int nfs_readdir(struct file *, struct dir_context *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
static void nfs_readdir_clear_array(struct folio *);
static int nfs_do_create(struct inode *dir, struct dentry *dentry,
			 umode_t mode, int open_flags);
  61
/*
 * File operations installed for NFS directories.  generic_read_dir and
 * simple_nosetlease come from outside this file; the remaining hooks are
 * the static functions declared above.
 */
const struct file_operations nfs_dir_operations = {
	.llseek		= nfs_llseek_dir,
	.read		= generic_read_dir,
	.iterate_shared	= nfs_readdir,
	.open		= nfs_opendir,
	.release	= nfs_closedir,
	.fsync		= nfs_fsync_dir,
	.setlease	= simple_nosetlease,
};
  71
/*
 * Address space operations for the directory page cache.  The free_folio
 * hook releases the name strings referenced from a readdir cache folio.
 */
const struct address_space_operations nfs_dir_aops = {
	.free_folio = nfs_readdir_clear_array,
};
  75
  76#define NFS_INIT_DTSIZE PAGE_SIZE
  77
  78static struct nfs_open_dir_context *
  79alloc_nfs_open_dir_context(struct inode *dir)
  80{
  81	struct nfs_inode *nfsi = NFS_I(dir);
  82	struct nfs_open_dir_context *ctx;
  83
  84	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
  85	if (ctx != NULL) {
  86		ctx->attr_gencount = nfsi->attr_gencount;
  87		ctx->dtsize = NFS_INIT_DTSIZE;
  88		spin_lock(&dir->i_lock);
  89		if (list_empty(&nfsi->open_files) &&
  90		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
  91			nfs_set_cache_invalid(dir,
  92					      NFS_INO_INVALID_DATA |
  93						      NFS_INO_REVAL_FORCED);
  94		list_add_tail_rcu(&ctx->list, &nfsi->open_files);
  95		memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf));
  96		spin_unlock(&dir->i_lock);
  97		return ctx;
  98	}
  99	return  ERR_PTR(-ENOMEM);
 100}
 101
 102static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
 103{
 104	spin_lock(&dir->i_lock);
 105	list_del_rcu(&ctx->list);
 106	spin_unlock(&dir->i_lock);
 107	kfree_rcu(ctx, rcu_head);
 108}
 109
 110/*
 111 * Open file
 112 */
 113static int
 114nfs_opendir(struct inode *inode, struct file *filp)
 115{
 116	int res = 0;
 117	struct nfs_open_dir_context *ctx;
 118
 119	dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
 120
 121	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 122
 123	ctx = alloc_nfs_open_dir_context(inode);
 124	if (IS_ERR(ctx)) {
 125		res = PTR_ERR(ctx);
 126		goto out;
 127	}
 128	filp->private_data = ctx;
 129out:
 130	return res;
 131}
 132
 133static int
 134nfs_closedir(struct inode *inode, struct file *filp)
 135{
 136	put_nfs_open_dir_context(file_inode(filp), filp->private_data);
 137	return 0;
 138}
 139
/* One cached directory entry stored inside a readdir cache folio. */
struct nfs_cache_array_entry {
	u64 cookie;		/* cookie that precedes this entry (array's last_cookie at append time) */
	u64 ino;		/* inode number copied from the decoded entry */
	const char *name;	/* NUL-terminated heap copy; freed by nfs_readdir_clear_array() */
	unsigned int name_len;	/* length of name, excluding the terminator */
	unsigned char d_type;	/* d_type value passed to dir_emit() */
};
 147
/*
 * Header placed at offset 0 of a readdir cache folio, followed by the
 * entries themselves.
 */
struct nfs_cache_array {
	u64 change_attr;	/* directory change attribute the contents were read under */
	u64 last_cookie;	/* cookie following the last stored entry (start cookie when empty) */
	unsigned int size;	/* number of valid elements in array[] */
	unsigned char folio_full : 1,		/* no further entries may be appended */
		      folio_is_eof : 1,		/* the last entry is the end of the directory */
		      cookies_are_ordered : 1;	/* every appended cookie was greater than its predecessor */
	struct nfs_cache_array_entry array[] __counted_by(size);
};
 157
/* Working state for a readdir pass over one open directory. */
struct nfs_readdir_descriptor {
	struct file	*file;		/* the open directory */
	struct folio	*folio;		/* cache folio currently held, or NULL */
	struct dir_context *ctx;	/* VFS iteration context (pos and emit callback) */
	pgoff_t		folio_index;	/* count of cache arrays stepped past since the last rewind */
	pgoff_t		folio_index_max; /* bookkeeping used to decide when to grow dtsize */
	u64		dir_cookie;	/* cookie being searched for; 0 selects search by position */
	u64		last_cookie;	/* cookie used to look up the cache folio */
	loff_t		current_index;	/* position index of the first entry of the current array */

	__be32		verf[NFS_DIR_VERIFIER_SIZE];	/* verifier copied in while emitting entries */
	unsigned long	dir_verifier;	/* directory change attribute saved before the READDIR call */
	unsigned long	timestamp;	/* jiffies sampled before the successful READDIR call */
	unsigned long	gencount;	/* attribute generation taken before that call */
	unsigned long	attr_gencount;	/* NOTE(review): not touched in this part of the file - confirm use */
	unsigned int	cache_entry_index; /* index within the array of the entry to emit next */
	unsigned int	buffer_fills;	/* number of completed nfs_readdir_xdr_to_array() fills */
	unsigned int	dtsize;		/* current READDIR transfer size in bytes */
	bool clear_cache;		/* reinitialise full cached folios before using them */
	bool plus;			/* use READDIRPLUS */
	bool eob;			/* emission stopped before the array was exhausted */
	bool eof;			/* end of directory reached */
};
 181
 182static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
 183{
 184	struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
 185	unsigned int maxsize = server->dtsize;
 186
 187	if (sz > maxsize)
 188		sz = maxsize;
 189	if (sz < NFS_MIN_FILE_IO_SIZE)
 190		sz = NFS_MIN_FILE_IO_SIZE;
 191	desc->dtsize = sz;
 192}
 193
 194static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
 195{
 196	nfs_set_dtsize(desc, desc->dtsize >> 1);
 197}
 198
 199static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
 200{
 201	nfs_set_dtsize(desc, desc->dtsize << 1);
 202}
 203
 204static void nfs_readdir_folio_init_array(struct folio *folio, u64 last_cookie,
 205					 u64 change_attr)
 206{
 207	struct nfs_cache_array *array;
 208
 209	array = kmap_local_folio(folio, 0);
 210	array->change_attr = change_attr;
 211	array->last_cookie = last_cookie;
 212	array->size = 0;
 213	array->folio_full = 0;
 214	array->folio_is_eof = 0;
 215	array->cookies_are_ordered = 1;
 216	kunmap_local(array);
 217}
 218
 219/*
 220 * we are freeing strings created by nfs_add_to_readdir_array()
 221 */
 222static void nfs_readdir_clear_array(struct folio *folio)
 223{
 224	struct nfs_cache_array *array;
 225	unsigned int i;
 226
 227	array = kmap_local_folio(folio, 0);
 228	for (i = 0; i < array->size; i++)
 229		kfree(array->array[i].name);
 230	array->size = 0;
 231	kunmap_local(array);
 232}
 233
 234static void nfs_readdir_folio_reinit_array(struct folio *folio, u64 last_cookie,
 235					   u64 change_attr)
 236{
 237	nfs_readdir_clear_array(folio);
 238	nfs_readdir_folio_init_array(folio, last_cookie, change_attr);
 239}
 240
 241static struct folio *
 242nfs_readdir_folio_array_alloc(u64 last_cookie, gfp_t gfp_flags)
 243{
 244	struct folio *folio = folio_alloc(gfp_flags, 0);
 245	if (folio)
 246		nfs_readdir_folio_init_array(folio, last_cookie, 0);
 247	return folio;
 248}
 249
 250static void nfs_readdir_folio_array_free(struct folio *folio)
 251{
 252	if (folio) {
 253		nfs_readdir_clear_array(folio);
 254		folio_put(folio);
 255	}
 256}
 257
 258static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array)
 259{
 260	return array->size == 0 ? array->last_cookie : array->array[0].cookie;
 261}
 262
 263static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
 264{
 265	array->folio_is_eof = 1;
 266	array->folio_full = 1;
 267}
 268
 269static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
 270{
 271	return array->folio_full;
 272}
 273
 274/*
 275 * the caller is responsible for freeing qstr.name
 276 * when called by nfs_readdir_add_to_array, the strings will be freed in
 277 * nfs_clear_readdir_array()
 278 */
 279static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
 280{
 281	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
 282
 283	/*
 284	 * Avoid a kmemleak false positive. The pointer to the name is stored
 285	 * in a page cache page which kmemleak does not scan.
 286	 */
 287	if (ret != NULL)
 288		kmemleak_not_leak(ret);
 289	return ret;
 290}
 291
 292static size_t nfs_readdir_array_maxentries(void)
 293{
 294	return (PAGE_SIZE - sizeof(struct nfs_cache_array)) /
 295	       sizeof(struct nfs_cache_array_entry);
 296}
 297
 298/*
 299 * Check that the next array entry lies entirely within the page bounds
 300 */
 301static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
 302{
 303	if (array->folio_full)
 304		return -ENOSPC;
 305	if (array->size == nfs_readdir_array_maxentries()) {
 306		array->folio_full = 1;
 307		return -ENOSPC;
 308	}
 309	return 0;
 310}
 311
 312static int nfs_readdir_folio_array_append(struct folio *folio,
 313					  const struct nfs_entry *entry,
 314					  u64 *cookie)
 315{
 316	struct nfs_cache_array *array;
 317	struct nfs_cache_array_entry *cache_entry;
 318	const char *name;
 319	int ret = -ENOMEM;
 320
 321	name = nfs_readdir_copy_name(entry->name, entry->len);
 322
 323	array = kmap_local_folio(folio, 0);
 324	if (!name)
 325		goto out;
 326	ret = nfs_readdir_array_can_expand(array);
 327	if (ret) {
 328		kfree(name);
 329		goto out;
 330	}
 331
 332	array->size++;
 333	cache_entry = &array->array[array->size - 1];
 334	cache_entry->cookie = array->last_cookie;
 335	cache_entry->ino = entry->ino;
 336	cache_entry->d_type = entry->d_type;
 337	cache_entry->name_len = entry->len;
 338	cache_entry->name = name;
 339	array->last_cookie = entry->cookie;
 340	if (array->last_cookie <= cache_entry->cookie)
 341		array->cookies_are_ordered = 0;
 342	if (entry->eof != 0)
 343		nfs_readdir_array_set_eof(array);
 344out:
 345	*cookie = array->last_cookie;
 346	kunmap_local(array);
 347	return ret;
 348}
 349
 350#define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14)
 351/*
 352 * Hash algorithm allowing content addressible access to sequences
 353 * of directory cookies. Content is addressed by the value of the
 354 * cookie index of the first readdir entry in a page.
 355 *
 356 * We select only the first 18 bits to avoid issues with excessive
 357 * memory use for the page cache XArray. 18 bits should allow the caching
 358 * of 262144 pages of sequences of readdir entries. Since each page holds
 359 * 127 readdir entries for a typical 64-bit system, that works out to a
 360 * cache of ~ 33 million entries per directory.
 361 */
 362static pgoff_t nfs_readdir_folio_cookie_hash(u64 cookie)
 363{
 364	if (cookie == 0)
 365		return 0;
 366	return hash_64(cookie, 18);
 367}
 368
 369static bool nfs_readdir_folio_validate(struct folio *folio, u64 last_cookie,
 370				       u64 change_attr)
 371{
 372	struct nfs_cache_array *array = kmap_local_folio(folio, 0);
 373	int ret = true;
 374
 375	if (array->change_attr != change_attr)
 376		ret = false;
 377	if (nfs_readdir_array_index_cookie(array) != last_cookie)
 378		ret = false;
 379	kunmap_local(array);
 380	return ret;
 381}
 382
 383static void nfs_readdir_folio_unlock_and_put(struct folio *folio)
 384{
 385	folio_unlock(folio);
 386	folio_put(folio);
 387}
 388
 389static void nfs_readdir_folio_init_and_validate(struct folio *folio, u64 cookie,
 390						u64 change_attr)
 391{
 392	if (folio_test_uptodate(folio)) {
 393		if (nfs_readdir_folio_validate(folio, cookie, change_attr))
 394			return;
 395		nfs_readdir_clear_array(folio);
 396	}
 397	nfs_readdir_folio_init_array(folio, cookie, change_attr);
 398	folio_mark_uptodate(folio);
 399}
 400
 401static struct folio *nfs_readdir_folio_get_locked(struct address_space *mapping,
 402						  u64 cookie, u64 change_attr)
 403{
 404	pgoff_t index = nfs_readdir_folio_cookie_hash(cookie);
 405	struct folio *folio;
 406
 407	folio = filemap_grab_folio(mapping, index);
 408	if (IS_ERR(folio))
 409		return NULL;
 410	nfs_readdir_folio_init_and_validate(folio, cookie, change_attr);
 411	return folio;
 412}
 413
 414static u64 nfs_readdir_folio_last_cookie(struct folio *folio)
 415{
 416	struct nfs_cache_array *array;
 417	u64 ret;
 418
 419	array = kmap_local_folio(folio, 0);
 420	ret = array->last_cookie;
 421	kunmap_local(array);
 422	return ret;
 423}
 424
 425static bool nfs_readdir_folio_needs_filling(struct folio *folio)
 426{
 427	struct nfs_cache_array *array;
 428	bool ret;
 429
 430	array = kmap_local_folio(folio, 0);
 431	ret = !nfs_readdir_array_is_full(array);
 432	kunmap_local(array);
 433	return ret;
 434}
 435
 436static void nfs_readdir_folio_set_eof(struct folio *folio)
 437{
 438	struct nfs_cache_array *array;
 439
 440	array = kmap_local_folio(folio, 0);
 441	nfs_readdir_array_set_eof(array);
 442	kunmap_local(array);
 443}
 444
 445static struct folio *nfs_readdir_folio_get_next(struct address_space *mapping,
 446						u64 cookie, u64 change_attr)
 447{
 448	pgoff_t index = nfs_readdir_folio_cookie_hash(cookie);
 449	struct folio *folio;
 450
 451	folio = __filemap_get_folio(mapping, index,
 452			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
 453			mapping_gfp_mask(mapping));
 454	if (IS_ERR(folio))
 455		return NULL;
 456	nfs_readdir_folio_init_and_validate(folio, cookie, change_attr);
 457	if (nfs_readdir_folio_last_cookie(folio) != cookie)
 458		nfs_readdir_folio_reinit_array(folio, cookie, change_attr);
 459	return folio;
 460}
 461
 462static inline
 463int is_32bit_api(void)
 464{
 465#ifdef CONFIG_COMPAT
 466	return in_compat_syscall();
 467#else
 468	return (BITS_PER_LONG == 32);
 469#endif
 470}
 471
 472static
 473bool nfs_readdir_use_cookie(const struct file *filp)
 474{
 475	if ((filp->f_mode & FMODE_32BITHASH) ||
 476	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
 477		return false;
 478	return true;
 479}
 480
 481static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
 482					struct nfs_readdir_descriptor *desc)
 483{
 484	if (array->folio_full) {
 485		desc->last_cookie = array->last_cookie;
 486		desc->current_index += array->size;
 487		desc->cache_entry_index = 0;
 488		desc->folio_index++;
 489	} else
 490		desc->last_cookie = nfs_readdir_array_index_cookie(array);
 491}
 492
 493static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
 494{
 495	desc->current_index = 0;
 496	desc->last_cookie = 0;
 497	desc->folio_index = 0;
 498}
 499
 500static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
 501				      struct nfs_readdir_descriptor *desc)
 502{
 503	loff_t diff = desc->ctx->pos - desc->current_index;
 504	unsigned int index;
 505
 506	if (diff < 0)
 507		goto out_eof;
 508	if (diff >= array->size) {
 509		if (array->folio_is_eof)
 510			goto out_eof;
 511		nfs_readdir_seek_next_array(array, desc);
 512		return -EAGAIN;
 513	}
 514
 515	index = (unsigned int)diff;
 516	desc->dir_cookie = array->array[index].cookie;
 517	desc->cache_entry_index = index;
 518	return 0;
 519out_eof:
 520	desc->eof = true;
 521	return -EBADCOOKIE;
 522}
 523
 524static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
 525					      u64 cookie)
 526{
 527	if (!array->cookies_are_ordered)
 528		return true;
 529	/* Optimisation for monotonically increasing cookies */
 530	if (cookie >= array->last_cookie)
 531		return false;
 532	if (array->size && cookie < array->array[0].cookie)
 533		return false;
 534	return true;
 535}
 536
 537static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
 538					 struct nfs_readdir_descriptor *desc)
 539{
 540	unsigned int i;
 541	int status = -EAGAIN;
 542
 543	if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
 544		goto check_eof;
 545
 546	for (i = 0; i < array->size; i++) {
 547		if (array->array[i].cookie == desc->dir_cookie) {
 548			if (nfs_readdir_use_cookie(desc->file))
 549				desc->ctx->pos = desc->dir_cookie;
 550			else
 551				desc->ctx->pos = desc->current_index + i;
 552			desc->cache_entry_index = i;
 553			return 0;
 554		}
 555	}
 556check_eof:
 557	if (array->folio_is_eof) {
 558		status = -EBADCOOKIE;
 559		if (desc->dir_cookie == array->last_cookie)
 560			desc->eof = true;
 561	} else
 562		nfs_readdir_seek_next_array(array, desc);
 563	return status;
 564}
 565
 566static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
 567{
 568	struct nfs_cache_array *array;
 569	int status;
 570
 571	array = kmap_local_folio(desc->folio, 0);
 572
 573	if (desc->dir_cookie == 0)
 574		status = nfs_readdir_search_for_pos(array, desc);
 575	else
 576		status = nfs_readdir_search_for_cookie(array, desc);
 577
 578	kunmap_local(array);
 579	return status;
 580}
 581
 582/* Fill a page with xdr information before transferring to the cache page */
 583static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
 584				  __be32 *verf, u64 cookie,
 585				  struct page **pages, size_t bufsize,
 586				  __be32 *verf_res)
 587{
 588	struct inode *inode = file_inode(desc->file);
 589	struct nfs_readdir_arg arg = {
 590		.dentry = file_dentry(desc->file),
 591		.cred = desc->file->f_cred,
 592		.verf = verf,
 593		.cookie = cookie,
 594		.pages = pages,
 595		.page_len = bufsize,
 596		.plus = desc->plus,
 597	};
 598	struct nfs_readdir_res res = {
 599		.verf = verf_res,
 600	};
 601	unsigned long	timestamp, gencount;
 602	int		error;
 603
 604 again:
 605	timestamp = jiffies;
 606	gencount = nfs_inc_attr_generation_counter();
 607	desc->dir_verifier = nfs_save_change_attribute(inode);
 608	error = NFS_PROTO(inode)->readdir(&arg, &res);
 609	if (error < 0) {
 610		/* We requested READDIRPLUS, but the server doesn't grok it */
 611		if (error == -ENOTSUPP && desc->plus) {
 612			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
 613			desc->plus = arg.plus = false;
 614			goto again;
 615		}
 616		goto error;
 617	}
 618	desc->timestamp = timestamp;
 619	desc->gencount = gencount;
 620error:
 621	return error;
 622}
 623
 624static int xdr_decode(struct nfs_readdir_descriptor *desc,
 625		      struct nfs_entry *entry, struct xdr_stream *xdr)
 626{
 627	struct inode *inode = file_inode(desc->file);
 628	int error;
 629
 630	error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
 631	if (error)
 632		return error;
 633	entry->fattr->time_start = desc->timestamp;
 634	entry->fattr->gencount = desc->gencount;
 635	return 0;
 636}
 637
 638/* Match file and dirent using either filehandle or fileid
 639 * Note: caller is responsible for checking the fsid
 640 */
 641static
 642int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 643{
 644	struct inode *inode;
 645	struct nfs_inode *nfsi;
 646
 647	if (d_really_is_negative(dentry))
 648		return 0;
 649
 650	inode = d_inode(dentry);
 651	if (is_bad_inode(inode) || NFS_STALE(inode))
 652		return 0;
 653
 654	nfsi = NFS_I(inode);
 655	if (entry->fattr->fileid != nfsi->fileid)
 656		return 0;
 657	if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
 658		return 0;
 659	return 1;
 660}
 661
 662#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
 663
 664static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
 665				unsigned int cache_hits,
 666				unsigned int cache_misses)
 667{
 668	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
 669		return false;
 670	if (NFS_SERVER(dir)->flags & NFS_MOUNT_FORCE_RDIRPLUS)
 671		return true;
 672	if (ctx->pos == 0 ||
 673	    cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
 674		return true;
 675	return false;
 676}
 677
 678/*
 679 * This function is called by the getattr code to request the
 680 * use of readdirplus to accelerate any future lookups in the same
 681 * directory.
 682 */
 683void nfs_readdir_record_entry_cache_hit(struct inode *dir)
 684{
 685	struct nfs_inode *nfsi = NFS_I(dir);
 686	struct nfs_open_dir_context *ctx;
 687
 688	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
 689	    S_ISDIR(dir->i_mode)) {
 690		rcu_read_lock();
 691		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
 692			atomic_inc(&ctx->cache_hits);
 693		rcu_read_unlock();
 694	}
 695}
 696
 697/*
 698 * This function is mainly for use by nfs_getattr().
 699 *
 700 * If this is an 'ls -l', we want to force use of readdirplus.
 701 */
 702void nfs_readdir_record_entry_cache_miss(struct inode *dir)
 703{
 704	struct nfs_inode *nfsi = NFS_I(dir);
 705	struct nfs_open_dir_context *ctx;
 706
 707	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
 708	    S_ISDIR(dir->i_mode)) {
 709		rcu_read_lock();
 710		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
 711			atomic_inc(&ctx->cache_misses);
 712		rcu_read_unlock();
 713	}
 714}
 715
 716static void nfs_lookup_advise_force_readdirplus(struct inode *dir,
 717						unsigned int flags)
 718{
 719	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
 720		return;
 721	if (flags & (LOOKUP_EXCL | LOOKUP_PARENT | LOOKUP_REVAL))
 722		return;
 723	nfs_readdir_record_entry_cache_miss(dir);
 724}
 725
/*
 * Use one decoded READDIRPLUS entry to create or refresh the dcache entry
 * for its name under @parent.
 *
 * @parent:       dentry of the directory being read
 * @entry:        decoded entry (name, attributes, optional filehandle)
 * @dir_verifier: verifier stamped onto the dentry that is primed
 *
 * Entries are silently skipped when they lack fileid or fsid attributes,
 * have an empty name, a name containing '\0' or '/', or are "." / "..".
 */
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
		unsigned long dir_verifier)
{
	struct qstr filename = QSTR_INIT(entry->name, entry->len);
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *inode;
	int status;

	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
		return;
	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
		return;
	if (filename.len == 0)
		return;
	/* Validate that the name doesn't contain any illegal '\0' */
	if (strnlen(filename.name, filename.len) != filename.len)
		return;
	/* ...or '/' */
	if (strnchr(filename.name, filename.len, '/'))
		return;
	/* never prime "." or ".." */
	if (filename.name[0] == '.') {
		if (filename.len == 1)
			return;
		if (filename.len == 2 && filename.name[1] == '.')
			return;
	}
	filename.hash = full_name_hash(parent, filename.name, filename.len);

	dentry = d_lookup(parent, &filename);
again:
	/* no cached dentry (or it was just invalidated): allocate a new one */
	if (!dentry) {
		dentry = d_alloc_parallel(parent, &filename, &wq);
		if (IS_ERR(dentry))
			return;
	}
	/* dentry already existed, i.e. it is not a lookup we own */
	if (!d_in_lookup(dentry)) {
		/* Is there a mountpoint here? If so, just exit */
		if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
					&entry->fattr->fsid))
			goto out;
		if (nfs_same_file(dentry, entry)) {
			/* without a filehandle there is nothing to refresh */
			if (!entry->fh->size)
				goto out;
			nfs_set_verifier(dentry, dir_verifier);
			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
			if (!status)
				nfs_setsecurity(d_inode(dentry), entry->fattr);
			trace_nfs_readdir_lookup_revalidate(d_inode(parent),
							    dentry, 0, status);
			goto out;
		} else {
			/* the name now refers to a different file: drop and retry */
			trace_nfs_readdir_lookup_revalidate_failed(
				d_inode(parent), dentry, 0);
			d_invalidate(dentry);
			dput(dentry);
			dentry = NULL;
			goto again;
		}
	}
	/* in-lookup dentry but no filehandle: end the lookup without an inode */
	if (!entry->fh->size) {
		d_lookup_done(dentry);
		goto out;
	}

	nfs_set_verifier(dentry, dir_verifier);
	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
	alias = d_splice_alias(inode, dentry);
	d_lookup_done(dentry);
	if (alias) {
		if (IS_ERR(alias))
			goto out;
		/* an existing alias was returned: use it in place of ours */
		nfs_set_verifier(alias, dir_verifier);
		dput(dentry);
		dentry = alias;
	}
	trace_nfs_readdir_lookup(d_inode(parent), dentry, 0);
out:
	dput(dentry);
}
 808
 809static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc,
 810				    struct nfs_entry *entry,
 811				    struct xdr_stream *stream)
 812{
 813	int ret;
 814
 815	if (entry->fattr->label)
 816		entry->fattr->label->len = NFS4_MAXLABELLEN;
 817	ret = xdr_decode(desc, entry, stream);
 818	if (ret || !desc->plus)
 819		return ret;
 820	nfs_prime_dcache(file_dentry(desc->file), entry, desc->dir_verifier);
 821	return 0;
 822}
 823
/*
 * Perform conversion from xdr to cache array
 *
 * Decodes entries from @xdr_pages (@buflen bytes) and appends them to the
 * cache array in *@arrays, moving on to further folios as each one fills.
 *
 * @arrays/@narrays: the first folio to fill and, for folios that do not
 *                   belong to this file's mapping, the number of slots
 *                   available for additional freshly allocated arrays.
 * @change_attr:     change attribute used when taking further page cache
 *                   folios.
 *
 * Returns 0 on success (including the benign end-of-buffer / end-of-
 * directory conditions handled in the switch below) or a negative errno.
 * The folio in *@arrays is never unlocked or released here.
 */
static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc,
				    struct nfs_entry *entry,
				    struct page **xdr_pages, unsigned int buflen,
				    struct folio **arrays, size_t narrays,
				    u64 change_attr)
{
	struct address_space *mapping = desc->file->f_mapping;
	struct folio *new, *folio = *arrays;
	struct xdr_stream stream;
	struct folio *scratch;
	struct xdr_buf buf;
	u64 cookie;
	int status;

	/* scratch space handed to the XDR stream for decoding */
	scratch = folio_alloc(GFP_KERNEL, 0);
	if (scratch == NULL)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
	xdr_set_scratch_folio(&stream, scratch);

	do {
		status = nfs_readdir_entry_decode(desc, entry, &stream);
		if (status != 0)
			break;

		status = nfs_readdir_folio_array_append(folio, entry, &cookie);
		/* success or hard error: let the loop condition decide */
		if (status != -ENOSPC)
			continue;

		/* current array is full: find somewhere to put this entry */
		if (folio->mapping != mapping) {
			/* folio is not in this file's page cache: use a new private array */
			if (!--narrays)
				break;
			new = nfs_readdir_folio_array_alloc(cookie, GFP_KERNEL);
			if (!new)
				break;
			arrays++;
			*arrays = folio = new;
		} else {
			new = nfs_readdir_folio_get_next(mapping, cookie,
							 change_attr);
			if (!new)
				break;
			/* release intermediate folios, but never the caller's first one */
			if (folio != *arrays)
				nfs_readdir_folio_unlock_and_put(folio);
			folio = new;
		}
		desc->folio_index_max++;
		/* retry the entry that did not fit */
		status = nfs_readdir_folio_array_append(folio, entry, &cookie);
	} while (!status && !entry->eof);

	/*
	 * NOTE(review): -EBADCOOKIE and -EAGAIN presumably come from the
	 * protocol decode_dirent op when the stream ends - confirm against
	 * the decoders.
	 */
	switch (status) {
	case -EBADCOOKIE:
		if (!entry->eof)
			break;
		nfs_readdir_folio_set_eof(folio);
		fallthrough;
	case -EAGAIN:
		status = 0;
		break;
	case -ENOSPC:
		/* ran out of array space: not an error for the caller */
		status = 0;
		if (!desc->plus)
			break;
		/* keep decoding the remaining READDIRPLUS entries; results are not cached */
		while (!nfs_readdir_entry_decode(desc, entry, &stream))
			;
	}

	if (folio != *arrays)
		nfs_readdir_folio_unlock_and_put(folio);

	folio_put(scratch);
	return status;
}
 899
 900static void nfs_readdir_free_pages(struct page **pages, size_t npages)
 901{
 902	while (npages--)
 903		put_page(pages[npages]);
 904	kfree(pages);
 905}
 906
 907/*
 908 * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
 909 * to nfs_readdir_free_pages()
 910 */
 911static struct page **nfs_readdir_alloc_pages(size_t npages)
 912{
 913	struct page **pages;
 914	size_t i;
 915
 916	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
 917	if (!pages)
 918		return NULL;
 919	for (i = 0; i < npages; i++) {
 920		struct page *page = alloc_page(GFP_KERNEL);
 921		if (page == NULL)
 922			goto out_freepages;
 923		pages[i] = page;
 924	}
 925	return pages;
 926
 927out_freepages:
 928	nfs_readdir_free_pages(pages, i);
 929	return NULL;
 930}
 931
 932static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 933				    __be32 *verf_arg, __be32 *verf_res,
 934				    struct folio **arrays, size_t narrays)
 935{
 936	u64 change_attr;
 937	struct page **pages;
 938	struct folio *folio = *arrays;
 939	struct nfs_entry *entry;
 940	size_t array_size;
 941	struct inode *inode = file_inode(desc->file);
 942	unsigned int dtsize = desc->dtsize;
 943	unsigned int pglen;
 944	int status = -ENOMEM;
 945
 946	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 947	if (!entry)
 948		return -ENOMEM;
 949	entry->cookie = nfs_readdir_folio_last_cookie(folio);
 950	entry->fh = nfs_alloc_fhandle();
 951	entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
 952	entry->server = NFS_SERVER(inode);
 953	if (entry->fh == NULL || entry->fattr == NULL)
 954		goto out;
 955
 956	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
 957	pages = nfs_readdir_alloc_pages(array_size);
 958	if (!pages)
 959		goto out;
 960
 961	change_attr = inode_peek_iversion_raw(inode);
 962	status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
 963					dtsize, verf_res);
 964	if (status < 0)
 965		goto free_pages;
 966
 967	pglen = status;
 968	if (pglen != 0)
 969		status = nfs_readdir_folio_filler(desc, entry, pages, pglen,
 970						  arrays, narrays, change_attr);
 971	else
 972		nfs_readdir_folio_set_eof(folio);
 973	desc->buffer_fills++;
 974
 975free_pages:
 976	nfs_readdir_free_pages(pages, array_size);
 977out:
 978	nfs_free_fattr(entry->fattr);
 979	nfs_free_fhandle(entry->fh);
 980	kfree(entry);
 981	return status;
 982}
 983
 984static void nfs_readdir_folio_put(struct nfs_readdir_descriptor *desc)
 985{
 986	folio_put(desc->folio);
 987	desc->folio = NULL;
 988}
 989
 990static void
 991nfs_readdir_folio_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
 992{
 993	folio_unlock(desc->folio);
 994	nfs_readdir_folio_put(desc);
 995}
 996
 997static struct folio *
 998nfs_readdir_folio_get_cached(struct nfs_readdir_descriptor *desc)
 999{
1000	struct address_space *mapping = desc->file->f_mapping;
1001	u64 change_attr = inode_peek_iversion_raw(mapping->host);
1002	u64 cookie = desc->last_cookie;
1003	struct folio *folio;
1004
1005	folio = nfs_readdir_folio_get_locked(mapping, cookie, change_attr);
1006	if (!folio)
1007		return NULL;
1008	if (desc->clear_cache && !nfs_readdir_folio_needs_filling(folio))
1009		nfs_readdir_folio_reinit_array(folio, cookie, change_attr);
1010	return folio;
1011}
1012
/*
 * Look up the cache folio for desc->last_cookie, fill it from the server
 * if it is not yet full, and search it for the requested entry.
 *
 * Returns 0 if the entry was found; desc->folio is then left locked and
 * referenced, which prevents its removal from the page cache.  On any
 * other return value the folio has been unlocked and released:
 *   -EAGAIN   the caller should retry (descriptor advanced, or the cache
 *             was invalidated and the search rewound),
 *   -ENOMEM   no folio could be obtained,
 *   other     error from filling or searching the array.
 */
static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
{
	struct inode *inode = file_inode(desc->file);
	struct nfs_inode *nfsi = NFS_I(inode);
	__be32 verf[NFS_DIR_VERIFIER_SIZE];
	int res;

	desc->folio = nfs_readdir_folio_get_cached(desc);
	if (!desc->folio)
		return -ENOMEM;
	if (nfs_readdir_folio_needs_filling(desc->folio)) {
		/* Grow the dtsize if we had to go back for more pages */
		if (desc->folio_index == desc->folio_index_max)
			nfs_grow_dtsize(desc);
		desc->folio_index_max = desc->folio_index;
		trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf,
					     desc->last_cookie,
					     desc->folio->index, desc->dtsize);
		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
					       &desc->folio, 1);
		if (res < 0) {
			nfs_readdir_folio_unlock_and_put_cached(desc);
			trace_nfs_readdir_cache_fill_done(inode, res);
			/* these two errors invalidate the whole cache and restart from the top */
			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
				invalidate_inode_pages2(desc->file->f_mapping);
				nfs_readdir_rewind_search(desc);
				trace_nfs_readdir_invalidate_cache_range(
					inode, 0, MAX_LFS_FILESIZE);
				return -EAGAIN;
			}
			return res;
		}
		/*
		 * Set the cookie verifier if the page cache was empty
		 */
		if (desc->last_cookie == 0 &&
		    memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) {
			memcpy(nfsi->cookieverf, verf,
			       sizeof(nfsi->cookieverf));
			/* verifier changed: drop every cached folio from index 1 onwards */
			invalidate_inode_pages2_range(desc->file->f_mapping, 1,
						      -1);
			trace_nfs_readdir_invalidate_cache_range(
				inode, 1, MAX_LFS_FILESIZE);
		}
		desc->clear_cache = false;
	}
	res = nfs_readdir_search_array(desc);
	if (res == 0)
		return 0;
	nfs_readdir_folio_unlock_and_put_cached(desc);
	return res;
}
1069
1070/* Search for desc->dir_cookie from the beginning of the page cache */
1071static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
1072{
1073	int res;
1074
1075	do {
1076		res = find_and_lock_cache_page(desc);
1077	} while (res == -EAGAIN);
1078	return res;
1079}
1080
1081#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
1082
1083/*
1084 * Once we've found the start of the dirent within a page: fill 'er up...
1085 */
1086static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
1087			   const __be32 *verf)
1088{
1089	struct file	*file = desc->file;
1090	struct nfs_cache_array *array;
1091	unsigned int i;
1092	bool first_emit = !desc->dir_cookie;
1093
1094	array = kmap_local_folio(desc->folio, 0);
1095	for (i = desc->cache_entry_index; i < array->size; i++) {
1096		struct nfs_cache_array_entry *ent;
1097
1098		/*
1099		 * nfs_readdir_handle_cache_misses return force clear at
1100		 * (cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD) for
1101		 * readdir heuristic, NFS_READDIR_CACHE_MISS_THRESHOLD + 1
1102		 * entries need be emitted here.
1103		 */
1104		if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 2) {
1105			desc->eob = true;
1106			break;
1107		}
1108
1109		ent = &array->array[i];
1110		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
1111		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
1112			desc->eob = true;
1113			break;
1114		}
1115		memcpy(desc->verf, verf, sizeof(desc->verf));
1116		if (i == array->size - 1) {
1117			desc->dir_cookie = array->last_cookie;
1118			nfs_readdir_seek_next_array(array, desc);
1119		} else {
1120			desc->dir_cookie = array->array[i + 1].cookie;
1121			desc->last_cookie = array->array[0].cookie;
1122		}
1123		if (nfs_readdir_use_cookie(file))
1124			desc->ctx->pos = desc->dir_cookie;
1125		else
1126			desc->ctx->pos++;
1127	}
1128	if (array->folio_is_eof)
1129		desc->eof = !desc->eob;
1130
1131	kunmap_local(array);
1132	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
1133			(unsigned long long)desc->dir_cookie);
1134}
1135
1136/*
1137 * If we cannot find a cookie in our cache, we suspect that this is
1138 * because it points to a deleted file, so we ask the server to return
1139 * whatever it thinks is the next entry. We then feed this to filldir.
1140 * If all goes well, we should then be able to find our way round the
1141 * cache on the next call to readdir_search_pagecache();
1142 *
1143 * NOTE: we cannot add the anonymous page to the pagecache because
1144 *	 the data it contains might not be page aligned. Besides,
1145 *	 we should already have a complete representation of the
1146 *	 directory in the page cache by the time we get here.
1147 */
1148static int uncached_readdir(struct nfs_readdir_descriptor *desc)
1149{
1150	struct folio	**arrays;
1151	size_t		i, sz = 512;
1152	__be32		verf[NFS_DIR_VERIFIER_SIZE];
1153	int		status = -ENOMEM;
1154
1155	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n",
1156			(unsigned long long)desc->dir_cookie);
1157
1158	arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
1159	if (!arrays)
1160		goto out;
1161	arrays[0] = nfs_readdir_folio_array_alloc(desc->dir_cookie, GFP_KERNEL);
1162	if (!arrays[0])
1163		goto out;
1164
1165	desc->folio_index = 0;
1166	desc->cache_entry_index = 0;
1167	desc->last_cookie = desc->dir_cookie;
1168	desc->folio_index_max = 0;
1169
1170	trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
1171				   -1, desc->dtsize);
1172
1173	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
1174	if (status < 0) {
1175		trace_nfs_readdir_uncached_done(file_inode(desc->file), status);
1176		goto out_free;
1177	}
1178
1179	for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
1180		desc->folio = arrays[i];
1181		nfs_do_filldir(desc, verf);
1182	}
1183	desc->folio = NULL;
1184
1185	/*
1186	 * Grow the dtsize if we have to go back for more pages,
1187	 * or shrink it if we're reading too many.
1188	 */
1189	if (!desc->eof) {
1190		if (!desc->eob)
1191			nfs_grow_dtsize(desc);
1192		else if (desc->buffer_fills == 1 &&
1193			 i < (desc->folio_index_max >> 1))
1194			nfs_shrink_dtsize(desc);
1195	}
1196out_free:
1197	for (i = 0; i < sz && arrays[i]; i++)
1198		nfs_readdir_folio_array_free(arrays[i]);
1199out:
1200	if (!nfs_readdir_use_cookie(desc->file))
1201		nfs_readdir_rewind_search(desc);
1202	desc->folio_index_max = -1;
1203	kfree(arrays);
1204	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
1205	return status;
1206}
1207
1208static bool nfs_readdir_handle_cache_misses(struct inode *inode,
1209					    struct nfs_readdir_descriptor *desc,
1210					    unsigned int cache_misses,
1211					    bool force_clear)
1212{
1213	if (desc->ctx->pos == 0 || !desc->plus)
1214		return false;
1215	if (cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD && !force_clear)
1216		return false;
1217	trace_nfs_readdir_force_readdirplus(inode);
1218	return true;
1219}
1220
1221/* The file offset position represents the dirent entry number.  A
1222   last cookie cache takes care of the common case of reading the
1223   whole directory.
1224 */
1225static int nfs_readdir(struct file *file, struct dir_context *ctx)
1226{
1227	struct dentry	*dentry = file_dentry(file);
1228	struct inode	*inode = d_inode(dentry);
1229	struct nfs_inode *nfsi = NFS_I(inode);
1230	struct nfs_open_dir_context *dir_ctx = file->private_data;
1231	struct nfs_readdir_descriptor *desc;
1232	unsigned int cache_hits, cache_misses;
1233	bool force_clear;
1234	int res;
1235
1236	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
1237			file, (long long)ctx->pos);
1238	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
1239
1240	/*
1241	 * ctx->pos points to the dirent entry number.
1242	 * *desc->dir_cookie has the cookie for the next entry. We have
1243	 * to either find the entry with the appropriate number or
1244	 * revalidate the cookie.
1245	 */
1246	nfs_revalidate_mapping(inode, file->f_mapping);
1247
1248	res = -ENOMEM;
1249	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
1250	if (!desc)
1251		goto out;
1252	desc->file = file;
1253	desc->ctx = ctx;
1254	desc->folio_index_max = -1;
1255
1256	spin_lock(&file->f_lock);
1257	desc->dir_cookie = dir_ctx->dir_cookie;
1258	desc->folio_index = dir_ctx->page_index;
1259	desc->last_cookie = dir_ctx->last_cookie;
1260	desc->attr_gencount = dir_ctx->attr_gencount;
1261	desc->eof = dir_ctx->eof;
1262	nfs_set_dtsize(desc, dir_ctx->dtsize);
1263	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
1264	cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
1265	cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
1266	force_clear = dir_ctx->force_clear;
1267	spin_unlock(&file->f_lock);
1268
1269	if (desc->eof) {
1270		res = 0;
1271		goto out_free;
1272	}
1273
1274	desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
1275	force_clear = nfs_readdir_handle_cache_misses(inode, desc, cache_misses,
1276						      force_clear);
1277	desc->clear_cache = force_clear;
1278
1279	do {
1280		res = readdir_search_pagecache(desc);
1281
1282		if (res == -EBADCOOKIE) {
1283			res = 0;
1284			/* This means either end of directory */
1285			if (desc->dir_cookie && !desc->eof) {
1286				/* Or that the server has 'lost' a cookie */
1287				res = uncached_readdir(desc);
1288				if (res == 0)
1289					continue;
1290				if (res == -EBADCOOKIE || res == -ENOTSYNC)
1291					res = 0;
1292			}
1293			break;
1294		}
1295		if (res == -ETOOSMALL && desc->plus) {
1296			nfs_zap_caches(inode);
1297			desc->plus = false;
1298			desc->eof = false;
1299			continue;
1300		}
1301		if (res < 0)
1302			break;
1303
1304		nfs_do_filldir(desc, nfsi->cookieverf);
1305		nfs_readdir_folio_unlock_and_put_cached(desc);
1306		if (desc->folio_index == desc->folio_index_max)
1307			desc->clear_cache = force_clear;
1308	} while (!desc->eob && !desc->eof);
1309
1310	spin_lock(&file->f_lock);
1311	dir_ctx->dir_cookie = desc->dir_cookie;
1312	dir_ctx->last_cookie = desc->last_cookie;
1313	dir_ctx->attr_gencount = desc->attr_gencount;
1314	dir_ctx->page_index = desc->folio_index;
1315	dir_ctx->force_clear = force_clear;
1316	dir_ctx->eof = desc->eof;
1317	dir_ctx->dtsize = desc->dtsize;
1318	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
1319	spin_unlock(&file->f_lock);
1320out_free:
1321	kfree(desc);
1322
1323out:
1324	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
1325	return res;
1326}
1327
1328static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
1329{
1330	struct nfs_open_dir_context *dir_ctx = filp->private_data;
1331
1332	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
1333			filp, offset, whence);
1334
1335	switch (whence) {
1336	default:
1337		return -EINVAL;
1338	case SEEK_SET:
1339		if (offset < 0)
1340			return -EINVAL;
1341		spin_lock(&filp->f_lock);
1342		break;
1343	case SEEK_CUR:
1344		if (offset == 0)
1345			return filp->f_pos;
1346		spin_lock(&filp->f_lock);
1347		offset += filp->f_pos;
1348		if (offset < 0) {
1349			spin_unlock(&filp->f_lock);
1350			return -EINVAL;
1351		}
1352	}
1353	if (offset != filp->f_pos) {
1354		filp->f_pos = offset;
1355		dir_ctx->page_index = 0;
1356		if (!nfs_readdir_use_cookie(filp)) {
1357			dir_ctx->dir_cookie = 0;
1358			dir_ctx->last_cookie = 0;
1359		} else {
1360			dir_ctx->dir_cookie = offset;
1361			dir_ctx->last_cookie = offset;
1362		}
1363		dir_ctx->eof = false;
1364	}
1365	spin_unlock(&filp->f_lock);
1366	return offset;
1367}
1368
1369/*
1370 * All directory operations under NFS are synchronous, so fsync()
1371 * is a dummy operation.
1372 */
1373static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
1374			 int datasync)
1375{
1376	dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
1377
1378	nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
1379	return 0;
1380}
1381
1382/**
1383 * nfs_force_lookup_revalidate - Mark the directory as having changed
1384 * @dir: pointer to directory inode
1385 *
1386 * This forces the revalidation code in nfs_lookup_revalidate() to do a
1387 * full lookup on all child dentries of 'dir' whenever a change occurs
1388 * on the server that might have invalidated our dcache.
1389 *
1390 * Note that we reserve bit '0' as a tag to let us know when a dentry
1391 * was revalidated while holding a delegation on its inode.
1392 *
1393 * The caller should be holding dir->i_lock
1394 */
1395void nfs_force_lookup_revalidate(struct inode *dir)
1396{
1397	NFS_I(dir)->cache_change_attribute += 2;
1398}
1399EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
1400
1401/**
1402 * nfs_verify_change_attribute - Detects NFS remote directory changes
1403 * @dir: pointer to parent directory inode
1404 * @verf: previously saved change attribute
1405 *
1406 * Return "false" if the verifiers doesn't match the change attribute.
1407 * This would usually indicate that the directory contents have changed on
1408 * the server, and that any dentries need revalidating.
1409 */
1410static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
1411{
1412	return (verf & ~1UL) == nfs_save_change_attribute(dir);
1413}
1414
/* Tag the verifier (bit 0) as "revalidated while holding a delegation" */
static void nfs_set_verifier_delegated(unsigned long *verf)
{
	*verf = *verf | 1UL;
}
1419
1420#if IS_ENABLED(CONFIG_NFS_V4)
/* Clear the delegation tag (bit 0) from the verifier */
static void nfs_unset_verifier_delegated(unsigned long *verf)
{
	*verf = *verf & ~1UL;
}
1425#endif /* IS_ENABLED(CONFIG_NFS_V4) */
1426
/* Report whether the delegation tag (bit 0) is set in the verifier */
static bool nfs_test_verifier_delegated(unsigned long verf)
{
	return (verf & 1UL) != 0;
}
1431
1432static bool nfs_verifier_is_delegated(struct dentry *dentry)
1433{
1434	return nfs_test_verifier_delegated(dentry->d_time);
1435}
1436
1437static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
1438{
1439	struct inode *inode = d_inode(dentry);
1440	struct inode *dir = d_inode_rcu(dentry->d_parent);
1441
1442	if (!dir || !nfs_verify_change_attribute(dir, verf))
1443		return;
1444	if (NFS_PROTO(dir)->have_delegation(dir, FMODE_READ, 0) ||
1445	    (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0)))
1446		nfs_set_verifier_delegated(&verf);
1447	dentry->d_time = verf;
1448}
1449
1450/**
1451 * nfs_set_verifier - save a parent directory verifier in the dentry
1452 * @dentry: pointer to dentry
1453 * @verf: verifier to save
1454 *
1455 * Saves the parent directory verifier in @dentry. If the inode has
1456 * a delegation, we also tag the dentry as having been revalidated
1457 * while holding a delegation so that we know we don't have to
1458 * look it up again after a directory change.
1459 */
1460void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
1461{
1462
1463	spin_lock(&dentry->d_lock);
1464	nfs_set_verifier_locked(dentry, verf);
1465	spin_unlock(&dentry->d_lock);
1466}
1467EXPORT_SYMBOL_GPL(nfs_set_verifier);
1468
1469#if IS_ENABLED(CONFIG_NFS_V4)
1470static void nfs_clear_verifier_file(struct inode *inode)
1471{
1472	struct dentry *alias;
1473	struct inode *dir;
1474
1475	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1476		spin_lock(&alias->d_lock);
1477		dir = d_inode_rcu(alias->d_parent);
1478		if (!dir ||
1479		    !NFS_PROTO(dir)->have_delegation(dir, FMODE_READ, 0))
1480			nfs_unset_verifier_delegated(&alias->d_time);
1481		spin_unlock(&alias->d_lock);
1482	}
1483}
1484
1485static void nfs_clear_verifier_directory(struct inode *dir)
1486{
1487	struct dentry *this_parent;
1488	struct dentry *dentry;
1489	struct inode *inode;
1490
1491	if (hlist_empty(&dir->i_dentry))
1492		return;
1493	this_parent =
1494		hlist_entry(dir->i_dentry.first, struct dentry, d_u.d_alias);
1495
1496	spin_lock(&this_parent->d_lock);
1497	nfs_unset_verifier_delegated(&this_parent->d_time);
1498	dentry = d_first_child(this_parent);
1499	hlist_for_each_entry_from(dentry, d_sib) {
1500		if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
1501			continue;
1502		inode = d_inode_rcu(dentry);
1503		if (inode &&
1504		    NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0))
1505			continue;
1506		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1507		nfs_unset_verifier_delegated(&dentry->d_time);
1508		spin_unlock(&dentry->d_lock);
1509	}
1510	spin_unlock(&this_parent->d_lock);
1511}
1512
1513/**
1514 * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
1515 * @inode: pointer to inode
1516 *
1517 * Iterates through the dentries in the inode alias list and clears
1518 * the tag used to indicate that the dentry has been revalidated
1519 * while holding a delegation.
1520 * This function is intended for use when the delegation is being
1521 * returned or revoked.
1522 */
1523void nfs_clear_verifier_delegated(struct inode *inode)
1524{
1525	if (!inode)
1526		return;
1527	spin_lock(&inode->i_lock);
1528	if (S_ISREG(inode->i_mode))
1529		nfs_clear_verifier_file(inode);
1530	else if (S_ISDIR(inode->i_mode))
1531		nfs_clear_verifier_directory(inode);
1532	spin_unlock(&inode->i_lock);
1533}
1534EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
1535#endif /* IS_ENABLED(CONFIG_NFS_V4) */
1536
1537static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
1538{
1539	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
1540	    d_really_is_negative(dentry))
1541		return dentry->d_time == inode_peek_iversion_raw(dir);
1542	return nfs_verify_change_attribute(dir, dentry->d_time);
1543}
1544
1545/*
1546 * A check for whether or not the parent directory has changed.
1547 * In the case it has, we assume that the dentries are untrustworthy
1548 * and may need to be looked up again.
1549 * If rcu_walk prevents us from performing a full check, return 0.
1550 */
1551static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
1552			      int rcu_walk)
1553{
1554	if (IS_ROOT(dentry))
1555		return 1;
1556	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
1557		return 0;
1558	if (!nfs_dentry_verify_change(dir, dentry))
1559		return 0;
1560
1561	/* Revalidate nfsi->cache_change_attribute before we declare a match */
1562	if (nfs_mapping_need_revalidate_inode(dir)) {
1563		if (rcu_walk)
1564			return 0;
1565		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
1566			return 0;
1567	}
1568	if (!nfs_dentry_verify_change(dir, dentry))
1569		return 0;
1570	return 1;
1571}
1572
1573/*
1574 * Use intent information to check whether or not we're going to do
1575 * an O_EXCL create using this path component.
1576 */
1577static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
1578{
1579	if (NFS_PROTO(dir)->version == 2)
1580		return 0;
1581	return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
1582		(LOOKUP_CREATE | LOOKUP_EXCL);
1583}
1584
1585/*
1586 * Inode and filehandle revalidation for lookups.
1587 *
1588 * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
1589 * or if the intent information indicates that we're about to open this
1590 * particular file and the "nocto" mount flag is not set.
1591 *
1592 */
1593static
1594int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1595{
1596	struct nfs_server *server = NFS_SERVER(inode);
1597	int ret;
1598
1599	if (IS_AUTOMOUNT(inode))
1600		return 0;
1601
1602	if (flags & LOOKUP_OPEN) {
1603		switch (inode->i_mode & S_IFMT) {
1604		case S_IFREG:
1605			/* A NFSv4 OPEN will revalidate later */
1606			if (server->caps & NFS_CAP_ATOMIC_OPEN)
1607				goto out;
1608			fallthrough;
1609		case S_IFDIR:
1610			if (server->flags & NFS_MOUNT_NOCTO)
1611				break;
1612			/* NFS close-to-open cache consistency validation */
1613			goto out_force;
1614		}
1615	}
1616
1617	/* VFS wants an on-the-wire revalidation */
1618	if (flags & LOOKUP_REVAL)
1619		goto out_force;
1620out:
1621	if (inode->i_nlink > 0 ||
1622	    (inode->i_nlink == 0 &&
1623	     test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags)))
1624		return 0;
1625	else
1626		return -ESTALE;
1627out_force:
1628	if (flags & LOOKUP_RCU)
1629		return -ECHILD;
1630	ret = __nfs_revalidate_inode(server, inode);
1631	if (ret != 0)
1632		return ret;
1633	goto out;
1634}
1635
1636static void nfs_mark_dir_for_revalidate(struct inode *inode)
1637{
1638	spin_lock(&inode->i_lock);
1639	nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE);
1640	spin_unlock(&inode->i_lock);
1641}
1642
1643/*
1644 * We judge how long we want to trust negative
1645 * dentries by looking at the parent inode mtime.
1646 *
1647 * If parent mtime has changed, we revalidate, else we wait for a
1648 * period corresponding to the parent's attribute cache timeout value.
1649 *
1650 * If LOOKUP_RCU prevents us from performing a full check, return 1
1651 * suggesting a reval is needed.
1652 *
1653 * Note that when creating a new file, or looking up a rename target,
1654 * then it shouldn't be necessary to revalidate a negative dentry.
1655 */
1656static inline
1657int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1658		       unsigned int flags)
1659{
1660	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1661		return 0;
1662	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1663		return 1;
1664	/* Case insensitive server? Revalidate negative dentries */
1665	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
1666		return 1;
1667	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1668}
1669
1670static int
1671nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
1672			   struct inode *inode, int error)
1673{
1674	switch (error) {
1675	case 1:
1676		break;
1677	case -ETIMEDOUT:
1678		if (inode && (IS_ROOT(dentry) ||
1679			      NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL))
1680			error = 1;
1681		break;
1682	case -ESTALE:
1683	case -ENOENT:
1684		error = 0;
1685		fallthrough;
1686	default:
1687		/*
1688		 * We can't d_drop the root of a disconnected tree:
1689		 * its d_hash is on the s_anon list and d_drop() would hide
1690		 * it from shrink_dcache_for_unmount(), leading to busy
1691		 * inodes on unmount and further oopses.
1692		 */
1693		if (inode && IS_ROOT(dentry))
1694			error = 1;
1695		break;
1696	}
1697	trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
1698	return error;
1699}
1700
1701static int
1702nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1703			       unsigned int flags)
1704{
1705	int ret = 1;
1706	if (nfs_neg_need_reval(dir, dentry, flags)) {
1707		if (flags & LOOKUP_RCU)
1708			return -ECHILD;
1709		ret = 0;
1710	}
1711	return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
1712}
1713
/*
 * Revalidate a dentry whose verifier carries the delegation tag:
 * refresh the verifier from the directory's current change attribute
 * and report the dentry as valid.
 */
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
				struct inode *inode)
{
	unsigned long verf = nfs_save_change_attribute(dir);

	nfs_set_verifier(dentry, verf);
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
1721
1722static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name,
1723					struct dentry *dentry,
1724					struct inode *inode, unsigned int flags)
1725{
1726	struct nfs_fh *fhandle;
1727	struct nfs_fattr *fattr;
1728	unsigned long dir_verifier;
1729	int ret;
1730
1731	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1732
1733	ret = -ENOMEM;
1734	fhandle = nfs_alloc_fhandle();
1735	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
1736	if (fhandle == NULL || fattr == NULL)
1737		goto out;
1738
1739	dir_verifier = nfs_save_change_attribute(dir);
1740	ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr);
1741	if (ret < 0)
1742		goto out;
1743
1744	/* Request help from readdirplus */
1745	nfs_lookup_advise_force_readdirplus(dir, flags);
1746
1747	ret = 0;
1748	if (nfs_compare_fh(NFS_FH(inode), fhandle))
1749		goto out;
1750	if (nfs_refresh_inode(inode, fattr) < 0)
1751		goto out;
1752
1753	nfs_setsecurity(inode, fattr);
1754	nfs_set_verifier(dentry, dir_verifier);
1755
1756	ret = 1;
1757out:
1758	nfs_free_fattr(fattr);
1759	nfs_free_fhandle(fhandle);
1760
1761	/*
1762	 * If the lookup failed despite the dentry change attribute being
1763	 * a match, then we should revalidate the directory cache.
1764	 */
1765	if (!ret && nfs_dentry_verify_change(dir, dentry))
1766		nfs_mark_dir_for_revalidate(dir);
1767	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1768}
1769
1770/*
1771 * This is called every time the dcache has a lookup hit,
1772 * and we should check whether we can really trust that
1773 * lookup.
1774 *
1775 * NOTE! The hit can be a negative hit too, don't assume
1776 * we have an inode!
1777 *
1778 * If the parent directory is seen to have changed, we throw out the
1779 * cached dentry and do a new lookup.
1780 */
1781static int
1782nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name,
1783			 struct dentry *dentry, unsigned int flags)
1784{
1785	struct inode *inode;
1786	int error = 0;
1787
1788	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1789	inode = d_inode(dentry);
1790
1791	if (!inode)
1792		return nfs_lookup_revalidate_negative(dir, dentry, flags);
1793
1794	if (is_bad_inode(inode)) {
1795		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1796				__func__, dentry);
1797		goto out_bad;
1798	}
1799
1800	if ((flags & LOOKUP_RENAME_TARGET) && d_count(dentry) < 2 &&
1801	    nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
1802		goto out_bad;
1803
1804	if (nfs_verifier_is_delegated(dentry))
1805		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1806
1807	/* Force a full look up iff the parent directory has changed */
1808	if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1809	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1810		error = nfs_lookup_verify_inode(inode, flags);
1811		if (error) {
1812			if (error == -ESTALE)
1813				nfs_mark_dir_for_revalidate(dir);
1814			goto out_bad;
1815		}
1816		goto out_valid;
1817	}
1818
1819	if (flags & LOOKUP_RCU)
1820		return -ECHILD;
1821
1822	if (NFS_STALE(inode))
1823		goto out_bad;
1824
1825	return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
1826out_valid:
1827	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1828out_bad:
1829	if (flags & LOOKUP_RCU)
1830		return -ECHILD;
1831	return nfs_lookup_revalidate_done(dir, dentry, inode, error);
1832}
1833
1834static int
1835__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1836{
1837	if (flags & LOOKUP_RCU) {
1838		if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
1839			return -ECHILD;
1840	} else {
1841		/* Wait for unlink to complete - see unblock_revalidate() */
1842		wait_var_event(&dentry->d_fsdata,
1843			       smp_load_acquire(&dentry->d_fsdata)
1844			       != NFS_FSDATA_BLOCKED);
1845	}
1846	return 0;
1847}
1848
1849static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name,
1850				 struct dentry *dentry, unsigned int flags)
1851{
1852	if (__nfs_lookup_revalidate(dentry, flags))
1853		return -ECHILD;
1854	return nfs_do_lookup_revalidate(dir, name, dentry, flags);
1855}
1856
1857static void block_revalidate(struct dentry *dentry)
1858{
1859	/* old devname - just in case */
1860	kfree(dentry->d_fsdata);
1861
1862	/* Any new reference that could lead to an open
1863	 * will take ->d_lock in lookup_open() -> d_lookup().
1864	 * Holding this lock ensures we cannot race with
1865	 * __nfs_lookup_revalidate() and removes and need
1866	 * for further barriers.
1867	 */
1868	lockdep_assert_held(&dentry->d_lock);
1869
1870	dentry->d_fsdata = NFS_FSDATA_BLOCKED;
1871}
1872
1873static void unblock_revalidate(struct dentry *dentry)
1874{
1875	store_release_wake_up(&dentry->d_fsdata, NULL);
1876}
1877
1878/*
1879 * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
1880 * when we don't really care about the dentry name. This is called when a
1881 * pathwalk ends on a dentry that was not found via a normal lookup in the
1882 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1883 *
1884 * In this situation, we just want to verify that the inode itself is OK
1885 * since the dentry might have changed on the server.
1886 */
1887static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1888{
1889	struct inode *inode = d_inode(dentry);
1890	int error = 0;
1891
1892	/*
1893	 * I believe we can only get a negative dentry here in the case of a
1894	 * procfs-style symlink. Just assume it's correct for now, but we may
1895	 * eventually need to do something more here.
1896	 */
1897	if (!inode) {
1898		dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
1899				__func__, dentry);
1900		return 1;
1901	}
1902
1903	if (is_bad_inode(inode)) {
1904		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1905				__func__, dentry);
1906		return 0;
1907	}
1908
1909	error = nfs_lookup_verify_inode(inode, flags);
1910	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1911			__func__, inode->i_ino, error ? "invalid" : "valid");
1912	return !error;
1913}
1914
1915/*
1916 * This is called from dput() when d_count is going to 0.
1917 */
1918static int nfs_dentry_delete(const struct dentry *dentry)
1919{
1920	dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
1921		dentry, dentry->d_flags);
1922
1923	/* Unhash any dentry with a stale inode */
1924	if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
1925		return 1;
1926
1927	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1928		/* Unhash it, so that ->d_iput() would be called */
1929		return 1;
1930	}
1931	if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
1932		/* Unhash it, so that ancestors of killed async unlink
1933		 * files will be cleaned up during umount */
1934		return 1;
1935	}
1936	return 0;
1937
1938}
1939
1940/* Ensure that we revalidate inode->i_nlink */
1941static void nfs_drop_nlink(struct inode *inode, unsigned long gencount)
1942{
1943	struct nfs_inode *nfsi = NFS_I(inode);
1944
1945	spin_lock(&inode->i_lock);
1946	/* drop the inode if we're reasonably sure this is the last link */
1947	if (inode->i_nlink > 0 && gencount == nfsi->attr_gencount)
1948		drop_nlink(inode);
1949	nfsi->attr_gencount = nfs_inc_attr_generation_counter();
1950	nfs_set_cache_invalid(
1951		inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
1952			       NFS_INO_INVALID_NLINK);
1953	spin_unlock(&inode->i_lock);
1954}
1955
1956/*
1957 * Called when the dentry loses inode.
1958 * We use it to clean up silly-renamed files.
1959 */
1960static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1961{
1962	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1963		unsigned long gencount = READ_ONCE(NFS_I(inode)->attr_gencount);
1964		nfs_complete_unlink(dentry, inode);
1965		nfs_drop_nlink(inode, gencount);
1966	}
1967	iput(inode);
1968}
1969
1970static void nfs_d_release(struct dentry *dentry)
1971{
1972	/* free cached devname value, if it survived that far */
1973	if (unlikely(dentry->d_fsdata)) {
1974		if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1975			WARN_ON(1);
1976		else
1977			kfree(dentry->d_fsdata);
1978	}
1979}
1980
1981const struct dentry_operations nfs_dentry_operations = {
1982	.d_revalidate	= nfs_lookup_revalidate,
1983	.d_weak_revalidate	= nfs_weak_revalidate,
1984	.d_delete	= nfs_dentry_delete,
1985	.d_iput		= nfs_dentry_iput,
1986	.d_automount	= nfs_d_automount,
1987	.d_release	= nfs_d_release,
1988};
1989EXPORT_SYMBOL_GPL(nfs_dentry_operations);
1990
1991struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
1992{
1993	struct dentry *res;
1994	struct inode *inode = NULL;
1995	struct nfs_fh *fhandle = NULL;
1996	struct nfs_fattr *fattr = NULL;
1997	unsigned long dir_verifier;
1998	int error;
1999
2000	dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
2001	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
2002
2003	if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
2004		return ERR_PTR(-ENAMETOOLONG);
2005
2006	/*
2007	 * If we're doing an exclusive create, optimize away the lookup
2008	 * but don't hash the dentry.
2009	 */
2010	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
2011		return NULL;
2012
2013	res = ERR_PTR(-ENOMEM);
2014	fhandle = nfs_alloc_fhandle();
2015	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir));
2016	if (fhandle == NULL || fattr == NULL)
2017		goto out;
2018
2019	dir_verifier = nfs_save_change_attribute(dir);
2020	trace_nfs_lookup_enter(dir, dentry, flags);
2021	error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
2022				       fhandle, fattr);
2023	if (error == -ENOENT) {
2024		if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
2025			dir_verifier = inode_peek_iversion_raw(dir);
2026		goto no_entry;
2027	}
2028	if (error < 0) {
2029		res = ERR_PTR(error);
2030		goto out;
2031	}
2032	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
2033	res = ERR_CAST(inode);
2034	if (IS_ERR(res))
2035		goto out;
2036
2037	/* Notify readdir to use READDIRPLUS */
2038	nfs_lookup_advise_force_readdirplus(dir, flags);
2039
2040no_entry:
2041	nfs_set_verifier(dentry, dir_verifier);
2042	res = d_splice_alias(inode, dentry);
2043	if (res != NULL) {
2044		if (IS_ERR(res))
2045			goto out;
2046		nfs_set_verifier(res, dir_verifier);
2047		dentry = res;
2048	}
2049out:
2050	trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res));
2051	nfs_free_fattr(fattr);
2052	nfs_free_fhandle(fhandle);
2053	return res;
2054}
2055EXPORT_SYMBOL_GPL(nfs_lookup);
2056
2057void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
2058{
2059	/* Case insensitive server? Revalidate dentries */
2060	if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
2061		d_prune_aliases(inode);
2062}
2063EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
2064
2065#if IS_ENABLED(CONFIG_NFS_V4)
2066static int nfs4_lookup_revalidate(struct inode *, const struct qstr *,
2067				  struct dentry *, unsigned int);
2068
2069const struct dentry_operations nfs4_dentry_operations = {
2070	.d_revalidate	= nfs4_lookup_revalidate,
2071	.d_weak_revalidate	= nfs_weak_revalidate,
2072	.d_delete	= nfs_dentry_delete,
2073	.d_iput		= nfs_dentry_iput,
2074	.d_automount	= nfs_d_automount,
2075	.d_release	= nfs_d_release,
2076};
2077EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
2078
2079static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
2080{
2081	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
2082}
2083
/*
 * Open callback handed to finish_open(): notifies fscache that the file
 * is being opened.  Always returns 0.
 */
static int do_open(struct inode *inode, struct file *filp)
{
	nfs_fscache_open_file(inode, filp);

	return 0;
}
2089
2090static int nfs_finish_open(struct nfs_open_context *ctx,
2091			   struct dentry *dentry,
2092			   struct file *file, unsigned open_flags)
2093{
2094	int err;
2095
2096	err = finish_open(file, dentry, do_open);
2097	if (err)
2098		goto out;
2099	if (S_ISREG(file_inode(file)->i_mode))
2100		nfs_file_set_open_context(file, ctx);
2101	else
2102		err = -EOPENSTALE;
2103out:
2104	return err;
2105}
2106
/*
 * Combined lookup + open.
 *
 * @dir:        parent directory inode
 * @dentry:     dentry to open; must be negative on entry (BUG_ON otherwise)
 * @file:       file being opened
 * @open_flags: O_* flags of the open
 * @mode:       creation mode, only consulted when O_CREAT is set
 *
 * Two outcomes are possible:
 *  - the protocol's ->open_context() opens the file and nfs_finish_open()
 *    completes it; or
 *  - the "no_open" path runs nfs_lookup() and hands the result to
 *    finish_no_open().  This is taken for O_DIRECTORY, and when
 *    ->open_context() fails with -EISDIR, -ENOTDIR, or -ELOOP without
 *    O_NOFOLLOW.
 *
 * Returns the result of nfs_finish_open()/finish_no_open(), or a negative
 * errno for the early failures below.
 */
int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
		    struct file *file, unsigned open_flags,
		    umode_t mode)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	struct nfs_open_context *ctx;
	struct dentry *res;
	struct iattr attr = { .ia_valid = ATTR_OPEN };
	struct inode *inode;
	unsigned int lookup_flags = 0;
	unsigned long dir_verifier;
	bool switched = false;	/* true once @dentry was replaced below */
	int created = 0;
	int err;

	/* Expect a negative dentry */
	BUG_ON(d_inode(dentry));

	dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
			dir->i_sb->s_id, dir->i_ino, dentry);

	err = nfs_check_flags(open_flags);
	if (err)
		return err;

	/* NFS only supports OPEN on regular files */
	if ((open_flags & O_DIRECTORY)) {
		if (!d_in_lookup(dentry)) {
			/*
			 * Hashed negative dentry with O_DIRECTORY: dentry was
			 * revalidated and is fine, no need to perform lookup
			 * again
			 */
			return -ENOENT;
		}
		lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
		goto no_open;
	}

	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
		return -ENAMETOOLONG;

	if (open_flags & O_CREAT) {
		struct nfs_server *server = NFS_SERVER(dir);

		/*
		 * Apply the umask locally unless the server's attribute
		 * bitmask has FATTR4_WORD2_MODE_UMASK set.
		 */
		if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
			mode &= ~current_umask();

		attr.ia_valid |= ATTR_MODE;
		attr.ia_mode = mode;
	}
	if (open_flags & O_TRUNC) {
		attr.ia_valid |= ATTR_SIZE;
		attr.ia_size = 0;
	}

	/*
	 * Non-creating open of a dentry that is not in-lookup: unhash it and
	 * obtain a replacement via d_alloc_parallel() with the same parent
	 * and name.  From here on @dentry refers to the replacement, which is
	 * released with d_lookup_done()/dput() on the exit paths.
	 */
	if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
		d_drop(dentry);
		switched = true;
		dentry = d_alloc_parallel(dentry->d_parent,
					  &dentry->d_name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
		/* Replacement is no longer in-lookup: let the VFS open it */
		if (unlikely(!d_in_lookup(dentry)))
			return finish_no_open(file, dentry);
	}

	ctx = create_nfs_open_context(dentry, open_flags, file);
	err = PTR_ERR(ctx);
	if (IS_ERR(ctx))
		goto out;

	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
	/* Recorded before the error check, so it is set even if the open failed */
	if (created)
		file->f_mode |= FMODE_CREATED;
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
		put_nfs_open_context(ctx);
		d_drop(dentry);
		switch (err) {
		case -ENOENT:
			/*
			 * Turn @dentry into a negative dentry stamped with a
			 * directory verifier; which value is used depends on
			 * NFS_CAP_CASE_INSENSITIVE.
			 */
			if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
				dir_verifier = inode_peek_iversion_raw(dir);
			else
				dir_verifier = nfs_save_change_attribute(dir);
			nfs_set_verifier(dentry, dir_verifier);
			d_splice_alias(NULL, dentry);
			break;
		case -EISDIR:
		case -ENOTDIR:
			goto no_open;
		case -ELOOP:
			if (!(open_flags & O_NOFOLLOW))
				goto no_open;
			break;
			/* case -EINVAL: */
		default:
			break;
		}
		goto out;
	}
	file->f_mode |= FMODE_CAN_ODIRECT;

	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
	put_nfs_open_context(ctx);
out:
	if (unlikely(switched)) {
		d_lookup_done(dentry);
		dput(dentry);
	}
	return err;

no_open:
	/*
	 * Plain lookup.  With LOOKUP_DIRECTORY, anything that is neither a
	 * directory nor a symlink yields -ENOTDIR; otherwise a regular file
	 * yields -EOPENSTALE.  The two branches differ only in which dentry
	 * carries the inode and in dropping the reference on @res.
	 */
	res = nfs_lookup(dir, dentry, lookup_flags);
	if (!res) {
		inode = d_inode(dentry);
		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
			res = ERR_PTR(-ENOTDIR);
		else if (inode && S_ISREG(inode->i_mode))
			res = ERR_PTR(-EOPENSTALE);
	} else if (!IS_ERR(res)) {
		inode = d_inode(res);
		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
			dput(res);
			res = ERR_PTR(-ENOTDIR);
		} else if (inode && S_ISREG(inode->i_mode)) {
			dput(res);
			res = ERR_PTR(-EOPENSTALE);
		}
	}
	if (switched) {
		d_lookup_done(dentry);
		/* Pass the replacement dentry on if nothing else was produced */
		if (!res)
			res = dentry;
		else
			dput(dentry);
	}
	return finish_no_open(file, res);
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
2252
2253static int
2254nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name,
2255		       struct dentry *dentry, unsigned int flags)
2256{
2257	struct inode *inode;
2258
2259	if (__nfs_lookup_revalidate(dentry, flags))
2260		return -ECHILD;
2261
2262	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
2263
2264	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
2265		goto full_reval;
2266	if (d_mountpoint(dentry))
2267		goto full_reval;
2268
2269	inode = d_inode(dentry);
2270
2271	/* We can't create new files in nfs_open_revalidate(), so we
2272	 * optimize away revalidation of negative dentries.
2273	 */
2274	if (inode == NULL)
2275		goto full_reval;
2276
2277	if (nfs_verifier_is_delegated(dentry) ||
2278	    nfs_have_directory_delegation(inode))
2279		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
2280
2281	/* NFS only supports OPEN on regular files */
2282	if (!S_ISREG(inode->i_mode))
2283		goto full_reval;
2284
2285	/* We cannot do exclusive creation on a positive dentry */
2286	if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
2287		goto reval_dentry;
2288
2289	/* Check if the directory changed */
2290	if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
2291		goto reval_dentry;
2292
2293	/* Let f_op->open() actually open (and revalidate) the file */
2294	return 1;
2295reval_dentry:
2296	if (flags & LOOKUP_RCU)
2297		return -ECHILD;
2298	return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
2299
2300full_reval:
2301	return nfs_do_lookup_revalidate(dir, name, dentry, flags);
2302}
2303
#endif /* CONFIG_NFS_V4 */
2305
2306int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
2307			struct file *file, unsigned int open_flags,
2308			umode_t mode)
2309{
2310	struct dentry *res = NULL;
2311	/* Same as look+open from lookup_open(), but with different O_TRUNC
2312	 * handling.
2313	 */
2314	int error = 0;
2315
2316	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
2317		return -ENAMETOOLONG;
2318
2319	if (open_flags & O_CREAT) {
2320		error = nfs_do_create(dir, dentry, mode, open_flags);
2321		if (!error) {
2322			file->f_mode |= FMODE_CREATED;
2323			return finish_open(file, dentry, NULL);
2324		} else if (error != -EEXIST || open_flags & O_EXCL)
2325			return error;
2326	}
2327	if (d_in_lookup(dentry)) {
2328		/* The only flags nfs_lookup considers are
2329		 * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and
2330		 * we want those to be zero so the lookup isn't skipped.
2331		 */
2332		res = nfs_lookup(dir, dentry, 0);
2333	}
2334	return finish_no_open(file, res);
2335
2336}
2337EXPORT_SYMBOL_GPL(nfs_atomic_open_v23);
2338
2339struct dentry *
2340nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
2341				struct nfs_fattr *fattr)
2342{
2343	struct dentry *parent = dget_parent(dentry);
2344	struct inode *dir = d_inode(parent);
2345	struct inode *inode;
2346	struct dentry *d;
2347	int error;
2348
2349	d_drop(dentry);
2350
2351	if (fhandle->size == 0) {
2352		error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
2353					       fhandle, fattr);
2354		if (error)
2355			goto out_error;
2356	}
2357	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2358	if (!(fattr->valid & NFS_ATTR_FATTR)) {
2359		struct nfs_server *server = NFS_SB(dentry->d_sb);
2360		error = server->nfs_client->rpc_ops->getattr(server, fhandle,
2361				fattr, NULL);
2362		if (error < 0)
2363			goto out_error;
2364	}
2365	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
2366	d = d_splice_alias(inode, dentry);
2367out:
2368	dput(parent);
2369	return d;
2370out_error:
2371	d = ERR_PTR(error);
2372	goto out;
2373}
2374EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
2375
/*
 * Code common to create, mkdir, and mknod.
 *
 * Wrapper around nfs_add_or_obtain() for callers that only need a status:
 * the dentry it returns is released immediately.  Returns 0 on success or
 * the error encoded in the returned pointer.
 */
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
				struct nfs_fattr *fattr)
{
	struct dentry *alias = nfs_add_or_obtain(dentry, fhandle, fattr);

	if (IS_ERR(alias))
		return PTR_ERR(alias);

	/* Callers don't care which dentry ended up with the inode */
	dput(alias);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_instantiate);
2393
2394/*
2395 * Following a failed create operation, we drop the dentry rather
2396 * than retain a negative dentry. This avoids a problem in the event
2397 * that the operation succeeded on the server, but an error in the
2398 * reply path made it appear to have failed.
2399 */
2400static int nfs_do_create(struct inode *dir, struct dentry *dentry,
2401			 umode_t mode, int open_flags)
2402{
2403	struct iattr attr;
2404	int error;
2405
2406	open_flags |= O_CREAT;
2407
2408	dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
2409			dir->i_sb->s_id, dir->i_ino, dentry);
2410
2411	attr.ia_mode = mode;
2412	attr.ia_valid = ATTR_MODE;
2413	if (open_flags & O_TRUNC) {
2414		attr.ia_size = 0;
2415		attr.ia_valid |= ATTR_SIZE;
2416	}
2417
2418	trace_nfs_create_enter(dir, dentry, open_flags);
2419	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
2420	trace_nfs_create_exit(dir, dentry, open_flags, error);
2421	if (error != 0)
2422		goto out_err;
2423	return 0;
2424out_err:
2425	d_drop(dentry);
2426	return error;
2427}
2428
2429int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
2430	       struct dentry *dentry, umode_t mode, bool excl)
2431{
2432	return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0);
2433}
2434EXPORT_SYMBOL_GPL(nfs_create);
2435
2436/*
2437 * See comments for nfs_proc_create regarding failed operations.
2438 */
2439int
2440nfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
2441	  struct dentry *dentry, umode_t mode, dev_t rdev)
2442{
2443	struct iattr attr;
2444	int status;
2445
2446	dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
2447			dir->i_sb->s_id, dir->i_ino, dentry);
2448
2449	attr.ia_mode = mode;
2450	attr.ia_valid = ATTR_MODE;
2451
2452	trace_nfs_mknod_enter(dir, dentry);
2453	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
2454	trace_nfs_mknod_exit(dir, dentry, status);
2455	if (status != 0)
2456		goto out_err;
2457	return 0;
2458out_err:
2459	d_drop(dentry);
2460	return status;
2461}
2462EXPORT_SYMBOL_GPL(nfs_mknod);
2463
2464/*
2465 * See comments for nfs_proc_create regarding failed operations.
2466 */
2467struct dentry *nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
2468			 struct dentry *dentry, umode_t mode)
2469{
2470	struct iattr attr;
2471	struct dentry *ret;
2472
2473	dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
2474			dir->i_sb->s_id, dir->i_ino, dentry);
2475
2476	attr.ia_valid = ATTR_MODE;
2477	attr.ia_mode = mode | S_IFDIR;
2478
2479	trace_nfs_mkdir_enter(dir, dentry);
2480	ret = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
2481	trace_nfs_mkdir_exit(dir, dentry, PTR_ERR_OR_ZERO(ret));
2482	return ret;
2483}
2484EXPORT_SYMBOL_GPL(nfs_mkdir);
2485
/*
 * React to an -ENOENT from the server: d_delete() @dentry, but only when
 * simple_positive() says it is still a live positive dentry.
 */
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
	if (!simple_positive(dentry))
		return;
	d_delete(dentry);
}
2491
2492static void nfs_dentry_remove_handle_error(struct inode *dir,
2493					   struct dentry *dentry, int error)
2494{
2495	switch (error) {
2496	case -ENOENT:
2497		if (d_really_is_positive(dentry))
2498			d_delete(dentry);
2499		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2500		break;
2501	case 0:
2502		nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
2503		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2504	}
2505}
2506
2507int nfs_rmdir(struct inode *dir, struct dentry *dentry)
2508{
2509	int error;
2510
2511	dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
2512			dir->i_sb->s_id, dir->i_ino, dentry);
2513
2514	trace_nfs_rmdir_enter(dir, dentry);
2515	if (d_really_is_positive(dentry)) {
2516		down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2517		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2518		/* Ensure the VFS deletes this inode */
2519		switch (error) {
2520		case 0:
2521			clear_nlink(d_inode(dentry));
2522			break;
2523		case -ENOENT:
2524			nfs_dentry_handle_enoent(dentry);
2525		}
2526		up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2527	} else
2528		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2529	nfs_dentry_remove_handle_error(dir, dentry, error);
2530	trace_nfs_rmdir_exit(dir, dentry, error);
2531
2532	return error;
2533}
2534EXPORT_SYMBOL_GPL(nfs_rmdir);
2535
2536/*
2537 * Remove a file after making sure there are no pending writes,
2538 * and after checking that the file has only one user. 
2539 *
2540 * We invalidate the attribute cache and free the inode prior to the operation
2541 * to avoid possible races if the server reuses the inode.
2542 */
2543static int nfs_safe_remove(struct dentry *dentry)
2544{
2545	struct inode *dir = d_inode(dentry->d_parent);
2546	struct inode *inode = d_inode(dentry);
2547	int error = -EBUSY;
2548		
2549	dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
2550
2551	/* If the dentry was sillyrenamed, we simply call d_delete() */
2552	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
2553		error = 0;
2554		goto out;
2555	}
2556
2557	trace_nfs_remove_enter(dir, dentry);
2558	if (inode != NULL) {
2559		unsigned long gencount = READ_ONCE(NFS_I(inode)->attr_gencount);
2560
2561		error = NFS_PROTO(dir)->remove(dir, dentry);
2562		if (error == 0)
2563			nfs_drop_nlink(inode, gencount);
2564	} else
2565		error = NFS_PROTO(dir)->remove(dir, dentry);
2566	if (error == -ENOENT)
2567		nfs_dentry_handle_enoent(dentry);
2568	trace_nfs_remove_exit(dir, dentry, error);
2569out:
2570	return error;
2571}
2572
/*
 * Unlink @dentry from @dir.
 *
 * If the dentry has more than one reference and the inode does not have
 * NFS_INO_PRESERVE_UNLINKED set, writeback of the inode is started and the
 * file is silly-renamed instead of removed; the result of
 * nfs_sillyrename() is returned as-is.
 *
 * Otherwise revalidation of the dentry is blocked, the file is removed via
 * nfs_safe_remove(), the result is post-processed by
 * nfs_dentry_remove_handle_error(), and revalidation is unblocked again.
 *
 * Returns 0 or a negative errno.  -ETXTBSY is returned if the dentry is
 * unexpectedly already sillyrenamed or already blocked (both WARN).
 */
int nfs_unlink(struct inode *dir, struct dentry *dentry)
{
	int error;

	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
		dir->i_ino, dentry);

	trace_nfs_unlink_enter(dir, dentry);
	/* d_lock covers the d_count() test and the block_revalidate() below */
	spin_lock(&dentry->d_lock);
	if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
					     &NFS_I(d_inode(dentry))->flags)) {
		spin_unlock(&dentry->d_lock);
		/* Start asynchronous writeout of the inode */
		write_inode_now(d_inode(dentry), 0);
		error = nfs_sillyrename(dir, dentry);
		goto out;
	}
	/* We must prevent any concurrent open until the unlink
	 * completes.  ->d_revalidate will wait for ->d_fsdata
	 * to clear.  We set it here to ensure no lookup succeeds until
	 * the unlink is complete on the server.
	 */
	error = -ETXTBSY;
	if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
	    WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) {
		spin_unlock(&dentry->d_lock);
		goto out;
	}
	block_revalidate(dentry);

	spin_unlock(&dentry->d_lock);
	error = nfs_safe_remove(dentry);
	nfs_dentry_remove_handle_error(dir, dentry, error);
	unblock_revalidate(dentry);
out:
	trace_nfs_unlink_exit(dir, dentry, error);
	return error;
}
EXPORT_SYMBOL_GPL(nfs_unlink);
2617
2618/*
2619 * To create a symbolic link, most file systems instantiate a new inode,
2620 * add a page to it containing the path, then write it out to the disk
2621 * using prepare_write/commit_write.
2622 *
2623 * Unfortunately the NFS client can't create the in-core inode first
2624 * because it needs a file handle to create an in-core inode (see
2625 * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
2626 * symlink request has completed on the server.
2627 *
2628 * So instead we allocate a raw page, copy the symname into it, then do
2629 * the SYMLINK request with the page as the buffer.  If it succeeds, we
2630 * now have a new file handle and can instantiate an in-core NFS inode
2631 * and move the raw page into its mapping.
2632 */
2633int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
2634		struct dentry *dentry, const char *symname)
2635{
2636	struct folio *folio;
2637	char *kaddr;
2638	struct iattr attr;
2639	unsigned int pathlen = strlen(symname);
2640	int error;
2641
2642	dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
2643		dir->i_ino, dentry, symname);
2644
2645	if (pathlen > PAGE_SIZE)
2646		return -ENAMETOOLONG;
2647
2648	attr.ia_mode = S_IFLNK | S_IRWXUGO;
2649	attr.ia_valid = ATTR_MODE;
2650
2651	folio = folio_alloc(GFP_USER, 0);
2652	if (!folio)
2653		return -ENOMEM;
2654
2655	kaddr = folio_address(folio);
2656	memcpy(kaddr, symname, pathlen);
2657	if (pathlen < PAGE_SIZE)
2658		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
2659
2660	trace_nfs_symlink_enter(dir, dentry);
2661	error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr);
2662	trace_nfs_symlink_exit(dir, dentry, error);
2663	if (error != 0) {
2664		dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
2665			dir->i_sb->s_id, dir->i_ino,
2666			dentry, symname, error);
2667		d_drop(dentry);
2668		folio_put(folio);
2669		return error;
2670	}
2671
2672	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2673
2674	/*
2675	 * No big deal if we can't add this page to the page cache here.
2676	 * READLINK will get the missing page from the server if needed.
2677	 */
2678	if (filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0,
2679							GFP_KERNEL) == 0) {
2680		folio_mark_uptodate(folio);
2681		folio_unlock(folio);
2682	}
2683
2684	folio_put(folio);
2685	return 0;
2686}
2687EXPORT_SYMBOL_GPL(nfs_symlink);
2688
2689int
2690nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2691{
2692	struct inode *inode = d_inode(old_dentry);
2693	int error;
2694
2695	dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
2696		old_dentry, dentry);
2697
2698	trace_nfs_link_enter(inode, dir, dentry);
2699	d_drop(dentry);
2700	if (S_ISREG(inode->i_mode))
2701		nfs_sync_inode(inode);
2702	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
2703	if (error == 0) {
2704		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2705		ihold(inode);
2706		d_add(dentry, inode);
2707	}
2708	trace_nfs_link_exit(inode, dir, dentry, error);
2709	return error;
2710}
2711EXPORT_SYMBOL_GPL(nfs_link);
2712
2713static void
2714nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
2715{
2716	struct dentry *new_dentry = data->new_dentry;
2717
2718	unblock_revalidate(new_dentry);
2719}
2720
2721static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
2722					   struct dentry *new_dentry)
2723{
2724	struct nfs_server *server = NFS_SB(old_dentry->d_sb);
2725
2726	if (old_dentry->d_parent != new_dentry->d_parent)
2727		return false;
2728	if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
2729		return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
2730	return true;
2731}
2732
/*
 * RENAME
 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
 * different file handle for the same inode after a rename (e.g. when
 * moving to a different directory). A fail-safe method to do so would
 * be to look up old_dir/old_name, create a link to new_dir/new_name and
 * rename the old file using the sillyrename stuff. This way, the original
 * file in old_dir will go away when the last process iput()s the inode.
 *
 * FIXED.
 *
 * It actually works quite well. One needs to have the possibility for
 * at least one ".nfs..." file in each directory the file ever gets
 * moved or linked to which happens automagically with the new
 * implementation that only depends on the dcache stuff instead of
 * using the inode layer
 *
 * Unfortunately, things are a little more complicated than indicated
 * above. For a cross-directory move, we want to make sure we can get
 * rid of the old inode after the operation.  This means there must be
 * no pending writes (if it's a file), and the use count must be 1.
 * If these conditions are met, we can drop the dentries before doing
 * the rename.
 *
 * @idmap:      not used
 * @old_dir:    directory currently containing @old_dentry
 * @old_dentry: dentry being renamed
 * @new_dir:    destination directory
 * @new_dentry: destination dentry (may be positive)
 * @flags:      rename flags; any non-zero value is rejected with -EINVAL
 *
 * Returns 0 on success or a negative errno.
 */
int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
	       struct dentry *old_dentry, struct inode *new_dir,
	       struct dentry *new_dentry, unsigned int flags)
{
	struct inode *old_inode = d_inode(old_dentry);
	struct inode *new_inode = d_inode(new_dentry);
	unsigned long new_gencount = 0;
	struct dentry *dentry = NULL;	/* copy of the target, if sillyrenamed */
	struct rpc_task *task;
	bool must_unblock = false;	/* new_dentry had revalidation blocked */
	int error = -EBUSY;

	if (flags)
		return -EINVAL;

	dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
		 old_dentry, new_dentry,
		 d_count(new_dentry));

	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
	/*
	 * For non-directories, check whether the target is busy and if so,
	 * make a copy of the dentry and then do a silly-rename. If the
	 * silly-rename succeeds, the copied dentry is hashed and becomes
	 * the new target.
	 */
	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
		/* We must prevent any concurrent open until the unlink
		 * completes.  ->d_revalidate will wait for ->d_fsdata
		 * to clear.  We set it here to ensure no lookup succeeds until
		 * the unlink is complete on the server.
		 */
		error = -ETXTBSY;
		if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
		    WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
			goto out;

		spin_lock(&new_dentry->d_lock);
		if (d_count(new_dentry) > 2) {
			int err;

			spin_unlock(&new_dentry->d_lock);

			/*
			 * NOTE(review): error is still -ETXTBSY here, so a
			 * d_alloc() failure or a sillyrename failure is
			 * reported as -ETXTBSY, not as -ENOMEM or err --
			 * confirm this is intended.
			 */

			/* copy the target dentry's name */
			dentry = d_alloc(new_dentry->d_parent,
					 &new_dentry->d_name);
			if (!dentry)
				goto out;

			/* silly-rename the existing target ... */
			err = nfs_sillyrename(new_dir, new_dentry);
			if (err)
				goto out;

			/* ... and rename onto the fresh, negative copy */
			new_dentry = dentry;
			new_inode = NULL;
		} else {
			block_revalidate(new_dentry);
			must_unblock = true;
			/* Sampled before the RPC for nfs_drop_nlink() below */
			new_gencount = NFS_I(new_inode)->attr_gencount;
			spin_unlock(&new_dentry->d_lock);
		}

	}

	if (S_ISREG(old_inode->i_mode) &&
	    nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
		nfs_sync_inode(old_inode);
	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
				must_unblock ? nfs_unblock_rename : NULL);
	if (IS_ERR(task)) {
		/* No task was started, so undo the block ourselves */
		if (must_unblock)
			unblock_revalidate(new_dentry);
		error = PTR_ERR(task);
		goto out;
	}

	error = rpc_wait_for_completion_task(task);
	if (error != 0) {
		/* The wait failed: flag the still-running rename as cancelled */
		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
		smp_wmb();
	} else
		error = task->tk_status;
	rpc_put_task(task);
	/* Ensure the inode attributes are revalidated */
	if (error == 0) {
		spin_lock(&old_inode->i_lock);
		NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
		nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE |
							 NFS_INO_INVALID_CTIME |
							 NFS_INO_REVAL_FORCED);
		spin_unlock(&old_inode->i_lock);
	}
out:
	trace_nfs_rename_exit(old_dir, old_dentry,
			new_dir, new_dentry, error);
	if (!error) {
		/* A replaced (non-sillyrenamed) target loses a link */
		if (new_inode != NULL)
			nfs_drop_nlink(new_inode, new_gencount);
		/*
		 * The d_move() should be here instead of in an async RPC completion
		 * handler because we need the proper locks to move the dentry.  If
		 * we're interrupted by a signal, the async RPC completion handler
		 * should mark the directories for revalidation.
		 */
		d_move(old_dentry, new_dentry);
		nfs_set_verifier(old_dentry,
					nfs_save_change_attribute(new_dir));
	} else if (error == -ENOENT)
		nfs_dentry_handle_enoent(old_dentry);

	/* new dentry created? */
	if (dentry)
		dput(dentry);
	return error;
}
EXPORT_SYMBOL_GPL(nfs_rename);
2875
/*
 * Global state of the access cache.
 *
 * nfs_access_lru_lock:  taken around walks and edits of nfs_access_lru_list.
 * nfs_access_lru_list:  nfs_inode objects, linked via access_cache_inode_lru.
 * nfs_access_nr_entries: entry counter; decremented in
 *                        nfs_access_free_entry() and read by the shrinker
 *                        count callback and the limit enforcement.
 */
static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries;

/*
 * Upper bound on nfs_access_nr_entries, checked by
 * nfs_access_cache_enforce_limit().  Exposed as a module parameter with
 * mode 0644; defaults to 4*1024*1024.
 */
static unsigned long nfs_access_max_cachesize = 4*1024*1024;
module_param(nfs_access_max_cachesize, ulong, 0644);
MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2883
/*
 * Release one access cache entry: drop its group_info reference, queue the
 * entry itself for freeing with kfree_rcu(), and decrement
 * nfs_access_nr_entries.  The decrement is bracketed by
 * smp_mb__before_atomic()/smp_mb__after_atomic().
 */
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
	put_group_info(entry->group_info);
	/*
	 * Deferred free: presumably because nfs_access_get_cached_rcu()
	 * reads entries under rcu_read_lock() only -- confirm.
	 */
	kfree_rcu(entry, rcu_head);
	smp_mb__before_atomic();
	atomic_long_dec(&nfs_access_nr_entries);
	smp_mb__after_atomic();
}
2892
2893static void nfs_access_free_list(struct list_head *head)
2894{
2895	struct nfs_access_entry *cache;
2896
2897	while (!list_empty(head)) {
2898		cache = list_entry(head->next, struct nfs_access_entry, lru);
2899		list_del(&cache->lru);
2900		nfs_access_free_entry(cache);
2901	}
2902}
2903
/*
 * Evict access cache entries, visiting at most @nr_to_scan inodes from the
 * global nfs_access_lru_list.
 *
 * At most one entry is taken from each inode visited: the first one on the
 * inode's access_cache_entry_lru list.  An inode that still has entries
 * afterwards is moved to the tail of the global list; one that has none
 * (or had none to begin with) is unlinked from it and has its
 * NFS_INO_ACL_LRU_SET flag cleared.
 *
 * Evicted entries are gathered on a local list and only freed once both
 * locks have been released.  Returns the number of entries evicted.
 */
static unsigned long
nfs_do_access_cache_scan(unsigned int nr_to_scan)
{
	LIST_HEAD(head);
	struct nfs_inode *nfsi, *next;
	struct nfs_access_entry *cache;
	long freed = 0;

	spin_lock(&nfs_access_lru_lock);
	list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
		struct inode *inode;

		/* The budget counts inodes visited, not entries freed */
		if (nr_to_scan-- == 0)
			break;
		inode = &nfsi->vfs_inode;
		spin_lock(&inode->i_lock);
		if (list_empty(&nfsi->access_cache_entry_lru))
			goto remove_lru_entry;
		cache = list_entry(nfsi->access_cache_entry_lru.next,
				struct nfs_access_entry, lru);
		/* Detach from both the per-inode LRU and the rbtree */
		list_move(&cache->lru, &head);
		rb_erase(&cache->rb_node, &nfsi->access_cache);
		freed++;
		if (!list_empty(&nfsi->access_cache_entry_lru))
			list_move_tail(&nfsi->access_cache_inode_lru,
					&nfs_access_lru_list);
		else {
			/* Also entered by goto when the inode had no entries */
remove_lru_entry:
			list_del_init(&nfsi->access_cache_inode_lru);
			smp_mb__before_atomic();
			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
			smp_mb__after_atomic();
		}
		spin_unlock(&inode->i_lock);
	}
	spin_unlock(&nfs_access_lru_lock);
	nfs_access_free_list(&head);
	return freed;
}
2943
2944unsigned long
2945nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2946{
2947	int nr_to_scan = sc->nr_to_scan;
2948	gfp_t gfp_mask = sc->gfp_mask;
2949
2950	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2951		return SHRINK_STOP;
2952	return nfs_do_access_cache_scan(nr_to_scan);
2953}
2954
2955
2956unsigned long
2957nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2958{
2959	return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2960}
2961
2962static void
2963nfs_access_cache_enforce_limit(void)
2964{
2965	long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2966	unsigned long diff;
2967	unsigned int nr_to_scan;
2968
2969	if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2970		return;
2971	nr_to_scan = 100;
2972	diff = nr_entries - nfs_access_max_cachesize;
2973	if (diff < nr_to_scan)
2974		nr_to_scan = diff;
2975	nfs_do_access_cache_scan(nr_to_scan);
2976}
2977
2978static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2979{
2980	struct rb_root *root_node = &nfsi->access_cache;
2981	struct rb_node *n;
2982	struct nfs_access_entry *entry;
2983
2984	/* Unhook entries from the cache */
2985	while ((n = rb_first(root_node)) != NULL) {
2986		entry = rb_entry(n, struct nfs_access_entry, rb_node);
2987		rb_erase(n, root_node);
2988		list_move(&entry->lru, head);
2989	}
2990	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
2991}
2992
2993void nfs_access_zap_cache(struct inode *inode)
2994{
2995	LIST_HEAD(head);
2996
2997	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
2998		return;
2999	/* Remove from global LRU init */
3000	spin_lock(&nfs_access_lru_lock);
3001	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
3002		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
3003
3004	spin_lock(&inode->i_lock);
3005	__nfs_access_zap_cache(NFS_I(inode), &head);
3006	spin_unlock(&inode->i_lock);
3007	spin_unlock(&nfs_access_lru_lock);
3008	nfs_access_free_list(&head);
3009}
3010EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
3011
3012static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
3013{
3014	struct group_info *ga, *gb;
3015	int g;
3016
3017	if (uid_lt(a->fsuid, b->fsuid))
3018		return -1;
3019	if (uid_gt(a->fsuid, b->fsuid))
3020		return 1;
3021
3022	if (gid_lt(a->fsgid, b->fsgid))
3023		return -1;
3024	if (gid_gt(a->fsgid, b->fsgid))
3025		return 1;
3026
3027	ga = a->group_info;
3028	gb = b->group_info;
3029	if (ga == gb)
3030		return 0;
3031	if (ga == NULL)
3032		return -1;
3033	if (gb == NULL)
3034		return 1;
3035	if (ga->ngroups < gb->ngroups)
3036		return -1;
3037	if (ga->ngroups > gb->ngroups)
3038		return 1;
3039
3040	for (g = 0; g < ga->ngroups; g++) {
3041		if (gid_lt(ga->gid[g], gb->gid[g]))
3042			return -1;
3043		if (gid_gt(ga->gid[g], gb->gid[g]))
3044			return 1;
3045	}
3046	return 0;
3047}
3048
3049static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
3050{
3051	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
3052
3053	while (n != NULL) {
3054		struct nfs_access_entry *entry =
3055			rb_entry(n, struct nfs_access_entry, rb_node);
3056		int cmp = access_cmp(cred, entry);
3057
3058		if (cmp < 0)
3059			n = n->rb_left;
3060		else if (cmp > 0)
3061			n = n->rb_right;
3062		else
3063			return entry;
3064	}
3065	return NULL;
3066}
3067
3068static u64 nfs_access_login_time(const struct task_struct *task,
3069				 const struct cred *cred)
3070{
3071	const struct task_struct *parent;
3072	const struct cred *pcred;
3073	u64 ret;
3074
3075	rcu_read_lock();
3076	for (;;) {
3077		parent = rcu_dereference(task->real_parent);
3078		pcred = __task_cred(parent);
3079		if (parent == task || cred_fscmp(pcred, cred) != 0)
3080			break;
3081		task = parent;
3082	}
3083	ret = task->start_time;
3084	rcu_read_unlock();
3085	return ret;
3086}
3087
/*
 * Look up the cached access mask for @cred on @inode under inode->i_lock.
 *
 * @inode:     inode whose access cache is searched
 * @cred:      credential to look up
 * @mask:      filled with the cached mask on success
 * @may_block: whether the inode may be revalidated if the cache is stale
 *
 * Returns:
 *   0        a usable entry was found and *@mask was set
 *   -ENOENT  no entry, the whole cache was invalid (and has been zapped),
 *            or the entry's timestamp precedes the computed login time
 *   -ECHILD  the entry needs revalidation but @may_block is false
 *   other    error returned by __nfs_revalidate_inode()
 */
static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	u64 login_time = nfs_access_login_time(current, cred);
	struct nfs_access_entry *cache;
	bool retry = true;	/* allow a single revalidation round */
	int err;

	spin_lock(&inode->i_lock);
	for(;;) {
		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
			goto out_zap;
		cache = nfs_access_search_rbtree(inode, cred);
		err = -ENOENT;
		if (cache == NULL)
			goto out;
		/* Found an entry, is our attribute cache valid? */
		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
			break;
		/* Already revalidated once: use the entry as it is */
		if (!retry)
			break;
		err = -ECHILD;
		if (!may_block)
			goto out;
		/*
		 * Revalidation runs without i_lock held; the next loop
		 * iteration searches the tree again after retaking it.
		 */
		spin_unlock(&inode->i_lock);
		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
		if (err)
			return err;
		spin_lock(&inode->i_lock);
		retry = false;
	}
	err = -ENOENT;
	/* Signed difference: true when login_time is later than the entry */
	if ((s64)(login_time - cache->timestamp) > 0)
		goto out;
	*mask = cache->mask;
	/* Mark as most recently used */
	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
	err = 0;
out:
	spin_unlock(&inode->i_lock);
	return err;
out_zap:
	spin_unlock(&inode->i_lock);
	nfs_access_zap_cache(inode);
	return -ENOENT;
}
3133
3134static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
3135{
3136	/* Only check the most recently returned cache entry,
3137	 * but do it without locking.
3138	 */
3139	struct nfs_inode *nfsi = NFS_I(inode);
3140	u64 login_time = nfs_access_login_time(current, cred);
3141	struct nfs_access_entry *cache;
3142	int err = -ECHILD;
3143	struct list_head *lh;
3144
3145	rcu_read_lock();
3146	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
3147		goto out;
3148	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
3149	cache = list_entry(lh, struct nfs_access_entry, lru);
3150	if (lh == &nfsi->access_cache_entry_lru ||
3151	    access_cmp(cred, cache) != 0)
3152		cache = NULL;
3153	if (cache == NULL)
3154		goto out;
3155	if ((s64)(login_time - cache->timestamp) > 0)
3156		goto out;
3157	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
3158		goto out;
3159	*mask = cache->mask;
3160	err = 0;
3161out:
3162	rcu_read_unlock();
3163	return err;
3164}
3165
3166int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
3167			  u32 *mask, bool may_block)
3168{
3169	int status;
3170
3171	status = nfs_access_get_cached_rcu(inode, cred, mask);
3172	if (status != 0)
3173		status = nfs_access_get_cached_locked(inode, cred, mask,
3174		    may_block);
3175
3176	return status;
3177}
3178EXPORT_SYMBOL_GPL(nfs_access_get_cached);
3179
3180static void nfs_access_add_rbtree(struct inode *inode,
3181				  struct nfs_access_entry *set,
3182				  const struct cred *cred)
3183{
3184	struct nfs_inode *nfsi = NFS_I(inode);
3185	struct rb_root *root_node = &nfsi->access_cache;
3186	struct rb_node **p = &root_node->rb_node;
3187	struct rb_node *parent = NULL;
3188	struct nfs_access_entry *entry;
3189	int cmp;
3190
3191	spin_lock(&inode->i_lock);
3192	while (*p != NULL) {
3193		parent = *p;
3194		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
3195		cmp = access_cmp(cred, entry);
3196
3197		if (cmp < 0)
3198			p = &parent->rb_left;
3199		else if (cmp > 0)
3200			p = &parent->rb_right;
3201		else
3202			goto found;
3203	}
3204	rb_link_node(&set->rb_node, parent, p);
3205	rb_insert_color(&set->rb_node, root_node);
3206	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
3207	spin_unlock(&inode->i_lock);
3208	return;
3209found:
3210	rb_replace_node(parent, &set->rb_node, root_node);
3211	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
3212	list_del(&entry->lru);
3213	spin_unlock(&inode->i_lock);
3214	nfs_access_free_entry(entry);
3215}
3216
3217void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
3218			  const struct cred *cred)
3219{
3220	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
3221	if (cache == NULL)
3222		return;
3223	RB_CLEAR_NODE(&cache->rb_node);
3224	cache->fsuid = cred->fsuid;
3225	cache->fsgid = cred->fsgid;
3226	cache->group_info = get_group_info(cred->group_info);
3227	cache->mask = set->mask;
3228	cache->timestamp = ktime_get_ns();
3229
3230	/* The above field assignments must be visible
3231	 * before this item appears on the lru.  We cannot easily
3232	 * use rcu_assign_pointer, so just force the memory barrier.
3233	 */
3234	smp_wmb();
3235	nfs_access_add_rbtree(inode, cache, cred);
3236
3237	/* Update accounting */
3238	smp_mb__before_atomic();
3239	atomic_long_inc(&nfs_access_nr_entries);
3240	smp_mb__after_atomic();
3241
3242	/* Add inode to global LRU list */
3243	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
3244		spin_lock(&nfs_access_lru_lock);
3245		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
3246			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
3247					&nfs_access_lru_list);
3248		spin_unlock(&nfs_access_lru_lock);
3249	}
3250	nfs_access_cache_enforce_limit();
3251}
3252EXPORT_SYMBOL_GPL(nfs_access_add_cache);
3253
3254#define NFS_MAY_READ (NFS_ACCESS_READ)
3255#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
3256		NFS_ACCESS_EXTEND | \
3257		NFS_ACCESS_DELETE)
3258#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
3259		NFS_ACCESS_EXTEND)
3260#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
3261#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
3262#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
3263static int
3264nfs_access_calc_mask(u32 access_result, umode_t umode)
3265{
3266	int mask = 0;
3267
3268	if (access_result & NFS_MAY_READ)
3269		mask |= MAY_READ;
3270	if (S_ISDIR(umode)) {
3271		if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
3272			mask |= MAY_WRITE;
3273		if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
3274			mask |= MAY_EXEC;
3275	} else if (S_ISREG(umode)) {
3276		if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
3277			mask |= MAY_WRITE;
3278		if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
3279			mask |= MAY_EXEC;
3280	} else if (access_result & NFS_MAY_WRITE)
3281			mask |= MAY_WRITE;
3282	return mask;
3283}
3284
3285void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
3286{
3287	entry->mask = access_result;
3288}
3289EXPORT_SYMBOL_GPL(nfs_access_set_mask);
3290
3291static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
3292{
3293	struct nfs_access_entry cache;
3294	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
3295	int cache_mask = -1;
3296	int status;
3297
3298	trace_nfs_access_enter(inode);
3299
3300	status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
3301	if (status == 0)
3302		goto out_cached;
3303
3304	status = -ECHILD;
3305	if (!may_block)
3306		goto out;
3307
3308	/*
3309	 * Determine which access bits we want to ask for...
3310	 */
3311	cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND |
3312		     nfs_access_xattr_mask(NFS_SERVER(inode));
3313	if (S_ISDIR(inode->i_mode))
3314		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
3315	else
3316		cache.mask |= NFS_ACCESS_EXECUTE;
3317	status = NFS_PROTO(inode)->access(inode, &cache, cred);
3318	if (status != 0) {
3319		if (status == -ESTALE) {
3320			if (!S_ISDIR(inode->i_mode))
3321				nfs_set_inode_stale(inode);
3322			else
3323				nfs_zap_caches(inode);
3324		}
3325		goto out;
3326	}
3327	nfs_access_add_cache(inode, &cache, cred);
3328out_cached:
3329	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
3330	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
3331		status = -EACCES;
3332out:
3333	trace_nfs_access_exit(inode, mask, cache_mask, status);
3334	return status;
3335}
3336
3337static int nfs_open_permission_mask(int openflags)
3338{
3339	int mask = 0;
3340
3341	if (openflags & __FMODE_EXEC) {
3342		/* ONLY check exec rights */
3343		mask = MAY_EXEC;
3344	} else {
3345		if ((openflags & O_ACCMODE) != O_WRONLY)
3346			mask |= MAY_READ;
3347		if ((openflags & O_ACCMODE) != O_RDONLY)
3348			mask |= MAY_WRITE;
3349	}
3350
3351	return mask;
3352}
3353
/*
 * Check whether @cred may open @inode with @openflags: the flags are
 * turned into a MAY_* mask and handed to nfs_do_access(), whose result
 * is returned.
 */
int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
{
	int mask = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, mask);
}
EXPORT_SYMBOL_GPL(nfs_may_open);
3359
3360static int nfs_execute_ok(struct inode *inode, int mask)
3361{
3362	struct nfs_server *server = NFS_SERVER(inode);
3363	int ret = 0;
3364
3365	if (S_ISDIR(inode->i_mode))
3366		return 0;
3367	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
3368		if (mask & MAY_NOT_BLOCK)
3369			return -ECHILD;
3370		ret = __nfs_revalidate_inode(server, inode);
3371	}
3372	if (ret == 0 && !execute_ok(inode))
3373		ret = -EACCES;
3374	return ret;
3375}
3376
3377int nfs_permission(struct mnt_idmap *idmap,
3378		   struct inode *inode,
3379		   int mask)
3380{
3381	const struct cred *cred = current_cred();
3382	int res = 0;
3383
3384	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
3385
3386	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
3387		goto out;
3388	/* Is this sys_access() ? */
3389	if (mask & (MAY_ACCESS | MAY_CHDIR))
3390		goto force_lookup;
3391
3392	switch (inode->i_mode & S_IFMT) {
3393		case S_IFLNK:
3394			goto out;
3395		case S_IFREG:
3396			if ((mask & MAY_OPEN) &&
3397			   nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
3398				return 0;
3399			break;
3400		case S_IFDIR:
3401			/*
3402			 * Optimize away all write operations, since the server
3403			 * will check permissions when we perform the op.
3404			 */
3405			if ((mask & MAY_WRITE) && !(mask & MAY_READ))
3406				goto out;
3407	}
3408
3409force_lookup:
3410	if (!NFS_PROTO(inode)->access)
3411		goto out_notsup;
3412
3413	res = nfs_do_access(inode, cred, mask);
3414out:
3415	if (!res && (mask & MAY_EXEC))
3416		res = nfs_execute_ok(inode, mask);
3417
3418	dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
3419		inode->i_sb->s_id, inode->i_ino, mask, res);
3420	return res;
3421out_notsup:
3422	if (mask & MAY_NOT_BLOCK)
3423		return -ECHILD;
3424
3425	res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
3426						  NFS_INO_INVALID_OTHER);
3427	if (res == 0)
3428		res = generic_permission(&nop_mnt_idmap, inode, mask);
3429	goto out;
3430}
3431EXPORT_SYMBOL_GPL(nfs_permission);