Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfs-for-5.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
"Highlights include:

Features:

- Switch NFS to use readahead instead of the obsolete readpages.

- Readdir fixes to improve cacheability of large directories when
there are multiple readers and writers.

- Readdir performance improvements when doing a seekdir() immediately
after opening the directory (common when re-exporting NFS).

- NFS swap improvements from Neil Brown.

- Loosen up memory allocation to permit direct reclaim and writeback
in cases where there is no danger of deadlocking the writeback code
or NFS swap.

- Avoid sillyrename when the NFSv4 server claims to support the
necessary features to recover the unlinked but open file after
reboot.

Bugfixes:

- Patch from Olga to add a mount option to control NFSv4.1 session
trunking discovery, and default it to being off.

- Fix a lockup in nfs_do_recoalesce().

- Two fixes for list iterator variables being used when pointing to
the list head.

- Fix a kernel memory scribble when reading from a non-socket
transport in /sys/kernel/sunrpc.

- Fix a race where reconnecting to a server could leave the TCP
socket stuck forever in the connecting state.

- Patch from Neil to fix a shutdown race which can leave the SUNRPC
transport timer primed after we free the struct xprt itself.

- Patch from Xin Xiong to fix reference count leaks in the NFSv4.2
copy offload.

- Sunrpc patch from Olga to avoid resending a task on an offlined
transport.

Cleanups:

- Patches from Dave Wysochanski to clean up the fscache code."

* tag 'nfs-for-5.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (91 commits)
NFSv4/pNFS: Fix another issue with a list iterator pointing to the head
NFS: Don't loop forever in nfs_do_recoalesce()
SUNRPC: Don't return error values in sysfs read of closed files
SUNRPC: Do not dereference non-socket transports in sysfs
NFSv4.1: don't retry BIND_CONN_TO_SESSION on session error
SUNRPC don't resend a task on an offlined transport
NFS: replace usage of found with dedicated list iterator variable
SUNRPC: avoid race between mod_timer() and del_timer_sync()
pNFS/files: Ensure pNFS allocation modes are consistent with nfsiod
pNFS/flexfiles: Ensure pNFS allocation modes are consistent with nfsiod
NFSv4/pnfs: Ensure pNFS allocation modes are consistent with nfsiod
NFS: Avoid writeback threads getting stuck in mempool_alloc()
NFS: nfsiod should not block forever in mempool_alloc()
SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent
SUNRPC: Fix unx_lookup_cred() allocation
NFS: Fix memory allocation in rpc_alloc_task()
NFS: Fix memory allocation in rpc_malloc()
SUNRPC: Improve accuracy of socket ENOBUFS determination
SUNRPC: Replace internal use of SOCKWQ_ASYNC_NOSPACE
SUNRPC: Fix socket waits for write buffer space
...

+1325 -821
+4
fs/nfs/Kconfig
··· 4 4 depends on INET && FILE_LOCKING && MULTIUSER 5 5 select LOCKD 6 6 select SUNRPC 7 + select CRYPTO 8 + select CRYPTO_HASH 9 + select XXHASH 10 + select CRYPTO_XXHASH 7 11 select NFS_ACL_SUPPORT if NFS_V3_ACL 8 12 help 9 13 Choose Y here if you want to access files residing on other
+10 -19
fs/nfs/callback_proc.c
··· 358 358 struct cb_process_state *cps) 359 359 { 360 360 struct cb_devicenotifyargs *args = argp; 361 + const struct pnfs_layoutdriver_type *ld = NULL; 361 362 uint32_t i; 362 363 __be32 res = 0; 363 - struct nfs_client *clp = cps->clp; 364 - struct nfs_server *server = NULL; 365 364 366 - if (!clp) { 365 + if (!cps->clp) { 367 366 res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); 368 367 goto out; 369 368 } ··· 370 371 for (i = 0; i < args->ndevs; i++) { 371 372 struct cb_devicenotifyitem *dev = &args->devs[i]; 372 373 373 - if (!server || 374 - server->pnfs_curr_ld->id != dev->cbd_layout_type) { 375 - rcu_read_lock(); 376 - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) 377 - if (server->pnfs_curr_ld && 378 - server->pnfs_curr_ld->id == dev->cbd_layout_type) { 379 - rcu_read_unlock(); 380 - goto found; 381 - } 382 - rcu_read_unlock(); 383 - continue; 374 + if (!ld || ld->id != dev->cbd_layout_type) { 375 + pnfs_put_layoutdriver(ld); 376 + ld = pnfs_find_layoutdriver(dev->cbd_layout_type); 377 + if (!ld) 378 + continue; 384 379 } 385 - 386 - found: 387 - nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); 380 + nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); 388 381 } 389 - 382 + pnfs_put_layoutdriver(ld); 390 383 out: 391 384 kfree(args->devs); 392 385 return res; ··· 701 710 struct nfs4_copy_state *copy, *tmp_copy; 702 711 bool found = false; 703 712 704 - copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); 713 + copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL); 705 714 if (!copy) 706 715 return htonl(NFS4ERR_SERVERFAULT); 707 716
-4
fs/nfs/callback_xdr.c
··· 271 271 n = ntohl(*p++); 272 272 if (n == 0) 273 273 goto out; 274 - if (n > ULONG_MAX / sizeof(*args->devs)) { 275 - status = htonl(NFS4ERR_BADXDR); 276 - goto out; 277 - } 278 274 279 275 args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); 280 276 if (!args->devs) {
+2 -1
fs/nfs/client.c
··· 857 857 } 858 858 859 859 if (clp->rpc_ops->discover_trunking != NULL && 860 - (server->caps & NFS_CAP_FS_LOCATIONS)) { 860 + (server->caps & NFS_CAP_FS_LOCATIONS && 861 + (server->flags & NFS_MOUNT_TRUNK_DISCOVERY))) { 861 862 error = clp->rpc_ops->discover_trunking(server, mntfh); 862 863 if (error < 0) 863 864 return error;
+1 -1
fs/nfs/delegation.c
··· 439 439 struct nfs_delegation *freeme = NULL; 440 440 int status = 0; 441 441 442 - delegation = kmalloc(sizeof(*delegation), GFP_NOFS); 442 + delegation = kmalloc(sizeof(*delegation), GFP_KERNEL_ACCOUNT); 443 443 if (delegation == NULL) 444 444 return -ENOMEM; 445 445 nfs4_stateid_copy(&delegation->stateid, stateid);
+382 -246
fs/nfs/dir.c
··· 39 39 #include <linux/sched.h> 40 40 #include <linux/kmemleak.h> 41 41 #include <linux/xattr.h> 42 + #include <linux/xxhash.h> 42 43 43 44 #include "delegation.h" 44 45 #include "iostat.h" ··· 70 69 .freepage = nfs_readdir_clear_array, 71 70 }; 72 71 73 - static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir) 72 + #define NFS_INIT_DTSIZE PAGE_SIZE 73 + 74 + static struct nfs_open_dir_context * 75 + alloc_nfs_open_dir_context(struct inode *dir) 74 76 { 75 77 struct nfs_inode *nfsi = NFS_I(dir); 76 78 struct nfs_open_dir_context *ctx; 77 - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 79 + 80 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); 78 81 if (ctx != NULL) { 79 - ctx->duped = 0; 80 82 ctx->attr_gencount = nfsi->attr_gencount; 81 - ctx->dir_cookie = 0; 82 - ctx->dup_cookie = 0; 83 - ctx->page_index = 0; 84 - ctx->eof = false; 83 + ctx->dtsize = NFS_INIT_DTSIZE; 85 84 spin_lock(&dir->i_lock); 86 85 if (list_empty(&nfsi->open_files) && 87 86 (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) 88 87 nfs_set_cache_invalid(dir, 89 88 NFS_INO_INVALID_DATA | 90 89 NFS_INO_REVAL_FORCED); 91 - list_add(&ctx->list, &nfsi->open_files); 92 - clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); 90 + list_add_tail_rcu(&ctx->list, &nfsi->open_files); 91 + memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf)); 93 92 spin_unlock(&dir->i_lock); 94 93 return ctx; 95 94 } ··· 99 98 static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx) 100 99 { 101 100 spin_lock(&dir->i_lock); 102 - list_del(&ctx->list); 101 + list_del_rcu(&ctx->list); 103 102 spin_unlock(&dir->i_lock); 104 - kfree(ctx); 103 + kfree_rcu(ctx, rcu_head); 105 104 } 106 105 107 106 /* ··· 143 142 }; 144 143 145 144 struct nfs_cache_array { 145 + u64 change_attr; 146 146 u64 last_cookie; 147 147 unsigned int size; 148 148 unsigned char page_full : 1, ··· 157 155 struct page *page; 158 156 struct dir_context *ctx; 159 157 pgoff_t page_index; 158 + pgoff_t 
page_index_max; 160 159 u64 dir_cookie; 161 160 u64 last_cookie; 162 - u64 dup_cookie; 163 161 loff_t current_index; 164 - loff_t prev_index; 165 162 166 163 __be32 verf[NFS_DIR_VERIFIER_SIZE]; 167 164 unsigned long dir_verifier; ··· 168 167 unsigned long gencount; 169 168 unsigned long attr_gencount; 170 169 unsigned int cache_entry_index; 171 - signed char duped; 170 + unsigned int buffer_fills; 171 + unsigned int dtsize; 172 + bool clear_cache; 172 173 bool plus; 173 174 bool eob; 174 175 bool eof; 175 176 }; 176 177 177 - static void nfs_readdir_array_init(struct nfs_cache_array *array) 178 + static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz) 178 179 { 179 - memset(array, 0, sizeof(struct nfs_cache_array)); 180 + struct nfs_server *server = NFS_SERVER(file_inode(desc->file)); 181 + unsigned int maxsize = server->dtsize; 182 + 183 + if (sz > maxsize) 184 + sz = maxsize; 185 + if (sz < NFS_MIN_FILE_IO_SIZE) 186 + sz = NFS_MIN_FILE_IO_SIZE; 187 + desc->dtsize = sz; 180 188 } 181 189 182 - static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie) 190 + static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc) 191 + { 192 + nfs_set_dtsize(desc, desc->dtsize >> 1); 193 + } 194 + 195 + static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc) 196 + { 197 + nfs_set_dtsize(desc, desc->dtsize << 1); 198 + } 199 + 200 + static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie, 201 + u64 change_attr) 183 202 { 184 203 struct nfs_cache_array *array; 185 204 186 205 array = kmap_atomic(page); 187 - nfs_readdir_array_init(array); 206 + array->change_attr = change_attr; 188 207 array->last_cookie = last_cookie; 208 + array->size = 0; 209 + array->page_full = 0; 210 + array->page_is_eof = 0; 189 211 array->cookies_are_ordered = 1; 190 212 kunmap_atomic(array); 191 213 } ··· 216 192 /* 217 193 * we are freeing strings created by nfs_add_to_readdir_array() 218 194 */ 219 - static 220 - void 
nfs_readdir_clear_array(struct page *page) 195 + static void nfs_readdir_clear_array(struct page *page) 221 196 { 222 197 struct nfs_cache_array *array; 223 - int i; 198 + unsigned int i; 224 199 225 200 array = kmap_atomic(page); 226 201 for (i = 0; i < array->size; i++) 227 202 kfree(array->array[i].name); 228 - nfs_readdir_array_init(array); 203 + array->size = 0; 229 204 kunmap_atomic(array); 205 + } 206 + 207 + static void nfs_readdir_page_reinit_array(struct page *page, u64 last_cookie, 208 + u64 change_attr) 209 + { 210 + nfs_readdir_clear_array(page); 211 + nfs_readdir_page_init_array(page, last_cookie, change_attr); 230 212 } 231 213 232 214 static struct page * ··· 240 210 { 241 211 struct page *page = alloc_page(gfp_flags); 242 212 if (page) 243 - nfs_readdir_page_init_array(page, last_cookie); 213 + nfs_readdir_page_init_array(page, last_cookie, 0); 244 214 return page; 245 215 } 246 216 ··· 250 220 nfs_readdir_clear_array(page); 251 221 put_page(page); 252 222 } 223 + } 224 + 225 + static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array) 226 + { 227 + return array->size == 0 ? 
array->last_cookie : array->array[0].cookie; 253 228 } 254 229 255 230 static void nfs_readdir_array_set_eof(struct nfs_cache_array *array) ··· 286 251 return ret; 287 252 } 288 253 254 + static size_t nfs_readdir_array_maxentries(void) 255 + { 256 + return (PAGE_SIZE - sizeof(struct nfs_cache_array)) / 257 + sizeof(struct nfs_cache_array_entry); 258 + } 259 + 289 260 /* 290 261 * Check that the next array entry lies entirely within the page bounds 291 262 */ 292 263 static int nfs_readdir_array_can_expand(struct nfs_cache_array *array) 293 264 { 294 - struct nfs_cache_array_entry *cache_entry; 295 - 296 265 if (array->page_full) 297 266 return -ENOSPC; 298 - cache_entry = &array->array[array->size + 1]; 299 - if ((char *)cache_entry - (char *)array > PAGE_SIZE) { 267 + if (array->size == nfs_readdir_array_maxentries()) { 300 268 array->page_full = 1; 301 269 return -ENOSPC; 302 270 } 303 271 return 0; 304 272 } 305 273 306 - static 307 - int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) 274 + static int nfs_readdir_page_array_append(struct page *page, 275 + const struct nfs_entry *entry, 276 + u64 *cookie) 308 277 { 309 278 struct nfs_cache_array *array; 310 279 struct nfs_cache_array_entry *cache_entry; 311 280 const char *name; 312 - int ret; 281 + int ret = -ENOMEM; 313 282 314 283 name = nfs_readdir_copy_name(entry->name, entry->len); 315 - if (!name) 316 - return -ENOMEM; 317 284 318 285 array = kmap_atomic(page); 286 + if (!name) 287 + goto out; 319 288 ret = nfs_readdir_array_can_expand(array); 320 289 if (ret) { 321 290 kfree(name); ··· 327 288 } 328 289 329 290 cache_entry = &array->array[array->size]; 330 - cache_entry->cookie = entry->prev_cookie; 291 + cache_entry->cookie = array->last_cookie; 331 292 cache_entry->ino = entry->ino; 332 293 cache_entry->d_type = entry->d_type; 333 294 cache_entry->name_len = entry->len; ··· 339 300 if (entry->eof != 0) 340 301 nfs_readdir_array_set_eof(array); 341 302 out: 303 + *cookie = 
array->last_cookie; 342 304 kunmap_atomic(array); 343 305 return ret; 344 306 } 345 307 346 - static struct page *nfs_readdir_page_get_locked(struct address_space *mapping, 347 - pgoff_t index, u64 last_cookie) 308 + #define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14) 309 + /* 310 + * Hash algorithm allowing content addressible access to sequences 311 + * of directory cookies. Content is addressed by the value of the 312 + * cookie index of the first readdir entry in a page. 313 + * 314 + * The xxhash algorithm is chosen because it is fast, and is supposed 315 + * to result in a decent flat distribution of hashes. 316 + * 317 + * We then select only the first 18 bits to avoid issues with excessive 318 + * memory use for the page cache XArray. 18 bits should allow the caching 319 + * of 262144 pages of sequences of readdir entries. Since each page holds 320 + * 127 readdir entries for a typical 64-bit system, that works out to a 321 + * cache of ~ 33 million entries per directory. 322 + */ 323 + static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie) 348 324 { 325 + if (cookie == 0) 326 + return 0; 327 + return xxhash(&cookie, sizeof(cookie), 0) & NFS_READDIR_COOKIE_MASK; 328 + } 329 + 330 + static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, 331 + u64 change_attr) 332 + { 333 + struct nfs_cache_array *array = kmap_atomic(page); 334 + int ret = true; 335 + 336 + if (array->change_attr != change_attr) 337 + ret = false; 338 + if (nfs_readdir_array_index_cookie(array) != last_cookie) 339 + ret = false; 340 + kunmap_atomic(array); 341 + return ret; 342 + } 343 + 344 + static void nfs_readdir_page_unlock_and_put(struct page *page) 345 + { 346 + unlock_page(page); 347 + put_page(page); 348 + } 349 + 350 + static void nfs_readdir_page_init_and_validate(struct page *page, u64 cookie, 351 + u64 change_attr) 352 + { 353 + if (PageUptodate(page)) { 354 + if (nfs_readdir_page_validate(page, cookie, change_attr)) 355 + return; 356 + 
nfs_readdir_clear_array(page); 357 + } 358 + nfs_readdir_page_init_array(page, cookie, change_attr); 359 + SetPageUptodate(page); 360 + } 361 + 362 + static struct page *nfs_readdir_page_get_locked(struct address_space *mapping, 363 + u64 cookie, u64 change_attr) 364 + { 365 + pgoff_t index = nfs_readdir_page_cookie_hash(cookie); 349 366 struct page *page; 350 367 351 368 page = grab_cache_page(mapping, index); 352 - if (page && !PageUptodate(page)) { 353 - nfs_readdir_page_init_array(page, last_cookie); 354 - if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0) 355 - nfs_zap_mapping(mapping->host, mapping); 356 - SetPageUptodate(page); 357 - } 358 - 369 + if (!page) 370 + return NULL; 371 + nfs_readdir_page_init_and_validate(page, cookie, change_attr); 359 372 return page; 360 373 } 361 374 ··· 442 351 kunmap_atomic(array); 443 352 } 444 353 445 - static void nfs_readdir_page_unlock_and_put(struct page *page) 446 - { 447 - unlock_page(page); 448 - put_page(page); 449 - } 450 - 451 354 static struct page *nfs_readdir_page_get_next(struct address_space *mapping, 452 - pgoff_t index, u64 cookie) 355 + u64 cookie, u64 change_attr) 453 356 { 357 + pgoff_t index = nfs_readdir_page_cookie_hash(cookie); 454 358 struct page *page; 455 359 456 - page = nfs_readdir_page_get_locked(mapping, index, cookie); 457 - if (page) { 458 - if (nfs_readdir_page_last_cookie(page) == cookie) 459 - return page; 460 - nfs_readdir_page_unlock_and_put(page); 461 - } 462 - return NULL; 360 + page = grab_cache_page_nowait(mapping, index); 361 + if (!page) 362 + return NULL; 363 + nfs_readdir_page_init_and_validate(page, cookie, change_attr); 364 + if (nfs_readdir_page_last_cookie(page) != cookie) 365 + nfs_readdir_page_reinit_array(page, cookie, change_attr); 366 + return page; 463 367 } 464 368 465 369 static inline ··· 476 390 return true; 477 391 } 478 392 393 + static void nfs_readdir_seek_next_array(struct nfs_cache_array *array, 394 + struct nfs_readdir_descriptor *desc) 395 + { 
396 + if (array->page_full) { 397 + desc->last_cookie = array->last_cookie; 398 + desc->current_index += array->size; 399 + desc->cache_entry_index = 0; 400 + desc->page_index++; 401 + } else 402 + desc->last_cookie = nfs_readdir_array_index_cookie(array); 403 + } 404 + 405 + static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc) 406 + { 407 + desc->current_index = 0; 408 + desc->last_cookie = 0; 409 + desc->page_index = 0; 410 + } 411 + 479 412 static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, 480 413 struct nfs_readdir_descriptor *desc) 481 414 { ··· 506 401 if (diff >= array->size) { 507 402 if (array->page_is_eof) 508 403 goto out_eof; 404 + nfs_readdir_seek_next_array(array, desc); 509 405 return -EAGAIN; 510 406 } 511 407 ··· 517 411 out_eof: 518 412 desc->eof = true; 519 413 return -EBADCOOKIE; 520 - } 521 - 522 - static bool 523 - nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi) 524 - { 525 - if (nfsi->cache_validity & (NFS_INO_INVALID_CHANGE | 526 - NFS_INO_INVALID_DATA)) 527 - return false; 528 - smp_rmb(); 529 - return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags); 530 414 } 531 415 532 416 static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array, ··· 535 439 static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, 536 440 struct nfs_readdir_descriptor *desc) 537 441 { 538 - int i; 539 - loff_t new_pos; 442 + unsigned int i; 540 443 int status = -EAGAIN; 541 444 542 445 if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie)) ··· 543 448 544 449 for (i = 0; i < array->size; i++) { 545 450 if (array->array[i].cookie == desc->dir_cookie) { 546 - struct nfs_inode *nfsi = NFS_I(file_inode(desc->file)); 547 - 548 - new_pos = desc->current_index + i; 549 - if (desc->attr_gencount != nfsi->attr_gencount || 550 - !nfs_readdir_inode_mapping_valid(nfsi)) { 551 - desc->duped = 0; 552 - desc->attr_gencount = nfsi->attr_gencount; 553 - } else if (new_pos < desc->prev_index) { 554 
- if (desc->duped > 0 555 - && desc->dup_cookie == desc->dir_cookie) { 556 - if (printk_ratelimit()) { 557 - pr_notice("NFS: directory %pD2 contains a readdir loop." 558 - "Please contact your server vendor. " 559 - "The file: %s has duplicate cookie %llu\n", 560 - desc->file, array->array[i].name, desc->dir_cookie); 561 - } 562 - status = -ELOOP; 563 - goto out; 564 - } 565 - desc->dup_cookie = desc->dir_cookie; 566 - desc->duped = -1; 567 - } 568 451 if (nfs_readdir_use_cookie(desc->file)) 569 452 desc->ctx->pos = desc->dir_cookie; 570 453 else 571 - desc->ctx->pos = new_pos; 572 - desc->prev_index = new_pos; 454 + desc->ctx->pos = desc->current_index + i; 573 455 desc->cache_entry_index = i; 574 456 return 0; 575 457 } ··· 556 484 status = -EBADCOOKIE; 557 485 if (desc->dir_cookie == array->last_cookie) 558 486 desc->eof = true; 559 - } 560 - out: 487 + } else 488 + nfs_readdir_seek_next_array(array, desc); 561 489 return status; 562 490 } 563 491 ··· 573 501 else 574 502 status = nfs_readdir_search_for_cookie(array, desc); 575 503 576 - if (status == -EAGAIN) { 577 - desc->last_cookie = array->last_cookie; 578 - desc->current_index += array->size; 579 - desc->page_index++; 580 - } 581 504 kunmap_atomic(array); 582 505 return status; 583 506 } ··· 608 541 /* We requested READDIRPLUS, but the server doesn't grok it */ 609 542 if (error == -ENOTSUPP && desc->plus) { 610 543 NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; 611 - clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 612 544 desc->plus = arg.plus = false; 613 545 goto again; 614 546 } ··· 657 591 return 1; 658 592 } 659 593 660 - static 661 - bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) 594 + #define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL) 595 + 596 + static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx, 597 + unsigned int cache_hits, 598 + unsigned int cache_misses) 662 599 { 663 600 if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) 664 601 return 
false; 665 - if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) 666 - return true; 667 - if (ctx->pos == 0) 602 + if (ctx->pos == 0 || 603 + cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD) 668 604 return true; 669 605 return false; 670 606 } 671 607 672 608 /* 673 - * This function is called by the lookup and getattr code to request the 609 + * This function is called by the getattr code to request the 674 610 * use of readdirplus to accelerate any future lookups in the same 675 611 * directory. 676 612 */ 677 - void nfs_advise_use_readdirplus(struct inode *dir) 613 + void nfs_readdir_record_entry_cache_hit(struct inode *dir) 678 614 { 679 615 struct nfs_inode *nfsi = NFS_I(dir); 616 + struct nfs_open_dir_context *ctx; 680 617 681 618 if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && 682 - !list_empty(&nfsi->open_files)) 683 - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); 619 + S_ISDIR(dir->i_mode)) { 620 + rcu_read_lock(); 621 + list_for_each_entry_rcu (ctx, &nfsi->open_files, list) 622 + atomic_inc(&ctx->cache_hits); 623 + rcu_read_unlock(); 624 + } 684 625 } 685 626 686 627 /* 687 628 * This function is mainly for use by nfs_getattr(). 688 629 * 689 630 * If this is an 'ls -l', we want to force use of readdirplus. 690 - * Do this by checking if there is an active file descriptor 691 - * and calling nfs_advise_use_readdirplus, then forcing a 692 - * cache flush. 
693 631 */ 694 - void nfs_force_use_readdirplus(struct inode *dir) 632 + void nfs_readdir_record_entry_cache_miss(struct inode *dir) 695 633 { 696 634 struct nfs_inode *nfsi = NFS_I(dir); 635 + struct nfs_open_dir_context *ctx; 697 636 698 637 if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && 699 - !list_empty(&nfsi->open_files)) { 700 - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); 701 - set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); 638 + S_ISDIR(dir->i_mode)) { 639 + rcu_read_lock(); 640 + list_for_each_entry_rcu (ctx, &nfsi->open_files, list) 641 + atomic_inc(&ctx->cache_misses); 642 + rcu_read_unlock(); 702 643 } 644 + } 645 + 646 + static void nfs_lookup_advise_force_readdirplus(struct inode *dir, 647 + unsigned int flags) 648 + { 649 + if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) 650 + return; 651 + if (flags & (LOOKUP_EXCL | LOOKUP_PARENT | LOOKUP_REVAL)) 652 + return; 653 + nfs_readdir_record_entry_cache_miss(dir); 703 654 } 704 655 705 656 static ··· 769 686 status = nfs_refresh_inode(d_inode(dentry), entry->fattr); 770 687 if (!status) 771 688 nfs_setsecurity(d_inode(dentry), entry->fattr); 689 + trace_nfs_readdir_lookup_revalidate(d_inode(parent), 690 + dentry, 0, status); 772 691 goto out; 773 692 } else { 693 + trace_nfs_readdir_lookup_revalidate_failed( 694 + d_inode(parent), dentry, 0); 774 695 d_invalidate(dentry); 775 696 dput(dentry); 776 697 dentry = NULL; ··· 796 709 dentry = alias; 797 710 } 798 711 nfs_set_verifier(dentry, dir_verifier); 712 + trace_nfs_readdir_lookup(d_inode(parent), dentry, 0); 799 713 out: 800 714 dput(dentry); 715 + } 716 + 717 + static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc, 718 + struct nfs_entry *entry, 719 + struct xdr_stream *stream) 720 + { 721 + int ret; 722 + 723 + if (entry->fattr->label) 724 + entry->fattr->label->len = NFS4_MAXLABELLEN; 725 + ret = xdr_decode(desc, entry, stream); 726 + if (ret || !desc->plus) 727 + return ret; 728 + nfs_prime_dcache(file_dentry(desc->file), 
entry, desc->dir_verifier); 729 + return 0; 801 730 } 802 731 803 732 /* Perform conversion from xdr to cache array */ 804 733 static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, 805 734 struct nfs_entry *entry, 806 - struct page **xdr_pages, 807 - unsigned int buflen, 808 - struct page **arrays, 809 - size_t narrays) 735 + struct page **xdr_pages, unsigned int buflen, 736 + struct page **arrays, size_t narrays, 737 + u64 change_attr) 810 738 { 811 739 struct address_space *mapping = desc->file->f_mapping; 812 740 struct xdr_stream stream; 813 741 struct xdr_buf buf; 814 742 struct page *scratch, *new, *page = *arrays; 743 + u64 cookie; 815 744 int status; 816 745 817 746 scratch = alloc_page(GFP_KERNEL); ··· 838 735 xdr_set_scratch_page(&stream, scratch); 839 736 840 737 do { 841 - if (entry->fattr->label) 842 - entry->fattr->label->len = NFS4_MAXLABELLEN; 843 - 844 - status = xdr_decode(desc, entry, &stream); 738 + status = nfs_readdir_entry_decode(desc, entry, &stream); 845 739 if (status != 0) 846 740 break; 847 741 848 - if (desc->plus) 849 - nfs_prime_dcache(file_dentry(desc->file), entry, 850 - desc->dir_verifier); 851 - 852 - status = nfs_readdir_add_to_array(entry, page); 742 + status = nfs_readdir_page_array_append(page, entry, &cookie); 853 743 if (status != -ENOSPC) 854 744 continue; 855 745 856 746 if (page->mapping != mapping) { 857 747 if (!--narrays) 858 748 break; 859 - new = nfs_readdir_page_array_alloc(entry->prev_cookie, 860 - GFP_KERNEL); 749 + new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL); 861 750 if (!new) 862 751 break; 863 752 arrays++; 864 753 *arrays = page = new; 865 754 } else { 866 - new = nfs_readdir_page_get_next(mapping, 867 - page->index + 1, 868 - entry->prev_cookie); 755 + new = nfs_readdir_page_get_next(mapping, cookie, 756 + change_attr); 869 757 if (!new) 870 758 break; 871 759 if (page != *arrays) 872 760 nfs_readdir_page_unlock_and_put(page); 873 761 page = new; 874 762 } 875 - status = 
nfs_readdir_add_to_array(entry, page); 763 + desc->page_index_max++; 764 + status = nfs_readdir_page_array_append(page, entry, &cookie); 876 765 } while (!status && !entry->eof); 877 766 878 767 switch (status) { 879 768 case -EBADCOOKIE: 880 - if (entry->eof) { 881 - nfs_readdir_page_set_eof(page); 882 - status = 0; 883 - } 884 - break; 885 - case -ENOSPC: 769 + if (!entry->eof) 770 + break; 771 + nfs_readdir_page_set_eof(page); 772 + fallthrough; 886 773 case -EAGAIN: 887 774 status = 0; 888 775 break; 776 + case -ENOSPC: 777 + status = 0; 778 + if (!desc->plus) 779 + break; 780 + while (!nfs_readdir_entry_decode(desc, entry, &stream)) 781 + ; 889 782 } 890 783 891 784 if (page != *arrays) ··· 927 828 __be32 *verf_arg, __be32 *verf_res, 928 829 struct page **arrays, size_t narrays) 929 830 { 831 + u64 change_attr; 930 832 struct page **pages; 931 833 struct page *page = *arrays; 932 834 struct nfs_entry *entry; 933 835 size_t array_size; 934 836 struct inode *inode = file_inode(desc->file); 935 - size_t dtsize = NFS_SERVER(inode)->dtsize; 837 + unsigned int dtsize = desc->dtsize; 838 + unsigned int pglen; 936 839 int status = -ENOMEM; 937 840 938 841 entry = kzalloc(sizeof(*entry), GFP_KERNEL); ··· 952 851 if (!pages) 953 852 goto out; 954 853 955 - do { 956 - unsigned int pglen; 957 - status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, 958 - pages, dtsize, 959 - verf_res); 960 - if (status < 0) 961 - break; 854 + change_attr = inode_peek_iversion_raw(inode); 855 + status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages, 856 + dtsize, verf_res); 857 + if (status < 0) 858 + goto free_pages; 962 859 963 - pglen = status; 964 - if (pglen == 0) { 965 - nfs_readdir_page_set_eof(page); 966 - break; 967 - } 968 - 969 - verf_arg = verf_res; 970 - 860 + pglen = status; 861 + if (pglen != 0) 971 862 status = nfs_readdir_page_filler(desc, entry, pages, pglen, 972 - arrays, narrays); 973 - } while (!status && nfs_readdir_page_needs_filling(page) && 
974 - page_mapping(page)); 863 + arrays, narrays, change_attr); 864 + else 865 + nfs_readdir_page_set_eof(page); 866 + desc->buffer_fills++; 975 867 868 + free_pages: 976 869 nfs_readdir_free_pages(pages, array_size); 977 870 out: 978 871 nfs_free_fattr(entry->fattr); ··· 991 896 static struct page * 992 897 nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc) 993 898 { 994 - return nfs_readdir_page_get_locked(desc->file->f_mapping, 995 - desc->page_index, 996 - desc->last_cookie); 899 + struct address_space *mapping = desc->file->f_mapping; 900 + u64 change_attr = inode_peek_iversion_raw(mapping->host); 901 + u64 cookie = desc->last_cookie; 902 + struct page *page; 903 + 904 + page = nfs_readdir_page_get_locked(mapping, cookie, change_attr); 905 + if (!page) 906 + return NULL; 907 + if (desc->clear_cache && !nfs_readdir_page_needs_filling(page)) 908 + nfs_readdir_page_reinit_array(page, cookie, change_attr); 909 + return page; 997 910 } 998 911 999 912 /* ··· 1019 916 if (!desc->page) 1020 917 return -ENOMEM; 1021 918 if (nfs_readdir_page_needs_filling(desc->page)) { 919 + /* Grow the dtsize if we had to go back for more pages */ 920 + if (desc->page_index == desc->page_index_max) 921 + nfs_grow_dtsize(desc); 922 + desc->page_index_max = desc->page_index; 923 + trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf, 924 + desc->last_cookie, 925 + desc->page->index, desc->dtsize); 1022 926 res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf, 1023 927 &desc->page, 1); 1024 928 if (res < 0) { 1025 929 nfs_readdir_page_unlock_and_put_cached(desc); 930 + trace_nfs_readdir_cache_fill_done(inode, res); 1026 931 if (res == -EBADCOOKIE || res == -ENOTSYNC) { 1027 932 invalidate_inode_pages2(desc->file->f_mapping); 1028 - desc->page_index = 0; 933 + nfs_readdir_rewind_search(desc); 934 + trace_nfs_readdir_invalidate_cache_range( 935 + inode, 0, MAX_LFS_FILESIZE); 1029 936 return -EAGAIN; 1030 937 } 1031 938 return res; ··· 1043 930 /* 1044 931 * Set 
the cookie verifier if the page cache was empty 1045 932 */ 1046 - if (desc->page_index == 0) 933 + if (desc->last_cookie == 0 && 934 + memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) { 1047 935 memcpy(nfsi->cookieverf, verf, 1048 936 sizeof(nfsi->cookieverf)); 937 + invalidate_inode_pages2_range(desc->file->f_mapping, 1, 938 + -1); 939 + trace_nfs_readdir_invalidate_cache_range( 940 + inode, 1, MAX_LFS_FILESIZE); 941 + } 942 + desc->clear_cache = false; 1049 943 } 1050 944 res = nfs_readdir_search_array(desc); 1051 945 if (res == 0) ··· 1061 941 return res; 1062 942 } 1063 943 1064 - static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc) 1065 - { 1066 - struct address_space *mapping = desc->file->f_mapping; 1067 - struct inode *dir = file_inode(desc->file); 1068 - unsigned int dtsize = NFS_SERVER(dir)->dtsize; 1069 - loff_t size = i_size_read(dir); 1070 - 1071 - /* 1072 - * Default to uncached readdir if the page cache is empty, and 1073 - * we're looking for a non-zero cookie in a large directory. 
1074 - */ 1075 - return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize; 1076 - } 1077 - 1078 944 /* Search for desc->dir_cookie from the beginning of the page cache */ 1079 945 static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) 1080 946 { 1081 947 int res; 1082 948 1083 - if (nfs_readdir_dont_search_cache(desc)) 1084 - return -EBADCOOKIE; 1085 - 1086 949 do { 1087 - if (desc->page_index == 0) { 1088 - desc->current_index = 0; 1089 - desc->prev_index = 0; 1090 - desc->last_cookie = 0; 1091 - } 1092 950 res = find_and_lock_cache_page(desc); 1093 951 } while (res == -EAGAIN); 1094 952 return res; ··· 1080 982 { 1081 983 struct file *file = desc->file; 1082 984 struct nfs_cache_array *array; 1083 - unsigned int i = 0; 985 + unsigned int i; 1084 986 1085 987 array = kmap(desc->page); 1086 988 for (i = desc->cache_entry_index; i < array->size; i++) { ··· 1093 995 break; 1094 996 } 1095 997 memcpy(desc->verf, verf, sizeof(desc->verf)); 1096 - if (i < (array->size-1)) 1097 - desc->dir_cookie = array->array[i+1].cookie; 1098 - else 998 + if (i == array->size - 1) { 1099 999 desc->dir_cookie = array->last_cookie; 1000 + nfs_readdir_seek_next_array(array, desc); 1001 + } else { 1002 + desc->dir_cookie = array->array[i + 1].cookie; 1003 + desc->last_cookie = array->array[0].cookie; 1004 + } 1100 1005 if (nfs_readdir_use_cookie(file)) 1101 1006 desc->ctx->pos = desc->dir_cookie; 1102 1007 else 1103 1008 desc->ctx->pos++; 1104 - if (desc->duped != 0) 1105 - desc->duped = 1; 1106 1009 } 1107 1010 if (array->page_is_eof) 1108 1011 desc->eof = !desc->eob; ··· 1145 1046 desc->page_index = 0; 1146 1047 desc->cache_entry_index = 0; 1147 1048 desc->last_cookie = desc->dir_cookie; 1148 - desc->duped = 0; 1049 + desc->page_index_max = 0; 1050 + 1051 + trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie, 1052 + -1, desc->dtsize); 1149 1053 1150 1054 status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz); 1055 + if 
(status < 0) { 1056 + trace_nfs_readdir_uncached_done(file_inode(desc->file), status); 1057 + goto out_free; 1058 + } 1151 1059 1152 1060 for (i = 0; !desc->eob && i < sz && arrays[i]; i++) { 1153 1061 desc->page = arrays[i]; ··· 1162 1056 } 1163 1057 desc->page = NULL; 1164 1058 1165 - 1059 + /* 1060 + * Grow the dtsize if we have to go back for more pages, 1061 + * or shrink it if we're reading too many. 1062 + */ 1063 + if (!desc->eof) { 1064 + if (!desc->eob) 1065 + nfs_grow_dtsize(desc); 1066 + else if (desc->buffer_fills == 1 && 1067 + i < (desc->page_index_max >> 1)) 1068 + nfs_shrink_dtsize(desc); 1069 + } 1070 + out_free: 1166 1071 for (i = 0; i < sz && arrays[i]; i++) 1167 1072 nfs_readdir_page_array_free(arrays[i]); 1168 1073 out: 1074 + if (!nfs_readdir_use_cookie(desc->file)) 1075 + nfs_readdir_rewind_search(desc); 1076 + desc->page_index_max = -1; 1169 1077 kfree(arrays); 1170 1078 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); 1171 1079 return status; 1080 + } 1081 + 1082 + #define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL) 1083 + 1084 + static bool nfs_readdir_handle_cache_misses(struct inode *inode, 1085 + struct nfs_readdir_descriptor *desc, 1086 + unsigned int cache_misses, 1087 + bool force_clear) 1088 + { 1089 + if (desc->ctx->pos == 0 || !desc->plus) 1090 + return false; 1091 + if (cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD && !force_clear) 1092 + return false; 1093 + trace_nfs_readdir_force_readdirplus(inode); 1094 + return true; 1172 1095 } 1173 1096 1174 1097 /* The file offset position represents the dirent entry number. 
A ··· 1211 1076 struct nfs_inode *nfsi = NFS_I(inode); 1212 1077 struct nfs_open_dir_context *dir_ctx = file->private_data; 1213 1078 struct nfs_readdir_descriptor *desc; 1214 - pgoff_t page_index; 1079 + unsigned int cache_hits, cache_misses; 1080 + bool force_clear; 1215 1081 int res; 1216 1082 1217 1083 dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", ··· 1225 1089 * to either find the entry with the appropriate number or 1226 1090 * revalidate the cookie. 1227 1091 */ 1228 - if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) { 1229 - res = nfs_revalidate_mapping(inode, file->f_mapping); 1230 - if (res < 0) 1231 - goto out; 1232 - } 1092 + nfs_revalidate_mapping(inode, file->f_mapping); 1233 1093 1234 1094 res = -ENOMEM; 1235 1095 desc = kzalloc(sizeof(*desc), GFP_KERNEL); ··· 1233 1101 goto out; 1234 1102 desc->file = file; 1235 1103 desc->ctx = ctx; 1236 - desc->plus = nfs_use_readdirplus(inode, ctx); 1104 + desc->page_index_max = -1; 1237 1105 1238 1106 spin_lock(&file->f_lock); 1239 1107 desc->dir_cookie = dir_ctx->dir_cookie; 1240 - desc->dup_cookie = dir_ctx->dup_cookie; 1241 - desc->duped = dir_ctx->duped; 1242 - page_index = dir_ctx->page_index; 1108 + desc->page_index = dir_ctx->page_index; 1109 + desc->last_cookie = dir_ctx->last_cookie; 1243 1110 desc->attr_gencount = dir_ctx->attr_gencount; 1244 1111 desc->eof = dir_ctx->eof; 1112 + nfs_set_dtsize(desc, dir_ctx->dtsize); 1245 1113 memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); 1114 + cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0); 1115 + cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0); 1116 + force_clear = dir_ctx->force_clear; 1246 1117 spin_unlock(&file->f_lock); 1247 1118 1248 1119 if (desc->eof) { ··· 1253 1118 goto out_free; 1254 1119 } 1255 1120 1256 - if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) && 1257 - list_is_singular(&nfsi->open_files)) 1258 - invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1); 1121 + desc->plus = 
nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses); 1122 + force_clear = nfs_readdir_handle_cache_misses(inode, desc, cache_misses, 1123 + force_clear); 1124 + desc->clear_cache = force_clear; 1259 1125 1260 1126 do { 1261 1127 res = readdir_search_pagecache(desc); ··· 1275 1139 break; 1276 1140 } 1277 1141 if (res == -ETOOSMALL && desc->plus) { 1278 - clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); 1279 1142 nfs_zap_caches(inode); 1280 - desc->page_index = 0; 1281 1143 desc->plus = false; 1282 1144 desc->eof = false; 1283 1145 continue; ··· 1285 1151 1286 1152 nfs_do_filldir(desc, nfsi->cookieverf); 1287 1153 nfs_readdir_page_unlock_and_put_cached(desc); 1154 + if (desc->page_index == desc->page_index_max) 1155 + desc->clear_cache = force_clear; 1288 1156 } while (!desc->eob && !desc->eof); 1289 1157 1290 1158 spin_lock(&file->f_lock); 1291 1159 dir_ctx->dir_cookie = desc->dir_cookie; 1292 - dir_ctx->dup_cookie = desc->dup_cookie; 1293 - dir_ctx->duped = desc->duped; 1160 + dir_ctx->last_cookie = desc->last_cookie; 1294 1161 dir_ctx->attr_gencount = desc->attr_gencount; 1295 1162 dir_ctx->page_index = desc->page_index; 1163 + dir_ctx->force_clear = force_clear; 1296 1164 dir_ctx->eof = desc->eof; 1165 + dir_ctx->dtsize = desc->dtsize; 1297 1166 memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); 1298 1167 spin_unlock(&file->f_lock); 1299 1168 out_free: ··· 1334 1197 } 1335 1198 if (offset != filp->f_pos) { 1336 1199 filp->f_pos = offset; 1337 - if (nfs_readdir_use_cookie(filp)) 1338 - dir_ctx->dir_cookie = offset; 1339 - else 1200 + dir_ctx->page_index = 0; 1201 + if (!nfs_readdir_use_cookie(filp)) { 1340 1202 dir_ctx->dir_cookie = 0; 1341 - if (offset == 0) 1342 - memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf)); 1343 - dir_ctx->duped = 0; 1203 + dir_ctx->last_cookie = 0; 1204 + } else { 1205 + dir_ctx->dir_cookie = offset; 1206 + dir_ctx->last_cookie = offset; 1207 + } 1344 1208 dir_ctx->eof = false; 1345 1209 } 1346 1210 spin_unlock(&filp->f_lock); 
··· 1557 1419 if (flags & LOOKUP_REVAL) 1558 1420 goto out_force; 1559 1421 out: 1560 - return (inode->i_nlink == 0) ? -ESTALE : 0; 1422 + if (inode->i_nlink > 0 || 1423 + (inode->i_nlink == 0 && 1424 + test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags))) 1425 + return 0; 1426 + else 1427 + return -ESTALE; 1561 1428 out_force: 1562 1429 if (flags & LOOKUP_RCU) 1563 1430 return -ECHILD; ··· 1612 1469 { 1613 1470 switch (error) { 1614 1471 case 1: 1615 - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", 1616 - __func__, dentry); 1617 - return 1; 1472 + break; 1618 1473 case 0: 1619 1474 /* 1620 1475 * We can't d_drop the root of a disconnected tree: ··· 1621 1480 * inodes on unmount and further oopses. 1622 1481 */ 1623 1482 if (inode && IS_ROOT(dentry)) 1624 - return 1; 1625 - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", 1626 - __func__, dentry); 1627 - return 0; 1483 + error = 1; 1484 + break; 1628 1485 } 1629 - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", 1630 - __func__, dentry, error); 1486 + trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error); 1631 1487 return error; 1632 1488 } 1633 1489 ··· 1649 1511 return nfs_lookup_revalidate_done(dir, dentry, inode, 1); 1650 1512 } 1651 1513 1652 - static int 1653 - nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, 1654 - struct inode *inode) 1514 + static int nfs_lookup_revalidate_dentry(struct inode *dir, 1515 + struct dentry *dentry, 1516 + struct inode *inode, unsigned int flags) 1655 1517 { 1656 1518 struct nfs_fh *fhandle; 1657 1519 struct nfs_fattr *fattr; 1658 1520 unsigned long dir_verifier; 1659 1521 int ret; 1522 + 1523 + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); 1660 1524 1661 1525 ret = -ENOMEM; 1662 1526 fhandle = nfs_alloc_fhandle(); ··· 1680 1540 } 1681 1541 goto out; 1682 1542 } 1543 + 1544 + /* Request help from readdirplus */ 1545 + nfs_lookup_advise_force_readdirplus(dir, flags); 1546 + 1683 1547 ret = 0; 1684 1548 if 
(nfs_compare_fh(NFS_FH(inode), fhandle)) 1685 1549 goto out; ··· 1693 1549 nfs_setsecurity(inode, fattr); 1694 1550 nfs_set_verifier(dentry, dir_verifier); 1695 1551 1696 - /* set a readdirplus hint that we had a cache miss */ 1697 - nfs_force_use_readdirplus(dir); 1698 1552 ret = 1; 1699 1553 out: 1700 1554 nfs_free_fattr(fattr); ··· 1749 1607 nfs_mark_dir_for_revalidate(dir); 1750 1608 goto out_bad; 1751 1609 } 1752 - nfs_advise_use_readdirplus(dir); 1753 1610 goto out_valid; 1754 1611 } 1755 1612 ··· 1758 1617 if (NFS_STALE(inode)) 1759 1618 goto out_bad; 1760 1619 1761 - trace_nfs_lookup_revalidate_enter(dir, dentry, flags); 1762 - error = nfs_lookup_revalidate_dentry(dir, dentry, inode); 1763 - trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); 1764 - return error; 1620 + return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); 1765 1621 out_valid: 1766 1622 return nfs_lookup_revalidate_done(dir, dentry, inode, 1); 1767 1623 out_bad: ··· 1952 1814 goto out; 1953 1815 1954 1816 /* Notify readdir to use READDIRPLUS */ 1955 - nfs_force_use_readdirplus(dir); 1817 + nfs_lookup_advise_force_readdirplus(dir, flags); 1956 1818 1957 1819 no_entry: 1958 1820 res = d_splice_alias(inode, dentry); ··· 2215 2077 reval_dentry: 2216 2078 if (flags & LOOKUP_RCU) 2217 2079 return -ECHILD; 2218 - return nfs_lookup_revalidate_dentry(dir, dentry, inode); 2080 + return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); 2219 2081 2220 2082 full_reval: 2221 2083 return nfs_do_lookup_revalidate(dir, dentry, flags); ··· 2468 2330 2469 2331 trace_nfs_unlink_enter(dir, dentry); 2470 2332 spin_lock(&dentry->d_lock); 2471 - if (d_count(dentry) > 1) { 2333 + if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED, 2334 + &NFS_I(d_inode(dentry))->flags)) { 2472 2335 spin_unlock(&dentry->d_lock); 2473 2336 /* Start asynchronous writeout of the inode */ 2474 2337 write_inode_now(d_inode(dentry), 0); ··· 3128 2989 /* 3129 2990 * Determine which access bits we want 
to ask for... 3130 2991 */ 3131 - cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND; 3132 - if (nfs_server_capable(inode, NFS_CAP_XATTR)) { 3133 - cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE | 3134 - NFS_ACCESS_XALIST; 3135 - } 2992 + cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND | 2993 + nfs_access_xattr_mask(NFS_SERVER(inode)); 3136 2994 if (S_ISDIR(inode->i_mode)) 3137 2995 cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; 3138 2996 else
+32 -16
fs/nfs/direct.c
··· 173 173 VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); 174 174 175 175 if (iov_iter_rw(iter) == READ) 176 - return nfs_file_direct_read(iocb, iter); 177 - return nfs_file_direct_write(iocb, iter); 176 + return nfs_file_direct_read(iocb, iter, true); 177 + return nfs_file_direct_write(iocb, iter, true); 178 178 } 179 179 180 180 static void nfs_direct_release_pages(struct page **pages, unsigned int npages) ··· 425 425 * nfs_file_direct_read - file direct read operation for NFS files 426 426 * @iocb: target I/O control block 427 427 * @iter: vector of user buffers into which to read data 428 + * @swap: flag indicating this is swap IO, not O_DIRECT IO 428 429 * 429 430 * We use this function for direct reads instead of calling 430 431 * generic_file_aio_read() in order to avoid gfar's check to see if ··· 441 440 * client must read the updated atime from the server back into its 442 441 * cache. 443 442 */ 444 - ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) 443 + ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, 444 + bool swap) 445 445 { 446 446 struct file *file = iocb->ki_filp; 447 447 struct address_space *mapping = file->f_mapping; ··· 484 482 if (iter_is_iovec(iter)) 485 483 dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; 486 484 487 - nfs_start_io_direct(inode); 485 + if (!swap) 486 + nfs_start_io_direct(inode); 488 487 489 488 NFS_I(inode)->read_io += count; 490 489 requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); 491 490 492 - nfs_end_io_direct(inode); 491 + if (!swap) 492 + nfs_end_io_direct(inode); 493 493 494 494 if (requested > 0) { 495 495 result = nfs_direct_wait(dreq); ··· 794 790 */ 795 791 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 796 792 struct iov_iter *iter, 797 - loff_t pos) 793 + loff_t pos, int ioflags) 798 794 { 799 795 struct nfs_pageio_descriptor desc; 800 796 struct inode *inode = dreq->inode; ··· 802 798 size_t requested_bytes = 0; 803 799 
size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); 804 800 805 - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, 801 + nfs_pageio_init_write(&desc, inode, ioflags, false, 806 802 &nfs_direct_write_completion_ops); 807 803 desc.pg_dreq = dreq; 808 804 get_dreq(dreq); ··· 880 876 * nfs_file_direct_write - file direct write operation for NFS files 881 877 * @iocb: target I/O control block 882 878 * @iter: vector of user buffers from which to write data 879 + * @swap: flag indicating this is swap IO, not O_DIRECT IO 883 880 * 884 881 * We use this function for direct writes instead of calling 885 882 * generic_file_aio_write() in order to avoid taking the inode ··· 897 892 * Note that O_APPEND is not supported for NFS direct writes, as there 898 893 * is no atomic O_APPEND write facility in the NFS protocol. 899 894 */ 900 - ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) 895 + ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, 896 + bool swap) 901 897 { 902 898 ssize_t result, requested; 903 899 size_t count; ··· 912 906 dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", 913 907 file, iov_iter_count(iter), (long long) iocb->ki_pos); 914 908 915 - result = generic_write_checks(iocb, iter); 909 + if (swap) 910 + /* bypass generic checks */ 911 + result = iov_iter_count(iter); 912 + else 913 + result = generic_write_checks(iocb, iter); 916 914 if (result <= 0) 917 915 return result; 918 916 count = result; ··· 947 937 dreq->iocb = iocb; 948 938 pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); 949 939 950 - nfs_start_io_direct(inode); 940 + if (swap) { 941 + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, 942 + FLUSH_STABLE); 943 + } else { 944 + nfs_start_io_direct(inode); 951 945 952 - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); 946 + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, 947 + FLUSH_COND_STABLE); 953 948 954 - if (mapping->nrpages) { 
955 - invalidate_inode_pages2_range(mapping, 956 - pos >> PAGE_SHIFT, end); 949 + if (mapping->nrpages) { 950 + invalidate_inode_pages2_range(mapping, 951 + pos >> PAGE_SHIFT, end); 952 + } 953 + 954 + nfs_end_io_direct(inode); 957 955 } 958 - 959 - nfs_end_io_direct(inode); 960 956 961 957 if (requested > 0) { 962 958 result = nfs_direct_wait(dreq);
+15 -11
fs/nfs/file.c
··· 44 44 45 45 static const struct vm_operations_struct nfs_file_vm_ops; 46 46 47 - /* Hack for future NFS swap support */ 48 - #ifndef IS_SWAPFILE 49 - # define IS_SWAPFILE(inode) (0) 50 - #endif 51 - 52 47 int nfs_check_flags(int flags) 53 48 { 54 49 if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT)) ··· 157 162 ssize_t result; 158 163 159 164 if (iocb->ki_flags & IOCB_DIRECT) 160 - return nfs_file_direct_read(iocb, to); 165 + return nfs_file_direct_read(iocb, to, false); 161 166 162 167 dprintk("NFS: read(%pD2, %zu@%lu)\n", 163 168 iocb->ki_filp, ··· 483 488 { 484 489 unsigned long blocks; 485 490 long long isize; 486 - struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); 487 - struct inode *inode = file->f_mapping->host; 491 + struct inode *inode = file_inode(file); 492 + struct rpc_clnt *clnt = NFS_CLIENT(inode); 493 + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; 488 494 489 495 spin_lock(&inode->i_lock); 490 496 blocks = inode->i_blocks; ··· 498 502 499 503 *span = sis->pages; 500 504 505 + 506 + if (cl->rpc_ops->enable_swap) 507 + cl->rpc_ops->enable_swap(inode); 508 + 501 509 return rpc_clnt_swap_activate(clnt); 502 510 } 503 511 504 512 static void nfs_swap_deactivate(struct file *file) 505 513 { 506 - struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); 514 + struct inode *inode = file_inode(file); 515 + struct rpc_clnt *clnt = NFS_CLIENT(inode); 516 + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; 507 517 508 518 rpc_clnt_swap_deactivate(clnt); 519 + if (cl->rpc_ops->disable_swap) 520 + cl->rpc_ops->disable_swap(file_inode(file)); 509 521 } 510 522 511 523 const struct address_space_operations nfs_file_aops = { 512 524 .readpage = nfs_readpage, 513 - .readpages = nfs_readpages, 525 + .readahead = nfs_readahead, 514 526 .dirty_folio = filemap_dirty_folio, 515 527 .writepage = nfs_writepage, 516 528 .writepages = nfs_writepages, ··· 623 619 return result; 624 620 625 621 if (iocb->ki_flags & IOCB_DIRECT) 626 - 
return nfs_file_direct_write(iocb, from); 622 + return nfs_file_direct_write(iocb, from, false); 627 623 628 624 dprintk("NFS: write(%pD2, %zu@%Ld)\n", 629 625 file, iov_iter_count(from), (long long) iocb->ki_pos);
+1 -1
fs/nfs/filelayout/filelayout.c
··· 1075 1075 unsigned int size = (fl->stripe_type == STRIPE_SPARSE) ? 1076 1076 fl->dsaddr->ds_num : fl->dsaddr->stripe_count; 1077 1077 1078 - new = pnfs_alloc_commit_array(size, GFP_NOIO); 1078 + new = pnfs_alloc_commit_array(size, nfs_io_gfp_mask()); 1079 1079 if (new) { 1080 1080 spin_lock(&inode->i_lock); 1081 1081 array = pnfs_add_commit_array(fl_cinfo, new, lseg);
+23 -30
fs/nfs/flexfilelayout/flexfilelayout.c
··· 663 663 spin_unlock(&mirror->lock); 664 664 665 665 if (report) 666 - pnfs_report_layoutstat(inode, GFP_KERNEL); 666 + pnfs_report_layoutstat(inode, nfs_io_gfp_mask()); 667 667 } 668 668 669 669 static void ··· 694 694 spin_unlock(&mirror->lock); 695 695 696 696 if (report) 697 - pnfs_report_layoutstat(inode, GFP_NOIO); 697 + pnfs_report_layoutstat(inode, nfs_io_gfp_mask()); 698 698 } 699 699 700 700 static void ··· 806 806 bool strict_iomode) 807 807 { 808 808 pnfs_put_lseg(pgio->pg_lseg); 809 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 810 - nfs_req_openctx(req), 811 - req_offset(req), 812 - req->wb_bytes, 813 - IOMODE_READ, 814 - strict_iomode, 815 - GFP_KERNEL); 809 + pgio->pg_lseg = 810 + pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), 811 + req_offset(req), req->wb_bytes, IOMODE_READ, 812 + strict_iomode, nfs_io_gfp_mask()); 816 813 if (IS_ERR(pgio->pg_lseg)) { 817 814 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 818 815 pgio->pg_lseg = NULL; ··· 891 894 retry: 892 895 ff_layout_pg_check_layout(pgio, req); 893 896 if (!pgio->pg_lseg) { 894 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 895 - nfs_req_openctx(req), 896 - req_offset(req), 897 - req->wb_bytes, 898 - IOMODE_RW, 899 - false, 900 - GFP_NOFS); 897 + pgio->pg_lseg = 898 + pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), 899 + req_offset(req), req->wb_bytes, 900 + IOMODE_RW, false, nfs_io_gfp_mask()); 901 901 if (IS_ERR(pgio->pg_lseg)) { 902 902 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 903 903 pgio->pg_lseg = NULL; ··· 947 953 struct nfs_page *req) 948 954 { 949 955 if (!pgio->pg_lseg) { 950 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 951 - nfs_req_openctx(req), 952 - req_offset(req), 953 - req->wb_bytes, 954 - IOMODE_RW, 955 - false, 956 - GFP_NOFS); 956 + pgio->pg_lseg = 957 + pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), 958 + req_offset(req), req->wb_bytes, 959 + IOMODE_RW, false, nfs_io_gfp_mask()); 957 960 if (IS_ERR(pgio->pg_lseg)) { 958 961 
pgio->pg_error = PTR_ERR(pgio->pg_lseg); 959 962 pgio->pg_lseg = NULL; ··· 1249 1258 mirror = FF_LAYOUT_COMP(lseg, idx); 1250 1259 err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 1251 1260 mirror, offset, length, status, opnum, 1252 - GFP_NOIO); 1261 + nfs_io_gfp_mask()); 1253 1262 1254 1263 switch (status) { 1255 1264 case NFS4ERR_DELAY: ··· 1964 1973 struct inode *inode = lseg->pls_layout->plh_inode; 1965 1974 struct pnfs_commit_array *array, *new; 1966 1975 1967 - new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, GFP_NOIO); 1976 + new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, 1977 + nfs_io_gfp_mask()); 1968 1978 if (new) { 1969 1979 spin_lock(&inode->i_lock); 1970 1980 array = pnfs_add_commit_array(fl_cinfo, new, lseg); ··· 2144 2152 struct nfs4_flexfile_layoutreturn_args *ff_args; 2145 2153 struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout); 2146 2154 2147 - ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL); 2155 + ff_args = kmalloc(sizeof(*ff_args), nfs_io_gfp_mask()); 2148 2156 if (!ff_args) 2149 2157 goto out_nomem; 2150 - ff_args->pages[0] = alloc_page(GFP_KERNEL); 2158 + ff_args->pages[0] = alloc_page(nfs_io_gfp_mask()); 2151 2159 if (!ff_args->pages[0]) 2152 2160 goto out_nomem_free; 2153 2161 ··· 2184 2192 if (list_empty(&head)) 2185 2193 return; 2186 2194 2187 - errors = kmalloc_array(NFS42_LAYOUTERROR_MAX, 2188 - sizeof(*errors), GFP_NOFS); 2195 + errors = kmalloc_array(NFS42_LAYOUTERROR_MAX, sizeof(*errors), 2196 + nfs_io_gfp_mask()); 2189 2197 if (errors != NULL) { 2190 2198 const struct nfs4_ff_layout_ds_err *pos; 2191 2199 size_t n = 0; ··· 2436 2444 const int dev_count = PNFS_LAYOUTSTATS_MAXDEV; 2437 2445 2438 2446 /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ 2439 - args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO); 2447 + args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), 2448 + nfs_io_gfp_mask()); 2440 2449 if (!args->devinfo) 2441 
2450 return -ENOMEM; 2442 2451
+8
fs/nfs/fs_context.c
··· 80 80 Opt_source, 81 81 Opt_tcp, 82 82 Opt_timeo, 83 + Opt_trunkdiscovery, 83 84 Opt_udp, 84 85 Opt_v, 85 86 Opt_vers, ··· 181 180 fsparam_string("source", Opt_source), 182 181 fsparam_flag ("tcp", Opt_tcp), 183 182 fsparam_u32 ("timeo", Opt_timeo), 183 + fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery), 184 184 fsparam_flag ("udp", Opt_udp), 185 185 fsparam_flag ("v2", Opt_v), 186 186 fsparam_flag ("v3", Opt_v), ··· 530 528 ctx->flags |= NFS_MOUNT_NOCTO; 531 529 else 532 530 ctx->flags &= ~NFS_MOUNT_NOCTO; 531 + break; 532 + case Opt_trunkdiscovery: 533 + if (result.negated) 534 + ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY; 535 + else 536 + ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY; 533 537 break; 534 538 case Opt_ac: 535 539 if (result.negated)
+17 -36
fs/nfs/fscache.c
··· 19 19 #include "internal.h" 20 20 #include "iostat.h" 21 21 #include "fscache.h" 22 - 23 - #define NFSDBG_FACILITY NFSDBG_FSCACHE 22 + #include "nfstrace.h" 24 23 25 24 #define NFS_MAX_KEY_LEN 1000 26 25 ··· 127 128 vcookie = fscache_acquire_volume(key, 128 129 NULL, /* preferred_cache */ 129 130 NULL, 0 /* coherency_data */); 130 - dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", 131 - nfss, vcookie); 132 131 if (IS_ERR(vcookie)) { 133 132 if (vcookie != ERR_PTR(-EBUSY)) { 134 133 kfree(key); ··· 149 152 { 150 153 struct nfs_server *nfss = NFS_SB(sb); 151 154 152 - dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n", 153 - nfss, nfss->fscache); 154 - 155 155 fscache_relinquish_volume(nfss->fscache, NULL, false); 156 156 nfss->fscache = NULL; 157 157 kfree(nfss->fscache_uniq); ··· 167 173 if (!(nfss->fscache && S_ISREG(inode->i_mode))) 168 174 return; 169 175 170 - nfs_fscache_update_auxdata(&auxdata, nfsi); 176 + nfs_fscache_update_auxdata(&auxdata, inode); 171 177 172 178 nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, 173 179 0, ··· 175 181 nfsi->fh.size, 176 182 &auxdata, /* aux_data */ 177 183 sizeof(auxdata), 178 - i_size_read(&nfsi->vfs_inode)); 184 + i_size_read(inode)); 179 185 } 180 186 181 187 /* ··· 185 191 { 186 192 struct nfs_inode *nfsi = NFS_I(inode); 187 193 struct fscache_cookie *cookie = nfs_i_fscache(inode); 188 - 189 - dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); 190 194 191 195 fscache_relinquish_cookie(cookie, false); 192 196 nfsi->fscache = NULL; ··· 212 220 void nfs_fscache_open_file(struct inode *inode, struct file *filp) 213 221 { 214 222 struct nfs_fscache_inode_auxdata auxdata; 215 - struct nfs_inode *nfsi = NFS_I(inode); 216 223 struct fscache_cookie *cookie = nfs_i_fscache(inode); 217 224 bool open_for_write = inode_is_open_for_write(inode); 218 225 ··· 220 229 221 230 fscache_use_cookie(cookie, open_for_write); 222 231 if (open_for_write) { 223 - 
dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); 224 - nfs_fscache_update_auxdata(&auxdata, nfsi); 232 + nfs_fscache_update_auxdata(&auxdata, inode); 225 233 fscache_invalidate(cookie, &auxdata, i_size_read(inode), 226 234 FSCACHE_INVAL_DIO_WRITE); 227 235 } ··· 230 240 void nfs_fscache_release_file(struct inode *inode, struct file *filp) 231 241 { 232 242 struct nfs_fscache_inode_auxdata auxdata; 233 - struct nfs_inode *nfsi = NFS_I(inode); 234 243 struct fscache_cookie *cookie = nfs_i_fscache(inode); 235 244 236 245 if (fscache_cookie_valid(cookie)) { 237 - nfs_fscache_update_auxdata(&auxdata, nfsi); 246 + nfs_fscache_update_auxdata(&auxdata, inode); 238 247 fscache_unuse_cookie(cookie, &auxdata, NULL); 239 248 } 240 249 } ··· 308 319 /* 309 320 * Retrieve a page from fscache 310 321 */ 311 - int __nfs_readpage_from_fscache(struct inode *inode, struct page *page) 322 + int __nfs_fscache_read_page(struct inode *inode, struct page *page) 312 323 { 313 324 int ret; 314 325 315 - dfprintk(FSCACHE, 316 - "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", 317 - nfs_i_fscache(inode), page, page->index, page->flags, inode); 318 - 326 + trace_nfs_fscache_read_page(inode, page); 319 327 if (PageChecked(page)) { 320 - dfprintk(FSCACHE, "NFS: readpage_from_fscache: PageChecked\n"); 321 328 ClearPageChecked(page); 322 - return 1; 329 + ret = 1; 330 + goto out; 323 331 } 324 332 325 333 ret = fscache_fallback_read_page(inode, page); 326 334 if (ret < 0) { 327 335 nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); 328 - dfprintk(FSCACHE, 329 - "NFS: readpage_from_fscache failed %d\n", ret); 330 336 SetPageChecked(page); 331 - return ret; 337 + goto out; 332 338 } 333 339 334 340 /* Read completed synchronously */ 335 - dfprintk(FSCACHE, "NFS: readpage_from_fscache: read successful\n"); 336 341 nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); 337 342 SetPageUptodate(page); 338 - return 0; 343 + ret = 0; 344 + out: 345 + 
trace_nfs_fscache_read_page_exit(inode, page, ret); 346 + return ret; 339 347 } 340 348 341 349 /* 342 350 * Store a newly fetched page in fscache. We can be certain there's no page 343 351 * stored in the cache as yet otherwise we would've read it from there. 344 352 */ 345 - void __nfs_readpage_to_fscache(struct inode *inode, struct page *page) 353 + void __nfs_fscache_write_page(struct inode *inode, struct page *page) 346 354 { 347 355 int ret; 348 356 349 - dfprintk(FSCACHE, 350 - "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx))\n", 351 - nfs_i_fscache(inode), page, page->index, page->flags); 357 + trace_nfs_fscache_write_page(inode, page); 352 358 353 359 ret = fscache_fallback_write_page(inode, page, true); 354 - 355 - dfprintk(FSCACHE, 356 - "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", 357 - page, page->index, page->flags, ret); 358 360 359 361 if (ret != 0) { 360 362 nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); ··· 353 373 } else { 354 374 nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_OK); 355 375 } 376 + trace_nfs_fscache_write_page_exit(inode, page, ret); 356 377 }
+19 -26
fs/nfs/fscache.h
··· 45 45 extern void nfs_fscache_open_file(struct inode *, struct file *); 46 46 extern void nfs_fscache_release_file(struct inode *, struct file *); 47 47 48 - extern int __nfs_readpage_from_fscache(struct inode *, struct page *); 49 - extern void __nfs_read_completion_to_fscache(struct nfs_pgio_header *hdr, 50 - unsigned long bytes); 51 - extern void __nfs_readpage_to_fscache(struct inode *, struct page *); 48 + extern int __nfs_fscache_read_page(struct inode *, struct page *); 49 + extern void __nfs_fscache_write_page(struct inode *, struct page *); 52 50 53 51 static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) 54 52 { ··· 64 66 /* 65 67 * Retrieve a page from an inode data storage object. 66 68 */ 67 - static inline int nfs_readpage_from_fscache(struct inode *inode, 68 - struct page *page) 69 + static inline int nfs_fscache_read_page(struct inode *inode, struct page *page) 69 70 { 70 - if (NFS_I(inode)->fscache) 71 - return __nfs_readpage_from_fscache(inode, page); 71 + if (nfs_i_fscache(inode)) 72 + return __nfs_fscache_read_page(inode, page); 72 73 return -ENOBUFS; 73 74 } 74 75 ··· 75 78 * Store a page newly fetched from the server in an inode data storage object 76 79 * in the cache. 
77 80 */ 78 - static inline void nfs_readpage_to_fscache(struct inode *inode, 81 + static inline void nfs_fscache_write_page(struct inode *inode, 79 82 struct page *page) 80 83 { 81 - if (NFS_I(inode)->fscache) 82 - __nfs_readpage_to_fscache(inode, page); 84 + if (nfs_i_fscache(inode)) 85 + __nfs_fscache_write_page(inode, page); 83 86 } 84 87 85 88 static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata, 86 - struct nfs_inode *nfsi) 89 + struct inode *inode) 87 90 { 88 91 memset(auxdata, 0, sizeof(*auxdata)); 89 - auxdata->mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; 90 - auxdata->mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; 91 - auxdata->ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; 92 - auxdata->ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; 92 + auxdata->mtime_sec = inode->i_mtime.tv_sec; 93 + auxdata->mtime_nsec = inode->i_mtime.tv_nsec; 94 + auxdata->ctime_sec = inode->i_ctime.tv_sec; 95 + auxdata->ctime_nsec = inode->i_ctime.tv_nsec; 93 96 94 - if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) 95 - auxdata->change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); 97 + if (NFS_SERVER(inode)->nfs_client->rpc_ops->version == 4) 98 + auxdata->change_attr = inode_peek_iversion_raw(inode); 96 99 } 97 100 98 101 /* ··· 104 107 struct nfs_inode *nfsi = NFS_I(inode); 105 108 106 109 if (nfsi->fscache) { 107 - nfs_fscache_update_auxdata(&auxdata, nfsi); 110 + nfs_fscache_update_auxdata(&auxdata, inode); 108 111 fscache_invalidate(nfsi->fscache, &auxdata, 109 - i_size_read(&nfsi->vfs_inode), flags); 112 + i_size_read(inode), flags); 110 113 } 111 114 } 112 115 ··· 133 136 { 134 137 return 1; /* True: may release page */ 135 138 } 136 - static inline int nfs_readpage_from_fscache(struct inode *inode, 137 - struct page *page) 139 + static inline int nfs_fscache_read_page(struct inode *inode, struct page *page) 138 140 { 139 141 return -ENOBUFS; 140 142 } 141 - static inline void nfs_readpage_to_fscache(struct inode *inode, 
142 - struct page *page) {} 143 - 144 - 143 + static inline void nfs_fscache_write_page(struct inode *inode, struct page *page) {} 145 144 static inline void nfs_fscache_invalidate(struct inode *inode, int flags) {} 146 145 147 146 static inline const char *nfs_server_fscache_state(struct nfs_server *server)
+43 -43
fs/nfs/inode.c
··· 203 203 NFS_INO_INVALID_OTHER | 204 204 NFS_INO_INVALID_XATTR); 205 205 flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); 206 - } else if (flags & NFS_INO_REVAL_PAGECACHE) 207 - flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE; 206 + } 208 207 209 208 if (!nfs_has_xattr_cache(nfsi)) 210 209 flags &= ~NFS_INO_INVALID_XATTR; 211 210 if (flags & NFS_INO_INVALID_DATA) 212 211 nfs_fscache_invalidate(inode, 0); 213 - flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); 212 + flags &= ~NFS_INO_REVAL_FORCED; 214 213 215 214 nfsi->cache_validity |= flags; 216 215 ··· 235 236 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 236 237 nfsi->attrtimeo_timestamp = jiffies; 237 238 238 - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { 239 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR 240 - | NFS_INO_INVALID_DATA 241 - | NFS_INO_INVALID_ACCESS 242 - | NFS_INO_INVALID_ACL 243 - | NFS_INO_INVALID_XATTR 244 - | NFS_INO_REVAL_PAGECACHE); 245 - } else 246 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR 247 - | NFS_INO_INVALID_ACCESS 248 - | NFS_INO_INVALID_ACL 249 - | NFS_INO_INVALID_XATTR 250 - | NFS_INO_REVAL_PAGECACHE); 239 + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) 240 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | 241 + NFS_INO_INVALID_DATA | 242 + NFS_INO_INVALID_ACCESS | 243 + NFS_INO_INVALID_ACL | 244 + NFS_INO_INVALID_XATTR); 245 + else 246 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | 247 + NFS_INO_INVALID_ACCESS | 248 + NFS_INO_INVALID_ACL | 249 + NFS_INO_INVALID_XATTR); 251 250 nfs_zap_label_cache_locked(nfsi); 252 251 } 253 252 ··· 561 564 inode->i_gid = fattr->gid; 562 565 else if (fattr_supported & NFS_ATTR_FATTR_GROUP) 563 566 nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); 564 - if (nfs_server_capable(inode, NFS_CAP_XATTR)) 565 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); 566 567 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 567 568 inode->i_blocks = fattr->du.nfs2.blocks; 568 569 else if 
(fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED && ··· 780 785 } 781 786 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); 782 787 788 + /* 789 + * Don't request help from readdirplus if the file is being written to, 790 + * or if attribute caching is turned off 791 + */ 792 + static bool nfs_getattr_readdirplus_enable(const struct inode *inode) 793 + { 794 + return nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && 795 + !nfs_have_writebacks(inode) && NFS_MAXATTRTIMEO(inode) > 5 * HZ; 796 + } 797 + 783 798 static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) 784 799 { 785 - struct dentry *parent; 786 - 787 - if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) 788 - return; 789 - parent = dget_parent(dentry); 790 - nfs_force_use_readdirplus(d_inode(parent)); 791 - dput(parent); 800 + if (!IS_ROOT(dentry)) { 801 + struct dentry *parent = dget_parent(dentry); 802 + nfs_readdir_record_entry_cache_miss(d_inode(parent)); 803 + dput(parent); 804 + } 792 805 } 793 806 794 807 static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) 795 808 { 796 - struct dentry *parent; 797 - 798 - if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) 799 - return; 800 - parent = dget_parent(dentry); 801 - nfs_advise_use_readdirplus(d_inode(parent)); 802 - dput(parent); 809 + if (!IS_ROOT(dentry)) { 810 + struct dentry *parent = dget_parent(dentry); 811 + nfs_readdir_record_entry_cache_hit(d_inode(parent)); 812 + dput(parent); 813 + } 803 814 } 804 815 805 816 static u32 nfs_get_valid_attrmask(struct inode *inode) ··· 841 840 int err = 0; 842 841 bool force_sync = query_flags & AT_STATX_FORCE_SYNC; 843 842 bool do_update = false; 843 + bool readdirplus_enabled = nfs_getattr_readdirplus_enable(inode); 844 844 845 845 trace_nfs_getattr_enter(inode); 846 846 ··· 850 848 STATX_INO | STATX_SIZE | STATX_BLOCKS; 851 849 852 850 if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { 853 - nfs_readdirplus_parent_cache_hit(path->dentry); 851 + if 
(readdirplus_enabled) 852 + nfs_readdirplus_parent_cache_hit(path->dentry); 854 853 goto out_no_revalidate; 855 854 } 856 855 ··· 901 898 do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; 902 899 903 900 if (do_update) { 904 - /* Update the attribute cache */ 905 - if (!(server->flags & NFS_MOUNT_NOAC)) 901 + if (readdirplus_enabled) 906 902 nfs_readdirplus_parent_cache_miss(path->dentry); 907 - else 908 - nfs_readdirplus_parent_cache_hit(path->dentry); 909 903 err = __nfs_revalidate_inode(server, inode); 910 904 if (err) 911 905 goto out; 912 - } else 906 + } else if (readdirplus_enabled) 913 907 nfs_readdirplus_parent_cache_hit(path->dentry); 914 908 out_no_revalidate: 915 909 /* Only return attributes that were revalidated. */ ··· 952 952 res = __nfs_find_lock_context(ctx); 953 953 rcu_read_unlock(); 954 954 if (res == NULL) { 955 - new = kmalloc(sizeof(*new), GFP_KERNEL); 955 + new = kmalloc(sizeof(*new), GFP_KERNEL_ACCOUNT); 956 956 if (new == NULL) 957 957 return ERR_PTR(-ENOMEM); 958 958 nfs_init_lock_context(new); ··· 1030 1030 { 1031 1031 struct nfs_open_context *ctx; 1032 1032 1033 - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 1033 + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); 1034 1034 if (!ctx) 1035 1035 return ERR_PTR(-ENOMEM); 1036 1036 nfs_sb_active(dentry->d_sb); ··· 1583 1583 { 1584 1584 struct nfs_fattr *fattr; 1585 1585 1586 - fattr = kmalloc(sizeof(*fattr), GFP_NOFS); 1586 + fattr = kmalloc(sizeof(*fattr), GFP_KERNEL); 1587 1587 if (fattr != NULL) { 1588 1588 nfs_fattr_init(fattr); 1589 1589 fattr->label = NULL; ··· 1599 1599 if (!fattr) 1600 1600 return NULL; 1601 1601 1602 - fattr->label = nfs4_label_alloc(server, GFP_NOFS); 1602 + fattr->label = nfs4_label_alloc(server, GFP_KERNEL); 1603 1603 if (IS_ERR(fattr->label)) { 1604 1604 kfree(fattr); 1605 1605 return NULL; ··· 1613 1613 { 1614 1614 struct nfs_fh *fh; 1615 1615 1616 - fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS); 1616 + fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 1617 
1617 if (fh != NULL) 1618 1618 fh->size = 0; 1619 1619 return fh;
+23 -2
fs/nfs/internal.h
··· 366 366 const struct nfs_client_initdata *); 367 367 368 368 /* dir.c */ 369 - extern void nfs_advise_use_readdirplus(struct inode *dir); 370 - extern void nfs_force_use_readdirplus(struct inode *dir); 369 + extern void nfs_readdir_record_entry_cache_hit(struct inode *dir); 370 + extern void nfs_readdir_record_entry_cache_miss(struct inode *dir); 371 371 extern unsigned long nfs_access_cache_count(struct shrinker *shrink, 372 372 struct shrink_control *sc); 373 373 extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, ··· 387 387 dev_t); 388 388 int nfs_rename(struct user_namespace *, struct inode *, struct dentry *, 389 389 struct inode *, struct dentry *, unsigned int); 390 + 391 + #ifdef CONFIG_NFS_V4_2 392 + static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server) 393 + { 394 + if (!(server->caps & NFS_CAP_XATTR)) 395 + return 0; 396 + return NFS4_ACCESS_XAREAD | NFS4_ACCESS_XAWRITE | NFS4_ACCESS_XALIST; 397 + } 398 + #else 399 + static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server) 400 + { 401 + return 0; 402 + } 403 + #endif 390 404 391 405 /* file.c */ 392 406 int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); ··· 585 571 { 586 572 return verf->committed > NFS_UNSTABLE && 587 573 !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier); 574 + } 575 + 576 + static inline gfp_t nfs_io_gfp_mask(void) 577 + { 578 + if (current->flags & PF_WQ_WORKER) 579 + return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; 580 + return GFP_KERNEL; 588 581 } 589 582 590 583 /* unlink.c */
+1 -2
fs/nfs/nfs2xdr.c
··· 949 949 950 950 error = decode_filename_inline(xdr, &entry->name, &entry->len); 951 951 if (unlikely(error)) 952 - return error; 952 + return -EAGAIN; 953 953 954 954 /* 955 955 * The type (size and byte order) of nfscookie isn't defined in 956 956 * RFC 1094. This implementation assumes that it's an XDR uint32. 957 957 */ 958 - entry->prev_cookie = entry->cookie; 959 958 p = xdr_inline_decode(xdr, 4); 960 959 if (unlikely(!p)) 961 960 return -EAGAIN;
+11 -19
fs/nfs/nfs3xdr.c
··· 1261 1261 static void encode_readdirplus3args(struct xdr_stream *xdr, 1262 1262 const struct nfs3_readdirargs *args) 1263 1263 { 1264 + uint32_t dircount = args->count; 1265 + uint32_t maxcount = args->count; 1264 1266 __be32 *p; 1265 1267 1266 1268 encode_nfs_fh3(xdr, args->fh); ··· 1275 1273 * readdirplus: need dircount + buffer size. 1276 1274 * We just make sure we make dircount big enough 1277 1275 */ 1278 - *p++ = cpu_to_be32(args->count >> 3); 1279 - 1280 - *p = cpu_to_be32(args->count); 1276 + *p++ = cpu_to_be32(dircount); 1277 + *p = cpu_to_be32(maxcount); 1281 1278 } 1282 1279 1283 1280 static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, ··· 1968 1967 bool plus) 1969 1968 { 1970 1969 struct user_namespace *userns = rpc_userns(entry->server->client); 1971 - struct nfs_entry old = *entry; 1972 1970 __be32 *p; 1973 1971 int error; 1974 1972 u64 new_cookie; ··· 1987 1987 1988 1988 error = decode_fileid3(xdr, &entry->ino); 1989 1989 if (unlikely(error)) 1990 - return error; 1990 + return -EAGAIN; 1991 1991 1992 1992 error = decode_inline_filename3(xdr, &entry->name, &entry->len); 1993 1993 if (unlikely(error)) 1994 - return error; 1994 + return -EAGAIN; 1995 1995 1996 1996 error = decode_cookie3(xdr, &new_cookie); 1997 1997 if (unlikely(error)) 1998 - return error; 1998 + return -EAGAIN; 1999 1999 2000 2000 entry->d_type = DT_UNKNOWN; 2001 2001 ··· 2003 2003 entry->fattr->valid = 0; 2004 2004 error = decode_post_op_attr(xdr, entry->fattr, userns); 2005 2005 if (unlikely(error)) 2006 - return error; 2006 + return -EAGAIN; 2007 2007 if (entry->fattr->valid & NFS_ATTR_FATTR_V3) 2008 2008 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); 2009 2009 ··· 2018 2018 return -EAGAIN; 2019 2019 if (*p != xdr_zero) { 2020 2020 error = decode_nfs_fh3(xdr, entry->fh); 2021 - if (unlikely(error)) { 2022 - if (error == -E2BIG) 2023 - goto out_truncated; 2024 - return error; 2025 - } 2021 + if (unlikely(error)) 2022 + return -EAGAIN; 2026 2023 } else 2027 
2024 zero_nfs_fh3(entry->fh); 2028 2025 } 2029 2026 2030 - entry->prev_cookie = entry->cookie; 2031 2027 entry->cookie = new_cookie; 2032 2028 2033 2029 return 0; 2034 - 2035 - out_truncated: 2036 - dprintk("NFS: directory entry contains invalid file handle\n"); 2037 - *entry = old; 2038 - return -EAGAIN; 2039 2030 } 2040 2031 2041 2032 /* ··· 2219 2228 /* ignore properties */ 2220 2229 result->lease_time = 0; 2221 2230 result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; 2231 + result->xattr_support = 0; 2222 2232 return 0; 2223 2233 } 2224 2234
+18 -16
fs/nfs/nfs42proc.c
··· 175 175 nfs4_stateid *src_stateid, 176 176 bool *restart) 177 177 { 178 - struct nfs4_copy_state *copy, *tmp_copy; 178 + struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter; 179 179 int status = NFS4_OK; 180 - bool found_pending = false; 181 180 struct nfs_open_context *dst_ctx = nfs_file_open_context(dst); 182 181 struct nfs_open_context *src_ctx = nfs_file_open_context(src); 183 182 184 - copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); 183 + copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL); 185 184 if (!copy) 186 185 return -ENOMEM; 187 186 188 187 spin_lock(&dst_server->nfs_client->cl_lock); 189 - list_for_each_entry(tmp_copy, 188 + list_for_each_entry(iter, 190 189 &dst_server->nfs_client->pending_cb_stateids, 191 190 copies) { 192 - if (memcmp(&res->write_res.stateid, &tmp_copy->stateid, 191 + if (memcmp(&res->write_res.stateid, &iter->stateid, 193 192 NFS4_STATEID_SIZE)) 194 193 continue; 195 - found_pending = true; 196 - list_del(&tmp_copy->copies); 194 + tmp_copy = iter; 195 + list_del(&iter->copies); 197 196 break; 198 197 } 199 - if (found_pending) { 198 + if (tmp_copy) { 200 199 spin_unlock(&dst_server->nfs_client->cl_lock); 201 200 kfree(copy); 202 201 copy = tmp_copy; ··· 253 254 struct nfs_commitres cres; 254 255 int status = -ENOMEM; 255 256 256 - cres.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS); 257 + cres.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_KERNEL); 257 258 if (!cres.verf) 258 259 goto out; 259 260 ··· 356 357 res->commit_res.verf = NULL; 357 358 if (args->sync) { 358 359 res->commit_res.verf = 359 - kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS); 360 + kzalloc(sizeof(struct nfs_writeverf), GFP_KERNEL); 360 361 if (!res->commit_res.verf) 361 362 return -ENOMEM; 362 363 } ··· 551 552 if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL)) 552 553 return -EOPNOTSUPP; 553 554 554 - data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_NOFS); 555 + data = kzalloc(sizeof(struct 
nfs42_offloadcancel_data), GFP_KERNEL); 555 556 if (data == NULL) 556 557 return -ENOMEM; 557 558 ··· 590 591 591 592 ctx = get_nfs_open_context(nfs_file_open_context(src)); 592 593 l_ctx = nfs_get_lock_context(ctx); 593 - if (IS_ERR(l_ctx)) 594 - return PTR_ERR(l_ctx); 594 + if (IS_ERR(l_ctx)) { 595 + status = PTR_ERR(l_ctx); 596 + goto out; 597 + } 595 598 596 599 status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, 597 600 FMODE_READ); ··· 601 600 if (status) { 602 601 if (status == -EAGAIN) 603 602 status = -NFS4ERR_BAD_STATEID; 604 - return status; 603 + goto out; 605 604 } 606 605 607 606 status = nfs4_call_sync(src_server->client, src_server, &msg, ··· 610 609 if (status == -ENOTSUPP) 611 610 src_server->caps &= ~NFS_CAP_COPY_NOTIFY; 612 611 612 + out: 613 613 put_nfs_open_context(nfs_file_open_context(src)); 614 614 return status; 615 615 } ··· 628 626 if (!(src_server->caps & NFS_CAP_COPY_NOTIFY)) 629 627 return -EOPNOTSUPP; 630 628 631 - args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS); 629 + args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_KERNEL); 632 630 if (args == NULL) 633 631 return -ENOMEM; 634 632 ··· 1016 1014 return -EOPNOTSUPP; 1017 1015 if (n > NFS42_LAYOUTERROR_MAX) 1018 1016 return -EINVAL; 1019 - data = nfs42_alloc_layouterror_data(lseg, GFP_NOFS); 1017 + data = nfs42_alloc_layouterror_data(lseg, nfs_io_gfp_mask()); 1020 1018 if (!data) 1021 1019 return -ENOMEM; 1022 1020 for (i = 0; i < n; i++) {
+3 -4
fs/nfs/nfs42xattr.c
··· 199 199 flags = NFS4_XATTR_ENTRY_EXTVAL; 200 200 } 201 201 202 - buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS); 202 + buf = kmalloc(alloclen, GFP_KERNEL); 203 203 if (buf == NULL) 204 204 return NULL; 205 205 entry = (struct nfs4_xattr_entry *)buf; ··· 213 213 214 214 215 215 if (flags & NFS4_XATTR_ENTRY_EXTVAL) { 216 - valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS); 216 + valp = kvmalloc(len, GFP_KERNEL); 217 217 if (valp == NULL) { 218 218 kfree(buf); 219 219 return NULL; ··· 289 289 { 290 290 struct nfs4_xattr_cache *cache; 291 291 292 - cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, 293 - GFP_KERNEL_ACCOUNT | GFP_NOFS); 292 + cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, GFP_KERNEL); 294 293 if (cache == NULL) 295 294 return NULL; 296 295
+1
fs/nfs/nfs4_fs.h
··· 42 42 NFS4CLNT_LEASE_MOVED, 43 43 NFS4CLNT_DELEGATION_EXPIRED, 44 44 NFS4CLNT_RUN_MANAGER, 45 + NFS4CLNT_MANAGER_AVAILABLE, 45 46 NFS4CLNT_RECALL_RUNNING, 46 47 NFS4CLNT_RECALL_ANY_LAYOUT_READ, 47 48 NFS4CLNT_RECALL_ANY_LAYOUT_RW,
+4 -4
fs/nfs/nfs4file.c
··· 165 165 if (sync) 166 166 return -EOPNOTSUPP; 167 167 cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res), 168 - GFP_NOFS); 168 + GFP_KERNEL); 169 169 if (unlikely(cn_resp == NULL)) 170 170 return -ENOMEM; 171 171 ··· 180 180 ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count, 181 181 nss, cnrs, sync); 182 182 out: 183 - if (!nfs42_files_from_same_server(file_in, file_out)) 184 - kfree(cn_resp); 183 + kfree(cn_resp); 184 + 185 185 if (ret == -EAGAIN) 186 186 goto retry; 187 187 return ret; ··· 339 339 340 340 res = ERR_PTR(-ENOMEM); 341 341 len = strlen(SSC_READ_NAME_BODY) + 16; 342 - read_name = kzalloc(len, GFP_NOFS); 342 + read_name = kzalloc(len, GFP_KERNEL); 343 343 if (read_name == NULL) 344 344 goto out; 345 345 snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
+45 -17
fs/nfs/nfs4proc.c
··· 1392 1392 case NFS4_OPEN_CLAIM_FH: 1393 1393 p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | 1394 1394 NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE | 1395 - NFS4_ACCESS_EXECUTE; 1396 - #ifdef CONFIG_NFS_V4_2 1397 - if (!(server->caps & NFS_CAP_XATTR)) 1398 - break; 1399 - p->o_arg.access |= NFS4_ACCESS_XAREAD | NFS4_ACCESS_XAWRITE | 1400 - NFS4_ACCESS_XALIST; 1401 - #endif 1395 + NFS4_ACCESS_EXECUTE | 1396 + nfs_access_xattr_mask(server); 1402 1397 } 1403 1398 p->o_arg.clientid = server->nfs_client->cl_clientid; 1404 1399 p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); ··· 3045 3050 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); 3046 3051 if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) 3047 3052 set_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags); 3053 + if (opendata->o_res.rflags & NFS4_OPEN_RESULT_PRESERVE_UNLINKED) 3054 + set_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(state->inode)->flags); 3048 3055 3049 3056 dentry = opendata->dentry; 3050 3057 if (d_really_is_negative(dentry)) { ··· 5901 5904 buflen = server->rsize; 5902 5905 5903 5906 npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; 5904 - pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS); 5907 + pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); 5905 5908 if (!pages) 5906 5909 return -ENOMEM; 5907 5910 ··· 6606 6609 }; 6607 6610 int status = 0; 6608 6611 6609 - data = kzalloc(sizeof(*data), GFP_NOFS); 6612 + data = kzalloc(sizeof(*data), GFP_KERNEL); 6610 6613 if (data == NULL) 6611 6614 return -ENOMEM; 6612 6615 ··· 6794 6797 struct nfs4_state *state = lsp->ls_state; 6795 6798 struct inode *inode = state->inode; 6796 6799 6797 - p = kzalloc(sizeof(*p), GFP_NOFS); 6800 + p = kzalloc(sizeof(*p), GFP_KERNEL); 6798 6801 if (p == NULL) 6799 6802 return NULL; 6800 6803 p->arg.fh = NFS_FH(inode); ··· 7199 7202 task_setup_data.flags |= RPC_TASK_MOVEABLE; 7200 7203 7201 7204 data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), 7202 - 
fl->fl_u.nfs4_fl.owner, 7203 - recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS); 7205 + fl->fl_u.nfs4_fl.owner, GFP_KERNEL); 7204 7206 if (data == NULL) 7205 7207 return -ENOMEM; 7206 7208 if (IS_SETLKW(cmd)) ··· 7622 7626 if (server->nfs_client->cl_mvops->minor_version != 0) 7623 7627 return; 7624 7628 7625 - data = kmalloc(sizeof(*data), GFP_NOFS); 7629 + data = kmalloc(sizeof(*data), GFP_KERNEL); 7626 7630 if (!data) 7627 7631 return; 7628 7632 data->lsp = lsp; ··· 8008 8012 .rpc_resp = &res, 8009 8013 .rpc_cred = cred, 8010 8014 }; 8015 + struct nfs4_call_sync_data data = { 8016 + .seq_server = server, 8017 + .seq_args = &args.seq_args, 8018 + .seq_res = &res.seq_res, 8019 + }; 8020 + struct rpc_task_setup task_setup_data = { 8021 + .rpc_client = clnt, 8022 + .rpc_message = &msg, 8023 + .callback_ops = server->nfs_client->cl_mvops->call_sync_ops, 8024 + .callback_data = &data, 8025 + .flags = RPC_TASK_NO_ROUND_ROBIN, 8026 + }; 8011 8027 int status; 8012 8028 8013 8029 nfs_fattr_init(&locations->fattr); ··· 8027 8019 locations->nlocations = 0; 8028 8020 8029 8021 nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); 8030 - status = nfs4_call_sync_sequence(clnt, server, &msg, 8031 - &args.seq_args, &res.seq_res); 8022 + status = nfs4_call_sync_custom(&task_setup_data); 8032 8023 if (status == NFS4_OK && 8033 8024 res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) 8034 8025 status = -NFS4ERR_LEASE_MOVED; ··· 8340 8333 case -NFS4ERR_DEADSESSION: 8341 8334 nfs4_schedule_session_recovery(clp->cl_session, 8342 8335 task->tk_status); 8336 + return; 8343 8337 } 8344 8338 if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && 8345 8339 res->dir != NFS4_CDFS4_BOTH) { ··· 9299 9291 goto out_err; 9300 9292 9301 9293 ret = ERR_PTR(-ENOMEM); 9302 - calldata = kzalloc(sizeof(*calldata), GFP_NOFS); 9294 + calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); 9303 9295 if (calldata == NULL) 9304 9296 goto out_put_clp; 9305 9297 nfs4_init_sequence(&calldata->args, &calldata->res, 
0, is_privileged); ··· 10230 10222 &task_setup.rpc_client, &msg); 10231 10223 10232 10224 dprintk("NFS call free_stateid %p\n", stateid); 10233 - data = kmalloc(sizeof(*data), GFP_NOFS); 10225 + data = kmalloc(sizeof(*data), GFP_KERNEL); 10234 10226 if (!data) 10235 10227 return -ENOMEM; 10236 10228 data->server = server; ··· 10469 10461 return error + error2 + error3; 10470 10462 } 10471 10463 10464 + static void nfs4_enable_swap(struct inode *inode) 10465 + { 10466 + /* The state manager thread must always be running. 10467 + * It will notice the client is a swapper, and stay put. 10468 + */ 10469 + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 10470 + 10471 + nfs4_schedule_state_manager(clp); 10472 + } 10473 + 10474 + static void nfs4_disable_swap(struct inode *inode) 10475 + { 10476 + /* The state manager thread will now exit once it is 10477 + * woken. 10478 + */ 10479 + wake_up_var(&NFS_SERVER(inode)->nfs_client->cl_state); 10480 + } 10481 + 10472 10482 static const struct inode_operations nfs4_dir_inode_operations = { 10473 10483 .create = nfs_create, 10474 10484 .lookup = nfs_lookup, ··· 10564 10538 .create_server = nfs4_create_server, 10565 10539 .clone_server = nfs_clone_server, 10566 10540 .discover_trunking = nfs4_discover_trunking, 10541 + .enable_swap = nfs4_enable_swap, 10542 + .disable_swap = nfs4_disable_swap, 10567 10543 }; 10568 10544 10569 10545 static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
+49 -10
fs/nfs/nfs4state.c
··· 49 49 #include <linux/workqueue.h> 50 50 #include <linux/bitops.h> 51 51 #include <linux/jiffies.h> 52 + #include <linux/sched/mm.h> 52 53 53 54 #include <linux/sunrpc/clnt.h> 54 55 ··· 667 666 { 668 667 struct nfs4_state *state; 669 668 670 - state = kzalloc(sizeof(*state), GFP_NOFS); 669 + state = kzalloc(sizeof(*state), GFP_KERNEL_ACCOUNT); 671 670 if (!state) 672 671 return NULL; 673 672 refcount_set(&state->count, 1); ··· 821 820 822 821 void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) 823 822 { 824 - __nfs4_close(state, fmode, GFP_NOFS, 0); 823 + __nfs4_close(state, fmode, GFP_KERNEL, 0); 825 824 } 826 825 827 826 void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) ··· 870 869 struct nfs4_lock_state *lsp; 871 870 struct nfs_server *server = state->owner->so_server; 872 871 873 - lsp = kzalloc(sizeof(*lsp), GFP_NOFS); 872 + lsp = kzalloc(sizeof(*lsp), GFP_KERNEL_ACCOUNT); 874 873 if (lsp == NULL) 875 874 return NULL; 876 875 nfs4_init_seqid_counter(&lsp->ls_seqid); 877 876 refcount_set(&lsp->ls_count, 1); 878 877 lsp->ls_state = state; 879 878 lsp->ls_owner = fl_owner; 880 - lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); 879 + lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 880 + 0, 0, GFP_KERNEL_ACCOUNT); 881 881 if (lsp->ls_seqid.owner_id < 0) 882 882 goto out_free; 883 883 INIT_LIST_HEAD(&lsp->ls_locks); ··· 1207 1205 { 1208 1206 struct task_struct *task; 1209 1207 char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; 1208 + struct rpc_clnt *cl = clp->cl_rpcclient; 1209 + 1210 + while (cl != cl->cl_parent) 1211 + cl = cl->cl_parent; 1210 1212 1211 1213 set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); 1212 - if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) 1214 + if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { 1215 + wake_up_var(&clp->cl_state); 1213 1216 return; 1217 + } 1218 + set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); 1214 1219 
__module_get(THIS_MODULE); 1215 1220 refcount_inc(&clp->cl_count); 1216 1221 ··· 1233 1224 printk(KERN_ERR "%s: kthread_run: %ld\n", 1234 1225 __func__, PTR_ERR(task)); 1235 1226 nfs4_clear_state_manager_bit(clp); 1227 + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); 1236 1228 nfs_put_client(clp); 1237 1229 module_put(THIS_MODULE); 1238 1230 } ··· 2570 2560 2571 2561 static void nfs4_state_manager(struct nfs_client *clp) 2572 2562 { 2563 + unsigned int memflags; 2573 2564 int status = 0; 2574 2565 const char *section = "", *section_sep = ""; 2566 + 2567 + /* 2568 + * State recovery can deadlock if the direct reclaim code tries 2569 + * start NFS writeback. So ensure memory allocations are all 2570 + * GFP_NOFS. 2571 + */ 2572 + memflags = memalloc_nofs_save(); 2575 2573 2576 2574 /* Ensure exclusive access to NFSv4 state */ 2577 2575 do { ··· 2675 2657 clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); 2676 2658 } 2677 2659 2660 + memalloc_nofs_restore(memflags); 2678 2661 nfs4_end_drain_session(clp); 2679 2662 nfs4_clear_state_manager_bit(clp); 2680 2663 ··· 2688 2669 clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state); 2689 2670 } 2690 2671 2691 - /* Did we race with an attempt to give us more work? 
*/ 2692 - if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) 2693 - return; 2694 - if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) 2695 - return; 2672 + return; 2673 + 2696 2674 } while (refcount_read(&clp->cl_count) > 1 && !signalled()); 2697 2675 goto out_drain; 2698 2676 ··· 2702 2686 clp->cl_hostname, -status); 2703 2687 ssleep(1); 2704 2688 out_drain: 2689 + memalloc_nofs_restore(memflags); 2705 2690 nfs4_end_drain_session(clp); 2706 2691 nfs4_clear_state_manager_bit(clp); 2707 2692 } ··· 2710 2693 static int nfs4_run_state_manager(void *ptr) 2711 2694 { 2712 2695 struct nfs_client *clp = ptr; 2696 + struct rpc_clnt *cl = clp->cl_rpcclient; 2697 + 2698 + while (cl != cl->cl_parent) 2699 + cl = cl->cl_parent; 2713 2700 2714 2701 allow_signal(SIGKILL); 2702 + again: 2703 + set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); 2715 2704 nfs4_state_manager(clp); 2705 + if (atomic_read(&cl->cl_swapper)) { 2706 + wait_var_event_interruptible(&clp->cl_state, 2707 + test_bit(NFS4CLNT_RUN_MANAGER, 2708 + &clp->cl_state)); 2709 + if (atomic_read(&cl->cl_swapper) && 2710 + test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) 2711 + goto again; 2712 + /* Either no longer a swapper, or were signalled */ 2713 + } 2714 + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); 2715 + 2716 + if (refcount_read(&clp->cl_count) > 1 && !signalled() && 2717 + test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && 2718 + !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) 2719 + goto again; 2720 + 2716 2721 nfs_put_client(clp); 2717 2722 return 0; 2718 2723 }
+3 -4
fs/nfs/nfs4xdr.c
··· 1605 1605 FATTR4_WORD0_RDATTR_ERROR, 1606 1606 FATTR4_WORD1_MOUNTED_ON_FILEID, 1607 1607 }; 1608 - uint32_t dircount = readdir->count >> 1; 1608 + uint32_t dircount = readdir->count; 1609 + uint32_t maxcount = readdir->count; 1609 1610 __be32 *p, verf[2]; 1610 1611 uint32_t attrlen = 0; 1611 1612 unsigned int i; ··· 1619 1618 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| 1620 1619 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 1621 1620 attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; 1622 - dircount >>= 1; 1623 1621 } 1624 1622 /* Use mounted_on_fileid only if the server supports it */ 1625 1623 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) ··· 1634 1634 encode_nfs4_verifier(xdr, &readdir->verifier); 1635 1635 p = reserve_space(xdr, 12 + (attrlen << 2)); 1636 1636 *p++ = cpu_to_be32(dircount); 1637 - *p++ = cpu_to_be32(readdir->count); 1637 + *p++ = cpu_to_be32(maxcount); 1638 1638 *p++ = cpu_to_be32(attrlen); 1639 1639 for (i = 0; i < attrlen; i++) 1640 1640 *p++ = cpu_to_be32(attrs[i]); ··· 7508 7508 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) 7509 7509 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); 7510 7510 7511 - entry->prev_cookie = entry->cookie; 7512 7511 entry->cookie = new_cookie; 7513 7512 7514 7513 return 0;
+216 -5
fs/nfs/nfstrace.h
··· 21 21 { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ 22 22 { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ 23 23 { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ 24 - { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \ 25 24 { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ 26 25 { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }, \ 27 26 { NFS_INO_INVALID_CHANGE, "INVALID_CHANGE" }, \ ··· 36 37 37 38 #define nfs_show_nfsi_flags(v) \ 38 39 __print_flags(v, "|", \ 39 - { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \ 40 40 { BIT(NFS_INO_STALE), "STALE" }, \ 41 41 { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \ 42 42 { BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \ ··· 160 162 DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); 161 163 DEFINE_NFS_INODE_EVENT(nfs_access_enter); 162 164 DEFINE_NFS_INODE_EVENT_DONE(nfs_set_cache_invalid); 165 + DEFINE_NFS_INODE_EVENT(nfs_readdir_force_readdirplus); 166 + DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_cache_fill_done); 167 + DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_uncached_done); 163 168 164 169 TRACE_EVENT(nfs_access_exit, 165 170 TP_PROTO( ··· 274 273 DEFINE_NFS_UPDATE_SIZE_EVENT(update); 275 274 DEFINE_NFS_UPDATE_SIZE_EVENT(grow); 276 275 276 + DECLARE_EVENT_CLASS(nfs_inode_range_event, 277 + TP_PROTO( 278 + const struct inode *inode, 279 + loff_t range_start, 280 + loff_t range_end 281 + ), 282 + 283 + TP_ARGS(inode, range_start, range_end), 284 + 285 + TP_STRUCT__entry( 286 + __field(dev_t, dev) 287 + __field(u32, fhandle) 288 + __field(u64, fileid) 289 + __field(u64, version) 290 + __field(loff_t, range_start) 291 + __field(loff_t, range_end) 292 + ), 293 + 294 + TP_fast_assign( 295 + const struct nfs_inode *nfsi = NFS_I(inode); 296 + 297 + __entry->dev = inode->i_sb->s_dev; 298 + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); 299 + __entry->fileid = nfsi->fileid; 300 + __entry->version = inode_peek_iversion_raw(inode); 301 + __entry->range_start = range_start; 302 + __entry->range_end = range_end; 303 + ), 304 + 305 + TP_printk( 306 + 
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " 307 + "range=[%lld, %lld]", 308 + MAJOR(__entry->dev), MINOR(__entry->dev), 309 + (unsigned long long)__entry->fileid, 310 + __entry->fhandle, __entry->version, 311 + __entry->range_start, __entry->range_end 312 + ) 313 + ); 314 + 315 + #define DEFINE_NFS_INODE_RANGE_EVENT(name) \ 316 + DEFINE_EVENT(nfs_inode_range_event, name, \ 317 + TP_PROTO( \ 318 + const struct inode *inode, \ 319 + loff_t range_start, \ 320 + loff_t range_end \ 321 + ), \ 322 + TP_ARGS(inode, range_start, range_end)) 323 + 324 + DEFINE_NFS_INODE_RANGE_EVENT(nfs_readdir_invalidate_cache_range); 325 + 326 + DECLARE_EVENT_CLASS(nfs_readdir_event, 327 + TP_PROTO( 328 + const struct file *file, 329 + const __be32 *verifier, 330 + u64 cookie, 331 + pgoff_t page_index, 332 + unsigned int dtsize 333 + ), 334 + 335 + TP_ARGS(file, verifier, cookie, page_index, dtsize), 336 + 337 + TP_STRUCT__entry( 338 + __field(dev_t, dev) 339 + __field(u32, fhandle) 340 + __field(u64, fileid) 341 + __field(u64, version) 342 + __array(char, verifier, NFS4_VERIFIER_SIZE) 343 + __field(u64, cookie) 344 + __field(pgoff_t, index) 345 + __field(unsigned int, dtsize) 346 + ), 347 + 348 + TP_fast_assign( 349 + const struct inode *dir = file_inode(file); 350 + const struct nfs_inode *nfsi = NFS_I(dir); 351 + 352 + __entry->dev = dir->i_sb->s_dev; 353 + __entry->fileid = nfsi->fileid; 354 + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); 355 + __entry->version = inode_peek_iversion_raw(dir); 356 + if (cookie != 0) 357 + memcpy(__entry->verifier, verifier, 358 + NFS4_VERIFIER_SIZE); 359 + else 360 + memset(__entry->verifier, 0, 361 + NFS4_VERIFIER_SIZE); 362 + __entry->cookie = cookie; 363 + __entry->index = page_index; 364 + __entry->dtsize = dtsize; 365 + ), 366 + 367 + TP_printk( 368 + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " 369 + "cookie=%s:0x%llx cache_index=%lu dtsize=%u", 370 + MAJOR(__entry->dev), MINOR(__entry->dev), 371 + (unsigned long 
long)__entry->fileid, __entry->fhandle, 372 + __entry->version, show_nfs4_verifier(__entry->verifier), 373 + (unsigned long long)__entry->cookie, __entry->index, 374 + __entry->dtsize 375 + ) 376 + ); 377 + 378 + #define DEFINE_NFS_READDIR_EVENT(name) \ 379 + DEFINE_EVENT(nfs_readdir_event, name, \ 380 + TP_PROTO( \ 381 + const struct file *file, \ 382 + const __be32 *verifier, \ 383 + u64 cookie, \ 384 + pgoff_t page_index, \ 385 + unsigned int dtsize \ 386 + ), \ 387 + TP_ARGS(file, verifier, cookie, page_index, dtsize)) 388 + 389 + DEFINE_NFS_READDIR_EVENT(nfs_readdir_cache_fill); 390 + DEFINE_NFS_READDIR_EVENT(nfs_readdir_uncached); 391 + 277 392 DECLARE_EVENT_CLASS(nfs_lookup_event, 278 393 TP_PROTO( 279 394 const struct inode *dir, ··· 483 366 DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); 484 367 DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); 485 368 DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); 369 + DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup); 370 + DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup_revalidate_failed); 371 + DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_readdir_lookup_revalidate); 486 372 487 373 TRACE_EVENT(nfs_atomic_open_enter, 488 374 TP_PROTO( ··· 1009 889 TRACE_EVENT(nfs_aop_readahead, 1010 890 TP_PROTO( 1011 891 const struct inode *inode, 1012 - struct page *page, 892 + loff_t pos, 1013 893 unsigned int nr_pages 1014 894 ), 1015 895 1016 - TP_ARGS(inode, page, nr_pages), 896 + TP_ARGS(inode, pos, nr_pages), 1017 897 1018 898 TP_STRUCT__entry( 1019 899 __field(dev_t, dev) ··· 1031 911 __entry->fileid = nfsi->fileid; 1032 912 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); 1033 913 __entry->version = inode_peek_iversion_raw(inode); 1034 - __entry->offset = page_index(page) << PAGE_SHIFT; 914 + __entry->offset = pos; 1035 915 __entry->nr_pages = nr_pages; 1036 916 ), 1037 917 ··· 1214 1094 __entry->eof ? 
" eof" : "" 1215 1095 ) 1216 1096 ); 1097 + 1098 + DECLARE_EVENT_CLASS(nfs_fscache_page_event, 1099 + TP_PROTO( 1100 + const struct inode *inode, 1101 + struct page *page 1102 + ), 1103 + 1104 + TP_ARGS(inode, page), 1105 + 1106 + TP_STRUCT__entry( 1107 + __field(dev_t, dev) 1108 + __field(u32, fhandle) 1109 + __field(u64, fileid) 1110 + __field(loff_t, offset) 1111 + ), 1112 + 1113 + TP_fast_assign( 1114 + const struct nfs_inode *nfsi = NFS_I(inode); 1115 + const struct nfs_fh *fh = &nfsi->fh; 1116 + 1117 + __entry->offset = page_index(page) << PAGE_SHIFT; 1118 + __entry->dev = inode->i_sb->s_dev; 1119 + __entry->fileid = nfsi->fileid; 1120 + __entry->fhandle = nfs_fhandle_hash(fh); 1121 + ), 1122 + 1123 + TP_printk( 1124 + "fileid=%02x:%02x:%llu fhandle=0x%08x " 1125 + "offset=%lld", 1126 + MAJOR(__entry->dev), MINOR(__entry->dev), 1127 + (unsigned long long)__entry->fileid, 1128 + __entry->fhandle, 1129 + (long long)__entry->offset 1130 + ) 1131 + ); 1132 + DECLARE_EVENT_CLASS(nfs_fscache_page_event_done, 1133 + TP_PROTO( 1134 + const struct inode *inode, 1135 + struct page *page, 1136 + int error 1137 + ), 1138 + 1139 + TP_ARGS(inode, page, error), 1140 + 1141 + TP_STRUCT__entry( 1142 + __field(int, error) 1143 + __field(dev_t, dev) 1144 + __field(u32, fhandle) 1145 + __field(u64, fileid) 1146 + __field(loff_t, offset) 1147 + ), 1148 + 1149 + TP_fast_assign( 1150 + const struct nfs_inode *nfsi = NFS_I(inode); 1151 + const struct nfs_fh *fh = &nfsi->fh; 1152 + 1153 + __entry->offset = page_index(page) << PAGE_SHIFT; 1154 + __entry->dev = inode->i_sb->s_dev; 1155 + __entry->fileid = nfsi->fileid; 1156 + __entry->fhandle = nfs_fhandle_hash(fh); 1157 + __entry->error = error; 1158 + ), 1159 + 1160 + TP_printk( 1161 + "fileid=%02x:%02x:%llu fhandle=0x%08x " 1162 + "offset=%lld error=%d", 1163 + MAJOR(__entry->dev), MINOR(__entry->dev), 1164 + (unsigned long long)__entry->fileid, 1165 + __entry->fhandle, 1166 + (long long)__entry->offset, __entry->error 1167 + ) 1168 
+ ); 1169 + #define DEFINE_NFS_FSCACHE_PAGE_EVENT(name) \ 1170 + DEFINE_EVENT(nfs_fscache_page_event, name, \ 1171 + TP_PROTO( \ 1172 + const struct inode *inode, \ 1173 + struct page *page \ 1174 + ), \ 1175 + TP_ARGS(inode, page)) 1176 + #define DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(name) \ 1177 + DEFINE_EVENT(nfs_fscache_page_event_done, name, \ 1178 + TP_PROTO( \ 1179 + const struct inode *inode, \ 1180 + struct page *page, \ 1181 + int error \ 1182 + ), \ 1183 + TP_ARGS(inode, page, error)) 1184 + DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_read_page); 1185 + DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_read_page_exit); 1186 + DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_write_page); 1187 + DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_write_page_exit); 1217 1188 1218 1189 TRACE_EVENT(nfs_pgio_error, 1219 1190 TP_PROTO(
+6 -5
fs/nfs/pagelist.c
··· 90 90 } 91 91 } 92 92 93 - static inline struct nfs_page * 94 - nfs_page_alloc(void) 93 + static inline struct nfs_page *nfs_page_alloc(void) 95 94 { 96 - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); 95 + struct nfs_page *p = 96 + kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask()); 97 97 if (p) 98 98 INIT_LIST_HEAD(&p->wb_list); 99 99 return p; ··· 892 892 struct nfs_commit_info cinfo; 893 893 struct nfs_page_array *pg_array = &hdr->page_array; 894 894 unsigned int pagecount, pageused; 895 - gfp_t gfp_flags = GFP_KERNEL; 895 + gfp_t gfp_flags = nfs_io_gfp_mask(); 896 896 897 897 pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); 898 898 pg_array->npages = pagecount; ··· 979 979 desc->pg_mirrors_dynamic = NULL; 980 980 if (mirror_count == 1) 981 981 return desc->pg_mirrors_static; 982 - ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); 982 + ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask()); 983 983 if (ret != NULL) { 984 984 for (i = 0; i < mirror_count; i++) 985 985 nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); ··· 1218 1218 1219 1219 do { 1220 1220 list_splice_init(&mirror->pg_list, &head); 1221 + mirror->pg_recoalesce = 0; 1221 1222 1222 1223 while (!list_empty(&head)) { 1223 1224 struct nfs_page *req;
+28 -22
fs/nfs/pnfs.c
··· 92 92 return local; 93 93 } 94 94 95 + const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) 96 + { 97 + return find_pnfs_driver(id); 98 + } 99 + 100 + void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) 101 + { 102 + if (ld) 103 + module_put(ld->owner); 104 + } 105 + 95 106 void 96 107 unset_pnfs_layoutdriver(struct nfs_server *nfss) 97 108 { ··· 1244 1233 int status = 0; 1245 1234 1246 1235 *pcred = NULL; 1247 - lrp = kzalloc(sizeof(*lrp), GFP_NOFS); 1236 + lrp = kzalloc(sizeof(*lrp), nfs_io_gfp_mask()); 1248 1237 if (unlikely(lrp == NULL)) { 1249 1238 status = -ENOMEM; 1250 1239 spin_lock(&ino->i_lock); ··· 2217 2206 struct pnfs_layout_hdr *lo; 2218 2207 2219 2208 spin_lock(&ino->i_lock); 2220 - lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL); 2209 + lo = pnfs_find_alloc_layout(ino, ctx, nfs_io_gfp_mask()); 2221 2210 if (!lo) 2222 2211 goto out_unlock; 2223 2212 if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) ··· 2260 2249 lo = _pnfs_grab_empty_layout(ino, ctx); 2261 2250 if (!lo) 2262 2251 return; 2263 - lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, 2264 - &rng, GFP_KERNEL); 2252 + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng, 2253 + nfs_io_gfp_mask()); 2265 2254 if (!lgp) { 2266 2255 pnfs_clear_first_layoutget(lo); 2267 2256 nfs_layoutget_end(lo); ··· 2286 2275 }; 2287 2276 struct nfs4_layoutget *lgp; 2288 2277 2289 - lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, 2290 - &rng, GFP_KERNEL); 2278 + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng, 2279 + nfs_io_gfp_mask()); 2291 2280 if (!lgp) 2292 2281 return; 2293 2282 data->lgp = lgp; ··· 2702 2691 else 2703 2692 rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); 2704 2693 2705 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 2706 - nfs_req_openctx(req), 2707 - req_offset(req), 2708 - rd_size, 2709 - IOMODE_READ, 2710 - false, 2711 - GFP_KERNEL); 2694 + pgio->pg_lseg = 2695 + 
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), 2696 + req_offset(req), rd_size, 2697 + IOMODE_READ, false, 2698 + nfs_io_gfp_mask()); 2712 2699 if (IS_ERR(pgio->pg_lseg)) { 2713 2700 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 2714 2701 pgio->pg_lseg = NULL; ··· 2727 2718 pnfs_generic_pg_check_layout(pgio); 2728 2719 pnfs_generic_pg_check_range(pgio, req); 2729 2720 if (pgio->pg_lseg == NULL) { 2730 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 2731 - nfs_req_openctx(req), 2732 - req_offset(req), 2733 - wb_size, 2734 - IOMODE_RW, 2735 - false, 2736 - GFP_KERNEL); 2721 + pgio->pg_lseg = 2722 + pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), 2723 + req_offset(req), wb_size, IOMODE_RW, 2724 + false, nfs_io_gfp_mask()); 2737 2725 if (IS_ERR(pgio->pg_lseg)) { 2738 2726 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 2739 2727 pgio->pg_lseg = NULL; ··· 3189 3183 3190 3184 status = -ENOMEM; 3191 3185 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ 3192 - data = kzalloc(sizeof(*data), GFP_NOFS); 3186 + data = kzalloc(sizeof(*data), nfs_io_gfp_mask()); 3193 3187 if (!data) 3194 3188 goto clear_layoutcommitting; 3195 3189 ··· 3256 3250 { 3257 3251 struct nfs4_threshold *thp; 3258 3252 3259 - thp = kzalloc(sizeof(*thp), GFP_NOFS); 3253 + thp = kzalloc(sizeof(*thp), nfs_io_gfp_mask()); 3260 3254 if (!thp) { 3261 3255 dprintk("%s mdsthreshold allocation failed\n", __func__); 3262 3256 return NULL;
+2
fs/nfs/pnfs.h
··· 234 234 235 235 extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); 236 236 extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); 237 + extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); 238 + extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); 237 239 238 240 /* nfs4proc.c */ 239 241 extern size_t max_response_pages(struct nfs_server *server);
+6 -2
fs/nfs/pnfs_nfs.c
··· 419 419 pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, 420 420 struct nfs_commit_info *cinfo) 421 421 { 422 - struct nfs_commit_data *data = nfs_commitdata_alloc(false); 422 + struct nfs_commit_data *data = nfs_commitdata_alloc(); 423 423 424 424 if (!data) 425 425 return NULL; ··· 515 515 unsigned int nreq = 0; 516 516 517 517 if (!list_empty(mds_pages)) { 518 - data = nfs_commitdata_alloc(true); 518 + data = nfs_commitdata_alloc(); 519 + if (!data) { 520 + nfs_retry_commit(mds_pages, NULL, cinfo, -1); 521 + return -ENOMEM; 522 + } 519 523 data->ds_commit_index = -1; 520 524 list_splice_init(mds_pages, &data->pages); 521 525 list_add_tail(&data->list, &list);
+1
fs/nfs/proc.c
··· 92 92 info->maxfilesize = 0x7FFFFFFF; 93 93 info->lease_time = 0; 94 94 info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; 95 + info->xattr_support = 0; 95 96 return 0; 96 97 } 97 98
+15 -14
fs/nfs/read.c
··· 123 123 struct address_space *mapping = page_file_mapping(page); 124 124 125 125 if (PageUptodate(page)) 126 - nfs_readpage_to_fscache(inode, page); 126 + nfs_fscache_write_page(inode, page); 127 127 else if (!PageError(page) && !PagePrivate(page)) 128 128 generic_error_remove_page(mapping, page); 129 129 unlock_page(page); ··· 194 194 const struct nfs_rpc_ops *rpc_ops, 195 195 struct rpc_task_setup *task_setup_data, int how) 196 196 { 197 - struct inode *inode = hdr->inode; 198 - int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 199 - 200 - task_setup_data->flags |= swap_flags; 201 197 rpc_ops->read_setup(hdr, msg); 202 198 trace_nfs_initiate_read(hdr); 203 199 } ··· 286 290 } 287 291 288 292 static int 289 - readpage_async_filler(void *data, struct page *page) 293 + readpage_async_filler(struct nfs_readdesc *desc, struct page *page) 290 294 { 291 - struct nfs_readdesc *desc = data; 292 295 struct inode *inode = page_file_mapping(page)->host; 293 296 unsigned int rsize = NFS_SERVER(inode)->rsize; 294 297 struct nfs_page *new; ··· 301 306 aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE); 302 307 303 308 if (!IS_SYNC(page->mapping->host)) { 304 - error = nfs_readpage_from_fscache(page->mapping->host, page); 309 + error = nfs_fscache_read_page(page->mapping->host, page); 305 310 if (error == 0) 306 311 goto out_unlock; 307 312 } ··· 392 397 return ret; 393 398 } 394 399 395 - int nfs_readpages(struct file *file, struct address_space *mapping, 396 - struct list_head *pages, unsigned nr_pages) 400 + void nfs_readahead(struct readahead_control *ractl) 397 401 { 402 + unsigned int nr_pages = readahead_count(ractl); 403 + struct file *file = ractl->file; 398 404 struct nfs_readdesc desc; 399 - struct inode *inode = mapping->host; 405 + struct inode *inode = ractl->mapping->host; 406 + struct page *page; 400 407 int ret; 401 408 402 - trace_nfs_aop_readahead(inode, lru_to_page(pages), nr_pages); 409 + trace_nfs_aop_readahead(inode, 
readahead_pos(ractl), nr_pages); 403 410 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 404 411 405 412 ret = -ESTALE; ··· 419 422 nfs_pageio_init_read(&desc.pgio, inode, false, 420 423 &nfs_async_read_completion_ops); 421 424 422 - ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 425 + while ((page = readahead_page(ractl)) != NULL) { 426 + ret = readpage_async_filler(&desc, page); 427 + put_page(page); 428 + if (ret) 429 + break; 430 + } 423 431 424 432 nfs_pageio_complete_read(&desc.pgio); 425 433 426 434 put_nfs_open_context(desc.ctx); 427 435 out: 428 436 trace_nfs_aop_readahead_done(inode, nr_pages, ret); 429 - return ret; 430 437 } 431 438 432 439 int __init nfs_init_readpagecache(void)
+24 -19
fs/nfs/write.c
··· 70 70 static struct kmem_cache *nfs_cdata_cachep; 71 71 static mempool_t *nfs_commit_mempool; 72 72 73 - struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail) 73 + struct nfs_commit_data *nfs_commitdata_alloc(void) 74 74 { 75 75 struct nfs_commit_data *p; 76 76 77 - if (never_fail) 78 - p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); 79 - else { 80 - /* It is OK to do some reclaim, not no safe to wait 81 - * for anything to be returned to the pool. 82 - * mempool_alloc() cannot handle that particular combination, 83 - * so we need two separate attempts. 84 - */ 77 + p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask()); 78 + if (!p) { 85 79 p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT); 86 80 if (!p) 87 - p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO | 88 - __GFP_NOWARN | __GFP_NORETRY); 89 - if (!p) 90 81 return NULL; 82 + memset(p, 0, sizeof(*p)); 91 83 } 92 - 93 - memset(p, 0, sizeof(*p)); 94 84 INIT_LIST_HEAD(&p->pages); 95 85 return p; 96 86 } ··· 94 104 95 105 static struct nfs_pgio_header *nfs_writehdr_alloc(void) 96 106 { 97 - struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); 107 + struct nfs_pgio_header *p; 98 108 99 - memset(p, 0, sizeof(*p)); 109 + p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask()); 110 + if (!p) { 111 + p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT); 112 + if (!p) 113 + return NULL; 114 + memset(p, 0, sizeof(*p)); 115 + } 100 116 p->rw_mode = FMODE_WRITE; 101 117 return p; 102 118 } ··· 302 306 /* Force file size revalidation */ 303 307 spin_lock(&inode->i_lock); 304 308 nfs_set_cache_invalid(inode, NFS_INO_REVAL_FORCED | 305 - NFS_INO_REVAL_PAGECACHE | 309 + NFS_INO_INVALID_CHANGE | 306 310 NFS_INO_INVALID_SIZE); 307 311 spin_unlock(&inode->i_lock); 308 312 } ··· 312 316 struct address_space *mapping = page_file_mapping(page); 313 317 314 318 SetPageError(page); 315 - mapping_set_error(mapping, error); 319 + filemap_set_wb_err(mapping, error); 320 + if (mapping->host) 321 + 
errseq_set(&mapping->host->i_sb->s_wb_err, 322 + error == -ENOSPC ? -ENOSPC : -EIO); 316 323 nfs_set_pageerror(mapping); 317 324 } 318 325 ··· 1416 1417 { 1417 1418 int priority = flush_task_priority(how); 1418 1419 1420 + if (IS_SWAPFILE(hdr->inode)) 1421 + task_setup_data->flags |= RPC_TASK_SWAPPER; 1419 1422 task_setup_data->priority = priority; 1420 1423 rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); 1421 1424 trace_nfs_initiate_write(hdr); ··· 1830 1829 if (list_empty(head)) 1831 1830 return 0; 1832 1831 1833 - data = nfs_commitdata_alloc(true); 1832 + data = nfs_commitdata_alloc(); 1833 + if (!data) { 1834 + nfs_retry_commit(head, NULL, cinfo, -1); 1835 + return -ENOMEM; 1836 + } 1834 1837 1835 1838 /* Set up the argument struct */ 1836 1839 nfs_init_commit(data, head, NULL, cinfo);
+19 -26
include/linux/nfs_fs.h
··· 46 46 #define NFS_MAX_TRANSPORTS 16 47 47 48 48 /* 49 - * These are the default flags for swap requests 50 - */ 51 - #define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) 52 - 53 - /* 54 49 * Size of the NFS directory verifier 55 50 */ 56 51 #define NFS_DIR_VERIFIER_SIZE 2 ··· 96 101 97 102 struct nfs_open_dir_context { 98 103 struct list_head list; 104 + atomic_t cache_hits; 105 + atomic_t cache_misses; 99 106 unsigned long attr_gencount; 100 107 __be32 verf[NFS_DIR_VERIFIER_SIZE]; 101 108 __u64 dir_cookie; 102 - __u64 dup_cookie; 109 + __u64 last_cookie; 103 110 pgoff_t page_index; 104 - signed char duped; 111 + unsigned int dtsize; 112 + bool force_clear; 105 113 bool eof; 114 + struct rcu_head rcu_head; 106 115 }; 107 116 108 117 /* ··· 246 247 #define NFS_INO_INVALID_ATIME BIT(2) /* cached atime is invalid */ 247 248 #define NFS_INO_INVALID_ACCESS BIT(3) /* cached access cred invalid */ 248 249 #define NFS_INO_INVALID_ACL BIT(4) /* cached acls are invalid */ 249 - #define NFS_INO_REVAL_PAGECACHE BIT(5) /* must revalidate pagecache */ 250 250 #define NFS_INO_REVAL_FORCED BIT(6) /* force revalidation ignoring a delegation */ 251 251 #define NFS_INO_INVALID_LABEL BIT(7) /* cached label is invalid */ 252 252 #define NFS_INO_INVALID_CHANGE BIT(8) /* cached change is invalid */ ··· 271 273 /* 272 274 * Bit offsets in flags field 273 275 */ 274 - #define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */ 275 276 #define NFS_INO_STALE (1) /* possible stale inode */ 276 277 #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ 277 278 #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ 279 + #define NFS_INO_PRESERVE_UNLINKED (4) /* preserve file if removed while open */ 278 280 #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ 279 - #define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ 280 281 #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ 281 282 #define NFS_INO_LAYOUTCOMMITTING (10) /* 
layoutcommit inflight */ 282 283 #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ ··· 352 355 struct nfs_inode *nfsi = NFS_I(inode); 353 356 354 357 spin_lock(&inode->i_lock); 355 - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE 356 - | NFS_INO_INVALID_ACCESS 357 - | NFS_INO_INVALID_ACL 358 - | NFS_INO_INVALID_CHANGE 359 - | NFS_INO_INVALID_CTIME; 358 + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | 359 + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | 360 + NFS_INO_INVALID_SIZE; 360 361 if (S_ISDIR(inode->i_mode)) 361 362 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 362 363 spin_unlock(&inode->i_lock); 363 364 } 364 365 365 - static inline int nfs_server_capable(struct inode *inode, int cap) 366 + static inline int nfs_server_capable(const struct inode *inode, int cap) 366 367 { 367 368 return NFS_SERVER(inode)->caps & cap; 368 369 } ··· 508 513 * linux/fs/nfs/direct.c 509 514 */ 510 515 extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); 511 - extern ssize_t nfs_file_direct_read(struct kiocb *iocb, 512 - struct iov_iter *iter); 513 - extern ssize_t nfs_file_direct_write(struct kiocb *iocb, 514 - struct iov_iter *iter); 516 + ssize_t nfs_file_direct_read(struct kiocb *iocb, 517 + struct iov_iter *iter, bool swap); 518 + ssize_t nfs_file_direct_write(struct kiocb *iocb, 519 + struct iov_iter *iter, bool swap); 515 520 516 521 /* 517 522 * linux/fs/nfs/dir.c ··· 580 585 extern int nfs_wb_page(struct inode *inode, struct page *page); 581 586 int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); 582 587 extern int nfs_commit_inode(struct inode *, int); 583 - extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); 588 + extern struct nfs_commit_data *nfs_commitdata_alloc(void); 584 589 extern void nfs_commit_free(struct nfs_commit_data *data); 585 590 bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); 586 591 587 - static inline int 588 - nfs_have_writebacks(struct inode *inode) 592 + 
static inline bool nfs_have_writebacks(const struct inode *inode) 589 593 { 590 594 if (S_ISREG(inode->i_mode)) 591 595 return atomic_long_read(&NFS_I(inode)->nrequests) != 0; 592 - return 0; 596 + return false; 593 597 } 594 598 595 599 /* 596 600 * linux/fs/nfs/read.c 597 601 */ 598 602 extern int nfs_readpage(struct file *, struct page *); 599 - extern int nfs_readpages(struct file *, struct address_space *, 600 - struct list_head *, unsigned); 603 + void nfs_readahead(struct readahead_control *); 601 604 602 605 /* 603 606 * inline functions
+1
include/linux/nfs_fs_sb.h
··· 152 152 #define NFS_MOUNT_SOFTREVAL 0x800000 153 153 #define NFS_MOUNT_WRITE_EAGER 0x01000000 154 154 #define NFS_MOUNT_WRITE_WAIT 0x02000000 155 + #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 155 156 156 157 unsigned int fattr_valid; /* Valid attributes */ 157 158 unsigned int caps; /* server capabilities */
+3 -2
include/linux/nfs_xdr.h
··· 745 745 */ 746 746 struct nfs_entry { 747 747 __u64 ino; 748 - __u64 cookie, 749 - prev_cookie; 748 + __u64 cookie; 750 749 const char * name; 751 750 unsigned int len; 752 751 int eof; ··· 1797 1798 struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, 1798 1799 struct nfs_fattr *, rpc_authflavor_t); 1799 1800 int (*discover_trunking)(struct nfs_server *, struct nfs_fh *); 1801 + void (*enable_swap)(struct inode *inode); 1802 + void (*disable_swap)(struct inode *inode); 1800 1803 }; 1801 1804 1802 1805 /*
+1
include/linux/sunrpc/auth.h
··· 99 99 100 100 /* Flags for rpcauth_lookupcred() */ 101 101 #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ 102 + #define RPCAUTH_LOOKUP_ASYNC 0x02 /* Don't block waiting for memory */ 102 103 103 104 /* 104 105 * Client authentication ops
+1 -1
include/linux/sunrpc/sched.h
··· 124 124 #define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ 125 125 #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ 126 126 #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ 127 - #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ 128 127 #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ 129 128 #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ 130 129 #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ ··· 262 263 extern struct workqueue_struct *rpciod_workqueue; 263 264 extern struct workqueue_struct *xprtiod_workqueue; 264 265 void rpc_prepare_task(struct rpc_task *task); 266 + gfp_t rpc_task_gfp_mask(void); 265 267 266 268 static inline int rpc_wait_for_completion_task(struct rpc_task *task) 267 269 {
+3
include/linux/sunrpc/xprt.h
··· 139 139 void (*rpcbind)(struct rpc_task *task); 140 140 void (*set_port)(struct rpc_xprt *xprt, unsigned short port); 141 141 void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); 142 + int (*get_srcaddr)(struct rpc_xprt *xprt, char *buf, 143 + size_t buflen); 144 + unsigned short (*get_srcport)(struct rpc_xprt *xprt); 142 145 int (*buf_alloc)(struct rpc_task *task); 143 146 void (*buf_free)(struct rpc_task *task); 144 147 void (*prepare_request)(struct rpc_rqst *req);
+2 -1
include/linux/sunrpc/xprtsock.h
··· 10 10 11 11 int init_socket_xprt(void); 12 12 void cleanup_socket_xprt(void); 13 - unsigned short get_srcport(struct rpc_xprt *); 14 13 15 14 #define RPC_MIN_RESVPORT (1U) 16 15 #define RPC_MAX_RESVPORT (65535U) ··· 88 89 #define XPRT_SOCK_WAKE_WRITE (5) 89 90 #define XPRT_SOCK_WAKE_PENDING (6) 90 91 #define XPRT_SOCK_WAKE_DISCONNECT (7) 92 + #define XPRT_SOCK_CONNECT_SENT (8) 93 + #define XPRT_SOCK_NOSPACE (9) 91 94 92 95 #endif /* _LINUX_SUNRPC_XPRTSOCK_H */
-1
include/trace/events/sunrpc.h
··· 311 311 { RPC_TASK_MOVEABLE, "MOVEABLE" }, \ 312 312 { RPC_TASK_NULLCREDS, "NULLCREDS" }, \ 313 313 { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ 314 - { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \ 315 314 { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ 316 315 { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN" }, \ 317 316 { RPC_TASK_SOFT, "SOFT" }, \
+1
include/uapi/linux/nfs4.h
··· 45 45 46 46 #define NFS4_OPEN_RESULT_CONFIRM 0x0002 47 47 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 48 + #define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 48 49 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 49 50 50 51 #define NFS4_SHARE_ACCESS_MASK 0x000F
+1 -1
include/uapi/linux/nfs_fs.h
··· 52 52 #define NFSDBG_CALLBACK 0x0100 53 53 #define NFSDBG_CLIENT 0x0200 54 54 #define NFSDBG_MOUNT 0x0400 55 - #define NFSDBG_FSCACHE 0x0800 55 + #define NFSDBG_FSCACHE 0x0800 /* unused */ 56 56 #define NFSDBG_PNFS 0x1000 57 57 #define NFSDBG_PNFS_LD 0x2000 58 58 #define NFSDBG_STATE 0x4000
+6 -2
net/sunrpc/auth.c
··· 615 615 }; 616 616 struct rpc_cred *ret; 617 617 618 + if (RPC_IS_ASYNC(task)) 619 + lookupflags |= RPCAUTH_LOOKUP_ASYNC; 618 620 ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); 619 621 put_cred(acred.cred); 620 622 return ret; ··· 633 631 634 632 if (!acred.principal) 635 633 return NULL; 634 + if (RPC_IS_ASYNC(task)) 635 + lookupflags |= RPCAUTH_LOOKUP_ASYNC; 636 636 return auth->au_ops->lookup_cred(auth, &acred, lookupflags); 637 637 } 638 638 ··· 658 654 }; 659 655 660 656 if (flags & RPC_TASK_ASYNC) 661 - lookupflags |= RPCAUTH_LOOKUP_NEW; 657 + lookupflags |= RPCAUTH_LOOKUP_NEW | RPCAUTH_LOOKUP_ASYNC; 662 658 if (task->tk_op_cred) 663 659 /* Task must use exactly this rpc_cred */ 664 660 new = get_rpccred(task->tk_op_cred); ··· 670 666 /* If machine cred couldn't be bound, try a root cred */ 671 667 if (new) 672 668 ; 673 - else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS)) 669 + else if (cred == &machine_cred) 674 670 new = rpcauth_bind_root_cred(task, lookupflags); 675 671 else if (flags & RPC_TASK_NULLCREDS) 676 672 new = authnull_ops.lookup_cred(NULL, NULL, 0);
+15 -11
net/sunrpc/auth_gss/auth_gss.c
··· 146 146 { 147 147 struct gss_cl_ctx *ctx; 148 148 149 - ctx = kzalloc(sizeof(*ctx), GFP_NOFS); 149 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 150 150 if (ctx != NULL) { 151 151 ctx->gc_proc = RPC_GSS_PROC_DATA; 152 152 ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ ··· 209 209 p = ERR_PTR(-EFAULT); 210 210 goto err; 211 211 } 212 - ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS); 212 + ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_KERNEL); 213 213 if (ret < 0) { 214 214 trace_rpcgss_import_ctx(ret); 215 215 p = ERR_PTR(ret); ··· 511 511 int vers; 512 512 int err = -ENOMEM; 513 513 514 - gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); 514 + gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); 515 515 if (gss_msg == NULL) 516 516 goto err; 517 517 vers = get_pipe_version(gss_auth->net); ··· 527 527 gss_msg->auth = gss_auth; 528 528 kref_get(&gss_auth->kref); 529 529 if (service_name) { 530 - gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS); 530 + gss_msg->service_name = kstrdup_const(service_name, GFP_KERNEL); 531 531 if (!gss_msg->service_name) { 532 532 err = -ENOMEM; 533 533 goto err_put_pipe_version; ··· 703 703 if (mlen > MSG_BUF_MAXSIZE) 704 704 goto out; 705 705 err = -ENOMEM; 706 - buf = kmalloc(mlen, GFP_NOFS); 706 + buf = kmalloc(mlen, GFP_KERNEL); 707 707 if (!buf) 708 708 goto out; 709 709 ··· 1220 1220 struct gss_cred *new; 1221 1221 1222 1222 /* Make a copy of the cred so that we can reference count it */ 1223 - new = kzalloc(sizeof(*gss_cred), GFP_NOFS); 1223 + new = kzalloc(sizeof(*gss_cred), GFP_KERNEL); 1224 1224 if (new) { 1225 1225 struct auth_cred acred = { 1226 1226 .cred = gss_cred->gc_base.cr_cred, ··· 1343 1343 static struct rpc_cred * 1344 1344 gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) 1345 1345 { 1346 - return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); 1346 + gfp_t gfp = GFP_KERNEL; 1347 + 1348 + if (flags & 
RPCAUTH_LOOKUP_ASYNC) 1349 + gfp = GFP_NOWAIT | __GFP_NOWARN; 1350 + return rpcauth_lookup_credcache(auth, acred, flags, gfp); 1347 1351 } 1348 1352 1349 1353 static struct rpc_cred * ··· 1673 1669 if (!p) 1674 1670 goto validate_failed; 1675 1671 1676 - seq = kmalloc(4, GFP_NOFS); 1672 + seq = kmalloc(4, GFP_KERNEL); 1677 1673 if (!seq) 1678 1674 goto validate_failed; 1679 1675 *seq = cpu_to_be32(task->tk_rqstp->rq_seqno); ··· 1783 1779 rqstp->rq_enc_pages 1784 1780 = kmalloc_array(rqstp->rq_enc_pages_num, 1785 1781 sizeof(struct page *), 1786 - GFP_NOFS); 1782 + GFP_KERNEL); 1787 1783 if (!rqstp->rq_enc_pages) 1788 1784 goto out; 1789 1785 for (i=0; i < rqstp->rq_enc_pages_num; i++) { 1790 - rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); 1786 + rqstp->rq_enc_pages[i] = alloc_page(GFP_KERNEL); 1791 1787 if (rqstp->rq_enc_pages[i] == NULL) 1792 1788 goto out_free; 1793 1789 } ··· 1991 1987 if (offset + len > rcv_buf->len) 1992 1988 goto unwrap_failed; 1993 1989 mic.len = len; 1994 - mic.data = kmalloc(len, GFP_NOFS); 1990 + mic.data = kmalloc(len, GFP_KERNEL); 1995 1991 if (!mic.data) 1996 1992 goto unwrap_failed; 1997 1993 if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len))
+1 -1
net/sunrpc/auth_gss/auth_gss_internal.h
··· 35 35 if (unlikely(q > end || q < p)) 36 36 return ERR_PTR(-EFAULT); 37 37 if (len) { 38 - dest->data = kmemdup(p, len, GFP_NOFS); 38 + dest->data = kmemdup(p, len, GFP_KERNEL); 39 39 if (unlikely(dest->data == NULL)) 40 40 return ERR_PTR(-ENOMEM); 41 41 } else
+5 -5
net/sunrpc/auth_gss/gss_krb5_crypto.c
··· 161 161 return GSS_S_FAILURE; 162 162 } 163 163 164 - checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); 164 + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL); 165 165 if (checksumdata == NULL) 166 166 return GSS_S_FAILURE; 167 167 ··· 169 169 if (IS_ERR(tfm)) 170 170 goto out_free_cksum; 171 171 172 - req = ahash_request_alloc(tfm, GFP_NOFS); 172 + req = ahash_request_alloc(tfm, GFP_KERNEL); 173 173 if (!req) 174 174 goto out_free_ahash; 175 175 ··· 257 257 return GSS_S_FAILURE; 258 258 } 259 259 260 - checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); 260 + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL); 261 261 if (!checksumdata) 262 262 return GSS_S_FAILURE; 263 263 ··· 265 265 if (IS_ERR(tfm)) 266 266 goto out_free_cksum; 267 267 268 - req = ahash_request_alloc(tfm, GFP_NOFS); 268 + req = ahash_request_alloc(tfm, GFP_KERNEL); 269 269 if (!req) 270 270 goto out_free_ahash; 271 271 ··· 554 554 WARN_ON(0); 555 555 return -ENOMEM; 556 556 } 557 - data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS); 557 + data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_KERNEL); 558 558 if (!data) 559 559 return -ENOMEM; 560 560
+2 -2
net/sunrpc/auth_gss/gss_krb5_seqnum.c
··· 49 49 unsigned char *plain; 50 50 s32 code; 51 51 52 - plain = kmalloc(8, GFP_NOFS); 52 + plain = kmalloc(8, GFP_KERNEL); 53 53 if (!plain) 54 54 return -ENOMEM; 55 55 ··· 80 80 81 81 dprintk("RPC: krb5_get_seq_num:\n"); 82 82 83 - plain = kmalloc(8, GFP_NOFS); 83 + plain = kmalloc(8, GFP_KERNEL); 84 84 if (!plain) 85 85 return -ENOMEM; 86 86
+2 -2
net/sunrpc/auth_gss/gss_krb5_wrap.c
··· 409 409 gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, 410 410 struct xdr_buf *buf, struct page **pages) 411 411 { 412 - u8 *ptr, *plainhdr; 412 + u8 *ptr; 413 413 time64_t now; 414 414 u8 flags = 0x00; 415 415 __be16 *be16ptr; ··· 426 426 return GSS_S_FAILURE; 427 427 428 428 /* construct gss token header */ 429 - ptr = plainhdr = buf->head[0].iov_base + offset; 429 + ptr = buf->head[0].iov_base + offset; 430 430 *ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff); 431 431 *ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff); 432 432
+11 -3
net/sunrpc/auth_unix.c
··· 40 40 /* 41 41 * Lookup AUTH_UNIX creds for current process 42 42 */ 43 - static struct rpc_cred * 44 - unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) 43 + static struct rpc_cred *unx_lookup_cred(struct rpc_auth *auth, 44 + struct auth_cred *acred, int flags) 45 45 { 46 - struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); 46 + struct rpc_cred *ret; 47 47 48 + ret = kmalloc(sizeof(*ret), rpc_task_gfp_mask()); 49 + if (!ret) { 50 + if (!(flags & RPCAUTH_LOOKUP_ASYNC)) 51 + return ERR_PTR(-ENOMEM); 52 + ret = mempool_alloc(unix_pool, GFP_NOWAIT); 53 + if (!ret) 54 + return ERR_PTR(-ENOMEM); 55 + } 48 56 rpcauth_init_cred(ret, acred, auth, &unix_credops); 49 57 ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; 50 58 return ret;
+4 -4
net/sunrpc/backchannel_rqst.c
··· 75 75 return 0; 76 76 } 77 77 78 - static 79 - struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) 78 + static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt) 80 79 { 80 + gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; 81 81 struct rpc_rqst *req; 82 82 83 83 /* Pre-allocate one backchannel rpc_rqst */ ··· 154 154 INIT_LIST_HEAD(&tmp_list); 155 155 for (i = 0; i < min_reqs; i++) { 156 156 /* Pre-allocate one backchannel rpc_rqst */ 157 - req = xprt_alloc_bc_req(xprt, GFP_KERNEL); 157 + req = xprt_alloc_bc_req(xprt); 158 158 if (req == NULL) { 159 159 printk(KERN_ERR "Failed to create bc rpc_rqst\n"); 160 160 goto out_free; ··· 343 343 break; 344 344 } else if (req) 345 345 break; 346 - new = xprt_alloc_bc_req(xprt, GFP_KERNEL); 346 + new = xprt_alloc_bc_req(xprt); 347 347 } while (new); 348 348 return req; 349 349 }
+9 -4
net/sunrpc/clnt.c
··· 1065 1065 static 1066 1066 void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) 1067 1067 { 1068 - if (task->tk_xprt) 1068 + if (task->tk_xprt && 1069 + !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && 1070 + (task->tk_flags & RPC_TASK_MOVEABLE))) 1069 1071 return; 1070 1072 if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) 1071 1073 task->tk_xprt = rpc_task_get_first_xprt(clnt); ··· 1087 1085 task->tk_flags |= RPC_TASK_TIMEOUT; 1088 1086 if (clnt->cl_noretranstimeo) 1089 1087 task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; 1090 - if (atomic_read(&clnt->cl_swapper)) 1091 - task->tk_flags |= RPC_TASK_SWAPPER; 1092 1088 /* Add to the client's list of all tasks */ 1093 1089 spin_lock(&clnt->cl_lock); 1094 1090 list_add_tail(&task->tk_task, &clnt->cl_tasks); ··· 1744 1744 break; 1745 1745 task->tk_cred_retry--; 1746 1746 trace_rpc_retry_refresh_status(task); 1747 + return; 1748 + case -ENOMEM: 1749 + rpc_delay(task, HZ >> 4); 1747 1750 return; 1748 1751 } 1749 1752 trace_rpc_refresh_status(task); ··· 2796 2793 return -EINVAL; 2797 2794 } 2798 2795 2799 - data = kmalloc(sizeof(*data), GFP_NOFS); 2796 + data = kmalloc(sizeof(*data), GFP_KERNEL); 2800 2797 if (!data) 2801 2798 return -ENOMEM; 2802 2799 data->xps = xprt_switch_get(xps); ··· 3071 3068 int 3072 3069 rpc_clnt_swap_activate(struct rpc_clnt *clnt) 3073 3070 { 3071 + while (clnt != clnt->cl_parent) 3072 + clnt = clnt->cl_parent; 3074 3073 if (atomic_inc_return(&clnt->cl_swapper) == 1) 3075 3074 return rpc_clnt_iterate_for_each_xprt(clnt, 3076 3075 rpc_clnt_swap_activate_callback, NULL);
+2 -2
net/sunrpc/rpcb_clnt.c
··· 714 714 goto bailout_nofree; 715 715 } 716 716 717 - map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); 717 + map = kzalloc(sizeof(struct rpcbind_args), rpc_task_gfp_mask()); 718 718 if (!map) { 719 719 status = -ENOMEM; 720 720 goto bailout_release_client; ··· 730 730 case RPCBVERS_4: 731 731 case RPCBVERS_3: 732 732 map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; 733 - map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); 733 + map->r_addr = rpc_sockaddr2uaddr(sap, rpc_task_gfp_mask()); 734 734 if (!map->r_addr) { 735 735 status = -ENOMEM; 736 736 goto bailout_free_args;
+38 -18
net/sunrpc/sched.c
··· 57 57 struct workqueue_struct *xprtiod_workqueue __read_mostly; 58 58 EXPORT_SYMBOL_GPL(xprtiod_workqueue); 59 59 60 + gfp_t rpc_task_gfp_mask(void) 61 + { 62 + if (current->flags & PF_WQ_WORKER) 63 + return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; 64 + return GFP_KERNEL; 65 + } 66 + 60 67 unsigned long 61 68 rpc_task_timeout(const struct rpc_task *task) 62 69 { ··· 193 186 194 187 /* 195 188 * Add new request to wait queue. 196 - * 197 - * Swapper tasks always get inserted at the head of the queue. 198 - * This should avoid many nasty memory deadlocks and hopefully 199 - * improve overall performance. 200 - * Everyone else gets appended to the queue to ensure proper FIFO behavior. 201 189 */ 202 190 static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, 203 191 struct rpc_task *task, ··· 201 199 INIT_LIST_HEAD(&task->u.tk_wait.timer_list); 202 200 if (RPC_IS_PRIORITY(queue)) 203 201 __rpc_add_wait_queue_priority(queue, task, queue_priority); 204 - else if (RPC_IS_SWAPPER(task)) 205 - list_add(&task->u.tk_wait.list, &queue->tasks[0]); 206 202 else 207 203 list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); 208 204 task->tk_waitqueue = queue; ··· 876 876 ops->rpc_release(calldata); 877 877 } 878 878 879 + static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk) 880 + { 881 + if (!xprt) 882 + return false; 883 + if (!atomic_read(&xprt->swapper)) 884 + return false; 885 + return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk; 886 + } 887 + 879 888 /* 880 889 * This is the RPC `scheduler' (or rather, the finite state machine). 
881 890 */ ··· 893 884 struct rpc_wait_queue *queue; 894 885 int task_is_async = RPC_IS_ASYNC(task); 895 886 int status = 0; 887 + unsigned long pflags = current->flags; 896 888 897 889 WARN_ON_ONCE(RPC_IS_QUEUED(task)); 898 890 if (RPC_IS_QUEUED(task)) ··· 916 906 } 917 907 if (!do_action) 918 908 break; 909 + if (RPC_IS_SWAPPER(task) || 910 + xprt_needs_memalloc(task->tk_xprt, task)) 911 + current->flags |= PF_MEMALLOC; 912 + 919 913 trace_rpc_task_run_action(task, do_action); 920 914 do_action(task); 921 915 ··· 957 943 rpc_clear_running(task); 958 944 spin_unlock(&queue->lock); 959 945 if (task_is_async) 960 - return; 946 + goto out; 961 947 962 948 /* sync task: sleep here */ 963 949 trace_rpc_task_sync_sleep(task, task->tk_action); ··· 981 967 982 968 /* Release all resources associated with the task */ 983 969 rpc_release_task(task); 970 + out: 971 + current_restore_flags(pflags, PF_MEMALLOC); 984 972 } 985 973 986 974 /* ··· 1037 1021 struct rpc_rqst *rqst = task->tk_rqstp; 1038 1022 size_t size = rqst->rq_callsize + rqst->rq_rcvsize; 1039 1023 struct rpc_buffer *buf; 1040 - gfp_t gfp = GFP_NOFS; 1041 - 1042 - if (RPC_IS_SWAPPER(task)) 1043 - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; 1024 + gfp_t gfp = rpc_task_gfp_mask(); 1044 1025 1045 1026 size += sizeof(struct rpc_buffer); 1046 - if (size <= RPC_BUFFER_MAXSIZE) 1047 - buf = mempool_alloc(rpc_buffer_mempool, gfp); 1048 - else 1027 + if (size <= RPC_BUFFER_MAXSIZE) { 1028 + buf = kmem_cache_alloc(rpc_buffer_slabp, gfp); 1029 + /* Reach for the mempool if dynamic allocation fails */ 1030 + if (!buf && RPC_IS_ASYNC(task)) 1031 + buf = mempool_alloc(rpc_buffer_mempool, GFP_NOWAIT); 1032 + } else 1049 1033 buf = kmalloc(size, gfp); 1050 1034 1051 1035 if (!buf) ··· 1108 1092 rpc_init_task_statistics(task); 1109 1093 } 1110 1094 1111 - static struct rpc_task * 1112 - rpc_alloc_task(void) 1095 + static struct rpc_task *rpc_alloc_task(void) 1113 1096 { 1114 - return (struct rpc_task 
*)mempool_alloc(rpc_task_mempool, GFP_NOFS); 1097 + struct rpc_task *task; 1098 + 1099 + task = kmem_cache_alloc(rpc_task_slabp, rpc_task_gfp_mask()); 1100 + if (task) 1101 + return task; 1102 + return mempool_alloc(rpc_task_mempool, GFP_NOWAIT); 1115 1103 } 1116 1104 1117 1105 /*
+2 -1
net/sunrpc/socklib.c
··· 15 15 #include <linux/pagemap.h> 16 16 #include <linux/udp.h> 17 17 #include <linux/sunrpc/msg_prot.h> 18 + #include <linux/sunrpc/sched.h> 18 19 #include <linux/sunrpc/xdr.h> 19 20 #include <linux/export.h> 20 21 ··· 223 222 { 224 223 int err; 225 224 226 - err = xdr_alloc_bvec(xdr, GFP_KERNEL); 225 + err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); 227 226 if (err < 0) 228 227 return err; 229 228
+47 -45
net/sunrpc/sysfs.c
··· 93 93 struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); 94 94 ssize_t ret; 95 95 96 - if (!xprt) 97 - return 0; 96 + if (!xprt) { 97 + ret = sprintf(buf, "<closed>\n"); 98 + goto out; 99 + } 98 100 ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); 99 101 xprt_put(xprt); 100 - return ret + 1; 102 + out: 103 + return ret; 101 104 } 102 105 103 106 static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, ··· 108 105 char *buf) 109 106 { 110 107 struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); 111 - struct sockaddr_storage saddr; 112 - struct sock_xprt *sock; 113 - ssize_t ret = -1; 114 - 115 - if (!xprt || !xprt_connected(xprt)) { 116 - xprt_put(xprt); 117 - return -ENOTCONN; 118 - } 119 - 120 - sock = container_of(xprt, struct sock_xprt, xprt); 121 - mutex_lock(&sock->recv_mutex); 122 - if (sock->sock == NULL || 123 - kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) 124 - goto out; 125 - 126 - ret = sprintf(buf, "%pISc\n", &saddr); 127 - out: 128 - mutex_unlock(&sock->recv_mutex); 129 - xprt_put(xprt); 130 - return ret + 1; 131 - } 132 - 133 - static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, 134 - struct kobj_attribute *attr, 135 - char *buf) 136 - { 137 - struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); 108 + size_t buflen = PAGE_SIZE; 138 109 ssize_t ret; 139 110 140 111 if (!xprt || !xprt_connected(xprt)) { 141 - xprt_put(xprt); 142 - return -ENOTCONN; 112 + ret = sprintf(buf, "<closed>\n"); 113 + } else if (xprt->ops->get_srcaddr) { 114 + ret = xprt->ops->get_srcaddr(xprt, buf, buflen); 115 + if (ret > 0) { 116 + if (ret < buflen - 1) { 117 + buf[ret] = '\n'; 118 + ret++; 119 + buf[ret] = '\0'; 120 + } 121 + } else 122 + ret = sprintf(buf, "<closed>\n"); 123 + } else 124 + ret = sprintf(buf, "<not a socket>\n"); 125 + xprt_put(xprt); 126 + return ret; 127 + } 128 + 129 + static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, 130 + struct kobj_attribute *attr, char 
*buf) 131 + { 132 + struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); 133 + unsigned short srcport = 0; 134 + size_t buflen = PAGE_SIZE; 135 + ssize_t ret; 136 + 137 + if (!xprt || !xprt_connected(xprt)) { 138 + ret = sprintf(buf, "<closed>\n"); 139 + goto out; 143 140 } 144 141 145 - ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" 142 + if (xprt->ops->get_srcport) 143 + srcport = xprt->ops->get_srcport(xprt); 144 + 145 + ret = snprintf(buf, buflen, 146 + "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" 146 147 "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" 147 148 "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" 148 149 "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" ··· 154 147 xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, 155 148 xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, 156 149 xprt->sending.qlen, xprt->pending.qlen, 157 - xprt->backlog.qlen, xprt->main, 158 - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? 159 - get_srcport(xprt) : 0, 150 + xprt->backlog.qlen, xprt->main, srcport, 160 151 atomic_long_read(&xprt->queuelen), 161 - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? 
162 - xprt->address_strings[RPC_DISPLAY_PORT] : "0"); 152 + xprt->address_strings[RPC_DISPLAY_PORT]); 153 + out: 163 154 xprt_put(xprt); 164 - return ret + 1; 155 + return ret; 165 156 } 166 157 167 158 static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, ··· 171 166 int locked, connected, connecting, close_wait, bound, binding, 172 167 closing, congested, cwnd_wait, write_space, offline, remove; 173 168 174 - if (!xprt) 175 - return 0; 176 - 177 - if (!xprt->state) { 169 + if (!(xprt && xprt->state)) { 178 170 ret = sprintf(buf, "state=CLOSED\n"); 179 171 } else { 180 172 locked = test_bit(XPRT_LOCKED, &xprt->state); ··· 203 201 } 204 202 205 203 xprt_put(xprt); 206 - return ret + 1; 204 + return ret; 207 205 } 208 206 209 207 static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, ··· 222 220 xprt_switch->xps_nunique_destaddr_xprts, 223 221 atomic_long_read(&xprt_switch->xps_queuelen)); 224 222 xprt_switch_put(xprt_switch); 225 - return ret + 1; 223 + return ret; 226 224 } 227 225 228 226 static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
+11 -12
net/sunrpc/xprt.c
··· 1354 1354 INIT_LIST_HEAD(&req->rq_xmit2); 1355 1355 goto out; 1356 1356 } 1357 - } else if (RPC_IS_SWAPPER(task)) { 1358 - list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { 1359 - if (pos->rq_cong || pos->rq_bytes_sent) 1360 - continue; 1361 - if (RPC_IS_SWAPPER(pos->rq_task)) 1362 - continue; 1363 - /* Note: req is added _before_ pos */ 1364 - list_add_tail(&req->rq_xmit, &pos->rq_xmit); 1365 - INIT_LIST_HEAD(&req->rq_xmit2); 1366 - goto out; 1367 - } 1368 1357 } else if (!req->rq_seqno) { 1369 1358 list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { 1370 1359 if (pos->rq_task->tk_owner != task->tk_owner) ··· 1492 1503 return false; 1493 1504 1494 1505 } 1506 + if (atomic_read(&xprt->swapper)) 1507 + /* This will be cleared in __rpc_execute */ 1508 + current->flags |= PF_MEMALLOC; 1495 1509 return true; 1496 1510 } 1497 1511 ··· 1684 1692 goto out; 1685 1693 ++xprt->num_reqs; 1686 1694 spin_unlock(&xprt->reserve_lock); 1687 - req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS); 1695 + req = kzalloc(sizeof(*req), rpc_task_gfp_mask()); 1688 1696 spin_lock(&xprt->reserve_lock); 1689 1697 if (req != NULL) 1690 1698 goto out; ··· 2104 2112 */ 2105 2113 wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); 2106 2114 2115 + /* 2116 + * xprt_schedule_autodisconnect() can run after XPRT_LOCKED 2117 + * is cleared. We use ->transport_lock to ensure the mod_timer() 2118 + * can only run *before* del_timer_sync(), never after. 2119 + */ 2120 + spin_lock(&xprt->transport_lock); 2107 2121 del_timer_sync(&xprt->timer); 2122 + spin_unlock(&xprt->transport_lock); 2108 2123 2109 2124 /* 2110 2125 * Destroy sockets etc from the system workqueue so they can
+1 -1
net/sunrpc/xprtrdma/frwr_ops.c
··· 130 130 if (IS_ERR(frmr)) 131 131 goto out_mr_err; 132 132 133 - sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS); 133 + sg = kmalloc_array(depth, sizeof(*sg), GFP_KERNEL); 134 134 if (!sg) 135 135 goto out_list_err; 136 136
+7 -3
net/sunrpc/xprtrdma/transport.c
··· 235 235 struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, 236 236 rx_connect_worker.work); 237 237 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 238 + unsigned int pflags = current->flags; 238 239 int rc; 239 240 241 + if (atomic_read(&xprt->swapper)) 242 + current->flags |= PF_MEMALLOC; 240 243 rc = rpcrdma_xprt_connect(r_xprt); 241 244 xprt_clear_connecting(xprt); 242 245 if (!rc) { ··· 253 250 rpcrdma_xprt_disconnect(r_xprt); 254 251 xprt_unlock_connect(xprt, r_xprt); 255 252 xprt_wake_pending_tasks(xprt, rc); 253 + current_restore_flags(pflags, PF_MEMALLOC); 256 254 } 257 255 258 256 /** ··· 521 517 return; 522 518 523 519 out_sleep: 524 - task->tk_status = -EAGAIN; 520 + task->tk_status = -ENOMEM; 525 521 xprt_add_backlog(xprt, task); 526 522 } 527 523 ··· 574 570 gfp_t flags; 575 571 576 572 flags = RPCRDMA_DEF_GFP; 577 - if (RPC_IS_SWAPPER(task)) 578 - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; 573 + if (RPC_IS_ASYNC(task)) 574 + flags = GFP_NOWAIT | __GFP_NOWARN; 579 575 580 576 if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, 581 577 flags))
+2 -2
net/sunrpc/xprtrdma/verbs.c
··· 373 373 struct rpcrdma_ep *ep; 374 374 int rc; 375 375 376 - ep = kzalloc(sizeof(*ep), GFP_NOFS); 376 + ep = kzalloc(sizeof(*ep), GFP_KERNEL); 377 377 if (!ep) 378 378 return -ENOTCONN; 379 379 ep->re_xprt = &r_xprt->rx_xprt; ··· 746 746 struct rpcrdma_mr *mr; 747 747 int rc; 748 748 749 - mr = kzalloc(sizeof(*mr), GFP_NOFS); 749 + mr = kzalloc(sizeof(*mr), GFP_KERNEL); 750 750 if (!mr) 751 751 break; 752 752
+115 -92
net/sunrpc/xprtsock.c
··· 58 58 #include "sunrpc.h" 59 59 60 60 static void xs_close(struct rpc_xprt *xprt); 61 + static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock); 61 62 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, 62 63 struct socket *sock); 63 64 ··· 428 427 offset += want; 429 428 } 430 429 431 - want = xs_alloc_sparse_pages(buf, 432 - min_t(size_t, count - offset, buf->page_len), 433 - GFP_KERNEL); 430 + want = xs_alloc_sparse_pages( 431 + buf, min_t(size_t, count - offset, buf->page_len), 432 + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); 434 433 if (seek < want) { 435 434 ret = xs_read_bvec(sock, msg, flags, buf->bvec, 436 435 xdr_buf_pagecount(buf), ··· 764 763 /** 765 764 * xs_nospace - handle transmit was incomplete 766 765 * @req: pointer to RPC request 766 + * @transport: pointer to struct sock_xprt 767 767 * 768 768 */ 769 - static int xs_nospace(struct rpc_rqst *req) 769 + static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) 770 770 { 771 - struct rpc_xprt *xprt = req->rq_xprt; 772 - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 771 + struct rpc_xprt *xprt = &transport->xprt; 773 772 struct sock *sk = transport->inet; 774 773 int ret = -EAGAIN; 775 774 ··· 781 780 /* Don't race with disconnect */ 782 781 if (xprt_connected(xprt)) { 783 782 /* wait for more buffer space */ 783 + set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); 784 + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 784 785 sk->sk_write_pending++; 785 786 xprt_wait_for_buffer_space(xprt); 786 787 } else 787 788 ret = -ENOTCONN; 788 789 789 790 spin_unlock(&xprt->transport_lock); 791 + return ret; 792 + } 790 793 791 - /* Race breaker in case memory is freed before above code is called */ 792 - if (ret == -EAGAIN) { 793 - struct socket_wq *wq; 794 + static int xs_sock_nospace(struct rpc_rqst *req) 795 + { 796 + struct sock_xprt *transport = 797 + container_of(req->rq_xprt, struct sock_xprt, xprt); 798 + struct sock *sk = 
transport->inet; 799 + int ret = -EAGAIN; 794 800 795 - rcu_read_lock(); 796 - wq = rcu_dereference(sk->sk_wq); 797 - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); 798 - rcu_read_unlock(); 801 + lock_sock(sk); 802 + if (!sock_writeable(sk)) 803 + ret = xs_nospace(req, transport); 804 + release_sock(sk); 805 + return ret; 806 + } 799 807 800 - sk->sk_write_space(sk); 801 - } 808 + static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) 809 + { 810 + struct sock_xprt *transport = 811 + container_of(req->rq_xprt, struct sock_xprt, xprt); 812 + struct sock *sk = transport->inet; 813 + int ret = -EAGAIN; 814 + 815 + if (vm_wait) 816 + return -ENOBUFS; 817 + lock_sock(sk); 818 + if (!sk_stream_memory_free(sk)) 819 + ret = xs_nospace(req, transport); 820 + release_sock(sk); 802 821 return ret; 803 822 } 804 823 ··· 826 805 xs_stream_prepare_request(struct rpc_rqst *req) 827 806 { 828 807 xdr_free_bvec(&req->rq_rcv_buf); 829 - req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL); 808 + req->rq_task->tk_status = xdr_alloc_bvec( 809 + &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); 830 810 } 831 811 832 812 /* ··· 873 851 struct msghdr msg = { 874 852 .msg_flags = XS_SENDMSG_FLAGS, 875 853 }; 854 + bool vm_wait; 876 855 unsigned int sent; 877 856 int status; 878 857 ··· 886 863 xs_pktdump("packet data:", 887 864 req->rq_svec->iov_base, req->rq_svec->iov_len); 888 865 866 + vm_wait = sk_stream_is_writeable(transport->inet) ? 
true : false; 867 + 889 868 req->rq_xtime = ktime_get(); 890 869 status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 891 870 transport->xmit.offset, rm, &sent); 892 871 dprintk("RPC: %s(%u) = %d\n", 893 872 __func__, xdr->len - transport->xmit.offset, status); 894 - 895 - if (status == -EAGAIN && sock_writeable(transport->inet)) 896 - status = -ENOBUFS; 897 873 898 874 if (likely(sent > 0) || status == 0) { 899 875 transport->xmit.offset += sent; ··· 903 881 return 0; 904 882 } 905 883 status = -EAGAIN; 884 + vm_wait = false; 906 885 } 907 886 908 887 switch (status) { 909 - case -ENOBUFS: 910 - break; 911 888 case -EAGAIN: 912 - status = xs_nospace(req); 889 + status = xs_stream_nospace(req, vm_wait); 913 890 break; 914 891 default: 915 892 dprintk("RPC: sendmsg returned unrecognized error %d\n", ··· 984 963 /* Should we call xs_close() here? */ 985 964 break; 986 965 case -EAGAIN: 987 - status = xs_nospace(req); 966 + status = xs_sock_nospace(req); 988 967 break; 989 968 case -ENETUNREACH: 990 969 case -ENOBUFS: ··· 1026 1005 struct msghdr msg = { 1027 1006 .msg_flags = XS_SENDMSG_FLAGS, 1028 1007 }; 1029 - bool vm_wait = false; 1008 + bool vm_wait; 1030 1009 unsigned int sent; 1031 1010 int status; 1032 1011 ··· 1046 1025 if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) 1047 1026 xs_tcp_set_socket_timeouts(xprt, transport->sock); 1048 1027 1028 + xs_set_srcport(transport, transport->sock); 1029 + 1049 1030 /* Continue transmitting the packet/record. We must be careful 1050 1031 * to cope with writespace callbacks arriving _after_ we have 1051 1032 * called sendmsg(). */ 1052 1033 req->rq_xtime = ktime_get(); 1053 1034 tcp_sock_set_cork(transport->inet, true); 1054 - while (1) { 1035 + 1036 + vm_wait = sk_stream_is_writeable(transport->inet) ? 
true : false; 1037 + 1038 + do { 1055 1039 status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 1056 1040 transport->xmit.offset, rm, &sent); 1057 1041 ··· 1077 1051 1078 1052 WARN_ON_ONCE(sent == 0 && status == 0); 1079 1053 1080 - if (status == -EAGAIN ) { 1081 - /* 1082 - * Return EAGAIN if we're sure we're hitting the 1083 - * socket send buffer limits. 1084 - */ 1085 - if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) 1086 - break; 1087 - /* 1088 - * Did we hit a memory allocation failure? 1089 - */ 1090 - if (sent == 0) { 1091 - status = -ENOBUFS; 1092 - if (vm_wait) 1093 - break; 1094 - /* Retry, knowing now that we're below the 1095 - * socket send buffer limit 1096 - */ 1097 - vm_wait = true; 1098 - } 1099 - continue; 1100 - } 1101 - if (status < 0) 1102 - break; 1103 - vm_wait = false; 1104 - } 1054 + if (sent > 0) 1055 + vm_wait = false; 1056 + 1057 + } while (status == 0); 1105 1058 1106 1059 switch (status) { 1107 1060 case -ENOTSOCK: ··· 1088 1083 /* Should we call xs_close() here? 
*/ 1089 1084 break; 1090 1085 case -EAGAIN: 1091 - status = xs_nospace(req); 1086 + status = xs_stream_nospace(req, vm_wait); 1092 1087 break; 1093 1088 case -ECONNRESET: 1094 1089 case -ECONNREFUSED: ··· 1129 1124 clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); 1130 1125 clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); 1131 1126 clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); 1127 + clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); 1132 1128 } 1133 1129 1134 1130 static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) ··· 1476 1470 1477 1471 static void xs_write_space(struct sock *sk) 1478 1472 { 1479 - struct socket_wq *wq; 1480 1473 struct sock_xprt *transport; 1481 1474 struct rpc_xprt *xprt; 1482 1475 ··· 1486 1481 if (unlikely(!(xprt = xprt_from_sock(sk)))) 1487 1482 return; 1488 1483 transport = container_of(xprt, struct sock_xprt, xprt); 1489 - rcu_read_lock(); 1490 - wq = rcu_dereference(sk->sk_wq); 1491 - if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) 1492 - goto out; 1493 - 1484 + if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state)) 1485 + return; 1494 1486 xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); 1495 1487 sk->sk_write_pending--; 1496 - out: 1497 - rcu_read_unlock(); 1498 1488 } 1499 1489 1500 1490 /** ··· 1638 1638 return port; 1639 1639 } 1640 1640 1641 - unsigned short get_srcport(struct rpc_xprt *xprt) 1641 + static unsigned short xs_sock_srcport(struct rpc_xprt *xprt) 1642 1642 { 1643 1643 struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); 1644 1644 unsigned short ret = 0; ··· 1648 1648 mutex_unlock(&sock->recv_mutex); 1649 1649 return ret; 1650 1650 } 1651 - EXPORT_SYMBOL(get_srcport); 1651 + 1652 + static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen) 1653 + { 1654 + struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); 1655 + union { 1656 + struct sockaddr sa; 1657 + struct 
sockaddr_storage st; 1658 + } saddr; 1659 + int ret = -ENOTCONN; 1660 + 1661 + mutex_lock(&sock->recv_mutex); 1662 + if (sock->sock) { 1663 + ret = kernel_getsockname(sock->sock, &saddr.sa); 1664 + if (ret >= 0) 1665 + ret = snprintf(buf, buflen, "%pISc", &saddr.sa); 1666 + } 1667 + mutex_unlock(&sock->recv_mutex); 1668 + return ret; 1669 + } 1652 1670 1653 1671 static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) 1654 1672 { ··· 1848 1830 sk->sk_user_data = xprt; 1849 1831 sk->sk_data_ready = xs_data_ready; 1850 1832 sk->sk_write_space = xs_udp_write_space; 1851 - sock_set_flag(sk, SOCK_FASYNC); 1852 1833 sk->sk_error_report = xs_error_report; 1853 1834 1854 1835 xprt_clear_connected(xprt); ··· 1953 1936 1954 1937 #if IS_ENABLED(CONFIG_SUNRPC_SWAP) 1955 1938 /* 1956 - * Note that this should be called with XPRT_LOCKED held (or when we otherwise 1957 - * know that we have exclusive access to the socket), to guard against 1958 - * races with xs_reset_transport. 1939 + * Note that this should be called with XPRT_LOCKED held, or recv_mutex 1940 + * held, or when we otherwise know that we have exclusive access to the 1941 + * socket, to guard against races with xs_reset_transport. 
1959 1942 */ 1960 1943 static void xs_set_memalloc(struct rpc_xprt *xprt) 1961 1944 { ··· 1984 1967 { 1985 1968 struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); 1986 1969 1987 - if (atomic_inc_return(&xprt->swapper) != 1) 1988 - return 0; 1989 - if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) 1990 - return -ERESTARTSYS; 1991 - if (xs->inet) 1970 + mutex_lock(&xs->recv_mutex); 1971 + if (atomic_inc_return(&xprt->swapper) == 1 && 1972 + xs->inet) 1992 1973 sk_set_memalloc(xs->inet); 1993 - xprt_release_xprt(xprt, NULL); 1974 + mutex_unlock(&xs->recv_mutex); 1994 1975 return 0; 1995 1976 } 1996 1977 ··· 2004 1989 { 2005 1990 struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); 2006 1991 2007 - if (!atomic_dec_and_test(&xprt->swapper)) 2008 - return; 2009 - if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) 2010 - return; 2011 - if (xs->inet) 1992 + mutex_lock(&xs->recv_mutex); 1993 + if (atomic_dec_and_test(&xprt->swapper) && 1994 + xs->inet) 2012 1995 sk_clear_memalloc(xs->inet); 2013 - xprt_release_xprt(xprt, NULL); 1996 + mutex_unlock(&xs->recv_mutex); 2014 1997 } 2015 1998 #else 2016 1999 static void xs_set_memalloc(struct rpc_xprt *xprt) ··· 2041 2028 sk->sk_user_data = xprt; 2042 2029 sk->sk_data_ready = xs_data_ready; 2043 2030 sk->sk_write_space = xs_udp_write_space; 2044 - sock_set_flag(sk, SOCK_FASYNC); 2045 2031 2046 2032 xprt_set_connected(xprt); 2047 2033 ··· 2064 2052 struct rpc_xprt *xprt = &transport->xprt; 2065 2053 struct socket *sock; 2066 2054 int status = -EIO; 2055 + unsigned int pflags = current->flags; 2067 2056 2057 + if (atomic_read(&xprt->swapper)) 2058 + current->flags |= PF_MEMALLOC; 2068 2059 sock = xs_create_sock(xprt, transport, 2069 2060 xs_addr(xprt)->sa_family, SOCK_DGRAM, 2070 2061 IPPROTO_UDP, false); ··· 2087 2072 xprt_clear_connecting(xprt); 2088 2073 xprt_unlock_connect(xprt, transport); 2089 2074 xprt_wake_pending_tasks(xprt, status); 2075 + 
current_restore_flags(pflags, PF_MEMALLOC); 2090 2076 } 2091 2077 2092 2078 /** ··· 2207 2191 sk->sk_data_ready = xs_data_ready; 2208 2192 sk->sk_state_change = xs_tcp_state_change; 2209 2193 sk->sk_write_space = xs_tcp_write_space; 2210 - sock_set_flag(sk, SOCK_FASYNC); 2211 2194 sk->sk_error_report = xs_error_report; 2212 2195 2213 2196 /* socket options */ ··· 2246 2231 struct socket *sock = transport->sock; 2247 2232 struct rpc_xprt *xprt = &transport->xprt; 2248 2233 int status; 2234 + unsigned int pflags = current->flags; 2249 2235 2250 - if (!sock) { 2251 - sock = xs_create_sock(xprt, transport, 2252 - xs_addr(xprt)->sa_family, SOCK_STREAM, 2253 - IPPROTO_TCP, true); 2236 + if (atomic_read(&xprt->swapper)) 2237 + current->flags |= PF_MEMALLOC; 2238 + 2239 + if (xprt_connected(xprt)) 2240 + goto out; 2241 + if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, 2242 + &transport->sock_state) || 2243 + !sock) { 2244 + xs_reset_transport(transport); 2245 + sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, 2246 + SOCK_STREAM, IPPROTO_TCP, true); 2254 2247 if (IS_ERR(sock)) { 2255 2248 xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); 2256 2249 goto out; ··· 2278 2255 sock->sk->sk_state); 2279 2256 switch (status) { 2280 2257 case 0: 2281 - xs_set_srcport(transport, sock); 2282 - fallthrough; 2283 2258 case -EINPROGRESS: 2284 2259 /* SYN_SENT! 
*/ 2260 + set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); 2285 2261 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) 2286 2262 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 2287 2263 fallthrough; ··· 2318 2296 xprt_clear_connecting(xprt); 2319 2297 out_unlock: 2320 2298 xprt_unlock_connect(xprt, transport); 2299 + current_restore_flags(pflags, PF_MEMALLOC); 2321 2300 } 2322 2301 2323 2302 /** ··· 2342 2319 2343 2320 WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); 2344 2321 2345 - if (transport->sock != NULL && !xprt_connecting(xprt)) { 2322 + if (transport->sock != NULL) { 2346 2323 dprintk("RPC: xs_connect delayed xprt %p for %lu " 2347 - "seconds\n", 2348 - xprt, xprt->reestablish_timeout / HZ); 2349 - 2350 - /* Start by resetting any existing state */ 2351 - xs_reset_transport(transport); 2324 + "seconds\n", xprt, xprt->reestablish_timeout / HZ); 2352 2325 2353 2326 delay = xprt_reconnect_delay(xprt); 2354 2327 xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); ··· 2506 2487 return -EINVAL; 2507 2488 } 2508 2489 2509 - page = alloc_page(GFP_KERNEL); 2490 + page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); 2510 2491 if (!page) 2511 2492 return -ENOMEM; 2512 2493 ··· 2640 2621 .rpcbind = rpcb_getport_async, 2641 2622 .set_port = xs_set_port, 2642 2623 .connect = xs_connect, 2624 + .get_srcaddr = xs_sock_srcaddr, 2625 + .get_srcport = xs_sock_srcport, 2643 2626 .buf_alloc = rpc_malloc, 2644 2627 .buf_free = rpc_free, 2645 2628 .send_request = xs_udp_send_request, ··· 2664 2643 .rpcbind = rpcb_getport_async, 2665 2644 .set_port = xs_set_port, 2666 2645 .connect = xs_connect, 2646 + .get_srcaddr = xs_sock_srcaddr, 2647 + .get_srcport = xs_sock_srcport, 2667 2648 .buf_alloc = rpc_malloc, 2668 2649 .buf_free = rpc_free, 2669 2650 .prepare_request = xs_stream_prepare_request,