Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nfsd: add a new struct file caching facility to nfsd

Currently, NFSv2/3 reads and writes have to open a file, do the read or
write and then close it again for each RPC. This is highly inefficient,
especially when the underlying filesystem has a relatively slow open
routine.

This patch adds a new open file cache to knfsd. Rather than doing an
open for each RPC, the read/write handlers can call into this cache to
see if there is one already there for the correct filehandle and
NFSD_MAY_READ/WRITE flags.

If there isn't an entry, then we create a new one and attempt to
perform the open. If there is, then we wait until the entry is fully
instantiated and return it if it is still hashed at the end of the wait.
If it's not, then construction failed, so we drop our reference, retry
the lookup and attempt to take over construction.

Since the main goal is to speed up NFSv2/3 I/O, we don't want to
close these files on last put of these objects. We need to keep them
around for a little while since we never know when the next READ/WRITE
will come in.

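Cache entries are aged out by a recurring delayed workqueue job (the
"laundrette") that is scheduled with a hardcoded delay
(NFSD_LAUNDRETTE_DELAY, 2 * HZ in this patch). Each run walks the LRU
and purges any unused entries that have not been referenced since the
previous scan.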

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Richard Sharpe <richard.sharpe@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

authored by

Jeff Layton and committed by
J. Bruce Fields
65294c1f 7239a40c

+1155 -24
+1
fs/nfsd/Kconfig
··· 3 3 tristate "NFS server support" 4 4 depends on INET 5 5 depends on FILE_LOCKING 6 + depends on FSNOTIFY 6 7 select LOCKD 7 8 select SUNRPC 8 9 select EXPORTFS
+2 -1
fs/nfsd/Makefile
··· 11 11 nfsd-y += trace.o 12 12 13 13 nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ 14 - export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o 14 + export.o auth.o lockd.o nfscache.o nfsxdr.o \ 15 + stats.o filecache.o 15 16 nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o 16 17 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o 17 18 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
+13
fs/nfsd/export.c
··· 22 22 #include "nfsfh.h" 23 23 #include "netns.h" 24 24 #include "pnfs.h" 25 + #include "filecache.h" 25 26 26 27 #define NFSDDBG_FACILITY NFSDDBG_EXPORT 27 28 ··· 233 232 return NULL; 234 233 } 235 234 235 + static void expkey_flush(void) 236 + { 237 + /* 238 + * Take the nfsd_mutex here to ensure that the file cache is not 239 + * destroyed while we're in the middle of flushing. 240 + */ 241 + mutex_lock(&nfsd_mutex); 242 + nfsd_file_cache_purge(); 243 + mutex_unlock(&nfsd_mutex); 244 + } 245 + 236 246 static const struct cache_detail svc_expkey_cache_template = { 237 247 .owner = THIS_MODULE, 238 248 .hash_size = EXPKEY_HASHMAX, ··· 256 244 .init = expkey_init, 257 245 .update = expkey_update, 258 246 .alloc = expkey_alloc, 247 + .flush = expkey_flush, 259 248 }; 260 249 261 250 static int
+885
fs/nfsd/filecache.c
··· 1 + /* 2 + * Open file cache. 3 + * 4 + * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> 5 + */ 6 + 7 + #include <linux/hash.h> 8 + #include <linux/slab.h> 9 + #include <linux/hash.h> 10 + #include <linux/file.h> 11 + #include <linux/sched.h> 12 + #include <linux/list_lru.h> 13 + #include <linux/fsnotify_backend.h> 14 + #include <linux/fsnotify.h> 15 + #include <linux/seq_file.h> 16 + 17 + #include "vfs.h" 18 + #include "nfsd.h" 19 + #include "nfsfh.h" 20 + #include "filecache.h" 21 + #include "trace.h" 22 + 23 + #define NFSDDBG_FACILITY NFSDDBG_FH 24 + 25 + /* FIXME: dynamically size this for the machine somehow? */ 26 + #define NFSD_FILE_HASH_BITS 12 27 + #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) 28 + #define NFSD_LAUNDRETTE_DELAY (2 * HZ) 29 + 30 + #define NFSD_FILE_LRU_RESCAN (0) 31 + #define NFSD_FILE_SHUTDOWN (1) 32 + #define NFSD_FILE_LRU_THRESHOLD (4096UL) 33 + #define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) 34 + 35 + /* We only care about NFSD_MAY_READ/WRITE for this cache */ 36 + #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) 37 + 38 + struct nfsd_fcache_bucket { 39 + struct hlist_head nfb_head; 40 + spinlock_t nfb_lock; 41 + unsigned int nfb_count; 42 + unsigned int nfb_maxcount; 43 + }; 44 + 45 + static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); 46 + 47 + static struct kmem_cache *nfsd_file_slab; 48 + static struct kmem_cache *nfsd_file_mark_slab; 49 + static struct nfsd_fcache_bucket *nfsd_file_hashtbl; 50 + static struct list_lru nfsd_file_lru; 51 + static long nfsd_file_lru_flags; 52 + static struct fsnotify_group *nfsd_file_fsnotify_group; 53 + static atomic_long_t nfsd_filecache_count; 54 + static struct delayed_work nfsd_filecache_laundrette; 55 + 56 + enum nfsd_file_laundrette_ctl { 57 + NFSD_FILE_LAUNDRETTE_NOFLUSH = 0, 58 + NFSD_FILE_LAUNDRETTE_MAY_FLUSH 59 + }; 60 + 61 + static void 62 + nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl) 63 + { 64 + long count = 
atomic_long_read(&nfsd_filecache_count); 65 + 66 + if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) 67 + return; 68 + 69 + /* Be more aggressive about scanning if over the threshold */ 70 + if (count > NFSD_FILE_LRU_THRESHOLD) 71 + mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0); 72 + else 73 + schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); 74 + 75 + if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH) 76 + return; 77 + 78 + /* ...and don't delay flushing if we're out of control */ 79 + if (count >= NFSD_FILE_LRU_LIMIT) 80 + flush_delayed_work(&nfsd_filecache_laundrette); 81 + } 82 + 83 + static void 84 + nfsd_file_slab_free(struct rcu_head *rcu) 85 + { 86 + struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); 87 + 88 + put_cred(nf->nf_cred); 89 + kmem_cache_free(nfsd_file_slab, nf); 90 + } 91 + 92 + static void 93 + nfsd_file_mark_free(struct fsnotify_mark *mark) 94 + { 95 + struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, 96 + nfm_mark); 97 + 98 + kmem_cache_free(nfsd_file_mark_slab, nfm); 99 + } 100 + 101 + static struct nfsd_file_mark * 102 + nfsd_file_mark_get(struct nfsd_file_mark *nfm) 103 + { 104 + if (!atomic_inc_not_zero(&nfm->nfm_ref)) 105 + return NULL; 106 + return nfm; 107 + } 108 + 109 + static void 110 + nfsd_file_mark_put(struct nfsd_file_mark *nfm) 111 + { 112 + if (atomic_dec_and_test(&nfm->nfm_ref)) { 113 + 114 + fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); 115 + fsnotify_put_mark(&nfm->nfm_mark); 116 + } 117 + } 118 + 119 + static struct nfsd_file_mark * 120 + nfsd_file_mark_find_or_create(struct nfsd_file *nf) 121 + { 122 + int err; 123 + struct fsnotify_mark *mark; 124 + struct nfsd_file_mark *nfm = NULL, *new; 125 + struct inode *inode = nf->nf_inode; 126 + 127 + do { 128 + mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); 129 + mark = fsnotify_find_mark(&inode->i_fsnotify_marks, 130 + nfsd_file_fsnotify_group); 131 + if (mark) { 132 + 
nfm = nfsd_file_mark_get(container_of(mark, 133 + struct nfsd_file_mark, 134 + nfm_mark)); 135 + mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 136 + fsnotify_put_mark(mark); 137 + if (likely(nfm)) 138 + break; 139 + } else 140 + mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 141 + 142 + /* allocate a new nfm */ 143 + new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); 144 + if (!new) 145 + return NULL; 146 + fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); 147 + new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; 148 + atomic_set(&new->nfm_ref, 1); 149 + 150 + err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); 151 + 152 + /* 153 + * If the add was successful, then return the object. 154 + * Otherwise, we need to put the reference we hold on the 155 + * nfm_mark. The fsnotify code will take a reference and put 156 + * it on failure, so we can't just free it directly. It's also 157 + * not safe to call fsnotify_destroy_mark on it as the 158 + * mark->group will be NULL. Thus, we can't let the nfm_ref 159 + * counter drive the destruction at this point. 
160 + */ 161 + if (likely(!err)) 162 + nfm = new; 163 + else 164 + fsnotify_put_mark(&new->nfm_mark); 165 + } while (unlikely(err == -EEXIST)); 166 + 167 + return nfm; 168 + } 169 + 170 + static struct nfsd_file * 171 + nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval) 172 + { 173 + struct nfsd_file *nf; 174 + 175 + nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 176 + if (nf) { 177 + INIT_HLIST_NODE(&nf->nf_node); 178 + INIT_LIST_HEAD(&nf->nf_lru); 179 + nf->nf_file = NULL; 180 + nf->nf_cred = get_current_cred(); 181 + nf->nf_flags = 0; 182 + nf->nf_inode = inode; 183 + nf->nf_hashval = hashval; 184 + atomic_set(&nf->nf_ref, 1); 185 + nf->nf_may = may & NFSD_FILE_MAY_MASK; 186 + if (may & NFSD_MAY_NOT_BREAK_LEASE) { 187 + if (may & NFSD_MAY_WRITE) 188 + __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); 189 + if (may & NFSD_MAY_READ) 190 + __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 191 + } 192 + nf->nf_mark = NULL; 193 + trace_nfsd_file_alloc(nf); 194 + } 195 + return nf; 196 + } 197 + 198 + static bool 199 + nfsd_file_free(struct nfsd_file *nf) 200 + { 201 + bool flush = false; 202 + 203 + trace_nfsd_file_put_final(nf); 204 + if (nf->nf_mark) 205 + nfsd_file_mark_put(nf->nf_mark); 206 + if (nf->nf_file) { 207 + get_file(nf->nf_file); 208 + filp_close(nf->nf_file, NULL); 209 + fput(nf->nf_file); 210 + flush = true; 211 + } 212 + call_rcu(&nf->nf_rcu, nfsd_file_slab_free); 213 + return flush; 214 + } 215 + 216 + static void 217 + nfsd_file_do_unhash(struct nfsd_file *nf) 218 + { 219 + lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 220 + 221 + trace_nfsd_file_unhash(nf); 222 + 223 + --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; 224 + hlist_del_rcu(&nf->nf_node); 225 + if (!list_empty(&nf->nf_lru)) 226 + list_lru_del(&nfsd_file_lru, &nf->nf_lru); 227 + atomic_long_dec(&nfsd_filecache_count); 228 + } 229 + 230 + static bool 231 + nfsd_file_unhash(struct nfsd_file *nf) 232 + { 233 + if 
(test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 234 + nfsd_file_do_unhash(nf); 235 + return true; 236 + } 237 + return false; 238 + } 239 + 240 + /* 241 + * Return true if the file was unhashed. 242 + */ 243 + static bool 244 + nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) 245 + { 246 + lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 247 + 248 + trace_nfsd_file_unhash_and_release_locked(nf); 249 + if (!nfsd_file_unhash(nf)) 250 + return false; 251 + /* keep final reference for nfsd_file_lru_dispose */ 252 + if (atomic_add_unless(&nf->nf_ref, -1, 1)) 253 + return true; 254 + 255 + list_add(&nf->nf_lru, dispose); 256 + return true; 257 + } 258 + 259 + static int 260 + nfsd_file_put_noref(struct nfsd_file *nf) 261 + { 262 + int count; 263 + trace_nfsd_file_put(nf); 264 + 265 + count = atomic_dec_return(&nf->nf_ref); 266 + if (!count) { 267 + WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); 268 + nfsd_file_free(nf); 269 + } 270 + return count; 271 + } 272 + 273 + void 274 + nfsd_file_put(struct nfsd_file *nf) 275 + { 276 + bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; 277 + 278 + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); 279 + if (nfsd_file_put_noref(nf) == 1 && is_hashed) 280 + nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH); 281 + } 282 + 283 + struct nfsd_file * 284 + nfsd_file_get(struct nfsd_file *nf) 285 + { 286 + if (likely(atomic_inc_not_zero(&nf->nf_ref))) 287 + return nf; 288 + return NULL; 289 + } 290 + 291 + static void 292 + nfsd_file_dispose_list(struct list_head *dispose) 293 + { 294 + struct nfsd_file *nf; 295 + 296 + while(!list_empty(dispose)) { 297 + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 298 + list_del(&nf->nf_lru); 299 + nfsd_file_put_noref(nf); 300 + } 301 + } 302 + 303 + static void 304 + nfsd_file_dispose_list_sync(struct list_head *dispose) 305 + { 306 + bool flush = false; 307 + struct nfsd_file *nf; 308 + 309 + 
while(!list_empty(dispose)) { 310 + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 311 + list_del(&nf->nf_lru); 312 + if (!atomic_dec_and_test(&nf->nf_ref)) 313 + continue; 314 + if (nfsd_file_free(nf)) 315 + flush = true; 316 + } 317 + if (flush) 318 + flush_delayed_fput(); 319 + } 320 + 321 + /* 322 + * Note this can deadlock with nfsd_file_cache_purge. 323 + */ 324 + static enum lru_status 325 + nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, 326 + spinlock_t *lock, void *arg) 327 + __releases(lock) 328 + __acquires(lock) 329 + { 330 + struct list_head *head = arg; 331 + struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); 332 + 333 + /* 334 + * Do a lockless refcount check. The hashtable holds one reference, so 335 + * we look to see if anything else has a reference, or if any have 336 + * been put since the shrinker last ran. Those don't get unhashed and 337 + * released. 338 + * 339 + * Note that in the put path, we set the flag and then decrement the 340 + * counter. Here we check the counter and then test and clear the flag. 341 + * That order is deliberate to ensure that we can do this locklessly. 
342 + */ 343 + if (atomic_read(&nf->nf_ref) > 1) 344 + goto out_skip; 345 + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) 346 + goto out_rescan; 347 + 348 + if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) 349 + goto out_skip; 350 + 351 + list_lru_isolate_move(lru, &nf->nf_lru, head); 352 + return LRU_REMOVED; 353 + out_rescan: 354 + set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags); 355 + out_skip: 356 + return LRU_SKIP; 357 + } 358 + 359 + static void 360 + nfsd_file_lru_dispose(struct list_head *head) 361 + { 362 + while(!list_empty(head)) { 363 + struct nfsd_file *nf = list_first_entry(head, 364 + struct nfsd_file, nf_lru); 365 + list_del_init(&nf->nf_lru); 366 + spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 367 + nfsd_file_do_unhash(nf); 368 + spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 369 + nfsd_file_put_noref(nf); 370 + } 371 + } 372 + 373 + static unsigned long 374 + nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) 375 + { 376 + return list_lru_count(&nfsd_file_lru); 377 + } 378 + 379 + static unsigned long 380 + nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) 381 + { 382 + LIST_HEAD(head); 383 + unsigned long ret; 384 + 385 + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head); 386 + nfsd_file_lru_dispose(&head); 387 + return ret; 388 + } 389 + 390 + static struct shrinker nfsd_file_shrinker = { 391 + .scan_objects = nfsd_file_lru_scan, 392 + .count_objects = nfsd_file_lru_count, 393 + .seeks = 1, 394 + }; 395 + 396 + static void 397 + __nfsd_file_close_inode(struct inode *inode, unsigned int hashval, 398 + struct list_head *dispose) 399 + { 400 + struct nfsd_file *nf; 401 + struct hlist_node *tmp; 402 + 403 + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 404 + hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { 405 + if (inode == nf->nf_inode) 406 + nfsd_file_unhash_and_release_locked(nf, dispose); 407 + } 408 + 
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 409 + } 410 + 411 + /** 412 + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 413 + * @inode: inode of the file to attempt to remove 414 + * 415 + * Walk the whole hash bucket, looking for any files that correspond to "inode". 416 + * If any do, then unhash them and put the hashtable reference to them and 417 + * destroy any that had their last reference put. Also ensure that any of the 418 + * fputs also have their final __fput done as well. 419 + */ 420 + void 421 + nfsd_file_close_inode_sync(struct inode *inode) 422 + { 423 + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 424 + NFSD_FILE_HASH_BITS); 425 + LIST_HEAD(dispose); 426 + 427 + __nfsd_file_close_inode(inode, hashval, &dispose); 428 + trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); 429 + nfsd_file_dispose_list_sync(&dispose); 430 + } 431 + 432 + /** 433 + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 434 + * @inode: inode of the file to attempt to remove 435 + * 436 + * Walk the whole hash bucket, looking for any files that correspond to "inode". 437 + * If any do, then unhash them and put the hashtable reference to them and 438 + * destroy any that had their last reference put. 439 + */ 440 + static void 441 + nfsd_file_close_inode(struct inode *inode) 442 + { 443 + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 444 + NFSD_FILE_HASH_BITS); 445 + LIST_HEAD(dispose); 446 + 447 + __nfsd_file_close_inode(inode, hashval, &dispose); 448 + trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); 449 + nfsd_file_dispose_list(&dispose); 450 + } 451 + 452 + /** 453 + * nfsd_file_delayed_close - close unused nfsd_files 454 + * @work: dummy 455 + * 456 + * Walk the LRU list and close any entries that have not been used since 457 + * the last scan. 458 + * 459 + * Note this can deadlock with nfsd_file_cache_purge. 
460 + */ 461 + static void 462 + nfsd_file_delayed_close(struct work_struct *work) 463 + { 464 + LIST_HEAD(head); 465 + 466 + list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); 467 + 468 + if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags)) 469 + nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH); 470 + 471 + if (!list_empty(&head)) { 472 + nfsd_file_lru_dispose(&head); 473 + flush_delayed_fput(); 474 + } 475 + } 476 + 477 + static int 478 + nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 479 + void *data) 480 + { 481 + struct file_lock *fl = data; 482 + 483 + /* Only close files for F_SETLEASE leases */ 484 + if (fl->fl_flags & FL_LEASE) 485 + nfsd_file_close_inode_sync(file_inode(fl->fl_file)); 486 + return 0; 487 + } 488 + 489 + static struct notifier_block nfsd_file_lease_notifier = { 490 + .notifier_call = nfsd_file_lease_notifier_call, 491 + }; 492 + 493 + static int 494 + nfsd_file_fsnotify_handle_event(struct fsnotify_group *group, 495 + struct inode *inode, 496 + u32 mask, const void *data, int data_type, 497 + const struct qstr *file_name, u32 cookie, 498 + struct fsnotify_iter_info *iter_info) 499 + { 500 + trace_nfsd_file_fsnotify_handle_event(inode, mask); 501 + 502 + /* Should be no marks on non-regular files */ 503 + if (!S_ISREG(inode->i_mode)) { 504 + WARN_ON_ONCE(1); 505 + return 0; 506 + } 507 + 508 + /* don't close files if this was not the last link */ 509 + if (mask & FS_ATTRIB) { 510 + if (inode->i_nlink) 511 + return 0; 512 + } 513 + 514 + nfsd_file_close_inode(inode); 515 + return 0; 516 + } 517 + 518 + 519 + static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 520 + .handle_event = nfsd_file_fsnotify_handle_event, 521 + .free_mark = nfsd_file_mark_free, 522 + }; 523 + 524 + int 525 + nfsd_file_cache_init(void) 526 + { 527 + int ret = -ENOMEM; 528 + unsigned int i; 529 + 530 + clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 531 + 532 + if (nfsd_file_hashtbl) 533 
+ return 0; 534 + 535 + nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, 536 + sizeof(*nfsd_file_hashtbl), GFP_KERNEL); 537 + if (!nfsd_file_hashtbl) { 538 + pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); 539 + goto out_err; 540 + } 541 + 542 + nfsd_file_slab = kmem_cache_create("nfsd_file", 543 + sizeof(struct nfsd_file), 0, 0, NULL); 544 + if (!nfsd_file_slab) { 545 + pr_err("nfsd: unable to create nfsd_file_slab\n"); 546 + goto out_err; 547 + } 548 + 549 + nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 550 + sizeof(struct nfsd_file_mark), 0, 0, NULL); 551 + if (!nfsd_file_mark_slab) { 552 + pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 553 + goto out_err; 554 + } 555 + 556 + 557 + ret = list_lru_init(&nfsd_file_lru); 558 + if (ret) { 559 + pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 560 + goto out_err; 561 + } 562 + 563 + ret = register_shrinker(&nfsd_file_shrinker); 564 + if (ret) { 565 + pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 566 + goto out_lru; 567 + } 568 + 569 + ret = lease_register_notifier(&nfsd_file_lease_notifier); 570 + if (ret) { 571 + pr_err("nfsd: unable to register lease notifier: %d\n", ret); 572 + goto out_shrinker; 573 + } 574 + 575 + nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); 576 + if (IS_ERR(nfsd_file_fsnotify_group)) { 577 + pr_err("nfsd: unable to create fsnotify group: %ld\n", 578 + PTR_ERR(nfsd_file_fsnotify_group)); 579 + nfsd_file_fsnotify_group = NULL; 580 + goto out_notifier; 581 + } 582 + 583 + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 584 + INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); 585 + spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); 586 + } 587 + 588 + INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close); 589 + out: 590 + return ret; 591 + out_notifier: 592 + lease_unregister_notifier(&nfsd_file_lease_notifier); 593 + out_shrinker: 594 + unregister_shrinker(&nfsd_file_shrinker); 595 + out_lru: 596 + 
list_lru_destroy(&nfsd_file_lru); 597 + out_err: 598 + kmem_cache_destroy(nfsd_file_slab); 599 + nfsd_file_slab = NULL; 600 + kmem_cache_destroy(nfsd_file_mark_slab); 601 + nfsd_file_mark_slab = NULL; 602 + kfree(nfsd_file_hashtbl); 603 + nfsd_file_hashtbl = NULL; 604 + goto out; 605 + } 606 + 607 + /* 608 + * Note this can deadlock with nfsd_file_lru_cb. 609 + */ 610 + void 611 + nfsd_file_cache_purge(void) 612 + { 613 + unsigned int i; 614 + struct nfsd_file *nf; 615 + LIST_HEAD(dispose); 616 + bool del; 617 + 618 + if (!nfsd_file_hashtbl) 619 + return; 620 + 621 + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 622 + spin_lock(&nfsd_file_hashtbl[i].nfb_lock); 623 + while(!hlist_empty(&nfsd_file_hashtbl[i].nfb_head)) { 624 + nf = hlist_entry(nfsd_file_hashtbl[i].nfb_head.first, 625 + struct nfsd_file, nf_node); 626 + del = nfsd_file_unhash_and_release_locked(nf, &dispose); 627 + 628 + /* 629 + * Deadlock detected! Something marked this entry as 630 + * unhased, but hasn't removed it from the hash list. 
631 + */ 632 + WARN_ON_ONCE(!del); 633 + } 634 + spin_unlock(&nfsd_file_hashtbl[i].nfb_lock); 635 + nfsd_file_dispose_list(&dispose); 636 + } 637 + } 638 + 639 + void 640 + nfsd_file_cache_shutdown(void) 641 + { 642 + LIST_HEAD(dispose); 643 + 644 + set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 645 + 646 + lease_unregister_notifier(&nfsd_file_lease_notifier); 647 + unregister_shrinker(&nfsd_file_shrinker); 648 + /* 649 + * make sure all callers of nfsd_file_lru_cb are done before 650 + * calling nfsd_file_cache_purge 651 + */ 652 + cancel_delayed_work_sync(&nfsd_filecache_laundrette); 653 + nfsd_file_cache_purge(); 654 + list_lru_destroy(&nfsd_file_lru); 655 + rcu_barrier(); 656 + fsnotify_put_group(nfsd_file_fsnotify_group); 657 + nfsd_file_fsnotify_group = NULL; 658 + kmem_cache_destroy(nfsd_file_slab); 659 + nfsd_file_slab = NULL; 660 + fsnotify_wait_marks_destroyed(); 661 + kmem_cache_destroy(nfsd_file_mark_slab); 662 + nfsd_file_mark_slab = NULL; 663 + kfree(nfsd_file_hashtbl); 664 + nfsd_file_hashtbl = NULL; 665 + } 666 + 667 + static bool 668 + nfsd_match_cred(const struct cred *c1, const struct cred *c2) 669 + { 670 + int i; 671 + 672 + if (!uid_eq(c1->fsuid, c2->fsuid)) 673 + return false; 674 + if (!gid_eq(c1->fsgid, c2->fsgid)) 675 + return false; 676 + if (c1->group_info == NULL || c2->group_info == NULL) 677 + return c1->group_info == c2->group_info; 678 + if (c1->group_info->ngroups != c2->group_info->ngroups) 679 + return false; 680 + for (i = 0; i < c1->group_info->ngroups; i++) { 681 + if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 682 + return false; 683 + } 684 + return true; 685 + } 686 + 687 + static struct nfsd_file * 688 + nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, 689 + unsigned int hashval) 690 + { 691 + struct nfsd_file *nf; 692 + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 693 + 694 + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 695 + nf_node) { 696 + if ((need & 
nf->nf_may) != need) 697 + continue; 698 + if (nf->nf_inode != inode) 699 + continue; 700 + if (!nfsd_match_cred(nf->nf_cred, current_cred())) 701 + continue; 702 + if (nfsd_file_get(nf) != NULL) 703 + return nf; 704 + } 705 + return NULL; 706 + } 707 + 708 + /** 709 + * nfsd_file_is_cached - are there any cached open files for this fh? 710 + * @inode: inode of the file to check 711 + * 712 + * Scan the hashtable for open files that match this fh. Returns true if there 713 + * are any, and false if not. 714 + */ 715 + bool 716 + nfsd_file_is_cached(struct inode *inode) 717 + { 718 + bool ret = false; 719 + struct nfsd_file *nf; 720 + unsigned int hashval; 721 + 722 + hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 723 + 724 + rcu_read_lock(); 725 + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 726 + nf_node) { 727 + if (inode == nf->nf_inode) { 728 + ret = true; 729 + break; 730 + } 731 + } 732 + rcu_read_unlock(); 733 + trace_nfsd_file_is_cached(inode, hashval, (int)ret); 734 + return ret; 735 + } 736 + 737 + __be32 738 + nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 739 + unsigned int may_flags, struct nfsd_file **pnf) 740 + { 741 + __be32 status; 742 + struct nfsd_file *nf, *new; 743 + struct inode *inode; 744 + unsigned int hashval; 745 + 746 + /* FIXME: skip this if fh_dentry is already set? 
*/ 747 + status = fh_verify(rqstp, fhp, S_IFREG, 748 + may_flags|NFSD_MAY_OWNER_OVERRIDE); 749 + if (status != nfs_ok) 750 + return status; 751 + 752 + inode = d_inode(fhp->fh_dentry); 753 + hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 754 + retry: 755 + rcu_read_lock(); 756 + nf = nfsd_file_find_locked(inode, may_flags, hashval); 757 + rcu_read_unlock(); 758 + if (nf) 759 + goto wait_for_construction; 760 + 761 + new = nfsd_file_alloc(inode, may_flags, hashval); 762 + if (!new) { 763 + trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, 764 + NULL, nfserr_jukebox); 765 + return nfserr_jukebox; 766 + } 767 + 768 + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 769 + nf = nfsd_file_find_locked(inode, may_flags, hashval); 770 + if (nf == NULL) 771 + goto open_file; 772 + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 773 + nfsd_file_slab_free(&new->nf_rcu); 774 + 775 + wait_for_construction: 776 + wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 777 + 778 + /* Did construction of this file fail? 
*/ 779 + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 780 + nfsd_file_put_noref(nf); 781 + goto retry; 782 + } 783 + 784 + this_cpu_inc(nfsd_file_cache_hits); 785 + 786 + if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { 787 + bool write = (may_flags & NFSD_MAY_WRITE); 788 + 789 + if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || 790 + (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { 791 + status = nfserrno(nfsd_open_break_lease( 792 + file_inode(nf->nf_file), may_flags)); 793 + if (status == nfs_ok) { 794 + clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 795 + if (write) 796 + clear_bit(NFSD_FILE_BREAK_WRITE, 797 + &nf->nf_flags); 798 + } 799 + } 800 + } 801 + out: 802 + if (status == nfs_ok) { 803 + *pnf = nf; 804 + } else { 805 + nfsd_file_put(nf); 806 + nf = NULL; 807 + } 808 + 809 + trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); 810 + return status; 811 + open_file: 812 + nf = new; 813 + /* Take reference for the hashtable */ 814 + atomic_inc(&nf->nf_ref); 815 + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 816 + __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 817 + list_lru_add(&nfsd_file_lru, &nf->nf_lru); 818 + hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); 819 + ++nfsd_file_hashtbl[hashval].nfb_count; 820 + nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, 821 + nfsd_file_hashtbl[hashval].nfb_count); 822 + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 823 + atomic_long_inc(&nfsd_filecache_count); 824 + 825 + nf->nf_mark = nfsd_file_mark_find_or_create(nf); 826 + if (nf->nf_mark) 827 + status = nfsd_open_verified(rqstp, fhp, S_IFREG, 828 + may_flags, &nf->nf_file); 829 + else 830 + status = nfserr_jukebox; 831 + /* 832 + * If construction failed, or we raced with a call to unlink() 833 + * then unhash. 
834 + */ 835 + if (status != nfs_ok || inode->i_nlink == 0) { 836 + bool do_free; 837 + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 838 + do_free = nfsd_file_unhash(nf); 839 + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 840 + if (do_free) 841 + nfsd_file_put_noref(nf); 842 + } 843 + clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); 844 + smp_mb__after_atomic(); 845 + wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); 846 + goto out; 847 + } 848 + 849 + /* 850 + * Note that fields may be added, removed or reordered in the future. Programs 851 + * scraping this file for info should test the labels to ensure they're 852 + * getting the correct field. 853 + */ 854 + static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 855 + { 856 + unsigned int i, count = 0, longest = 0; 857 + unsigned long hits = 0; 858 + 859 + /* 860 + * No need for spinlocks here since we're not terribly interested in 861 + * accuracy. We do take the nfsd_mutex simply to ensure that we 862 + * don't end up racing with server shutdown 863 + */ 864 + mutex_lock(&nfsd_mutex); 865 + if (nfsd_file_hashtbl) { 866 + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 867 + count += nfsd_file_hashtbl[i].nfb_count; 868 + longest = max(longest, nfsd_file_hashtbl[i].nfb_count); 869 + } 870 + } 871 + mutex_unlock(&nfsd_mutex); 872 + 873 + for_each_possible_cpu(i) 874 + hits += per_cpu(nfsd_file_cache_hits, i); 875 + 876 + seq_printf(m, "total entries: %u\n", count); 877 + seq_printf(m, "longest chain: %u\n", longest); 878 + seq_printf(m, "cache hits: %lu\n", hits); 879 + return 0; 880 + } 881 + 882 + int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) 883 + { 884 + return single_open(file, nfsd_file_cache_stats_show, NULL); 885 + }
+60
fs/nfsd/filecache.h
··· 1 + #ifndef _FS_NFSD_FILECACHE_H 2 + #define _FS_NFSD_FILECACHE_H 3 + 4 + #include <linux/fsnotify_backend.h> 5 + 6 + /* 7 + * This is the fsnotify_mark container that nfsd attaches to the files that it 8 + * is holding open. Note that we have a separate refcount here aside from the 9 + * one in the fsnotify_mark. We only want a single fsnotify_mark attached to 10 + * the inode, and for each nfsd_file to hold a reference to it. 11 + * 12 + * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us 13 + * how to put that reference. If there are still outstanding nfsd_files that 14 + * reference the mark, then we would want to call fsnotify_put_mark on it. 15 + * If there were not, then we'd need to call fsnotify_destroy_mark. Since we 16 + * can't really tell the difference, we use the nfm_mark to keep track of how 17 + * many nfsd_files hold references to the mark. When that counter goes to zero 18 + * then we know to call fsnotify_destroy_mark on it. 19 + */ 20 + struct nfsd_file_mark { 21 + struct fsnotify_mark nfm_mark; 22 + atomic_t nfm_ref; 23 + }; 24 + 25 + /* 26 + * A representation of a file that has been opened by knfsd. These are hashed 27 + * in the hashtable by inode pointer value. Note that this object doesn't 28 + * hold a reference to the inode by itself, so the nf_inode pointer should 29 + * never be dereferenced, only used for comparison. 
30 + */ 31 + struct nfsd_file { 32 + struct hlist_node nf_node; 33 + struct list_head nf_lru; 34 + struct rcu_head nf_rcu; 35 + struct file *nf_file; 36 + const struct cred *nf_cred; 37 + #define NFSD_FILE_HASHED (0) 38 + #define NFSD_FILE_PENDING (1) 39 + #define NFSD_FILE_BREAK_READ (2) 40 + #define NFSD_FILE_BREAK_WRITE (3) 41 + #define NFSD_FILE_REFERENCED (4) 42 + unsigned long nf_flags; 43 + struct inode *nf_inode; 44 + unsigned int nf_hashval; 45 + atomic_t nf_ref; 46 + unsigned char nf_may; 47 + struct nfsd_file_mark *nf_mark; 48 + }; 49 + 50 + int nfsd_file_cache_init(void); 51 + void nfsd_file_cache_purge(void); 52 + void nfsd_file_cache_shutdown(void); 53 + void nfsd_file_put(struct nfsd_file *nf); 54 + struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); 55 + void nfsd_file_close_inode_sync(struct inode *inode); 56 + bool nfsd_file_is_cached(struct inode *inode); 57 + __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 58 + unsigned int may_flags, struct nfsd_file **nfp); 59 + int nfsd_file_cache_stats_open(struct inode *, struct file *); 60 + #endif /* _FS_NFSD_FILECACHE_H */
+8 -1
fs/nfsd/nfssvc.c
··· 27 27 #include "cache.h" 28 28 #include "vfs.h" 29 29 #include "netns.h" 30 + #include "filecache.h" 30 31 31 32 #define NFSDDBG_FACILITY NFSDDBG_SVC 32 33 ··· 314 313 if (nfsd_users++) 315 314 return 0; 316 315 316 + ret = nfsd_file_cache_init(); 317 + if (ret) 318 + goto dec_users; 317 319 /* 318 320 * Readahead param cache - will no-op if it already exists. 319 321 * (Note therefore results will be suboptimal if number of ··· 324 320 */ 325 321 ret = nfsd_racache_init(2*nrservs); 326 322 if (ret) 327 - goto dec_users; 323 + goto out_file_cache; 328 324 329 325 ret = nfs4_state_start(); 330 326 if (ret) ··· 333 329 334 330 out_racache: 335 331 nfsd_racache_shutdown(); 332 + out_file_cache: 333 + nfsd_file_cache_shutdown(); 336 334 dec_users: 337 335 nfsd_users--; 338 336 return ret; ··· 346 340 return; 347 341 348 342 nfs4_state_shutdown(); 343 + nfsd_file_cache_shutdown(); 349 344 nfsd_racache_shutdown(); 350 345 } 351 346
+140
fs/nfsd/trace.h
··· 126 126 DEFINE_NFSD_ERR_EVENT(write_err); 127 127 128 128 #include "state.h" 129 + #include "filecache.h" 130 + #include "vfs.h" 129 131 130 132 DECLARE_EVENT_CLASS(nfsd_stateid_class, 131 133 TP_PROTO(stateid_t *stp), ··· 165 163 DEFINE_STATEID_EVENT(layout_recall_done); 166 164 DEFINE_STATEID_EVENT(layout_recall_fail); 167 165 DEFINE_STATEID_EVENT(layout_recall_release); 166 + 167 + #define show_nf_flags(val) \ 168 + __print_flags(val, "|", \ 169 + { 1 << NFSD_FILE_HASHED, "HASHED" }, \ 170 + { 1 << NFSD_FILE_PENDING, "PENDING" }, \ 171 + { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ 172 + { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ 173 + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) 174 + 175 + /* FIXME: This should probably be fleshed out in the future. */ 176 + #define show_nf_may(val) \ 177 + __print_flags(val, "|", \ 178 + { NFSD_MAY_READ, "READ" }, \ 179 + { NFSD_MAY_WRITE, "WRITE" }, \ 180 + { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }) 181 + 182 + DECLARE_EVENT_CLASS(nfsd_file_class, 183 + TP_PROTO(struct nfsd_file *nf), 184 + TP_ARGS(nf), 185 + TP_STRUCT__entry( 186 + __field(unsigned int, nf_hashval) 187 + __field(void *, nf_inode) 188 + __field(int, nf_ref) 189 + __field(unsigned long, nf_flags) 190 + __field(unsigned char, nf_may) 191 + __field(struct file *, nf_file) 192 + ), 193 + TP_fast_assign( 194 + __entry->nf_hashval = nf->nf_hashval; 195 + __entry->nf_inode = nf->nf_inode; 196 + __entry->nf_ref = atomic_read(&nf->nf_ref); 197 + __entry->nf_flags = nf->nf_flags; 198 + __entry->nf_may = nf->nf_may; 199 + __entry->nf_file = nf->nf_file; 200 + ), 201 + TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p", 202 + __entry->nf_hashval, 203 + __entry->nf_inode, 204 + __entry->nf_ref, 205 + show_nf_flags(__entry->nf_flags), 206 + show_nf_may(__entry->nf_may), 207 + __entry->nf_file) 208 + ) 209 + 210 + #define DEFINE_NFSD_FILE_EVENT(name) \ 211 + DEFINE_EVENT(nfsd_file_class, name, \ 212 + TP_PROTO(struct nfsd_file *nf), \ 213 + 
TP_ARGS(nf)) 214 + 215 + DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); 216 + DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); 217 + DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); 218 + DEFINE_NFSD_FILE_EVENT(nfsd_file_put); 219 + DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); 220 + 221 + TRACE_EVENT(nfsd_file_acquire, 222 + TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, 223 + struct inode *inode, unsigned int may_flags, 224 + struct nfsd_file *nf, __be32 status), 225 + 226 + TP_ARGS(rqstp, hash, inode, may_flags, nf, status), 227 + 228 + TP_STRUCT__entry( 229 + __field(__be32, xid) 230 + __field(unsigned int, hash) 231 + __field(void *, inode) 232 + __field(unsigned int, may_flags) 233 + __field(int, nf_ref) 234 + __field(unsigned long, nf_flags) 235 + __field(unsigned char, nf_may) 236 + __field(struct file *, nf_file) 237 + __field(__be32, status) 238 + ), 239 + 240 + TP_fast_assign( 241 + __entry->xid = rqstp->rq_xid; 242 + __entry->hash = hash; 243 + __entry->inode = inode; 244 + __entry->may_flags = may_flags; 245 + __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0; 246 + __entry->nf_flags = nf ? nf->nf_flags : 0; 247 + __entry->nf_may = nf ? nf->nf_may : 0; 248 + __entry->nf_file = nf ? 
nf->nf_file : NULL; 249 + __entry->status = status; 250 + ), 251 + 252 + TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", 253 + be32_to_cpu(__entry->xid), __entry->hash, __entry->inode, 254 + show_nf_may(__entry->may_flags), __entry->nf_ref, 255 + show_nf_flags(__entry->nf_flags), 256 + show_nf_may(__entry->nf_may), __entry->nf_file, 257 + be32_to_cpu(__entry->status)) 258 + ); 259 + 260 + DECLARE_EVENT_CLASS(nfsd_file_search_class, 261 + TP_PROTO(struct inode *inode, unsigned int hash, int found), 262 + TP_ARGS(inode, hash, found), 263 + TP_STRUCT__entry( 264 + __field(struct inode *, inode) 265 + __field(unsigned int, hash) 266 + __field(int, found) 267 + ), 268 + TP_fast_assign( 269 + __entry->inode = inode; 270 + __entry->hash = hash; 271 + __entry->found = found; 272 + ), 273 + TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, 274 + __entry->inode, __entry->found) 275 + ); 276 + 277 + #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ 278 + DEFINE_EVENT(nfsd_file_search_class, name, \ 279 + TP_PROTO(struct inode *inode, unsigned int hash, int found), \ 280 + TP_ARGS(inode, hash, found)) 281 + 282 + DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); 283 + DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); 284 + DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); 285 + 286 + TRACE_EVENT(nfsd_file_fsnotify_handle_event, 287 + TP_PROTO(struct inode *inode, u32 mask), 288 + TP_ARGS(inode, mask), 289 + TP_STRUCT__entry( 290 + __field(struct inode *, inode) 291 + __field(unsigned int, nlink) 292 + __field(umode_t, mode) 293 + __field(u32, mask) 294 + ), 295 + TP_fast_assign( 296 + __entry->inode = inode; 297 + __entry->nlink = inode->i_nlink; 298 + __entry->mode = inode->i_mode; 299 + __entry->mask = mask; 300 + ), 301 + TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, 302 + __entry->nlink, __entry->mode, __entry->mask) 303 + ); 168 304 169 305 #endif /* _NFSD_TRACE_H */ 170 
306
+43 -22
fs/nfsd/vfs.c
··· 699 699 } 700 700 #endif /* CONFIG_NFSD_V3 */ 701 701 702 - static int nfsd_open_break_lease(struct inode *inode, int access) 702 + int nfsd_open_break_lease(struct inode *inode, int access) 703 703 { 704 704 unsigned int mode; 705 705 ··· 715 715 * and additional flags. 716 716 * N.B. After this call fhp needs an fh_put 717 717 */ 718 - __be32 719 - nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 718 + static __be32 719 + __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 720 720 int may_flags, struct file **filp) 721 721 { 722 722 struct path path; ··· 725 725 int flags = O_RDONLY|O_LARGEFILE; 726 726 __be32 err; 727 727 int host_err = 0; 728 - 729 - validate_process_creds(); 730 - 731 - /* 732 - * If we get here, then the client has already done an "open", 733 - * and (hopefully) checked permission - so allow OWNER_OVERRIDE 734 - * in case a chmod has now revoked permission. 735 - * 736 - * Arguably we should also allow the owner override for 737 - * directories, but we never have and it doesn't seem to have 738 - * caused anyone a problem. If we were to change this, note 739 - * also that our filldir callbacks would need a variant of 740 - * lookup_one_len that doesn't check permissions. 
741 - */ 742 - if (type == S_IFREG) 743 - may_flags |= NFSD_MAY_OWNER_OVERRIDE; 744 - err = fh_verify(rqstp, fhp, type, may_flags); 745 - if (err) 746 - goto out; 747 728 748 729 path.mnt = fhp->fh_export->ex_path.mnt; 749 730 path.dentry = fhp->fh_dentry; ··· 779 798 out_nfserr: 780 799 err = nfserrno(host_err); 781 800 out: 801 + return err; 802 + } 803 + 804 + __be32 805 + nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 806 + int may_flags, struct file **filp) 807 + { 808 + __be32 err; 809 + 810 + validate_process_creds(); 811 + /* 812 + * If we get here, then the client has already done an "open", 813 + * and (hopefully) checked permission - so allow OWNER_OVERRIDE 814 + * in case a chmod has now revoked permission. 815 + * 816 + * Arguably we should also allow the owner override for 817 + * directories, but we never have and it doesn't seem to have 818 + * caused anyone a problem. If we were to change this, note 819 + * also that our filldir callbacks would need a variant of 820 + * lookup_one_len that doesn't check permissions. 821 + */ 822 + if (type == S_IFREG) 823 + may_flags |= NFSD_MAY_OWNER_OVERRIDE; 824 + err = fh_verify(rqstp, fhp, type, may_flags); 825 + if (!err) 826 + err = __nfsd_open(rqstp, fhp, type, may_flags, filp); 782 827 validate_process_creds(); 783 828 return err; 784 829 } 830 + 831 + __be32 832 + nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 833 + int may_flags, struct file **filp) 834 + { 835 + __be32 err; 836 + 837 + validate_process_creds(); 838 + err = __nfsd_open(rqstp, fhp, type, may_flags, filp); 839 + validate_process_creds(); 840 + return err; 841 + } 842 + 843 + 785 844 786 845 struct raparms * 787 846 nfsd_init_raparms(struct file *file)
+3
fs/nfsd/vfs.h
··· 75 75 __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 76 76 loff_t, unsigned long); 77 77 #endif /* CONFIG_NFSD_V3 */ 78 + int nfsd_open_break_lease(struct inode *, int); 78 79 __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, 80 + int, struct file **); 81 + __be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t, 79 82 int, struct file **); 80 83 struct raparms; 81 84 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,