nfsd: add a new struct file caching facility to nfsd

+1

fs/nfsd/Kconfig

··· 3 3 tristate "NFS server support" 4 4 depends on INET 5 5 depends on FILE_LOCKING 6 + depends on FSNOTIFY 6 7 select LOCKD 7 8 select SUNRPC 8 9 select EXPORTFS

+2 -1

fs/nfsd/Makefile

··· 11 11 nfsd-y += trace.o 12 12 13 13 nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ 14 - export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o 14 + export.o auth.o lockd.o nfscache.o nfsxdr.o \ 15 + stats.o filecache.o 15 16 nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o 16 17 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o 17 18 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o

+13

fs/nfsd/export.c

··· 22 22 #include "nfsfh.h" 23 23 #include "netns.h" 24 24 #include "pnfs.h" 25 + #include "filecache.h" 25 26 26 27 #define NFSDDBG_FACILITY NFSDDBG_EXPORT 27 28 ··· 233 232 return NULL; 234 233 } 235 234 235 + static void expkey_flush(void) 236 + { 237 + /* 238 + * Take the nfsd_mutex here to ensure that the file cache is not 239 + * destroyed while we're in the middle of flushing. 240 + */ 241 + mutex_lock(&nfsd_mutex); 242 + nfsd_file_cache_purge(); 243 + mutex_unlock(&nfsd_mutex); 244 + } 245 + 236 246 static const struct cache_detail svc_expkey_cache_template = { 237 247 .owner = THIS_MODULE, 238 248 .hash_size = EXPKEY_HASHMAX, ··· 256 244 .init = expkey_init, 257 245 .update = expkey_update, 258 246 .alloc = expkey_alloc, 247 + .flush = expkey_flush, 259 248 }; 260 249 261 250 static int

+885

fs/nfsd/filecache.c

··· 1 + /* 2 + * Open file cache. 3 + * 4 + * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> 5 + */ 6 + 7 + #include <linux/hash.h> 8 + #include <linux/slab.h> 9 + #include <linux/hash.h> 10 + #include <linux/file.h> 11 + #include <linux/sched.h> 12 + #include <linux/list_lru.h> 13 + #include <linux/fsnotify_backend.h> 14 + #include <linux/fsnotify.h> 15 + #include <linux/seq_file.h> 16 + 17 + #include "vfs.h" 18 + #include "nfsd.h" 19 + #include "nfsfh.h" 20 + #include "filecache.h" 21 + #include "trace.h" 22 + 23 + #define NFSDDBG_FACILITY NFSDDBG_FH 24 + 25 + /* FIXME: dynamically size this for the machine somehow? */ 26 + #define NFSD_FILE_HASH_BITS 12 27 + #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) 28 + #define NFSD_LAUNDRETTE_DELAY (2 * HZ) 29 + 30 + #define NFSD_FILE_LRU_RESCAN (0) 31 + #define NFSD_FILE_SHUTDOWN (1) 32 + #define NFSD_FILE_LRU_THRESHOLD (4096UL) 33 + #define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) 34 + 35 + /* We only care about NFSD_MAY_READ/WRITE for this cache */ 36 + #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) 37 + 38 + struct nfsd_fcache_bucket { 39 + struct hlist_head nfb_head; 40 + spinlock_t nfb_lock; 41 + unsigned int nfb_count; 42 + unsigned int nfb_maxcount; 43 + }; 44 + 45 + static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); 46 + 47 + static struct kmem_cache *nfsd_file_slab; 48 + static struct kmem_cache *nfsd_file_mark_slab; 49 + static struct nfsd_fcache_bucket *nfsd_file_hashtbl; 50 + static struct list_lru nfsd_file_lru; 51 + static long nfsd_file_lru_flags; 52 + static struct fsnotify_group *nfsd_file_fsnotify_group; 53 + static atomic_long_t nfsd_filecache_count; 54 + static struct delayed_work nfsd_filecache_laundrette; 55 + 56 + enum nfsd_file_laundrette_ctl { 57 + NFSD_FILE_LAUNDRETTE_NOFLUSH = 0, 58 + NFSD_FILE_LAUNDRETTE_MAY_FLUSH 59 + }; 60 + 61 + static void 62 + nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl) 63 + { 64 + long count = 
atomic_long_read(&nfsd_filecache_count); 65 + 66 + if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) 67 + return; 68 + 69 + /* Be more aggressive about scanning if over the threshold */ 70 + if (count > NFSD_FILE_LRU_THRESHOLD) 71 + mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0); 72 + else 73 + schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); 74 + 75 + if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH) 76 + return; 77 + 78 + /* ...and don't delay flushing if we're out of control */ 79 + if (count >= NFSD_FILE_LRU_LIMIT) 80 + flush_delayed_work(&nfsd_filecache_laundrette); 81 + } 82 + 83 + static void 84 + nfsd_file_slab_free(struct rcu_head *rcu) 85 + { 86 + struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); 87 + 88 + put_cred(nf->nf_cred); 89 + kmem_cache_free(nfsd_file_slab, nf); 90 + } 91 + 92 + static void 93 + nfsd_file_mark_free(struct fsnotify_mark *mark) 94 + { 95 + struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, 96 + nfm_mark); 97 + 98 + kmem_cache_free(nfsd_file_mark_slab, nfm); 99 + } 100 + 101 + static struct nfsd_file_mark * 102 + nfsd_file_mark_get(struct nfsd_file_mark *nfm) 103 + { 104 + if (!atomic_inc_not_zero(&nfm->nfm_ref)) 105 + return NULL; 106 + return nfm; 107 + } 108 + 109 + static void 110 + nfsd_file_mark_put(struct nfsd_file_mark *nfm) 111 + { 112 + if (atomic_dec_and_test(&nfm->nfm_ref)) { 113 + 114 + fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); 115 + fsnotify_put_mark(&nfm->nfm_mark); 116 + } 117 + } 118 + 119 + static struct nfsd_file_mark * 120 + nfsd_file_mark_find_or_create(struct nfsd_file *nf) 121 + { 122 + int err; 123 + struct fsnotify_mark *mark; 124 + struct nfsd_file_mark *nfm = NULL, *new; 125 + struct inode *inode = nf->nf_inode; 126 + 127 + do { 128 + mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); 129 + mark = fsnotify_find_mark(&inode->i_fsnotify_marks, 130 + nfsd_file_fsnotify_group); 131 + if (mark) { 132 + 
nfm = nfsd_file_mark_get(container_of(mark, 133 + struct nfsd_file_mark, 134 + nfm_mark)); 135 + mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 136 + fsnotify_put_mark(mark); 137 + if (likely(nfm)) 138 + break; 139 + } else 140 + mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); 141 + 142 + /* allocate a new nfm */ 143 + new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); 144 + if (!new) 145 + return NULL; 146 + fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); 147 + new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; 148 + atomic_set(&new->nfm_ref, 1); 149 + 150 + err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); 151 + 152 + /* 153 + * If the add was successful, then return the object. 154 + * Otherwise, we need to put the reference we hold on the 155 + * nfm_mark. The fsnotify code will take a reference and put 156 + * it on failure, so we can't just free it directly. It's also 157 + * not safe to call fsnotify_destroy_mark on it as the 158 + * mark->group will be NULL. Thus, we can't let the nfm_ref 159 + * counter drive the destruction at this point. 
160 + */ 161 + if (likely(!err)) 162 + nfm = new; 163 + else 164 + fsnotify_put_mark(&new->nfm_mark); 165 + } while (unlikely(err == -EEXIST)); 166 + 167 + return nfm; 168 + } 169 + 170 + static struct nfsd_file * 171 + nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval) 172 + { 173 + struct nfsd_file *nf; 174 + 175 + nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 176 + if (nf) { 177 + INIT_HLIST_NODE(&nf->nf_node); 178 + INIT_LIST_HEAD(&nf->nf_lru); 179 + nf->nf_file = NULL; 180 + nf->nf_cred = get_current_cred(); 181 + nf->nf_flags = 0; 182 + nf->nf_inode = inode; 183 + nf->nf_hashval = hashval; 184 + atomic_set(&nf->nf_ref, 1); 185 + nf->nf_may = may & NFSD_FILE_MAY_MASK; 186 + if (may & NFSD_MAY_NOT_BREAK_LEASE) { 187 + if (may & NFSD_MAY_WRITE) 188 + __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); 189 + if (may & NFSD_MAY_READ) 190 + __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 191 + } 192 + nf->nf_mark = NULL; 193 + trace_nfsd_file_alloc(nf); 194 + } 195 + return nf; 196 + } 197 + 198 + static bool 199 + nfsd_file_free(struct nfsd_file *nf) 200 + { 201 + bool flush = false; 202 + 203 + trace_nfsd_file_put_final(nf); 204 + if (nf->nf_mark) 205 + nfsd_file_mark_put(nf->nf_mark); 206 + if (nf->nf_file) { 207 + get_file(nf->nf_file); 208 + filp_close(nf->nf_file, NULL); 209 + fput(nf->nf_file); 210 + flush = true; 211 + } 212 + call_rcu(&nf->nf_rcu, nfsd_file_slab_free); 213 + return flush; 214 + } 215 + 216 + static void 217 + nfsd_file_do_unhash(struct nfsd_file *nf) 218 + { 219 + lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 220 + 221 + trace_nfsd_file_unhash(nf); 222 + 223 + --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; 224 + hlist_del_rcu(&nf->nf_node); 225 + if (!list_empty(&nf->nf_lru)) 226 + list_lru_del(&nfsd_file_lru, &nf->nf_lru); 227 + atomic_long_dec(&nfsd_filecache_count); 228 + } 229 + 230 + static bool 231 + nfsd_file_unhash(struct nfsd_file *nf) 232 + { 233 + if 
(test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 234 + nfsd_file_do_unhash(nf); 235 + return true; 236 + } 237 + return false; 238 + } 239 + 240 + /* 241 + * Return true if the file was unhashed. 242 + */ 243 + static bool 244 + nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) 245 + { 246 + lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 247 + 248 + trace_nfsd_file_unhash_and_release_locked(nf); 249 + if (!nfsd_file_unhash(nf)) 250 + return false; 251 + /* keep final reference for nfsd_file_lru_dispose */ 252 + if (atomic_add_unless(&nf->nf_ref, -1, 1)) 253 + return true; 254 + 255 + list_add(&nf->nf_lru, dispose); 256 + return true; 257 + } 258 + 259 + static int 260 + nfsd_file_put_noref(struct nfsd_file *nf) 261 + { 262 + int count; 263 + trace_nfsd_file_put(nf); 264 + 265 + count = atomic_dec_return(&nf->nf_ref); 266 + if (!count) { 267 + WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); 268 + nfsd_file_free(nf); 269 + } 270 + return count; 271 + } 272 + 273 + void 274 + nfsd_file_put(struct nfsd_file *nf) 275 + { 276 + bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; 277 + 278 + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); 279 + if (nfsd_file_put_noref(nf) == 1 && is_hashed) 280 + nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH); 281 + } 282 + 283 + struct nfsd_file * 284 + nfsd_file_get(struct nfsd_file *nf) 285 + { 286 + if (likely(atomic_inc_not_zero(&nf->nf_ref))) 287 + return nf; 288 + return NULL; 289 + } 290 + 291 + static void 292 + nfsd_file_dispose_list(struct list_head *dispose) 293 + { 294 + struct nfsd_file *nf; 295 + 296 + while(!list_empty(dispose)) { 297 + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 298 + list_del(&nf->nf_lru); 299 + nfsd_file_put_noref(nf); 300 + } 301 + } 302 + 303 + static void 304 + nfsd_file_dispose_list_sync(struct list_head *dispose) 305 + { 306 + bool flush = false; 307 + struct nfsd_file *nf; 308 + 309 + 
while(!list_empty(dispose)) { 310 + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 311 + list_del(&nf->nf_lru); 312 + if (!atomic_dec_and_test(&nf->nf_ref)) 313 + continue; 314 + if (nfsd_file_free(nf)) 315 + flush = true; 316 + } 317 + if (flush) 318 + flush_delayed_fput(); 319 + } 320 + 321 + /* 322 + * Note this can deadlock with nfsd_file_cache_purge. 323 + */ 324 + static enum lru_status 325 + nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, 326 + spinlock_t *lock, void *arg) 327 + __releases(lock) 328 + __acquires(lock) 329 + { 330 + struct list_head *head = arg; 331 + struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); 332 + 333 + /* 334 + * Do a lockless refcount check. The hashtable holds one reference, so 335 + * we look to see if anything else has a reference, or if any have 336 + * been put since the shrinker last ran. Those don't get unhashed and 337 + * released. 338 + * 339 + * Note that in the put path, we set the flag and then decrement the 340 + * counter. Here we check the counter and then test and clear the flag. 341 + * That order is deliberate to ensure that we can do this locklessly. 
342 + */ 343 + if (atomic_read(&nf->nf_ref) > 1) 344 + goto out_skip; 345 + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) 346 + goto out_rescan; 347 + 348 + if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) 349 + goto out_skip; 350 + 351 + list_lru_isolate_move(lru, &nf->nf_lru, head); 352 + return LRU_REMOVED; 353 + out_rescan: 354 + set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags); 355 + out_skip: 356 + return LRU_SKIP; 357 + } 358 + 359 + static void 360 + nfsd_file_lru_dispose(struct list_head *head) 361 + { 362 + while(!list_empty(head)) { 363 + struct nfsd_file *nf = list_first_entry(head, 364 + struct nfsd_file, nf_lru); 365 + list_del_init(&nf->nf_lru); 366 + spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 367 + nfsd_file_do_unhash(nf); 368 + spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); 369 + nfsd_file_put_noref(nf); 370 + } 371 + } 372 + 373 + static unsigned long 374 + nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) 375 + { 376 + return list_lru_count(&nfsd_file_lru); 377 + } 378 + 379 + static unsigned long 380 + nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) 381 + { 382 + LIST_HEAD(head); 383 + unsigned long ret; 384 + 385 + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head); 386 + nfsd_file_lru_dispose(&head); 387 + return ret; 388 + } 389 + 390 + static struct shrinker nfsd_file_shrinker = { 391 + .scan_objects = nfsd_file_lru_scan, 392 + .count_objects = nfsd_file_lru_count, 393 + .seeks = 1, 394 + }; 395 + 396 + static void 397 + __nfsd_file_close_inode(struct inode *inode, unsigned int hashval, 398 + struct list_head *dispose) 399 + { 400 + struct nfsd_file *nf; 401 + struct hlist_node *tmp; 402 + 403 + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 404 + hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { 405 + if (inode == nf->nf_inode) 406 + nfsd_file_unhash_and_release_locked(nf, dispose); 407 + } 408 + 
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 409 + } 410 + 411 + /** 412 + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 413 + * @inode: inode of the file to attempt to remove 414 + * 415 + * Walk the whole hash bucket, looking for any files that correspond to "inode". 416 + * If any do, then unhash them and put the hashtable reference to them and 417 + * destroy any that had their last reference put. Also ensure that any of the 418 + * fputs also have their final __fput done as well. 419 + */ 420 + void 421 + nfsd_file_close_inode_sync(struct inode *inode) 422 + { 423 + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 424 + NFSD_FILE_HASH_BITS); 425 + LIST_HEAD(dispose); 426 + 427 + __nfsd_file_close_inode(inode, hashval, &dispose); 428 + trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); 429 + nfsd_file_dispose_list_sync(&dispose); 430 + } 431 + 432 + /** 433 + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 434 + * @inode: inode of the file to attempt to remove 435 + * 436 + * Walk the whole hash bucket, looking for any files that correspond to "inode". 437 + * If any do, then unhash them and put the hashtable reference to them and 438 + * destroy any that had their last reference put. 439 + */ 440 + static void 441 + nfsd_file_close_inode(struct inode *inode) 442 + { 443 + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, 444 + NFSD_FILE_HASH_BITS); 445 + LIST_HEAD(dispose); 446 + 447 + __nfsd_file_close_inode(inode, hashval, &dispose); 448 + trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); 449 + nfsd_file_dispose_list(&dispose); 450 + } 451 + 452 + /** 453 + * nfsd_file_delayed_close - close unused nfsd_files 454 + * @work: dummy 455 + * 456 + * Walk the LRU list and close any entries that have not been used since 457 + * the last scan. 458 + * 459 + * Note this can deadlock with nfsd_file_cache_purge. 
460 + */ 461 + static void 462 + nfsd_file_delayed_close(struct work_struct *work) 463 + { 464 + LIST_HEAD(head); 465 + 466 + list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); 467 + 468 + if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags)) 469 + nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH); 470 + 471 + if (!list_empty(&head)) { 472 + nfsd_file_lru_dispose(&head); 473 + flush_delayed_fput(); 474 + } 475 + } 476 + 477 + static int 478 + nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 479 + void *data) 480 + { 481 + struct file_lock *fl = data; 482 + 483 + /* Only close files for F_SETLEASE leases */ 484 + if (fl->fl_flags & FL_LEASE) 485 + nfsd_file_close_inode_sync(file_inode(fl->fl_file)); 486 + return 0; 487 + } 488 + 489 + static struct notifier_block nfsd_file_lease_notifier = { 490 + .notifier_call = nfsd_file_lease_notifier_call, 491 + }; 492 + 493 + static int 494 + nfsd_file_fsnotify_handle_event(struct fsnotify_group *group, 495 + struct inode *inode, 496 + u32 mask, const void *data, int data_type, 497 + const struct qstr *file_name, u32 cookie, 498 + struct fsnotify_iter_info *iter_info) 499 + { 500 + trace_nfsd_file_fsnotify_handle_event(inode, mask); 501 + 502 + /* Should be no marks on non-regular files */ 503 + if (!S_ISREG(inode->i_mode)) { 504 + WARN_ON_ONCE(1); 505 + return 0; 506 + } 507 + 508 + /* don't close files if this was not the last link */ 509 + if (mask & FS_ATTRIB) { 510 + if (inode->i_nlink) 511 + return 0; 512 + } 513 + 514 + nfsd_file_close_inode(inode); 515 + return 0; 516 + } 517 + 518 + 519 + static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 520 + .handle_event = nfsd_file_fsnotify_handle_event, 521 + .free_mark = nfsd_file_mark_free, 522 + }; 523 + 524 + int 525 + nfsd_file_cache_init(void) 526 + { 527 + int ret = -ENOMEM; 528 + unsigned int i; 529 + 530 + clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 531 + 532 + if (nfsd_file_hashtbl) 533 
+ return 0; 534 + 535 + nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, 536 + sizeof(*nfsd_file_hashtbl), GFP_KERNEL); 537 + if (!nfsd_file_hashtbl) { 538 + pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); 539 + goto out_err; 540 + } 541 + 542 + nfsd_file_slab = kmem_cache_create("nfsd_file", 543 + sizeof(struct nfsd_file), 0, 0, NULL); 544 + if (!nfsd_file_slab) { 545 + pr_err("nfsd: unable to create nfsd_file_slab\n"); 546 + goto out_err; 547 + } 548 + 549 + nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 550 + sizeof(struct nfsd_file_mark), 0, 0, NULL); 551 + if (!nfsd_file_mark_slab) { 552 + pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 553 + goto out_err; 554 + } 555 + 556 + 557 + ret = list_lru_init(&nfsd_file_lru); 558 + if (ret) { 559 + pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 560 + goto out_err; 561 + } 562 + 563 + ret = register_shrinker(&nfsd_file_shrinker); 564 + if (ret) { 565 + pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 566 + goto out_lru; 567 + } 568 + 569 + ret = lease_register_notifier(&nfsd_file_lease_notifier); 570 + if (ret) { 571 + pr_err("nfsd: unable to register lease notifier: %d\n", ret); 572 + goto out_shrinker; 573 + } 574 + 575 + nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); 576 + if (IS_ERR(nfsd_file_fsnotify_group)) { 577 + pr_err("nfsd: unable to create fsnotify group: %ld\n", 578 + PTR_ERR(nfsd_file_fsnotify_group)); 579 + nfsd_file_fsnotify_group = NULL; 580 + goto out_notifier; 581 + } 582 + 583 + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 584 + INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); 585 + spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); 586 + } 587 + 588 + INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close); 589 + out: 590 + return ret; 591 + out_notifier: 592 + lease_unregister_notifier(&nfsd_file_lease_notifier); 593 + out_shrinker: 594 + unregister_shrinker(&nfsd_file_shrinker); 595 + out_lru: 596 + 
list_lru_destroy(&nfsd_file_lru); 597 + out_err: 598 + kmem_cache_destroy(nfsd_file_slab); 599 + nfsd_file_slab = NULL; 600 + kmem_cache_destroy(nfsd_file_mark_slab); 601 + nfsd_file_mark_slab = NULL; 602 + kfree(nfsd_file_hashtbl); 603 + nfsd_file_hashtbl = NULL; 604 + goto out; 605 + } 606 + 607 + /* 608 + * Note this can deadlock with nfsd_file_lru_cb. 609 + */ 610 + void 611 + nfsd_file_cache_purge(void) 612 + { 613 + unsigned int i; 614 + struct nfsd_file *nf; 615 + LIST_HEAD(dispose); 616 + bool del; 617 + 618 + if (!nfsd_file_hashtbl) 619 + return; 620 + 621 + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 622 + spin_lock(&nfsd_file_hashtbl[i].nfb_lock); 623 + while(!hlist_empty(&nfsd_file_hashtbl[i].nfb_head)) { 624 + nf = hlist_entry(nfsd_file_hashtbl[i].nfb_head.first, 625 + struct nfsd_file, nf_node); 626 + del = nfsd_file_unhash_and_release_locked(nf, &dispose); 627 + 628 + /* 629 + * Deadlock detected! Something marked this entry as 630 + * unhased, but hasn't removed it from the hash list. 
631 + */ 632 + WARN_ON_ONCE(!del); 633 + } 634 + spin_unlock(&nfsd_file_hashtbl[i].nfb_lock); 635 + nfsd_file_dispose_list(&dispose); 636 + } 637 + } 638 + 639 + void 640 + nfsd_file_cache_shutdown(void) 641 + { 642 + LIST_HEAD(dispose); 643 + 644 + set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); 645 + 646 + lease_unregister_notifier(&nfsd_file_lease_notifier); 647 + unregister_shrinker(&nfsd_file_shrinker); 648 + /* 649 + * make sure all callers of nfsd_file_lru_cb are done before 650 + * calling nfsd_file_cache_purge 651 + */ 652 + cancel_delayed_work_sync(&nfsd_filecache_laundrette); 653 + nfsd_file_cache_purge(); 654 + list_lru_destroy(&nfsd_file_lru); 655 + rcu_barrier(); 656 + fsnotify_put_group(nfsd_file_fsnotify_group); 657 + nfsd_file_fsnotify_group = NULL; 658 + kmem_cache_destroy(nfsd_file_slab); 659 + nfsd_file_slab = NULL; 660 + fsnotify_wait_marks_destroyed(); 661 + kmem_cache_destroy(nfsd_file_mark_slab); 662 + nfsd_file_mark_slab = NULL; 663 + kfree(nfsd_file_hashtbl); 664 + nfsd_file_hashtbl = NULL; 665 + } 666 + 667 + static bool 668 + nfsd_match_cred(const struct cred *c1, const struct cred *c2) 669 + { 670 + int i; 671 + 672 + if (!uid_eq(c1->fsuid, c2->fsuid)) 673 + return false; 674 + if (!gid_eq(c1->fsgid, c2->fsgid)) 675 + return false; 676 + if (c1->group_info == NULL || c2->group_info == NULL) 677 + return c1->group_info == c2->group_info; 678 + if (c1->group_info->ngroups != c2->group_info->ngroups) 679 + return false; 680 + for (i = 0; i < c1->group_info->ngroups; i++) { 681 + if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 682 + return false; 683 + } 684 + return true; 685 + } 686 + 687 + static struct nfsd_file * 688 + nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, 689 + unsigned int hashval) 690 + { 691 + struct nfsd_file *nf; 692 + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 693 + 694 + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 695 + nf_node) { 696 + if ((need & 
nf->nf_may) != need) 697 + continue; 698 + if (nf->nf_inode != inode) 699 + continue; 700 + if (!nfsd_match_cred(nf->nf_cred, current_cred())) 701 + continue; 702 + if (nfsd_file_get(nf) != NULL) 703 + return nf; 704 + } 705 + return NULL; 706 + } 707 + 708 + /** 709 + * nfsd_file_is_cached - are there any cached open files for this fh? 710 + * @inode: inode of the file to check 711 + * 712 + * Scan the hashtable for open files that match this fh. Returns true if there 713 + * are any, and false if not. 714 + */ 715 + bool 716 + nfsd_file_is_cached(struct inode *inode) 717 + { 718 + bool ret = false; 719 + struct nfsd_file *nf; 720 + unsigned int hashval; 721 + 722 + hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 723 + 724 + rcu_read_lock(); 725 + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 726 + nf_node) { 727 + if (inode == nf->nf_inode) { 728 + ret = true; 729 + break; 730 + } 731 + } 732 + rcu_read_unlock(); 733 + trace_nfsd_file_is_cached(inode, hashval, (int)ret); 734 + return ret; 735 + } 736 + 737 + __be32 738 + nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 739 + unsigned int may_flags, struct nfsd_file **pnf) 740 + { 741 + __be32 status; 742 + struct nfsd_file *nf, *new; 743 + struct inode *inode; 744 + unsigned int hashval; 745 + 746 + /* FIXME: skip this if fh_dentry is already set? 
*/ 747 + status = fh_verify(rqstp, fhp, S_IFREG, 748 + may_flags|NFSD_MAY_OWNER_OVERRIDE); 749 + if (status != nfs_ok) 750 + return status; 751 + 752 + inode = d_inode(fhp->fh_dentry); 753 + hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); 754 + retry: 755 + rcu_read_lock(); 756 + nf = nfsd_file_find_locked(inode, may_flags, hashval); 757 + rcu_read_unlock(); 758 + if (nf) 759 + goto wait_for_construction; 760 + 761 + new = nfsd_file_alloc(inode, may_flags, hashval); 762 + if (!new) { 763 + trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, 764 + NULL, nfserr_jukebox); 765 + return nfserr_jukebox; 766 + } 767 + 768 + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 769 + nf = nfsd_file_find_locked(inode, may_flags, hashval); 770 + if (nf == NULL) 771 + goto open_file; 772 + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 773 + nfsd_file_slab_free(&new->nf_rcu); 774 + 775 + wait_for_construction: 776 + wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 777 + 778 + /* Did construction of this file fail? 
*/ 779 + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 780 + nfsd_file_put_noref(nf); 781 + goto retry; 782 + } 783 + 784 + this_cpu_inc(nfsd_file_cache_hits); 785 + 786 + if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { 787 + bool write = (may_flags & NFSD_MAY_WRITE); 788 + 789 + if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || 790 + (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { 791 + status = nfserrno(nfsd_open_break_lease( 792 + file_inode(nf->nf_file), may_flags)); 793 + if (status == nfs_ok) { 794 + clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); 795 + if (write) 796 + clear_bit(NFSD_FILE_BREAK_WRITE, 797 + &nf->nf_flags); 798 + } 799 + } 800 + } 801 + out: 802 + if (status == nfs_ok) { 803 + *pnf = nf; 804 + } else { 805 + nfsd_file_put(nf); 806 + nf = NULL; 807 + } 808 + 809 + trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); 810 + return status; 811 + open_file: 812 + nf = new; 813 + /* Take reference for the hashtable */ 814 + atomic_inc(&nf->nf_ref); 815 + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 816 + __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 817 + list_lru_add(&nfsd_file_lru, &nf->nf_lru); 818 + hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); 819 + ++nfsd_file_hashtbl[hashval].nfb_count; 820 + nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, 821 + nfsd_file_hashtbl[hashval].nfb_count); 822 + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 823 + atomic_long_inc(&nfsd_filecache_count); 824 + 825 + nf->nf_mark = nfsd_file_mark_find_or_create(nf); 826 + if (nf->nf_mark) 827 + status = nfsd_open_verified(rqstp, fhp, S_IFREG, 828 + may_flags, &nf->nf_file); 829 + else 830 + status = nfserr_jukebox; 831 + /* 832 + * If construction failed, or we raced with a call to unlink() 833 + * then unhash. 
834 + */ 835 + if (status != nfs_ok || inode->i_nlink == 0) { 836 + bool do_free; 837 + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); 838 + do_free = nfsd_file_unhash(nf); 839 + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); 840 + if (do_free) 841 + nfsd_file_put_noref(nf); 842 + } 843 + clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); 844 + smp_mb__after_atomic(); 845 + wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); 846 + goto out; 847 + } 848 + 849 + /* 850 + * Note that fields may be added, removed or reordered in the future. Programs 851 + * scraping this file for info should test the labels to ensure they're 852 + * getting the correct field. 853 + */ 854 + static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 855 + { 856 + unsigned int i, count = 0, longest = 0; 857 + unsigned long hits = 0; 858 + 859 + /* 860 + * No need for spinlocks here since we're not terribly interested in 861 + * accuracy. We do take the nfsd_mutex simply to ensure that we 862 + * don't end up racing with server shutdown 863 + */ 864 + mutex_lock(&nfsd_mutex); 865 + if (nfsd_file_hashtbl) { 866 + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { 867 + count += nfsd_file_hashtbl[i].nfb_count; 868 + longest = max(longest, nfsd_file_hashtbl[i].nfb_count); 869 + } 870 + } 871 + mutex_unlock(&nfsd_mutex); 872 + 873 + for_each_possible_cpu(i) 874 + hits += per_cpu(nfsd_file_cache_hits, i); 875 + 876 + seq_printf(m, "total entries: %u\n", count); 877 + seq_printf(m, "longest chain: %u\n", longest); 878 + seq_printf(m, "cache hits: %lu\n", hits); 879 + return 0; 880 + } 881 + 882 + int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) 883 + { 884 + return single_open(file, nfsd_file_cache_stats_show, NULL); 885 + }

+60

fs/nfsd/filecache.h

#ifndef _FS_NFSD_FILECACHE_H
#define _FS_NFSD_FILECACHE_H

#include <linux/fsnotify_backend.h>

/*
 * This is the fsnotify_mark container that nfsd attaches to the files that it
 * is holding open. Note that we have a separate refcount here aside from the
 * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
 * the inode, and for each nfsd_file to hold a reference to it.
 *
 * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
 * how to put that reference. If there are still outstanding nfsd_files that
 * reference the mark, then we would want to call fsnotify_put_mark on it.
 * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
 * can't really tell the difference, we use nfm_ref to keep track of how
 * many nfsd_files hold references to the mark. When that counter goes to zero
 * then we know to call fsnotify_destroy_mark on it.
 */
struct nfsd_file_mark {
	struct fsnotify_mark	nfm_mark;
	atomic_t		nfm_ref;
};

/*
 * A representation of a file that has been opened by knfsd. These are hashed
 * in the hashtable by inode number (hash_long() of inode->i_ino); entries in
 * a chain are then matched by comparing nf_inode. Note that this object
 * doesn't hold a reference to the inode by itself, so the nf_inode pointer
 * should never be dereferenced, only used for comparison.
 *
 * nf_flags bits:
 *   NFSD_FILE_HASHED      - entry is in the hashtable
 *   NFSD_FILE_PENDING     - entry is still being constructed; other users
 *                           wait on this bit before using it
 *   NFSD_FILE_BREAK_READ  - opened for read with NFSD_MAY_NOT_BREAK_LEASE
 *   NFSD_FILE_BREAK_WRITE - opened for write with NFSD_MAY_NOT_BREAK_LEASE
 *   NFSD_FILE_REFERENCED  - set on nfsd_file_put(); tested and cleared by
 *                           the LRU scan
 */
struct nfsd_file {
	struct hlist_node	nf_node;	/* hash chain linkage */
	struct list_head	nf_lru;		/* LRU / dispose list linkage */
	struct rcu_head		nf_rcu;		/* for deferred freeing */
	struct file		*nf_file;	/* the open file, NULL until opened */
	const struct cred	*nf_cred;	/* creds of the opener */
#define NFSD_FILE_HASHED	(0)
#define NFSD_FILE_PENDING	(1)
#define NFSD_FILE_BREAK_READ	(2)
#define NFSD_FILE_BREAK_WRITE	(3)
#define NFSD_FILE_REFERENCED	(4)
	unsigned long		nf_flags;
	struct inode		*nf_inode;	/* comparison only, see above */
	unsigned int		nf_hashval;	/* index of our hash bucket */
	atomic_t		nf_ref;
	unsigned char		nf_may;		/* NFSD_MAY_READ/WRITE granted */
	struct nfsd_file_mark	*nf_mark;
};

int nfsd_file_cache_init(void);
void nfsd_file_cache_purge(void);
void nfsd_file_cache_shutdown(void);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **nfp);
int	nfsd_file_cache_stats_open(struct inode *, struct file *);
#endif /* _FS_NFSD_FILECACHE_H */

+8 -1

fs/nfsd/nfssvc.c

··· 27 27 #include "cache.h" 28 28 #include "vfs.h" 29 29 #include "netns.h" 30 + #include "filecache.h" 30 31 31 32 #define NFSDDBG_FACILITY NFSDDBG_SVC 32 33 ··· 314 313 if (nfsd_users++) 315 314 return 0; 316 315 316 + ret = nfsd_file_cache_init(); 317 + if (ret) 318 + goto dec_users; 317 319 /* 318 320 * Readahead param cache - will no-op if it already exists. 319 321 * (Note therefore results will be suboptimal if number of ··· 324 320 */ 325 321 ret = nfsd_racache_init(2*nrservs); 326 322 if (ret) 327 - goto dec_users; 323 + goto out_file_cache; 328 324 329 325 ret = nfs4_state_start(); 330 326 if (ret) ··· 333 329 334 330 out_racache: 335 331 nfsd_racache_shutdown(); 332 + out_file_cache: 333 + nfsd_file_cache_shutdown(); 336 334 dec_users: 337 335 nfsd_users--; 338 336 return ret; ··· 346 340 return; 347 341 348 342 nfs4_state_shutdown(); 343 + nfsd_file_cache_shutdown(); 349 344 nfsd_racache_shutdown(); 350 345 } 351 346

+140

fs/nfsd/trace.h

··· 126 126 DEFINE_NFSD_ERR_EVENT(write_err); 127 127 128 128 #include "state.h" 129 + #include "filecache.h" 130 + #include "vfs.h" 129 131 130 132 DECLARE_EVENT_CLASS(nfsd_stateid_class, 131 133 TP_PROTO(stateid_t *stp), ··· 165 163 DEFINE_STATEID_EVENT(layout_recall_done); 166 164 DEFINE_STATEID_EVENT(layout_recall_fail); 167 165 DEFINE_STATEID_EVENT(layout_recall_release); 166 + 167 + #define show_nf_flags(val) \ 168 + __print_flags(val, "|", \ 169 + { 1 << NFSD_FILE_HASHED, "HASHED" }, \ 170 + { 1 << NFSD_FILE_PENDING, "PENDING" }, \ 171 + { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ 172 + { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ 173 + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) 174 + 175 + /* FIXME: This should probably be fleshed out in the future. */ 176 + #define show_nf_may(val) \ 177 + __print_flags(val, "|", \ 178 + { NFSD_MAY_READ, "READ" }, \ 179 + { NFSD_MAY_WRITE, "WRITE" }, \ 180 + { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }) 181 + 182 + DECLARE_EVENT_CLASS(nfsd_file_class, 183 + TP_PROTO(struct nfsd_file *nf), 184 + TP_ARGS(nf), 185 + TP_STRUCT__entry( 186 + __field(unsigned int, nf_hashval) 187 + __field(void *, nf_inode) 188 + __field(int, nf_ref) 189 + __field(unsigned long, nf_flags) 190 + __field(unsigned char, nf_may) 191 + __field(struct file *, nf_file) 192 + ), 193 + TP_fast_assign( 194 + __entry->nf_hashval = nf->nf_hashval; 195 + __entry->nf_inode = nf->nf_inode; 196 + __entry->nf_ref = atomic_read(&nf->nf_ref); 197 + __entry->nf_flags = nf->nf_flags; 198 + __entry->nf_may = nf->nf_may; 199 + __entry->nf_file = nf->nf_file; 200 + ), 201 + TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p", 202 + __entry->nf_hashval, 203 + __entry->nf_inode, 204 + __entry->nf_ref, 205 + show_nf_flags(__entry->nf_flags), 206 + show_nf_may(__entry->nf_may), 207 + __entry->nf_file) 208 + ) 209 + 210 + #define DEFINE_NFSD_FILE_EVENT(name) \ 211 + DEFINE_EVENT(nfsd_file_class, name, \ 212 + TP_PROTO(struct nfsd_file *nf), \ 213 + 
TP_ARGS(nf)) 214 + 215 + DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); 216 + DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); 217 + DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); 218 + DEFINE_NFSD_FILE_EVENT(nfsd_file_put); 219 + DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); 220 + 221 + TRACE_EVENT(nfsd_file_acquire, 222 + TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, 223 + struct inode *inode, unsigned int may_flags, 224 + struct nfsd_file *nf, __be32 status), 225 + 226 + TP_ARGS(rqstp, hash, inode, may_flags, nf, status), 227 + 228 + TP_STRUCT__entry( 229 + __field(__be32, xid) 230 + __field(unsigned int, hash) 231 + __field(void *, inode) 232 + __field(unsigned int, may_flags) 233 + __field(int, nf_ref) 234 + __field(unsigned long, nf_flags) 235 + __field(unsigned char, nf_may) 236 + __field(struct file *, nf_file) 237 + __field(__be32, status) 238 + ), 239 + 240 + TP_fast_assign( 241 + __entry->xid = rqstp->rq_xid; 242 + __entry->hash = hash; 243 + __entry->inode = inode; 244 + __entry->may_flags = may_flags; 245 + __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0; 246 + __entry->nf_flags = nf ? nf->nf_flags : 0; 247 + __entry->nf_may = nf ? nf->nf_may : 0; 248 + __entry->nf_file = nf ? 
nf->nf_file : NULL; 249 + __entry->status = status; 250 + ), 251 + 252 + TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", 253 + be32_to_cpu(__entry->xid), __entry->hash, __entry->inode, 254 + show_nf_may(__entry->may_flags), __entry->nf_ref, 255 + show_nf_flags(__entry->nf_flags), 256 + show_nf_may(__entry->nf_may), __entry->nf_file, 257 + be32_to_cpu(__entry->status)) 258 + ); 259 + 260 + DECLARE_EVENT_CLASS(nfsd_file_search_class, 261 + TP_PROTO(struct inode *inode, unsigned int hash, int found), 262 + TP_ARGS(inode, hash, found), 263 + TP_STRUCT__entry( 264 + __field(struct inode *, inode) 265 + __field(unsigned int, hash) 266 + __field(int, found) 267 + ), 268 + TP_fast_assign( 269 + __entry->inode = inode; 270 + __entry->hash = hash; 271 + __entry->found = found; 272 + ), 273 + TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, 274 + __entry->inode, __entry->found) 275 + ); 276 + 277 + #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ 278 + DEFINE_EVENT(nfsd_file_search_class, name, \ 279 + TP_PROTO(struct inode *inode, unsigned int hash, int found), \ 280 + TP_ARGS(inode, hash, found)) 281 + 282 + DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); 283 + DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); 284 + DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); 285 + 286 + TRACE_EVENT(nfsd_file_fsnotify_handle_event, 287 + TP_PROTO(struct inode *inode, u32 mask), 288 + TP_ARGS(inode, mask), 289 + TP_STRUCT__entry( 290 + __field(struct inode *, inode) 291 + __field(unsigned int, nlink) 292 + __field(umode_t, mode) 293 + __field(u32, mask) 294 + ), 295 + TP_fast_assign( 296 + __entry->inode = inode; 297 + __entry->nlink = inode->i_nlink; 298 + __entry->mode = inode->i_mode; 299 + __entry->mask = mask; 300 + ), 301 + TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, 302 + __entry->nlink, __entry->mode, __entry->mask) 303 + ); 168 304 169 305 #endif /* _NFSD_TRACE_H */ 170 
306

+43 -22

fs/nfsd/vfs.c

··· 699 699 } 700 700 #endif /* CONFIG_NFSD_V3 */ 701 701 702 - static int nfsd_open_break_lease(struct inode *inode, int access) 702 + int nfsd_open_break_lease(struct inode *inode, int access) 703 703 { 704 704 unsigned int mode; 705 705 ··· 715 715 * and additional flags. 716 716 * N.B. After this call fhp needs an fh_put 717 717 */ 718 - __be32 719 - nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 718 + static __be32 719 + __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 720 720 int may_flags, struct file **filp) 721 721 { 722 722 struct path path; ··· 725 725 int flags = O_RDONLY|O_LARGEFILE; 726 726 __be32 err; 727 727 int host_err = 0; 728 - 729 - validate_process_creds(); 730 - 731 - /* 732 - * If we get here, then the client has already done an "open", 733 - * and (hopefully) checked permission - so allow OWNER_OVERRIDE 734 - * in case a chmod has now revoked permission. 735 - * 736 - * Arguably we should also allow the owner override for 737 - * directories, but we never have and it doesn't seem to have 738 - * caused anyone a problem. If we were to change this, note 739 - * also that our filldir callbacks would need a variant of 740 - * lookup_one_len that doesn't check permissions. 
741 - */ 742 - if (type == S_IFREG) 743 - may_flags |= NFSD_MAY_OWNER_OVERRIDE; 744 - err = fh_verify(rqstp, fhp, type, may_flags); 745 - if (err) 746 - goto out; 747 728 748 729 path.mnt = fhp->fh_export->ex_path.mnt; 749 730 path.dentry = fhp->fh_dentry; ··· 779 798 out_nfserr: 780 799 err = nfserrno(host_err); 781 800 out: 801 + return err; 802 + } 803 + 804 + __be32 805 + nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 806 + int may_flags, struct file **filp) 807 + { 808 + __be32 err; 809 + 810 + validate_process_creds(); 811 + /* 812 + * If we get here, then the client has already done an "open", 813 + * and (hopefully) checked permission - so allow OWNER_OVERRIDE 814 + * in case a chmod has now revoked permission. 815 + * 816 + * Arguably we should also allow the owner override for 817 + * directories, but we never have and it doesn't seem to have 818 + * caused anyone a problem. If we were to change this, note 819 + * also that our filldir callbacks would need a variant of 820 + * lookup_one_len that doesn't check permissions. 821 + */ 822 + if (type == S_IFREG) 823 + may_flags |= NFSD_MAY_OWNER_OVERRIDE; 824 + err = fh_verify(rqstp, fhp, type, may_flags); 825 + if (!err) 826 + err = __nfsd_open(rqstp, fhp, type, may_flags, filp); 782 827 validate_process_creds(); 783 828 return err; 784 829 } 830 + 831 + __be32 832 + nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 833 + int may_flags, struct file **filp) 834 + { 835 + __be32 err; 836 + 837 + validate_process_creds(); 838 + err = __nfsd_open(rqstp, fhp, type, may_flags, filp); 839 + validate_process_creds(); 840 + return err; 841 + } 842 + 843 + 785 844 786 845 struct raparms * 787 846 nfsd_init_raparms(struct file *file)

+3

fs/nfsd/vfs.h

··· 75 75 __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 76 76 loff_t, unsigned long); 77 77 #endif /* CONFIG_NFSD_V3 */ 78 + int nfsd_open_break_lease(struct inode *, int); 78 79 __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, 80 + int, struct file **); 81 + __be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t, 79 82 int, struct file **); 80 83 struct raparms; 81 84 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,