Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
fs/nfs/pagelist.c at v3.2

/*
 * linux/fs/nfs/pagelist.c
 *
 * A set of helper functions for managing NFS read and write requests.
 * The main purpose of these routines is to provide support for the
 * coalescing of several requests into a single RPC call.
 *
 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>

#include "internal.h"
#include "pnfs.h"

static struct kmem_cache *nfs_page_cachep;

static inline struct nfs_page *
nfs_page_alloc(void)
{
	struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
	if (p)
		INIT_LIST_HEAD(&p->wb_list);
	return p;
}

static inline void
nfs_page_free(struct nfs_page *p)
{
	kmem_cache_free(nfs_page_cachep, p);
}

/**
 * nfs_create_request - Create an NFS read/write request.
 * @ctx: open context to use
 * @inode: inode to which the request is attached
 * @page: page to read/write
 * @offset: starting offset within the page
 * @count: number of bytes to read/write
 *
 * The page must be locked by the caller. This makes sure we never
 * create two different requests for the same page.
 * The caller must ensure it is safe to sleep in this function.
 */
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
		   struct page *page,
		   unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	/* try to allocate the request struct */
	req = nfs_page_alloc();
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	/* get lock context early so we can deal with alloc failures */
	req->wb_lock_context = nfs_get_lock_context(ctx);
	if (req->wb_lock_context == NULL) {
		nfs_page_free(req);
		return ERR_PTR(-ENOMEM);
	}

	/* Initialize the request struct. Initially, we assume a
	 * long write-back delay. This will be adjusted in
	 * update_nfs_request below if the region is not locked. */
	req->wb_page = page;
	atomic_set(&req->wb_complete, 0);
	req->wb_index = page->index;
	page_cache_get(page);
	BUG_ON(PagePrivate(page));
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping->host != inode);
	req->wb_offset = offset;
	req->wb_pgbase = offset;
	req->wb_bytes = count;
	req->wb_context = get_nfs_open_context(ctx);
	kref_init(&req->wb_kref);
	return req;
}
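/*
 * Editor's sketch (not part of the original file): the typical
 * lifecycle of an nfs_page. The caller must already hold the page
 * lock; "ctx", "inode", "page", "offset" and "count" stand for
 * whatever state the caller has on hand.
 *
 *	struct nfs_page *req;
 *
 *	req = nfs_create_request(ctx, inode, page, offset, count);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);	<- -ENOMEM on allocation failure
 *	...
 *	nfs_release_request(req);	<- drops the kref taken at creation
 */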
/**
 * nfs_unlock_request - Unlock request and wake up sleepers.
 * @req: request to unlock
 */
void nfs_unlock_request(struct nfs_page *req)
{
	if (!NFS_WBACK_BUSY(req)) {
		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
		BUG();
	}
	smp_mb__before_clear_bit();
	clear_bit(PG_BUSY, &req->wb_flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&req->wb_flags, PG_BUSY);
	nfs_release_request(req);
}

/**
 * nfs_set_page_tag_locked - Tag a request as locked
 * @req: request to tag
 */
int nfs_set_page_tag_locked(struct nfs_page *req)
{
	if (!nfs_lock_request_dontget(req))
		return 0;
	if (test_bit(PG_MAPPED, &req->wb_flags))
		radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_LOCKED);
	return 1;
}

/**
 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 * @req: request to unlock
 */
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
	if (test_bit(PG_MAPPED, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;
		struct nfs_inode *nfsi = NFS_I(inode);

		spin_lock(&inode->i_lock);
		radix_tree_tag_clear(&nfsi->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_LOCKED);
		nfs_unlock_request(req);
		spin_unlock(&inode->i_lock);
	} else
		nfs_unlock_request(req);
}

/*
 * nfs_clear_request - Free up all resources allocated to the request
 * @req: request to clear
 *
 * Release page and open context resources associated with a read/write
 * request after it has completed.
 */
static void nfs_clear_request(struct nfs_page *req)
{
	struct page *page = req->wb_page;
	struct nfs_open_context *ctx = req->wb_context;
	struct nfs_lock_context *l_ctx = req->wb_lock_context;

	if (page != NULL) {
		page_cache_release(page);
		req->wb_page = NULL;
	}
	if (l_ctx != NULL) {
		nfs_put_lock_context(l_ctx);
		req->wb_lock_context = NULL;
	}
	if (ctx != NULL) {
		put_nfs_open_context(ctx);
		req->wb_context = NULL;
	}
}


/**
 * nfs_release_request - Release the count on an NFS read/write request
 * @req: request to release
 *
 * Note: Should never be called with the spinlock held!
 */
static void nfs_free_request(struct kref *kref)
{
	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);

	/* Release struct file and open context */
	nfs_clear_request(req);
	nfs_page_free(req);
}

void nfs_release_request(struct nfs_page *req)
{
	kref_put(&req->wb_kref, nfs_free_request);
}

static int nfs_wait_bit_uninterruptible(void *word)
{
	io_schedule();
	return 0;
}

/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * The wait is uninterruptible (TASK_UNINTERRUPTIBLE).
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
	return wait_on_bit(&req->wb_flags, PG_BUSY,
			nfs_wait_bit_uninterruptible,
			TASK_UNINTERRUPTIBLE);
}
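/*
 * Editor's sketch (not part of the original file): per the comment on
 * nfs_wait_on_request(), a waiter must hold its own count on the
 * request so it cannot be freed mid-wait:
 *
 *	kref_get(&req->wb_kref);
 *	ret = nfs_wait_on_request(req);	<- sleeps until PG_BUSY clears
 *	nfs_release_request(req);
 */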
bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev,
			 struct nfs_page *req)
{
	/*
	 * FIXME: ideally we should be able to coalesce all requests
	 * that are not block boundary aligned, but currently this
	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
	 * since nfs_flush_multi and nfs_pagein_multi assume you
	 * can have only one struct nfs_page.
	 */
	if (desc->pg_bsize < PAGE_SIZE)
		return 0;

	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);

/**
 * nfs_pageio_init - initialise a page io descriptor
 * @desc: pointer to descriptor
 * @inode: pointer to inode
 * @pg_ops: pointer to pageio operations
 * @bsize: io block size
 * @io_flags: extra parameters for the io function
 */
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
		     struct inode *inode,
		     const struct nfs_pageio_ops *pg_ops,
		     size_t bsize,
		     int io_flags)
{
	INIT_LIST_HEAD(&desc->pg_list);
	desc->pg_bytes_written = 0;
	desc->pg_count = 0;
	desc->pg_bsize = bsize;
	desc->pg_base = 0;
	desc->pg_moreio = 0;
	desc->pg_recoalesce = 0;
	desc->pg_inode = inode;
	desc->pg_ops = pg_ops;
	desc->pg_ioflags = io_flags;
	desc->pg_error = 0;
	desc->pg_lseg = NULL;
}

/**
 * nfs_can_coalesce_requests - test two requests for compatibility
 * @prev: pointer to nfs_page
 * @req: pointer to nfs_page
 * @pgio: pointer to the pageio descriptor whose pg_test op is applied
 *
 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
 * page data area they describe is contiguous, and that their RPC
 * credentials, NFSv4 open state, and lockowners are the same.
 *
 * Return 'true' if this is the case, else return 'false'.
 */
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
				      struct nfs_page *req,
				      struct nfs_pageio_descriptor *pgio)
{
	if (req->wb_context->cred != prev->wb_context->cred)
		return false;
	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
		return false;
	if (req->wb_context->state != prev->wb_context->state)
		return false;
	if (req->wb_index != (prev->wb_index + 1))
		return false;
	if (req->wb_pgbase != 0)
		return false;
	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
		return false;
	return pgio->pg_ops->pg_test(pgio, prev, req);
}

/**
 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
				     struct nfs_page *req)
{
	if (desc->pg_count != 0) {
		struct nfs_page *prev;

		prev = nfs_list_entry(desc->pg_list.prev);
		if (!nfs_can_coalesce_requests(prev, req, desc))
			return 0;
	} else {
		if (desc->pg_ops->pg_init)
			desc->pg_ops->pg_init(desc, req);
		desc->pg_base = req->wb_pgbase;
	}
	nfs_list_remove_request(req);
	nfs_list_add_request(req, &desc->pg_list);
	desc->pg_count += req->wb_bytes;
	return 1;
}

/*
 * Helper for nfs_pageio_add_request and nfs_pageio_complete
 */
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
	if (!list_empty(&desc->pg_list)) {
		int error = desc->pg_ops->pg_doio(desc);
		if (error < 0)
			desc->pg_error = error;
		else
			desc->pg_bytes_written += desc->pg_count;
	}
	if (list_empty(&desc->pg_list)) {
		desc->pg_count = 0;
		desc->pg_base = 0;
	}
}
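/*
 * Editor's sketch (not part of the original file): how the read and
 * write paths drive a descriptor. "pg_ops" is the caller's
 * struct nfs_pageio_ops and "xfersize" its rsize/wsize; both are
 * placeholders here.
 *
 *	struct nfs_pageio_descriptor pgio;
 *
 *	nfs_pageio_init(&pgio, inode, pg_ops, xfersize, io_flags);
 *	while (more requests)
 *		nfs_pageio_add_request(&pgio, req);	<- coalesces or flushes
 *	nfs_pageio_complete(&pgio);	<- sends whatever is still queued
 */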
/**
 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
				    struct nfs_page *req)
{
	while (!nfs_pageio_do_add_request(desc, req)) {
		desc->pg_moreio = 1;
		nfs_pageio_doio(desc);
		if (desc->pg_error < 0)
			return 0;
		desc->pg_moreio = 0;
		if (desc->pg_recoalesce)
			return 0;
	}
	return 1;
}

static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);

	do {
		list_splice_init(&desc->pg_list, &head);
		desc->pg_bytes_written -= desc->pg_count;
		desc->pg_count = 0;
		desc->pg_base = 0;
		desc->pg_recoalesce = 0;

		while (!list_empty(&head)) {
			struct nfs_page *req;

			req = list_first_entry(&head, struct nfs_page, wb_list);
			nfs_list_remove_request(req);
			if (__nfs_pageio_add_request(desc, req))
				continue;
			if (desc->pg_error < 0)
				return 0;
			break;
		}
	} while (desc->pg_recoalesce);
	return 1;
}

int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
			   struct nfs_page *req)
{
	int ret;

	do {
		ret = __nfs_pageio_add_request(desc, req);
		if (ret)
			break;
		if (desc->pg_error < 0)
			break;
		ret = nfs_do_recoalesce(desc);
	} while (ret);
	return ret;
}

/**
 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 * @desc: pointer to io descriptor
 */
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
	for (;;) {
		nfs_pageio_doio(desc);
		if (!desc->pg_recoalesce)
			break;
		if (!nfs_do_recoalesce(desc))
			break;
	}
}

/**
 * nfs_pageio_cond_complete - Conditional I/O completion
 * @desc: pointer to io descriptor
 * @index: page index
 *
 * It is important to ensure that processes don't try to take locks
 * on non-contiguous ranges of pages as that might deadlock. This
 * function should be called before attempting to wait on a locked
 * nfs_page. It will complete the I/O if the page index 'index'
 * is not contiguous with the existing list of pages in 'desc'.
 */
void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
	if (!list_empty(&desc->pg_list)) {
		struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
		if (index != prev->wb_index + 1)
			nfs_pageio_complete(desc);
	}
}
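/*
 * Editor's sketch (not part of the original file): per the comment
 * above, the flush path should call nfs_pageio_cond_complete() before
 * blocking on a locked request, so the descriptor never pins pages
 * that are non-contiguous with the one being waited on:
 *
 *	nfs_pageio_cond_complete(pgio, page->index);
 *	<- now it is safe to lock or wait on the nfs_page for page->index
 */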
#define NFS_SCAN_MAXENTRIES 16
/**
 * nfs_scan_list - Scan a list for matching requests
 * @nfsi: NFS inode
 * @dst: Destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan.
 * @tag: tag to scan for
 *
 * Moves elements from one of the inode request lists.
 * If @npages is 0, the entire address_space starting at index
 * @idx_start is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * You must be holding the inode's i_lock when calling this function.
 */
int nfs_scan_list(struct nfs_inode *nfsi,
		  struct list_head *dst, pgoff_t idx_start,
		  unsigned int npages, int tag)
{
	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
	struct nfs_page *req;
	pgoff_t idx_end;
	int found, i;
	int res;
	struct list_head *list;

	res = 0;
	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	for (;;) {
		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
				(void **)&pgvec[0], idx_start,
				NFS_SCAN_MAXENTRIES, tag);
		if (found <= 0)
			break;
		for (i = 0; i < found; i++) {
			req = pgvec[i];
			if (req->wb_index > idx_end)
				goto out;
			idx_start = req->wb_index + 1;
			if (nfs_set_page_tag_locked(req)) {
				kref_get(&req->wb_kref);
				radix_tree_tag_clear(&nfsi->nfs_page_tree,
						req->wb_index, tag);
				list = pnfs_choose_commit_list(req, dst);
				nfs_list_add_request(req, list);
				res++;
				if (res == INT_MAX)
					goto out;
			}
		}
		/* for latency reduction */
		cond_resched_lock(&nfsi->vfs_inode.i_lock);
	}
out:
	return res;
}

int __init nfs_init_nfspagecache(void)
{
	nfs_page_cachep = kmem_cache_create("nfs_page",
					    sizeof(struct nfs_page),
					    0, SLAB_HWCACHE_ALIGN,
					    NULL);
	if (nfs_page_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_nfspagecache(void)
{
	kmem_cache_destroy(nfs_page_cachep);
}
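A minimal caller sketch for nfs_scan_list() (editor's addition, not part of the file), following the locking rule in its comment: take the inode's i_lock, scan, then process the moved requests. NFS_PAGE_TAG_COMMIT is the tag the v3.2 write path scans for; the surrounding context (an "inode" with pending requests) is assumed.

	LIST_HEAD(pages);
	struct nfs_inode *nfsi = NFS_I(inode);
	int moved;

	spin_lock(&inode->i_lock);
	moved = nfs_scan_list(nfsi, &pages, 0, 0, NFS_PAGE_TAG_COMMIT);
	spin_unlock(&inode->i_lock);
	/* each moved request is now tag-locked and holds an extra kref */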