Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
fs/nfs/pagelist.c at v3.2

/*
 * linux/fs/nfs/pagelist.c
 *
 * A set of helper functions for managing NFS read and write requests.
 * The main purpose of these routines is to provide support for the
 * coalescing of several requests into a single RPC call.
 *
 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>

#include "internal.h"
#include "pnfs.h"

static struct kmem_cache *nfs_page_cachep;

static inline struct nfs_page *
nfs_page_alloc(void)
{
	struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
	if (p)
		INIT_LIST_HEAD(&p->wb_list);
	return p;
}

static inline void
nfs_page_free(struct nfs_page *p)
{
	kmem_cache_free(nfs_page_cachep, p);
}

/**
 * nfs_create_request - Create an NFS read/write request.
 * @ctx: open context to use
 * @inode: inode to which the request is attached
 * @page: page to read/write
 * @offset: starting offset within the page
 * @count: number of bytes to read/write
 *
 * The page must be locked by the caller. This makes sure we never
 * create two different requests for the same page.
 * The caller must ensure it is safe to sleep in this function.
 */
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
		   struct page *page,
		   unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	/* try to allocate the request struct */
	req = nfs_page_alloc();
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	/* get lock context early so we can deal with alloc failures */
	req->wb_lock_context = nfs_get_lock_context(ctx);
	if (req->wb_lock_context == NULL) {
		nfs_page_free(req);
		return ERR_PTR(-ENOMEM);
	}

	/* Initialize the request struct. Initially, we assume a
	 * long write-back delay. This will be adjusted in
	 * update_nfs_request below if the region is not locked. */
	req->wb_page = page;
	atomic_set(&req->wb_complete, 0);
	req->wb_index = page->index;
	page_cache_get(page);
	BUG_ON(PagePrivate(page));
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping->host != inode);
	req->wb_offset = offset;
	req->wb_pgbase = offset;
	req->wb_bytes = count;
	req->wb_context = get_nfs_open_context(ctx);
	kref_init(&req->wb_kref);
	return req;
}
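/*
 * Editor's sketch (not part of the original file): the typical
 * lifecycle of an nfs_page. The caller must already hold the page
 * lock; "ctx", "inode", "page", "offset" and "count" stand for
 * whatever state the caller has on hand.
 *
 *	struct nfs_page *req;
 *
 *	req = nfs_create_request(ctx, inode, page, offset, count);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);	<- -ENOMEM on allocation failure
 *	...
 *	nfs_release_request(req);	<- drops the kref taken at creation
 */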
/**
 * nfs_unlock_request - Unlock request and wake up sleepers.
 * @req: request to unlock
 */
void nfs_unlock_request(struct nfs_page *req)
{
	if (!NFS_WBACK_BUSY(req)) {
		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
		BUG();
	}
	smp_mb__before_clear_bit();
	clear_bit(PG_BUSY, &req->wb_flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&req->wb_flags, PG_BUSY);
	nfs_release_request(req);
}

/**
 * nfs_set_page_tag_locked - Tag a request as locked
 * @req: request to tag
 */
int nfs_set_page_tag_locked(struct nfs_page *req)
{
	if (!nfs_lock_request_dontget(req))
		return 0;
	if (test_bit(PG_MAPPED, &req->wb_flags))
		radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_LOCKED);
	return 1;
}

/**
 * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
 * @req: request to unlock
 */
void nfs_clear_page_tag_locked(struct nfs_page *req)
{
	if (test_bit(PG_MAPPED, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;
		struct nfs_inode *nfsi = NFS_I(inode);

		spin_lock(&inode->i_lock);
		radix_tree_tag_clear(&nfsi->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_LOCKED);
		nfs_unlock_request(req);
		spin_unlock(&inode->i_lock);
	} else
		nfs_unlock_request(req);
}

/*
 * nfs_clear_request - Free up all resources allocated to the request
 * @req: request to clear
 *
 * Release page and open context resources associated with a read/write
 * request after it has completed.
 */
static void nfs_clear_request(struct nfs_page *req)
{
	struct page *page = req->wb_page;
	struct nfs_open_context *ctx = req->wb_context;
	struct nfs_lock_context *l_ctx = req->wb_lock_context;

	if (page != NULL) {
		page_cache_release(page);
		req->wb_page = NULL;
	}
	if (l_ctx != NULL) {
		nfs_put_lock_context(l_ctx);
		req->wb_lock_context = NULL;
	}
	if (ctx != NULL) {
		put_nfs_open_context(ctx);
		req->wb_context = NULL;
	}
}


/**
 * nfs_release_request - Release the count on an NFS read/write request
 * @req: request to release
 *
 * Note: Should never be called with the spinlock held!
 */
static void nfs_free_request(struct kref *kref)
{
	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);

	/* Release struct file and open context */
	nfs_clear_request(req);
	nfs_page_free(req);
}

void nfs_release_request(struct nfs_page *req)
{
	kref_put(&req->wb_kref, nfs_free_request);
}

static int nfs_wait_bit_uninterruptible(void *word)
{
	io_schedule();
	return 0;
}

/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * The wait is uninterruptible (TASK_UNINTERRUPTIBLE).
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
	return wait_on_bit(&req->wb_flags, PG_BUSY,
			nfs_wait_bit_uninterruptible,
			TASK_UNINTERRUPTIBLE);
}
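/*
 * Editor's sketch (not part of the original file): per the comment on
 * nfs_wait_on_request(), a waiter must hold its own count on the
 * request so it cannot be freed mid-wait:
 *
 *	kref_get(&req->wb_kref);
 *	ret = nfs_wait_on_request(req);	<- sleeps until PG_BUSY clears
 *	nfs_release_request(req);
 */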
bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev,
			 struct nfs_page *req)
{
	/*
	 * FIXME: ideally we should be able to coalesce all requests
	 * that are not block boundary aligned, but currently this
	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
	 * since nfs_flush_multi and nfs_pagein_multi assume you
	 * can have only one struct nfs_page.
	 */
	if (desc->pg_bsize < PAGE_SIZE)
		return 0;

	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);

/**
 * nfs_pageio_init - initialise a page io descriptor
 * @desc: pointer to descriptor
 * @inode: pointer to inode
 * @pg_ops: pointer to pageio operations
 * @bsize: io block size
 * @io_flags: extra parameters for the io function
 */
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
		     struct inode *inode,
		     const struct nfs_pageio_ops *pg_ops,
		     size_t bsize,
		     int io_flags)
{
	INIT_LIST_HEAD(&desc->pg_list);
	desc->pg_bytes_written = 0;
	desc->pg_count = 0;
	desc->pg_bsize = bsize;
	desc->pg_base = 0;
	desc->pg_moreio = 0;
	desc->pg_recoalesce = 0;
	desc->pg_inode = inode;
	desc->pg_ops = pg_ops;
	desc->pg_ioflags = io_flags;
	desc->pg_error = 0;
	desc->pg_lseg = NULL;
}

/**
 * nfs_can_coalesce_requests - test two requests for compatibility
 * @prev: pointer to nfs_page
 * @req: pointer to nfs_page
 * @pgio: pointer to the pageio descriptor whose pg_test op is applied
 *
 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
 * page data area they describe is contiguous, and that their RPC
 * credentials, NFSv4 open state, and lockowners are the same.
 *
 * Return 'true' if this is the case, else return 'false'.
 */
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
				      struct nfs_page *req,
				      struct nfs_pageio_descriptor *pgio)
{
	if (req->wb_context->cred != prev->wb_context->cred)
		return false;
	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
		return false;
	if (req->wb_context->state != prev->wb_context->state)
		return false;
	if (req->wb_index != (prev->wb_index + 1))
		return false;
	if (req->wb_pgbase != 0)
		return false;
	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
		return false;
	return pgio->pg_ops->pg_test(pgio, prev, req);
}

/**
 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
				     struct nfs_page *req)
{
	if (desc->pg_count != 0) {
		struct nfs_page *prev;

		prev = nfs_list_entry(desc->pg_list.prev);
		if (!nfs_can_coalesce_requests(prev, req, desc))
			return 0;
	} else {
		if (desc->pg_ops->pg_init)
			desc->pg_ops->pg_init(desc, req);
		desc->pg_base = req->wb_pgbase;
	}
	nfs_list_remove_request(req);
	nfs_list_add_request(req, &desc->pg_list);
	desc->pg_count += req->wb_bytes;
	return 1;
}

/*
 * Helper for nfs_pageio_add_request and nfs_pageio_complete
 */
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
	if (!list_empty(&desc->pg_list)) {
		int error = desc->pg_ops->pg_doio(desc);
		if (error < 0)
			desc->pg_error = error;
		else
			desc->pg_bytes_written += desc->pg_count;
	}
	if (list_empty(&desc->pg_list)) {
		desc->pg_count = 0;
		desc->pg_base = 0;
	}
}
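/*
 * Editor's sketch (not part of the original file): how the read and
 * write paths drive a descriptor. "pg_ops" is the caller's
 * struct nfs_pageio_ops and "xfersize" its rsize/wsize; both are
 * placeholders here.
 *
 *	struct nfs_pageio_descriptor pgio;
 *
 *	nfs_pageio_init(&pgio, inode, pg_ops, xfersize, io_flags);
 *	while (more requests)
 *		nfs_pageio_add_request(&pgio, req);	<- coalesces or flushes
 *	nfs_pageio_complete(&pgio);	<- sends whatever is still queued
 */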
/**
 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
				    struct nfs_page *req)
{
	while (!nfs_pageio_do_add_request(desc, req)) {
		desc->pg_moreio = 1;
		nfs_pageio_doio(desc);
		if (desc->pg_error < 0)
			return 0;
		desc->pg_moreio = 0;
		if (desc->pg_recoalesce)
			return 0;
	}
	return 1;
}

static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);

	do {
		list_splice_init(&desc->pg_list, &head);
		desc->pg_bytes_written -= desc->pg_count;
		desc->pg_count = 0;
		desc->pg_base = 0;
		desc->pg_recoalesce = 0;

		while (!list_empty(&head)) {
			struct nfs_page *req;

			req = list_first_entry(&head, struct nfs_page, wb_list);
			nfs_list_remove_request(req);
			if (__nfs_pageio_add_request(desc, req))
				continue;
			if (desc->pg_error < 0)
				return 0;
			break;
		}
	} while (desc->pg_recoalesce);
	return 1;
}

int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
			   struct nfs_page *req)
{
	int ret;

	do {
		ret = __nfs_pageio_add_request(desc, req);
		if (ret)
			break;
		if (desc->pg_error < 0)
			break;
		ret = nfs_do_recoalesce(desc);
	} while (ret);
	return ret;
}

/**
 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 * @desc: pointer to io descriptor
 */
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
	for (;;) {
		nfs_pageio_doio(desc);
		if (!desc->pg_recoalesce)
			break;
		if (!nfs_do_recoalesce(desc))
			break;
	}
}

/**
 * nfs_pageio_cond_complete - Conditional I/O completion
 * @desc: pointer to io descriptor
 * @index: page index
 *
 * It is important to ensure that processes don't try to take locks
 * on non-contiguous ranges of pages as that might deadlock. This
 * function should be called before attempting to wait on a locked
 * nfs_page. It will complete the I/O if the page index 'index'
 * is not contiguous with the existing list of pages in 'desc'.
 */
void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
	if (!list_empty(&desc->pg_list)) {
		struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
		if (index != prev->wb_index + 1)
			nfs_pageio_complete(desc);
	}
}
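/*
 * Editor's sketch (not part of the original file): per the comment
 * above, the flush path should call nfs_pageio_cond_complete() before
 * blocking on a locked request, so the descriptor never pins pages
 * that are non-contiguous with the one being waited on:
 *
 *	nfs_pageio_cond_complete(pgio, page->index);
 *	<- now it is safe to lock or wait on the nfs_page for page->index
 */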
#define NFS_SCAN_MAXENTRIES 16
/**
 * nfs_scan_list - Scan a list for matching requests
 * @nfsi: NFS inode
 * @dst: Destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan.
 * @tag: tag to scan for
 *
 * Moves elements from one of the inode request lists.
 * If @npages is 0, the entire address_space starting at index
 * @idx_start is scanned.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * You must be holding the inode's i_lock when calling this function.
 */
int nfs_scan_list(struct nfs_inode *nfsi,
		  struct list_head *dst, pgoff_t idx_start,
		  unsigned int npages, int tag)
{
	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
	struct nfs_page *req;
	pgoff_t idx_end;
	int found, i;
	int res;
	struct list_head *list;

	res = 0;
	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	for (;;) {
		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
				(void **)&pgvec[0], idx_start,
				NFS_SCAN_MAXENTRIES, tag);
		if (found <= 0)
			break;
		for (i = 0; i < found; i++) {
			req = pgvec[i];
			if (req->wb_index > idx_end)
				goto out;
			idx_start = req->wb_index + 1;
			if (nfs_set_page_tag_locked(req)) {
				kref_get(&req->wb_kref);
				radix_tree_tag_clear(&nfsi->nfs_page_tree,
						req->wb_index, tag);
				list = pnfs_choose_commit_list(req, dst);
				nfs_list_add_request(req, list);
				res++;
				if (res == INT_MAX)
					goto out;
			}
		}
		/* for latency reduction */
		cond_resched_lock(&nfsi->vfs_inode.i_lock);
	}
out:
	return res;
}

int __init nfs_init_nfspagecache(void)
{
	nfs_page_cachep = kmem_cache_create("nfs_page",
					    sizeof(struct nfs_page),
					    0, SLAB_HWCACHE_ALIGN,
					    NULL);
	if (nfs_page_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_nfspagecache(void)
{
	kmem_cache_destroy(nfs_page_cachep);
}
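A minimal caller sketch for nfs_scan_list() (editor's addition, not part of the file), following the locking rule in its comment: take the inode's i_lock, scan, then process the moved requests. NFS_PAGE_TAG_COMMIT is the tag the v3.2 write path scans for; the surrounding context (an "inode" with pending requests) is assumed.

	LIST_HEAD(pages);
	struct nfs_inode *nfsi = NFS_I(inode);
	int moved;

	spin_lock(&inode->i_lock);
	moved = nfs_scan_list(nfsi, &pages, 0, 0, NFS_PAGE_TAG_COMMIT);
	spin_unlock(&inode->i_lock);
	/* each moved request is now tag-locked and holds an extra kref */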