Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] NFS: Replace nfs_page insertion sort with a radix sort

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

+107 -74
+1 -1
fs/nfs/inode.c
··· 135 135 int flags = sync ? FLUSH_WAIT : 0; 136 136 int ret; 137 137 138 - ret = nfs_commit_inode(inode, 0, 0, flags); 138 + ret = nfs_commit_inode(inode, flags); 139 139 if (ret < 0) 140 140 return ret; 141 141 return 0;
+56 -30
fs/nfs/pagelist.c
··· 177 177 nfs_page_free(req); 178 178 } 179 179 180 - /** 181 - * nfs_list_add_request - Insert a request into a sorted list 182 - * @req: request 183 - * @head: head of list into which to insert the request. 184 - * 185 - * Note that the wb_list is sorted by page index in order to facilitate 186 - * coalescing of requests. 187 - * We use an insertion sort that is optimized for the case of appended 188 - * writes. 189 - */ 190 - void 191 - nfs_list_add_request(struct nfs_page *req, struct list_head *head) 192 - { 193 - struct list_head *pos; 194 - 195 - #ifdef NFS_PARANOIA 196 - if (!list_empty(&req->wb_list)) { 197 - printk(KERN_ERR "NFS: Add to list failed!\n"); 198 - BUG(); 199 - } 200 - #endif 201 - list_for_each_prev(pos, head) { 202 - struct nfs_page *p = nfs_list_entry(pos); 203 - if (p->wb_index < req->wb_index) 204 - break; 205 - } 206 - list_add(&req->wb_list, pos); 207 - req->wb_list_head = head; 208 - } 209 - 210 180 static int nfs_wait_bit_interruptible(void *word) 211 181 { 212 182 int ret = 0; ··· 259 289 break; 260 290 } 261 291 return npages; 292 + } 293 + 294 + #define NFS_SCAN_MAXENTRIES 16 295 + /** 296 + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests 297 + * @nfsi: NFS inode 298 + * @dst: Destination list 299 + * @idx_start: lower bound of page->index to scan 300 + * @npages: idx_start + npages sets the upper bound to scan. 301 + * 302 + * Moves elements from one of the inode request lists. 303 + * If the number of requests is set to 0, the entire address_space 304 + * starting at index idx_start, is scanned. 305 + * The requests are *not* checked to ensure that they form a contiguous set. 306 + * You must be holding the inode's req_lock when calling this function 307 + */ 308 + int 309 + nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, 310 + unsigned long idx_start, unsigned int npages) 311 + { 312 + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; 313 + struct nfs_page *req; 314 + unsigned long idx_end; 315 + int found, i; 316 + int res; 317 + 318 + res = 0; 319 + if (npages == 0) 320 + idx_end = ~0; 321 + else 322 + idx_end = idx_start + npages - 1; 323 + 324 + for (;;) { 325 + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, 326 + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, 327 + NFS_PAGE_TAG_DIRTY); 328 + if (found <= 0) 329 + break; 330 + for (i = 0; i < found; i++) { 331 + req = pgvec[i]; 332 + if (req->wb_index > idx_end) 333 + goto out; 334 + 335 + idx_start = req->wb_index + 1; 336 + 337 + if (nfs_set_page_writeback_locked(req)) { 338 + radix_tree_tag_clear(&nfsi->nfs_page_tree, 339 + req->wb_index, NFS_PAGE_TAG_DIRTY); 340 + nfs_list_remove_request(req); 341 + nfs_list_add_request(req, dst); 342 + res++; 343 + } 344 + } 345 + } 346 + out: 347 + return res; 262 348 } 263 349 264 350 /**
+32 -39
fs/nfs/write.c
··· 352 352 if (err < 0) 353 353 goto out; 354 354 } 355 - err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); 355 + err = nfs_commit_inode(inode, wb_priority(wbc)); 356 356 if (err > 0) { 357 357 wbc->nr_to_write -= err; 358 358 err = 0; ··· 446 446 struct nfs_inode *nfsi = NFS_I(inode); 447 447 448 448 spin_lock(&nfsi->req_lock); 449 + radix_tree_tag_set(&nfsi->nfs_page_tree, 450 + req->wb_index, NFS_PAGE_TAG_DIRTY); 449 451 nfs_list_add_request(req, &nfsi->dirty); 450 452 nfsi->ndirty++; 451 453 spin_unlock(&nfsi->req_lock); ··· 539 537 nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) 540 538 { 541 539 struct nfs_inode *nfsi = NFS_I(inode); 542 - int res; 543 - res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); 544 - nfsi->ndirty -= res; 545 - sub_page_state(nr_dirty,res); 546 - if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) 547 - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); 540 + int res = 0; 541 + 542 + if (nfsi->ndirty != 0) { 543 + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); 544 + nfsi->ndirty -= res; 545 + sub_page_state(nr_dirty,res); 546 + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) 547 + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); 548 + } 548 549 return res; 549 550 } 550 551 ··· 566 561 nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) 567 562 { 568 563 struct nfs_inode *nfsi = NFS_I(inode); 569 - int res; 570 - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); 571 - nfsi->ncommit -= res; 572 - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) 573 - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); 564 + int res = 0; 565 + 566 + if (nfsi->ncommit != 0) { 567 + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); 568 + nfsi->ncommit -= res; 569 + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) 570 + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); 571 + } 574 572 return res; 575 573 } 576 574 #endif ··· 1217 1209 struct nfs_write_data *data, int how) 1218 1210 { 1219 1211 struct rpc_task *task = &data->task; 1220 - struct nfs_page *first, *last; 1212 + struct nfs_page *first; 1221 1213 struct inode *inode; 1222 - loff_t start, end, len; 1223 1214 1224 1215 /* Set up the RPC argument and reply structs 1225 1216 * NB: take care not to mess about with data->commit et al. */ 1226 1217 1227 1218 list_splice_init(head, &data->pages); 1228 1219 first = nfs_list_entry(data->pages.next); 1229 - last = nfs_list_entry(data->pages.prev); 1230 1220 inode = first->wb_context->dentry->d_inode; 1231 - 1232 - /* 1233 - * Determine the offset range of requests in the COMMIT call. 1234 - * We rely on the fact that data->pages is an ordered list... 1235 - */ 1236 - start = req_offset(first); 1237 - end = req_offset(last) + last->wb_bytes; 1238 - len = end - start; 1239 - /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ 1240 - if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) 1241 - len = 0; 1242 1221 1243 1222 data->inode = inode; 1244 1223 data->cred = first->wb_context->cred; 1245 1224 1246 1225 data->args.fh = NFS_FH(data->inode); 1247 - data->args.offset = start; 1248 - data->args.count = len; 1249 - data->res.count = len; 1226 + /* Note: we always request a commit of the entire inode */ 1227 + data->args.offset = 0; 1228 + data->args.count = 0; 1229 + data->res.count = 0; 1250 1230 data->res.fattr = &data->fattr; 1251 1231 data->res.verf = &data->verf; 1252 1232 ··· 1353 1357 } 1354 1358 1355 1359 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1356 - int nfs_commit_inode(struct inode *inode, unsigned long idx_start, 1357 - unsigned int npages, int how) 1360 + int nfs_commit_inode(struct inode *inode, int how) 1358 1361 { 1359 1362 struct nfs_inode *nfsi = NFS_I(inode); 1360 1363 LIST_HEAD(head); ··· 1361 1366 error = 0; 1362 1367 1363 1368 spin_lock(&nfsi->req_lock); 1364 - res = nfs_scan_commit(inode, &head, idx_start, npages); 1369 + res = nfs_scan_commit(inode, &head, 0, 0); 1370 + spin_unlock(&nfsi->req_lock); 1365 1371 if (res) { 1366 - res += nfs_scan_commit(inode, &head, 0, 0); 1367 - spin_unlock(&nfsi->req_lock); 1368 1372 error = nfs_commit_list(&head, how); 1369 - } else 1370 - spin_unlock(&nfsi->req_lock); 1371 - if (error < 0) 1372 - return error; 1373 + if (error < 0) 1374 + return error; 1375 + } 1373 1376 return res; 1374 1377 } 1375 1378 #endif ··· 1389 1396 error = nfs_flush_inode(inode, idx_start, npages, how); 1390 1397 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1391 1398 if (error == 0) 1392 - error = nfs_commit_inode(inode, idx_start, npages, how); 1399 + error = nfs_commit_inode(inode, how); 1393 1400 #endif 1394 1401 } while (error > 0); 1395 1402 return error;
+2 -2
include/linux/nfs_fs.h
··· 395 395 */ 396 396 extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); 397 397 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 398 - extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int); 398 + extern int nfs_commit_inode(struct inode *, int); 399 399 #else 400 400 static inline int 401 - nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) 401 + nfs_commit_inode(struct inode *inode, int how) 402 402 { 403 403 return 0; 404 404 }
+16 -2
include/linux/nfs_page.h
··· 22 22 /* 23 23 * Valid flags for the radix tree 24 24 */ 25 + #define NFS_PAGE_TAG_DIRTY 0 25 26 #define NFS_PAGE_TAG_WRITEBACK 1 26 27 27 28 /* ··· 32 31 #define PG_NEED_COMMIT 1 33 32 #define PG_NEED_RESCHED 2 34 33 34 + struct nfs_inode; 35 35 struct nfs_page { 36 36 struct list_head wb_list, /* Defines state of page: */ 37 37 *wb_list_head; /* read/write/commit */ ··· 61 59 extern void nfs_release_request(struct nfs_page *req); 62 60 63 61 64 - extern void nfs_list_add_request(struct nfs_page *, struct list_head *); 65 - 62 + extern int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, 63 + unsigned long idx_start, unsigned int npages); 66 64 extern int nfs_scan_list(struct list_head *, struct list_head *, 67 65 unsigned long, unsigned int); 68 66 extern int nfs_coalesce_requests(struct list_head *, struct list_head *, ··· 94 92 return 0; 95 93 atomic_inc(&req->wb_count); 96 94 return 1; 95 + } 96 + 97 + /** 98 + * nfs_list_add_request - Insert a request into a list 99 + * @req: request 100 + * @head: head of list into which to insert the request. 101 + */ 102 + static inline void 103 + nfs_list_add_request(struct nfs_page *req, struct list_head *head) 104 + { 105 + list_add_tail(&req->wb_list, head); 106 + req->wb_list_head = head; 97 107 } 98 108 99 109