Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs/9p: Update zero-copy implementation in 9p

* remove a lot of updates to different data structures
* add a separate callback for zero copy request.
* above makes non zero copy code path simpler
* remove conditionalizing TREAD/TREADDIR/TWRITE in the zero copy path
* Fix the dotu p9_check_errors with zero copy. Add sufficient doc around
* Add support for both input and output buffers in zero copy callback
* pin and unpin pages in the same context
* use helpers instead of defining page offset and rest of page ourself
* Fix mem leak in p9_check_errors
* Remove 'E' and 'F' in p9pdu_vwritef

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>

authored by

Aneesh Kumar K.V and committed by
Eric Van Hensbergen
abfa034e c3b92c87

+522 -364
+1 -1
fs/9p/vfs_dir.c
··· 231 231 while (err == 0) { 232 232 if (rdir->tail == rdir->head) { 233 233 err = p9_client_readdir(fid, rdir->buf, buflen, 234 - filp->f_pos); 234 + filp->f_pos); 235 235 if (err <= 0) 236 236 goto unlock_and_exit; 237 237
+3 -8
include/net/9p/9p.h
··· 359 359 /* Room for readdir header */ 360 360 #define P9_READDIRHDRSZ 24 361 361 362 + /* size of header for zero copy read/write */ 363 + #define P9_ZC_HDR_SZ 4096 364 + 362 365 /** 363 366 * struct p9_qid - file system entity information 364 367 * @type: 8-bit type &p9_qid_t ··· 558 555 * @tag: transaction id of the request 559 556 * @offset: used by marshalling routines to track current position in buffer 560 557 * @capacity: used by marshalling routines to track total malloc'd capacity 561 - * @pubuf: Payload user buffer given by the caller 562 - * @pkbuf: Payload kernel buffer given by the caller 563 - * @pbuf_size: pubuf/pkbuf(only one will be !NULL) size to be read/write. 564 - * @private: For transport layer's use. 565 558 * @sdata: payload 566 559 * 567 560 * &p9_fcall represents the structure for all 9P RPC ··· 574 575 575 576 size_t offset; 576 577 size_t capacity; 577 - char __user *pubuf; 578 - char *pkbuf; 579 - size_t pbuf_size; 580 - void *private; 581 578 582 579 u8 *sdata; 583 580 };
+2 -8
include/net/9p/transport.h
··· 26 26 #ifndef NET_9P_TRANSPORT_H 27 27 #define NET_9P_TRANSPORT_H 28 28 29 - #define P9_TRANS_PREF_PAYLOAD_MASK 0x1 30 - 31 - /* Default. Add Payload to PDU before sending it down to transport layer */ 32 - #define P9_TRANS_PREF_PAYLOAD_DEF 0x0 33 - /* Send pay load separately to transport layer along with PDU.*/ 34 - #define P9_TRANS_PREF_PAYLOAD_SEP 0x1 35 - 36 29 /** 37 30 * struct p9_trans_module - transport module interface 38 31 * @list: used to maintain a list of currently available transports ··· 49 56 struct list_head list; 50 57 char *name; /* name of transport */ 51 58 int maxsize; /* max message size of transport */ 52 - int pref; /* Preferences of this transport */ 53 59 int def; /* this transport should be default */ 54 60 struct module *owner; 55 61 int (*create)(struct p9_client *, const char *, char *); 56 62 void (*close) (struct p9_client *); 57 63 int (*request) (struct p9_client *, struct p9_req_t *req); 58 64 int (*cancel) (struct p9_client *, struct p9_req_t *req); 65 + int (*zc_request)(struct p9_client *, struct p9_req_t *, 66 + char *, char *, int , int, int, int); 59 67 }; 60 68 61 69 void v9fs_register_trans(struct p9_trans_module *m);
+300 -109
net/9p/client.c
··· 203 203 * 204 204 */ 205 205 206 - static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) 206 + static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag, int max_size) 207 207 { 208 208 unsigned long flags; 209 209 int row, col; 210 210 struct p9_req_t *req; 211 + int alloc_msize = min(c->msize, max_size); 211 212 212 213 /* This looks up the original request by tag so we know which 213 214 * buffer to read the data into */ ··· 246 245 return ERR_PTR(-ENOMEM); 247 246 } 248 247 init_waitqueue_head(req->wq); 249 - if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 250 - P9_TRANS_PREF_PAYLOAD_SEP) { 251 - int alloc_msize = min(c->msize, 4096); 252 - req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, 253 - GFP_NOFS); 254 - req->tc->capacity = alloc_msize; 255 - req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, 256 - GFP_NOFS); 257 - req->rc->capacity = alloc_msize; 258 - } else { 259 - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, 260 - GFP_NOFS); 261 - req->tc->capacity = c->msize; 262 - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, 263 - GFP_NOFS); 264 - req->rc->capacity = c->msize; 265 - } 248 + req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, 249 + GFP_NOFS); 250 + req->tc->capacity = alloc_msize; 251 + req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, 252 + GFP_NOFS); 253 + req->rc->capacity = alloc_msize; 266 254 if ((!req->tc) || (!req->rc)) { 267 255 printk(KERN_ERR "Couldn't grow tag array\n"); 268 256 kfree(req->tc); ··· 475 485 476 486 if (!p9_is_proto_dotl(c)) { 477 487 char *ename; 478 - 479 - if (req->tc->pbuf_size) { 480 - /* Handle user buffers */ 481 - size_t len = req->rc->size - req->rc->offset; 482 - if (req->tc->pubuf) { 483 - /* User Buffer */ 484 - err = copy_from_user( 485 - &req->rc->sdata[req->rc->offset], 486 - req->tc->pubuf, len); 487 - if (err) { 488 - err = -EFAULT; 489 - goto out_err; 490 - } 491 - } else { 492 - /* Kernel Buffer */ 493 - 
memmove(&req->rc->sdata[req->rc->offset], 494 - req->tc->pkbuf, len); 495 - } 496 - } 497 488 err = p9pdu_readf(req->rc, c->proto_version, "s?d", 498 - &ename, &ecode); 489 + &ename, &ecode); 499 490 if (err) 500 491 goto out_err; 501 492 ··· 486 515 if (!err || !IS_ERR_VALUE(err)) { 487 516 err = p9_errstr2errno(ename, strlen(ename)); 488 517 489 - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, 490 - ename); 491 - 492 - kfree(ename); 518 + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", 519 + -ecode, ename); 493 520 } 521 + kfree(ename); 494 522 } else { 495 523 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); 496 524 err = -ecode; ··· 497 527 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); 498 528 } 499 529 500 - 501 530 return err; 502 531 503 532 out_err: 504 533 P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); 505 534 535 + return err; 536 + } 537 + 538 + /** 539 + * p9_check_zc_errors - check 9p packet for error return and process it 540 + * @c: current client instance 541 + * @req: request to parse and check for error conditions 542 + * @in_hdrlen: Size of response protocol buffer. 
543 + * 544 + * returns error code if one is discovered, otherwise returns 0 545 + * 546 + * this will have to be more complicated if we have multiple 547 + * error packet types 548 + */ 549 + 550 + static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, 551 + char *uidata, int in_hdrlen, int kern_buf) 552 + { 553 + int err; 554 + int ecode; 555 + int8_t type; 556 + char *ename = NULL; 557 + 558 + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); 559 + if (err) { 560 + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); 561 + return err; 562 + } 563 + 564 + if (type != P9_RERROR && type != P9_RLERROR) 565 + return 0; 566 + 567 + if (!p9_is_proto_dotl(c)) { 568 + /* Error is reported in string format */ 569 + uint16_t len; 570 + /* 7 = header size for RERROR, 2 is the size of string len; */ 571 + int inline_len = in_hdrlen - (7 + 2); 572 + 573 + /* Read the size of error string */ 574 + err = p9pdu_readf(req->rc, c->proto_version, "w", &len); 575 + if (err) 576 + goto out_err; 577 + 578 + ename = kmalloc(len + 1, GFP_NOFS); 579 + if (!ename) { 580 + err = -ENOMEM; 581 + goto out_err; 582 + } 583 + if (len <= inline_len) { 584 + /* We have error in protocol buffer itself */ 585 + if (pdu_read(req->rc, ename, len)) { 586 + err = -EFAULT; 587 + goto out_free; 588 + 589 + } 590 + } else { 591 + /* 592 + * Part of the data is in user space buffer. 
593 + */ 594 + if (pdu_read(req->rc, ename, inline_len)) { 595 + err = -EFAULT; 596 + goto out_free; 597 + 598 + } 599 + if (kern_buf) { 600 + memcpy(ename + inline_len, uidata, 601 + len - inline_len); 602 + } else { 603 + err = copy_from_user(ename + inline_len, 604 + uidata, len - inline_len); 605 + if (err) { 606 + err = -EFAULT; 607 + goto out_free; 608 + } 609 + } 610 + } 611 + ename[len] = 0; 612 + if (p9_is_proto_dotu(c)) { 613 + /* For dotu we also have error code */ 614 + err = p9pdu_readf(req->rc, 615 + c->proto_version, "d", &ecode); 616 + if (err) 617 + goto out_free; 618 + err = -ecode; 619 + } 620 + if (!err || !IS_ERR_VALUE(err)) { 621 + err = p9_errstr2errno(ename, strlen(ename)); 622 + 623 + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", 624 + -ecode, ename); 625 + } 626 + kfree(ename); 627 + } else { 628 + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); 629 + err = -ecode; 630 + 631 + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); 632 + } 633 + return err; 634 + 635 + out_free: 636 + kfree(ename); 637 + out_err: 638 + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); 506 639 return err; 507 640 } 508 641 ··· 652 579 return 0; 653 580 } 654 581 582 + static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, 583 + int8_t type, int req_size, 584 + const char *fmt, va_list ap) 585 + { 586 + int tag, err; 587 + struct p9_req_t *req; 588 + 589 + P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); 590 + 591 + /* we allow for any status other than disconnected */ 592 + if (c->status == Disconnected) 593 + return ERR_PTR(-EIO); 594 + 595 + /* if status is begin_disconnected we allow only clunk request */ 596 + if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) 597 + return ERR_PTR(-EIO); 598 + 599 + tag = P9_NOTAG; 600 + if (type != P9_TVERSION) { 601 + tag = p9_idpool_get(c->tagpool); 602 + if (tag < 0) 603 + return ERR_PTR(-ENOMEM); 604 + } 605 + 606 + req = p9_tag_alloc(c, tag, req_size); 607 + if 
(IS_ERR(req)) 608 + return req; 609 + 610 + /* marshall the data */ 611 + p9pdu_prepare(req->tc, tag, type); 612 + err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); 613 + if (err) 614 + goto reterr; 615 + p9pdu_finalize(req->tc); 616 + return req; 617 + reterr: 618 + p9_free_req(c, req); 619 + return ERR_PTR(err); 620 + } 621 + 655 622 /** 656 623 * p9_client_rpc - issue a request and wait for a response 657 624 * @c: client session ··· 705 592 p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 706 593 { 707 594 va_list ap; 708 - int tag, err; 709 - struct p9_req_t *req; 595 + int sigpending, err; 710 596 unsigned long flags; 711 - int sigpending; 597 + struct p9_req_t *req; 712 598 713 - P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); 714 - 715 - /* we allow for any status other than disconnected */ 716 - if (c->status == Disconnected) 717 - return ERR_PTR(-EIO); 718 - 719 - /* if status is begin_disconnected we allow only clunk request */ 720 - if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) 721 - return ERR_PTR(-EIO); 599 + va_start(ap, fmt); 600 + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); 601 + va_end(ap); 602 + if (IS_ERR(req)) 603 + return req; 722 604 723 605 if (signal_pending(current)) { 724 606 sigpending = 1; ··· 721 613 } else 722 614 sigpending = 0; 723 615 724 - tag = P9_NOTAG; 725 - if (type != P9_TVERSION) { 726 - tag = p9_idpool_get(c->tagpool); 727 - if (tag < 0) 728 - return ERR_PTR(-ENOMEM); 729 - } 730 - 731 - req = p9_tag_alloc(c, tag); 732 - if (IS_ERR(req)) 733 - return req; 734 - 735 - /* marshall the data */ 736 - p9pdu_prepare(req->tc, tag, type); 737 - va_start(ap, fmt); 738 - err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); 739 - va_end(ap); 740 - if (err) 741 - goto reterr; 742 - p9pdu_finalize(req->tc); 743 - 744 616 err = c->trans_mod->request(c, req); 745 617 if (err < 0) { 746 618 if (err != -ERESTARTSYS && err != -EFAULT) 747 619 c->status = Disconnected; 748 620 
goto reterr; 749 621 } 750 - 751 - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); 622 + /* Wait for the response */ 752 623 err = wait_event_interruptible(*req->wq, 753 - req->status >= REQ_STATUS_RCVD); 754 - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", 755 - req->wq, tag, err); 624 + req->status >= REQ_STATUS_RCVD); 756 625 757 626 if (req->status == REQ_STATUS_ERROR) { 758 627 P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); 759 628 err = req->t_err; 760 629 } 761 - 762 630 if ((err == -ERESTARTSYS) && (c->status == Connected)) { 763 631 P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); 764 632 sigpending = 1; ··· 747 663 if (req->status == REQ_STATUS_RCVD) 748 664 err = 0; 749 665 } 750 - 751 666 if (sigpending) { 752 667 spin_lock_irqsave(&current->sighand->siglock, flags); 753 668 recalc_sigpending(); 754 669 spin_unlock_irqrestore(&current->sighand->siglock, flags); 755 670 } 756 - 757 671 if (err < 0) 758 672 goto reterr; 759 673 ··· 760 678 P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); 761 679 return req; 762 680 } 681 + reterr: 682 + P9_DPRINTK(P9_DEBUG_MUX, 683 + "exit: client %p op %d error: %d\n", c, type, err); 684 + p9_free_req(c, req); 685 + return ERR_PTR(err); 686 + } 763 687 688 + /** 689 + * p9_client_zc_rpc - issue a request and wait for a response 690 + * @c: client session 691 + * @type: type of request 692 + * @uidata: user bffer that should be ued for zero copy read 693 + * @uodata: user buffer that shoud be user for zero copy write 694 + * @inlen: read buffer size 695 + * @olen: write buffer size 696 + * @hdrlen: reader header size, This is the size of response protocol data 697 + * @fmt: protocol format string (see protocol.c) 698 + * 699 + * Returns request structure (which client must free using p9_free_req) 700 + */ 701 + static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, 702 + char *uidata, char *uodata, 703 + int inlen, int olen, int in_hdrlen, 704 + int 
kern_buf, const char *fmt, ...) 705 + { 706 + va_list ap; 707 + int sigpending, err; 708 + unsigned long flags; 709 + struct p9_req_t *req; 710 + 711 + va_start(ap, fmt); 712 + /* 713 + * We allocate a inline protocol data of only 4k bytes. 714 + * The actual content is passed in zero-copy fashion. 715 + */ 716 + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); 717 + va_end(ap); 718 + if (IS_ERR(req)) 719 + return req; 720 + 721 + if (signal_pending(current)) { 722 + sigpending = 1; 723 + clear_thread_flag(TIF_SIGPENDING); 724 + } else 725 + sigpending = 0; 726 + 727 + /* If we are called with KERNEL_DS force kern_buf */ 728 + if (segment_eq(get_fs(), KERNEL_DS)) 729 + kern_buf = 1; 730 + 731 + err = c->trans_mod->zc_request(c, req, uidata, uodata, 732 + inlen, olen, in_hdrlen, kern_buf); 733 + if (err < 0) { 734 + if (err == -EIO) 735 + c->status = Disconnected; 736 + goto reterr; 737 + } 738 + if (req->status == REQ_STATUS_ERROR) { 739 + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); 740 + err = req->t_err; 741 + } 742 + if ((err == -ERESTARTSYS) && (c->status == Connected)) { 743 + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); 744 + sigpending = 1; 745 + clear_thread_flag(TIF_SIGPENDING); 746 + 747 + if (c->trans_mod->cancel(c, req)) 748 + p9_client_flush(c, req); 749 + 750 + /* if we received the response anyway, don't signal error */ 751 + if (req->status == REQ_STATUS_RCVD) 752 + err = 0; 753 + } 754 + if (sigpending) { 755 + spin_lock_irqsave(&current->sighand->siglock, flags); 756 + recalc_sigpending(); 757 + spin_unlock_irqrestore(&current->sighand->siglock, flags); 758 + } 759 + if (err < 0) 760 + goto reterr; 761 + 762 + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); 763 + if (!err) { 764 + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); 765 + return req; 766 + } 764 767 reterr: 765 768 P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, 766 769 err); ··· 1497 1330 
p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, 1498 1331 u32 count) 1499 1332 { 1500 - int err, rsize; 1501 - struct p9_client *clnt; 1502 - struct p9_req_t *req; 1503 1333 char *dataptr; 1334 + int kernel_buf = 0; 1335 + struct p9_req_t *req; 1336 + struct p9_client *clnt; 1337 + int err, rsize, non_zc = 0; 1504 1338 1505 - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, 1506 - (long long unsigned) offset, count); 1339 + 1340 + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", 1341 + fid->fid, (long long unsigned) offset, count); 1507 1342 err = 0; 1508 1343 clnt = fid->clnt; 1509 1344 ··· 1517 1348 rsize = count; 1518 1349 1519 1350 /* Don't bother zerocopy for small IO (< 1024) */ 1520 - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 1521 - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { 1522 - req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, 1523 - rsize, data, udata); 1351 + if (clnt->trans_mod->zc_request && rsize > 1024) { 1352 + char *indata; 1353 + if (data) { 1354 + kernel_buf = 1; 1355 + indata = data; 1356 + } else 1357 + indata = (char *)udata; 1358 + /* 1359 + * response header len is 11 1360 + * PDU Header(7) + IO Size (4) 1361 + */ 1362 + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, 1363 + 11, kernel_buf, "dqd", fid->fid, 1364 + offset, rsize); 1524 1365 } else { 1366 + non_zc = 1; 1525 1367 req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, 1526 - rsize); 1368 + rsize); 1527 1369 } 1528 1370 if (IS_ERR(req)) { 1529 1371 err = PTR_ERR(req); ··· 1550 1370 P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); 1551 1371 P9_DUMP_PKT(1, req->rc); 1552 1372 1553 - if (!req->tc->pbuf_size) { 1373 + if (non_zc) { 1554 1374 if (data) { 1555 1375 memmove(data, dataptr, count); 1556 1376 } else { ··· 1576 1396 u64 offset, u32 count) 1577 1397 { 1578 1398 int err, rsize; 1399 + int kernel_buf = 0; 1579 1400 struct p9_client *clnt; 1580 1401 
struct p9_req_t *req; 1581 1402 ··· 1592 1411 if (count < rsize) 1593 1412 rsize = count; 1594 1413 1595 - /* Don't bother zerocopy form small IO (< 1024) */ 1596 - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 1597 - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { 1598 - req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, 1599 - rsize, data, udata); 1414 + /* Don't bother zerocopy for small IO (< 1024) */ 1415 + if (clnt->trans_mod->zc_request && rsize > 1024) { 1416 + char *odata; 1417 + if (data) { 1418 + kernel_buf = 1; 1419 + odata = data; 1420 + } else 1421 + odata = (char *)udata; 1422 + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, 1423 + P9_ZC_HDR_SZ, kernel_buf, "dqd", 1424 + fid->fid, offset, rsize); 1600 1425 } else { 1601 - 1602 1426 if (data) 1603 1427 req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, 1604 - offset, rsize, data); 1428 + offset, rsize, data); 1605 1429 else 1606 1430 req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, 1607 - offset, rsize, udata); 1431 + offset, rsize, udata); 1608 1432 } 1609 1433 if (IS_ERR(req)) { 1610 1434 err = PTR_ERR(req); ··· 2010 1824 2011 1825 int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) 2012 1826 { 2013 - int err, rsize; 1827 + int err, rsize, non_zc = 0; 2014 1828 struct p9_client *clnt; 2015 1829 struct p9_req_t *req; 2016 1830 char *dataptr; ··· 2028 1842 if (count < rsize) 2029 1843 rsize = count; 2030 1844 2031 - if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 2032 - P9_TRANS_PREF_PAYLOAD_SEP) { 2033 - req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, 2034 - offset, rsize, data); 1845 + /* Don't bother zerocopy for small IO (< 1024) */ 1846 + if (clnt->trans_mod->zc_request && rsize > 1024) { 1847 + /* 1848 + * response header len is 11 1849 + * PDU Header(7) + IO Size (4) 1850 + */ 1851 + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, 1852 + 11, 1, "dqd", fid->fid, offset, rsize); 2035 
1853 } else { 1854 + non_zc = 1; 2036 1855 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, 2037 - offset, rsize); 1856 + offset, rsize); 2038 1857 } 2039 1858 if (IS_ERR(req)) { 2040 1859 err = PTR_ERR(req); ··· 2054 1863 2055 1864 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); 2056 1865 2057 - if (!req->tc->pbuf_size && data) 1866 + if (non_zc) 2058 1867 memmove(data, dataptr, count); 2059 1868 2060 1869 p9_free_req(clnt, req);
+1 -45
net/9p/protocol.c
··· 81 81 } 82 82 EXPORT_SYMBOL(p9stat_free); 83 83 84 - static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) 84 + size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) 85 85 { 86 86 size_t len = min(pdu->size - pdu->offset, size); 87 87 memcpy(data, &pdu->sdata[pdu->offset], len); ··· 106 106 107 107 pdu->size += len; 108 108 return size - len; 109 - } 110 - 111 - static size_t 112 - pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, 113 - size_t size) 114 - { 115 - BUG_ON(pdu->size > P9_IOHDRSZ); 116 - pdu->pubuf = (char __user *)udata; 117 - pdu->pkbuf = (char *)kdata; 118 - pdu->pbuf_size = size; 119 - return 0; 120 - } 121 - 122 - static size_t 123 - pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) 124 - { 125 - BUG_ON(pdu->size > P9_READDIRHDRSZ); 126 - pdu->pkbuf = (char *)kdata; 127 - pdu->pbuf_size = size; 128 - return 0; 129 109 } 130 110 131 111 /* ··· 439 459 errcode = -EFAULT; 440 460 } 441 461 break; 442 - case 'E':{ 443 - int32_t cnt = va_arg(ap, int32_t); 444 - const char *k = va_arg(ap, const void *); 445 - const char __user *u = va_arg(ap, 446 - const void __user *); 447 - errcode = p9pdu_writef(pdu, proto_version, "d", 448 - cnt); 449 - if (!errcode && pdu_write_urw(pdu, k, u, cnt)) 450 - errcode = -EFAULT; 451 - } 452 - break; 453 - case 'F':{ 454 - int32_t cnt = va_arg(ap, int32_t); 455 - const char *k = va_arg(ap, const void *); 456 - errcode = p9pdu_writef(pdu, proto_version, "d", 457 - cnt); 458 - if (!errcode && pdu_write_readdir(pdu, k, cnt)) 459 - errcode = -EFAULT; 460 - } 461 - break; 462 462 case 'U':{ 463 463 int32_t count = va_arg(ap, int32_t); 464 464 const char __user *udata = ··· 597 637 { 598 638 pdu->offset = 0; 599 639 pdu->size = 0; 600 - pdu->private = NULL; 601 - pdu->pubuf = NULL; 602 - pdu->pkbuf = NULL; 603 - pdu->pbuf_size = 0; 604 640 } 605 641 606 642 int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
+1
net/9p/protocol.h
··· 32 32 int p9pdu_finalize(struct p9_fcall *pdu); 33 33 void p9pdu_dump(int, struct p9_fcall *); 34 34 void p9pdu_reset(struct p9_fcall *pdu); 35 + size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size);
+15 -38
net/9p/trans_common.c
··· 21 21 22 22 /** 23 23 * p9_release_req_pages - Release pages after the transaction. 24 - * @*private: PDU's private page of struct trans_rpage_info 25 24 */ 26 - void 27 - p9_release_req_pages(struct trans_rpage_info *rpinfo) 25 + void p9_release_pages(struct page **pages, int nr_pages) 28 26 { 29 27 int i = 0; 30 - 31 - while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { 32 - put_page(rpinfo->rp_data[i]); 28 + while (pages[i] && nr_pages--) { 29 + put_page(pages[i]); 33 30 i++; 34 31 } 35 32 } 36 - EXPORT_SYMBOL(p9_release_req_pages); 33 + EXPORT_SYMBOL(p9_release_pages); 37 34 38 35 /** 39 36 * p9_nr_pages - Return number of pages needed to accommodate the payload. 40 37 */ 41 - int 42 - p9_nr_pages(struct p9_req_t *req) 38 + int p9_nr_pages(char *data, int len) 43 39 { 44 40 unsigned long start_page, end_page; 45 - start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; 46 - end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + 47 - PAGE_SIZE - 1) >> PAGE_SHIFT; 41 + start_page = (unsigned long)data >> PAGE_SHIFT; 42 + end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 48 43 return end_page - start_page; 49 44 } 50 45 EXPORT_SYMBOL(p9_nr_pages); ··· 53 58 * @nr_pages: number of pages to accommodate the payload 54 59 * @rw: Indicates if the pages are for read or write. 
55 60 */ 56 - int 57 - p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, 58 - int nr_pages, u8 rw) 61 + 62 + int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) 59 63 { 60 - uint32_t first_page_bytes = 0; 61 - int32_t pdata_mapped_pages; 62 - struct trans_rpage_info *rpinfo; 64 + int nr_mapped_pages; 63 65 64 - *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); 66 + nr_mapped_pages = get_user_pages_fast((unsigned long)data, 67 + *nr_pages, write, pages); 68 + if (nr_mapped_pages <= 0) 69 + return nr_mapped_pages; 65 70 66 - if (*pdata_off) 67 - first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), 68 - req->tc->pbuf_size); 69 - 70 - rpinfo = req->tc->private; 71 - pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, 72 - nr_pages, rw, &rpinfo->rp_data[0]); 73 - if (pdata_mapped_pages <= 0) 74 - return pdata_mapped_pages; 75 - 76 - rpinfo->rp_nr_pages = pdata_mapped_pages; 77 - if (*pdata_off) { 78 - *pdata_len = first_page_bytes; 79 - *pdata_len += min((req->tc->pbuf_size - *pdata_len), 80 - ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); 81 - } else { 82 - *pdata_len = min(req->tc->pbuf_size, 83 - (size_t)pdata_mapped_pages << PAGE_SHIFT); 84 - } 71 + *nr_pages = nr_mapped_pages; 85 72 return 0; 86 73 } 87 74 EXPORT_SYMBOL(p9_payload_gup);
+3 -18
net/9p/trans_common.h
··· 12 12 * 13 13 */ 14 14 15 - /* TRUE if it is user context */ 16 - #define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) 17 - 18 - /** 19 - * struct trans_rpage_info - To store mapped page information in PDU. 20 - * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. 21 - * @rp_nr_pages: Number of mapped pages 22 - * @rp_data: Array of page pointers 23 - */ 24 - struct trans_rpage_info { 25 - u8 rp_alloc; 26 - int rp_nr_pages; 27 - struct page *rp_data[0]; 28 - }; 29 - 30 - void p9_release_req_pages(struct trans_rpage_info *); 31 - int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); 32 - int p9_nr_pages(struct p9_req_t *); 15 + void p9_release_pages(struct page **, int); 16 + int p9_payload_gup(char *, int *, struct page **, int); 17 + int p9_nr_pages(char *, int);
+196 -137
net/9p/trans_virtio.c
··· 150 150 while (1) { 151 151 spin_lock_irqsave(&chan->lock, flags); 152 152 rc = virtqueue_get_buf(chan->vq, &len); 153 - 154 153 if (rc == NULL) { 155 154 spin_unlock_irqrestore(&chan->lock, flags); 156 155 break; 157 156 } 158 - 159 157 chan->ring_bufs_avail = 1; 160 158 spin_unlock_irqrestore(&chan->lock, flags); 161 159 /* Wakeup if anyone waiting for VirtIO ring space. */ ··· 161 163 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); 162 164 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); 163 165 req = p9_tag_lookup(chan->client, rc->tag); 164 - if (req->tc->private) { 165 - struct trans_rpage_info *rp = req->tc->private; 166 - int p = rp->rp_nr_pages; 167 - /*Release pages */ 168 - p9_release_req_pages(rp); 169 - atomic_sub(p, &vp_pinned); 170 - wake_up(&vp_wq); 171 - if (rp->rp_alloc) 172 - kfree(rp); 173 - req->tc->private = NULL; 174 - } 175 166 req->status = REQ_STATUS_RCVD; 176 167 p9_client_cb(chan->client, req); 177 168 } ··· 180 193 * 181 194 */ 182 195 183 - static int 184 - pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, 185 - int count) 196 + static int pack_sg_list(struct scatterlist *sg, int start, 197 + int limit, char *data, int count) 186 198 { 187 199 int s; 188 200 int index = start; ··· 210 224 * this takes a list of pages. 211 225 * @sg: scatter/gather list to pack into 212 226 * @start: which segment of the sg_list to start at 213 - * @pdata_off: Offset into the first page 214 227 * @**pdata: a list of pages to add into sg. 
228 + * @nr_pages: number of pages to pack into the scatter/gather list 229 + * @data: data to pack into scatter/gather list 215 230 * @count: amount of data to pack into the scatter/gather list 216 231 */ 217 232 static int 218 - pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, 219 - struct page **pdata, int count) 233 + pack_sg_list_p(struct scatterlist *sg, int start, int limit, 234 + struct page **pdata, int nr_pages, char *data, int count) 220 235 { 221 - int s; 222 - int i = 0; 236 + int i = 0, s; 237 + int data_off; 223 238 int index = start; 224 239 225 - if (pdata_off) { 226 - s = min((int)(PAGE_SIZE - pdata_off), count); 227 - sg_set_page(&sg[index++], pdata[i++], s, pdata_off); 240 + BUG_ON(nr_pages > (limit - start)); 241 + /* 242 + * if the first page doesn't start at 243 + * page boundary find the offset 244 + */ 245 + data_off = offset_in_page(data); 246 + while (nr_pages) { 247 + s = rest_of_page(data); 248 + if (s > count) 249 + s = count; 250 + sg_set_page(&sg[index++], pdata[i++], s, data_off); 251 + data_off = 0; 252 + data += s; 228 253 count -= s; 254 + nr_pages--; 229 255 } 230 - 231 - while (count) { 232 - BUG_ON(index > limit); 233 - s = min((int)PAGE_SIZE, count); 234 - sg_set_page(&sg[index++], pdata[i++], s, 0); 235 - count -= s; 236 - } 237 - return index-start; 256 + return index - start; 238 257 } 239 258 240 259 /** ··· 252 261 static int 253 262 p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 254 263 { 255 - int in, out, inp, outp; 256 - struct virtio_chan *chan = client->trans; 264 + int err; 265 + int in, out; 257 266 unsigned long flags; 258 - size_t pdata_off = 0; 259 - struct trans_rpage_info *rpinfo = NULL; 260 - int err, pdata_len = 0; 267 + struct virtio_chan *chan = client->trans; 261 268 262 269 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 263 270 264 271 req->status = REQ_STATUS_SENT; 265 - 266 - if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) 
{ 267 - int nr_pages = p9_nr_pages(req); 268 - int rpinfo_size = sizeof(struct trans_rpage_info) + 269 - sizeof(struct page *) * nr_pages; 270 - 271 - if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { 272 - err = wait_event_interruptible(vp_wq, 273 - atomic_read(&vp_pinned) < chan->p9_max_pages); 274 - if (err == -ERESTARTSYS) 275 - return err; 276 - P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); 277 - } 278 - 279 - if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { 280 - /* We can use sdata */ 281 - req->tc->private = req->tc->sdata + req->tc->size; 282 - rpinfo = (struct trans_rpage_info *)req->tc->private; 283 - rpinfo->rp_alloc = 0; 284 - } else { 285 - req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); 286 - if (!req->tc->private) { 287 - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " 288 - "private kmalloc returned NULL"); 289 - return -ENOMEM; 290 - } 291 - rpinfo = (struct trans_rpage_info *)req->tc->private; 292 - rpinfo->rp_alloc = 1; 293 - } 294 - 295 - err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, 296 - req->tc->id == P9_TREAD ? 
1 : 0); 297 - if (err < 0) { 298 - if (rpinfo->rp_alloc) 299 - kfree(rpinfo); 300 - return err; 301 - } else { 302 - atomic_add(rpinfo->rp_nr_pages, &vp_pinned); 303 - } 304 - } 305 - 306 - req_retry_pinned: 272 + req_retry: 307 273 spin_lock_irqsave(&chan->lock, flags); 308 274 309 275 /* Handle out VirtIO ring buffers */ 310 - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, 311 - req->tc->size); 276 + out = pack_sg_list(chan->sg, 0, 277 + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 312 278 313 - if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { 314 - /* We have additional write payload buffer to take care */ 315 - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { 316 - outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, 317 - pdata_off, rpinfo->rp_data, pdata_len); 318 - } else { 319 - char *pbuf; 320 - if (req->tc->pubuf) 321 - pbuf = (__force char *) req->tc->pubuf; 322 - else 323 - pbuf = req->tc->pkbuf; 324 - outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, 325 - req->tc->pbuf_size); 326 - } 327 - out += outp; 328 - } 329 - 330 - /* Handle in VirtIO ring buffers */ 331 - if (req->tc->pbuf_size && 332 - ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { 333 - /* 334 - * Take care of additional Read payload. 335 - * 11 is the read/write header = PDU Header(7) + IO Size (4). 336 - * Arrange in such a way that server places header in the 337 - * alloced memory and payload onto the user buffer. 338 - */ 339 - inp = pack_sg_list(chan->sg, out, 340 - VIRTQUEUE_NUM, req->rc->sdata, 11); 341 - /* 342 - * Running executables in the filesystem may result in 343 - * a read request with kernel buffer as opposed to user buffer. 
344 - */ 345 - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { 346 - in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, 347 - pdata_off, rpinfo->rp_data, pdata_len); 348 - } else { 349 - char *pbuf; 350 - if (req->tc->pubuf) 351 - pbuf = (__force char *) req->tc->pubuf; 352 - else 353 - pbuf = req->tc->pkbuf; 354 - 355 - in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, 356 - pbuf, req->tc->pbuf_size); 357 - } 358 - in += inp; 359 - } else { 360 - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, 361 - req->rc->sdata, req->rc->capacity); 362 - } 279 + in = pack_sg_list(chan->sg, out, 280 + VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); 363 281 364 282 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); 365 283 if (err < 0) { ··· 281 381 return err; 282 382 283 383 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); 284 - goto req_retry_pinned; 384 + goto req_retry; 285 385 } else { 286 386 spin_unlock_irqrestore(&chan->lock, flags); 287 387 P9_DPRINTK(P9_DEBUG_TRANS, 288 388 "9p debug: " 289 389 "virtio rpc add_buf returned failure"); 290 - if (rpinfo && rpinfo->rp_alloc) 291 - kfree(rpinfo); 292 390 return -EIO; 293 391 } 294 392 } 295 - 296 393 virtqueue_kick(chan->vq); 297 394 spin_unlock_irqrestore(&chan->lock, flags); 298 395 299 396 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); 300 397 return 0; 398 + } 399 + 400 + static int p9_get_mapped_pages(struct virtio_chan *chan, 401 + struct page **pages, char *data, 402 + int nr_pages, int write, int kern_buf) 403 + { 404 + int err; 405 + if (!kern_buf) { 406 + /* 407 + * We allow only p9_max_pages pinned. 
We wait for the 408 + * Other zc request to finish here 409 + */ 410 + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { 411 + err = wait_event_interruptible(vp_wq, 412 + (atomic_read(&vp_pinned) < chan->p9_max_pages)); 413 + if (err == -ERESTARTSYS) 414 + return err; 415 + } 416 + err = p9_payload_gup(data, &nr_pages, pages, write); 417 + if (err < 0) 418 + return err; 419 + atomic_add(nr_pages, &vp_pinned); 420 + } else { 421 + /* kernel buffer, no need to pin pages */ 422 + int s, index = 0; 423 + int count = nr_pages; 424 + while (nr_pages) { 425 + s = rest_of_page(data); 426 + pages[index++] = virt_to_page(data); 427 + data += s; 428 + nr_pages--; 429 + } 430 + nr_pages = count; 431 + } 432 + return nr_pages; 433 + } 434 + 435 + /** 436 + * p9_virtio_zc_request - issue a zero copy request 437 + * @client: client instance issuing the request 438 + * @req: request to be issued 439 + * @uidata: user bffer that should be ued for zero copy read 440 + * @uodata: user buffer that shoud be user for zero copy write 441 + * @inlen: read buffer size 442 + * @olen: write buffer size 443 + * @hdrlen: reader header size, This is the size of response protocol data 444 + * 445 + */ 446 + static int 447 + p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, 448 + char *uidata, char *uodata, int inlen, 449 + int outlen, int in_hdr_len, int kern_buf) 450 + { 451 + int in, out, err; 452 + unsigned long flags; 453 + int in_nr_pages = 0, out_nr_pages = 0; 454 + struct page **in_pages = NULL, **out_pages = NULL; 455 + struct virtio_chan *chan = client->trans; 456 + 457 + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 458 + 459 + if (uodata) { 460 + out_nr_pages = p9_nr_pages(uodata, outlen); 461 + out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, 462 + GFP_NOFS); 463 + if (!out_pages) { 464 + err = -ENOMEM; 465 + goto err_out; 466 + } 467 + out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, 468 + out_nr_pages, 0, kern_buf); 469 + if 
(out_nr_pages < 0) { 470 + err = out_nr_pages; 471 + kfree(out_pages); 472 + out_pages = NULL; 473 + goto err_out; 474 + } 475 + } 476 + if (uidata) { 477 + in_nr_pages = p9_nr_pages(uidata, inlen); 478 + in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, 479 + GFP_NOFS); 480 + if (!in_pages) { 481 + err = -ENOMEM; 482 + goto err_out; 483 + } 484 + in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, 485 + in_nr_pages, 1, kern_buf); 486 + if (in_nr_pages < 0) { 487 + err = in_nr_pages; 488 + kfree(in_pages); 489 + in_pages = NULL; 490 + goto err_out; 491 + } 492 + } 493 + req->status = REQ_STATUS_SENT; 494 + req_retry_pinned: 495 + spin_lock_irqsave(&chan->lock, flags); 496 + /* out data */ 497 + out = pack_sg_list(chan->sg, 0, 498 + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); 499 + 500 + if (out_pages) 501 + out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, 502 + out_pages, out_nr_pages, uodata, outlen); 503 + /* 504 + * Take care of in data 505 + * For example TREAD have 11. 506 + * 11 is the read/write header = PDU Header(7) + IO Size (4). 507 + * Arrange in such a way that server places header in the 508 + * alloced memory and payload onto the user buffer. 
509 + */ 510 + in = pack_sg_list(chan->sg, out, 511 + VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); 512 + if (in_pages) 513 + in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, 514 + in_pages, in_nr_pages, uidata, inlen); 515 + 516 + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); 517 + if (err < 0) { 518 + if (err == -ENOSPC) { 519 + chan->ring_bufs_avail = 0; 520 + spin_unlock_irqrestore(&chan->lock, flags); 521 + err = wait_event_interruptible(*chan->vc_wq, 522 + chan->ring_bufs_avail); 523 + if (err == -ERESTARTSYS) 524 + goto err_out; 525 + 526 + P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); 527 + goto req_retry_pinned; 528 + } else { 529 + spin_unlock_irqrestore(&chan->lock, flags); 530 + P9_DPRINTK(P9_DEBUG_TRANS, 531 + "9p debug: " 532 + "virtio rpc add_buf returned failure"); 533 + err = -EIO; 534 + goto err_out; 535 + } 536 + } 537 + virtqueue_kick(chan->vq); 538 + spin_unlock_irqrestore(&chan->lock, flags); 539 + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); 540 + err = wait_event_interruptible(*req->wq, 541 + req->status >= REQ_STATUS_RCVD); 542 + /* 543 + * Non kernel buffers are pinned, unpin them 544 + */ 545 + err_out: 546 + if (!kern_buf) { 547 + if (in_pages) { 548 + p9_release_pages(in_pages, in_nr_pages); 549 + atomic_sub(in_nr_pages, &vp_pinned); 550 + } 551 + if (out_pages) { 552 + p9_release_pages(out_pages, out_nr_pages); 553 + atomic_sub(out_nr_pages, &vp_pinned); 554 + } 555 + /* wakeup anybody waiting for slots to pin pages */ 556 + wake_up(&vp_wq); 557 + } 558 + kfree(in_pages); 559 + kfree(out_pages); 560 + return err; 301 561 } 302 562 303 563 static ssize_t p9_mount_tag_show(struct device *dev, ··· 651 591 .create = p9_virtio_create, 652 592 .close = p9_virtio_close, 653 593 .request = p9_virtio_request, 594 + .zc_request = p9_virtio_zc_request, 654 595 .cancel = p9_virtio_cancel, 655 - 656 596 /* 657 597 * We leave one entry for input and one entry for response 658 598 * headers. 
We also skip one more entry to accomodate, address ··· 660 600 * page in zero copy. 661 601 */ 662 602 .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), 663 - .pref = P9_TRANS_PREF_PAYLOAD_SEP, 664 603 .def = 0, 665 604 .owner = THIS_MODULE, 666 605 };