Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NBD: allow nbd to be used locally

This patch allows Network Block Device to be mounted locally (nbd-client to
nbd-server over 127.0.0.1).

It creates a kthread to avoid the deadlock described in NBD tools
documentation. So, if nbd-client hangs waiting for pages, the kblockd thread
can continue its work and free pages.

I have tested the patch to verify that it avoids the hang that always occurs
when writing to a localhost nbd connection. I have also tested to verify that
no performance degradation results from the additional thread and queue.

Patch originally from Laurent Vivier.

Signed-off-by: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Laurent Vivier; committed by Linus Torvalds.
48cf6061 8c4dd606

+98 -50
+95 -49
drivers/block/nbd.c
··· 29 29 #include <linux/kernel.h> 30 30 #include <net/sock.h> 31 31 #include <linux/net.h> 32 + #include <linux/kthread.h> 32 33 33 34 #include <asm/uaccess.h> 34 35 #include <asm/system.h> ··· 442 441 } 443 442 444 443 444 + static void nbd_handle_req(struct nbd_device *lo, struct request *req) 445 + { 446 + if (!blk_fs_request(req)) 447 + goto error_out; 448 + 449 + nbd_cmd(req) = NBD_CMD_READ; 450 + if (rq_data_dir(req) == WRITE) { 451 + nbd_cmd(req) = NBD_CMD_WRITE; 452 + if (lo->flags & NBD_READ_ONLY) { 453 + printk(KERN_ERR "%s: Write on read-only\n", 454 + lo->disk->disk_name); 455 + goto error_out; 456 + } 457 + } 458 + 459 + req->errors = 0; 460 + 461 + mutex_lock(&lo->tx_lock); 462 + if (unlikely(!lo->sock)) { 463 + mutex_unlock(&lo->tx_lock); 464 + printk(KERN_ERR "%s: Attempted send on closed socket\n", 465 + lo->disk->disk_name); 466 + req->errors++; 467 + nbd_end_request(req); 468 + return; 469 + } 470 + 471 + lo->active_req = req; 472 + 473 + if (nbd_send_req(lo, req) != 0) { 474 + printk(KERN_ERR "%s: Request send failed\n", 475 + lo->disk->disk_name); 476 + req->errors++; 477 + nbd_end_request(req); 478 + } else { 479 + spin_lock(&lo->queue_lock); 480 + list_add(&req->queuelist, &lo->queue_head); 481 + spin_unlock(&lo->queue_lock); 482 + } 483 + 484 + lo->active_req = NULL; 485 + mutex_unlock(&lo->tx_lock); 486 + wake_up_all(&lo->active_wq); 487 + 488 + return; 489 + 490 + error_out: 491 + req->errors++; 492 + nbd_end_request(req); 493 + } 494 + 495 + static int nbd_thread(void *data) 496 + { 497 + struct nbd_device *lo = data; 498 + struct request *req; 499 + 500 + set_user_nice(current, -20); 501 + while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { 502 + /* wait for something to do */ 503 + wait_event_interruptible(lo->waiting_wq, 504 + kthread_should_stop() || 505 + !list_empty(&lo->waiting_queue)); 506 + 507 + /* extract request */ 508 + if (list_empty(&lo->waiting_queue)) 509 + continue; 510 + 511 + 
spin_lock_irq(&lo->queue_lock); 512 + req = list_entry(lo->waiting_queue.next, struct request, 513 + queuelist); 514 + list_del_init(&req->queuelist); 515 + spin_unlock_irq(&lo->queue_lock); 516 + 517 + /* handle request */ 518 + nbd_handle_req(lo, req); 519 + } 520 + return 0; 521 + } 522 + 445 523 /* 446 524 * We always wait for result of write, for now. It would be nice to make it optional 447 525 * in future ··· 536 456 struct nbd_device *lo; 537 457 538 458 blkdev_dequeue_request(req); 459 + 460 + spin_unlock_irq(q->queue_lock); 461 + 539 462 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", 540 463 req->rq_disk->disk_name, req, req->cmd_type); 541 - 542 - if (!blk_fs_request(req)) 543 - goto error_out; 544 464 545 465 lo = req->rq_disk->private_data; 546 466 547 467 BUG_ON(lo->magic != LO_MAGIC); 548 468 549 - nbd_cmd(req) = NBD_CMD_READ; 550 - if (rq_data_dir(req) == WRITE) { 551 - nbd_cmd(req) = NBD_CMD_WRITE; 552 - if (lo->flags & NBD_READ_ONLY) { 553 - printk(KERN_ERR "%s: Write on read-only\n", 554 - lo->disk->disk_name); 555 - goto error_out; 556 - } 557 - } 469 + spin_lock_irq(&lo->queue_lock); 470 + list_add_tail(&req->queuelist, &lo->waiting_queue); 471 + spin_unlock_irq(&lo->queue_lock); 558 472 559 - req->errors = 0; 560 - spin_unlock_irq(q->queue_lock); 561 - 562 - mutex_lock(&lo->tx_lock); 563 - if (unlikely(!lo->sock)) { 564 - mutex_unlock(&lo->tx_lock); 565 - printk(KERN_ERR "%s: Attempted send on closed socket\n", 566 - lo->disk->disk_name); 567 - req->errors++; 568 - nbd_end_request(req); 569 - spin_lock_irq(q->queue_lock); 570 - continue; 571 - } 572 - 573 - lo->active_req = req; 574 - 575 - if (nbd_send_req(lo, req) != 0) { 576 - printk(KERN_ERR "%s: Request send failed\n", 577 - lo->disk->disk_name); 578 - req->errors++; 579 - nbd_end_request(req); 580 - } else { 581 - spin_lock(&lo->queue_lock); 582 - list_add(&req->queuelist, &lo->queue_head); 583 - spin_unlock(&lo->queue_lock); 584 - } 585 - 586 - lo->active_req = NULL; 587 - 
mutex_unlock(&lo->tx_lock); 588 - wake_up_all(&lo->active_wq); 473 + wake_up(&lo->waiting_wq); 589 474 590 475 spin_lock_irq(q->queue_lock); 591 - continue; 592 - 593 - error_out: 594 - req->errors++; 595 - spin_unlock(q->queue_lock); 596 - nbd_end_request(req); 597 - spin_lock(q->queue_lock); 598 476 } 599 477 } 600 478 ··· 562 524 struct nbd_device *lo = inode->i_bdev->bd_disk->private_data; 563 525 int error; 564 526 struct request sreq ; 527 + struct task_struct *thread; 565 528 566 529 if (!capable(CAP_SYS_ADMIN)) 567 530 return -EPERM; ··· 645 606 case NBD_DO_IT: 646 607 if (!lo->file) 647 608 return -EINVAL; 609 + thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); 610 + if (IS_ERR(thread)) 611 + return PTR_ERR(thread); 612 + wake_up_process(thread); 648 613 error = nbd_do_it(lo); 614 + kthread_stop(thread); 649 615 if (error) 650 616 return error; 651 617 sock_shutdown(lo, 1); ··· 739 695 nbd_dev[i].file = NULL; 740 696 nbd_dev[i].magic = LO_MAGIC; 741 697 nbd_dev[i].flags = 0; 698 + INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); 742 699 spin_lock_init(&nbd_dev[i].queue_lock); 743 700 INIT_LIST_HEAD(&nbd_dev[i].queue_head); 744 701 mutex_init(&nbd_dev[i].tx_lock); 745 702 init_waitqueue_head(&nbd_dev[i].active_wq); 703 + init_waitqueue_head(&nbd_dev[i].waiting_wq); 746 704 nbd_dev[i].blksize = 1024; 747 705 nbd_dev[i].bytesize = 0; 748 706 disk->major = NBD_MAJOR;
+3 -1
include/linux/nbd.h
··· 56 56 int magic; 57 57 58 58 spinlock_t queue_lock; 59 - struct list_head queue_head;/* Requests are added here... */ 59 + struct list_head queue_head; /* Requests waiting result */ 60 60 struct request *active_req; 61 61 wait_queue_head_t active_wq; 62 + struct list_head waiting_queue; /* Requests to be sent */ 63 + wait_queue_head_t waiting_wq; 62 64 63 65 struct mutex tx_lock; 64 66 struct gendisk *disk;