Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v3.15-rc5 (653 lines, 14 kB)
#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>

struct nullb_cmd {
	struct list_head list;
	struct llist_node ll_list;
	struct call_single_data csd;
	struct request *rq;
	struct bio *bio;
	unsigned int tag;
	struct nullb_queue *nq;
};

struct nullb_queue {
	unsigned long *tag_map;
	wait_queue_head_t wait;
	unsigned int queue_depth;

	struct nullb_cmd *cmds;
};

struct nullb {
	struct list_head list;
	unsigned int index;
	struct request_queue *q;
	struct gendisk *disk;
	struct hrtimer timer;
	unsigned int queue_depth;
	spinlock_t lock;

	struct nullb_queue *queues;
	unsigned int nr_queues;
};

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;

struct completion_queue {
	struct llist_head list;
	struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

enum {
	NULL_IRQ_NONE = 0,
	NULL_IRQ_SOFTIRQ = 1,
	NULL_IRQ_TIMER = 2,
};

enum {
	NULL_Q_BIO = 0,
	NULL_Q_RQ = 1,
	NULL_Q_MQ = 2,
};

static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;
module_param(queue_mode, int, S_IRUGO);
MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)");

static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static int irqmode = NULL_IRQ_SOFTIRQ;
module_param(irqmode, int, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");

static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
	clear_bit_unlock(tag, nq->tag_map);

	if (waitqueue_active(&nq->wait))
		wake_up(&nq->wait);
}

static unsigned int get_tag(struct nullb_queue *nq)
{
	unsigned int tag;

	do {
		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
		if (tag >= nq->queue_depth)
			return -1U;
	} while (test_and_set_bit_lock(tag, nq->tag_map));

	return tag;
}

static void free_cmd(struct nullb_cmd *cmd)
{
	put_tag(cmd->nq, cmd->tag);
}

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	unsigned int tag;

	tag = get_tag(nq);
	if (tag != -1U) {
		cmd = &nq->cmds[tag];
		cmd->tag = tag;
		cmd->nq = nq;
		return cmd;
	}

	return NULL;
}

static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
	struct nullb_cmd *cmd;
	DEFINE_WAIT(wait);

	cmd = __alloc_cmd(nq);
	if (cmd || !can_wait)
		return cmd;

	do {
		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
		cmd = __alloc_cmd(nq);
		if (cmd)
			break;

		io_schedule();
	} while (1);

	finish_wait(&nq->wait, &wait);
	return cmd;
}

static void end_cmd(struct nullb_cmd *cmd)
{
	switch (queue_mode) {
	case NULL_Q_MQ:
		blk_mq_end_io(cmd->rq, 0);
		return;
	case NULL_Q_RQ:
		INIT_LIST_HEAD(&cmd->rq->queuelist);
		blk_end_request_all(cmd->rq, 0);
		break;
	case NULL_Q_BIO:
		bio_endio(cmd->bio, 0);
		break;
	}

	free_cmd(cmd);
}

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
	struct completion_queue *cq;
	struct llist_node *entry;
	struct nullb_cmd *cmd;

	cq = &per_cpu(completion_queues, smp_processor_id());

	while ((entry = llist_del_all(&cq->list)) != NULL) {
		entry = llist_reverse_order(entry);
		do {
			cmd = container_of(entry, struct nullb_cmd, ll_list);
			end_cmd(cmd);
			entry = entry->next;
		} while (entry);
	}

	return HRTIMER_NORESTART;
}

static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

	cmd->ll_list.next = NULL;
	if (llist_add(&cmd->ll_list, &cq->list)) {
		ktime_t kt = ktime_set(0, completion_nsec);

		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
	}

	put_cpu();
}

static void null_softirq_done_fn(struct request *rq)
{
	end_cmd(rq->special);
}

static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
	/* Complete IO by inline, softirq or timer */
	switch (irqmode) {
	case NULL_IRQ_SOFTIRQ:
		switch (queue_mode) {
		case NULL_Q_MQ:
			blk_mq_complete_request(cmd->rq);
			break;
		case NULL_Q_RQ:
			blk_complete_request(cmd->rq);
			break;
		case NULL_Q_BIO:
			/*
			 * XXX: no proper submitting cpu information available.
			 */
			end_cmd(cmd);
			break;
		}
		break;
	case NULL_IRQ_NONE:
		end_cmd(cmd);
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}

static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
	int index = 0;

	if (nullb->nr_queues != 1)
		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

	return &nullb->queues[index];
}

static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 1);
	cmd->bio = bio;

	null_handle_cmd(cmd);
}

static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 0);
	if (cmd) {
		cmd->rq = req;
		req->special = cmd;
		return BLKPREP_OK;
	}

	return BLKPREP_DEFER;
}

static void null_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		struct nullb_cmd *cmd = rq->special;

		spin_unlock_irq(q->queue_lock);
		null_handle_cmd(cmd);
		spin_lock_irq(q->queue_lock);
	}
}

static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct nullb_cmd *cmd = rq->special;

	cmd->rq = rq;
	cmd->nq = hctx->driver_data;

	null_handle_cmd(cmd);
	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
{
	int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
	int tip = (reg->nr_hw_queues % nr_online_nodes);
	int node = 0, i, n;

	/*
	 * Split submit queues evenly wrt to the number of nodes. If uneven,
	 * fill the first buckets with one extra, until the rest is filled with
	 * no extra.
	 */
	for (i = 0, n = 1; i < hctx_index; i++, n++) {
		if (n % b_size == 0) {
			n = 0;
			node++;

			tip--;
			if (!tip)
				b_size = reg->nr_hw_queues / nr_online_nodes;
		}
	}

	/*
	 * A node might not be online, therefore map the relative node id to the
	 * real node id.
	 */
	for_each_online_node(n) {
		if (!node)
			break;
		node--;
	}

	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
}

static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
{
	kfree(hctx);
}

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	BUG_ON(!nullb);
	BUG_ON(!nq);

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int index)
{
	struct nullb *nullb = data;
	struct nullb_queue *nq = &nullb->queues[index];

	hctx->driver_data = nq;
	null_init_queue(nullb, nq);
	nullb->nr_queues++;

	return 0;
}

static struct blk_mq_ops null_mq_ops = {
	.queue_rq = null_queue_rq,
	.map_queue = blk_mq_map_queue,
	.init_hctx = null_init_hctx,
	.complete = null_softirq_done_fn,
};

static struct blk_mq_reg null_mq_reg = {
	.ops = &null_mq_ops,
	.queue_depth = 64,
	.cmd_size = sizeof(struct nullb_cmd),
	.flags = BLK_MQ_F_SHOULD_MERGE,
};

static void null_del_dev(struct nullb *nullb)
{
	list_del_init(&nullb->list);

	del_gendisk(nullb->disk);
	blk_cleanup_queue(nullb->q);
	put_disk(nullb->disk);
	kfree(nullb);
}

static int null_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
	.owner = THIS_MODULE,
	.open = null_open,
	.release = null_release,
};

static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		INIT_LIST_HEAD(&cmd->list);
		cmd->ll_list.next = NULL;
		cmd->tag = -1U;
	}

	return 0;
}

static void cleanup_queue(struct nullb_queue *nq)
{
	kfree(nq->tag_map);
	kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
	int i;

	for (i = 0; i < nullb->nr_queues; i++)
		cleanup_queue(&nullb->queues[i]);

	kfree(nullb->queues);
}

static int setup_queues(struct nullb *nullb)
{
	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
				GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	return 0;
}

static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			goto err_queue;
		nullb->nr_queues++;
	}

	return 0;
err_queue:
	cleanup_queues(nullb);
	return ret;
}

static int null_add_dev(void)
{
	struct gendisk *disk;
	struct nullb *nullb;
	sector_t size;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
	if (!nullb)
		return -ENOMEM;

	spin_lock_init(&nullb->lock);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
		submit_queues = nr_online_nodes;

	if (setup_queues(nullb))
		goto err;

	if (queue_mode == NULL_Q_MQ) {
		null_mq_reg.numa_node = home_node;
		null_mq_reg.queue_depth = hw_queue_depth;
		null_mq_reg.nr_hw_queues = submit_queues;

		if (use_per_node_hctx) {
			null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
			null_mq_reg.ops->free_hctx = null_free_hctx;
		} else {
			null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
			null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
		}

		nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		blk_queue_make_request(nullb->q, null_queue_bio);
		init_driver_queues(nullb);
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		if (nullb->q)
			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
		init_driver_queues(nullb);
	}

	if (!nullb->q)
		goto queue_fail;

	nullb->q->queuedata = nullb;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);

	disk = nullb->disk = alloc_disk_node(1, home_node);
	if (!disk) {
queue_fail:
		blk_cleanup_queue(nullb->q);
		cleanup_queues(nullb);
err:
		kfree(nullb);
		return -ENOMEM;
	}

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	nullb->index = nullb_indexes++;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, bs);
	blk_queue_physical_block_size(nullb->q, bs);

	size = gb * 1024 * 1024 * 1024ULL;
	sector_div(size, bs);
	set_capacity(disk, size);

	disk->flags |= GENHD_FL_EXT_DEVT;
	disk->major = null_major;
	disk->first_minor = nullb->index;
	disk->fops = &null_fops;
	disk->private_data = nullb;
	disk->queue = nullb->q;
	sprintf(disk->disk_name, "nullb%d", nullb->index);
	add_disk(disk);
	return 0;
}

static int __init null_init(void)
{
	unsigned int i;

	if (bs > PAGE_SIZE) {
		pr_warn("null_blk: invalid block size\n");
		pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
		bs = PAGE_SIZE;
	}

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn("null_blk: submit_queues param is set to %u.",
				nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	/* Initialize a separate list for each CPU for issuing softirqs */
	for_each_possible_cpu(i) {
		struct completion_queue *cq = &per_cpu(completion_queues, i);

		init_llist_head(&cq->list);

		if (irqmode != NULL_IRQ_TIMER)
			continue;

		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cq->timer.function = null_cmd_timer_expired;
	}

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0)
		return null_major;

	for (i = 0; i < nr_devices; i++) {
		if (null_add_dev()) {
			unregister_blkdev(null_major, "nullb");
			return -EINVAL;
		}
	}

	pr_info("null: module loaded\n");
	return 0;
}

static void __exit null_exit(void)
{
	struct nullb *nullb;

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");
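
The module parameters above (queue_mode, gb, bs, nr_devices, irqmode, and so on) are read-only at runtime (S_IRUGO) and are therefore set at load time, e.g. something like "modprobe null_blk queue_mode=2 nr_devices=1 gb=4" (a hypothetical invocation, not part of this file). Below is a minimal user-space sketch for exercising one of the resulting devices; it assumes the module is already loaded and that the first disk appears as /dev/nullb0, as the sprintf(disk->disk_name, "nullb%d", ...) naming suggests, and it must run as root. It is not part of the driver.

/*
 * nullb_smoke.c - hypothetical user-space smoke test for a null_blk device.
 * Assumes the module is loaded and /dev/nullb0 exists; data written here is
 * simply discarded by the driver, so only the completion path is exercised.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t ret;
	int fd;

	fd = open("/dev/nullb0", O_WRONLY);
	if (fd < 0) {
		perror("open /dev/nullb0");
		return EXIT_FAILURE;
	}

	/* Fill a buffer and submit one write to the block device. */
	memset(buf, 0xa5, sizeof(buf));
	ret = write(fd, buf, sizeof(buf));
	if (ret != (ssize_t)sizeof(buf)) {
		perror("write");
		close(fd);
		return EXIT_FAILURE;
	}

	/* Flush the page cache so the request actually reaches the driver. */
	if (fsync(fd) < 0)
		perror("fsync");

	printf("wrote %zd bytes to /dev/nullb0\n", ret);
	close(fd);
	return EXIT_SUCCESS;
}

Build with "gcc -o nullb_smoke nullb_smoke.c". Because null_blk never stores data, a read-back check would only hit the page cache; timing a loop of such writes (or using fio against /dev/nullb0) is the usual way this driver is used to benchmark the block layer itself.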