Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drivers/block/null_blk.c at v3.17 (632 lines, 14 kB)

#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>

struct nullb_cmd {
	struct list_head list;
	struct llist_node ll_list;
	struct call_single_data csd;
	struct request *rq;
	struct bio *bio;
	unsigned int tag;
	struct nullb_queue *nq;
};

struct nullb_queue {
	unsigned long *tag_map;
	wait_queue_head_t wait;
	unsigned int queue_depth;

	struct nullb_cmd *cmds;
};

struct nullb {
	struct list_head list;
	unsigned int index;
	struct request_queue *q;
	struct gendisk *disk;
	struct blk_mq_tag_set tag_set;
	struct hrtimer timer;
	unsigned int queue_depth;
	spinlock_t lock;

	struct nullb_queue *queues;
	unsigned int nr_queues;
};

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;

struct completion_queue {
	struct llist_head list;
	struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,
};

enum {
	NULL_Q_BIO		= 0,
	NULL_Q_RQ		= 1,
	NULL_Q_MQ		= 2,
};

static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;
module_param(queue_mode, int, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");

static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static int irqmode = NULL_IRQ_SOFTIRQ;
module_param(irqmode, int, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");

static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
	clear_bit_unlock(tag, nq->tag_map);

	if (waitqueue_active(&nq->wait))
		wake_up(&nq->wait);
}

static unsigned int get_tag(struct nullb_queue *nq)
{
	unsigned int tag;

	do {
		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
		if (tag >= nq->queue_depth)
			return -1U;
	} while (test_and_set_bit_lock(tag, nq->tag_map));

	return tag;
}

static void free_cmd(struct nullb_cmd *cmd)
{
	put_tag(cmd->nq, cmd->tag);
}

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	unsigned int tag;

	tag = get_tag(nq);
	if (tag != -1U) {
		cmd = &nq->cmds[tag];
		cmd->tag = tag;
		cmd->nq = nq;
		return cmd;
	}

	return NULL;
}

static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
	struct nullb_cmd *cmd;
	DEFINE_WAIT(wait);

	cmd = __alloc_cmd(nq);
	if (cmd || !can_wait)
		return cmd;

	do {
		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
		cmd = __alloc_cmd(nq);
		if (cmd)
			break;

		io_schedule();
	} while (1);

	finish_wait(&nq->wait, &wait);
	return cmd;
}

static void end_cmd(struct nullb_cmd *cmd)
{
	switch (queue_mode) {
	case NULL_Q_MQ:
		blk_mq_end_io(cmd->rq, 0);
		return;
	case NULL_Q_RQ:
		INIT_LIST_HEAD(&cmd->rq->queuelist);
		blk_end_request_all(cmd->rq, 0);
		break;
	case NULL_Q_BIO:
		bio_endio(cmd->bio, 0);
		break;
	}

	free_cmd(cmd);
}

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
	struct completion_queue *cq;
	struct llist_node *entry;
	struct nullb_cmd *cmd;

	cq = &per_cpu(completion_queues, smp_processor_id());

	while ((entry = llist_del_all(&cq->list)) != NULL) {
		entry = llist_reverse_order(entry);
		do {
			cmd = container_of(entry, struct nullb_cmd, ll_list);
			entry = entry->next;
			end_cmd(cmd);
		} while (entry);
	}

	return HRTIMER_NORESTART;
}

static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

	cmd->ll_list.next = NULL;
	if (llist_add(&cmd->ll_list, &cq->list)) {
		ktime_t kt = ktime_set(0, completion_nsec);

		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
	}

	put_cpu();
}

static void null_softirq_done_fn(struct request *rq)
{
	if (queue_mode == NULL_Q_MQ)
		end_cmd(blk_mq_rq_to_pdu(rq));
	else
		end_cmd(rq->special);
}

static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
	/* Complete IO by inline, softirq or timer */
	switch (irqmode) {
	case NULL_IRQ_SOFTIRQ:
		switch (queue_mode) {
		case NULL_Q_MQ:
			blk_mq_complete_request(cmd->rq);
			break;
		case NULL_Q_RQ:
			blk_complete_request(cmd->rq);
			break;
		case NULL_Q_BIO:
			/*
			 * XXX: no proper submitting cpu information available.
			 */
			end_cmd(cmd);
			break;
		}
		break;
	case NULL_IRQ_NONE:
		end_cmd(cmd);
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}

static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
	int index = 0;

	if (nullb->nr_queues != 1)
		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

	return &nullb->queues[index];
}

static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 1);
	cmd->bio = bio;

	null_handle_cmd(cmd);
}

static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 0);
	if (cmd) {
		cmd->rq = req;
		req->special = cmd;
		return BLKPREP_OK;
	}

	return BLKPREP_DEFER;
}

static void null_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		struct nullb_cmd *cmd = rq->special;

		spin_unlock_irq(q->queue_lock);
		null_handle_cmd(cmd);
		spin_lock_irq(q->queue_lock);
	}
}

static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->rq = rq;
	cmd->nq = hctx->driver_data;

	null_handle_cmd(cmd);
	return BLK_MQ_RQ_QUEUE_OK;
}

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	BUG_ON(!nullb);
	BUG_ON(!nq);

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int index)
{
	struct nullb *nullb = data;
	struct nullb_queue *nq = &nullb->queues[index];

	hctx->driver_data = nq;
	null_init_queue(nullb, nq);
	nullb->nr_queues++;

	return 0;
}

static struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_hctx	= null_init_hctx,
	.complete	= null_softirq_done_fn,
};

static void null_del_dev(struct nullb *nullb)
{
	list_del_init(&nullb->list);

	del_gendisk(nullb->disk);
	blk_cleanup_queue(nullb->q);
	if (queue_mode == NULL_Q_MQ)
		blk_mq_free_tag_set(&nullb->tag_set);
	put_disk(nullb->disk);
	kfree(nullb);
}

static int null_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
	.owner =	THIS_MODULE,
	.open =		null_open,
	.release =	null_release,
};

static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		INIT_LIST_HEAD(&cmd->list);
		cmd->ll_list.next = NULL;
		cmd->tag = -1U;
	}

	return 0;
}

static void cleanup_queue(struct nullb_queue *nq)
{
	kfree(nq->tag_map);
	kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
	int i;

	for (i = 0; i < nullb->nr_queues; i++)
		cleanup_queue(&nullb->queues[i]);

	kfree(nullb->queues);
}

static int setup_queues(struct nullb *nullb)
{
	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
				GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	return 0;
}

static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			goto err_queue;
		nullb->nr_queues++;
	}

	return 0;
err_queue:
	cleanup_queues(nullb);
	return ret;
}

static int null_add_dev(void)
{
	struct gendisk *disk;
	struct nullb *nullb;
	sector_t size;
	int rv;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
	if (!nullb) {
		rv = -ENOMEM;
		goto out;
	}

	spin_lock_init(&nullb->lock);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
		submit_queues = nr_online_nodes;

	rv = setup_queues(nullb);
	if (rv)
		goto out_free_nullb;

	if (queue_mode == NULL_Q_MQ) {
		nullb->tag_set.ops = &null_mq_ops;
		nullb->tag_set.nr_hw_queues = submit_queues;
		nullb->tag_set.queue_depth = hw_queue_depth;
		nullb->tag_set.numa_node = home_node;
		nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
		nullb->tag_set.driver_data = nullb;

		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
		if (rv)
			goto out_cleanup_queues;

		nullb->q = blk_mq_init_queue(&nullb->tag_set);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_tags;
		}
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_make_request(nullb->q, null_queue_bio);
		init_driver_queues(nullb);
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
		init_driver_queues(nullb);
	}

	nullb->q->queuedata = nullb;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);

	disk = nullb->disk = alloc_disk_node(1, home_node);
	if (!disk) {
		rv = -ENOMEM;
		goto out_cleanup_blk_queue;
	}

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	nullb->index = nullb_indexes++;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, bs);
	blk_queue_physical_block_size(nullb->q, bs);

	size = gb * 1024 * 1024 * 1024ULL;
	sector_div(size, bs);
	set_capacity(disk, size);

	disk->flags |= GENHD_FL_EXT_DEVT;
	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->fops		= &null_fops;
	disk->private_data	= nullb;
	disk->queue		= nullb->q;
	sprintf(disk->disk_name, "nullb%d", nullb->index);
	add_disk(disk);
	return 0;

out_cleanup_blk_queue:
	blk_cleanup_queue(nullb->q);
out_cleanup_tags:
	if (queue_mode == NULL_Q_MQ)
		blk_mq_free_tag_set(&nullb->tag_set);
out_cleanup_queues:
	cleanup_queues(nullb);
out_free_nullb:
	kfree(nullb);
out:
	return rv;
}

static int __init null_init(void)
{
	unsigned int i;

	if (bs > PAGE_SIZE) {
		pr_warn("null_blk: invalid block size\n");
		pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
		bs = PAGE_SIZE;
	}

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn("null_blk: submit_queues param is set to %u.",
				nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	/* Initialize a separate list for each CPU for issuing softirqs */
	for_each_possible_cpu(i) {
		struct completion_queue *cq = &per_cpu(completion_queues, i);

		init_llist_head(&cq->list);

		if (irqmode != NULL_IRQ_TIMER)
			continue;

		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cq->timer.function = null_cmd_timer_expired;
	}

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0)
		return null_major;

	for (i = 0; i < nr_devices; i++) {
		if (null_add_dev()) {
			unregister_blkdev(null_major, "nullb");
			return -EINVAL;
		}
	}

	pr_info("null: module loaded\n");
	return 0;
}

static void __exit null_exit(void)
{
	struct nullb *nullb;

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");
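
Loading this module registers nr_devices disks named nullb0, nullb1, and so on (see the sprintf of disk->disk_name in null_add_dev above). A minimal userspace sketch for exercising one of them follows; it assumes the module is already loaded, that the device node appears as /dev/nullb0, and that the caller may open it, none of which is shown in the driver source itself.

/*
 * Illustrative sketch only: opens /dev/nullb0 (an assumed device path) and
 * issues one 512-byte write and read, 512 being the driver's default "bs"
 * parameter. The driver completes I/O without any backing store, so the
 * data itself is not meaningful; this just demonstrates driving the device.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[512];
	int fd = open("/dev/nullb0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/nullb0");
		return EXIT_FAILURE;
	}

	memset(buf, 0xab, sizeof(buf));
	if (pwrite(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
		perror("pwrite");
	if (pread(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
		perror("pread");

	close(fd);
	return EXIT_SUCCESS;
}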