Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.18-rc3 636 lines 14 kB view raw
1#include <linux/module.h> 2 3#include <linux/moduleparam.h> 4#include <linux/sched.h> 5#include <linux/fs.h> 6#include <linux/blkdev.h> 7#include <linux/init.h> 8#include <linux/slab.h> 9#include <linux/blk-mq.h> 10#include <linux/hrtimer.h> 11 12struct nullb_cmd { 13 struct list_head list; 14 struct llist_node ll_list; 15 struct call_single_data csd; 16 struct request *rq; 17 struct bio *bio; 18 unsigned int tag; 19 struct nullb_queue *nq; 20}; 21 22struct nullb_queue { 23 unsigned long *tag_map; 24 wait_queue_head_t wait; 25 unsigned int queue_depth; 26 27 struct nullb_cmd *cmds; 28}; 29 30struct nullb { 31 struct list_head list; 32 unsigned int index; 33 struct request_queue *q; 34 struct gendisk *disk; 35 struct blk_mq_tag_set tag_set; 36 struct hrtimer timer; 37 unsigned int queue_depth; 38 spinlock_t lock; 39 40 struct nullb_queue *queues; 41 unsigned int nr_queues; 42}; 43 44static LIST_HEAD(nullb_list); 45static struct mutex lock; 46static int null_major; 47static int nullb_indexes; 48 49struct completion_queue { 50 struct llist_head list; 51 struct hrtimer timer; 52}; 53 54/* 55 * These are per-cpu for now, they will need to be configured by the 56 * complete_queues parameter and appropriately mapped. 57 */ 58static DEFINE_PER_CPU(struct completion_queue, completion_queues); 59 60enum { 61 NULL_IRQ_NONE = 0, 62 NULL_IRQ_SOFTIRQ = 1, 63 NULL_IRQ_TIMER = 2, 64}; 65 66enum { 67 NULL_Q_BIO = 0, 68 NULL_Q_RQ = 1, 69 NULL_Q_MQ = 2, 70}; 71 72static int submit_queues; 73module_param(submit_queues, int, S_IRUGO); 74MODULE_PARM_DESC(submit_queues, "Number of submission queues"); 75 76static int home_node = NUMA_NO_NODE; 77module_param(home_node, int, S_IRUGO); 78MODULE_PARM_DESC(home_node, "Home node for the device"); 79 80static int queue_mode = NULL_Q_MQ; 81module_param(queue_mode, int, S_IRUGO); 82MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); 83 84static int gb = 250; 85module_param(gb, int, S_IRUGO); 86MODULE_PARM_DESC(gb, "Size in GB"); 87 88static int bs = 512; 89module_param(bs, int, S_IRUGO); 90MODULE_PARM_DESC(bs, "Block size (in bytes)"); 91 92static int nr_devices = 2; 93module_param(nr_devices, int, S_IRUGO); 94MODULE_PARM_DESC(nr_devices, "Number of devices to register"); 95 96static int irqmode = NULL_IRQ_SOFTIRQ; 97module_param(irqmode, int, S_IRUGO); 98MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); 99 100static int completion_nsec = 10000; 101module_param(completion_nsec, int, S_IRUGO); 102MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); 103 104static int hw_queue_depth = 64; 105module_param(hw_queue_depth, int, S_IRUGO); 106MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); 107 108static bool use_per_node_hctx = false; 109module_param(use_per_node_hctx, bool, S_IRUGO); 110MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); 111 112static void put_tag(struct nullb_queue *nq, unsigned int tag) 113{ 114 clear_bit_unlock(tag, nq->tag_map); 115 116 if (waitqueue_active(&nq->wait)) 117 wake_up(&nq->wait); 118} 119 120static unsigned int get_tag(struct nullb_queue *nq) 121{ 122 unsigned int tag; 123 124 do { 125 tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); 126 if (tag >= nq->queue_depth) 127 return -1U; 128 } while (test_and_set_bit_lock(tag, nq->tag_map)); 129 130 return tag; 131} 132 133static void free_cmd(struct nullb_cmd *cmd) 134{ 135 put_tag(cmd->nq, cmd->tag); 136} 137 138static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) 139{ 140 struct nullb_cmd *cmd; 141 unsigned int tag; 142 143 tag = get_tag(nq); 144 if (tag != -1U) { 145 cmd = &nq->cmds[tag]; 146 cmd->tag = tag; 147 cmd->nq = nq; 148 return cmd; 149 } 150 151 return NULL; 152} 153 154static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) 155{ 156 struct nullb_cmd *cmd; 157 DEFINE_WAIT(wait); 158 159 cmd = __alloc_cmd(nq); 160 if (cmd || !can_wait) 161 return cmd; 162 163 do { 164 prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); 165 cmd = __alloc_cmd(nq); 166 if (cmd) 167 break; 168 169 io_schedule(); 170 } while (1); 171 172 finish_wait(&nq->wait, &wait); 173 return cmd; 174} 175 176static void end_cmd(struct nullb_cmd *cmd) 177{ 178 switch (queue_mode) { 179 case NULL_Q_MQ: 180 blk_mq_end_request(cmd->rq, 0); 181 return; 182 case NULL_Q_RQ: 183 INIT_LIST_HEAD(&cmd->rq->queuelist); 184 blk_end_request_all(cmd->rq, 0); 185 break; 186 case NULL_Q_BIO: 187 bio_endio(cmd->bio, 0); 188 break; 189 } 190 191 free_cmd(cmd); 192} 193 194static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) 195{ 196 struct completion_queue *cq; 197 struct llist_node *entry; 198 struct nullb_cmd *cmd; 199 200 cq = &per_cpu(completion_queues, smp_processor_id()); 201 202 while ((entry = llist_del_all(&cq->list)) != NULL) { 203 entry = llist_reverse_order(entry); 204 do { 205 cmd = container_of(entry, struct nullb_cmd, ll_list); 206 entry = entry->next; 207 end_cmd(cmd); 208 } while (entry); 209 } 210 211 return HRTIMER_NORESTART; 212} 213 214static void null_cmd_end_timer(struct nullb_cmd *cmd) 215{ 216 struct completion_queue *cq = &per_cpu(completion_queues, get_cpu()); 217 218 cmd->ll_list.next = NULL; 219 if (llist_add(&cmd->ll_list, &cq->list)) { 220 ktime_t kt = ktime_set(0, completion_nsec); 221 222 hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL); 223 } 224 225 put_cpu(); 226} 227 228static void null_softirq_done_fn(struct request *rq) 229{ 230 if (queue_mode == NULL_Q_MQ) 231 end_cmd(blk_mq_rq_to_pdu(rq)); 232 else 233 end_cmd(rq->special); 234} 235 236static inline void null_handle_cmd(struct nullb_cmd *cmd) 237{ 238 /* Complete IO by inline, softirq or timer */ 239 switch (irqmode) { 240 case NULL_IRQ_SOFTIRQ: 241 switch (queue_mode) { 242 case NULL_Q_MQ: 243 blk_mq_complete_request(cmd->rq); 244 break; 245 case NULL_Q_RQ: 246 blk_complete_request(cmd->rq); 247 break; 248 case NULL_Q_BIO: 249 /* 250 * XXX: no proper submitting cpu information available. 251 */ 252 end_cmd(cmd); 253 break; 254 } 255 break; 256 case NULL_IRQ_NONE: 257 end_cmd(cmd); 258 break; 259 case NULL_IRQ_TIMER: 260 null_cmd_end_timer(cmd); 261 break; 262 } 263} 264 265static struct nullb_queue *nullb_to_queue(struct nullb *nullb) 266{ 267 int index = 0; 268 269 if (nullb->nr_queues != 1) 270 index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); 271 272 return &nullb->queues[index]; 273} 274 275static void null_queue_bio(struct request_queue *q, struct bio *bio) 276{ 277 struct nullb *nullb = q->queuedata; 278 struct nullb_queue *nq = nullb_to_queue(nullb); 279 struct nullb_cmd *cmd; 280 281 cmd = alloc_cmd(nq, 1); 282 cmd->bio = bio; 283 284 null_handle_cmd(cmd); 285} 286 287static int null_rq_prep_fn(struct request_queue *q, struct request *req) 288{ 289 struct nullb *nullb = q->queuedata; 290 struct nullb_queue *nq = nullb_to_queue(nullb); 291 struct nullb_cmd *cmd; 292 293 cmd = alloc_cmd(nq, 0); 294 if (cmd) { 295 cmd->rq = req; 296 req->special = cmd; 297 return BLKPREP_OK; 298 } 299 300 return BLKPREP_DEFER; 301} 302 303static void null_request_fn(struct request_queue *q) 304{ 305 struct request *rq; 306 307 while ((rq = blk_fetch_request(q)) != NULL) { 308 struct nullb_cmd *cmd = rq->special; 309 310 spin_unlock_irq(q->queue_lock); 311 null_handle_cmd(cmd); 312 spin_lock_irq(q->queue_lock); 313 } 314} 315 316static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq, 317 bool last) 318{ 319 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); 320 321 cmd->rq = rq; 322 cmd->nq = hctx->driver_data; 323 324 blk_mq_start_request(rq); 325 326 null_handle_cmd(cmd); 327 return BLK_MQ_RQ_QUEUE_OK; 328} 329 330static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) 331{ 332 BUG_ON(!nullb); 333 BUG_ON(!nq); 334 335 init_waitqueue_head(&nq->wait); 336 nq->queue_depth = nullb->queue_depth; 337} 338 339static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 340 unsigned int index) 341{ 342 struct nullb *nullb = data; 343 struct nullb_queue *nq = &nullb->queues[index]; 344 345 hctx->driver_data = nq; 346 null_init_queue(nullb, nq); 347 nullb->nr_queues++; 348 349 return 0; 350} 351 352static struct blk_mq_ops null_mq_ops = { 353 .queue_rq = null_queue_rq, 354 .map_queue = blk_mq_map_queue, 355 .init_hctx = null_init_hctx, 356 .complete = null_softirq_done_fn, 357}; 358 359static void null_del_dev(struct nullb *nullb) 360{ 361 list_del_init(&nullb->list); 362 363 del_gendisk(nullb->disk); 364 blk_cleanup_queue(nullb->q); 365 if (queue_mode == NULL_Q_MQ) 366 blk_mq_free_tag_set(&nullb->tag_set); 367 put_disk(nullb->disk); 368 kfree(nullb); 369} 370 371static int null_open(struct block_device *bdev, fmode_t mode) 372{ 373 return 0; 374} 375 376static void null_release(struct gendisk *disk, fmode_t mode) 377{ 378} 379 380static const struct block_device_operations null_fops = { 381 .owner = THIS_MODULE, 382 .open = null_open, 383 .release = null_release, 384}; 385 386static int setup_commands(struct nullb_queue *nq) 387{ 388 struct nullb_cmd *cmd; 389 int i, tag_size; 390 391 nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL); 392 if (!nq->cmds) 393 return -ENOMEM; 394 395 tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; 396 nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL); 397 if (!nq->tag_map) { 398 kfree(nq->cmds); 399 return -ENOMEM; 400 } 401 402 for (i = 0; i < nq->queue_depth; i++) { 403 cmd = &nq->cmds[i]; 404 INIT_LIST_HEAD(&cmd->list); 405 cmd->ll_list.next = NULL; 406 cmd->tag = -1U; 407 } 408 409 return 0; 410} 411 412static void cleanup_queue(struct nullb_queue *nq) 413{ 414 kfree(nq->tag_map); 415 kfree(nq->cmds); 416} 417 418static void cleanup_queues(struct nullb *nullb) 419{ 420 int i; 421 422 for (i = 0; i < nullb->nr_queues; i++) 423 cleanup_queue(&nullb->queues[i]); 424 425 kfree(nullb->queues); 426} 427 428static int setup_queues(struct nullb *nullb) 429{ 430 nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue), 431 GFP_KERNEL); 432 if (!nullb->queues) 433 return -ENOMEM; 434 435 nullb->nr_queues = 0; 436 nullb->queue_depth = hw_queue_depth; 437 438 return 0; 439} 440 441static int init_driver_queues(struct nullb *nullb) 442{ 443 struct nullb_queue *nq; 444 int i, ret = 0; 445 446 for (i = 0; i < submit_queues; i++) { 447 nq = &nullb->queues[i]; 448 449 null_init_queue(nullb, nq); 450 451 ret = setup_commands(nq); 452 if (ret) 453 return ret; 454 nullb->nr_queues++; 455 } 456 return 0; 457} 458 459static int null_add_dev(void) 460{ 461 struct gendisk *disk; 462 struct nullb *nullb; 463 sector_t size; 464 int rv; 465 466 nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); 467 if (!nullb) { 468 rv = -ENOMEM; 469 goto out; 470 } 471 472 spin_lock_init(&nullb->lock); 473 474 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) 475 submit_queues = nr_online_nodes; 476 477 rv = setup_queues(nullb); 478 if (rv) 479 goto out_free_nullb; 480 481 if (queue_mode == NULL_Q_MQ) { 482 nullb->tag_set.ops = &null_mq_ops; 483 nullb->tag_set.nr_hw_queues = submit_queues; 484 nullb->tag_set.queue_depth = hw_queue_depth; 485 nullb->tag_set.numa_node = home_node; 486 nullb->tag_set.cmd_size = sizeof(struct nullb_cmd); 487 nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 488 nullb->tag_set.driver_data = nullb; 489 490 rv = blk_mq_alloc_tag_set(&nullb->tag_set); 491 if (rv) 492 goto out_cleanup_queues; 493 494 nullb->q = blk_mq_init_queue(&nullb->tag_set); 495 if (!nullb->q) { 496 rv = -ENOMEM; 497 goto out_cleanup_tags; 498 } 499 } else if (queue_mode == NULL_Q_BIO) { 500 nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); 501 if (!nullb->q) { 502 rv = -ENOMEM; 503 goto out_cleanup_queues; 504 } 505 blk_queue_make_request(nullb->q, null_queue_bio); 506 rv = init_driver_queues(nullb); 507 if (rv) 508 goto out_cleanup_blk_queue; 509 } else { 510 nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); 511 if (!nullb->q) { 512 rv = -ENOMEM; 513 goto out_cleanup_queues; 514 } 515 blk_queue_prep_rq(nullb->q, null_rq_prep_fn); 516 blk_queue_softirq_done(nullb->q, null_softirq_done_fn); 517 rv = init_driver_queues(nullb); 518 if (rv) 519 goto out_cleanup_blk_queue; 520 } 521 522 nullb->q->queuedata = nullb; 523 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); 524 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); 525 526 disk = nullb->disk = alloc_disk_node(1, home_node); 527 if (!disk) { 528 rv = -ENOMEM; 529 goto out_cleanup_blk_queue; 530 } 531 532 mutex_lock(&lock); 533 list_add_tail(&nullb->list, &nullb_list); 534 nullb->index = nullb_indexes++; 535 mutex_unlock(&lock); 536 537 blk_queue_logical_block_size(nullb->q, bs); 538 blk_queue_physical_block_size(nullb->q, bs); 539 540 size = gb * 1024 * 1024 * 1024ULL; 541 sector_div(size, bs); 542 set_capacity(disk, size); 543 544 disk->flags |= GENHD_FL_EXT_DEVT; 545 disk->major = null_major; 546 disk->first_minor = nullb->index; 547 disk->fops = &null_fops; 548 disk->private_data = nullb; 549 disk->queue = nullb->q; 550 sprintf(disk->disk_name, "nullb%d", nullb->index); 551 add_disk(disk); 552 return 0; 553 554out_cleanup_blk_queue: 555 blk_cleanup_queue(nullb->q); 556out_cleanup_tags: 557 if (queue_mode == NULL_Q_MQ) 558 blk_mq_free_tag_set(&nullb->tag_set); 559out_cleanup_queues: 560 cleanup_queues(nullb); 561out_free_nullb: 562 kfree(nullb); 563out: 564 return rv; 565} 566 567static int __init null_init(void) 568{ 569 unsigned int i; 570 571 if (bs > PAGE_SIZE) { 572 pr_warn("null_blk: invalid block size\n"); 573 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); 574 bs = PAGE_SIZE; 575 } 576 577 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { 578 if (submit_queues < nr_online_nodes) { 579 pr_warn("null_blk: submit_queues param is set to %u.", 580 nr_online_nodes); 581 submit_queues = nr_online_nodes; 582 } 583 } else if (submit_queues > nr_cpu_ids) 584 submit_queues = nr_cpu_ids; 585 else if (!submit_queues) 586 submit_queues = 1; 587 588 mutex_init(&lock); 589 590 /* Initialize a separate list for each CPU for issuing softirqs */ 591 for_each_possible_cpu(i) { 592 struct completion_queue *cq = &per_cpu(completion_queues, i); 593 594 init_llist_head(&cq->list); 595 596 if (irqmode != NULL_IRQ_TIMER) 597 continue; 598 599 hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 600 cq->timer.function = null_cmd_timer_expired; 601 } 602 603 null_major = register_blkdev(0, "nullb"); 604 if (null_major < 0) 605 return null_major; 606 607 for (i = 0; i < nr_devices; i++) { 608 if (null_add_dev()) { 609 unregister_blkdev(null_major, "nullb"); 610 return -EINVAL; 611 } 612 } 613 614 pr_info("null: module loaded\n"); 615 return 0; 616} 617 618static void __exit null_exit(void) 619{ 620 struct nullb *nullb; 621 622 unregister_blkdev(null_major, "nullb"); 623 624 mutex_lock(&lock); 625 while (!list_empty(&nullb_list)) { 626 nullb = list_entry(nullb_list.next, struct nullb, list); 627 null_del_dev(nullb); 628 } 629 mutex_unlock(&lock); 630} 631 632module_init(null_init); 633module_exit(null_exit); 634 635MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>"); 636MODULE_LICENSE("GPL");