[BLOCK] ll_rw_blk: Enable out-of-order request completions through softirq

Request completion can be quite a heavy process, since it needs to
iterate through the entire request and complete the bios it holds.
This patch adds blk_complete_request(), which moves this processing
into a dedicated block softirq.

Signed-off-by: Jens Axboe <axboe@suse.de>
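
For illustration, a minimal sketch of how a driver might adopt the new
interface; the mydrv_* names and the device-specific fetch helper are
hypothetical, not part of this patch:

	/* hypothetical driver glue for the softirq completion path */
	static void mydrv_softirq_done(struct request *rq);

	static void mydrv_setup_queue(request_queue_t *q)
	{
		/* register the callback that BLOCK_SOFTIRQ will invoke */
		blk_queue_softirq_done(q, mydrv_softirq_done);
	}

	static irqreturn_t mydrv_interrupt(int irq, void *dev_id, struct pt_regs *regs)
	{
		/* mydrv_fetch_done() stands in for device-specific code */
		struct request *rq = mydrv_fetch_done(dev_id);

		if (!rq)
			return IRQ_NONE;

		/*
		 * Instead of iterating the request and ending each bio in
		 * hard interrupt context, queue it on the per-cpu done list
		 * and let the block softirq do the heavy lifting.
		 */
		blk_complete_request(rq);
		return IRQ_HANDLED;
	}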

+124 -4
+105 -1
block/ll_rw_blk.c
···
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
 
 /*
  * for max sense size
···
 /*
  * Controlling structure to kblockd
  */
-static struct workqueue_struct *kblockd_workqueue; 
+static struct workqueue_struct *kblockd_workqueue;
 
 unsigned long blk_max_low_pfn, blk_max_pfn;
 
 EXPORT_SYMBOL(blk_max_low_pfn);
 EXPORT_SYMBOL(blk_max_pfn);
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
 
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME	(HZ/50UL)
···
 
 EXPORT_SYMBOL(blk_queue_merge_bvec);
 
+void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
+{
+	q->softirq_done_fn = fn;
+}
+
+EXPORT_SYMBOL(blk_queue_softirq_done);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
···
 static inline void rq_init(request_queue_t *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
+	INIT_LIST_HEAD(&rq->donelist);
 
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
···
 	rq->sense = NULL;
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
+	rq->completion_data = NULL;
 }
 
 /**
···
 EXPORT_SYMBOL(end_that_request_chunk);
 
 /*
+ * splice the completion data to a local structure and hand off to
+ * process_completion_queue() to complete the requests
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+	struct list_head *cpu_list;
+	LIST_HEAD(local_list);
+
+	local_irq_disable();
+	cpu_list = &__get_cpu_var(blk_cpu_done);
+	list_splice_init(cpu_list, &local_list);
+	local_irq_enable();
+
+	while (!list_empty(&local_list)) {
+		struct request *rq = list_entry(local_list.next, struct request, donelist);
+
+		list_del_init(&rq->donelist);
+		rq->q->softirq_done_fn(rq);
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
+			  void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD) {
+		int cpu = (unsigned long) hcpu;
+
+		local_irq_disable();
+		list_splice_init(&per_cpu(blk_cpu_done, cpu),
+				 &__get_cpu_var(blk_cpu_done));
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+
+static struct notifier_block __devinitdata blk_cpu_notifier = {
+	.notifier_call	= blk_cpu_notify,
+};
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/**
+ * blk_complete_request - end I/O on a request
+ * @req:      the request being processed
+ *
+ * Description:
+ *     Ends all I/O on a request. It does not handle partial completions,
+ *     unless the driver actually implements this in its completion callback
+ *     through requeueing. The actual completion happens out-of-order,
+ *     through a softirq handler. The user must have registered a completion
+ *     callback through blk_queue_softirq_done().
+ **/
+
+void blk_complete_request(struct request *req)
+{
+	struct list_head *cpu_list;
+	unsigned long flags;
+
+	BUG_ON(!req->q->softirq_done_fn);
+
+	local_irq_save(flags);
+
+	cpu_list = &__get_cpu_var(blk_cpu_done);
+	list_add_tail(&req->donelist, cpu_list);
+	raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+	local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(blk_complete_request);
+
+/*
  * queue lock must be held
  */
 void end_that_request_last(struct request *req, int uptodate)
···
 
 int __init blk_dev_init(void)
 {
+	int i;
+
 	kblockd_workqueue = create_workqueue("kblockd");
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
···
 
 	iocontext_cachep = kmem_cache_create("blkdev_ioc",
 			sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
+
+	for (i = 0; i < NR_CPUS; i++)
+		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+
+	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
+#ifdef CONFIG_HOTPLUG_CPU
+	register_cpu_notifier(&blk_cpu_notifier);
+#endif
 
 	blk_max_low_pfn = max_low_pfn;
 	blk_max_pfn = max_pfn;
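
A sketch of the matching completion callback, again with hypothetical
mydrv_* naming: blk_done_softirq() invokes it without the queue lock,
so the callback must take the lock itself around
end_that_request_last(). The byte count assumes a filesystem request:

	static void mydrv_softirq_done(struct request *rq)
	{
		int uptodate = !rq->errors;
		unsigned long flags;

		/*
		 * Complete all bios; a non-zero return means the request
		 * was not fully completed, which this path only supports
		 * through driver-side requeueing (see the kerneldoc above).
		 */
		if (end_that_request_chunk(rq, uptodate, rq->hard_nr_sectors << 9))
			return;

		/* end_that_request_last() requires the queue lock */
		spin_lock_irqsave(rq->q->queue_lock, flags);
		end_that_request_last(rq, uptodate);
		spin_unlock_irqrestore(rq->q->queue_lock, flags);
	}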
+18 -3
include/linux/blkdev.h
···
  * try to put the fields that are referenced together in the same cacheline
  */
 struct request {
-	struct list_head queuelist; /* looking for ->queue? you must _not_
-				     * access it directly, use
-				     * blkdev_dequeue_request! */
+	struct list_head queuelist;
+	struct list_head donelist;
+
 	unsigned long flags;		/* see REQ_ bits below */
 
 	/* Maintain bio traversal state for part by part I/O submission.
···
 	struct bio *biotail;
 
 	void *elevator_private;
+	void *completion_data;
 
 	unsigned short ioprio;
 
···
 typedef void (activity_fn) (void *data, int rw);
 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
 typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
+typedef void (softirq_done_fn)(struct request *);
 
 enum blk_queue_state {
 	Queue_down,
···
 	activity_fn		*activity_fn;
 	issue_flush_fn		*issue_flush_fn;
 	prepare_flush_fn	*prepare_flush_fn;
+	softirq_done_fn		*softirq_done_fn;
 
 	/*
 	 * Dispatch queue sorting
···
 extern int end_that_request_chunk(struct request *, int, int);
 extern void end_that_request_last(struct request *, int);
 extern void end_request(struct request *req, int uptodate);
+extern void blk_complete_request(struct request *);
+
+static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
+{
+	if (blk_fs_request(rq))
+		return (nr_bytes >= (rq->hard_nr_sectors << 9));
+	else if (blk_pc_request(rq))
+		return nr_bytes >= rq->data_len;
+
+	return 0;
+}
 
 /*
  * end_that_request_first/chunk() takes an uptodate argument. we account
···
 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(request_queue_t *, int);
+extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
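
The new rq_all_done() helper gives a driver a cheap check for whether
the byte count its hardware reported finishes the whole request, i.e.
whether the request qualifies for the softirq path at all. A
hypothetical use (mydrv_* names are illustrative only):

	static void mydrv_end_io(struct request *rq, unsigned int bytes, int uptodate)
	{
		if (rq_all_done(rq, bytes)) {
			/* fully done: defer the bio walk to BLOCK_SOFTIRQ */
			blk_complete_request(rq);
			return;
		}

		/*
		 * Partial completion stays on the classic in-place path;
		 * queue lock assumed held by the caller here.
		 */
		if (!end_that_request_chunk(rq, uptodate, bytes))
			end_that_request_last(rq, uptodate);
	}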
+1
include/linux/interrupt.h
···
 	TIMER_SOFTIRQ,
 	NET_TX_SOFTIRQ,
 	NET_RX_SOFTIRQ,
+	BLOCK_SOFTIRQ,
 	SCSI_SOFTIRQ,
 	TASKLET_SOFTIRQ
 };