Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

block: make queue flags non-atomic

We can save some atomic ops in the IO path, if we clearly define
the rules of how to modify the queue flags.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Authored by Nick Piggin and committed by Jens Axboe.
75ad23bc 68154e90

+101 -48
+26 -13
block/blk-core.c
··· 198 198 if (blk_queue_stopped(q)) 199 199 return; 200 200 201 - if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { 201 + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { 202 + __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); 202 203 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 203 204 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 204 205 } ··· 214 213 { 215 214 WARN_ON(!irqs_disabled()); 216 215 217 - if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 216 + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 218 217 return 0; 219 218 219 + queue_flag_clear(QUEUE_FLAG_PLUGGED, q); 220 220 del_timer(&q->unplug_timer); 221 221 return 1; 222 222 } ··· 313 311 { 314 312 WARN_ON(!irqs_disabled()); 315 313 316 - clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 314 + queue_flag_clear(QUEUE_FLAG_STOPPED, q); 317 315 318 316 /* 319 317 * one level of recursion is ok and is much faster than kicking 320 318 * the unplug handling 321 319 */ 322 - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 320 + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 321 + queue_flag_set(QUEUE_FLAG_REENTER, q); 323 322 q->request_fn(q); 324 - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 323 + queue_flag_clear(QUEUE_FLAG_REENTER, q); 325 324 } else { 326 325 blk_plug_device(q); 327 326 kblockd_schedule_work(&q->unplug_work); ··· 347 344 void blk_stop_queue(struct request_queue *q) 348 345 { 349 346 blk_remove_plug(q); 350 - set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 347 + queue_flag_set(QUEUE_FLAG_STOPPED, q); 351 348 } 352 349 EXPORT_SYMBOL(blk_stop_queue); 353 350 ··· 376 373 * blk_run_queue - run a single device queue 377 374 * @q: The queue to run 378 375 */ 379 - void blk_run_queue(struct request_queue *q) 376 + void __blk_run_queue(struct request_queue *q) 380 377 { 381 - unsigned long flags; 382 - 383 - spin_lock_irqsave(q->queue_lock, flags); 384 378 blk_remove_plug(q); 385 379 386 380 /* ··· 385 385 * handling reinvoke the 
handler shortly if we already got there. 386 386 */ 387 387 if (!elv_queue_empty(q)) { 388 - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 388 + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 389 + queue_flag_set(QUEUE_FLAG_REENTER, q); 389 390 q->request_fn(q); 390 - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 391 + queue_flag_clear(QUEUE_FLAG_REENTER, q); 391 392 } else { 392 393 blk_plug_device(q); 393 394 kblockd_schedule_work(&q->unplug_work); 394 395 } 395 396 } 397 + } 398 + EXPORT_SYMBOL(__blk_run_queue); 396 399 400 + /** 401 + * blk_run_queue - run a single device queue 402 + * @q: The queue to run 403 + */ 404 + void blk_run_queue(struct request_queue *q) 405 + { 406 + unsigned long flags; 407 + 408 + spin_lock_irqsave(q->queue_lock, flags); 409 + __blk_run_queue(q); 397 410 spin_unlock_irqrestore(q->queue_lock, flags); 398 411 } 399 412 EXPORT_SYMBOL(blk_run_queue); ··· 419 406 void blk_cleanup_queue(struct request_queue *q) 420 407 { 421 408 mutex_lock(&q->sysfs_lock); 422 - set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); 409 + queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); 423 410 mutex_unlock(&q->sysfs_lock); 424 411 425 412 if (q->elevator)
+3 -3
block/blk-merge.c
··· 55 55 if (!rq->bio) 56 56 return; 57 57 58 - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 58 + cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); 59 59 hw_seg_size = seg_size = 0; 60 60 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; 61 61 rq_for_each_segment(bv, rq, iter) { ··· 128 128 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, 129 129 struct bio *nxt) 130 130 { 131 - if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) 131 + if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) 132 132 return 0; 133 133 134 134 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) ··· 175 175 int nsegs, cluster; 176 176 177 177 nsegs = 0; 178 - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 178 + cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); 179 179 180 180 /* 181 181 * for each bio in rq
+1 -1
block/blk-settings.c
··· 287 287 t->max_segment_size = min(t->max_segment_size, b->max_segment_size); 288 288 t->hardsect_size = max(t->hardsect_size, b->hardsect_size); 289 289 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) 290 - clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); 290 + queue_flag_clear(QUEUE_FLAG_CLUSTER, t); 291 291 } 292 292 EXPORT_SYMBOL(blk_queue_stack_limits); 293 293
+4 -4
block/blk-tag.c
··· 70 70 __blk_free_tags(bqt); 71 71 72 72 q->queue_tags = NULL; 73 - q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); 73 + queue_flag_clear(QUEUE_FLAG_QUEUED, q); 74 74 } 75 75 76 76 /** ··· 98 98 **/ 99 99 void blk_queue_free_tags(struct request_queue *q) 100 100 { 101 - clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 101 + queue_flag_clear(QUEUE_FLAG_QUEUED, q); 102 102 } 103 103 EXPORT_SYMBOL(blk_queue_free_tags); 104 104 ··· 188 188 rc = blk_queue_resize_tags(q, depth); 189 189 if (rc) 190 190 return rc; 191 - set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 191 + queue_flag_set(QUEUE_FLAG_QUEUED, q); 192 192 return 0; 193 193 } else 194 194 atomic_inc(&tags->refcnt); ··· 197 197 * assign it, all done 198 198 */ 199 199 q->queue_tags = tags; 200 - q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); 200 + queue_flag_set(QUEUE_FLAG_QUEUED, q); 201 201 INIT_LIST_HEAD(&q->tag_busy_list); 202 202 return 0; 203 203 fail:
+10 -3
block/elevator.c
··· 1070 1070 */ 1071 1071 spin_lock_irq(q->queue_lock); 1072 1072 1073 - set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1073 + queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); 1074 1074 1075 1075 elv_drain_elevator(q); 1076 1076 ··· 1104 1104 * finally exit old elevator and turn off BYPASS. 1105 1105 */ 1106 1106 elevator_exit(old_elevator); 1107 - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1107 + spin_lock_irq(q->queue_lock); 1108 + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); 1109 + spin_unlock_irq(q->queue_lock); 1110 + 1108 1111 return 1; 1109 1112 1110 1113 fail_register: ··· 1118 1115 elevator_exit(e); 1119 1116 q->elevator = old_elevator; 1120 1117 elv_register_queue(q); 1121 - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1118 + 1119 + spin_lock_irq(q->queue_lock); 1120 + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); 1121 + spin_unlock_irq(q->queue_lock); 1122 + 1122 1123 return 0; 1123 1124 } 1124 1125
+1 -1
drivers/block/loop.c
··· 546 546 { 547 547 struct loop_device *lo = q->queuedata; 548 548 549 - clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); 549 + queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); 550 550 blk_run_address_space(lo->lo_backing_file->f_mapping); 551 551 } 552 552
+1 -1
drivers/block/ub.c
··· 2399 2399 del_gendisk(lun->disk); 2400 2400 /* 2401 2401 * I wish I could do: 2402 - * set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); 2402 + * queue_flag_set(QUEUE_FLAG_DEAD, q); 2403 2403 * As it is, we rely on our internal poisoning and let 2404 2404 * the upper levels to spin furiously failing all the I/O. 2405 2405 */
+5 -2
drivers/md/dm-table.c
··· 873 873 q->max_hw_sectors = t->limits.max_hw_sectors; 874 874 q->seg_boundary_mask = t->limits.seg_boundary_mask; 875 875 q->bounce_pfn = t->limits.bounce_pfn; 876 + /* XXX: the below will probably go bug. must ensure there can be no 877 + * concurrency on queue_flags, and use the unlocked versions... 878 + */ 876 879 if (t->limits.no_cluster) 877 - q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER); 880 + queue_flag_clear(QUEUE_FLAG_CLUSTER, q); 878 881 else 879 - q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER); 882 + queue_flag_set(QUEUE_FLAG_CLUSTER, q); 880 883 881 884 } 882 885
+2 -1
drivers/md/md.c
··· 282 282 kfree(new); 283 283 return NULL; 284 284 } 285 - set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); 285 + /* Can be unlocked because the queue is new: no concurrency */ 286 + queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); 286 287 287 288 blk_queue_make_request(new->queue, md_fail_request); 288 289
+1 -1
drivers/scsi/scsi_debug.c
··· 1773 1773 if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) 1774 1774 printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n", 1775 1775 sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); 1776 - set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags); 1776 + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); 1777 1777 return 0; 1778 1778 } 1779 1779
+17 -12
drivers/scsi/scsi_lib.c
··· 536 536 !shost->host_blocked && !shost->host_self_blocked && 537 537 !((shost->can_queue > 0) && 538 538 (shost->host_busy >= shost->can_queue))) { 539 + 540 + int flagset; 541 + 539 542 /* 540 543 * As long as shost is accepting commands and we have 541 544 * starved queues, call blk_run_queue. scsi_request_fn ··· 552 549 sdev = list_entry(shost->starved_list.next, 553 550 struct scsi_device, starved_entry); 554 551 list_del_init(&sdev->starved_entry); 555 - spin_unlock_irqrestore(shost->host_lock, flags); 552 + spin_unlock(shost->host_lock); 556 553 554 + spin_lock(sdev->request_queue->queue_lock); 555 + flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && 556 + !test_bit(QUEUE_FLAG_REENTER, 557 + &sdev->request_queue->queue_flags); 558 + if (flagset) 559 + queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); 560 + __blk_run_queue(sdev->request_queue); 561 + if (flagset) 562 + queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); 563 + spin_unlock(sdev->request_queue->queue_lock); 557 564 558 - if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && 559 - !test_and_set_bit(QUEUE_FLAG_REENTER, 560 - &sdev->request_queue->queue_flags)) { 561 - blk_run_queue(sdev->request_queue); 562 - clear_bit(QUEUE_FLAG_REENTER, 563 - &sdev->request_queue->queue_flags); 564 - } else 565 - blk_run_queue(sdev->request_queue); 566 - 567 - spin_lock_irqsave(shost->host_lock, flags); 565 + spin_lock(shost->host_lock); 568 566 if (unlikely(!list_empty(&sdev->starved_entry))) 569 567 /* 570 568 * sdev lost a race, and was put back on the ··· 1589 1585 1590 1586 blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); 1591 1587 1588 + /* New queue, no concurrency on queue_flags */ 1592 1589 if (!shost->use_clustering) 1593 - clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); 1590 + queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); 1594 1591 1595 1592 /* 1596 1593 * set a reasonable default alignment on word boundaries: the
+1 -2
drivers/scsi/scsi_transport_sas.c
··· 248 248 else 249 249 q->queuedata = shost; 250 250 251 - set_bit(QUEUE_FLAG_BIDI, &q->queue_flags); 252 - 251 + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); 253 252 return 0; 254 253 } 255 254
+29 -4
include/linux/blkdev.h
··· 408 408 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 409 409 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 410 410 411 + static inline void queue_flag_set_unlocked(unsigned int flag, 412 + struct request_queue *q) 413 + { 414 + __set_bit(flag, &q->queue_flags); 415 + } 416 + 417 + static inline void queue_flag_set(unsigned int flag, struct request_queue *q) 418 + { 419 + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); 420 + __set_bit(flag, &q->queue_flags); 421 + } 422 + 423 + static inline void queue_flag_clear_unlocked(unsigned int flag, 424 + struct request_queue *q) 425 + { 426 + __clear_bit(flag, &q->queue_flags); 427 + } 428 + 429 + static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) 430 + { 431 + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); 432 + __clear_bit(flag, &q->queue_flags); 433 + } 434 + 411 435 enum { 412 436 /* 413 437 * Hardbarrier is supported with one of the following methods. ··· 520 496 static inline void blk_set_queue_full(struct request_queue *q, int rw) 521 497 { 522 498 if (rw == READ) 523 - set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 499 + queue_flag_set(QUEUE_FLAG_READFULL, q); 524 500 else 525 - set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 501 + queue_flag_set(QUEUE_FLAG_WRITEFULL, q); 526 502 } 527 503 528 504 static inline void blk_clear_queue_full(struct request_queue *q, int rw) 529 505 { 530 506 if (rw == READ) 531 - clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 507 + queue_flag_clear(QUEUE_FLAG_READFULL, q); 532 508 else 533 - clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 509 + queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); 534 510 } 535 511 536 512 ··· 650 626 extern void blk_stop_queue(struct request_queue *q); 651 627 extern void blk_sync_queue(struct request_queue *q); 652 628 extern void __blk_stop_queue(struct request_queue *q); 629 + extern void __blk_run_queue(struct request_queue *); 653 630 extern void blk_run_queue(struct request_queue *); 
654 631 extern void blk_start_queueing(struct request_queue *); 655 632 extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long);