Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block: (132 commits)
doc/cdrom: Trvial documentation error, file not present
block_dev: fix kernel-doc in new functions
block: add some comments around the bio read-write flags
block: mark bio_split_pool static
block: Find bio sector offset given idx and offset
block: gendisk integrity wrapper
block: Switch blk_integrity_compare from bdev to gendisk
block: Fix double put in blk_integrity_unregister
block: Introduce integrity data ownership flag
block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1
bio.h: Remove unused conditional code
block: remove end_{queued|dequeued}_request()
block: change elevator to use __blk_end_request()
gdrom: change to use __blk_end_request()
memstick: change to use __blk_end_request()
virtio_blk: change to use __blk_end_request()
blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure
block: add lld busy state exporting interface
block: Fix blk_start_queueing() to not kick a stopped queue
include blktrace_api.h in headers_install
...

+3862 -2661
+1 -1
Documentation/DMA-API.txt
··· 337 337 int i, count = dma_map_sg(dev, sglist, nents, direction); 338 338 struct scatterlist *sg; 339 339 340 - for (i = 0, sg = sglist; i < count; i++, sg++) { 340 + for_each_sg(sglist, sg, count, i) { 341 341 hw_address[i] = sg_dma_address(sg); 342 342 hw_len[i] = sg_dma_len(sg); 343 343 }
+4
Documentation/DocBook/kernel-api.tmpl
··· 364 364 !Eblock/blk-barrier.c 365 365 !Eblock/blk-tag.c 366 366 !Iblock/blk-tag.c 367 + !Eblock/blk-integrity.c 368 + !Iblock/blktrace.c 369 + !Iblock/genhd.c 370 + !Eblock/genhd.c 367 371 </chapter> 368 372 369 373 <chapter id="chrdev">
+10 -4
Documentation/block/deadline-iosched.txt
··· 30 30 Similar to read_expire mentioned above, but for writes. 31 31 32 32 33 - fifo_batch 33 + fifo_batch (number of requests) 34 34 ---------- 35 35 36 - When a read request expires its deadline, we must move some requests from 37 - the sorted io scheduler list to the block device dispatch queue. fifo_batch 38 - controls how many requests we move. 36 + Requests are grouped into ``batches'' of a particular data direction (read or 37 + write) which are serviced in increasing sector order. To limit extra seeking, 38 + deadline expiries are only checked between batches. fifo_batch controls the 39 + maximum number of requests per batch. 40 + 41 + This parameter tunes the balance between per-request latency and aggregate 42 + throughput. When low latency is the primary concern, smaller is better (where 43 + a value of 1 yields first-come first-served behaviour). Increasing fifo_batch 44 + generally improves throughput, at the cost of latency variation. 39 45 40 46 41 47 writes_starved (number of dispatches)
+1 -2
Documentation/cdrom/ide-cd
··· 145 145 146 146 To play an audio CD, you should first unmount and remove any data 147 147 CDROM. Any of the CDROM player programs should then work (workman, 148 - workbone, cdplayer, etc.). Lacking anything else, you could use the 149 - cdtester program in Documentation/cdrom/sbpcd. 148 + workbone, cdplayer, etc.). 150 149 151 150 On a few drives, you can read digital audio directly using a program 152 151 such as cdda2wav. The only types of drive which I've heard support
+2 -2
block/Makefile
··· 4 4 5 5 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ 6 6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ 7 - blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \ 8 - cmd-filter.o 7 + blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ 8 + ioctl.o genhd.o scsi_ioctl.o cmd-filter.o 9 9 10 10 obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 11 11 obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
+11 -3
block/as-iosched.c
··· 462 462 del_timer(&ad->antic_timer); 463 463 ad->antic_status = ANTIC_FINISHED; 464 464 /* see as_work_handler */ 465 - kblockd_schedule_work(&ad->antic_work); 465 + kblockd_schedule_work(ad->q, &ad->antic_work); 466 466 } 467 467 } 468 468 ··· 483 483 aic = ad->io_context->aic; 484 484 485 485 ad->antic_status = ANTIC_FINISHED; 486 - kblockd_schedule_work(&ad->antic_work); 486 + kblockd_schedule_work(q, &ad->antic_work); 487 487 488 488 if (aic->ttime_samples == 0) { 489 489 /* process anticipated on has exited or timed out*/ ··· 745 745 */ 746 746 static int as_can_anticipate(struct as_data *ad, struct request *rq) 747 747 { 748 + #if 0 /* disable for now, we need to check tag level as well */ 749 + /* 750 + * SSD device without seek penalty, disable idling 751 + */ 752 + if (blk_queue_nonrot(ad->q)) axman 753 + return 0; 754 + #endif 755 + 748 756 if (!ad->io_context) 749 757 /* 750 758 * Last request submitted was a write ··· 852 844 if (ad->changed_batch && ad->nr_dispatched == 1) { 853 845 ad->current_batch_expires = jiffies + 854 846 ad->batch_expire[ad->batch_data_dir]; 855 - kblockd_schedule_work(&ad->antic_work); 847 + kblockd_schedule_work(q, &ad->antic_work); 856 848 ad->changed_batch = 0; 857 849 858 850 if (ad->batch_data_dir == REQ_SYNC)
+71 -1
block/blk-barrier.c
··· 293 293 bio->bi_end_io = bio_end_empty_barrier; 294 294 bio->bi_private = &wait; 295 295 bio->bi_bdev = bdev; 296 - submit_bio(1 << BIO_RW_BARRIER, bio); 296 + submit_bio(WRITE_BARRIER, bio); 297 297 298 298 wait_for_completion(&wait); 299 299 ··· 315 315 return ret; 316 316 } 317 317 EXPORT_SYMBOL(blkdev_issue_flush); 318 + 319 + static void blkdev_discard_end_io(struct bio *bio, int err) 320 + { 321 + if (err) { 322 + if (err == -EOPNOTSUPP) 323 + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 324 + clear_bit(BIO_UPTODATE, &bio->bi_flags); 325 + } 326 + 327 + bio_put(bio); 328 + } 329 + 330 + /** 331 + * blkdev_issue_discard - queue a discard 332 + * @bdev: blockdev to issue discard for 333 + * @sector: start sector 334 + * @nr_sects: number of sectors to discard 335 + * @gfp_mask: memory allocation flags (for bio_alloc) 336 + * 337 + * Description: 338 + * Issue a discard request for the sectors in question. Does not wait. 339 + */ 340 + int blkdev_issue_discard(struct block_device *bdev, 341 + sector_t sector, sector_t nr_sects, gfp_t gfp_mask) 342 + { 343 + struct request_queue *q; 344 + struct bio *bio; 345 + int ret = 0; 346 + 347 + if (bdev->bd_disk == NULL) 348 + return -ENXIO; 349 + 350 + q = bdev_get_queue(bdev); 351 + if (!q) 352 + return -ENXIO; 353 + 354 + if (!q->prepare_discard_fn) 355 + return -EOPNOTSUPP; 356 + 357 + while (nr_sects && !ret) { 358 + bio = bio_alloc(gfp_mask, 0); 359 + if (!bio) 360 + return -ENOMEM; 361 + 362 + bio->bi_end_io = blkdev_discard_end_io; 363 + bio->bi_bdev = bdev; 364 + 365 + bio->bi_sector = sector; 366 + 367 + if (nr_sects > q->max_hw_sectors) { 368 + bio->bi_size = q->max_hw_sectors << 9; 369 + nr_sects -= q->max_hw_sectors; 370 + sector += q->max_hw_sectors; 371 + } else { 372 + bio->bi_size = nr_sects << 9; 373 + nr_sects = 0; 374 + } 375 + bio_get(bio); 376 + submit_bio(DISCARD_BARRIER, bio); 377 + 378 + /* Check if it failed immediately */ 379 + if (bio_flagged(bio, BIO_EOPNOTSUPP)) 380 + ret = -EOPNOTSUPP; 381 + 
else if (!bio_flagged(bio, BIO_UPTODATE)) 382 + ret = -EIO; 383 + bio_put(bio); 384 + } 385 + return ret; 386 + } 387 + EXPORT_SYMBOL(blkdev_issue_discard);
+336 -293
block/blk-core.c
··· 26 26 #include <linux/swap.h> 27 27 #include <linux/writeback.h> 28 28 #include <linux/task_io_accounting_ops.h> 29 - #include <linux/interrupt.h> 30 - #include <linux/cpu.h> 31 29 #include <linux/blktrace_api.h> 32 30 #include <linux/fault-inject.h> 33 31 ··· 48 50 */ 49 51 static struct workqueue_struct *kblockd_workqueue; 50 52 51 - static DEFINE_PER_CPU(struct list_head, blk_cpu_done); 52 - 53 53 static void drive_stat_acct(struct request *rq, int new_io) 54 54 { 55 55 struct hd_struct *part; 56 56 int rw = rq_data_dir(rq); 57 + int cpu; 57 58 58 59 if (!blk_fs_request(rq) || !rq->rq_disk) 59 60 return; 60 61 61 - part = get_part(rq->rq_disk, rq->sector); 62 + cpu = part_stat_lock(); 63 + part = disk_map_sector_rcu(rq->rq_disk, rq->sector); 64 + 62 65 if (!new_io) 63 - __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); 66 + part_stat_inc(cpu, part, merges[rw]); 64 67 else { 65 - disk_round_stats(rq->rq_disk); 66 - rq->rq_disk->in_flight++; 67 - if (part) { 68 - part_round_stats(part); 69 - part->in_flight++; 70 - } 68 + part_round_stats(cpu, part); 69 + part_inc_in_flight(part); 71 70 } 71 + 72 + part_stat_unlock(); 72 73 } 73 74 74 75 void blk_queue_congestion_threshold(struct request_queue *q) ··· 110 113 memset(rq, 0, sizeof(*rq)); 111 114 112 115 INIT_LIST_HEAD(&rq->queuelist); 113 - INIT_LIST_HEAD(&rq->donelist); 116 + INIT_LIST_HEAD(&rq->timeout_list); 117 + rq->cpu = -1; 114 118 rq->q = q; 115 119 rq->sector = rq->hard_sector = (sector_t) -1; 116 120 INIT_HLIST_NODE(&rq->hash); ··· 306 308 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 307 309 q->rq.count[READ] + q->rq.count[WRITE]); 308 310 309 - kblockd_schedule_work(&q->unplug_work); 311 + kblockd_schedule_work(q, &q->unplug_work); 310 312 } 311 313 312 314 void blk_unplug(struct request_queue *q) ··· 323 325 } 324 326 EXPORT_SYMBOL(blk_unplug); 325 327 328 + static void blk_invoke_request_fn(struct request_queue *q) 329 + { 330 + /* 331 + * one level of recursion is ok and is much 
faster than kicking 332 + * the unplug handling 333 + */ 334 + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 335 + q->request_fn(q); 336 + queue_flag_clear(QUEUE_FLAG_REENTER, q); 337 + } else { 338 + queue_flag_set(QUEUE_FLAG_PLUGGED, q); 339 + kblockd_schedule_work(q, &q->unplug_work); 340 + } 341 + } 342 + 326 343 /** 327 344 * blk_start_queue - restart a previously stopped queue 328 345 * @q: The &struct request_queue in question ··· 352 339 WARN_ON(!irqs_disabled()); 353 340 354 341 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 355 - 356 - /* 357 - * one level of recursion is ok and is much faster than kicking 358 - * the unplug handling 359 - */ 360 - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 361 - q->request_fn(q); 362 - queue_flag_clear(QUEUE_FLAG_REENTER, q); 363 - } else { 364 - blk_plug_device(q); 365 - kblockd_schedule_work(&q->unplug_work); 366 - } 342 + blk_invoke_request_fn(q); 367 343 } 368 344 EXPORT_SYMBOL(blk_start_queue); 369 345 ··· 410 408 * Only recurse once to avoid overrunning the stack, let the unplug 411 409 * handling reinvoke the handler shortly if we already got there. 412 410 */ 413 - if (!elv_queue_empty(q)) { 414 - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 415 - q->request_fn(q); 416 - queue_flag_clear(QUEUE_FLAG_REENTER, q); 417 - } else { 418 - blk_plug_device(q); 419 - kblockd_schedule_work(&q->unplug_work); 420 - } 421 - } 411 + if (!elv_queue_empty(q)) 412 + blk_invoke_request_fn(q); 422 413 } 423 414 EXPORT_SYMBOL(__blk_run_queue); 424 415 ··· 436 441 437 442 void blk_cleanup_queue(struct request_queue *q) 438 443 { 444 + /* 445 + * We know we have process context here, so we can be a little 446 + * cautious and ensure that pending block actions on this device 447 + * are done before moving on. Going into this function, we should 448 + * not have processes doing IO to this device. 
449 + */ 450 + blk_sync_queue(q); 451 + 439 452 mutex_lock(&q->sysfs_lock); 440 453 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); 441 454 mutex_unlock(&q->sysfs_lock); ··· 499 496 } 500 497 501 498 init_timer(&q->unplug_timer); 499 + setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); 500 + INIT_LIST_HEAD(&q->timeout_list); 502 501 503 502 kobject_init(&q->kobj, &blk_queue_ktype); 504 503 ··· 536 531 * request queue; this lock will be taken also from interrupt context, so irq 537 532 * disabling is needed for it. 538 533 * 539 - * Function returns a pointer to the initialized request queue, or NULL if 534 + * Function returns a pointer to the initialized request queue, or %NULL if 540 535 * it didn't succeed. 541 536 * 542 537 * Note: ··· 574 569 q->request_fn = rfn; 575 570 q->prep_rq_fn = NULL; 576 571 q->unplug_fn = generic_unplug_device; 577 - q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); 572 + q->queue_flags = (1 << QUEUE_FLAG_CLUSTER | 573 + 1 << QUEUE_FLAG_STACKABLE); 578 574 q->queue_lock = lock; 579 575 580 576 blk_queue_segment_boundary(q, 0xffffffff); ··· 630 624 631 625 blk_rq_init(q, rq); 632 626 633 - /* 634 - * first three bits are identical in rq->cmd_flags and bio->bi_rw, 635 - * see bio.h and blkdev.h 636 - */ 637 627 rq->cmd_flags = rw | REQ_ALLOCED; 638 628 639 629 if (priv) { ··· 890 888 */ 891 889 void blk_start_queueing(struct request_queue *q) 892 890 { 893 - if (!blk_queue_plugged(q)) 891 + if (!blk_queue_plugged(q)) { 892 + if (unlikely(blk_queue_stopped(q))) 893 + return; 894 894 q->request_fn(q); 895 - else 895 + } else 896 896 __generic_unplug_device(q); 897 897 } 898 898 EXPORT_SYMBOL(blk_start_queueing); ··· 911 907 */ 912 908 void blk_requeue_request(struct request_queue *q, struct request *rq) 913 909 { 910 + blk_delete_timer(rq); 911 + blk_clear_rq_complete(rq); 914 912 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 915 913 916 914 if (blk_rq_tagged(rq)) ··· 923 917 EXPORT_SYMBOL(blk_requeue_request); 924 918 925 919 /** 
926 - * blk_insert_request - insert a special request in to a request queue 920 + * blk_insert_request - insert a special request into a request queue 927 921 * @q: request queue where request should be inserted 928 922 * @rq: request to be inserted 929 923 * @at_head: insert request at head or tail of queue ··· 933 927 * Many block devices need to execute commands asynchronously, so they don't 934 928 * block the whole kernel from preemption during request execution. This is 935 929 * accomplished normally by inserting aritficial requests tagged as 936 - * REQ_SPECIAL in to the corresponding request queue, and letting them be 937 - * scheduled for actual execution by the request queue. 930 + * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them 931 + * be scheduled for actual execution by the request queue. 938 932 * 939 933 * We have the option of inserting the head or the tail of the queue. 940 934 * Typically we use the tail for new ioctls and so forth. We use the head ··· 988 982 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 989 983 } 990 984 991 - /* 992 - * disk_round_stats() - Round off the performance stats on a struct 985 + static void part_round_stats_single(int cpu, struct hd_struct *part, 986 + unsigned long now) 987 + { 988 + if (now == part->stamp) 989 + return; 990 + 991 + if (part->in_flight) { 992 + __part_stat_add(cpu, part, time_in_queue, 993 + part->in_flight * (now - part->stamp)); 994 + __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 995 + } 996 + part->stamp = now; 997 + } 998 + 999 + /** 1000 + * part_round_stats() - Round off the performance stats on a struct 993 1001 * disk_stats. 994 1002 * 995 1003 * The average IO queue length and utilisation statistics are maintained ··· 1017 997 * /proc/diskstats. This accounts immediately for all queue usage up to 1018 998 * the current jiffies and restarts the counters again. 
1019 999 */ 1020 - void disk_round_stats(struct gendisk *disk) 1000 + void part_round_stats(int cpu, struct hd_struct *part) 1021 1001 { 1022 1002 unsigned long now = jiffies; 1023 1003 1024 - if (now == disk->stamp) 1025 - return; 1026 - 1027 - if (disk->in_flight) { 1028 - __disk_stat_add(disk, time_in_queue, 1029 - disk->in_flight * (now - disk->stamp)); 1030 - __disk_stat_add(disk, io_ticks, (now - disk->stamp)); 1031 - } 1032 - disk->stamp = now; 1004 + if (part->partno) 1005 + part_round_stats_single(cpu, &part_to_disk(part)->part0, now); 1006 + part_round_stats_single(cpu, part, now); 1033 1007 } 1034 - EXPORT_SYMBOL_GPL(disk_round_stats); 1035 - 1036 - void part_round_stats(struct hd_struct *part) 1037 - { 1038 - unsigned long now = jiffies; 1039 - 1040 - if (now == part->stamp) 1041 - return; 1042 - 1043 - if (part->in_flight) { 1044 - __part_stat_add(part, time_in_queue, 1045 - part->in_flight * (now - part->stamp)); 1046 - __part_stat_add(part, io_ticks, (now - part->stamp)); 1047 - } 1048 - part->stamp = now; 1049 - } 1008 + EXPORT_SYMBOL_GPL(part_round_stats); 1050 1009 1051 1010 /* 1052 1011 * queue lock must be held ··· 1069 1070 1070 1071 void init_request_from_bio(struct request *req, struct bio *bio) 1071 1072 { 1073 + req->cpu = bio->bi_comp_cpu; 1072 1074 req->cmd_type = REQ_TYPE_FS; 1073 1075 1074 1076 /* ··· 1081 1081 /* 1082 1082 * REQ_BARRIER implies no merging, but lets make it explicit 1083 1083 */ 1084 - if (unlikely(bio_barrier(bio))) 1084 + if (unlikely(bio_discard(bio))) { 1085 + req->cmd_flags |= REQ_DISCARD; 1086 + if (bio_barrier(bio)) 1087 + req->cmd_flags |= REQ_SOFTBARRIER; 1088 + req->q->prepare_discard_fn(req->q, req); 1089 + } else if (unlikely(bio_barrier(bio))) 1085 1090 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 1086 1091 1087 1092 if (bio_sync(bio)) ··· 1104 1099 static int __make_request(struct request_queue *q, struct bio *bio) 1105 1100 { 1106 1101 struct request *req; 1107 - int el_ret, nr_sectors, barrier, 
err; 1102 + int el_ret, nr_sectors, barrier, discard, err; 1108 1103 const unsigned short prio = bio_prio(bio); 1109 1104 const int sync = bio_sync(bio); 1110 1105 int rw_flags; ··· 1119 1114 blk_queue_bounce(q, &bio); 1120 1115 1121 1116 barrier = bio_barrier(bio); 1122 - if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { 1117 + if (unlikely(barrier) && bio_has_data(bio) && 1118 + (q->next_ordered == QUEUE_ORDERED_NONE)) { 1119 + err = -EOPNOTSUPP; 1120 + goto end_io; 1121 + } 1122 + 1123 + discard = bio_discard(bio); 1124 + if (unlikely(discard) && !q->prepare_discard_fn) { 1123 1125 err = -EOPNOTSUPP; 1124 1126 goto end_io; 1125 1127 } ··· 1150 1138 req->biotail = bio; 1151 1139 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1152 1140 req->ioprio = ioprio_best(req->ioprio, prio); 1141 + if (!blk_rq_cpu_valid(req)) 1142 + req->cpu = bio->bi_comp_cpu; 1153 1143 drive_stat_acct(req, 0); 1154 1144 if (!attempt_back_merge(q, req)) 1155 1145 elv_merged_request(q, req, el_ret); ··· 1179 1165 req->sector = req->hard_sector = bio->bi_sector; 1180 1166 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1181 1167 req->ioprio = ioprio_best(req->ioprio, prio); 1168 + if (!blk_rq_cpu_valid(req)) 1169 + req->cpu = bio->bi_comp_cpu; 1182 1170 drive_stat_acct(req, 0); 1183 1171 if (!attempt_front_merge(q, req)) 1184 1172 elv_merged_request(q, req, el_ret); ··· 1216 1200 init_request_from_bio(req, bio); 1217 1201 1218 1202 spin_lock_irq(q->queue_lock); 1203 + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1204 + bio_flagged(bio, BIO_CPU_AFFINE)) 1205 + req->cpu = blk_cpu_to_group(smp_processor_id()); 1219 1206 if (elv_queue_empty(q)) 1220 1207 blk_plug_device(q); 1221 1208 add_request(q, req); 1222 1209 out: 1223 1210 if (sync) 1224 1211 __generic_unplug_device(q); 1225 - 1226 1212 spin_unlock_irq(q->queue_lock); 1227 1213 return 0; 1228 1214 ··· 1278 1260 1279 1261 static int should_fail_request(struct bio *bio) 1280 1262 { 1281 - if 
((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || 1282 - (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) 1263 + struct hd_struct *part = bio->bi_bdev->bd_part; 1264 + 1265 + if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) 1283 1266 return should_fail(&fail_make_request, bio->bi_size); 1284 1267 1285 1268 return 0; ··· 1333 1314 } 1334 1315 1335 1316 /** 1336 - * generic_make_request: hand a buffer to its device driver for I/O 1317 + * generic_make_request - hand a buffer to its device driver for I/O 1337 1318 * @bio: The bio describing the location in memory and on the device. 1338 1319 * 1339 1320 * generic_make_request() is used to make I/O requests of block ··· 1428 1409 1429 1410 if (bio_check_eod(bio, nr_sectors)) 1430 1411 goto end_io; 1431 - if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { 1412 + if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || 1413 + (bio_discard(bio) && !q->prepare_discard_fn)) { 1432 1414 err = -EOPNOTSUPP; 1433 1415 goto end_io; 1434 1416 } ··· 1491 1471 EXPORT_SYMBOL(generic_make_request); 1492 1472 1493 1473 /** 1494 - * submit_bio: submit a bio to the block device layer for I/O 1474 + * submit_bio - submit a bio to the block device layer for I/O 1495 1475 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 1496 1476 * @bio: The &struct bio which describes the I/O 1497 1477 * 1498 1478 * submit_bio() is very similar in purpose to generic_make_request(), and 1499 1479 * uses that function to do most of the work. Both are fairly rough 1500 - * interfaces, @bio must be presetup and ready for I/O. 1480 + * interfaces; @bio must be presetup and ready for I/O. 1501 1481 * 1502 1482 */ 1503 1483 void submit_bio(int rw, struct bio *bio) ··· 1510 1490 * If it's a regular read/write or a barrier with data attached, 1511 1491 * go through the normal accounting stuff before submission. 
1512 1492 */ 1513 - if (!bio_empty_barrier(bio)) { 1514 - 1515 - BIO_BUG_ON(!bio->bi_size); 1516 - BIO_BUG_ON(!bio->bi_io_vec); 1517 - 1493 + if (bio_has_data(bio)) { 1518 1494 if (rw & WRITE) { 1519 1495 count_vm_events(PGPGOUT, count); 1520 1496 } else { ··· 1533 1517 EXPORT_SYMBOL(submit_bio); 1534 1518 1535 1519 /** 1520 + * blk_rq_check_limits - Helper function to check a request for the queue limit 1521 + * @q: the queue 1522 + * @rq: the request being checked 1523 + * 1524 + * Description: 1525 + * @rq may have been made based on weaker limitations of upper-level queues 1526 + * in request stacking drivers, and it may violate the limitation of @q. 1527 + * Since the block layer and the underlying device driver trust @rq 1528 + * after it is inserted to @q, it should be checked against @q before 1529 + * the insertion using this generic function. 1530 + * 1531 + * This function should also be useful for request stacking drivers 1532 + * in some cases below, so export this fuction. 1533 + * Request stacking drivers like request-based dm may change the queue 1534 + * limits while requests are in the queue (e.g. dm's table swapping). 1535 + * Such request stacking drivers should check those requests agaist 1536 + * the new queue limits again when they dispatch those requests, 1537 + * although such checkings are also done against the old queue limits 1538 + * when submitting requests. 1539 + */ 1540 + int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1541 + { 1542 + if (rq->nr_sectors > q->max_sectors || 1543 + rq->data_len > q->max_hw_sectors << 9) { 1544 + printk(KERN_ERR "%s: over max size limit.\n", __func__); 1545 + return -EIO; 1546 + } 1547 + 1548 + /* 1549 + * queue's settings related to segment counting like q->bounce_pfn 1550 + * may differ from that of other stacking queues. 1551 + * Recalculate it to check the request correctly on this queue's 1552 + * limitation. 
1553 + */ 1554 + blk_recalc_rq_segments(rq); 1555 + if (rq->nr_phys_segments > q->max_phys_segments || 1556 + rq->nr_phys_segments > q->max_hw_segments) { 1557 + printk(KERN_ERR "%s: over max segments limit.\n", __func__); 1558 + return -EIO; 1559 + } 1560 + 1561 + return 0; 1562 + } 1563 + EXPORT_SYMBOL_GPL(blk_rq_check_limits); 1564 + 1565 + /** 1566 + * blk_insert_cloned_request - Helper for stacking drivers to submit a request 1567 + * @q: the queue to submit the request 1568 + * @rq: the request being queued 1569 + */ 1570 + int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1571 + { 1572 + unsigned long flags; 1573 + 1574 + if (blk_rq_check_limits(q, rq)) 1575 + return -EIO; 1576 + 1577 + #ifdef CONFIG_FAIL_MAKE_REQUEST 1578 + if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && 1579 + should_fail(&fail_make_request, blk_rq_bytes(rq))) 1580 + return -EIO; 1581 + #endif 1582 + 1583 + spin_lock_irqsave(q->queue_lock, flags); 1584 + 1585 + /* 1586 + * Submitting request must be dequeued before calling this function 1587 + * because it will be linked to another request_queue 1588 + */ 1589 + BUG_ON(blk_queued_rq(rq)); 1590 + 1591 + drive_stat_acct(rq, 1); 1592 + __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1593 + 1594 + spin_unlock_irqrestore(q->queue_lock, flags); 1595 + 1596 + return 0; 1597 + } 1598 + EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 1599 + 1600 + /** 1536 1601 * __end_that_request_first - end I/O on a request 1537 1602 * @req: the request being processed 1538 - * @error: 0 for success, < 0 for error 1603 + * @error: %0 for success, < %0 for error 1539 1604 * @nr_bytes: number of bytes to complete 1540 1605 * 1541 1606 * Description: ··· 1624 1527 * for the next range of segments (if any) in the cluster. 
1625 1528 * 1626 1529 * Return: 1627 - * 0 - we are done with this request, call end_that_request_last() 1628 - * 1 - still buffers pending for this request 1530 + * %0 - we are done with this request, call end_that_request_last() 1531 + * %1 - still buffers pending for this request 1629 1532 **/ 1630 1533 static int __end_that_request_first(struct request *req, int error, 1631 1534 int nr_bytes) ··· 1636 1539 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 1637 1540 1638 1541 /* 1639 - * for a REQ_BLOCK_PC request, we want to carry any eventual 1542 + * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual 1640 1543 * sense key with us all the way through 1641 1544 */ 1642 1545 if (!blk_pc_request(req)) ··· 1649 1552 } 1650 1553 1651 1554 if (blk_fs_request(req) && req->rq_disk) { 1652 - struct hd_struct *part = get_part(req->rq_disk, req->sector); 1653 1555 const int rw = rq_data_dir(req); 1556 + struct hd_struct *part; 1557 + int cpu; 1654 1558 1655 - all_stat_add(req->rq_disk, part, sectors[rw], 1656 - nr_bytes >> 9, req->sector); 1559 + cpu = part_stat_lock(); 1560 + part = disk_map_sector_rcu(req->rq_disk, req->sector); 1561 + part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); 1562 + part_stat_unlock(); 1657 1563 } 1658 1564 1659 1565 total_bytes = bio_nbytes = 0; ··· 1741 1641 } 1742 1642 1743 1643 /* 1744 - * splice the completion data to a local structure and hand off to 1745 - * process_completion_queue() to complete the requests 1746 - */ 1747 - static void blk_done_softirq(struct softirq_action *h) 1748 - { 1749 - struct list_head *cpu_list, local_list; 1750 - 1751 - local_irq_disable(); 1752 - cpu_list = &__get_cpu_var(blk_cpu_done); 1753 - list_replace_init(cpu_list, &local_list); 1754 - local_irq_enable(); 1755 - 1756 - while (!list_empty(&local_list)) { 1757 - struct request *rq; 1758 - 1759 - rq = list_entry(local_list.next, struct request, donelist); 1760 - list_del_init(&rq->donelist); 1761 - rq->q->softirq_done_fn(rq); 1762 - } 1763 
- } 1764 - 1765 - static int __cpuinit blk_cpu_notify(struct notifier_block *self, 1766 - unsigned long action, void *hcpu) 1767 - { 1768 - /* 1769 - * If a CPU goes away, splice its entries to the current CPU 1770 - * and trigger a run of the softirq 1771 - */ 1772 - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1773 - int cpu = (unsigned long) hcpu; 1774 - 1775 - local_irq_disable(); 1776 - list_splice_init(&per_cpu(blk_cpu_done, cpu), 1777 - &__get_cpu_var(blk_cpu_done)); 1778 - raise_softirq_irqoff(BLOCK_SOFTIRQ); 1779 - local_irq_enable(); 1780 - } 1781 - 1782 - return NOTIFY_OK; 1783 - } 1784 - 1785 - 1786 - static struct notifier_block blk_cpu_notifier __cpuinitdata = { 1787 - .notifier_call = blk_cpu_notify, 1788 - }; 1789 - 1790 - /** 1791 - * blk_complete_request - end I/O on a request 1792 - * @req: the request being processed 1793 - * 1794 - * Description: 1795 - * Ends all I/O on a request. It does not handle partial completions, 1796 - * unless the driver actually implements this in its completion callback 1797 - * through requeueing. The actual completion happens out-of-order, 1798 - * through a softirq handler. The user must have registered a completion 1799 - * callback through blk_queue_softirq_done(). 
1800 - **/ 1801 - 1802 - void blk_complete_request(struct request *req) 1803 - { 1804 - struct list_head *cpu_list; 1805 - unsigned long flags; 1806 - 1807 - BUG_ON(!req->q->softirq_done_fn); 1808 - 1809 - local_irq_save(flags); 1810 - 1811 - cpu_list = &__get_cpu_var(blk_cpu_done); 1812 - list_add_tail(&req->donelist, cpu_list); 1813 - raise_softirq_irqoff(BLOCK_SOFTIRQ); 1814 - 1815 - local_irq_restore(flags); 1816 - } 1817 - EXPORT_SYMBOL(blk_complete_request); 1818 - 1819 - /* 1820 1644 * queue lock must be held 1821 1645 */ 1822 1646 static void end_that_request_last(struct request *req, int error) 1823 1647 { 1824 1648 struct gendisk *disk = req->rq_disk; 1649 + 1650 + blk_delete_timer(req); 1825 1651 1826 1652 if (blk_rq_tagged(req)) 1827 1653 blk_queue_end_tag(req->q, req); ··· 1766 1740 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { 1767 1741 unsigned long duration = jiffies - req->start_time; 1768 1742 const int rw = rq_data_dir(req); 1769 - struct hd_struct *part = get_part(disk, req->sector); 1743 + struct hd_struct *part; 1744 + int cpu; 1770 1745 1771 - __all_stat_inc(disk, part, ios[rw], req->sector); 1772 - __all_stat_add(disk, part, ticks[rw], duration, req->sector); 1773 - disk_round_stats(disk); 1774 - disk->in_flight--; 1775 - if (part) { 1776 - part_round_stats(part); 1777 - part->in_flight--; 1778 - } 1746 + cpu = part_stat_lock(); 1747 + part = disk_map_sector_rcu(disk, req->sector); 1748 + 1749 + part_stat_inc(cpu, part, ios[rw]); 1750 + part_stat_add(cpu, part, ticks[rw], duration); 1751 + part_round_stats(cpu, part); 1752 + part_dec_in_flight(part); 1753 + 1754 + part_stat_unlock(); 1779 1755 } 1780 1756 1781 1757 if (req->end_io) ··· 1788 1760 1789 1761 __blk_put_request(req->q, req); 1790 1762 } 1791 - } 1792 - 1793 - static inline void __end_request(struct request *rq, int uptodate, 1794 - unsigned int nr_bytes) 1795 - { 1796 - int error = 0; 1797 - 1798 - if (uptodate <= 0) 1799 - error = uptodate ? 
uptodate : -EIO; 1800 - 1801 - __blk_end_request(rq, error, nr_bytes); 1802 1763 } 1803 1764 1804 1765 /** ··· 1820 1803 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); 1821 1804 1822 1805 /** 1823 - * end_queued_request - end all I/O on a queued request 1824 - * @rq: the request being processed 1825 - * @uptodate: error value or 0/1 uptodate flag 1826 - * 1827 - * Description: 1828 - * Ends all I/O on a request, and removes it from the block layer queues. 1829 - * Not suitable for normal IO completion, unless the driver still has 1830 - * the request attached to the block layer. 1831 - * 1832 - **/ 1833 - void end_queued_request(struct request *rq, int uptodate) 1834 - { 1835 - __end_request(rq, uptodate, blk_rq_bytes(rq)); 1836 - } 1837 - EXPORT_SYMBOL(end_queued_request); 1838 - 1839 - /** 1840 - * end_dequeued_request - end all I/O on a dequeued request 1841 - * @rq: the request being processed 1842 - * @uptodate: error value or 0/1 uptodate flag 1843 - * 1844 - * Description: 1845 - * Ends all I/O on a request. The request must already have been 1846 - * dequeued using blkdev_dequeue_request(), as is normally the case 1847 - * for most drivers. 1848 - * 1849 - **/ 1850 - void end_dequeued_request(struct request *rq, int uptodate) 1851 - { 1852 - __end_request(rq, uptodate, blk_rq_bytes(rq)); 1853 - } 1854 - EXPORT_SYMBOL(end_dequeued_request); 1855 - 1856 - 1857 - /** 1858 1806 * end_request - end I/O on the current segment of the request 1859 1807 * @req: the request being processed 1860 - * @uptodate: error value or 0/1 uptodate flag 1808 + * @uptodate: error value or %0/%1 uptodate flag 1861 1809 * 1862 1810 * Description: 1863 1811 * Ends I/O on the current segment of a request. If that is the only 1864 1812 * remaining segment, the request is also completed and freed. 1865 1813 * 1866 - * This is a remnant of how older block drivers handled IO completions. 
1867 - * Modern drivers typically end IO on the full request in one go, unless 1814 + * This is a remnant of how older block drivers handled I/O completions. 1815 + * Modern drivers typically end I/O on the full request in one go, unless 1868 1816 * they have a residual value to account for. For that case this function 1869 1817 * isn't really useful, unless the residual just happens to be the 1870 1818 * full current segment. In other words, don't use this function in new 1871 - * code. Either use end_request_completely(), or the 1872 - * end_that_request_chunk() (along with end_that_request_last()) for 1873 - * partial completions. 1874 - * 1819 + * code. Use blk_end_request() or __blk_end_request() to end a request. 1875 1820 **/ 1876 1821 void end_request(struct request *req, int uptodate) 1877 1822 { 1878 - __end_request(req, uptodate, req->hard_cur_sectors << 9); 1823 + int error = 0; 1824 + 1825 + if (uptodate <= 0) 1826 + error = uptodate ? uptodate : -EIO; 1827 + 1828 + __blk_end_request(req, error, req->hard_cur_sectors << 9); 1879 1829 } 1880 1830 EXPORT_SYMBOL(end_request); 1881 1831 1882 - /** 1883 - * blk_end_io - Generic end_io function to complete a request. 1884 - * @rq: the request being processed 1885 - * @error: 0 for success, < 0 for error 1886 - * @nr_bytes: number of bytes to complete @rq 1887 - * @bidi_bytes: number of bytes to complete @rq->next_rq 1888 - * @drv_callback: function called between completion of bios in the request 1889 - * and completion of the request. 1890 - * If the callback returns non 0, this helper returns without 1891 - * completion of the request. 1892 - * 1893 - * Description: 1894 - * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 1895 - * If @rq has leftover, sets it up for the next range of segments. 1896 - * 1897 - * Return: 1898 - * 0 - we are done with this request 1899 - * 1 - this request is not freed yet, it still has pending buffers. 
1900 - **/ 1901 - static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, 1902 - unsigned int bidi_bytes, 1903 - int (drv_callback)(struct request *)) 1832 + static int end_that_request_data(struct request *rq, int error, 1833 + unsigned int nr_bytes, unsigned int bidi_bytes) 1904 1834 { 1905 - struct request_queue *q = rq->q; 1906 - unsigned long flags = 0UL; 1907 - 1908 - if (blk_fs_request(rq) || blk_pc_request(rq)) { 1835 + if (rq->bio) { 1909 1836 if (__end_that_request_first(rq, error, nr_bytes)) 1910 1837 return 1; 1911 1838 ··· 1858 1897 __end_that_request_first(rq->next_rq, error, bidi_bytes)) 1859 1898 return 1; 1860 1899 } 1900 + 1901 + return 0; 1902 + } 1903 + 1904 + /** 1905 + * blk_end_io - Generic end_io function to complete a request. 1906 + * @rq: the request being processed 1907 + * @error: %0 for success, < %0 for error 1908 + * @nr_bytes: number of bytes to complete @rq 1909 + * @bidi_bytes: number of bytes to complete @rq->next_rq 1910 + * @drv_callback: function called between completion of bios in the request 1911 + * and completion of the request. 1912 + * If the callback returns non %0, this helper returns without 1913 + * completion of the request. 1914 + * 1915 + * Description: 1916 + * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 1917 + * If @rq has leftover, sets it up for the next range of segments. 1918 + * 1919 + * Return: 1920 + * %0 - we are done with this request 1921 + * %1 - this request is not freed yet, it still has pending buffers. 
1922 + **/ 1923 + static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, 1924 + unsigned int bidi_bytes, 1925 + int (drv_callback)(struct request *)) 1926 + { 1927 + struct request_queue *q = rq->q; 1928 + unsigned long flags = 0UL; 1929 + 1930 + if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) 1931 + return 1; 1861 1932 1862 1933 /* Special feature for tricky drivers */ 1863 1934 if (drv_callback && drv_callback(rq)) ··· 1907 1914 /** 1908 1915 * blk_end_request - Helper function for drivers to complete the request. 1909 1916 * @rq: the request being processed 1910 - * @error: 0 for success, < 0 for error 1917 + * @error: %0 for success, < %0 for error 1911 1918 * @nr_bytes: number of bytes to complete 1912 1919 * 1913 1920 * Description: ··· 1915 1922 * If @rq has leftover, sets it up for the next range of segments. 1916 1923 * 1917 1924 * Return: 1918 - * 0 - we are done with this request 1919 - * 1 - still buffers pending for this request 1925 + * %0 - we are done with this request 1926 + * %1 - still buffers pending for this request 1920 1927 **/ 1921 1928 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 1922 1929 { ··· 1927 1934 /** 1928 1935 * __blk_end_request - Helper function for drivers to complete the request. 1929 1936 * @rq: the request being processed 1930 - * @error: 0 for success, < 0 for error 1937 + * @error: %0 for success, < %0 for error 1931 1938 * @nr_bytes: number of bytes to complete 1932 1939 * 1933 1940 * Description: 1934 1941 * Must be called with queue lock held unlike blk_end_request(). 
1935 1942 * 1936 1943 * Return: 1937 - * 0 - we are done with this request 1938 - * 1 - still buffers pending for this request 1944 + * %0 - we are done with this request 1945 + * %1 - still buffers pending for this request 1939 1946 **/ 1940 1947 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 1941 1948 { 1942 - if (blk_fs_request(rq) || blk_pc_request(rq)) { 1943 - if (__end_that_request_first(rq, error, nr_bytes)) 1944 - return 1; 1945 - } 1949 + if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) 1950 + return 1; 1946 1951 1947 1952 add_disk_randomness(rq->rq_disk); 1948 1953 ··· 1953 1962 /** 1954 1963 * blk_end_bidi_request - Helper function for drivers to complete bidi request. 1955 1964 * @rq: the bidi request being processed 1956 - * @error: 0 for success, < 0 for error 1965 + * @error: %0 for success, < %0 for error 1957 1966 * @nr_bytes: number of bytes to complete @rq 1958 1967 * @bidi_bytes: number of bytes to complete @rq->next_rq 1959 1968 * ··· 1961 1970 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 1962 1971 * 1963 1972 * Return: 1964 - * 0 - we are done with this request 1965 - * 1 - still buffers pending for this request 1973 + * %0 - we are done with this request 1974 + * %1 - still buffers pending for this request 1966 1975 **/ 1967 1976 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, 1968 1977 unsigned int bidi_bytes) ··· 1972 1981 EXPORT_SYMBOL_GPL(blk_end_bidi_request); 1973 1982 1974 1983 /** 1984 + * blk_update_request - Special helper function for request stacking drivers 1985 + * @rq: the request being processed 1986 + * @error: %0 for success, < %0 for error 1987 + * @nr_bytes: number of bytes to complete @rq 1988 + * 1989 + * Description: 1990 + * Ends I/O on a number of bytes attached to @rq, but doesn't complete 1991 + * the request structure even if @rq doesn't have leftover. 
1992 + * If @rq has leftover, sets it up for the next range of segments. 1993 + * 1994 + * This special helper function is only for request stacking drivers 1995 + * (e.g. request-based dm) so that they can handle partial completion. 1996 + * Actual device drivers should use blk_end_request instead. 1997 + */ 1998 + void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) 1999 + { 2000 + if (!end_that_request_data(rq, error, nr_bytes, 0)) { 2001 + /* 2002 + * These members are not updated in end_that_request_data() 2003 + * when all bios are completed. 2004 + * Update them so that the request stacking driver can find 2005 + * how many bytes remain in the request later. 2006 + */ 2007 + rq->nr_sectors = rq->hard_nr_sectors = 0; 2008 + rq->current_nr_sectors = rq->hard_cur_sectors = 0; 2009 + } 2010 + } 2011 + EXPORT_SYMBOL_GPL(blk_update_request); 2012 + 2013 + /** 1975 2014 * blk_end_request_callback - Special helper function for tricky drivers 1976 2015 * @rq: the request being processed 1977 - * @error: 0 for success, < 0 for error 2016 + * @error: %0 for success, < %0 for error 1978 2017 * @nr_bytes: number of bytes to complete 1979 2018 * @drv_callback: function called between completion of bios in the request 1980 2019 * and completion of the request. 1981 - * If the callback returns non 0, this helper returns without 2020 + * If the callback returns non %0, this helper returns without 1982 2021 * completion of the request. 1983 2022 * 1984 2023 * Description: ··· 2021 2000 * Don't use this interface in other places anymore. 2022 2001 * 2023 2002 * Return: 2024 - * 0 - we are done with this request 2025 - * 1 - this request is not freed yet. 2026 - * this request still has pending buffers or 2027 - * the driver doesn't want to finish this request yet. 2003 + * %0 - we are done with this request 2004 + * %1 - this request is not freed yet. 
2005 + * this request still has pending buffers or 2006 + * the driver doesn't want to finish this request yet. 2028 2007 **/ 2029 2008 int blk_end_request_callback(struct request *rq, int error, 2030 2009 unsigned int nr_bytes, ··· 2037 2016 void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2038 2017 struct bio *bio) 2039 2018 { 2040 - /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ 2019 + /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and 2020 + we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ 2041 2021 rq->cmd_flags |= (bio->bi_rw & 3); 2042 2022 2043 - rq->nr_phys_segments = bio_phys_segments(q, bio); 2044 - rq->nr_hw_segments = bio_hw_segments(q, bio); 2023 + if (bio_has_data(bio)) { 2024 + rq->nr_phys_segments = bio_phys_segments(q, bio); 2025 + rq->buffer = bio_data(bio); 2026 + } 2045 2027 rq->current_nr_sectors = bio_cur_sectors(bio); 2046 2028 rq->hard_cur_sectors = rq->current_nr_sectors; 2047 2029 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 2048 - rq->buffer = bio_data(bio); 2049 2030 rq->data_len = bio->bi_size; 2050 2031 2051 2032 rq->bio = rq->biotail = bio; ··· 2056 2033 rq->rq_disk = bio->bi_bdev->bd_disk; 2057 2034 } 2058 2035 2059 - int kblockd_schedule_work(struct work_struct *work) 2036 + /** 2037 + * blk_lld_busy - Check if underlying low-level drivers of a device are busy 2038 + * @q : the queue of the device being checked 2039 + * 2040 + * Description: 2041 + * Check if underlying low-level drivers of a device are busy. 2042 + * If the drivers want to export their busy state, they must set own 2043 + * exporting function using blk_queue_lld_busy() first. 2044 + * 2045 + * Basically, this function is used only by request stacking drivers 2046 + * to stop dispatching requests to underlying devices when underlying 2047 + * devices are busy. 
This behavior helps more I/O merging on the queue 2048 + * of the request stacking driver and prevents I/O throughput regression 2049 + * on burst I/O load. 2050 + * 2051 + * Return: 2052 + * 0 - Not busy (The request stacking driver should dispatch request) 2053 + * 1 - Busy (The request stacking driver should stop dispatching request) 2054 + */ 2055 + int blk_lld_busy(struct request_queue *q) 2056 + { 2057 + if (q->lld_busy_fn) 2058 + return q->lld_busy_fn(q); 2059 + 2060 + return 0; 2061 + } 2062 + EXPORT_SYMBOL_GPL(blk_lld_busy); 2063 + 2064 + int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) 2060 2065 { 2061 2066 return queue_work(kblockd_workqueue, work); 2062 2067 } ··· 2098 2047 2099 2048 int __init blk_dev_init(void) 2100 2049 { 2101 - int i; 2102 - 2103 2050 kblockd_workqueue = create_workqueue("kblockd"); 2104 2051 if (!kblockd_workqueue) 2105 2052 panic("Failed to create kblockd\n"); ··· 2107 2058 2108 2059 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2109 2060 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2110 - 2111 - for_each_possible_cpu(i) 2112 - INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); 2113 - 2114 - open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); 2115 - register_hotcpu_notifier(&blk_cpu_notifier); 2116 2061 2117 2062 return 0; 2118 2063 }
+3 -3
block/blk-exec.c
··· 16 16 /** 17 17 * blk_end_sync_rq - executes a completion event on a request 18 18 * @rq: request to complete 19 - * @error: end io status of the request 19 + * @error: end I/O status of the request 20 20 */ 21 21 static void blk_end_sync_rq(struct request *rq, int error) 22 22 { ··· 41 41 * @done: I/O completion handler 42 42 * 43 43 * Description: 44 - * Insert a fully prepared request at the back of the io scheduler queue 44 + * Insert a fully prepared request at the back of the I/O scheduler queue 45 45 * for execution. Don't wait for completion. 46 46 */ 47 47 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, ··· 72 72 * @at_head: insert request at head or tail of queue 73 73 * 74 74 * Description: 75 - * Insert a fully prepared request at the back of the io scheduler queue 75 + * Insert a fully prepared request at the back of the I/O scheduler queue 76 76 * for execution and wait for completion. 77 77 */ 78 78 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
+17 -16
block/blk-integrity.c
··· 108 108 EXPORT_SYMBOL(blk_rq_map_integrity_sg); 109 109 110 110 /** 111 - * blk_integrity_compare - Compare integrity profile of two block devices 112 - * @b1: Device to compare 113 - * @b2: Device to compare 111 + * blk_integrity_compare - Compare integrity profile of two disks 112 + * @gd1: Disk to compare 113 + * @gd2: Disk to compare 114 114 * 115 115 * Description: Meta-devices like DM and MD need to verify that all 116 116 * sub-devices use the same integrity format before advertising to 117 117 * upper layers that they can send/receive integrity metadata. This 118 - * function can be used to check whether two block devices have 118 + * function can be used to check whether two gendisk devices have 119 119 * compatible integrity formats. 120 120 */ 121 - int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2) 121 + int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) 122 122 { 123 - struct blk_integrity *b1 = bd1->bd_disk->integrity; 124 - struct blk_integrity *b2 = bd2->bd_disk->integrity; 123 + struct blk_integrity *b1 = gd1->integrity; 124 + struct blk_integrity *b2 = gd2->integrity; 125 125 126 - BUG_ON(bd1->bd_disk == NULL); 127 - BUG_ON(bd2->bd_disk == NULL); 126 + if (!b1 && !b2) 127 + return 0; 128 128 129 129 if (!b1 || !b2) 130 - return 0; 130 + return -1; 131 131 132 132 if (b1->sector_size != b2->sector_size) { 133 133 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, 134 - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 134 + gd1->disk_name, gd2->disk_name, 135 135 b1->sector_size, b2->sector_size); 136 136 return -1; 137 137 } 138 138 139 139 if (b1->tuple_size != b2->tuple_size) { 140 140 printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__, 141 - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 141 + gd1->disk_name, gd2->disk_name, 142 142 b1->tuple_size, b2->tuple_size); 143 143 return -1; 144 144 } 145 145 146 146 if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { 
147 147 printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__, 148 - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 148 + gd1->disk_name, gd2->disk_name, 149 149 b1->tag_size, b2->tag_size); 150 150 return -1; 151 151 } 152 152 153 153 if (strcmp(b1->name, b2->name)) { 154 154 printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, 155 - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 155 + gd1->disk_name, gd2->disk_name, 156 156 b1->name, b2->name); 157 157 return -1; 158 158 } ··· 331 331 return -1; 332 332 333 333 if (kobject_init_and_add(&bi->kobj, &integrity_ktype, 334 - &disk->dev.kobj, "%s", "integrity")) { 334 + &disk_to_dev(disk)->kobj, 335 + "%s", "integrity")) { 335 336 kmem_cache_free(integrity_cachep, bi); 336 337 return -1; 337 338 } ··· 376 375 377 376 kobject_uevent(&bi->kobj, KOBJ_REMOVE); 378 377 kobject_del(&bi->kobj); 379 - kobject_put(&disk->dev.kobj); 380 378 kmem_cache_free(integrity_cachep, bi); 379 + disk->integrity = NULL; 381 380 } 382 381 EXPORT_SYMBOL(blk_integrity_unregister);
+39 -29
block/blk-map.c
··· 41 41 } 42 42 43 43 static int __blk_rq_map_user(struct request_queue *q, struct request *rq, 44 - void __user *ubuf, unsigned int len) 44 + struct rq_map_data *map_data, void __user *ubuf, 45 + unsigned int len, int null_mapped, gfp_t gfp_mask) 45 46 { 46 47 unsigned long uaddr; 47 - unsigned int alignment; 48 48 struct bio *bio, *orig_bio; 49 49 int reading, ret; 50 50 ··· 55 55 * direct dma. else, set up kernel bounce buffers 56 56 */ 57 57 uaddr = (unsigned long) ubuf; 58 - alignment = queue_dma_alignment(q) | q->dma_pad_mask; 59 - if (!(uaddr & alignment) && !(len & alignment)) 60 - bio = bio_map_user(q, NULL, uaddr, len, reading); 58 + if (blk_rq_aligned(q, ubuf, len) && !map_data) 59 + bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); 61 60 else 62 - bio = bio_copy_user(q, uaddr, len, reading); 61 + bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); 63 62 64 63 if (IS_ERR(bio)) 65 64 return PTR_ERR(bio); 65 + 66 + if (null_mapped) 67 + bio->bi_flags |= (1 << BIO_NULL_MAPPED); 66 68 67 69 orig_bio = bio; 68 70 blk_queue_bounce(q, &bio); ··· 87 85 } 88 86 89 87 /** 90 - * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage 88 + * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage 91 89 * @q: request queue where request should be inserted 92 90 * @rq: request structure to fill 91 + * @map_data: pointer to the rq_map_data holding pages (if necessary) 93 92 * @ubuf: the user buffer 94 93 * @len: length of user data 94 + * @gfp_mask: memory allocation flags 95 95 * 96 96 * Description: 97 - * Data will be mapped directly for zero copy io, if possible. Otherwise 97 + * Data will be mapped directly for zero copy I/O, if possible. Otherwise 98 98 * a kernel bounce buffer is used. 99 99 * 100 - * A matching blk_rq_unmap_user() must be issued at the end of io, while 100 + * A matching blk_rq_unmap_user() must be issued at the end of I/O, while 101 101 * still in process context. 
102 102 * 103 103 * Note: The mapped bio may need to be bounced through blk_queue_bounce() ··· 109 105 * unmapping. 110 106 */ 111 107 int blk_rq_map_user(struct request_queue *q, struct request *rq, 112 - void __user *ubuf, unsigned long len) 108 + struct rq_map_data *map_data, void __user *ubuf, 109 + unsigned long len, gfp_t gfp_mask) 113 110 { 114 111 unsigned long bytes_read = 0; 115 112 struct bio *bio = NULL; 116 - int ret; 113 + int ret, null_mapped = 0; 117 114 118 115 if (len > (q->max_hw_sectors << 9)) 119 116 return -EINVAL; 120 - if (!len || !ubuf) 117 + if (!len) 121 118 return -EINVAL; 119 + if (!ubuf) { 120 + if (!map_data || rq_data_dir(rq) != READ) 121 + return -EINVAL; 122 + null_mapped = 1; 123 + } 122 124 123 125 while (bytes_read != len) { 124 126 unsigned long map_len, end, start; ··· 142 132 if (end - start > BIO_MAX_PAGES) 143 133 map_len -= PAGE_SIZE; 144 134 145 - ret = __blk_rq_map_user(q, rq, ubuf, map_len); 135 + ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, 136 + null_mapped, gfp_mask); 146 137 if (ret < 0) 147 138 goto unmap_rq; 148 139 if (!bio) ··· 165 154 EXPORT_SYMBOL(blk_rq_map_user); 166 155 167 156 /** 168 - * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage 157 + * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage 169 158 * @q: request queue where request should be inserted 170 159 * @rq: request to map data to 160 + * @map_data: pointer to the rq_map_data holding pages (if necessary) 171 161 * @iov: pointer to the iovec 172 162 * @iov_count: number of elements in the iovec 173 163 * @len: I/O byte count 164 + * @gfp_mask: memory allocation flags 174 165 * 175 166 * Description: 176 - * Data will be mapped directly for zero copy io, if possible. Otherwise 167 + * Data will be mapped directly for zero copy I/O, if possible. Otherwise 177 168 * a kernel bounce buffer is used. 
178 169 * 179 - * A matching blk_rq_unmap_user() must be issued at the end of io, while 170 + * A matching blk_rq_unmap_user() must be issued at the end of I/O, while 180 171 * still in process context. 181 172 * 182 173 * Note: The mapped bio may need to be bounced through blk_queue_bounce() ··· 188 175 * unmapping. 189 176 */ 190 177 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 191 - struct sg_iovec *iov, int iov_count, unsigned int len) 178 + struct rq_map_data *map_data, struct sg_iovec *iov, 179 + int iov_count, unsigned int len, gfp_t gfp_mask) 192 180 { 193 181 struct bio *bio; 194 182 int i, read = rq_data_dir(rq) == READ; ··· 207 193 } 208 194 } 209 195 210 - if (unaligned || (q->dma_pad_mask & len)) 211 - bio = bio_copy_user_iov(q, iov, iov_count, read); 196 + if (unaligned || (q->dma_pad_mask & len) || map_data) 197 + bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, 198 + gfp_mask); 212 199 else 213 - bio = bio_map_user_iov(q, NULL, iov, iov_count, read); 200 + bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); 214 201 215 202 if (IS_ERR(bio)) 216 203 return PTR_ERR(bio); ··· 231 216 rq->buffer = rq->data = NULL; 232 217 return 0; 233 218 } 219 + EXPORT_SYMBOL(blk_rq_map_user_iov); 234 220 235 221 /** 236 222 * blk_rq_unmap_user - unmap a request with user data ··· 240 224 * Description: 241 225 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must 242 226 * supply the original rq->bio from the blk_rq_map_user() return, since 243 - * the io completion may have changed rq->bio. 227 + * the I/O completion may have changed rq->bio. 
244 228 */ 245 229 int blk_rq_unmap_user(struct bio *bio) 246 230 { ··· 266 250 EXPORT_SYMBOL(blk_rq_unmap_user); 267 251 268 252 /** 269 - * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage 253 + * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage 270 254 * @q: request queue where request should be inserted 271 255 * @rq: request to fill 272 256 * @kbuf: the kernel buffer ··· 280 264 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 281 265 unsigned int len, gfp_t gfp_mask) 282 266 { 283 - unsigned long kaddr; 284 - unsigned int alignment; 285 267 int reading = rq_data_dir(rq) == READ; 286 268 int do_copy = 0; 287 269 struct bio *bio; ··· 289 275 if (!len || !kbuf) 290 276 return -EINVAL; 291 277 292 - kaddr = (unsigned long)kbuf; 293 - alignment = queue_dma_alignment(q) | q->dma_pad_mask; 294 - do_copy = ((kaddr & alignment) || (len & alignment) || 295 - object_is_on_stack(kbuf)); 296 - 278 + do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf); 297 279 if (do_copy) 298 280 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 299 281 else
+25 -104
block/blk-merge.c
··· 11 11 12 12 void blk_recalc_rq_sectors(struct request *rq, int nsect) 13 13 { 14 - if (blk_fs_request(rq)) { 14 + if (blk_fs_request(rq) || blk_discard_rq(rq)) { 15 15 rq->hard_sector += nsect; 16 16 rq->hard_nr_sectors -= nsect; 17 17 ··· 41 41 void blk_recalc_rq_segments(struct request *rq) 42 42 { 43 43 int nr_phys_segs; 44 - int nr_hw_segs; 45 44 unsigned int phys_size; 46 - unsigned int hw_size; 47 45 struct bio_vec *bv, *bvprv = NULL; 48 46 int seg_size; 49 - int hw_seg_size; 50 47 int cluster; 51 48 struct req_iterator iter; 52 49 int high, highprv = 1; ··· 53 56 return; 54 57 55 58 cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); 56 - hw_seg_size = seg_size = 0; 57 - phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; 59 + seg_size = 0; 60 + phys_size = nr_phys_segs = 0; 58 61 rq_for_each_segment(bv, rq, iter) { 59 62 /* 60 63 * the trick here is making sure that a high page is never ··· 63 66 */ 64 67 high = page_to_pfn(bv->bv_page) > q->bounce_pfn; 65 68 if (high || highprv) 66 - goto new_hw_segment; 69 + goto new_segment; 67 70 if (cluster) { 68 71 if (seg_size + bv->bv_len > q->max_segment_size) 69 72 goto new_segment; ··· 71 74 goto new_segment; 72 75 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) 73 76 goto new_segment; 74 - if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) 75 - goto new_hw_segment; 76 77 77 78 seg_size += bv->bv_len; 78 - hw_seg_size += bv->bv_len; 79 79 bvprv = bv; 80 80 continue; 81 81 } 82 82 new_segment: 83 - if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && 84 - !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) 85 - hw_seg_size += bv->bv_len; 86 - else { 87 - new_hw_segment: 88 - if (nr_hw_segs == 1 && 89 - hw_seg_size > rq->bio->bi_hw_front_size) 90 - rq->bio->bi_hw_front_size = hw_seg_size; 91 - hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; 92 - nr_hw_segs++; 93 - } 94 - 95 83 nr_phys_segs++; 96 84 bvprv = bv; 97 85 seg_size = bv->bv_len; 98 86 highprv = high; 99 87 } 100 88 101 - if (nr_hw_segs == 1 && 102 - hw_seg_size 
> rq->bio->bi_hw_front_size) 103 - rq->bio->bi_hw_front_size = hw_seg_size; 104 - if (hw_seg_size > rq->biotail->bi_hw_back_size) 105 - rq->biotail->bi_hw_back_size = hw_seg_size; 106 89 rq->nr_phys_segments = nr_phys_segs; 107 - rq->nr_hw_segments = nr_hw_segs; 108 90 } 109 91 110 92 void blk_recount_segments(struct request_queue *q, struct bio *bio) ··· 96 120 blk_recalc_rq_segments(&rq); 97 121 bio->bi_next = nxt; 98 122 bio->bi_phys_segments = rq.nr_phys_segments; 99 - bio->bi_hw_segments = rq.nr_hw_segments; 100 123 bio->bi_flags |= (1 << BIO_SEG_VALID); 101 124 } 102 125 EXPORT_SYMBOL(blk_recount_segments); ··· 106 131 if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) 107 132 return 0; 108 133 109 - if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) 110 - return 0; 111 134 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 112 135 return 0; 113 136 137 + if (!bio_has_data(bio)) 138 + return 1; 139 + 140 + if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) 141 + return 0; 142 + 114 143 /* 115 - * bio and nxt are contigous in memory, check if the queue allows 144 + * bio and nxt are contiguous in memory; check if the queue allows 116 145 * these two to be merged into one 117 146 */ 118 147 if (BIO_SEG_BOUNDARY(q, bio, nxt)) 119 148 return 1; 120 149 121 150 return 0; 122 - } 123 - 124 - static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, 125 - struct bio *nxt) 126 - { 127 - if (!bio_flagged(bio, BIO_SEG_VALID)) 128 - blk_recount_segments(q, bio); 129 - if (!bio_flagged(nxt, BIO_SEG_VALID)) 130 - blk_recount_segments(q, nxt); 131 - if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || 132 - BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) 133 - return 0; 134 - if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) 135 - return 0; 136 - 137 - return 1; 138 151 } 139 152 140 153 /* ··· 238 275 struct request *req, 239 276 struct bio *bio) 240 277 { 241 - int 
nr_hw_segs = bio_hw_segments(q, bio); 242 278 int nr_phys_segs = bio_phys_segments(q, bio); 243 279 244 - if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 280 + if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments 245 281 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 246 282 req->cmd_flags |= REQ_NOMERGE; 247 283 if (req == q->last_merge) ··· 252 290 * This will form the start of a new hw segment. Bump both 253 291 * counters. 254 292 */ 255 - req->nr_hw_segments += nr_hw_segs; 256 293 req->nr_phys_segments += nr_phys_segs; 257 294 return 1; 258 295 } ··· 260 299 struct bio *bio) 261 300 { 262 301 unsigned short max_sectors; 263 - int len; 264 302 265 303 if (unlikely(blk_pc_request(req))) 266 304 max_sectors = q->max_hw_sectors; ··· 276 316 blk_recount_segments(q, req->biotail); 277 317 if (!bio_flagged(bio, BIO_SEG_VALID)) 278 318 blk_recount_segments(q, bio); 279 - len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; 280 - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) 281 - && !BIOVEC_VIRT_OVERSIZE(len)) { 282 - int mergeable = ll_new_mergeable(q, req, bio); 283 - 284 - if (mergeable) { 285 - if (req->nr_hw_segments == 1) 286 - req->bio->bi_hw_front_size = len; 287 - if (bio->bi_hw_segments == 1) 288 - bio->bi_hw_back_size = len; 289 - } 290 - return mergeable; 291 - } 292 319 293 320 return ll_new_hw_segment(q, req, bio); 294 321 } ··· 284 337 struct bio *bio) 285 338 { 286 339 unsigned short max_sectors; 287 - int len; 288 340 289 341 if (unlikely(blk_pc_request(req))) 290 342 max_sectors = q->max_hw_sectors; ··· 297 351 q->last_merge = NULL; 298 352 return 0; 299 353 } 300 - len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; 301 354 if (!bio_flagged(bio, BIO_SEG_VALID)) 302 355 blk_recount_segments(q, bio); 303 356 if (!bio_flagged(req->bio, BIO_SEG_VALID)) 304 357 blk_recount_segments(q, req->bio); 305 - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && 306 - 
!BIOVEC_VIRT_OVERSIZE(len)) { 307 - int mergeable = ll_new_mergeable(q, req, bio); 308 - 309 - if (mergeable) { 310 - if (bio->bi_hw_segments == 1) 311 - bio->bi_hw_front_size = len; 312 - if (req->nr_hw_segments == 1) 313 - req->biotail->bi_hw_back_size = len; 314 - } 315 - return mergeable; 316 - } 317 358 318 359 return ll_new_hw_segment(q, req, bio); 319 360 } ··· 309 376 struct request *next) 310 377 { 311 378 int total_phys_segments; 312 - int total_hw_segments; 313 379 314 380 /* 315 381 * First check if the either of the requests are re-queued ··· 330 398 if (total_phys_segments > q->max_phys_segments) 331 399 return 0; 332 400 333 - total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; 334 - if (blk_hw_contig_segment(q, req->biotail, next->bio)) { 335 - int len = req->biotail->bi_hw_back_size + 336 - next->bio->bi_hw_front_size; 337 - /* 338 - * propagate the combined length to the end of the requests 339 - */ 340 - if (req->nr_hw_segments == 1) 341 - req->bio->bi_hw_front_size = len; 342 - if (next->nr_hw_segments == 1) 343 - next->biotail->bi_hw_back_size = len; 344 - total_hw_segments--; 345 - } 346 - 347 - if (total_hw_segments > q->max_hw_segments) 401 + if (total_phys_segments > q->max_hw_segments) 348 402 return 0; 349 403 350 404 /* Merge is OK... 
*/ 351 405 req->nr_phys_segments = total_phys_segments; 352 - req->nr_hw_segments = total_hw_segments; 353 406 return 1; 354 407 } 355 408 ··· 387 470 elv_merge_requests(q, req, next); 388 471 389 472 if (req->rq_disk) { 390 - struct hd_struct *part 391 - = get_part(req->rq_disk, req->sector); 392 - disk_round_stats(req->rq_disk); 393 - req->rq_disk->in_flight--; 394 - if (part) { 395 - part_round_stats(part); 396 - part->in_flight--; 397 - } 473 + struct hd_struct *part; 474 + int cpu; 475 + 476 + cpu = part_stat_lock(); 477 + part = disk_map_sector_rcu(req->rq_disk, req->sector); 478 + 479 + part_round_stats(cpu, part); 480 + part_dec_in_flight(part); 481 + 482 + part_stat_unlock(); 398 483 } 399 484 400 485 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 486 + if (blk_rq_cpu_valid(next)) 487 + req->cpu = next->cpu; 401 488 402 489 __blk_put_request(q, next); 403 490 return 1;
+39 -4
block/blk-settings.c
··· 33 33 EXPORT_SYMBOL(blk_queue_prep_rq); 34 34 35 35 /** 36 + * blk_queue_set_discard - set a discard_sectors function for queue 37 + * @q: queue 38 + * @dfn: prepare_discard function 39 + * 40 + * It's possible for a queue to register a discard callback which is used 41 + * to transform a discard request into the appropriate type for the 42 + * hardware. If none is registered, then discard requests are failed 43 + * with %EOPNOTSUPP. 44 + * 45 + */ 46 + void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) 47 + { 48 + q->prepare_discard_fn = dfn; 49 + } 50 + EXPORT_SYMBOL(blk_queue_set_discard); 51 + 52 + /** 36 53 * blk_queue_merge_bvec - set a merge_bvec function for queue 37 54 * @q: queue 38 55 * @mbfn: merge_bvec_fn ··· 76 59 q->softirq_done_fn = fn; 77 60 } 78 61 EXPORT_SYMBOL(blk_queue_softirq_done); 62 + 63 + void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) 64 + { 65 + q->rq_timeout = timeout; 66 + } 67 + EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); 68 + 69 + void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) 70 + { 71 + q->rq_timed_out_fn = fn; 72 + } 73 + EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); 74 + 75 + void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) 76 + { 77 + q->lld_busy_fn = fn; 78 + } 79 + EXPORT_SYMBOL_GPL(blk_queue_lld_busy); 79 80 80 81 /** 81 82 * blk_queue_make_request - define an alternate make_request function for a device ··· 162 127 * Different hardware can have different requirements as to what pages 163 128 * it can do I/O directly to. A low level driver can call 164 129 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 165 - * buffers for doing I/O to pages residing above @page. 130 + * buffers for doing I/O to pages residing above @dma_addr. 
166 131 **/ 167 132 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) 168 133 { ··· 247 212 * Description: 248 213 * Enables a low level driver to set an upper limit on the number of 249 214 * hw data segments in a request. This would be the largest number of 250 - * address/length pairs the host adapter can actually give as once 215 + * address/length pairs the host adapter can actually give at once 251 216 * to the device. 252 217 **/ 253 218 void blk_queue_max_hw_segments(struct request_queue *q, ··· 428 393 * @mask: alignment mask 429 394 * 430 395 * description: 431 - * set required memory and length aligment for direct dma transactions. 396 + * set required memory and length alignment for direct dma transactions. 432 397 * this is used when buiding direct io requests for the queue. 433 398 * 434 399 **/ ··· 444 409 * @mask: alignment mask 445 410 * 446 411 * description: 447 - * update required memory and length aligment for direct dma transactions. 412 + * update required memory and length alignment for direct dma transactions. 448 413 * If the requested alignment is larger than the current alignment, then 449 414 * the current queue alignment is updated to the new value, otherwise it 450 415 * is left alone. The design of this is to allow multiple objects
+175
block/blk-softirq.c
··· 1 + /* 2 + * Functions related to softirq rq completions 3 + */ 4 + #include <linux/kernel.h> 5 + #include <linux/module.h> 6 + #include <linux/init.h> 7 + #include <linux/bio.h> 8 + #include <linux/blkdev.h> 9 + #include <linux/interrupt.h> 10 + #include <linux/cpu.h> 11 + 12 + #include "blk.h" 13 + 14 + static DEFINE_PER_CPU(struct list_head, blk_cpu_done); 15 + 16 + /* 17 + * Softirq action handler - move entries to local list and loop over them 18 + * while passing them to the queue registered handler. 19 + */ 20 + static void blk_done_softirq(struct softirq_action *h) 21 + { 22 + struct list_head *cpu_list, local_list; 23 + 24 + local_irq_disable(); 25 + cpu_list = &__get_cpu_var(blk_cpu_done); 26 + list_replace_init(cpu_list, &local_list); 27 + local_irq_enable(); 28 + 29 + while (!list_empty(&local_list)) { 30 + struct request *rq; 31 + 32 + rq = list_entry(local_list.next, struct request, csd.list); 33 + list_del_init(&rq->csd.list); 34 + rq->q->softirq_done_fn(rq); 35 + } 36 + } 37 + 38 + #if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) 39 + static void trigger_softirq(void *data) 40 + { 41 + struct request *rq = data; 42 + unsigned long flags; 43 + struct list_head *list; 44 + 45 + local_irq_save(flags); 46 + list = &__get_cpu_var(blk_cpu_done); 47 + list_add_tail(&rq->csd.list, list); 48 + 49 + if (list->next == &rq->csd.list) 50 + raise_softirq_irqoff(BLOCK_SOFTIRQ); 51 + 52 + local_irq_restore(flags); 53 + } 54 + 55 + /* 56 + * Setup and invoke a run of 'trigger_softirq' on the given cpu. 
57 + */ 58 + static int raise_blk_irq(int cpu, struct request *rq) 59 + { 60 + if (cpu_online(cpu)) { 61 + struct call_single_data *data = &rq->csd; 62 + 63 + data->func = trigger_softirq; 64 + data->info = rq; 65 + data->flags = 0; 66 + 67 + __smp_call_function_single(cpu, data); 68 + return 0; 69 + } 70 + 71 + return 1; 72 + } 73 + #else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ 74 + static int raise_blk_irq(int cpu, struct request *rq) 75 + { 76 + return 1; 77 + } 78 + #endif 79 + 80 + static int __cpuinit blk_cpu_notify(struct notifier_block *self, 81 + unsigned long action, void *hcpu) 82 + { 83 + /* 84 + * If a CPU goes away, splice its entries to the current CPU 85 + * and trigger a run of the softirq 86 + */ 87 + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 88 + int cpu = (unsigned long) hcpu; 89 + 90 + local_irq_disable(); 91 + list_splice_init(&per_cpu(blk_cpu_done, cpu), 92 + &__get_cpu_var(blk_cpu_done)); 93 + raise_softirq_irqoff(BLOCK_SOFTIRQ); 94 + local_irq_enable(); 95 + } 96 + 97 + return NOTIFY_OK; 98 + } 99 + 100 + static struct notifier_block __cpuinitdata blk_cpu_notifier = { 101 + .notifier_call = blk_cpu_notify, 102 + }; 103 + 104 + void __blk_complete_request(struct request *req) 105 + { 106 + struct request_queue *q = req->q; 107 + unsigned long flags; 108 + int ccpu, cpu, group_cpu; 109 + 110 + BUG_ON(!q->softirq_done_fn); 111 + 112 + local_irq_save(flags); 113 + cpu = smp_processor_id(); 114 + group_cpu = blk_cpu_to_group(cpu); 115 + 116 + /* 117 + * Select completion CPU 118 + */ 119 + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) 120 + ccpu = req->cpu; 121 + else 122 + ccpu = cpu; 123 + 124 + if (ccpu == cpu || ccpu == group_cpu) { 125 + struct list_head *list; 126 + do_local: 127 + list = &__get_cpu_var(blk_cpu_done); 128 + list_add_tail(&req->csd.list, list); 129 + 130 + /* 131 + * if the list only contains our just added request, 132 + * signal a raise of the softirq. 
If there are already 133 + * entries there, someone already raised the irq but it 134 + * hasn't run yet. 135 + */ 136 + if (list->next == &req->csd.list) 137 + raise_softirq_irqoff(BLOCK_SOFTIRQ); 138 + } else if (raise_blk_irq(ccpu, req)) 139 + goto do_local; 140 + 141 + local_irq_restore(flags); 142 + } 143 + 144 + /** 145 + * blk_complete_request - end I/O on a request 146 + * @req: the request being processed 147 + * 148 + * Description: 149 + * Ends all I/O on a request. It does not handle partial completions, 150 + * unless the driver actually implements this in its completion callback 151 + * through requeueing. The actual completion happens out-of-order, 152 + * through a softirq handler. The user must have registered a completion 153 + * callback through blk_queue_softirq_done(). 154 + **/ 155 + void blk_complete_request(struct request *req) 156 + { 157 + if (unlikely(blk_should_fake_timeout(req->q))) 158 + return; 159 + if (!blk_mark_rq_complete(req)) 160 + __blk_complete_request(req); 161 + } 162 + EXPORT_SYMBOL(blk_complete_request); 163 + 164 + __init int blk_softirq_init(void) 165 + { 166 + int i; 167 + 168 + for_each_possible_cpu(i) 169 + INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); 170 + 171 + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); 172 + register_hotcpu_notifier(&blk_cpu_notifier); 173 + return 0; 174 + } 175 + subsys_initcall(blk_softirq_init);
+33 -2
block/blk-sysfs.c
··· 156 156 return ret; 157 157 } 158 158 159 + static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) 160 + { 161 + unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); 162 + 163 + return queue_var_show(set != 0, page); 164 + } 165 + 166 + static ssize_t 167 + queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) 168 + { 169 + ssize_t ret = -EINVAL; 170 + #if defined(CONFIG_USE_GENERIC_SMP_HELPERS) 171 + unsigned long val; 172 + 173 + ret = queue_var_store(&val, page, count); 174 + spin_lock_irq(q->queue_lock); 175 + if (val) 176 + queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 177 + else 178 + queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 179 + spin_unlock_irq(q->queue_lock); 180 + #endif 181 + return ret; 182 + } 159 183 160 184 static struct queue_sysfs_entry queue_requests_entry = { 161 185 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, ··· 221 197 .store = queue_nomerges_store, 222 198 }; 223 199 200 + static struct queue_sysfs_entry queue_rq_affinity_entry = { 201 + .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR }, 202 + .show = queue_rq_affinity_show, 203 + .store = queue_rq_affinity_store, 204 + }; 205 + 224 206 static struct attribute *default_attrs[] = { 225 207 &queue_requests_entry.attr, 226 208 &queue_ra_entry.attr, ··· 235 205 &queue_iosched_entry.attr, 236 206 &queue_hw_sector_size_entry.attr, 237 207 &queue_nomerges_entry.attr, 208 + &queue_rq_affinity_entry.attr, 238 209 NULL, 239 210 }; 240 211 ··· 341 310 if (!q->request_fn) 342 311 return 0; 343 312 344 - ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), 313 + ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), 345 314 "%s", "queue"); 346 315 if (ret < 0) 347 316 return ret; ··· 370 339 371 340 kobject_uevent(&q->kobj, KOBJ_REMOVE); 372 341 kobject_del(&q->kobj); 373 - kobject_put(&disk->dev.kobj); 342 + kobject_put(&disk_to_dev(disk)->kobj); 374 343 } 375 344 }
+16 -6
block/blk-tag.c
··· 29 29 * __blk_free_tags - release a given set of tag maintenance info 30 30 * @bqt: the tag map to free 31 31 * 32 - * Tries to free the specified @bqt@. Returns true if it was 32 + * Tries to free the specified @bqt. Returns true if it was 33 33 * actually freed and false if there are still references using it 34 34 */ 35 35 static int __blk_free_tags(struct blk_queue_tag *bqt) ··· 78 78 * blk_free_tags - release a given set of tag maintenance info 79 79 * @bqt: the tag map to free 80 80 * 81 - * For externally managed @bqt@ frees the map. Callers of this 81 + * For externally managed @bqt frees the map. Callers of this 82 82 * function must guarantee to have released all the queues that 83 83 * might have been using this tag map. 84 84 */ ··· 94 94 * @q: the request queue for the device 95 95 * 96 96 * Notes: 97 - * This is used to disabled tagged queuing to a device, yet leave 97 + * This is used to disable tagged queuing to a device, yet leave 98 98 * queue in function. 99 99 **/ 100 100 void blk_queue_free_tags(struct request_queue *q) ··· 271 271 * @rq: the request that has completed 272 272 * 273 273 * Description: 274 - * Typically called when end_that_request_first() returns 0, meaning 274 + * Typically called when end_that_request_first() returns %0, meaning 275 275 * all transfers have been done for a request. It's important to call 276 276 * this function before end_that_request_last(), as that will put the 277 277 * request back on the free list thus corrupting the internal tag list. ··· 337 337 int blk_queue_start_tag(struct request_queue *q, struct request *rq) 338 338 { 339 339 struct blk_queue_tag *bqt = q->queue_tags; 340 + unsigned max_depth, offset; 340 341 int tag; 341 342 342 343 if (unlikely((rq->cmd_flags & REQ_QUEUED))) { ··· 351 350 /* 352 351 * Protect against shared tag maps, as we may not have exclusive 353 352 * access to the tag map. 
353 + * 354 + * We reserve a few tags just for sync IO, since we don't want 355 + * to starve sync IO on behalf of flooding async IO. 354 356 */ 357 + max_depth = bqt->max_depth; 358 + if (rq_is_sync(rq)) 359 + offset = 0; 360 + else 361 + offset = max_depth >> 2; 362 + 355 363 do { 356 - tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); 357 - if (tag >= bqt->max_depth) 364 + tag = find_next_zero_bit(bqt->tag_map, max_depth, offset); 365 + if (tag >= max_depth) 358 366 return 1; 359 367 360 368 } while (test_and_set_bit_lock(tag, bqt->tag_map));
+238
block/blk-timeout.c
··· 1 + /* 2 + * Functions related to generic timeout handling of requests. 3 + */ 4 + #include <linux/kernel.h> 5 + #include <linux/module.h> 6 + #include <linux/blkdev.h> 7 + #include <linux/fault-inject.h> 8 + 9 + #include "blk.h" 10 + 11 + #ifdef CONFIG_FAIL_IO_TIMEOUT 12 + 13 + static DECLARE_FAULT_ATTR(fail_io_timeout); 14 + 15 + static int __init setup_fail_io_timeout(char *str) 16 + { 17 + return setup_fault_attr(&fail_io_timeout, str); 18 + } 19 + __setup("fail_io_timeout=", setup_fail_io_timeout); 20 + 21 + int blk_should_fake_timeout(struct request_queue *q) 22 + { 23 + if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) 24 + return 0; 25 + 26 + return should_fail(&fail_io_timeout, 1); 27 + } 28 + 29 + static int __init fail_io_timeout_debugfs(void) 30 + { 31 + return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); 32 + } 33 + 34 + late_initcall(fail_io_timeout_debugfs); 35 + 36 + ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr, 37 + char *buf) 38 + { 39 + struct gendisk *disk = dev_to_disk(dev); 40 + int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags); 41 + 42 + return sprintf(buf, "%d\n", set != 0); 43 + } 44 + 45 + ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, 46 + const char *buf, size_t count) 47 + { 48 + struct gendisk *disk = dev_to_disk(dev); 49 + int val; 50 + 51 + if (count) { 52 + struct request_queue *q = disk->queue; 53 + char *p = (char *) buf; 54 + 55 + val = simple_strtoul(p, &p, 10); 56 + spin_lock_irq(q->queue_lock); 57 + if (val) 58 + queue_flag_set(QUEUE_FLAG_FAIL_IO, q); 59 + else 60 + queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); 61 + spin_unlock_irq(q->queue_lock); 62 + } 63 + 64 + return count; 65 + } 66 + 67 + #endif /* CONFIG_FAIL_IO_TIMEOUT */ 68 + 69 + /* 70 + * blk_delete_timer - Delete/cancel timer for a given function. 
71 + * @req: request that we are canceling timer for 72 + * 73 + */ 74 + void blk_delete_timer(struct request *req) 75 + { 76 + struct request_queue *q = req->q; 77 + 78 + /* 79 + * Nothing to detach 80 + */ 81 + if (!q->rq_timed_out_fn || !req->deadline) 82 + return; 83 + 84 + list_del_init(&req->timeout_list); 85 + 86 + if (list_empty(&q->timeout_list)) 87 + del_timer(&q->timeout); 88 + } 89 + 90 + static void blk_rq_timed_out(struct request *req) 91 + { 92 + struct request_queue *q = req->q; 93 + enum blk_eh_timer_return ret; 94 + 95 + ret = q->rq_timed_out_fn(req); 96 + switch (ret) { 97 + case BLK_EH_HANDLED: 98 + __blk_complete_request(req); 99 + break; 100 + case BLK_EH_RESET_TIMER: 101 + blk_clear_rq_complete(req); 102 + blk_add_timer(req); 103 + break; 104 + case BLK_EH_NOT_HANDLED: 105 + /* 106 + * LLD handles this for now but in the future 107 + * we can send a request msg to abort the command 108 + * and we can move more of the generic scsi eh code to 109 + * the blk layer. 110 + */ 111 + break; 112 + default: 113 + printk(KERN_ERR "block: bad eh return: %d\n", ret); 114 + break; 115 + } 116 + } 117 + 118 + void blk_rq_timed_out_timer(unsigned long data) 119 + { 120 + struct request_queue *q = (struct request_queue *) data; 121 + unsigned long flags, uninitialized_var(next), next_set = 0; 122 + struct request *rq, *tmp; 123 + 124 + spin_lock_irqsave(q->queue_lock, flags); 125 + 126 + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { 127 + if (time_after_eq(jiffies, rq->deadline)) { 128 + list_del_init(&rq->timeout_list); 129 + 130 + /* 131 + * Check if we raced with end io completion 132 + */ 133 + if (blk_mark_rq_complete(rq)) 134 + continue; 135 + blk_rq_timed_out(rq); 136 + } 137 + if (!next_set) { 138 + next = rq->deadline; 139 + next_set = 1; 140 + } else if (time_after(next, rq->deadline)) 141 + next = rq->deadline; 142 + } 143 + 144 + if (next_set && !list_empty(&q->timeout_list)) 145 + mod_timer(&q->timeout, 
round_jiffies(next)); 146 + 147 + spin_unlock_irqrestore(q->queue_lock, flags); 148 + } 149 + 150 + /** 151 + * blk_abort_request -- Request request recovery for the specified command 152 + * @req: pointer to the request of interest 153 + * 154 + * This function requests that the block layer start recovery for the 155 + * request by deleting the timer and calling the q's timeout function. 156 + * LLDDs who implement their own error recovery MAY ignore the timeout 157 + * event if they generated blk_abort_req. Must hold queue lock. 158 + */ 159 + void blk_abort_request(struct request *req) 160 + { 161 + if (blk_mark_rq_complete(req)) 162 + return; 163 + blk_delete_timer(req); 164 + blk_rq_timed_out(req); 165 + } 166 + EXPORT_SYMBOL_GPL(blk_abort_request); 167 + 168 + /** 169 + * blk_add_timer - Start timeout timer for a single request 170 + * @req: request that is about to start running. 171 + * 172 + * Notes: 173 + * Each request has its own timer, and as it is added to the queue, we 174 + * set up the timer. When the request completes, we cancel the timer. 175 + */ 176 + void blk_add_timer(struct request *req) 177 + { 178 + struct request_queue *q = req->q; 179 + unsigned long expiry; 180 + 181 + if (!q->rq_timed_out_fn) 182 + return; 183 + 184 + BUG_ON(!list_empty(&req->timeout_list)); 185 + BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); 186 + 187 + if (req->timeout) 188 + req->deadline = jiffies + req->timeout; 189 + else { 190 + req->deadline = jiffies + q->rq_timeout; 191 + /* 192 + * Some LLDs, like scsi, peek at the timeout to prevent 193 + * a command from being retried forever. 194 + */ 195 + req->timeout = q->rq_timeout; 196 + } 197 + list_add_tail(&req->timeout_list, &q->timeout_list); 198 + 199 + /* 200 + * If the timer isn't already pending or this timeout is earlier 201 + * than an existing one, modify the timer. Round to next nearest 202 + * second. 
203 + */ 204 + expiry = round_jiffies(req->deadline); 205 + 206 + /* 207 + * We use ->deadline == 0 to detect whether a timer was added or 208 + * not, so just increase to next jiffy for that specific case 209 + */ 210 + if (unlikely(!req->deadline)) 211 + req->deadline = 1; 212 + 213 + if (!timer_pending(&q->timeout) || 214 + time_before(expiry, q->timeout.expires)) 215 + mod_timer(&q->timeout, expiry); 216 + } 217 + 218 + /** 219 + * blk_abort_queue -- Abort all request on given queue 220 + * @queue: pointer to queue 221 + * 222 + */ 223 + void blk_abort_queue(struct request_queue *q) 224 + { 225 + unsigned long flags; 226 + struct request *rq, *tmp; 227 + 228 + spin_lock_irqsave(q->queue_lock, flags); 229 + 230 + elv_abort_queue(q); 231 + 232 + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) 233 + blk_abort_request(rq); 234 + 235 + spin_unlock_irqrestore(q->queue_lock, flags); 236 + 237 + } 238 + EXPORT_SYMBOL_GPL(blk_abort_queue);
+48
block/blk.h
··· 17 17 18 18 void blk_unplug_work(struct work_struct *work); 19 19 void blk_unplug_timeout(unsigned long data); 20 + void blk_rq_timed_out_timer(unsigned long data); 21 + void blk_delete_timer(struct request *); 22 + void blk_add_timer(struct request *); 23 + 24 + /* 25 + * Internal atomic flags for request handling 26 + */ 27 + enum rq_atomic_flags { 28 + REQ_ATOM_COMPLETE = 0, 29 + }; 30 + 31 + /* 32 + * EH timer and IO completion will both attempt to 'grab' the request, make 33 + * sure that only one of them suceeds 34 + */ 35 + static inline int blk_mark_rq_complete(struct request *rq) 36 + { 37 + return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); 38 + } 39 + 40 + static inline void blk_clear_rq_complete(struct request *rq) 41 + { 42 + clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); 43 + } 44 + 45 + #ifdef CONFIG_FAIL_IO_TIMEOUT 46 + int blk_should_fake_timeout(struct request_queue *); 47 + ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); 48 + ssize_t part_timeout_store(struct device *, struct device_attribute *, 49 + const char *, size_t); 50 + #else 51 + static inline int blk_should_fake_timeout(struct request_queue *q) 52 + { 53 + return 0; 54 + } 55 + #endif 20 56 21 57 struct io_context *current_io_context(gfp_t gfp_flags, int node); 22 58 ··· 94 58 bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i) 95 59 96 60 #endif /* BLK_DEV_INTEGRITY */ 61 + 62 + static inline int blk_cpu_to_group(int cpu) 63 + { 64 + #ifdef CONFIG_SCHED_MC 65 + cpumask_t mask = cpu_coregroup_map(cpu); 66 + return first_cpu(mask); 67 + #elif defined(CONFIG_SCHED_SMT) 68 + return first_cpu(per_cpu(cpu_sibling_map, cpu)); 69 + #else 70 + return cpu; 71 + #endif 72 + } 97 73 98 74 #endif
+10 -22
block/blktrace.c
··· 111 111 */ 112 112 static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; 113 113 114 - /* 115 - * Bio action bits of interest 116 - */ 117 - static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) }; 118 - 119 - /* 120 - * More could be added as needed, taking care to increment the decrementer 121 - * to get correct indexing 122 - */ 123 - #define trace_barrier_bit(rw) \ 124 - (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0)) 125 - #define trace_sync_bit(rw) \ 126 - (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) 127 - #define trace_ahead_bit(rw) \ 128 - (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) 129 - #define trace_meta_bit(rw) \ 130 - (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3)) 114 + /* The ilog2() calls fall out because they're constant */ 115 + #define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ 116 + (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) 131 117 132 118 /* 133 119 * The worker for the various blk_add_trace*() types. Fills out a ··· 133 147 return; 134 148 135 149 what |= ddir_act[rw & WRITE]; 136 - what |= bio_act[trace_barrier_bit(rw)]; 137 - what |= bio_act[trace_sync_bit(rw)]; 138 - what |= bio_act[trace_ahead_bit(rw)]; 139 - what |= bio_act[trace_meta_bit(rw)]; 150 + what |= MASK_TC_BIT(rw, BARRIER); 151 + what |= MASK_TC_BIT(rw, SYNC); 152 + what |= MASK_TC_BIT(rw, AHEAD); 153 + what |= MASK_TC_BIT(rw, META); 154 + what |= MASK_TC_BIT(rw, DISCARD); 140 155 141 156 pid = tsk->pid; 142 157 if (unlikely(act_log_check(bt, what, sector, pid))) ··· 369 382 if (!buts->buf_size || !buts->buf_nr) 370 383 return -EINVAL; 371 384 372 - strcpy(buts->name, name); 385 + strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); 386 + buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; 373 387 374 388 /* 375 389 * some device names have larger paths - convert the slashes
+4 -2
block/bsg.c
··· 283 283 next_rq->cmd_type = rq->cmd_type; 284 284 285 285 dxferp = (void*)(unsigned long)hdr->din_xferp; 286 - ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len); 286 + ret = blk_rq_map_user(q, next_rq, NULL, dxferp, 287 + hdr->din_xfer_len, GFP_KERNEL); 287 288 if (ret) 288 289 goto out; 289 290 } ··· 299 298 dxfer_len = 0; 300 299 301 300 if (dxfer_len) { 302 - ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); 301 + ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len, 302 + GFP_KERNEL); 303 303 if (ret) 304 304 goto out; 305 305 }
+47 -10
block/cfq-iosched.c
··· 39 39 #define CFQ_MIN_TT (2) 40 40 41 41 #define CFQ_SLICE_SCALE (5) 42 + #define CFQ_HW_QUEUE_MIN (5) 42 43 43 44 #define RQ_CIC(rq) \ 44 45 ((struct cfq_io_context *) (rq)->elevator_private) ··· 87 86 88 87 int rq_in_driver; 89 88 int sync_flight; 89 + 90 + /* 91 + * queue-depth detection 92 + */ 93 + int rq_queued; 90 94 int hw_tag; 95 + int hw_tag_samples; 96 + int rq_in_driver_peak; 91 97 92 98 /* 93 99 * idle window management ··· 252 244 { 253 245 if (cfqd->busy_queues) { 254 246 cfq_log(cfqd, "schedule dispatch"); 255 - kblockd_schedule_work(&cfqd->unplug_work); 247 + kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); 256 248 } 257 249 } 258 250 ··· 662 654 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", 663 655 cfqd->rq_in_driver); 664 656 665 - /* 666 - * If the depth is larger 1, it really could be queueing. But lets 667 - * make the mark a little higher - idling could still be good for 668 - * low queueing, and a low queueing number could also just indicate 669 - * a SCSI mid layer like behaviour where limit+1 is often seen. 670 - */ 671 - if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) 672 - cfqd->hw_tag = 1; 673 - 674 657 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; 675 658 } 676 659 ··· 685 686 list_del_init(&rq->queuelist); 686 687 cfq_del_rq_rb(rq); 687 688 689 + cfqq->cfqd->rq_queued--; 688 690 if (rq_is_meta(rq)) { 689 691 WARN_ON(!cfqq->meta_pending); 690 692 cfqq->meta_pending--; ··· 877 877 struct cfq_queue *cfqq = cfqd->active_queue; 878 878 struct cfq_io_context *cic; 879 879 unsigned long sl; 880 + 881 + /* 882 + * SSD device without seek penalty, disable idling. But only do so 883 + * for devices that support queuing, otherwise we still have a problem 884 + * with sync vs async workloads. 
885 + */ 886 + if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) 887 + return; 880 888 881 889 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 882 890 WARN_ON(cfq_cfqq_slice_new(cfqq)); ··· 1841 1833 { 1842 1834 struct cfq_io_context *cic = RQ_CIC(rq); 1843 1835 1836 + cfqd->rq_queued++; 1844 1837 if (rq_is_meta(rq)) 1845 1838 cfqq->meta_pending++; 1846 1839 ··· 1889 1880 cfq_rq_enqueued(cfqd, cfqq, rq); 1890 1881 } 1891 1882 1883 + /* 1884 + * Update hw_tag based on peak queue depth over 50 samples under 1885 + * sufficient load. 1886 + */ 1887 + static void cfq_update_hw_tag(struct cfq_data *cfqd) 1888 + { 1889 + if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) 1890 + cfqd->rq_in_driver_peak = cfqd->rq_in_driver; 1891 + 1892 + if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && 1893 + cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) 1894 + return; 1895 + 1896 + if (cfqd->hw_tag_samples++ < 50) 1897 + return; 1898 + 1899 + if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN) 1900 + cfqd->hw_tag = 1; 1901 + else 1902 + cfqd->hw_tag = 0; 1903 + 1904 + cfqd->hw_tag_samples = 0; 1905 + cfqd->rq_in_driver_peak = 0; 1906 + } 1907 + 1892 1908 static void cfq_completed_request(struct request_queue *q, struct request *rq) 1893 1909 { 1894 1910 struct cfq_queue *cfqq = RQ_CFQQ(rq); ··· 1923 1889 1924 1890 now = jiffies; 1925 1891 cfq_log_cfqq(cfqd, cfqq, "complete"); 1892 + 1893 + cfq_update_hw_tag(cfqd); 1926 1894 1927 1895 WARN_ON(!cfqd->rq_in_driver); 1928 1896 WARN_ON(!cfqq->dispatched); ··· 2236 2200 cfqd->cfq_slice[1] = cfq_slice_sync; 2237 2201 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2238 2202 cfqd->cfq_slice_idle = cfq_slice_idle; 2203 + cfqd->hw_tag = 1; 2239 2204 2240 2205 return cfqd; 2241 2206 }
+2 -7
block/cmd-filter.c
··· 211 211 { 212 212 int ret; 213 213 struct blk_cmd_filter *filter = &disk->queue->cmd_filter; 214 - struct kobject *parent = kobject_get(disk->holder_dir->parent); 215 214 216 - if (!parent) 217 - return -ENODEV; 218 - 219 - ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent, 215 + ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, 216 + &disk_to_dev(disk)->kobj, 220 217 "%s", "cmd_filter"); 221 - 222 218 if (ret < 0) 223 219 return ret; 224 220 ··· 227 231 struct blk_cmd_filter *filter = &disk->queue->cmd_filter; 228 232 229 233 kobject_put(&filter->kobj); 230 - kobject_put(disk->holder_dir->parent); 231 234 } 232 235 EXPORT_SYMBOL(blk_unregister_filter); 233 236 #endif
+1
block/compat_ioctl.c
··· 788 788 return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); 789 789 case BLKFLSBUF: 790 790 case BLKROSET: 791 + case BLKDISCARD: 791 792 /* 792 793 * the ones below are implemented in blkdev_locked_ioctl, 793 794 * but we call blkdev_ioctl, which gets the lock for us
+16 -24
block/deadline-iosched.c
··· 33 33 */ 34 34 struct rb_root sort_list[2]; 35 35 struct list_head fifo_list[2]; 36 - 36 + 37 37 /* 38 38 * next in sort order. read, write or both are NULL 39 39 */ ··· 53 53 54 54 static void deadline_move_request(struct deadline_data *, struct request *); 55 55 56 - #define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))]) 56 + static inline struct rb_root * 57 + deadline_rb_root(struct deadline_data *dd, struct request *rq) 58 + { 59 + return &dd->sort_list[rq_data_dir(rq)]; 60 + } 57 61 58 62 /* 59 63 * get the request after `rq' in sector-sorted order ··· 76 72 static void 77 73 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) 78 74 { 79 - struct rb_root *root = RQ_RB_ROOT(dd, rq); 75 + struct rb_root *root = deadline_rb_root(dd, rq); 80 76 struct request *__alias; 81 77 82 - retry: 83 - __alias = elv_rb_add(root, rq); 84 - if (unlikely(__alias)) { 78 + while (unlikely(__alias = elv_rb_add(root, rq))) 85 79 deadline_move_request(dd, __alias); 86 - goto retry; 87 - } 88 80 } 89 81 90 82 static inline void ··· 91 91 if (dd->next_rq[data_dir] == rq) 92 92 dd->next_rq[data_dir] = deadline_latter_request(rq); 93 93 94 - elv_rb_del(RQ_RB_ROOT(dd, rq), rq); 94 + elv_rb_del(deadline_rb_root(dd, rq), rq); 95 95 } 96 96 97 97 /* ··· 106 106 deadline_add_rq_rb(dd, rq); 107 107 108 108 /* 109 - * set expire time (only used for reads) and add to fifo list 109 + * set expire time and add to fifo list 110 110 */ 111 111 rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); 112 112 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); ··· 162 162 * if the merge was a front merge, we need to reposition request 163 163 */ 164 164 if (type == ELEVATOR_FRONT_MERGE) { 165 - elv_rb_del(RQ_RB_ROOT(dd, req), req); 165 + elv_rb_del(deadline_rb_root(dd, req), req); 166 166 deadline_add_rq_rb(dd, req); 167 167 } 168 168 } ··· 212 212 dd->next_rq[WRITE] = NULL; 213 213 dd->next_rq[data_dir] = deadline_latter_request(rq); 214 214 215 - dd->last_sector = 
rq->sector + rq->nr_sectors; 215 + dd->last_sector = rq_end_sector(rq); 216 216 217 217 /* 218 218 * take it off the sort and fifo list, move ··· 222 222 } 223 223 224 224 /* 225 - * deadline_check_fifo returns 0 if there are no expired reads on the fifo, 225 + * deadline_check_fifo returns 0 if there are no expired requests on the fifo, 226 226 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) 227 227 */ 228 228 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) ··· 258 258 else 259 259 rq = dd->next_rq[READ]; 260 260 261 - if (rq) { 262 - /* we have a "next request" */ 263 - 264 - if (dd->last_sector != rq->sector) 265 - /* end the batch on a non sequential request */ 266 - dd->batching += dd->fifo_batch; 267 - 268 - if (dd->batching < dd->fifo_batch) 269 - /* we are still entitled to batch */ 270 - goto dispatch_request; 271 - } 261 + if (rq && dd->batching < dd->fifo_batch) 262 + /* we have a next request are still entitled to batch */ 263 + goto dispatch_request; 272 264 273 265 /* 274 266 * at this point we are not running a batch. select the appropriate
+33 -7
block/elevator.c
··· 34 34 #include <linux/delay.h> 35 35 #include <linux/blktrace_api.h> 36 36 #include <linux/hash.h> 37 + #include <linux/uaccess.h> 37 38 38 - #include <asm/uaccess.h> 39 + #include "blk.h" 39 40 40 41 static DEFINE_SPINLOCK(elv_list_lock); 41 42 static LIST_HEAD(elv_list); ··· 73 72 int elv_rq_merge_ok(struct request *rq, struct bio *bio) 74 73 { 75 74 if (!rq_mergeable(rq)) 75 + return 0; 76 + 77 + /* 78 + * Don't merge file system requests and discard requests 79 + */ 80 + if (bio_discard(bio) != bio_discard(rq->bio)) 76 81 return 0; 77 82 78 83 /* ··· 445 438 list_for_each_prev(entry, &q->queue_head) { 446 439 struct request *pos = list_entry_rq(entry); 447 440 441 + if (blk_discard_rq(rq) != blk_discard_rq(pos)) 442 + break; 448 443 if (rq_data_dir(rq) != rq_data_dir(pos)) 449 444 break; 450 445 if (pos->cmd_flags & stop_flags) ··· 616 607 break; 617 608 618 609 case ELEVATOR_INSERT_SORT: 619 - BUG_ON(!blk_fs_request(rq)); 610 + BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq)); 620 611 rq->cmd_flags |= REQ_SORTED; 621 612 q->nr_sorted++; 622 613 if (rq_mergeable(rq)) { ··· 701 692 * this request is scheduling boundary, update 702 693 * end_sector 703 694 */ 704 - if (blk_fs_request(rq)) { 695 + if (blk_fs_request(rq) || blk_discard_rq(rq)) { 705 696 q->end_sector = rq_end_sector(rq); 706 697 q->boundary_rq = rq; 707 698 } ··· 754 745 * not ever see it. 
755 746 */ 756 747 if (blk_empty_barrier(rq)) { 757 - end_queued_request(rq, 1); 748 + __blk_end_request(rq, 0, blk_rq_bytes(rq)); 758 749 continue; 759 750 } 760 751 if (!(rq->cmd_flags & REQ_STARTED)) { ··· 773 764 */ 774 765 rq->cmd_flags |= REQ_STARTED; 775 766 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 767 + 768 + /* 769 + * We are now handing the request to the hardware, 770 + * add the timeout handler 771 + */ 772 + blk_add_timer(rq); 776 773 } 777 774 778 775 if (!q->boundary_rq || q->boundary_rq == rq) { ··· 797 782 * device can handle 798 783 */ 799 784 rq->nr_phys_segments++; 800 - rq->nr_hw_segments++; 801 785 } 802 786 803 787 if (!q->prep_rq_fn) ··· 819 805 * so that we don't add it again 820 806 */ 821 807 --rq->nr_phys_segments; 822 - --rq->nr_hw_segments; 823 808 } 824 809 825 810 rq = NULL; 826 811 break; 827 812 } else if (ret == BLKPREP_KILL) { 828 813 rq->cmd_flags |= REQ_QUIET; 829 - end_queued_request(rq, 0); 814 + __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); 830 815 } else { 831 816 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 832 817 break; ··· 913 900 914 901 return ELV_MQUEUE_MAY; 915 902 } 903 + 904 + void elv_abort_queue(struct request_queue *q) 905 + { 906 + struct request *rq; 907 + 908 + while (!list_empty(&q->queue_head)) { 909 + rq = list_entry_rq(q->queue_head.next); 910 + rq->cmd_flags |= REQ_QUIET; 911 + blk_add_trace_rq(q, rq, BLK_TA_ABORT); 912 + __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); 913 + } 914 + } 915 + EXPORT_SYMBOL(elv_abort_queue); 916 916 917 917 void elv_completed_request(struct request_queue *q, struct request *rq) 918 918 {
+640 -337
block/genhd.c
··· 16 16 #include <linux/kobj_map.h> 17 17 #include <linux/buffer_head.h> 18 18 #include <linux/mutex.h> 19 + #include <linux/idr.h> 19 20 20 21 #include "blk.h" 21 22 ··· 25 24 struct kobject *block_depr; 26 25 #endif 27 26 27 + /* for extended dynamic devt allocation, currently only one major is used */ 28 + #define MAX_EXT_DEVT (1 << MINORBITS) 29 + 30 + /* For extended devt allocation. ext_devt_mutex prevents look up 31 + * results from going away underneath its user. 32 + */ 33 + static DEFINE_MUTEX(ext_devt_mutex); 34 + static DEFINE_IDR(ext_devt_idr); 35 + 28 36 static struct device_type disk_type; 37 + 38 + /** 39 + * disk_get_part - get partition 40 + * @disk: disk to look partition from 41 + * @partno: partition number 42 + * 43 + * Look for partition @partno from @disk. If found, increment 44 + * reference count and return it. 45 + * 46 + * CONTEXT: 47 + * Don't care. 48 + * 49 + * RETURNS: 50 + * Pointer to the found partition on success, NULL if not found. 51 + */ 52 + struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 53 + { 54 + struct hd_struct *part = NULL; 55 + struct disk_part_tbl *ptbl; 56 + 57 + if (unlikely(partno < 0)) 58 + return NULL; 59 + 60 + rcu_read_lock(); 61 + 62 + ptbl = rcu_dereference(disk->part_tbl); 63 + if (likely(partno < ptbl->len)) { 64 + part = rcu_dereference(ptbl->part[partno]); 65 + if (part) 66 + get_device(part_to_dev(part)); 67 + } 68 + 69 + rcu_read_unlock(); 70 + 71 + return part; 72 + } 73 + EXPORT_SYMBOL_GPL(disk_get_part); 74 + 75 + /** 76 + * disk_part_iter_init - initialize partition iterator 77 + * @piter: iterator to initialize 78 + * @disk: disk to iterate over 79 + * @flags: DISK_PITER_* flags 80 + * 81 + * Initialize @piter so that it iterates over partitions of @disk. 82 + * 83 + * CONTEXT: 84 + * Don't care. 
85 + */ 86 + void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 87 + unsigned int flags) 88 + { 89 + struct disk_part_tbl *ptbl; 90 + 91 + rcu_read_lock(); 92 + ptbl = rcu_dereference(disk->part_tbl); 93 + 94 + piter->disk = disk; 95 + piter->part = NULL; 96 + 97 + if (flags & DISK_PITER_REVERSE) 98 + piter->idx = ptbl->len - 1; 99 + else if (flags & DISK_PITER_INCL_PART0) 100 + piter->idx = 0; 101 + else 102 + piter->idx = 1; 103 + 104 + piter->flags = flags; 105 + 106 + rcu_read_unlock(); 107 + } 108 + EXPORT_SYMBOL_GPL(disk_part_iter_init); 109 + 110 + /** 111 + * disk_part_iter_next - proceed iterator to the next partition and return it 112 + * @piter: iterator of interest 113 + * 114 + * Proceed @piter to the next partition and return it. 115 + * 116 + * CONTEXT: 117 + * Don't care. 118 + */ 119 + struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 120 + { 121 + struct disk_part_tbl *ptbl; 122 + int inc, end; 123 + 124 + /* put the last partition */ 125 + disk_put_part(piter->part); 126 + piter->part = NULL; 127 + 128 + /* get part_tbl */ 129 + rcu_read_lock(); 130 + ptbl = rcu_dereference(piter->disk->part_tbl); 131 + 132 + /* determine iteration parameters */ 133 + if (piter->flags & DISK_PITER_REVERSE) { 134 + inc = -1; 135 + if (piter->flags & DISK_PITER_INCL_PART0) 136 + end = -1; 137 + else 138 + end = 0; 139 + } else { 140 + inc = 1; 141 + end = ptbl->len; 142 + } 143 + 144 + /* iterate to the next partition */ 145 + for (; piter->idx != end; piter->idx += inc) { 146 + struct hd_struct *part; 147 + 148 + part = rcu_dereference(ptbl->part[piter->idx]); 149 + if (!part) 150 + continue; 151 + if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) 152 + continue; 153 + 154 + get_device(part_to_dev(part)); 155 + piter->part = part; 156 + piter->idx += inc; 157 + break; 158 + } 159 + 160 + rcu_read_unlock(); 161 + 162 + return piter->part; 163 + } 164 + EXPORT_SYMBOL_GPL(disk_part_iter_next); 165 + 166 + /** 
167 + * disk_part_iter_exit - finish up partition iteration 168 + * @piter: iter of interest 169 + * 170 + * Called when iteration is over. Cleans up @piter. 171 + * 172 + * CONTEXT: 173 + * Don't care. 174 + */ 175 + void disk_part_iter_exit(struct disk_part_iter *piter) 176 + { 177 + disk_put_part(piter->part); 178 + piter->part = NULL; 179 + } 180 + EXPORT_SYMBOL_GPL(disk_part_iter_exit); 181 + 182 + /** 183 + * disk_map_sector_rcu - map sector to partition 184 + * @disk: gendisk of interest 185 + * @sector: sector to map 186 + * 187 + * Find out which partition @sector maps to on @disk. This is 188 + * primarily used for stats accounting. 189 + * 190 + * CONTEXT: 191 + * RCU read locked. The returned partition pointer is valid only 192 + * while preemption is disabled. 193 + * 194 + * RETURNS: 195 + * Found partition on success, part0 is returned if no partition matches 196 + */ 197 + struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 198 + { 199 + struct disk_part_tbl *ptbl; 200 + int i; 201 + 202 + ptbl = rcu_dereference(disk->part_tbl); 203 + 204 + for (i = 1; i < ptbl->len; i++) { 205 + struct hd_struct *part = rcu_dereference(ptbl->part[i]); 206 + 207 + if (part && part->start_sect <= sector && 208 + sector < part->start_sect + part->nr_sects) 209 + return part; 210 + } 211 + return &disk->part0; 212 + } 213 + EXPORT_SYMBOL_GPL(disk_map_sector_rcu); 29 214 30 215 /* 31 216 * Can be deleted altogether. Later. 
··· 230 43 } 231 44 232 45 #ifdef CONFIG_PROC_FS 233 - void blkdev_show(struct seq_file *f, off_t offset) 46 + void blkdev_show(struct seq_file *seqf, off_t offset) 234 47 { 235 48 struct blk_major_name *dp; 236 49 237 50 if (offset < BLKDEV_MAJOR_HASH_SIZE) { 238 51 mutex_lock(&block_class_lock); 239 52 for (dp = major_names[offset]; dp; dp = dp->next) 240 - seq_printf(f, "%3d %s\n", dp->major, dp->name); 53 + seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 241 54 mutex_unlock(&block_class_lock); 242 55 } 243 56 } ··· 323 136 324 137 static struct kobj_map *bdev_map; 325 138 139 + /** 140 + * blk_mangle_minor - scatter minor numbers apart 141 + * @minor: minor number to mangle 142 + * 143 + * Scatter consecutively allocated @minor number apart if MANGLE_DEVT 144 + * is enabled. Mangling twice gives the original value. 145 + * 146 + * RETURNS: 147 + * Mangled value. 148 + * 149 + * CONTEXT: 150 + * Don't care. 151 + */ 152 + static int blk_mangle_minor(int minor) 153 + { 154 + #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT 155 + int i; 156 + 157 + for (i = 0; i < MINORBITS / 2; i++) { 158 + int low = minor & (1 << i); 159 + int high = minor & (1 << (MINORBITS - 1 - i)); 160 + int distance = MINORBITS - 1 - 2 * i; 161 + 162 + minor ^= low | high; /* clear both bits */ 163 + low <<= distance; /* swap the positions */ 164 + high >>= distance; 165 + minor |= low | high; /* and set */ 166 + } 167 + #endif 168 + return minor; 169 + } 170 + 171 + /** 172 + * blk_alloc_devt - allocate a dev_t for a partition 173 + * @part: partition to allocate dev_t for 174 + * @gfp_mask: memory allocation flag 175 + * @devt: out parameter for resulting dev_t 176 + * 177 + * Allocate a dev_t for block device. 178 + * 179 + * RETURNS: 180 + * 0 on success, allocated dev_t is returned in *@devt. -errno on 181 + * failure. 182 + * 183 + * CONTEXT: 184 + * Might sleep. 
185 + */ 186 + int blk_alloc_devt(struct hd_struct *part, dev_t *devt) 187 + { 188 + struct gendisk *disk = part_to_disk(part); 189 + int idx, rc; 190 + 191 + /* in consecutive minor range? */ 192 + if (part->partno < disk->minors) { 193 + *devt = MKDEV(disk->major, disk->first_minor + part->partno); 194 + return 0; 195 + } 196 + 197 + /* allocate ext devt */ 198 + do { 199 + if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL)) 200 + return -ENOMEM; 201 + rc = idr_get_new(&ext_devt_idr, part, &idx); 202 + } while (rc == -EAGAIN); 203 + 204 + if (rc) 205 + return rc; 206 + 207 + if (idx > MAX_EXT_DEVT) { 208 + idr_remove(&ext_devt_idr, idx); 209 + return -EBUSY; 210 + } 211 + 212 + *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); 213 + return 0; 214 + } 215 + 216 + /** 217 + * blk_free_devt - free a dev_t 218 + * @devt: dev_t to free 219 + * 220 + * Free @devt which was allocated using blk_alloc_devt(). 221 + * 222 + * CONTEXT: 223 + * Might sleep. 224 + */ 225 + void blk_free_devt(dev_t devt) 226 + { 227 + might_sleep(); 228 + 229 + if (devt == MKDEV(0, 0)) 230 + return; 231 + 232 + if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 233 + mutex_lock(&ext_devt_mutex); 234 + idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 235 + mutex_unlock(&ext_devt_mutex); 236 + } 237 + } 238 + 239 + static char *bdevt_str(dev_t devt, char *buf) 240 + { 241 + if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { 242 + char tbuf[BDEVT_SIZE]; 243 + snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); 244 + snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); 245 + } else 246 + snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 247 + 248 + return buf; 249 + } 250 + 326 251 /* 327 252 * Register device numbers dev..(dev+range-1) 328 253 * range must be nonzero ··· 456 157 457 158 EXPORT_SYMBOL(blk_unregister_region); 458 159 459 - static struct kobject *exact_match(dev_t devt, int *part, void *data) 160 + static struct kobject *exact_match(dev_t devt, int *partno, void *data) 
460 161 { 461 162 struct gendisk *p = data; 462 163 463 - return &p->dev.kobj; 164 + return &disk_to_dev(p)->kobj; 464 165 } 465 166 466 167 static int exact_lock(dev_t devt, void *data) ··· 478 179 * 479 180 * This function registers the partitioning information in @disk 480 181 * with the kernel. 182 + * 183 + * FIXME: error handling 481 184 */ 482 185 void add_disk(struct gendisk *disk) 483 186 { 484 187 struct backing_dev_info *bdi; 188 + dev_t devt; 485 189 int retval; 486 190 191 + /* minors == 0 indicates to use ext devt from part0 and should 192 + * be accompanied with EXT_DEVT flag. Make sure all 193 + * parameters make sense. 194 + */ 195 + WARN_ON(disk->minors && !(disk->major || disk->first_minor)); 196 + WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); 197 + 487 198 disk->flags |= GENHD_FL_UP; 488 - blk_register_region(MKDEV(disk->major, disk->first_minor), 489 - disk->minors, NULL, exact_match, exact_lock, disk); 199 + 200 + retval = blk_alloc_devt(&disk->part0, &devt); 201 + if (retval) { 202 + WARN_ON(1); 203 + return; 204 + } 205 + disk_to_dev(disk)->devt = devt; 206 + 207 + /* ->major and ->first_minor aren't supposed to be 208 + * dereferenced from here on, but set them just in case. 
209 + */ 210 + disk->major = MAJOR(devt); 211 + disk->first_minor = MINOR(devt); 212 + 213 + blk_register_region(disk_devt(disk), disk->minors, NULL, 214 + exact_match, exact_lock, disk); 490 215 register_disk(disk); 491 216 blk_register_queue(disk); 492 217 493 218 bdi = &disk->queue->backing_dev_info; 494 - bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); 495 - retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); 219 + bdi_register_dev(bdi, disk_devt(disk)); 220 + retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 221 + "bdi"); 496 222 WARN_ON(retval); 497 223 } 498 224 ··· 526 202 527 203 void unlink_gendisk(struct gendisk *disk) 528 204 { 529 - sysfs_remove_link(&disk->dev.kobj, "bdi"); 205 + sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 530 206 bdi_unregister(&disk->queue->backing_dev_info); 531 207 blk_unregister_queue(disk); 532 - blk_unregister_region(MKDEV(disk->major, disk->first_minor), 533 - disk->minors); 208 + blk_unregister_region(disk_devt(disk), disk->minors); 534 209 } 535 210 536 211 /** 537 212 * get_gendisk - get partitioning information for a given device 538 - * @dev: device to get partitioning information for 213 + * @devt: device to get partitioning information for 214 + * @part: returned partition index 539 215 * 540 216 * This function gets the structure containing partitioning 541 - * information for the given device @dev. 217 + * information for the given device @devt. 542 218 */ 543 - struct gendisk *get_gendisk(dev_t devt, int *part) 219 + struct gendisk *get_gendisk(dev_t devt, int *partno) 544 220 { 545 - struct kobject *kobj = kobj_lookup(bdev_map, devt, part); 546 - struct device *dev = kobj_to_dev(kobj); 221 + struct gendisk *disk = NULL; 547 222 548 - return kobj ? 
dev_to_disk(dev) : NULL; 549 - } 223 + if (MAJOR(devt) != BLOCK_EXT_MAJOR) { 224 + struct kobject *kobj; 550 225 551 - /* 552 - * print a partitions - intended for places where the root filesystem can't be 553 - * mounted and thus to give the victim some idea of what went wrong 554 - */ 555 - static int printk_partition(struct device *dev, void *data) 556 - { 557 - struct gendisk *sgp; 558 - char buf[BDEVNAME_SIZE]; 559 - int n; 226 + kobj = kobj_lookup(bdev_map, devt, partno); 227 + if (kobj) 228 + disk = dev_to_disk(kobj_to_dev(kobj)); 229 + } else { 230 + struct hd_struct *part; 560 231 561 - if (dev->type != &disk_type) 562 - goto exit; 563 - 564 - sgp = dev_to_disk(dev); 565 - /* 566 - * Don't show empty devices or things that have been surpressed 567 - */ 568 - if (get_capacity(sgp) == 0 || 569 - (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 570 - goto exit; 571 - 572 - /* 573 - * Note, unlike /proc/partitions, I am showing the numbers in 574 - * hex - the same format as the root= option takes. 
575 - */ 576 - printk("%02x%02x %10llu %s", 577 - sgp->major, sgp->first_minor, 578 - (unsigned long long)get_capacity(sgp) >> 1, 579 - disk_name(sgp, 0, buf)); 580 - if (sgp->driverfs_dev != NULL && 581 - sgp->driverfs_dev->driver != NULL) 582 - printk(" driver: %s\n", 583 - sgp->driverfs_dev->driver->name); 584 - else 585 - printk(" (driver?)\n"); 586 - 587 - /* now show the partitions */ 588 - for (n = 0; n < sgp->minors - 1; ++n) { 589 - if (sgp->part[n] == NULL) 590 - goto exit; 591 - if (sgp->part[n]->nr_sects == 0) 592 - goto exit; 593 - printk(" %02x%02x %10llu %s\n", 594 - sgp->major, n + 1 + sgp->first_minor, 595 - (unsigned long long)sgp->part[n]->nr_sects >> 1, 596 - disk_name(sgp, n + 1, buf)); 232 + mutex_lock(&ext_devt_mutex); 233 + part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 234 + if (part && get_disk(part_to_disk(part))) { 235 + *partno = part->partno; 236 + disk = part_to_disk(part); 237 + } 238 + mutex_unlock(&ext_devt_mutex); 597 239 } 598 - exit: 599 - return 0; 240 + 241 + return disk; 600 242 } 243 + 244 + /** 245 + * bdget_disk - do bdget() by gendisk and partition number 246 + * @disk: gendisk of interest 247 + * @partno: partition number 248 + * 249 + * Find partition @partno from @disk, do bdget() on it. 250 + * 251 + * CONTEXT: 252 + * Don't care. 253 + * 254 + * RETURNS: 255 + * Resulting block_device on success, NULL on failure. 
256 + */ 257 + struct block_device *bdget_disk(struct gendisk *disk, int partno) 258 + { 259 + struct hd_struct *part; 260 + struct block_device *bdev = NULL; 261 + 262 + part = disk_get_part(disk, partno); 263 + if (part) 264 + bdev = bdget(part_devt(part)); 265 + disk_put_part(part); 266 + 267 + return bdev; 268 + } 269 + EXPORT_SYMBOL(bdget_disk); 601 270 602 271 /* 603 272 * print a full list of all partitions - intended for places where the root ··· 599 282 */ 600 283 void __init printk_all_partitions(void) 601 284 { 602 - mutex_lock(&block_class_lock); 603 - class_for_each_device(&block_class, NULL, NULL, printk_partition); 604 - mutex_unlock(&block_class_lock); 285 + struct class_dev_iter iter; 286 + struct device *dev; 287 + 288 + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 289 + while ((dev = class_dev_iter_next(&iter))) { 290 + struct gendisk *disk = dev_to_disk(dev); 291 + struct disk_part_iter piter; 292 + struct hd_struct *part; 293 + char name_buf[BDEVNAME_SIZE]; 294 + char devt_buf[BDEVT_SIZE]; 295 + 296 + /* 297 + * Don't show empty devices or things that have been 298 + * surpressed 299 + */ 300 + if (get_capacity(disk) == 0 || 301 + (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 302 + continue; 303 + 304 + /* 305 + * Note, unlike /proc/partitions, I am showing the 306 + * numbers in hex - the same format as the root= 307 + * option takes. 308 + */ 309 + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 310 + while ((part = disk_part_iter_next(&piter))) { 311 + bool is_part0 = part == &disk->part0; 312 + 313 + printk("%s%s %10llu %s", is_part0 ? 
"" : " ", 314 + bdevt_str(part_devt(part), devt_buf), 315 + (unsigned long long)part->nr_sects >> 1, 316 + disk_name(disk, part->partno, name_buf)); 317 + if (is_part0) { 318 + if (disk->driverfs_dev != NULL && 319 + disk->driverfs_dev->driver != NULL) 320 + printk(" driver: %s\n", 321 + disk->driverfs_dev->driver->name); 322 + else 323 + printk(" (driver?)\n"); 324 + } else 325 + printk("\n"); 326 + } 327 + disk_part_iter_exit(&piter); 328 + } 329 + class_dev_iter_exit(&iter); 605 330 } 606 331 607 332 #ifdef CONFIG_PROC_FS 608 333 /* iterator */ 609 - static int find_start(struct device *dev, void *data) 334 + static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) 610 335 { 611 - loff_t *k = data; 336 + loff_t skip = *pos; 337 + struct class_dev_iter *iter; 338 + struct device *dev; 612 339 613 - if (dev->type != &disk_type) 614 - return 0; 615 - if (!*k) 616 - return 1; 617 - (*k)--; 618 - return 0; 340 + iter = kmalloc(sizeof(*iter), GFP_KERNEL); 341 + if (!iter) 342 + return ERR_PTR(-ENOMEM); 343 + 344 + seqf->private = iter; 345 + class_dev_iter_init(iter, &block_class, NULL, &disk_type); 346 + do { 347 + dev = class_dev_iter_next(iter); 348 + if (!dev) 349 + return NULL; 350 + } while (skip--); 351 + 352 + return dev_to_disk(dev); 619 353 } 620 354 621 - static void *part_start(struct seq_file *part, loff_t *pos) 355 + static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) 622 356 { 623 357 struct device *dev; 624 - loff_t k = *pos; 625 358 626 - if (!k) 627 - part->private = (void *)1LU; /* tell show to print header */ 628 - 629 - mutex_lock(&block_class_lock); 630 - dev = class_find_device(&block_class, NULL, &k, find_start); 631 - if (dev) { 632 - put_device(dev); 359 + (*pos)++; 360 + dev = class_dev_iter_next(seqf->private); 361 + if (dev) 633 362 return dev_to_disk(dev); 634 - } 363 + 635 364 return NULL; 636 365 } 637 366 638 - static int find_next(struct device *dev, void *data) 367 + static void disk_seqf_stop(struct 
seq_file *seqf, void *v) 639 368 { 640 - if (dev->type == &disk_type) 641 - return 1; 642 - return 0; 643 - } 369 + struct class_dev_iter *iter = seqf->private; 644 370 645 - static void *part_next(struct seq_file *part, void *v, loff_t *pos) 646 - { 647 - struct gendisk *gp = v; 648 - struct device *dev; 649 - ++*pos; 650 - dev = class_find_device(&block_class, &gp->dev, NULL, find_next); 651 - if (dev) { 652 - put_device(dev); 653 - return dev_to_disk(dev); 371 + /* stop is called even after start failed :-( */ 372 + if (iter) { 373 + class_dev_iter_exit(iter); 374 + kfree(iter); 654 375 } 655 - return NULL; 656 376 } 657 377 658 - static void part_stop(struct seq_file *part, void *v) 378 + static void *show_partition_start(struct seq_file *seqf, loff_t *pos) 659 379 { 660 - mutex_unlock(&block_class_lock); 380 + static void *p; 381 + 382 + p = disk_seqf_start(seqf, pos); 383 + if (!IS_ERR(p) && p && !*pos) 384 + seq_puts(seqf, "major minor #blocks name\n\n"); 385 + return p; 661 386 } 662 387 663 - static int show_partition(struct seq_file *part, void *v) 388 + static int show_partition(struct seq_file *seqf, void *v) 664 389 { 665 390 struct gendisk *sgp = v; 666 - int n; 391 + struct disk_part_iter piter; 392 + struct hd_struct *part; 667 393 char buf[BDEVNAME_SIZE]; 668 394 669 - /* 670 - * Print header if start told us to do. This is to preserve 671 - * the original behavior of not printing header if no 672 - * partition exists. This hackery will be removed later with 673 - * class iteration clean up. 
674 - */ 675 - if (part->private) { 676 - seq_puts(part, "major minor #blocks name\n\n"); 677 - part->private = NULL; 678 - } 679 - 680 395 /* Don't show non-partitionable removeable devices or empty devices */ 681 - if (!get_capacity(sgp) || 682 - (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) 396 + if (!get_capacity(sgp) || (!disk_partitionable(sgp) && 397 + (sgp->flags & GENHD_FL_REMOVABLE))) 683 398 return 0; 684 399 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 685 400 return 0; 686 401 687 402 /* show the full disk and all non-0 size partitions of it */ 688 - seq_printf(part, "%4d %4d %10llu %s\n", 689 - sgp->major, sgp->first_minor, 690 - (unsigned long long)get_capacity(sgp) >> 1, 691 - disk_name(sgp, 0, buf)); 692 - for (n = 0; n < sgp->minors - 1; n++) { 693 - if (!sgp->part[n]) 694 - continue; 695 - if (sgp->part[n]->nr_sects == 0) 696 - continue; 697 - seq_printf(part, "%4d %4d %10llu %s\n", 698 - sgp->major, n + 1 + sgp->first_minor, 699 - (unsigned long long)sgp->part[n]->nr_sects >> 1 , 700 - disk_name(sgp, n + 1, buf)); 701 - } 403 + disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 404 + while ((part = disk_part_iter_next(&piter))) 405 + seq_printf(seqf, "%4d %7d %10llu %s\n", 406 + MAJOR(part_devt(part)), MINOR(part_devt(part)), 407 + (unsigned long long)part->nr_sects >> 1, 408 + disk_name(sgp, part->partno, buf)); 409 + disk_part_iter_exit(&piter); 702 410 703 411 return 0; 704 412 } 705 413 706 414 const struct seq_operations partitions_op = { 707 - .start = part_start, 708 - .next = part_next, 709 - .stop = part_stop, 415 + .start = show_partition_start, 416 + .next = disk_seqf_next, 417 + .stop = disk_seqf_stop, 710 418 .show = show_partition 711 419 }; 712 420 #endif 713 421 714 422 715 - static struct kobject *base_probe(dev_t devt, int *part, void *data) 423 + static struct kobject *base_probe(dev_t devt, int *partno, void *data) 716 424 { 717 425 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 
718 426 /* Make old-style 2.4 aliases work */ ··· 773 431 return sprintf(buf, "%d\n", disk->minors); 774 432 } 775 433 434 + static ssize_t disk_ext_range_show(struct device *dev, 435 + struct device_attribute *attr, char *buf) 436 + { 437 + struct gendisk *disk = dev_to_disk(dev); 438 + 439 + return sprintf(buf, "%d\n", disk_max_parts(disk)); 440 + } 441 + 776 442 static ssize_t disk_removable_show(struct device *dev, 777 443 struct device_attribute *attr, char *buf) 778 444 { ··· 795 445 { 796 446 struct gendisk *disk = dev_to_disk(dev); 797 447 798 - return sprintf(buf, "%d\n", disk->policy ? 1 : 0); 799 - } 800 - 801 - static ssize_t disk_size_show(struct device *dev, 802 - struct device_attribute *attr, char *buf) 803 - { 804 - struct gendisk *disk = dev_to_disk(dev); 805 - 806 - return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk)); 448 + return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); 807 449 } 808 450 809 451 static ssize_t disk_capability_show(struct device *dev, ··· 806 464 return sprintf(buf, "%x\n", disk->flags); 807 465 } 808 466 809 - static ssize_t disk_stat_show(struct device *dev, 810 - struct device_attribute *attr, char *buf) 811 - { 812 - struct gendisk *disk = dev_to_disk(dev); 813 - 814 - preempt_disable(); 815 - disk_round_stats(disk); 816 - preempt_enable(); 817 - return sprintf(buf, 818 - "%8lu %8lu %8llu %8u " 819 - "%8lu %8lu %8llu %8u " 820 - "%8u %8u %8u" 821 - "\n", 822 - disk_stat_read(disk, ios[READ]), 823 - disk_stat_read(disk, merges[READ]), 824 - (unsigned long long)disk_stat_read(disk, sectors[READ]), 825 - jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), 826 - disk_stat_read(disk, ios[WRITE]), 827 - disk_stat_read(disk, merges[WRITE]), 828 - (unsigned long long)disk_stat_read(disk, sectors[WRITE]), 829 - jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), 830 - disk->in_flight, 831 - jiffies_to_msecs(disk_stat_read(disk, io_ticks)), 832 - jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); 833 - 
} 834 - 835 - #ifdef CONFIG_FAIL_MAKE_REQUEST 836 - static ssize_t disk_fail_show(struct device *dev, 837 - struct device_attribute *attr, char *buf) 838 - { 839 - struct gendisk *disk = dev_to_disk(dev); 840 - 841 - return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0); 842 - } 843 - 844 - static ssize_t disk_fail_store(struct device *dev, 845 - struct device_attribute *attr, 846 - const char *buf, size_t count) 847 - { 848 - struct gendisk *disk = dev_to_disk(dev); 849 - int i; 850 - 851 - if (count > 0 && sscanf(buf, "%d", &i) > 0) { 852 - if (i == 0) 853 - disk->flags &= ~GENHD_FL_FAIL; 854 - else 855 - disk->flags |= GENHD_FL_FAIL; 856 - } 857 - 858 - return count; 859 - } 860 - 861 - #endif 862 - 863 467 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 468 + static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 864 469 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 865 470 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 866 - static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); 471 + static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 867 472 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 868 - static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); 473 + static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 869 474 #ifdef CONFIG_FAIL_MAKE_REQUEST 870 475 static struct device_attribute dev_attr_fail = 871 - __ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store); 476 + __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 477 + #endif 478 + #ifdef CONFIG_FAIL_IO_TIMEOUT 479 + static struct device_attribute dev_attr_fail_timeout = 480 + __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, 481 + part_timeout_store); 872 482 #endif 873 483 874 484 static struct attribute *disk_attrs[] = { 875 485 &dev_attr_range.attr, 486 + &dev_attr_ext_range.attr, 876 487 &dev_attr_removable.attr, 877 488 &dev_attr_ro.attr, 878 489 
&dev_attr_size.attr, ··· 833 538 &dev_attr_stat.attr, 834 539 #ifdef CONFIG_FAIL_MAKE_REQUEST 835 540 &dev_attr_fail.attr, 541 + #endif 542 + #ifdef CONFIG_FAIL_IO_TIMEOUT 543 + &dev_attr_fail_timeout.attr, 836 544 #endif 837 545 NULL 838 546 }; ··· 849 551 NULL 850 552 }; 851 553 554 + static void disk_free_ptbl_rcu_cb(struct rcu_head *head) 555 + { 556 + struct disk_part_tbl *ptbl = 557 + container_of(head, struct disk_part_tbl, rcu_head); 558 + 559 + kfree(ptbl); 560 + } 561 + 562 + /** 563 + * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 564 + * @disk: disk to replace part_tbl for 565 + * @new_ptbl: new part_tbl to install 566 + * 567 + * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The 568 + * original ptbl is freed using RCU callback. 569 + * 570 + * LOCKING: 571 + * Matching bd_mutx locked. 572 + */ 573 + static void disk_replace_part_tbl(struct gendisk *disk, 574 + struct disk_part_tbl *new_ptbl) 575 + { 576 + struct disk_part_tbl *old_ptbl = disk->part_tbl; 577 + 578 + rcu_assign_pointer(disk->part_tbl, new_ptbl); 579 + if (old_ptbl) 580 + call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); 581 + } 582 + 583 + /** 584 + * disk_expand_part_tbl - expand disk->part_tbl 585 + * @disk: disk to expand part_tbl for 586 + * @partno: expand such that this partno can fit in 587 + * 588 + * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl 589 + * uses RCU to allow unlocked dereferencing for stats and other stuff. 590 + * 591 + * LOCKING: 592 + * Matching bd_mutex locked, might sleep. 593 + * 594 + * RETURNS: 595 + * 0 on success, -errno on failure. 596 + */ 597 + int disk_expand_part_tbl(struct gendisk *disk, int partno) 598 + { 599 + struct disk_part_tbl *old_ptbl = disk->part_tbl; 600 + struct disk_part_tbl *new_ptbl; 601 + int len = old_ptbl ? 
old_ptbl->len : 0; 602 + int target = partno + 1; 603 + size_t size; 604 + int i; 605 + 606 + /* disk_max_parts() is zero during initialization, ignore if so */ 607 + if (disk_max_parts(disk) && target > disk_max_parts(disk)) 608 + return -EINVAL; 609 + 610 + if (target <= len) 611 + return 0; 612 + 613 + size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); 614 + new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); 615 + if (!new_ptbl) 616 + return -ENOMEM; 617 + 618 + INIT_RCU_HEAD(&new_ptbl->rcu_head); 619 + new_ptbl->len = target; 620 + 621 + for (i = 0; i < len; i++) 622 + rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); 623 + 624 + disk_replace_part_tbl(disk, new_ptbl); 625 + return 0; 626 + } 627 + 852 628 static void disk_release(struct device *dev) 853 629 { 854 630 struct gendisk *disk = dev_to_disk(dev); 855 631 856 632 kfree(disk->random); 857 - kfree(disk->part); 858 - free_disk_stats(disk); 633 + disk_replace_part_tbl(disk, NULL); 634 + free_part_stats(&disk->part0); 859 635 kfree(disk); 860 636 } 861 637 struct class block_class = { ··· 950 578 * The output looks suspiciously like /proc/partitions with a bunch of 951 579 * extra fields. 
952 580 */ 953 - 954 - static void *diskstats_start(struct seq_file *part, loff_t *pos) 955 - { 956 - struct device *dev; 957 - loff_t k = *pos; 958 - 959 - mutex_lock(&block_class_lock); 960 - dev = class_find_device(&block_class, NULL, &k, find_start); 961 - if (dev) { 962 - put_device(dev); 963 - return dev_to_disk(dev); 964 - } 965 - return NULL; 966 - } 967 - 968 - static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) 581 + static int diskstats_show(struct seq_file *seqf, void *v) 969 582 { 970 583 struct gendisk *gp = v; 971 - struct device *dev; 972 - 973 - ++*pos; 974 - dev = class_find_device(&block_class, &gp->dev, NULL, find_next); 975 - if (dev) { 976 - put_device(dev); 977 - return dev_to_disk(dev); 978 - } 979 - return NULL; 980 - } 981 - 982 - static void diskstats_stop(struct seq_file *part, void *v) 983 - { 984 - mutex_unlock(&block_class_lock); 985 - } 986 - 987 - static int diskstats_show(struct seq_file *s, void *v) 988 - { 989 - struct gendisk *gp = v; 584 + struct disk_part_iter piter; 585 + struct hd_struct *hd; 990 586 char buf[BDEVNAME_SIZE]; 991 - int n = 0; 587 + int cpu; 992 588 993 589 /* 994 - if (&gp->dev.kobj.entry == block_class.devices.next) 995 - seq_puts(s, "major minor name" 590 + if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) 591 + seq_puts(seqf, "major minor name" 996 592 " rio rmerge rsect ruse wio wmerge " 997 593 "wsect wuse running use aveq" 998 594 "\n\n"); 999 595 */ 1000 596 1001 - preempt_disable(); 1002 - disk_round_stats(gp); 1003 - preempt_enable(); 1004 - seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", 1005 - gp->major, n + gp->first_minor, disk_name(gp, n, buf), 1006 - disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), 1007 - (unsigned long long)disk_stat_read(gp, sectors[0]), 1008 - jiffies_to_msecs(disk_stat_read(gp, ticks[0])), 1009 - disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), 1010 - (unsigned long long)disk_stat_read(gp, 
sectors[1]), 1011 - jiffies_to_msecs(disk_stat_read(gp, ticks[1])), 1012 - gp->in_flight, 1013 - jiffies_to_msecs(disk_stat_read(gp, io_ticks)), 1014 - jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); 1015 - 1016 - /* now show all non-0 size partitions of it */ 1017 - for (n = 0; n < gp->minors - 1; n++) { 1018 - struct hd_struct *hd = gp->part[n]; 1019 - 1020 - if (!hd || !hd->nr_sects) 1021 - continue; 1022 - 1023 - preempt_disable(); 1024 - part_round_stats(hd); 1025 - preempt_enable(); 1026 - seq_printf(s, "%4d %4d %s %lu %lu %llu " 597 + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); 598 + while ((hd = disk_part_iter_next(&piter))) { 599 + cpu = part_stat_lock(); 600 + part_round_stats(cpu, hd); 601 + part_stat_unlock(); 602 + seq_printf(seqf, "%4d %7d %s %lu %lu %llu " 1027 603 "%u %lu %lu %llu %u %u %u %u\n", 1028 - gp->major, n + gp->first_minor + 1, 1029 - disk_name(gp, n + 1, buf), 604 + MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 605 + disk_name(gp, hd->partno, buf), 1030 606 part_stat_read(hd, ios[0]), 1031 607 part_stat_read(hd, merges[0]), 1032 608 (unsigned long long)part_stat_read(hd, sectors[0]), ··· 988 668 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 989 669 ); 990 670 } 671 + disk_part_iter_exit(&piter); 991 672 992 673 return 0; 993 674 } 994 675 995 676 const struct seq_operations diskstats_op = { 996 - .start = diskstats_start, 997 - .next = diskstats_next, 998 - .stop = diskstats_stop, 677 + .start = disk_seqf_start, 678 + .next = disk_seqf_next, 679 + .stop = disk_seqf_stop, 999 680 .show = diskstats_show 1000 681 }; 1001 682 #endif /* CONFIG_PROC_FS */ ··· 1011 690 * set enviroment vars to indicate which event this is for 1012 691 * so that user space will know to go check the media status. 
1013 692 */ 1014 - kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); 693 + kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1015 694 put_device(gd->driverfs_dev); 1016 695 } 1017 696 ··· 1024 703 EXPORT_SYMBOL_GPL(genhd_media_change_notify); 1025 704 #endif /* 0 */ 1026 705 1027 - struct find_block { 1028 - const char *name; 1029 - int part; 1030 - }; 1031 - 1032 - static int match_id(struct device *dev, void *data) 706 + dev_t blk_lookup_devt(const char *name, int partno) 1033 707 { 1034 - struct find_block *find = data; 1035 - 1036 - if (dev->type != &disk_type) 1037 - return 0; 1038 - if (strcmp(dev->bus_id, find->name) == 0) { 1039 - struct gendisk *disk = dev_to_disk(dev); 1040 - if (find->part < disk->minors) 1041 - return 1; 1042 - } 1043 - return 0; 1044 - } 1045 - 1046 - dev_t blk_lookup_devt(const char *name, int part) 1047 - { 1048 - struct device *dev; 1049 708 dev_t devt = MKDEV(0, 0); 1050 - struct find_block find; 709 + struct class_dev_iter iter; 710 + struct device *dev; 1051 711 1052 - mutex_lock(&block_class_lock); 1053 - find.name = name; 1054 - find.part = part; 1055 - dev = class_find_device(&block_class, NULL, &find, match_id); 1056 - if (dev) { 1057 - put_device(dev); 1058 - devt = MKDEV(MAJOR(dev->devt), 1059 - MINOR(dev->devt) + part); 712 + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 713 + while ((dev = class_dev_iter_next(&iter))) { 714 + struct gendisk *disk = dev_to_disk(dev); 715 + struct hd_struct *part; 716 + 717 + if (strcmp(dev->bus_id, name)) 718 + continue; 719 + 720 + part = disk_get_part(disk, partno); 721 + if (part) { 722 + devt = part_devt(part); 723 + disk_put_part(part); 724 + break; 725 + } 726 + disk_put_part(part); 1060 727 } 1061 - mutex_unlock(&block_class_lock); 1062 - 728 + class_dev_iter_exit(&iter); 1063 729 return devt; 1064 730 } 1065 731 EXPORT_SYMBOL(blk_lookup_devt); ··· 1055 747 { 1056 748 return alloc_disk_node(minors, -1); 1057 749 } 750 + EXPORT_SYMBOL(alloc_disk); 1058 
751 1059 752 struct gendisk *alloc_disk_node(int minors, int node_id) 1060 753 { ··· 1064 755 disk = kmalloc_node(sizeof(struct gendisk), 1065 756 GFP_KERNEL | __GFP_ZERO, node_id); 1066 757 if (disk) { 1067 - if (!init_disk_stats(disk)) { 758 + if (!init_part_stats(&disk->part0)) { 1068 759 kfree(disk); 1069 760 return NULL; 1070 761 } 1071 - if (minors > 1) { 1072 - int size = (minors - 1) * sizeof(struct hd_struct *); 1073 - disk->part = kmalloc_node(size, 1074 - GFP_KERNEL | __GFP_ZERO, node_id); 1075 - if (!disk->part) { 1076 - free_disk_stats(disk); 1077 - kfree(disk); 1078 - return NULL; 1079 - } 762 + if (disk_expand_part_tbl(disk, 0)) { 763 + free_part_stats(&disk->part0); 764 + kfree(disk); 765 + return NULL; 1080 766 } 767 + disk->part_tbl->part[0] = &disk->part0; 768 + 1081 769 disk->minors = minors; 1082 770 rand_initialize_disk(disk); 1083 - disk->dev.class = &block_class; 1084 - disk->dev.type = &disk_type; 1085 - device_initialize(&disk->dev); 771 + disk_to_dev(disk)->class = &block_class; 772 + disk_to_dev(disk)->type = &disk_type; 773 + device_initialize(disk_to_dev(disk)); 1086 774 INIT_WORK(&disk->async_notify, 1087 775 media_change_notify_thread); 776 + disk->node_id = node_id; 1088 777 } 1089 778 return disk; 1090 779 } 1091 - 1092 - EXPORT_SYMBOL(alloc_disk); 1093 780 EXPORT_SYMBOL(alloc_disk_node); 1094 781 1095 782 struct kobject *get_disk(struct gendisk *disk) ··· 1098 793 owner = disk->fops->owner; 1099 794 if (owner && !try_module_get(owner)) 1100 795 return NULL; 1101 - kobj = kobject_get(&disk->dev.kobj); 796 + kobj = kobject_get(&disk_to_dev(disk)->kobj); 1102 797 if (kobj == NULL) { 1103 798 module_put(owner); 1104 799 return NULL; ··· 1112 807 void put_disk(struct gendisk *disk) 1113 808 { 1114 809 if (disk) 1115 - kobject_put(&disk->dev.kobj); 810 + kobject_put(&disk_to_dev(disk)->kobj); 1116 811 } 1117 812 1118 813 EXPORT_SYMBOL(put_disk); 1119 814 1120 815 void set_device_ro(struct block_device *bdev, int flag) 1121 816 { 1122 - 
if (bdev->bd_contains != bdev) 1123 - bdev->bd_part->policy = flag; 1124 - else 1125 - bdev->bd_disk->policy = flag; 817 + bdev->bd_part->policy = flag; 1126 818 } 1127 819 1128 820 EXPORT_SYMBOL(set_device_ro); 1129 821 1130 822 void set_disk_ro(struct gendisk *disk, int flag) 1131 823 { 1132 - int i; 1133 - disk->policy = flag; 1134 - for (i = 0; i < disk->minors - 1; i++) 1135 - if (disk->part[i]) disk->part[i]->policy = flag; 824 + struct disk_part_iter piter; 825 + struct hd_struct *part; 826 + 827 + disk_part_iter_init(&piter, disk, 828 + DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); 829 + while ((part = disk_part_iter_next(&piter))) 830 + part->policy = flag; 831 + disk_part_iter_exit(&piter); 1136 832 } 1137 833 1138 834 EXPORT_SYMBOL(set_disk_ro); ··· 1142 836 { 1143 837 if (!bdev) 1144 838 return 0; 1145 - else if (bdev->bd_contains != bdev) 1146 - return bdev->bd_part->policy; 1147 - else 1148 - return bdev->bd_disk->policy; 839 + return bdev->bd_part->policy; 1149 840 } 1150 841 1151 842 EXPORT_SYMBOL(bdev_read_only); 1152 843 1153 - int invalidate_partition(struct gendisk *disk, int index) 844 + int invalidate_partition(struct gendisk *disk, int partno) 1154 845 { 1155 846 int res = 0; 1156 - struct block_device *bdev = bdget_disk(disk, index); 847 + struct block_device *bdev = bdget_disk(disk, partno); 1157 848 if (bdev) { 1158 849 fsync_bdev(bdev); 1159 850 res = __invalidate_device(bdev);
+102 -24
block/ioctl.c
··· 12 12 { 13 13 struct block_device *bdevp; 14 14 struct gendisk *disk; 15 + struct hd_struct *part; 15 16 struct blkpg_ioctl_arg a; 16 17 struct blkpg_partition p; 18 + struct disk_part_iter piter; 17 19 long long start, length; 18 - int part; 19 - int i; 20 + int partno; 20 21 int err; 21 22 22 23 if (!capable(CAP_SYS_ADMIN)) ··· 29 28 disk = bdev->bd_disk; 30 29 if (bdev != bdev->bd_contains) 31 30 return -EINVAL; 32 - part = p.pno; 33 - if (part <= 0 || part >= disk->minors) 31 + partno = p.pno; 32 + if (partno <= 0) 34 33 return -EINVAL; 35 34 switch (a.op) { 36 35 case BLKPG_ADD_PARTITION: ··· 44 43 || pstart < 0 || plength < 0) 45 44 return -EINVAL; 46 45 } 47 - /* partition number in use? */ 48 - mutex_lock(&bdev->bd_mutex); 49 - if (disk->part[part - 1]) { 50 - mutex_unlock(&bdev->bd_mutex); 51 - return -EBUSY; 52 - } 53 - /* overlap? */ 54 - for (i = 0; i < disk->minors - 1; i++) { 55 - struct hd_struct *s = disk->part[i]; 56 46 57 - if (!s) 58 - continue; 59 - if (!(start+length <= s->start_sect || 60 - start >= s->start_sect + s->nr_sects)) { 47 + mutex_lock(&bdev->bd_mutex); 48 + 49 + /* overlap? 
*/ 50 + disk_part_iter_init(&piter, disk, 51 + DISK_PITER_INCL_EMPTY); 52 + while ((part = disk_part_iter_next(&piter))) { 53 + if (!(start + length <= part->start_sect || 54 + start >= part->start_sect + part->nr_sects)) { 55 + disk_part_iter_exit(&piter); 61 56 mutex_unlock(&bdev->bd_mutex); 62 57 return -EBUSY; 63 58 } 64 59 } 60 + disk_part_iter_exit(&piter); 61 + 65 62 /* all seems OK */ 66 - err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE); 63 + err = add_partition(disk, partno, start, length, 64 + ADDPART_FLAG_NONE); 67 65 mutex_unlock(&bdev->bd_mutex); 68 66 return err; 69 67 case BLKPG_DEL_PARTITION: 70 - if (!disk->part[part-1]) 68 + part = disk_get_part(disk, partno); 69 + if (!part) 71 70 return -ENXIO; 72 - if (disk->part[part - 1]->nr_sects == 0) 73 - return -ENXIO; 74 - bdevp = bdget_disk(disk, part); 71 + 72 + bdevp = bdget(part_devt(part)); 73 + disk_put_part(part); 75 74 if (!bdevp) 76 75 return -ENOMEM; 76 + 77 77 mutex_lock(&bdevp->bd_mutex); 78 78 if (bdevp->bd_openers) { 79 79 mutex_unlock(&bdevp->bd_mutex); ··· 86 84 invalidate_bdev(bdevp); 87 85 88 86 mutex_lock_nested(&bdev->bd_mutex, 1); 89 - delete_partition(disk, part); 87 + delete_partition(disk, partno); 90 88 mutex_unlock(&bdev->bd_mutex); 91 89 mutex_unlock(&bdevp->bd_mutex); 92 90 bdput(bdevp); ··· 102 100 struct gendisk *disk = bdev->bd_disk; 103 101 int res; 104 102 105 - if (disk->minors == 1 || bdev != bdev->bd_contains) 103 + if (!disk_partitionable(disk) || bdev != bdev->bd_contains) 106 104 return -EINVAL; 107 105 if (!capable(CAP_SYS_ADMIN)) 108 106 return -EACCES; ··· 111 109 res = rescan_partitions(disk, bdev); 112 110 mutex_unlock(&bdev->bd_mutex); 113 111 return res; 112 + } 113 + 114 + static void blk_ioc_discard_endio(struct bio *bio, int err) 115 + { 116 + if (err) { 117 + if (err == -EOPNOTSUPP) 118 + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 119 + clear_bit(BIO_UPTODATE, &bio->bi_flags); 120 + } 121 + complete(bio->bi_private); 122 + } 123 + 124 + 
static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, 125 + uint64_t len) 126 + { 127 + struct request_queue *q = bdev_get_queue(bdev); 128 + int ret = 0; 129 + 130 + if (start & 511) 131 + return -EINVAL; 132 + if (len & 511) 133 + return -EINVAL; 134 + start >>= 9; 135 + len >>= 9; 136 + 137 + if (start + len > (bdev->bd_inode->i_size >> 9)) 138 + return -EINVAL; 139 + 140 + if (!q->prepare_discard_fn) 141 + return -EOPNOTSUPP; 142 + 143 + while (len && !ret) { 144 + DECLARE_COMPLETION_ONSTACK(wait); 145 + struct bio *bio; 146 + 147 + bio = bio_alloc(GFP_KERNEL, 0); 148 + if (!bio) 149 + return -ENOMEM; 150 + 151 + bio->bi_end_io = blk_ioc_discard_endio; 152 + bio->bi_bdev = bdev; 153 + bio->bi_private = &wait; 154 + bio->bi_sector = start; 155 + 156 + if (len > q->max_hw_sectors) { 157 + bio->bi_size = q->max_hw_sectors << 9; 158 + len -= q->max_hw_sectors; 159 + start += q->max_hw_sectors; 160 + } else { 161 + bio->bi_size = len << 9; 162 + len = 0; 163 + } 164 + submit_bio(DISCARD_NOBARRIER, bio); 165 + 166 + wait_for_completion(&wait); 167 + 168 + if (bio_flagged(bio, BIO_EOPNOTSUPP)) 169 + ret = -EOPNOTSUPP; 170 + else if (!bio_flagged(bio, BIO_UPTODATE)) 171 + ret = -EIO; 172 + bio_put(bio); 173 + } 174 + return ret; 114 175 } 115 176 116 177 static int put_ushort(unsigned long arg, unsigned short val) ··· 323 258 set_device_ro(bdev, n); 324 259 unlock_kernel(); 325 260 return 0; 261 + 262 + case BLKDISCARD: { 263 + uint64_t range[2]; 264 + 265 + if (!(file->f_mode & FMODE_WRITE)) 266 + return -EBADF; 267 + 268 + if (copy_from_user(range, (void __user *)arg, sizeof(range))) 269 + return -EFAULT; 270 + 271 + return blk_ioctl_discard(bdev, range[0], range[1]); 272 + } 273 + 326 274 case HDIO_GETGEO: { 327 275 struct hd_geometry geo; 328 276
+5 -3
block/scsi_ioctl.c
··· 185 185 __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok); 186 186 __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok); 187 187 __set_bit(GPCMD_SET_STREAMING, filter->write_ok); 188 + __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok); 188 189 } 189 190 EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults); 190 191 ··· 314 313 goto out; 315 314 } 316 315 317 - ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, 318 - hdr->dxfer_len); 316 + ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, 317 + hdr->dxfer_len, GFP_KERNEL); 319 318 kfree(iov); 320 319 } else if (hdr->dxfer_len) 321 - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); 320 + ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, 321 + GFP_KERNEL); 322 322 323 323 if (ret) 324 324 goto out;
+7 -6
drivers/ata/libata-eh.c
··· 33 33 */ 34 34 35 35 #include <linux/kernel.h> 36 + #include <linux/blkdev.h> 36 37 #include <linux/pci.h> 37 38 #include <scsi/scsi.h> 38 39 #include <scsi/scsi_host.h> ··· 460 459 * RETURNS: 461 460 * EH_HANDLED or EH_NOT_HANDLED 462 461 */ 463 - enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 462 + enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 464 463 { 465 464 struct Scsi_Host *host = cmd->device->host; 466 465 struct ata_port *ap = ata_shost_to_port(host); 467 466 unsigned long flags; 468 467 struct ata_queued_cmd *qc; 469 - enum scsi_eh_timer_return ret; 468 + enum blk_eh_timer_return ret; 470 469 471 470 DPRINTK("ENTER\n"); 472 471 473 472 if (ap->ops->error_handler) { 474 - ret = EH_NOT_HANDLED; 473 + ret = BLK_EH_NOT_HANDLED; 475 474 goto out; 476 475 } 477 476 478 - ret = EH_HANDLED; 477 + ret = BLK_EH_HANDLED; 479 478 spin_lock_irqsave(ap->lock, flags); 480 479 qc = ata_qc_from_tag(ap, ap->link.active_tag); 481 480 if (qc) { 482 481 WARN_ON(qc->scsicmd != cmd); 483 482 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 484 483 qc->err_mask |= AC_ERR_TIMEOUT; 485 - ret = EH_NOT_HANDLED; 484 + ret = BLK_EH_NOT_HANDLED; 486 485 } 487 486 spin_unlock_irqrestore(ap->lock, flags); 488 487 ··· 834 833 * Note that ATA_QCFLAG_FAILED is unconditionally set after 835 834 * this function completes. 836 835 */ 837 - scsi_req_abort_cmd(qc->scsicmd); 836 + blk_abort_request(qc->scsicmd->request); 838 837 } 839 838 840 839 /**
+4
drivers/ata/libata-scsi.c
··· 1085 1085 1086 1086 blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); 1087 1087 } else { 1088 + if (ata_id_is_ssd(dev->id)) 1089 + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, 1090 + sdev->request_queue); 1091 + 1088 1092 /* ATA devices must be sector aligned */ 1089 1093 blk_queue_update_dma_alignment(sdev->request_queue, 1090 1094 ATA_SECT_SIZE - 1);
+1 -1
drivers/ata/libata.h
··· 155 155 /* libata-eh.c */ 156 156 extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd); 157 157 extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd); 158 - extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); 158 + extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); 159 159 extern void ata_scsi_error(struct Scsi_Host *host); 160 160 extern void ata_port_wait_eh(struct ata_port *ap); 161 161 extern void ata_eh_fastdrain_timerfn(unsigned long arg);
+1 -1
drivers/base/base.h
··· 54 54 */ 55 55 struct class_private { 56 56 struct kset class_subsys; 57 - struct list_head class_devices; 57 + struct klist class_devices; 58 58 struct list_head class_interfaces; 59 59 struct kset class_dirs; 60 60 struct mutex class_mutex;
+103 -33
drivers/base/class.c
··· 135 135 } 136 136 } 137 137 138 + static void klist_class_dev_get(struct klist_node *n) 139 + { 140 + struct device *dev = container_of(n, struct device, knode_class); 141 + 142 + get_device(dev); 143 + } 144 + 145 + static void klist_class_dev_put(struct klist_node *n) 146 + { 147 + struct device *dev = container_of(n, struct device, knode_class); 148 + 149 + put_device(dev); 150 + } 151 + 138 152 int __class_register(struct class *cls, struct lock_class_key *key) 139 153 { 140 154 struct class_private *cp; ··· 159 145 cp = kzalloc(sizeof(*cp), GFP_KERNEL); 160 146 if (!cp) 161 147 return -ENOMEM; 162 - INIT_LIST_HEAD(&cp->class_devices); 148 + klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put); 163 149 INIT_LIST_HEAD(&cp->class_interfaces); 164 150 kset_init(&cp->class_dirs); 165 151 __mutex_init(&cp->class_mutex, "struct class mutex", key); ··· 283 269 #endif 284 270 285 271 /** 272 + * class_dev_iter_init - initialize class device iterator 273 + * @iter: class iterator to initialize 274 + * @class: the class we wanna iterate over 275 + * @start: the device to start iterating from, if any 276 + * @type: device_type of the devices to iterate over, NULL for all 277 + * 278 + * Initialize class iterator @iter such that it iterates over devices 279 + * of @class. If @start is set, the list iteration will start there, 280 + * otherwise if it is NULL, the iteration starts at the beginning of 281 + * the list. 
282 + */ 283 + void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, 284 + struct device *start, const struct device_type *type) 285 + { 286 + struct klist_node *start_knode = NULL; 287 + 288 + if (start) 289 + start_knode = &start->knode_class; 290 + klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode); 291 + iter->type = type; 292 + } 293 + EXPORT_SYMBOL_GPL(class_dev_iter_init); 294 + 295 + /** 296 + * class_dev_iter_next - iterate to the next device 297 + * @iter: class iterator to proceed 298 + * 299 + * Proceed @iter to the next device and return it. Returns NULL if 300 + * iteration is complete. 301 + * 302 + * The returned device is referenced and won't be released till 303 + * iterator is proceed to the next device or exited. The caller is 304 + * free to do whatever it wants to do with the device including 305 + * calling back into class code. 306 + */ 307 + struct device *class_dev_iter_next(struct class_dev_iter *iter) 308 + { 309 + struct klist_node *knode; 310 + struct device *dev; 311 + 312 + while (1) { 313 + knode = klist_next(&iter->ki); 314 + if (!knode) 315 + return NULL; 316 + dev = container_of(knode, struct device, knode_class); 317 + if (!iter->type || iter->type == dev->type) 318 + return dev; 319 + } 320 + } 321 + EXPORT_SYMBOL_GPL(class_dev_iter_next); 322 + 323 + /** 324 + * class_dev_iter_exit - finish iteration 325 + * @iter: class iterator to finish 326 + * 327 + * Finish an iteration. Always call this function after iteration is 328 + * complete whether the iteration ran till the end or not. 329 + */ 330 + void class_dev_iter_exit(struct class_dev_iter *iter) 331 + { 332 + klist_iter_exit(&iter->ki); 333 + } 334 + EXPORT_SYMBOL_GPL(class_dev_iter_exit); 335 + 336 + /** 286 337 * class_for_each_device - device iterator 287 338 * @class: the class we're iterating 288 339 * @start: the device to start with in the list, if any. ··· 362 283 * We check the return of @fn each time. 
If it returns anything 363 284 * other than 0, we break out and return that value. 364 285 * 365 - * Note, we hold class->class_mutex in this function, so it can not be 366 - * re-acquired in @fn, otherwise it will self-deadlocking. For 367 - * example, calls to add or remove class members would be verboten. 286 + * @fn is allowed to do anything including calling back into class 287 + * code. There's no locking restriction. 368 288 */ 369 289 int class_for_each_device(struct class *class, struct device *start, 370 290 void *data, int (*fn)(struct device *, void *)) 371 291 { 292 + struct class_dev_iter iter; 372 293 struct device *dev; 373 294 int error = 0; 374 295 ··· 380 301 return -EINVAL; 381 302 } 382 303 383 - mutex_lock(&class->p->class_mutex); 384 - list_for_each_entry(dev, &class->p->class_devices, node) { 385 - if (start) { 386 - if (start == dev) 387 - start = NULL; 388 - continue; 389 - } 390 - dev = get_device(dev); 304 + class_dev_iter_init(&iter, class, start, NULL); 305 + while ((dev = class_dev_iter_next(&iter))) { 391 306 error = fn(dev, data); 392 - put_device(dev); 393 307 if (error) 394 308 break; 395 309 } 396 - mutex_unlock(&class->p->class_mutex); 310 + class_dev_iter_exit(&iter); 397 311 398 312 return error; 399 313 } ··· 409 337 * 410 338 * Note, you will need to drop the reference with put_device() after use. 411 339 * 412 - * We hold class->class_mutex in this function, so it can not be 413 - * re-acquired in @match, otherwise it will self-deadlocking. For 414 - * example, calls to add or remove class members would be verboten. 340 + * @fn is allowed to do anything including calling back into class 341 + * code. There's no locking restriction. 
415 342 */ 416 343 struct device *class_find_device(struct class *class, struct device *start, 417 344 void *data, 418 345 int (*match)(struct device *, void *)) 419 346 { 347 + struct class_dev_iter iter; 420 348 struct device *dev; 421 - int found = 0; 422 349 423 350 if (!class) 424 351 return NULL; ··· 427 356 return NULL; 428 357 } 429 358 430 - mutex_lock(&class->p->class_mutex); 431 - list_for_each_entry(dev, &class->p->class_devices, node) { 432 - if (start) { 433 - if (start == dev) 434 - start = NULL; 435 - continue; 436 - } 437 - dev = get_device(dev); 359 + class_dev_iter_init(&iter, class, start, NULL); 360 + while ((dev = class_dev_iter_next(&iter))) { 438 361 if (match(dev, data)) { 439 - found = 1; 362 + get_device(dev); 440 363 break; 441 - } else 442 - put_device(dev); 364 + } 443 365 } 444 - mutex_unlock(&class->p->class_mutex); 366 + class_dev_iter_exit(&iter); 445 367 446 - return found ? dev : NULL; 368 + return dev; 447 369 } 448 370 EXPORT_SYMBOL_GPL(class_find_device); 449 371 450 372 int class_interface_register(struct class_interface *class_intf) 451 373 { 452 374 struct class *parent; 375 + struct class_dev_iter iter; 453 376 struct device *dev; 454 377 455 378 if (!class_intf || !class_intf->class) ··· 456 391 mutex_lock(&parent->p->class_mutex); 457 392 list_add_tail(&class_intf->node, &parent->p->class_interfaces); 458 393 if (class_intf->add_dev) { 459 - list_for_each_entry(dev, &parent->p->class_devices, node) 394 + class_dev_iter_init(&iter, parent, NULL, NULL); 395 + while ((dev = class_dev_iter_next(&iter))) 460 396 class_intf->add_dev(dev, class_intf); 397 + class_dev_iter_exit(&iter); 461 398 } 462 399 mutex_unlock(&parent->p->class_mutex); 463 400 ··· 469 402 void class_interface_unregister(struct class_interface *class_intf) 470 403 { 471 404 struct class *parent = class_intf->class; 405 + struct class_dev_iter iter; 472 406 struct device *dev; 473 407 474 408 if (!parent) ··· 478 410 mutex_lock(&parent->p->class_mutex); 479 
411 list_del_init(&class_intf->node); 480 412 if (class_intf->remove_dev) { 481 - list_for_each_entry(dev, &parent->p->class_devices, node) 413 + class_dev_iter_init(&iter, parent, NULL, NULL); 414 + while ((dev = class_dev_iter_next(&iter))) 482 415 class_intf->remove_dev(dev, class_intf); 416 + class_dev_iter_exit(&iter); 483 417 } 484 418 mutex_unlock(&parent->p->class_mutex); 485 419
+3 -3
drivers/base/core.c
··· 536 536 klist_init(&dev->klist_children, klist_children_get, 537 537 klist_children_put); 538 538 INIT_LIST_HEAD(&dev->dma_pools); 539 - INIT_LIST_HEAD(&dev->node); 540 539 init_MUTEX(&dev->sem); 541 540 spin_lock_init(&dev->devres_lock); 542 541 INIT_LIST_HEAD(&dev->devres_head); ··· 915 916 if (dev->class) { 916 917 mutex_lock(&dev->class->p->class_mutex); 917 918 /* tie the class to the device */ 918 - list_add_tail(&dev->node, &dev->class->p->class_devices); 919 + klist_add_tail(&dev->knode_class, 920 + &dev->class->p->class_devices); 919 921 920 922 /* notify any interfaces that the device is here */ 921 923 list_for_each_entry(class_intf, ··· 1032 1032 if (class_intf->remove_dev) 1033 1033 class_intf->remove_dev(dev, class_intf); 1034 1034 /* remove the device from the class list */ 1035 - list_del_init(&dev->node); 1035 + klist_del(&dev->knode_class); 1036 1036 mutex_unlock(&dev->class->p->class_mutex); 1037 1037 } 1038 1038 device_remove_file(dev, &uevent_attr);
+3 -3
drivers/block/aoe/aoeblk.c
··· 109 109 static int 110 110 aoedisk_add_sysfs(struct aoedev *d) 111 111 { 112 - return sysfs_create_group(&d->gd->dev.kobj, &attr_group); 112 + return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); 113 113 } 114 114 void 115 115 aoedisk_rm_sysfs(struct aoedev *d) 116 116 { 117 - sysfs_remove_group(&d->gd->dev.kobj, &attr_group); 117 + sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); 118 118 } 119 119 120 120 static int ··· 276 276 gd->first_minor = d->sysminor * AOE_PARTITIONS; 277 277 gd->fops = &aoe_bdops; 278 278 gd->private_data = d; 279 - gd->capacity = d->ssize; 279 + set_capacity(gd, d->ssize); 280 280 snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", 281 281 d->aoemajor, d->aoeminor); 282 282
+12 -7
drivers/block/aoe/aoecmd.c
··· 645 645 unsigned long flags; 646 646 u64 ssize; 647 647 648 - ssize = d->gd->capacity; 648 + ssize = get_capacity(d->gd); 649 649 bd = bdget_disk(d->gd, 0); 650 650 651 651 if (bd) { ··· 707 707 if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) 708 708 return; 709 709 if (d->gd != NULL) { 710 - d->gd->capacity = ssize; 710 + set_capacity(d->gd, ssize); 711 711 d->flags |= DEVFL_NEWSIZE; 712 712 } else 713 713 d->flags |= DEVFL_GDALLOC; ··· 756 756 unsigned long n_sect = bio->bi_size >> 9; 757 757 const int rw = bio_data_dir(bio); 758 758 struct hd_struct *part; 759 + int cpu; 759 760 760 - part = get_part(disk, sector); 761 - all_stat_inc(disk, part, ios[rw], sector); 762 - all_stat_add(disk, part, ticks[rw], duration, sector); 763 - all_stat_add(disk, part, sectors[rw], n_sect, sector); 764 - all_stat_add(disk, part, io_ticks, duration, sector); 761 + cpu = part_stat_lock(); 762 + part = disk_map_sector_rcu(disk, sector); 763 + 764 + part_stat_inc(cpu, part, ios[rw]); 765 + part_stat_add(cpu, part, ticks[rw], duration); 766 + part_stat_add(cpu, part, sectors[rw], n_sect); 767 + part_stat_add(cpu, part, io_ticks, duration); 768 + 769 + part_stat_unlock(); 765 770 } 766 771 767 772 void
+1 -1
drivers/block/aoe/aoedev.c
··· 91 91 } 92 92 93 93 if (d->gd) 94 - d->gd->capacity = 0; 94 + set_capacity(d->gd, 0); 95 95 96 96 d->flags &= ~DEVFL_UP; 97 97 }
+4 -4
drivers/block/cciss.c
··· 3460 3460 hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); 3461 3461 3462 3462 hba[i]->cmd_pool_bits = 3463 - kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG - 3464 - 1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL); 3463 + kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) 3464 + * sizeof(unsigned long), GFP_KERNEL); 3465 3465 hba[i]->cmd_pool = (CommandList_struct *) 3466 3466 pci_alloc_consistent(hba[i]->pdev, 3467 3467 hba[i]->nr_cmds * sizeof(CommandList_struct), ··· 3493 3493 /* command and error info recs zeroed out before 3494 3494 they are used */ 3495 3495 memset(hba[i]->cmd_pool_bits, 0, 3496 - ((hba[i]->nr_cmds + BITS_PER_LONG - 3497 - 1) / BITS_PER_LONG) * sizeof(unsigned long)); 3496 + DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) 3497 + * sizeof(unsigned long)); 3498 3498 3499 3499 hba[i]->num_luns = 0; 3500 3500 hba[i]->highest_lun = -1;
+97 -52
drivers/block/cciss_scsi.c
··· 365 365 366 366 static int 367 367 cciss_scsi_add_entry(int ctlr, int hostno, 368 - unsigned char *scsi3addr, int devtype, 368 + struct cciss_scsi_dev_t *device, 369 369 struct scsi2map *added, int *nadded) 370 370 { 371 371 /* assumes hba[ctlr]->scsi_ctlr->lock is held */ ··· 384 384 lun = 0; 385 385 /* Is this device a non-zero lun of a multi-lun device */ 386 386 /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */ 387 - if (scsi3addr[4] != 0) { 387 + if (device->scsi3addr[4] != 0) { 388 388 /* Search through our list and find the device which */ 389 389 /* has the same 8 byte LUN address, excepting byte 4. */ 390 390 /* Assign the same bus and target for this new LUN. */ 391 391 /* Use the logical unit number from the firmware. */ 392 - memcpy(addr1, scsi3addr, 8); 392 + memcpy(addr1, device->scsi3addr, 8); 393 393 addr1[4] = 0; 394 394 for (i = 0; i < n; i++) { 395 395 sd = &ccissscsi[ctlr].dev[i]; ··· 399 399 if (memcmp(addr1, addr2, 8) == 0) { 400 400 bus = sd->bus; 401 401 target = sd->target; 402 - lun = scsi3addr[4]; 402 + lun = device->scsi3addr[4]; 403 403 break; 404 404 } 405 405 } ··· 420 420 added[*nadded].lun = sd->lun; 421 421 (*nadded)++; 422 422 423 - memcpy(&sd->scsi3addr[0], scsi3addr, 8); 424 - sd->devtype = devtype; 423 + memcpy(sd->scsi3addr, device->scsi3addr, 8); 424 + memcpy(sd->vendor, device->vendor, sizeof(sd->vendor)); 425 + memcpy(sd->revision, device->revision, sizeof(sd->revision)); 426 + memcpy(sd->device_id, device->device_id, sizeof(sd->device_id)); 427 + sd->devtype = device->devtype; 428 + 425 429 ccissscsi[ctlr].ndevices++; 426 430 427 431 /* initially, (before registering with scsi layer) we don't ··· 491 487 CPQ_TAPE_UNLOCK(ctlr, flags); 492 488 } 493 489 490 + static int device_is_the_same(struct cciss_scsi_dev_t *dev1, 491 + struct cciss_scsi_dev_t *dev2) 492 + { 493 + return dev1->devtype == dev2->devtype && 494 + memcmp(dev1->scsi3addr, dev2->scsi3addr, 495 + sizeof(dev1->scsi3addr)) == 0 && 496 + 
memcmp(dev1->device_id, dev2->device_id, 497 + sizeof(dev1->device_id)) == 0 && 498 + memcmp(dev1->vendor, dev2->vendor, 499 + sizeof(dev1->vendor)) == 0 && 500 + memcmp(dev1->model, dev2->model, 501 + sizeof(dev1->model)) == 0 && 502 + memcmp(dev1->revision, dev2->revision, 503 + sizeof(dev1->revision)) == 0; 504 + } 505 + 494 506 static int 495 507 adjust_cciss_scsi_table(int ctlr, int hostno, 496 508 struct cciss_scsi_dev_t sd[], int nsds) ··· 552 532 for (j=0;j<nsds;j++) { 553 533 if (SCSI3ADDR_EQ(sd[j].scsi3addr, 554 534 csd->scsi3addr)) { 555 - if (sd[j].devtype == csd->devtype) 535 + if (device_is_the_same(&sd[j], csd)) 556 536 found=2; 557 537 else 558 538 found=1; ··· 568 548 cciss_scsi_remove_entry(ctlr, hostno, i, 569 549 removed, &nremoved); 570 550 /* remove ^^^, hence i not incremented */ 571 - } 572 - else if (found == 1) { /* device is different kind */ 551 + } else if (found == 1) { /* device is different in some way */ 573 552 changes++; 574 - printk("cciss%d: device c%db%dt%dl%d type changed " 575 - "(device type now %s).\n", 576 - ctlr, hostno, csd->bus, csd->target, csd->lun, 577 - scsi_device_type(csd->devtype)); 553 + printk("cciss%d: device c%db%dt%dl%d has changed.\n", 554 + ctlr, hostno, csd->bus, csd->target, csd->lun); 578 555 cciss_scsi_remove_entry(ctlr, hostno, i, 579 556 removed, &nremoved); 580 557 /* remove ^^^, hence i not incremented */ 581 - if (cciss_scsi_add_entry(ctlr, hostno, 582 - &sd[j].scsi3addr[0], sd[j].devtype, 558 + if (cciss_scsi_add_entry(ctlr, hostno, &sd[j], 583 559 added, &nadded) != 0) 584 560 /* we just removed one, so add can't fail. 
*/ 585 561 BUG(); 586 562 csd->devtype = sd[j].devtype; 563 + memcpy(csd->device_id, sd[j].device_id, 564 + sizeof(csd->device_id)); 565 + memcpy(csd->vendor, sd[j].vendor, 566 + sizeof(csd->vendor)); 567 + memcpy(csd->model, sd[j].model, 568 + sizeof(csd->model)); 569 + memcpy(csd->revision, sd[j].revision, 570 + sizeof(csd->revision)); 587 571 } else /* device is same as it ever was, */ 588 572 i++; /* so just move along. */ 589 573 } ··· 601 577 csd = &ccissscsi[ctlr].dev[j]; 602 578 if (SCSI3ADDR_EQ(sd[i].scsi3addr, 603 579 csd->scsi3addr)) { 604 - if (sd[i].devtype == csd->devtype) 580 + if (device_is_the_same(&sd[i], csd)) 605 581 found=2; /* found device */ 606 582 else 607 583 found=1; /* found a bug. */ ··· 610 586 } 611 587 if (!found) { 612 588 changes++; 613 - if (cciss_scsi_add_entry(ctlr, hostno, 614 - 615 - &sd[i].scsi3addr[0], sd[i].devtype, 589 + if (cciss_scsi_add_entry(ctlr, hostno, &sd[i], 616 590 added, &nadded) != 0) 617 591 break; 618 592 } else if (found == 1) { 619 593 /* should never happen... 
*/ 620 594 changes++; 621 - printk("cciss%d: device unexpectedly changed type\n", 622 - ctlr); 595 + printk(KERN_WARNING "cciss%d: device " 596 + "unexpectedly changed\n", ctlr); 623 597 /* but if it does happen, we just ignore that device */ 624 598 } 625 599 } ··· 1034 1012 1035 1013 static int 1036 1014 cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, 1037 - unsigned char *buf, unsigned char bufsize) 1015 + unsigned char page, unsigned char *buf, 1016 + unsigned char bufsize) 1038 1017 { 1039 1018 int rc; 1040 1019 CommandList_struct *cp; ··· 1055 1032 ei = cp->err_info; 1056 1033 1057 1034 cdb[0] = CISS_INQUIRY; 1058 - cdb[1] = 0; 1059 - cdb[2] = 0; 1035 + cdb[1] = (page != 0); 1036 + cdb[2] = page; 1060 1037 cdb[3] = 0; 1061 1038 cdb[4] = bufsize; 1062 1039 cdb[5] = 0; ··· 1074 1051 scsi_cmd_free(c, cp); 1075 1052 spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags); 1076 1053 return rc; 1054 + } 1055 + 1056 + /* Get the device id from inquiry page 0x83 */ 1057 + static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr, 1058 + unsigned char *device_id, int buflen) 1059 + { 1060 + int rc; 1061 + unsigned char *buf; 1062 + 1063 + if (buflen > 16) 1064 + buflen = 16; 1065 + buf = kzalloc(64, GFP_KERNEL); 1066 + if (!buf) 1067 + return -1; 1068 + rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64); 1069 + if (rc == 0) 1070 + memcpy(device_id, &buf[8], buflen); 1071 + kfree(buf); 1072 + return rc != 0; 1077 1073 } 1078 1074 1079 1075 static int ··· 1184 1142 ctlr_info_t *c; 1185 1143 __u32 num_luns=0; 1186 1144 unsigned char *ch; 1187 - /* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */ 1188 - struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; 1145 + struct cciss_scsi_dev_t *currentsd, *this_device; 1189 1146 int ncurrent=0; 1190 1147 int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8; 1191 1148 int i; 1192 1149 1193 1150 c = (ctlr_info_t *) hba[cntl_num]; 1194 1151 ld_buff = kzalloc(reportlunsize, 
GFP_KERNEL); 1195 - if (ld_buff == NULL) { 1196 - printk(KERN_ERR "cciss: out of memory\n"); 1197 - return; 1198 - } 1199 1152 inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); 1200 - if (inq_buff == NULL) { 1201 - printk(KERN_ERR "cciss: out of memory\n"); 1202 - kfree(ld_buff); 1203 - return; 1153 + currentsd = kzalloc(sizeof(*currentsd) * 1154 + (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL); 1155 + if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) { 1156 + printk(KERN_ERR "cciss: out of memory\n"); 1157 + goto out; 1204 1158 } 1205 - 1159 + this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; 1206 1160 if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) { 1207 1161 ch = &ld_buff->LUNListLength[0]; 1208 1162 num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; ··· 1217 1179 1218 1180 1219 1181 /* adjust our table of devices */ 1220 - for(i=0; i<num_luns; i++) 1221 - { 1222 - int devtype; 1223 - 1182 + for (i = 0; i < num_luns; i++) { 1224 1183 /* for each physical lun, do an inquiry */ 1225 1184 if (ld_buff->LUN[i][3] & 0xC0) continue; 1226 1185 memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE); 1227 1186 memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8); 1228 1187 1229 - if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff, 1230 - (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { 1188 + if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff, 1189 + (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) 1231 1190 /* Inquiry failed (msg printed already) */ 1232 - devtype = 0; /* so we will skip this device. */ 1233 - } else /* what kind of device is this? */ 1234 - devtype = (inq_buff[0] & 0x1f); 1191 + continue; /* so we will skip this device. 
*/ 1235 1192 1236 - switch (devtype) 1193 + this_device->devtype = (inq_buff[0] & 0x1f); 1194 + this_device->bus = -1; 1195 + this_device->target = -1; 1196 + this_device->lun = -1; 1197 + memcpy(this_device->scsi3addr, scsi3addr, 8); 1198 + memcpy(this_device->vendor, &inq_buff[8], 1199 + sizeof(this_device->vendor)); 1200 + memcpy(this_device->model, &inq_buff[16], 1201 + sizeof(this_device->model)); 1202 + memcpy(this_device->revision, &inq_buff[32], 1203 + sizeof(this_device->revision)); 1204 + memset(this_device->device_id, 0, 1205 + sizeof(this_device->device_id)); 1206 + cciss_scsi_get_device_id(hba[cntl_num], scsi3addr, 1207 + this_device->device_id, sizeof(this_device->device_id)); 1208 + 1209 + switch (this_device->devtype) 1237 1210 { 1238 1211 case 0x05: /* CD-ROM */ { 1239 1212 ··· 1269 1220 if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) { 1270 1221 printk(KERN_INFO "cciss%d: %s ignored, " 1271 1222 "too many devices.\n", cntl_num, 1272 - scsi_device_type(devtype)); 1223 + scsi_device_type(this_device->devtype)); 1273 1224 break; 1274 1225 } 1275 - memcpy(&currentsd[ncurrent].scsi3addr[0], 1276 - &scsi3addr[0], 8); 1277 - currentsd[ncurrent].devtype = devtype; 1278 - currentsd[ncurrent].bus = -1; 1279 - currentsd[ncurrent].target = -1; 1280 - currentsd[ncurrent].lun = -1; 1226 + currentsd[ncurrent] = *this_device; 1281 1227 ncurrent++; 1282 1228 break; 1283 1229 default: ··· 1284 1240 out: 1285 1241 kfree(inq_buff); 1286 1242 kfree(ld_buff); 1243 + kfree(currentsd); 1287 1244 return; 1288 1245 } 1289 1246
+4
drivers/block/cciss_scsi.h
··· 66 66 int devtype; 67 67 int bus, target, lun; /* as presented to the OS */ 68 68 unsigned char scsi3addr[8]; /* as presented to the HW */ 69 + unsigned char device_id[16]; /* from inquiry pg. 0x83 */ 70 + unsigned char vendor[8]; /* bytes 8-15 of inquiry data */ 71 + unsigned char model[16]; /* bytes 16-31 of inquiry data */ 72 + unsigned char revision[4]; /* bytes 32-35 of inquiry data */ 69 73 }; 70 74 71 75 struct cciss_scsi_hba_t {
+1 -1
drivers/block/cpqarray.c
··· 424 424 hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t), 425 425 &(hba[i]->cmd_pool_dhandle)); 426 426 hba[i]->cmd_pool_bits = kcalloc( 427 - (NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long), 427 + DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long), 428 428 GFP_KERNEL); 429 429 430 430 if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)
+19 -12
drivers/block/floppy.c
··· 423 423 * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical 424 424 * side 0 is on physical side 0 (but with the misnamed sector IDs). 425 425 * 'stretch' should probably be renamed to something more general, like 426 - * 'options'. Other parameters should be self-explanatory (see also 427 - * setfdprm(8)). 426 + * 'options'. 427 + * 428 + * Bits 2 through 9 of 'stretch' tell the number of the first sector. 429 + * The LSB (bit 2) is flipped. For most disks, the first sector 430 + * is 1 (represented by 0x00<<2). For some CP/M and music sampler 431 + * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2). 432 + * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2). 433 + * 434 + * Other parameters should be self-explanatory (see also setfdprm(8)). 428 435 */ 429 436 /* 430 437 Size ··· 1362 1355 } 1363 1356 1364 1357 /* Convert step rate from microseconds to milliseconds and 4 bits */ 1365 - srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR; 1358 + srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR); 1366 1359 if (slow_floppy) { 1367 1360 srt = srt / 4; 1368 1361 } 1369 1362 SUPBOUND(srt, 0xf); 1370 1363 INFBOUND(srt, 0); 1371 1364 1372 - hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR; 1365 + hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR); 1373 1366 if (hlt < 0x01) 1374 1367 hlt = 0x01; 1375 1368 else if (hlt > 0x7f) 1376 1369 hlt = hlt_max_code; 1377 1370 1378 - hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR; 1371 + hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR); 1379 1372 if (hut < 0x1) 1380 1373 hut = 0x1; 1381 1374 else if (hut > 0xf) ··· 2243 2236 } 2244 2237 } 2245 2238 } 2246 - if (_floppy->stretch & FD_ZEROBASED) { 2239 + if (_floppy->stretch & FD_SECTBASEMASK) { 2247 2240 for (count = 0; count < F_SECT_PER_TRACK; count++) 2248 - here[count].sect--; 2241 + here[count].sect += FD_SECTBASE(_floppy) - 1; 2249 2242 } 2250 
2243 } 2251 2244 ··· 2392 2385 2393 2386 #ifdef FLOPPY_SANITY_CHECK 2394 2387 if (nr_sectors / ssize > 2395 - (in_sector_offset + current_count_sectors + ssize - 1) / ssize) { 2388 + DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { 2396 2389 DPRINT("long rw: %x instead of %lx\n", 2397 2390 nr_sectors, current_count_sectors); 2398 2391 printk("rs=%d s=%d\n", R_SECTOR, SECTOR); ··· 2656 2649 } 2657 2650 HEAD = fsector_t / _floppy->sect; 2658 2651 2659 - if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) || 2652 + if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || 2660 2653 TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect) 2661 2654 max_sector = _floppy->sect; 2662 2655 ··· 2686 2679 CODE2SIZE; 2687 2680 SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; 2688 2681 SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 2689 - ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1); 2682 + FD_SECTBASE(_floppy); 2690 2683 2691 2684 /* tracksize describes the size which can be filled up with sectors 2692 2685 * of size ssize. ··· 3318 3311 g->head <= 0 || 3319 3312 g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || 3320 3313 /* check if reserved bits are set */ 3321 - (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0) 3314 + (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) 3322 3315 return -EINVAL; 3323 3316 if (type) { 3324 3317 if (!capable(CAP_SYS_ADMIN)) ··· 3363 3356 if (DRS->maxblock > user_params[drive].sect || 3364 3357 DRS->maxtrack || 3365 3358 ((user_params[drive].sect ^ oldStretch) & 3366 - (FD_SWAPSIDES | FD_ZEROBASED))) 3359 + (FD_SWAPSIDES | FD_SECTBASEMASK))) 3367 3360 invalidate_drive(bdev); 3368 3361 else 3369 3362 process_fd_request();
+2 -2
drivers/block/nbd.c
··· 403 403 BUG_ON(lo->magic != LO_MAGIC); 404 404 405 405 lo->pid = current->pid; 406 - ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr); 406 + ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); 407 407 if (ret) { 408 408 printk(KERN_ERR "nbd: sysfs_create_file failed!"); 409 409 return ret; ··· 412 412 while ((req = nbd_read_stat(lo)) != NULL) 413 413 nbd_end_request(req); 414 414 415 - sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr); 415 + sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); 416 416 return 0; 417 417 } 418 418
+2 -2
drivers/block/pktcdvd.c
··· 2544 2544 if (last_zone != zone) { 2545 2545 BUG_ON(last_zone != zone + pd->settings.size); 2546 2546 first_sectors = last_zone - bio->bi_sector; 2547 - bp = bio_split(bio, bio_split_pool, first_sectors); 2547 + bp = bio_split(bio, first_sectors); 2548 2548 BUG_ON(!bp); 2549 2549 pkt_make_request(q, &bp->bio1); 2550 2550 pkt_make_request(q, &bp->bio2); ··· 2911 2911 if (!disk->queue) 2912 2912 goto out_mem2; 2913 2913 2914 - pd->pkt_dev = MKDEV(disk->major, disk->first_minor); 2914 + pd->pkt_dev = MKDEV(pktdev_major, idx); 2915 2915 ret = pkt_new_dev(pd, dev); 2916 2916 if (ret) 2917 2917 goto out_new_dev;
+7 -4
drivers/block/ps3disk.c
··· 199 199 if (blk_fs_request(req)) { 200 200 if (ps3disk_submit_request_sg(dev, req)) 201 201 break; 202 - } else if (req->cmd_type == REQ_TYPE_FLUSH) { 202 + } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && 203 + req->cmd[0] == REQ_LB_OP_FLUSH) { 203 204 if (ps3disk_submit_flush_request(dev, req)) 204 205 break; 205 206 } else { ··· 258 257 return IRQ_HANDLED; 259 258 } 260 259 261 - if (req->cmd_type == REQ_TYPE_FLUSH) { 260 + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && 261 + req->cmd[0] == REQ_LB_OP_FLUSH) { 262 262 read = 0; 263 263 num_sectors = req->hard_cur_sectors; 264 264 op = "flush"; ··· 407 405 408 406 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); 409 407 410 - req->cmd_type = REQ_TYPE_FLUSH; 408 + req->cmd_type = REQ_TYPE_LINUX_BLOCK; 409 + req->cmd[0] = REQ_LB_OP_FLUSH; 411 410 } 412 411 413 412 static unsigned long ps3disk_mask; ··· 541 538 struct ps3disk_private *priv = dev->sbd.core.driver_data; 542 539 543 540 mutex_lock(&ps3disk_mask_mutex); 544 - __clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS, 541 + __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, 545 542 &ps3disk_mask); 546 543 mutex_unlock(&ps3disk_mask_mutex); 547 544 del_gendisk(priv->gendisk);
+7 -7
drivers/block/virtio_blk.c
··· 47 47 48 48 spin_lock_irqsave(&vblk->lock, flags); 49 49 while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { 50 - int uptodate; 50 + int error; 51 51 switch (vbr->status) { 52 52 case VIRTIO_BLK_S_OK: 53 - uptodate = 1; 53 + error = 0; 54 54 break; 55 55 case VIRTIO_BLK_S_UNSUPP: 56 - uptodate = -ENOTTY; 56 + error = -ENOTTY; 57 57 break; 58 58 default: 59 - uptodate = 0; 59 + error = -EIO; 60 60 break; 61 61 } 62 62 63 - end_dequeued_request(vbr->req, uptodate); 63 + __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req)); 64 64 list_del(&vbr->list); 65 65 mempool_free(vbr, vblk->pool); 66 66 } ··· 84 84 if (blk_fs_request(vbr->req)) { 85 85 vbr->out_hdr.type = 0; 86 86 vbr->out_hdr.sector = vbr->req->sector; 87 - vbr->out_hdr.ioprio = vbr->req->ioprio; 87 + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); 88 88 } else if (blk_pc_request(vbr->req)) { 89 89 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; 90 90 vbr->out_hdr.sector = 0; 91 - vbr->out_hdr.ioprio = vbr->req->ioprio; 91 + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); 92 92 } else { 93 93 /* We don't put anything else in the queue. */ 94 94 BUG();
+55 -21
drivers/block/xen-blkfront.c
··· 105 105 #define GRANT_INVALID_REF 0 106 106 107 107 #define PARTS_PER_DISK 16 108 + #define PARTS_PER_EXT_DISK 256 108 109 109 110 #define BLKIF_MAJOR(dev) ((dev)>>8) 110 111 #define BLKIF_MINOR(dev) ((dev) & 0xff) 111 112 112 - #define DEV_NAME "xvd" /* name in /dev */ 113 + #define EXT_SHIFT 28 114 + #define EXTENDED (1<<EXT_SHIFT) 115 + #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 116 + #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 113 117 114 - /* Information about our VBDs. */ 115 - #define MAX_VBDS 64 116 - static LIST_HEAD(vbds_list); 118 + #define DEV_NAME "xvd" /* name in /dev */ 117 119 118 120 static int get_id_from_freelist(struct blkfront_info *info) 119 121 { ··· 388 386 } 389 387 390 388 391 - static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, 392 - int vdevice, u16 vdisk_info, u16 sector_size, 393 - struct blkfront_info *info) 389 + static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 390 + struct blkfront_info *info, 391 + u16 vdisk_info, u16 sector_size) 394 392 { 395 393 struct gendisk *gd; 396 394 int nr_minors = 1; 397 395 int err = -ENODEV; 396 + unsigned int offset; 397 + int minor; 398 + int nr_parts; 398 399 399 400 BUG_ON(info->gd != NULL); 400 401 BUG_ON(info->rq != NULL); 401 402 402 - if ((minor % PARTS_PER_DISK) == 0) 403 - nr_minors = PARTS_PER_DISK; 403 + if ((info->vdevice>>EXT_SHIFT) > 1) { 404 + /* this is above the extended range; something is wrong */ 405 + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); 406 + return -ENODEV; 407 + } 408 + 409 + if (!VDEV_IS_EXTENDED(info->vdevice)) { 410 + minor = BLKIF_MINOR(info->vdevice); 411 + nr_parts = PARTS_PER_DISK; 412 + } else { 413 + minor = BLKIF_MINOR_EXT(info->vdevice); 414 + nr_parts = PARTS_PER_EXT_DISK; 415 + } 416 + 417 + if ((minor % nr_parts) == 0) 418 + nr_minors = nr_parts; 404 419 405 420 gd = alloc_disk(nr_minors); 406 421 if (gd == NULL) 407 422 goto out; 408 423 409 - if (nr_minors > 1) 
410 - sprintf(gd->disk_name, "%s%c", DEV_NAME, 411 - 'a' + minor / PARTS_PER_DISK); 412 - else 413 - sprintf(gd->disk_name, "%s%c%d", DEV_NAME, 414 - 'a' + minor / PARTS_PER_DISK, 415 - minor % PARTS_PER_DISK); 424 + offset = minor / nr_parts; 425 + 426 + if (nr_minors > 1) { 427 + if (offset < 26) 428 + sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); 429 + else 430 + sprintf(gd->disk_name, "%s%c%c", DEV_NAME, 431 + 'a' + ((offset / 26)-1), 'a' + (offset % 26)); 432 + } else { 433 + if (offset < 26) 434 + sprintf(gd->disk_name, "%s%c%d", DEV_NAME, 435 + 'a' + offset, 436 + minor & (nr_parts - 1)); 437 + else 438 + sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, 439 + 'a' + ((offset / 26) - 1), 440 + 'a' + (offset % 26), 441 + minor & (nr_parts - 1)); 442 + } 416 443 417 444 gd->major = XENVBD_MAJOR; 418 445 gd->first_minor = minor; ··· 730 699 err = xenbus_scanf(XBT_NIL, dev->nodename, 731 700 "virtual-device", "%i", &vdevice); 732 701 if (err != 1) { 733 - xenbus_dev_fatal(dev, err, "reading virtual-device"); 734 - return err; 702 + /* go looking in the extended area instead */ 703 + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", 704 + "%i", &vdevice); 705 + if (err != 1) { 706 + xenbus_dev_fatal(dev, err, "reading virtual-device"); 707 + return err; 708 + } 735 709 } 736 710 737 711 info = kzalloc(sizeof(*info), GFP_KERNEL); ··· 897 861 if (err) 898 862 info->feature_barrier = 0; 899 863 900 - err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), 901 - sectors, info->vdevice, 902 - binfo, sector_size, info); 864 + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 903 865 if (err) { 904 866 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 905 867 info->xbdev->otherend);
+1 -1
drivers/cdrom/cdrom.c
··· 2097 2097 2098 2098 len = nr * CD_FRAMESIZE_RAW; 2099 2099 2100 - ret = blk_rq_map_user(q, rq, ubuf, len); 2100 + ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); 2101 2101 if (ret) 2102 2102 break; 2103 2103
+2 -2
drivers/cdrom/gdrom.c
··· 624 624 ctrl_outb(1, GDROM_DMA_STATUS_REG); 625 625 wait_event_interruptible_timeout(request_queue, 626 626 gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); 627 - err = gd.transfer; 627 + err = gd.transfer ? -EIO : 0; 628 628 gd.transfer = 0; 629 629 gd.pending = 0; 630 630 /* now seek to take the request spinlock 631 631 * before handling ending the request */ 632 632 spin_lock(&gdrom_lock); 633 633 list_del_init(&req->queuelist); 634 - end_dequeued_request(req, 1 - err); 634 + __blk_end_request(req, err, blk_rq_bytes(req)); 635 635 } 636 636 spin_unlock(&gdrom_lock); 637 637 kfree(read_command);
+3 -3
drivers/char/random.c
··· 661 661 if (!disk || !disk->random) 662 662 return; 663 663 /* first major is 1, so we get >= 0x200 here */ 664 - DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor); 664 + DEBUG_ENT("disk event %d:%d\n", 665 + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk))); 665 666 666 - add_timer_randomness(disk->random, 667 - 0x100 + MKDEV(disk->major, disk->first_minor)); 667 + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); 668 668 } 669 669 #endif 670 670
+1 -1
drivers/ide/ide-cd.c
··· 1113 1113 1114 1114 if (write) { 1115 1115 /* disk has become write protected */ 1116 - if (cd->disk->policy) { 1116 + if (get_disk_ro(cd->disk)) { 1117 1117 cdrom_end_request(drive, 0); 1118 1118 return ide_stopped; 1119 1119 }
+11 -4
drivers/ide/ide-disk.c
··· 41 41 #include <asm/io.h> 42 42 #include <asm/div64.h> 43 43 44 + #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) 45 + #define IDE_DISK_MINORS (1 << PARTN_BITS) 46 + #else 47 + #define IDE_DISK_MINORS 0 48 + #endif 49 + 44 50 struct ide_disk_obj { 45 51 ide_drive_t *drive; 46 52 ide_driver_t *driver; ··· 1157 1151 if (!idkp) 1158 1152 goto failed; 1159 1153 1160 - g = alloc_disk_node(1 << PARTN_BITS, 1161 - hwif_to_node(drive->hwif)); 1154 + g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif)); 1162 1155 if (!g) 1163 1156 goto out_free_idkp; 1164 1157 ··· 1183 1178 } else 1184 1179 drive->attach = 1; 1185 1180 1186 - g->minors = 1 << PARTN_BITS; 1181 + g->minors = IDE_DISK_MINORS; 1187 1182 g->driverfs_dev = &drive->gendev; 1188 - g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0; 1183 + g->flags |= GENHD_FL_EXT_DEVT; 1184 + if (drive->removable) 1185 + g->flags |= GENHD_FL_REMOVABLE; 1189 1186 set_capacity(g, idedisk_capacity(drive)); 1190 1187 g->fops = &idedisk_ops; 1191 1188 add_disk(g);
+1 -1
drivers/ide/ide-probe.c
··· 1188 1188 { 1189 1189 struct gendisk *p = data; 1190 1190 *part &= (1 << PARTN_BITS) - 1; 1191 - return &p->dev.kobj; 1191 + return &disk_to_dev(p)->kobj; 1192 1192 } 1193 1193 1194 1194 static int exact_lock(dev_t dev, void *data)
+3 -3
drivers/md/dm-ioctl.c
··· 426 426 old_nl->next = (uint32_t) ((void *) nl - 427 427 (void *) old_nl); 428 428 disk = dm_disk(hc->md); 429 - nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); 429 + nl->dev = huge_encode_dev(disk_devt(disk)); 430 430 nl->next = 0; 431 431 strcpy(nl->name, hc->name); 432 432 ··· 539 539 if (dm_suspended(md)) 540 540 param->flags |= DM_SUSPEND_FLAG; 541 541 542 - param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); 542 + param->dev = huge_encode_dev(disk_devt(disk)); 543 543 544 544 /* 545 545 * Yes, this will be out of date by the time it gets back ··· 548 548 */ 549 549 param->open_count = dm_open_count(md); 550 550 551 - if (disk->policy) 551 + if (get_disk_ro(disk)) 552 552 param->flags |= DM_READONLY_FLAG; 553 553 554 554 param->event_nr = dm_get_event_nr(md);
+14 -1
drivers/md/dm-mpath.c
··· 33 33 unsigned fail_count; /* Cumulative failure count */ 34 34 35 35 struct dm_path path; 36 + struct work_struct deactivate_path; 36 37 }; 37 38 38 39 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) ··· 113 112 static void process_queued_ios(struct work_struct *work); 114 113 static void trigger_event(struct work_struct *work); 115 114 static void activate_path(struct work_struct *work); 115 + static void deactivate_path(struct work_struct *work); 116 116 117 117 118 118 /*----------------------------------------------- ··· 124 122 { 125 123 struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); 126 124 127 - if (pgpath) 125 + if (pgpath) { 128 126 pgpath->path.is_active = 1; 127 + INIT_WORK(&pgpath->deactivate_path, deactivate_path); 128 + } 129 129 130 130 return pgpath; 131 131 } ··· 135 131 static void free_pgpath(struct pgpath *pgpath) 136 132 { 137 133 kfree(pgpath); 134 + } 135 + 136 + static void deactivate_path(struct work_struct *work) 137 + { 138 + struct pgpath *pgpath = 139 + container_of(work, struct pgpath, deactivate_path); 140 + 141 + blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue); 138 142 } 139 143 140 144 static struct priority_group *alloc_priority_group(void) ··· 882 870 pgpath->path.dev->name, m->nr_valid_paths); 883 871 884 872 queue_work(kmultipathd, &m->trigger_event); 873 + queue_work(kmultipathd, &pgpath->deactivate_path); 885 874 886 875 out: 887 876 spin_unlock_irqrestore(&m->lock, flags);
+2 -2
drivers/md/dm-stripe.c
··· 284 284 285 285 memset(major_minor, 0, sizeof(major_minor)); 286 286 sprintf(major_minor, "%d:%d", 287 - bio->bi_bdev->bd_disk->major, 288 - bio->bi_bdev->bd_disk->first_minor); 287 + MAJOR(disk_devt(bio->bi_bdev->bd_disk)), 288 + MINOR(disk_devt(bio->bi_bdev->bd_disk))); 289 289 290 290 /* 291 291 * Test to see which stripe drive triggered the event
+23 -17
drivers/md/dm.c
··· 377 377 static void start_io_acct(struct dm_io *io) 378 378 { 379 379 struct mapped_device *md = io->md; 380 + int cpu; 380 381 381 382 io->start_time = jiffies; 382 383 383 - preempt_disable(); 384 - disk_round_stats(dm_disk(md)); 385 - preempt_enable(); 386 - dm_disk(md)->in_flight = atomic_inc_return(&md->pending); 384 + cpu = part_stat_lock(); 385 + part_round_stats(cpu, &dm_disk(md)->part0); 386 + part_stat_unlock(); 387 + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); 387 388 } 388 389 389 390 static int end_io_acct(struct dm_io *io) ··· 392 391 struct mapped_device *md = io->md; 393 392 struct bio *bio = io->bio; 394 393 unsigned long duration = jiffies - io->start_time; 395 - int pending; 394 + int pending, cpu; 396 395 int rw = bio_data_dir(bio); 397 396 398 - preempt_disable(); 399 - disk_round_stats(dm_disk(md)); 400 - preempt_enable(); 401 - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); 397 + cpu = part_stat_lock(); 398 + part_round_stats(cpu, &dm_disk(md)->part0); 399 + part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); 400 + part_stat_unlock(); 402 401 403 - disk_stat_add(dm_disk(md), ticks[rw], duration); 402 + dm_disk(md)->part0.in_flight = pending = 403 + atomic_dec_return(&md->pending); 404 404 405 405 return !pending; 406 406 } ··· 887 885 int r = -EIO; 888 886 int rw = bio_data_dir(bio); 889 887 struct mapped_device *md = q->queuedata; 888 + int cpu; 890 889 891 890 /* 892 891 * There is no use in forwarding any barrier request since we can't ··· 900 897 901 898 down_read(&md->io_lock); 902 899 903 - disk_stat_inc(dm_disk(md), ios[rw]); 904 - disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); 900 + cpu = part_stat_lock(); 901 + part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); 902 + part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); 903 + part_stat_unlock(); 905 904 906 905 /* 907 906 * If we're suspended we have to queue ··· 1151 1146 1152 1147 static void 
free_dev(struct mapped_device *md) 1153 1148 { 1154 - int minor = md->disk->first_minor; 1149 + int minor = MINOR(disk_devt(md->disk)); 1155 1150 1156 1151 if (md->suspended_bdev) { 1157 1152 unlock_fs(md); ··· 1187 1182 list_splice_init(&md->uevent_list, &uevents); 1188 1183 spin_unlock_irqrestore(&md->uevent_lock, flags); 1189 1184 1190 - dm_send_uevents(&uevents, &md->disk->dev.kobj); 1185 + dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); 1191 1186 1192 1187 atomic_inc(&md->event_nr); 1193 1188 wake_up(&md->eventq); ··· 1272 1267 1273 1268 md = idr_find(&_minor_idr, minor); 1274 1269 if (md && (md == MINOR_ALLOCED || 1275 - (dm_disk(md)->first_minor != minor) || 1270 + (MINOR(disk_devt(dm_disk(md))) != minor) || 1276 1271 test_bit(DMF_FREEING, &md->flags))) { 1277 1272 md = NULL; 1278 1273 goto out; ··· 1323 1318 1324 1319 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { 1325 1320 map = dm_get_table(md); 1326 - idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); 1321 + idr_replace(&_minor_idr, MINOR_ALLOCED, 1322 + MINOR(disk_devt(dm_disk(md)))); 1327 1323 set_bit(DMF_FREEING, &md->flags); 1328 1324 spin_unlock(&_minor_lock); 1329 1325 if (!dm_suspended(md)) { ··· 1644 1638 *---------------------------------------------------------------*/ 1645 1639 void dm_kobject_uevent(struct mapped_device *md) 1646 1640 { 1647 - kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); 1641 + kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); 1648 1642 } 1649 1643 1650 1644 uint32_t dm_next_uevent_seq(struct mapped_device *md)
+7 -3
drivers/md/linear.c
··· 318 318 mddev_t *mddev = q->queuedata; 319 319 dev_info_t *tmp_dev; 320 320 sector_t block; 321 + int cpu; 321 322 322 323 if (unlikely(bio_barrier(bio))) { 323 324 bio_endio(bio, -EOPNOTSUPP); 324 325 return 0; 325 326 } 326 327 327 - disk_stat_inc(mddev->gendisk, ios[rw]); 328 - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 328 + cpu = part_stat_lock(); 329 + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 330 + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], 331 + bio_sectors(bio)); 332 + part_stat_unlock(); 329 333 330 334 tmp_dev = which_dev(mddev, bio->bi_sector); 331 335 block = bio->bi_sector >> 1; ··· 353 349 * split it. 354 350 */ 355 351 struct bio_pair *bp; 356 - bp = bio_split(bio, bio_split_pool, 352 + bp = bio_split(bio, 357 353 ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); 358 354 if (linear_make_request(q, &bp->bio1)) 359 355 generic_make_request(&bp->bio1);
+6 -9
drivers/md/md.c
··· 1464 1464 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) 1465 1465 goto fail; 1466 1466 1467 - if (rdev->bdev->bd_part) 1468 - ko = &rdev->bdev->bd_part->dev.kobj; 1469 - else 1470 - ko = &rdev->bdev->bd_disk->dev.kobj; 1467 + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; 1471 1468 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { 1472 1469 kobject_del(&rdev->kobj); 1473 1470 goto fail; ··· 3467 3470 disk->queue = mddev->queue; 3468 3471 add_disk(disk); 3469 3472 mddev->gendisk = disk; 3470 - error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, 3471 - "%s", "md"); 3473 + error = kobject_init_and_add(&mddev->kobj, &md_ktype, 3474 + &disk_to_dev(disk)->kobj, "%s", "md"); 3472 3475 mutex_unlock(&disks_mutex); 3473 3476 if (error) 3474 3477 printk(KERN_WARNING "md: cannot register %s/md - name in use\n", ··· 3758 3761 sysfs_notify(&mddev->kobj, NULL, "array_state"); 3759 3762 sysfs_notify(&mddev->kobj, NULL, "sync_action"); 3760 3763 sysfs_notify(&mddev->kobj, NULL, "degraded"); 3761 - kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); 3764 + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 3762 3765 return 0; 3763 3766 } 3764 3767 ··· 5546 5549 rcu_read_lock(); 5547 5550 rdev_for_each_rcu(rdev, mddev) { 5548 5551 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; 5549 - curr_events = disk_stat_read(disk, sectors[0]) + 5550 - disk_stat_read(disk, sectors[1]) - 5552 + curr_events = part_stat_read(&disk->part0, sectors[0]) + 5553 + part_stat_read(&disk->part0, sectors[1]) - 5551 5554 atomic_read(&disk->sync_io); 5552 5555 /* sync IO will cause sync_io to increase before the disk_stats 5553 5556 * as sync_io is counted when a request starts, and
+6 -2
drivers/md/multipath.c
··· 147 147 struct multipath_bh * mp_bh; 148 148 struct multipath_info *multipath; 149 149 const int rw = bio_data_dir(bio); 150 + int cpu; 150 151 151 152 if (unlikely(bio_barrier(bio))) { 152 153 bio_endio(bio, -EOPNOTSUPP); ··· 159 158 mp_bh->master_bio = bio; 160 159 mp_bh->mddev = mddev; 161 160 162 - disk_stat_inc(mddev->gendisk, ios[rw]); 163 - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 161 + cpu = part_stat_lock(); 162 + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 163 + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], 164 + bio_sectors(bio)); 165 + part_stat_unlock(); 164 166 165 167 mp_bh->path = multipath_map(conf); 166 168 if (mp_bh->path < 0) {
+7 -3
drivers/md/raid0.c
··· 399 399 sector_t chunk; 400 400 sector_t block, rsect; 401 401 const int rw = bio_data_dir(bio); 402 + int cpu; 402 403 403 404 if (unlikely(bio_barrier(bio))) { 404 405 bio_endio(bio, -EOPNOTSUPP); 405 406 return 0; 406 407 } 407 408 408 - disk_stat_inc(mddev->gendisk, ios[rw]); 409 - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 409 + cpu = part_stat_lock(); 410 + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 411 + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], 412 + bio_sectors(bio)); 413 + part_stat_unlock(); 410 414 411 415 chunk_size = mddev->chunk_size >> 10; 412 416 chunk_sects = mddev->chunk_size >> 9; ··· 427 423 /* This is a one page bio that upper layers 428 424 * refuse to split for us, so we need to split it. 429 425 */ 430 - bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); 426 + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); 431 427 if (raid0_make_request(q, &bp->bio1)) 432 428 generic_make_request(&bp->bio1); 433 429 if (raid0_make_request(q, &bp->bio2))
+6 -7
drivers/md/raid1.c
··· 779 779 struct page **behind_pages = NULL; 780 780 const int rw = bio_data_dir(bio); 781 781 const int do_sync = bio_sync(bio); 782 - int do_barriers; 782 + int cpu, do_barriers; 783 783 mdk_rdev_t *blocked_rdev; 784 784 785 785 /* ··· 804 804 805 805 bitmap = mddev->bitmap; 806 806 807 - disk_stat_inc(mddev->gendisk, ios[rw]); 808 - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 807 + cpu = part_stat_lock(); 808 + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 809 + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], 810 + bio_sectors(bio)); 811 + part_stat_unlock(); 809 812 810 813 /* 811 814 * make_request() can abort the operation when READA is being ··· 1305 1302 sbio->bi_size = r1_bio->sectors << 9; 1306 1303 sbio->bi_idx = 0; 1307 1304 sbio->bi_phys_segments = 0; 1308 - sbio->bi_hw_segments = 0; 1309 - sbio->bi_hw_front_size = 0; 1310 - sbio->bi_hw_back_size = 0; 1311 1305 sbio->bi_flags &= ~(BIO_POOL_MASK - 1); 1312 1306 sbio->bi_flags |= 1 << BIO_UPTODATE; 1313 1307 sbio->bi_next = NULL; ··· 1790 1790 bio->bi_vcnt = 0; 1791 1791 bio->bi_idx = 0; 1792 1792 bio->bi_phys_segments = 0; 1793 - bio->bi_hw_segments = 0; 1794 1793 bio->bi_size = 0; 1795 1794 bio->bi_end_io = NULL; 1796 1795 bio->bi_private = NULL;
+7 -7
drivers/md/raid10.c
··· 789 789 mirror_info_t *mirror; 790 790 r10bio_t *r10_bio; 791 791 struct bio *read_bio; 792 + int cpu; 792 793 int i; 793 794 int chunk_sects = conf->chunk_mask + 1; 794 795 const int rw = bio_data_dir(bio); ··· 817 816 /* This is a one page bio that upper layers 818 817 * refuse to split for us, so we need to split it. 819 818 */ 820 - bp = bio_split(bio, bio_split_pool, 819 + bp = bio_split(bio, 821 820 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); 822 821 if (make_request(q, &bp->bio1)) 823 822 generic_make_request(&bp->bio1); ··· 844 843 */ 845 844 wait_barrier(conf); 846 845 847 - disk_stat_inc(mddev->gendisk, ios[rw]); 848 - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 846 + cpu = part_stat_lock(); 847 + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 848 + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], 849 + bio_sectors(bio)); 850 + part_stat_unlock(); 849 851 850 852 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); 851 853 ··· 1349 1345 tbio->bi_size = r10_bio->sectors << 9; 1350 1346 tbio->bi_idx = 0; 1351 1347 tbio->bi_phys_segments = 0; 1352 - tbio->bi_hw_segments = 0; 1353 - tbio->bi_hw_front_size = 0; 1354 - tbio->bi_hw_back_size = 0; 1355 1348 tbio->bi_flags &= ~(BIO_POOL_MASK - 1); 1356 1349 tbio->bi_flags |= 1 << BIO_UPTODATE; 1357 1350 tbio->bi_next = NULL; ··· 1948 1947 bio->bi_vcnt = 0; 1949 1948 bio->bi_idx = 0; 1950 1949 bio->bi_phys_segments = 0; 1951 - bio->bi_hw_segments = 0; 1952 1950 bio->bi_size = 0; 1953 1951 } 1954 1952
+57 -18
drivers/md/raid5.c
··· 101 101 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 102 102 #endif 103 103 104 + /* 105 + * We maintain a biased count of active stripes in the bottom 16 bits of 106 + * bi_phys_segments, and a count of processed stripes in the upper 16 bits 107 + */ 108 + static inline int raid5_bi_phys_segments(struct bio *bio) 109 + { 110 + return bio->bi_phys_segments & 0xffff; 111 + } 112 + 113 + static inline int raid5_bi_hw_segments(struct bio *bio) 114 + { 115 + return (bio->bi_phys_segments >> 16) & 0xffff; 116 + } 117 + 118 + static inline int raid5_dec_bi_phys_segments(struct bio *bio) 119 + { 120 + --bio->bi_phys_segments; 121 + return raid5_bi_phys_segments(bio); 122 + } 123 + 124 + static inline int raid5_dec_bi_hw_segments(struct bio *bio) 125 + { 126 + unsigned short val = raid5_bi_hw_segments(bio); 127 + 128 + --val; 129 + bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); 130 + return val; 131 + } 132 + 133 + static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) 134 + { 135 + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); 136 + } 137 + 104 138 static inline int raid6_next_disk(int disk, int raid_disks) 105 139 { 106 140 disk++; ··· 541 507 while (rbi && rbi->bi_sector < 542 508 dev->sector + STRIPE_SECTORS) { 543 509 rbi2 = r5_next_bio(rbi, dev->sector); 544 - if (--rbi->bi_phys_segments == 0) { 510 + if (!raid5_dec_bi_phys_segments(rbi)) { 545 511 rbi->bi_next = return_bi; 546 512 return_bi = rbi; 547 513 } ··· 1759 1725 if (*bip) 1760 1726 bi->bi_next = *bip; 1761 1727 *bip = bi; 1762 - bi->bi_phys_segments ++; 1728 + bi->bi_phys_segments++; 1763 1729 spin_unlock_irq(&conf->device_lock); 1764 1730 spin_unlock(&sh->lock); 1765 1731 ··· 1853 1819 sh->dev[i].sector + STRIPE_SECTORS) { 1854 1820 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 1855 1821 clear_bit(BIO_UPTODATE, &bi->bi_flags); 1856 - if (--bi->bi_phys_segments == 0) { 1822 + if 
(!raid5_dec_bi_phys_segments(bi)) { 1857 1823 md_write_end(conf->mddev); 1858 1824 bi->bi_next = *return_bi; 1859 1825 *return_bi = bi; ··· 1868 1834 sh->dev[i].sector + STRIPE_SECTORS) { 1869 1835 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 1870 1836 clear_bit(BIO_UPTODATE, &bi->bi_flags); 1871 - if (--bi->bi_phys_segments == 0) { 1837 + if (!raid5_dec_bi_phys_segments(bi)) { 1872 1838 md_write_end(conf->mddev); 1873 1839 bi->bi_next = *return_bi; 1874 1840 *return_bi = bi; ··· 1892 1858 struct bio *nextbi = 1893 1859 r5_next_bio(bi, sh->dev[i].sector); 1894 1860 clear_bit(BIO_UPTODATE, &bi->bi_flags); 1895 - if (--bi->bi_phys_segments == 0) { 1861 + if (!raid5_dec_bi_phys_segments(bi)) { 1896 1862 bi->bi_next = *return_bi; 1897 1863 *return_bi = bi; 1898 1864 } ··· 2067 2033 while (wbi && wbi->bi_sector < 2068 2034 dev->sector + STRIPE_SECTORS) { 2069 2035 wbi2 = r5_next_bio(wbi, dev->sector); 2070 - if (--wbi->bi_phys_segments == 0) { 2036 + if (!raid5_dec_bi_phys_segments(wbi)) { 2071 2037 md_write_end(conf->mddev); 2072 2038 wbi->bi_next = *return_bi; 2073 2039 *return_bi = wbi; ··· 2848 2814 copy_data(0, rbi, dev->page, dev->sector); 2849 2815 rbi2 = r5_next_bio(rbi, dev->sector); 2850 2816 spin_lock_irq(&conf->device_lock); 2851 - if (--rbi->bi_phys_segments == 0) { 2817 + if (!raid5_dec_bi_phys_segments(rbi)) { 2852 2818 rbi->bi_next = return_bi; 2853 2819 return_bi = rbi; 2854 2820 } ··· 3189 3155 if(bi) { 3190 3156 conf->retry_read_aligned_list = bi->bi_next; 3191 3157 bi->bi_next = NULL; 3158 + /* 3159 + * this sets the active stripe count to 1 and the processed 3160 + * stripe count to zero (upper 16 bits) 3161 + */ 3192 3162 bi->bi_phys_segments = 1; /* biased count of active stripes */ 3193 - bi->bi_hw_segments = 0; /* count of processed stripes */ 3194 3163 } 3195 3164 3196 3165 return bi; ··· 3243 3206 if ((bi->bi_size>>9) > q->max_sectors) 3244 3207 return 0; 3245 3208 blk_recount_segments(q, bi); 3246 - if (bi->bi_phys_segments > 
q->max_phys_segments || 3247 - bi->bi_hw_segments > q->max_hw_segments) 3209 + if (bi->bi_phys_segments > q->max_phys_segments) 3248 3210 return 0; 3249 3211 3250 3212 if (q->merge_bvec_fn) ··· 3387 3351 sector_t logical_sector, last_sector; 3388 3352 struct stripe_head *sh; 3389 3353 const int rw = bio_data_dir(bi); 3390 - int remaining; 3354 + int cpu, remaining; 3391 3355 3392 3356 if (unlikely(bio_barrier(bi))) { 3393 3357 bio_endio(bi, -EOPNOTSUPP); ··· 3396 3360 3397 3361 md_write_start(mddev, bi); 3398 3362 3399 - disk_stat_inc(mddev->gendisk, ios[rw]); 3400 - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); 3363 + cpu = part_stat_lock(); 3364 + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 3365 + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], 3366 + bio_sectors(bi)); 3367 + part_stat_unlock(); 3401 3368 3402 3369 if (rw == READ && 3403 3370 mddev->reshape_position == MaxSector && ··· 3507 3468 3508 3469 } 3509 3470 spin_lock_irq(&conf->device_lock); 3510 - remaining = --bi->bi_phys_segments; 3471 + remaining = raid5_dec_bi_phys_segments(bi); 3511 3472 spin_unlock_irq(&conf->device_lock); 3512 3473 if (remaining == 0) { 3513 3474 ··· 3791 3752 sector += STRIPE_SECTORS, 3792 3753 scnt++) { 3793 3754 3794 - if (scnt < raid_bio->bi_hw_segments) 3755 + if (scnt < raid5_bi_hw_segments(raid_bio)) 3795 3756 /* already done this stripe */ 3796 3757 continue; 3797 3758 ··· 3799 3760 3800 3761 if (!sh) { 3801 3762 /* failed to get a stripe - must wait */ 3802 - raid_bio->bi_hw_segments = scnt; 3763 + raid5_set_bi_hw_segments(raid_bio, scnt); 3803 3764 conf->retry_read_aligned = raid_bio; 3804 3765 return handled; 3805 3766 } ··· 3807 3768 set_bit(R5_ReadError, &sh->dev[dd_idx].flags); 3808 3769 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { 3809 3770 release_stripe(sh); 3810 - raid_bio->bi_hw_segments = scnt; 3771 + raid5_set_bi_hw_segments(raid_bio, scnt); 3811 3772 conf->retry_read_aligned = raid_bio; 3812 3773 return handled; 3813 3774 
} ··· 3817 3778 handled++; 3818 3779 } 3819 3780 spin_lock_irq(&conf->device_lock); 3820 - remaining = --raid_bio->bi_phys_segments; 3781 + remaining = raid5_dec_bi_phys_segments(raid_bio); 3821 3782 spin_unlock_irq(&conf->device_lock); 3822 3783 if (remaining == 0) 3823 3784 bio_endio(raid_bio, 0);
+2 -2
drivers/memstick/core/mspro_block.c
··· 197 197 static int mspro_block_disk_release(struct gendisk *disk) 198 198 { 199 199 struct mspro_block_data *msb = disk->private_data; 200 - int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT; 200 + int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT; 201 201 202 202 mutex_lock(&mspro_block_disk_lock); 203 203 ··· 828 828 829 829 if (msb->eject) { 830 830 while ((req = elv_next_request(q)) != NULL) 831 - end_queued_request(req, -ENODEV); 831 + __blk_end_request(req, -ENODEV, blk_rq_bytes(req)); 832 832 833 833 return; 834 834 }
+1 -1
drivers/mmc/card/block.c
··· 83 83 mutex_lock(&open_lock); 84 84 md->usage--; 85 85 if (md->usage == 0) { 86 - int devidx = md->disk->first_minor >> MMC_SHIFT; 86 + int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT; 87 87 __clear_bit(devidx, dev_use); 88 88 89 89 put_disk(md->disk);
+24
drivers/mtd/ftl.c
··· 1005 1005 return ftl_write((void *)dev, buf, block, 1); 1006 1006 } 1007 1007 1008 + static int ftl_discardsect(struct mtd_blktrans_dev *dev, 1009 + unsigned long sector, unsigned nr_sects) 1010 + { 1011 + partition_t *part = (void *)dev; 1012 + uint32_t bsize = 1 << part->header.EraseUnitSize; 1013 + 1014 + DEBUG(1, "FTL erase sector %ld for %d sectors\n", 1015 + sector, nr_sects); 1016 + 1017 + while (nr_sects) { 1018 + uint32_t old_addr = part->VirtualBlockMap[sector]; 1019 + if (old_addr != 0xffffffff) { 1020 + part->VirtualBlockMap[sector] = 0xffffffff; 1021 + part->EUNInfo[old_addr/bsize].Deleted++; 1022 + if (set_bam_entry(part, old_addr, 0)) 1023 + return -EIO; 1024 + } 1025 + nr_sects--; 1026 + sector++; 1027 + } 1028 + 1029 + return 0; 1030 + } 1008 1031 /*====================================================================*/ 1009 1032 1010 1033 static void ftl_freepart(partition_t *part) ··· 1092 1069 .blksize = SECTOR_SIZE, 1093 1070 .readsect = ftl_readsect, 1094 1071 .writesect = ftl_writesect, 1072 + .discard = ftl_discardsect, 1095 1073 .getgeo = ftl_getgeo, 1096 1074 .add_mtd = ftl_add_mtd, 1097 1075 .remove_dev = ftl_remove_dev,
+16
drivers/mtd/mtd_blkdevs.c
··· 32 32 spinlock_t queue_lock; 33 33 }; 34 34 35 + static int blktrans_discard_request(struct request_queue *q, 36 + struct request *req) 37 + { 38 + req->cmd_type = REQ_TYPE_LINUX_BLOCK; 39 + req->cmd[0] = REQ_LB_OP_DISCARD; 40 + return 0; 41 + } 42 + 35 43 static int do_blktrans_request(struct mtd_blktrans_ops *tr, 36 44 struct mtd_blktrans_dev *dev, 37 45 struct request *req) ··· 51 43 nsect = req->current_nr_sectors << 9 >> tr->blkshift; 52 44 53 45 buf = req->buffer; 46 + 47 + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && 48 + req->cmd[0] == REQ_LB_OP_DISCARD) 49 + return !tr->discard(dev, block, nsect); 54 50 55 51 if (!blk_fs_request(req)) 56 52 return 0; ··· 379 367 380 368 tr->blkcore_priv->rq->queuedata = tr; 381 369 blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); 370 + if (tr->discard) 371 + blk_queue_set_discard(tr->blkcore_priv->rq, 372 + blktrans_discard_request); 373 + 382 374 tr->blkshift = ffs(tr->blksize) - 1; 383 375 384 376 tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr,
+2 -1
drivers/s390/block/dasd_proc.c
··· 76 76 /* Print kdev. */ 77 77 if (block->gdp) 78 78 seq_printf(m, " at (%3d:%6d)", 79 - block->gdp->major, block->gdp->first_minor); 79 + MAJOR(disk_devt(block->gdp)), 80 + MINOR(disk_devt(block->gdp))); 80 81 else 81 82 seq_printf(m, " at (???:??????)"); 82 83 /* Print device name. */
+2 -2
drivers/s390/block/dcssblk.c
··· 114 114 found = 0; 115 115 // test if minor available 116 116 list_for_each_entry(entry, &dcssblk_devices, lh) 117 - if (minor == entry->gd->first_minor) 117 + if (minor == MINOR(disk_devt(entry->gd))) 118 118 found++; 119 119 if (!found) break; // got unused minor 120 120 } ··· 397 397 goto unload_seg; 398 398 } 399 399 sprintf(dev_info->gd->disk_name, "dcssblk%d", 400 - dev_info->gd->first_minor); 400 + MINOR(disk_devt(dev_info->gd))); 401 401 list_add_tail(&dev_info->lh, &dcssblk_devices); 402 402 403 403 if (!try_module_get(THIS_MODULE)) {
+1 -1
drivers/scsi/aacraid/aachba.c
··· 1139 1139 srbcmd->id = cpu_to_le32(scmd_id(cmd)); 1140 1140 srbcmd->lun = cpu_to_le32(cmd->device->lun); 1141 1141 srbcmd->flags = cpu_to_le32(flag); 1142 - timeout = cmd->timeout_per_command/HZ; 1142 + timeout = cmd->request->timeout/HZ; 1143 1143 if (timeout == 0) 1144 1144 timeout = 1; 1145 1145 srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds
+38 -22
drivers/scsi/gdth.c
··· 464 464 465 465 /* use request field to save the ptr. to completion struct. */ 466 466 scp->request = (struct request *)&wait; 467 - scp->timeout_per_command = timeout*HZ; 468 467 scp->cmd_len = 12; 469 468 scp->cmnd = cmnd; 470 469 cmndinfo.priority = IOCTL_PRI; ··· 1994 1995 register Scsi_Cmnd *pscp; 1995 1996 register Scsi_Cmnd *nscp; 1996 1997 ulong flags; 1997 - unchar b, t; 1998 1998 1999 1999 TRACE(("gdth_putq() priority %d\n",priority)); 2000 2000 spin_lock_irqsave(&ha->smp_lock, flags); 2001 2001 2002 - if (!cmndinfo->internal_command) { 2002 + if (!cmndinfo->internal_command) 2003 2003 cmndinfo->priority = priority; 2004 - b = scp->device->channel; 2005 - t = scp->device->id; 2006 - if (priority >= DEFAULT_PRI) { 2007 - if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha,b)].lock) || 2008 - (b==ha->virt_bus && t<MAX_HDRIVES && ha->hdr[t].lock)) { 2009 - TRACE2(("gdth_putq(): locked IO ->update_timeout()\n")); 2010 - cmndinfo->timeout = gdth_update_timeout(scp, 0); 2011 - } 2012 - } 2013 - } 2014 2004 2015 2005 if (ha->req_first==NULL) { 2016 2006 ha->req_first = scp; /* queue was empty */ ··· 3887 3899 return ((const char *)ha->binfo.type_string); 3888 3900 } 3889 3901 3902 + static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp) 3903 + { 3904 + gdth_ha_str *ha = shost_priv(scp->device->host); 3905 + struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); 3906 + unchar b, t; 3907 + ulong flags; 3908 + enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED; 3909 + 3910 + TRACE(("%s() cmd 0x%x\n", __func__, scp->cmnd[0])); 3911 + b = scp->device->channel; 3912 + t = scp->device->id; 3913 + 3914 + /* 3915 + * We don't really honor the command timeout, but we try to 3916 + * honor 6 times of the actual command timeout! So reset the 3917 + * timer if this is less than 6th timeout on this command! 
3918 + */ 3919 + if (++cmndinfo->timeout_count < 6) 3920 + retval = BLK_EH_RESET_TIMER; 3921 + 3922 + /* Reset the timeout if it is locked IO */ 3923 + spin_lock_irqsave(&ha->smp_lock, flags); 3924 + if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha, b)].lock) || 3925 + (b == ha->virt_bus && t < MAX_HDRIVES && ha->hdr[t].lock)) { 3926 + TRACE2(("%s(): locked IO, reset timeout\n", __func__)); 3927 + retval = BLK_EH_RESET_TIMER; 3928 + } 3929 + spin_unlock_irqrestore(&ha->smp_lock, flags); 3930 + 3931 + return retval; 3932 + } 3933 + 3934 + 3890 3935 static int gdth_eh_bus_reset(Scsi_Cmnd *scp) 3891 3936 { 3892 3937 gdth_ha_str *ha = shost_priv(scp->device->host); ··· 4013 3992 BUG_ON(!cmndinfo); 4014 3993 4015 3994 scp->scsi_done = done; 4016 - gdth_update_timeout(scp, scp->timeout_per_command * 6); 3995 + cmndinfo->timeout_count = 0; 4017 3996 cmndinfo->priority = DEFAULT_PRI; 4018 3997 4019 3998 return __gdth_queuecommand(ha, scp, cmndinfo); ··· 4117 4096 ha->hdr[j].lock = 1; 4118 4097 spin_unlock_irqrestore(&ha->smp_lock, flags); 4119 4098 gdth_wait_completion(ha, ha->bus_cnt, j); 4120 - gdth_stop_timeout(ha, ha->bus_cnt, j); 4121 4099 } else { 4122 4100 spin_lock_irqsave(&ha->smp_lock, flags); 4123 4101 ha->hdr[j].lock = 0; 4124 4102 spin_unlock_irqrestore(&ha->smp_lock, flags); 4125 - gdth_start_timeout(ha, ha->bus_cnt, j); 4126 4103 gdth_next(ha); 4127 4104 } 4128 4105 } ··· 4558 4539 spin_lock_irqsave(&ha->smp_lock, flags); 4559 4540 ha->raw[i].lock = 1; 4560 4541 spin_unlock_irqrestore(&ha->smp_lock, flags); 4561 - for (j = 0; j < ha->tid_cnt; ++j) { 4542 + for (j = 0; j < ha->tid_cnt; ++j) 4562 4543 gdth_wait_completion(ha, i, j); 4563 - gdth_stop_timeout(ha, i, j); 4564 - } 4565 4544 } else { 4566 4545 spin_lock_irqsave(&ha->smp_lock, flags); 4567 4546 ha->raw[i].lock = 0; 4568 4547 spin_unlock_irqrestore(&ha->smp_lock, flags); 4569 - for (j = 0; j < ha->tid_cnt; ++j) { 4570 - gdth_start_timeout(ha, i, j); 4548 + for (j = 0; j < ha->tid_cnt; ++j) 4571 4549 
gdth_next(ha); 4572 - } 4573 4550 } 4574 4551 } 4575 4552 break; ··· 4659 4644 .slave_configure = gdth_slave_configure, 4660 4645 .bios_param = gdth_bios_param, 4661 4646 .proc_info = gdth_proc_info, 4647 + .eh_timed_out = gdth_timed_out, 4662 4648 .proc_name = "gdth", 4663 4649 .can_queue = GDTH_MAXCMDS, 4664 4650 .this_id = -1,
+1 -1
drivers/scsi/gdth.h
··· 916 916 gdth_cmd_str *internal_cmd_str; /* crier for internal messages*/ 917 917 dma_addr_t sense_paddr; /* sense dma-addr */ 918 918 unchar priority; 919 - int timeout; 919 + int timeout_count; /* # of timeout calls */ 920 920 volatile int wait_for_completion; 921 921 ushort status; 922 922 ulong32 info;
-66
drivers/scsi/gdth_proc.c
··· 748 748 } 749 749 spin_unlock_irqrestore(&ha->smp_lock, flags); 750 750 } 751 - 752 - static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id) 753 - { 754 - ulong flags; 755 - Scsi_Cmnd *scp; 756 - unchar b, t; 757 - 758 - spin_lock_irqsave(&ha->smp_lock, flags); 759 - 760 - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { 761 - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); 762 - if (!cmndinfo->internal_command) { 763 - b = scp->device->channel; 764 - t = scp->device->id; 765 - if (t == (unchar)id && b == (unchar)busnum) { 766 - TRACE2(("gdth_stop_timeout(): update_timeout()\n")); 767 - cmndinfo->timeout = gdth_update_timeout(scp, 0); 768 - } 769 - } 770 - } 771 - spin_unlock_irqrestore(&ha->smp_lock, flags); 772 - } 773 - 774 - static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id) 775 - { 776 - ulong flags; 777 - Scsi_Cmnd *scp; 778 - unchar b, t; 779 - 780 - spin_lock_irqsave(&ha->smp_lock, flags); 781 - 782 - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { 783 - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); 784 - if (!cmndinfo->internal_command) { 785 - b = scp->device->channel; 786 - t = scp->device->id; 787 - if (t == (unchar)id && b == (unchar)busnum) { 788 - TRACE2(("gdth_start_timeout(): update_timeout()\n")); 789 - gdth_update_timeout(scp, cmndinfo->timeout); 790 - } 791 - } 792 - } 793 - spin_unlock_irqrestore(&ha->smp_lock, flags); 794 - } 795 - 796 - static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout) 797 - { 798 - int oldto; 799 - 800 - oldto = scp->timeout_per_command; 801 - scp->timeout_per_command = timeout; 802 - 803 - if (timeout == 0) { 804 - del_timer(&scp->eh_timeout); 805 - scp->eh_timeout.data = (unsigned long) NULL; 806 - scp->eh_timeout.expires = 0; 807 - } else { 808 - if (scp->eh_timeout.data != (unsigned long) NULL) 809 - del_timer(&scp->eh_timeout); 810 - scp->eh_timeout.data = (unsigned long) scp; 811 - scp->eh_timeout.expires = jiffies + timeout; 
812 - add_timer(&scp->eh_timeout); 813 - } 814 - 815 - return oldto; 816 - }
-3
drivers/scsi/gdth_proc.h
··· 20 20 ulong64 *paddr); 21 21 static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr); 22 22 static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id); 23 - static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id); 24 - static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id); 25 - static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout); 26 23 27 24 #endif 28 25
+1 -1
drivers/scsi/ibmvscsi/ibmvscsi.c
··· 756 756 init_event_struct(evt_struct, 757 757 handle_cmd_rsp, 758 758 VIOSRP_SRP_FORMAT, 759 - cmnd->timeout_per_command/HZ); 759 + cmnd->request->timeout/HZ); 760 760 761 761 evt_struct->cmnd = cmnd; 762 762 evt_struct->cmnd_done = done;
+1 -1
drivers/scsi/ide-scsi.c
··· 612 612 pc->req_xfer = pc->buf_size = scsi_bufflen(cmd); 613 613 pc->scsi_cmd = cmd; 614 614 pc->done = done; 615 - pc->timeout = jiffies + cmd->timeout_per_command; 615 + pc->timeout = jiffies + cmd->request->timeout; 616 616 617 617 if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { 618 618 printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);
+2 -1
drivers/scsi/ipr.c
··· 3670 3670 sdev->no_uld_attach = 1; 3671 3671 } 3672 3672 if (ipr_is_vset_device(res)) { 3673 - sdev->timeout = IPR_VSET_RW_TIMEOUT; 3673 + blk_queue_rq_timeout(sdev->request_queue, 3674 + IPR_VSET_RW_TIMEOUT); 3674 3675 blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS); 3675 3676 } 3676 3677 if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res))
+1 -1
drivers/scsi/ips.c
··· 3818 3818 scb->cmd.dcdb.segment_4G = 0; 3819 3819 scb->cmd.dcdb.enhanced_sg = 0; 3820 3820 3821 - TimeOut = scb->scsi_cmd->timeout_per_command; 3821 + TimeOut = scb->scsi_cmd->request->timeout; 3822 3822 3823 3823 if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */ 3824 3824 if (!scb->sg_len) {
+9 -8
drivers/scsi/libiscsi.c
··· 1476 1476 scsi_queue_work(conn->session->host, &conn->xmitwork); 1477 1477 } 1478 1478 1479 - static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) 1479 + static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) 1480 1480 { 1481 1481 struct iscsi_cls_session *cls_session; 1482 1482 struct iscsi_session *session; 1483 1483 struct iscsi_conn *conn; 1484 - enum scsi_eh_timer_return rc = EH_NOT_HANDLED; 1484 + enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; 1485 1485 1486 1486 cls_session = starget_to_session(scsi_target(scmd->device)); 1487 1487 session = cls_session->dd_data; ··· 1494 1494 * We are probably in the middle of iscsi recovery so let 1495 1495 * that complete and handle the error. 1496 1496 */ 1497 - rc = EH_RESET_TIMER; 1497 + rc = BLK_EH_RESET_TIMER; 1498 1498 goto done; 1499 1499 } 1500 1500 1501 1501 conn = session->leadconn; 1502 1502 if (!conn) { 1503 1503 /* In the middle of shuting down */ 1504 - rc = EH_RESET_TIMER; 1504 + rc = BLK_EH_RESET_TIMER; 1505 1505 goto done; 1506 1506 } 1507 1507 ··· 1513 1513 */ 1514 1514 if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + 1515 1515 (conn->ping_timeout * HZ), jiffies)) 1516 - rc = EH_RESET_TIMER; 1516 + rc = BLK_EH_RESET_TIMER; 1517 1517 /* 1518 1518 * if we are about to check the transport then give the command 1519 1519 * more time 1520 1520 */ 1521 1521 if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ), 1522 1522 jiffies)) 1523 - rc = EH_RESET_TIMER; 1523 + rc = BLK_EH_RESET_TIMER; 1524 1524 /* if in the middle of checking the transport then give us more time */ 1525 1525 if (conn->ping_task) 1526 - rc = EH_RESET_TIMER; 1526 + rc = BLK_EH_RESET_TIMER; 1527 1527 done: 1528 1528 spin_unlock(&session->lock); 1529 - debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh"); 1529 + debug_scsi("return %s\n", rc == BLK_EH_RESET_TIMER ? 1530 + "timer reset" : "nh"); 1530 1531 return rc; 1531 1532 } 1532 1533
+1 -1
drivers/scsi/libsas/sas_ata.c
··· 398 398 399 399 /* Bounce SCSI-initiated commands to the SCSI EH */ 400 400 if (qc->scsicmd) { 401 - scsi_req_abort_cmd(qc->scsicmd); 401 + blk_abort_request(qc->scsicmd->request); 402 402 scsi_schedule_eh(qc->scsicmd->device->host); 403 403 return; 404 404 }
+1 -1
drivers/scsi/libsas/sas_internal.h
··· 55 55 int sas_register_ports(struct sas_ha_struct *sas_ha); 56 56 void sas_unregister_ports(struct sas_ha_struct *sas_ha); 57 57 58 - enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); 58 + enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); 59 59 60 60 int sas_init_queue(struct sas_ha_struct *sas_ha); 61 61 int sas_init_events(struct sas_ha_struct *sas_ha);
+15 -15
drivers/scsi/libsas/sas_scsi_host.c
··· 673 673 return; 674 674 } 675 675 676 - enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) 676 + enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) 677 677 { 678 678 struct sas_task *task = TO_SAS_TASK(cmd); 679 679 unsigned long flags; 680 680 681 681 if (!task) { 682 - cmd->timeout_per_command /= 2; 682 + cmd->request->timeout /= 2; 683 683 SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n", 684 - cmd, task, (cmd->timeout_per_command ? 685 - "EH_RESET_TIMER" : "EH_NOT_HANDLED")); 686 - if (!cmd->timeout_per_command) 687 - return EH_NOT_HANDLED; 688 - return EH_RESET_TIMER; 684 + cmd, task, (cmd->request->timeout ? 685 + "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED")); 686 + if (!cmd->request->timeout) 687 + return BLK_EH_NOT_HANDLED; 688 + return BLK_EH_RESET_TIMER; 689 689 } 690 690 691 691 spin_lock_irqsave(&task->task_state_lock, flags); 692 692 BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED); 693 693 if (task->task_state_flags & SAS_TASK_STATE_DONE) { 694 694 spin_unlock_irqrestore(&task->task_state_lock, flags); 695 - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", 696 - cmd, task); 697 - return EH_HANDLED; 695 + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: " 696 + "BLK_EH_HANDLED\n", cmd, task); 697 + return BLK_EH_HANDLED; 698 698 } 699 699 if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) { 700 700 spin_unlock_irqrestore(&task->task_state_lock, flags); 701 701 SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: " 702 - "EH_RESET_TIMER\n", 702 + "BLK_EH_RESET_TIMER\n", 703 703 cmd, task); 704 - return EH_RESET_TIMER; 704 + return BLK_EH_RESET_TIMER; 705 705 } 706 706 task->task_state_flags |= SAS_TASK_STATE_ABORTED; 707 707 spin_unlock_irqrestore(&task->task_state_lock, flags); 708 708 709 - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n", 709 + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n", 710 710 cmd, task); 711 711 712 - return 
EH_NOT_HANDLED; 712 + return BLK_EH_NOT_HANDLED; 713 713 } 714 714 715 715 int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) ··· 1039 1039 return; 1040 1040 } 1041 1041 1042 - scsi_req_abort_cmd(sc); 1042 + blk_abort_request(sc->request); 1043 1043 scsi_schedule_eh(sc->device->host); 1044 1044 } 1045 1045
+3 -3
drivers/scsi/megaraid/megaraid_sas.c
··· 1167 1167 * cmd has not been completed within the timeout period. 1168 1168 */ 1169 1169 static enum 1170 - scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) 1170 + blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) 1171 1171 { 1172 1172 struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr; 1173 1173 struct megasas_instance *instance; ··· 1175 1175 1176 1176 if (time_after(jiffies, scmd->jiffies_at_alloc + 1177 1177 (MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) { 1178 - return EH_NOT_HANDLED; 1178 + return BLK_EH_NOT_HANDLED; 1179 1179 } 1180 1180 1181 1181 instance = cmd->instance; ··· 1189 1189 1190 1190 spin_unlock_irqrestore(instance->host->host_lock, flags); 1191 1191 } 1192 - return EH_RESET_TIMER; 1192 + return BLK_EH_RESET_TIMER; 1193 1193 } 1194 1194 1195 1195 /**
+2 -2
drivers/scsi/ncr53c8xx.c
··· 4170 4170 ** 4171 4171 **---------------------------------------------------- 4172 4172 */ 4173 - if (np->settle_time && cmd->timeout_per_command >= HZ) { 4174 - u_long tlimit = jiffies + cmd->timeout_per_command - HZ; 4173 + if (np->settle_time && cmd->request->timeout >= HZ) { 4174 + u_long tlimit = jiffies + cmd->request->timeout - HZ; 4175 4175 if (time_after(np->settle_time, tlimit)) 4176 4176 np->settle_time = tlimit; 4177 4177 }
+2 -2
drivers/scsi/qla1280.c
··· 2845 2845 memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); 2846 2846 2847 2847 /* Set ISP command timeout. */ 2848 - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); 2848 + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); 2849 2849 2850 2850 /* Set device target ID and LUN */ 2851 2851 pkt->lun = SCSI_LUN_32(cmd); ··· 3114 3114 memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); 3115 3115 3116 3116 /* Set ISP command timeout. */ 3117 - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); 3117 + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); 3118 3118 3119 3119 /* Set device target ID and LUN */ 3120 3120 pkt->lun = SCSI_LUN_32(cmd);
+2 -2
drivers/scsi/qla4xxx/ql4_os.c
··· 1542 1542 DEBUG2(printk(KERN_INFO 1543 1543 "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x," 1544 1544 "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no, 1545 - cmd, jiffies, cmd->timeout_per_command / HZ, 1545 + cmd, jiffies, cmd->request->timeout / HZ, 1546 1546 ha->dpc_flags, cmd->result, cmd->allowed)); 1547 1547 1548 1548 /* FIXME: wait for hba to go online */ ··· 1598 1598 DEBUG2(printk(KERN_INFO 1599 1599 "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, " 1600 1600 "to=%x,dpc_flags=%lx, status=%x allowed=%d\n", 1601 - ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ, 1601 + ha->host_no, cmd, jiffies, cmd->request->timeout / HZ, 1602 1602 ha->dpc_flags, cmd->result, cmd->allowed)); 1603 1603 1604 1604 stat = qla4xxx_reset_target(ha, ddb_entry);
+17 -75
drivers/scsi/scsi.c
··· 291 291 unsigned long flags; 292 292 293 293 cmd->device = dev; 294 - init_timer(&cmd->eh_timeout); 295 294 INIT_LIST_HEAD(&cmd->list); 296 295 spin_lock_irqsave(&dev->list_lock, flags); 297 296 list_add_tail(&cmd->list, &dev->cmd_list); ··· 651 652 unsigned long timeout; 652 653 int rtn = 0; 653 654 655 + /* 656 + * We will use a queued command if possible, otherwise we will 657 + * emulate the queuing and calling of completion function ourselves. 658 + */ 659 + atomic_inc(&cmd->device->iorequest_cnt); 660 + 654 661 /* check if the device is still usable */ 655 662 if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { 656 663 /* in SDEV_DEL we error all commands. DID_NO_CONNECT 657 664 * returns an immediate error upwards, and signals 658 665 * that the device is no longer present */ 659 666 cmd->result = DID_NO_CONNECT << 16; 660 - atomic_inc(&cmd->device->iorequest_cnt); 661 - __scsi_done(cmd); 667 + scsi_done(cmd); 662 668 /* return 0 (because the command has been processed) */ 663 669 goto out; 664 670 } ··· 676 672 * future requests should not occur until the device 677 673 * transitions out of the suspend state. 678 674 */ 675 + 679 676 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); 680 677 681 678 SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n")); ··· 719 714 host->resetting = 0; 720 715 } 721 716 722 - /* 723 - * AK: unlikely race here: for some reason the timer could 724 - * expire before the serial number is set up below. 725 - */ 726 - scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out); 727 - 728 717 scsi_log_send(cmd); 729 - 730 - /* 731 - * We will use a queued command if possible, otherwise we will 732 - * emulate the queuing and calling of completion function ourselves. 
733 - */ 734 - atomic_inc(&cmd->device->iorequest_cnt); 735 718 736 719 /* 737 720 * Before we queue this command, check if the command ··· 737 744 } 738 745 739 746 spin_lock_irqsave(host->host_lock, flags); 747 + /* 748 + * AK: unlikely race here: for some reason the timer could 749 + * expire before the serial number is set up below. 750 + * 751 + * TODO: kill serial or move to blk layer 752 + */ 740 753 scsi_cmd_get_serial(host, cmd); 741 754 742 755 if (unlikely(host->shost_state == SHOST_DEL)) { ··· 753 754 } 754 755 spin_unlock_irqrestore(host->host_lock, flags); 755 756 if (rtn) { 756 - if (scsi_delete_timer(cmd)) { 757 - atomic_inc(&cmd->device->iodone_cnt); 758 - scsi_queue_insert(cmd, 759 - (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? 760 - rtn : SCSI_MLQUEUE_HOST_BUSY); 761 - } 757 + scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? 758 + rtn : SCSI_MLQUEUE_HOST_BUSY); 762 759 SCSI_LOG_MLQUEUE(3, 763 760 printk("queuecommand : request rejected\n")); 764 761 } ··· 763 768 SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); 764 769 return rtn; 765 770 } 766 - 767 - /** 768 - * scsi_req_abort_cmd -- Request command recovery for the specified command 769 - * @cmd: pointer to the SCSI command of interest 770 - * 771 - * This function requests that SCSI Core start recovery for the 772 - * command by deleting the timer and adding the command to the eh 773 - * queue. It can be called by either LLDDs or SCSI Core. LLDDs who 774 - * implement their own error recovery MAY ignore the timeout event if 775 - * they generated scsi_req_abort_cmd. 776 - */ 777 - void scsi_req_abort_cmd(struct scsi_cmnd *cmd) 778 - { 779 - if (!scsi_delete_timer(cmd)) 780 - return; 781 - scsi_times_out(cmd); 782 - } 783 - EXPORT_SYMBOL(scsi_req_abort_cmd); 784 771 785 772 /** 786 773 * scsi_done - Enqueue the finished SCSI command into the done queue. 
··· 779 802 */ 780 803 static void scsi_done(struct scsi_cmnd *cmd) 781 804 { 782 - /* 783 - * We don't have to worry about this one timing out anymore. 784 - * If we are unable to remove the timer, then the command 785 - * has already timed out. In which case, we have no choice but to 786 - * let the timeout function run, as we have no idea where in fact 787 - * that function could really be. It might be on another processor, 788 - * etc, etc. 789 - */ 790 - if (!scsi_delete_timer(cmd)) 791 - return; 792 - __scsi_done(cmd); 793 - } 794 - 795 - /* Private entry to scsi_done() to complete a command when the timer 796 - * isn't running --- used by scsi_times_out */ 797 - void __scsi_done(struct scsi_cmnd *cmd) 798 - { 799 - struct request *rq = cmd->request; 800 - 801 - /* 802 - * Set the serial numbers back to zero 803 - */ 804 - cmd->serial_number = 0; 805 - 806 - atomic_inc(&cmd->device->iodone_cnt); 807 - if (cmd->result) 808 - atomic_inc(&cmd->device->ioerr_cnt); 809 - 810 - BUG_ON(!rq); 811 - 812 - /* 813 - * The uptodate/nbytes values don't matter, as we allow partial 814 - * completes and thus will check this in the softirq callback 815 - */ 816 - rq->completion_data = cmd; 817 - blk_complete_request(rq); 805 + blk_complete_request(cmd->request); 818 806 } 819 807 820 808 /* Move this to a header if it becomes more generally useful */
+13 -77
drivers/scsi/scsi_error.c
··· 112 112 } 113 113 114 114 /** 115 - * scsi_add_timer - Start timeout timer for a single scsi command. 116 - * @scmd: scsi command that is about to start running. 117 - * @timeout: amount of time to allow this command to run. 118 - * @complete: timeout function to call if timer isn't canceled. 119 - * 120 - * Notes: 121 - * This should be turned into an inline function. Each scsi command 122 - * has its own timer, and as it is added to the queue, we set up the 123 - * timer. When the command completes, we cancel the timer. 124 - */ 125 - void scsi_add_timer(struct scsi_cmnd *scmd, int timeout, 126 - void (*complete)(struct scsi_cmnd *)) 127 - { 128 - 129 - /* 130 - * If the clock was already running for this command, then 131 - * first delete the timer. The timer handling code gets rather 132 - * confused if we don't do this. 133 - */ 134 - if (scmd->eh_timeout.function) 135 - del_timer(&scmd->eh_timeout); 136 - 137 - scmd->eh_timeout.data = (unsigned long)scmd; 138 - scmd->eh_timeout.expires = jiffies + timeout; 139 - scmd->eh_timeout.function = (void (*)(unsigned long)) complete; 140 - 141 - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:" 142 - " %d, (%p)\n", __func__, 143 - scmd, timeout, complete)); 144 - 145 - add_timer(&scmd->eh_timeout); 146 - } 147 - 148 - /** 149 - * scsi_delete_timer - Delete/cancel timer for a given function. 150 - * @scmd: Cmd that we are canceling timer for 151 - * 152 - * Notes: 153 - * This should be turned into an inline function. 154 - * 155 - * Return value: 156 - * 1 if we were able to detach the timer. 0 if we blew it, and the 157 - * timer function has already started to run. 
158 - */ 159 - int scsi_delete_timer(struct scsi_cmnd *scmd) 160 - { 161 - int rtn; 162 - 163 - rtn = del_timer(&scmd->eh_timeout); 164 - 165 - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p," 166 - " rtn: %d\n", __func__, 167 - scmd, rtn)); 168 - 169 - scmd->eh_timeout.data = (unsigned long)NULL; 170 - scmd->eh_timeout.function = NULL; 171 - 172 - return rtn; 173 - } 174 - 175 - /** 176 115 * scsi_times_out - Timeout function for normal scsi commands. 177 - * @scmd: Cmd that is timing out. 116 + * @req: request that is timing out. 178 117 * 179 118 * Notes: 180 119 * We do not need to lock this. There is the potential for a race ··· 121 182 * normal completion function determines that the timer has already 122 183 * fired, then it mustn't do anything. 123 184 */ 124 - void scsi_times_out(struct scsi_cmnd *scmd) 185 + enum blk_eh_timer_return scsi_times_out(struct request *req) 125 186 { 126 - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); 187 + struct scsi_cmnd *scmd = req->special; 188 + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); 189 + enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; 127 190 128 191 scsi_log_completion(scmd, TIMEOUT_ERROR); 129 192 ··· 137 196 eh_timed_out = NULL; 138 197 139 198 if (eh_timed_out) 140 - switch (eh_timed_out(scmd)) { 141 - case EH_HANDLED: 142 - __scsi_done(scmd); 143 - return; 144 - case EH_RESET_TIMER: 145 - scsi_add_timer(scmd, scmd->timeout_per_command, 146 - scsi_times_out); 147 - return; 148 - case EH_NOT_HANDLED: 199 + rtn = eh_timed_out(scmd); 200 + switch (rtn) { 201 + case BLK_EH_NOT_HANDLED: 149 202 break; 203 + default: 204 + return rtn; 150 205 } 151 206 152 207 if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { 153 208 scmd->result |= DID_TIME_OUT << 16; 154 - __scsi_done(scmd); 209 + return BLK_EH_HANDLED; 155 210 } 211 + 212 + return BLK_EH_NOT_HANDLED; 156 213 } 157 214 158 215 /** ··· 1732 1793 1733 1794 blk_rq_init(NULL, &req); 1734 1795 scmd->request = &req; 1735 
- memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout)); 1736 1796 1737 1797 scmd->cmnd = req.cmd; 1738 1798 ··· 1741 1803 scmd->cmd_len = 0; 1742 1804 1743 1805 scmd->sc_data_direction = DMA_BIDIRECTIONAL; 1744 - 1745 - init_timer(&scmd->eh_timeout); 1746 1806 1747 1807 spin_lock_irqsave(shost->host_lock, flags); 1748 1808 shost->tmf_in_progress = 1;
+13 -4
drivers/scsi/scsi_lib.c
··· 1181 1181 1182 1182 cmd->transfersize = req->data_len; 1183 1183 cmd->allowed = req->retries; 1184 - cmd->timeout_per_command = req->timeout; 1185 1184 return BLKPREP_OK; 1186 1185 } 1187 1186 EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); ··· 1415 1416 spin_unlock(shost->host_lock); 1416 1417 spin_lock(sdev->request_queue->queue_lock); 1417 1418 1418 - __scsi_done(cmd); 1419 + blk_complete_request(req); 1419 1420 } 1420 1421 1421 1422 static void scsi_softirq_done(struct request *rq) 1422 1423 { 1423 - struct scsi_cmnd *cmd = rq->completion_data; 1424 - unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command; 1424 + struct scsi_cmnd *cmd = rq->special; 1425 + unsigned long wait_for = (cmd->allowed + 1) * rq->timeout; 1425 1426 int disposition; 1426 1427 1427 1428 INIT_LIST_HEAD(&cmd->eh_entry); 1429 + 1430 + /* 1431 + * Set the serial numbers back to zero 1432 + */ 1433 + cmd->serial_number = 0; 1434 + 1435 + atomic_inc(&cmd->device->iodone_cnt); 1436 + if (cmd->result) 1437 + atomic_inc(&cmd->device->ioerr_cnt); 1428 1438 1429 1439 disposition = scsi_decide_disposition(cmd); 1430 1440 if (disposition != SUCCESS && ··· 1683 1675 1684 1676 blk_queue_prep_rq(q, scsi_prep_fn); 1685 1677 blk_queue_softirq_done(q, scsi_softirq_done); 1678 + blk_queue_rq_timed_out(q, scsi_times_out); 1686 1679 return q; 1687 1680 } 1688 1681
+2 -5
drivers/scsi/scsi_priv.h
··· 4 4 #include <linux/device.h> 5 5 6 6 struct request_queue; 7 + struct request; 7 8 struct scsi_cmnd; 8 9 struct scsi_device; 9 10 struct scsi_host_template; ··· 28 27 extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd); 29 28 extern int scsi_setup_command_freelist(struct Scsi_Host *shost); 30 29 extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); 31 - extern void __scsi_done(struct scsi_cmnd *cmd); 32 30 #ifdef CONFIG_SCSI_LOGGING 33 31 void scsi_log_send(struct scsi_cmnd *cmd); 34 32 void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); ··· 49 49 extern void scsi_exit_devinfo(void); 50 50 51 51 /* scsi_error.c */ 52 - extern void scsi_add_timer(struct scsi_cmnd *, int, 53 - void (*)(struct scsi_cmnd *)); 54 - extern int scsi_delete_timer(struct scsi_cmnd *); 55 - extern void scsi_times_out(struct scsi_cmnd *cmd); 52 + extern enum blk_eh_timer_return scsi_times_out(struct request *req); 56 53 extern int scsi_error_handler(void *host); 57 54 extern int scsi_decide_disposition(struct scsi_cmnd *cmd); 58 55 extern void scsi_eh_wakeup(struct Scsi_Host *shost);
+5 -2
drivers/scsi/scsi_sysfs.c
··· 560 560 sdev_rd_attr (model, "%.16s\n"); 561 561 sdev_rd_attr (rev, "%.4s\n"); 562 562 563 + /* 564 + * TODO: can we make these symlinks to the block layer ones? 565 + */ 563 566 static ssize_t 564 567 sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf) 565 568 { 566 569 struct scsi_device *sdev; 567 570 sdev = to_scsi_device(dev); 568 - return snprintf (buf, 20, "%d\n", sdev->timeout / HZ); 571 + return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ); 569 572 } 570 573 571 574 static ssize_t ··· 579 576 int timeout; 580 577 sdev = to_scsi_device(dev); 581 578 sscanf (buf, "%d\n", &timeout); 582 - sdev->timeout = timeout * HZ; 579 + blk_queue_rq_timeout(sdev->request_queue, timeout * HZ); 583 580 return count; 584 581 } 585 582 static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout);
+1 -1
drivers/scsi/scsi_tgt_lib.c
··· 362 362 int err; 363 363 364 364 dprintk("%lx %u\n", uaddr, len); 365 - err = blk_rq_map_user(q, rq, (void *)uaddr, len); 365 + err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL); 366 366 if (err) { 367 367 /* 368 368 * TODO: need to fixup sg_tablesize, max_segment_size,
+3 -3
drivers/scsi/scsi_transport_fc.c
··· 1950 1950 * Notes: 1951 1951 * This routine assumes no locks are held on entry. 1952 1952 */ 1953 - static enum scsi_eh_timer_return 1953 + static enum blk_eh_timer_return 1954 1954 fc_timed_out(struct scsi_cmnd *scmd) 1955 1955 { 1956 1956 struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); 1957 1957 1958 1958 if (rport->port_state == FC_PORTSTATE_BLOCKED) 1959 - return EH_RESET_TIMER; 1959 + return BLK_EH_RESET_TIMER; 1960 1960 1961 - return EH_NOT_HANDLED; 1961 + return BLK_EH_NOT_HANDLED; 1962 1962 } 1963 1963 1964 1964 /*
+67 -28
drivers/scsi/sd.c
··· 86 86 MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); 87 87 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); 88 88 89 + #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) 90 + #define SD_MINORS 16 91 + #else 92 + #define SD_MINORS 0 93 + #endif 94 + 89 95 static int sd_revalidate_disk(struct gendisk *); 90 96 static int sd_probe(struct device *); 91 97 static int sd_remove(struct device *); ··· 165 159 sd_print_sense_hdr(sdkp, &sshdr); 166 160 return -EINVAL; 167 161 } 168 - sd_revalidate_disk(sdkp->disk); 162 + revalidate_disk(sdkp->disk); 169 163 return count; 170 164 } 171 165 ··· 383 377 sector_t block = rq->sector; 384 378 sector_t threshold; 385 379 unsigned int this_count = rq->nr_sectors; 386 - unsigned int timeout = sdp->timeout; 387 380 int ret; 388 381 389 382 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { ··· 583 578 SCpnt->transfersize = sdp->sector_size; 584 579 SCpnt->underflow = this_count << 9; 585 580 SCpnt->allowed = SD_MAX_RETRIES; 586 - SCpnt->timeout_per_command = timeout; 587 581 588 582 /* 589 583 * This indicates that the command is ready from our end to be ··· 914 910 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); 915 911 916 912 if (sdkp) { 917 - sd_revalidate_disk(sdkp->disk); 913 + revalidate_disk(sdkp->disk); 918 914 scsi_disk_put(sdkp); 919 915 } 920 916 } ··· 1768 1764 } 1769 1765 1770 1766 /** 1767 + * sd_format_disk_name - format disk name 1768 + * @prefix: name prefix - ie. "sd" for SCSI disks 1769 + * @index: index of the disk to format name for 1770 + * @buf: output buffer 1771 + * @buflen: length of the output buffer 1772 + * 1773 + * SCSI disk names starts at sda. The 26th device is sdz and the 1774 + * 27th is sdaa. The last one for two lettered suffix is sdzz 1775 + * which is followed by sdaaa. 
1776 + * 1777 + * This is basically 26 base counting with one extra 'nil' entry 1778 + * at the beggining from the second digit on and can be 1779 + * determined using similar method as 26 base conversion with the 1780 + * index shifted -1 after each digit is computed. 1781 + * 1782 + * CONTEXT: 1783 + * Don't care. 1784 + * 1785 + * RETURNS: 1786 + * 0 on success, -errno on failure. 1787 + */ 1788 + static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) 1789 + { 1790 + const int base = 'z' - 'a' + 1; 1791 + char *begin = buf + strlen(prefix); 1792 + char *end = buf + buflen; 1793 + char *p; 1794 + int unit; 1795 + 1796 + p = end - 1; 1797 + *p = '\0'; 1798 + unit = base; 1799 + do { 1800 + if (p == begin) 1801 + return -EINVAL; 1802 + *--p = 'a' + (index % unit); 1803 + index = (index / unit) - 1; 1804 + } while (index >= 0); 1805 + 1806 + memmove(begin, p, end - p); 1807 + memcpy(buf, prefix, strlen(prefix)); 1808 + 1809 + return 0; 1810 + } 1811 + 1812 + /** 1771 1813 * sd_probe - called during driver initialization and whenever a 1772 1814 * new scsi device is attached to the system. It is called once 1773 1815 * for each scsi device (not just disks) present. 
··· 1851 1801 if (!sdkp) 1852 1802 goto out; 1853 1803 1854 - gd = alloc_disk(16); 1804 + gd = alloc_disk(SD_MINORS); 1855 1805 if (!gd) 1856 1806 goto out_free; 1857 1807 ··· 1865 1815 if (error) 1866 1816 goto out_put; 1867 1817 1868 - error = -EBUSY; 1869 - if (index >= SD_MAX_DISKS) 1818 + error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); 1819 + if (error) 1870 1820 goto out_free_index; 1871 1821 1872 1822 sdkp->device = sdp; ··· 1876 1826 sdkp->openers = 0; 1877 1827 sdkp->previous_state = 1; 1878 1828 1879 - if (!sdp->timeout) { 1829 + if (!sdp->request_queue->rq_timeout) { 1880 1830 if (sdp->type != TYPE_MOD) 1881 - sdp->timeout = SD_TIMEOUT; 1831 + blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); 1882 1832 else 1883 - sdp->timeout = SD_MOD_TIMEOUT; 1833 + blk_queue_rq_timeout(sdp->request_queue, 1834 + SD_MOD_TIMEOUT); 1884 1835 } 1885 1836 1886 1837 device_initialize(&sdkp->dev); ··· 1894 1843 1895 1844 get_device(&sdp->sdev_gendev); 1896 1845 1897 - gd->major = sd_major((index & 0xf0) >> 4); 1898 - gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); 1899 - gd->minors = 16; 1900 - gd->fops = &sd_fops; 1901 - 1902 - if (index < 26) { 1903 - sprintf(gd->disk_name, "sd%c", 'a' + index % 26); 1904 - } else if (index < (26 + 1) * 26) { 1905 - sprintf(gd->disk_name, "sd%c%c", 1906 - 'a' + index / 26 - 1,'a' + index % 26); 1907 - } else { 1908 - const unsigned int m1 = (index / 26 - 1) / 26 - 1; 1909 - const unsigned int m2 = (index / 26 - 1) % 26; 1910 - const unsigned int m3 = index % 26; 1911 - sprintf(gd->disk_name, "sd%c%c%c", 1912 - 'a' + m1, 'a' + m2, 'a' + m3); 1846 + if (index < SD_MAX_DISKS) { 1847 + gd->major = sd_major((index & 0xf0) >> 4); 1848 + gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); 1849 + gd->minors = SD_MINORS; 1913 1850 } 1914 - 1851 + gd->fops = &sd_fops; 1915 1852 gd->private_data = &sdkp->driver; 1916 1853 gd->queue = sdkp->device->request_queue; 1917 1854 ··· 1908 1869 
blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); 1909 1870 1910 1871 gd->driverfs_dev = &sdp->sdev_gendev; 1911 - gd->flags = GENHD_FL_DRIVERFS; 1872 + gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; 1912 1873 if (sdp->removable) 1913 1874 gd->flags |= GENHD_FL_REMOVABLE; 1914 1875
+156 -509
drivers/scsi/sg.c
··· 47 47 #include <linux/seq_file.h> 48 48 #include <linux/blkdev.h> 49 49 #include <linux/delay.h> 50 - #include <linux/scatterlist.h> 51 50 #include <linux/blktrace_api.h> 52 51 #include <linux/smp_lock.h> 53 52 ··· 68 69 #endif 69 70 70 71 #define SG_ALLOW_DIO_DEF 0 71 - #define SG_ALLOW_DIO_CODE /* compile out by commenting this define */ 72 72 73 73 #define SG_MAX_DEVS 32768 74 74 ··· 116 118 unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */ 117 119 unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ 118 120 unsigned bufflen; /* Size of (aggregate) data buffer */ 119 - unsigned b_malloc_len; /* actual len malloc'ed in buffer */ 120 - struct scatterlist *buffer;/* scatter list */ 121 + struct page **pages; 122 + int page_order; 121 123 char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ 122 124 unsigned char cmd_opcode; /* first byte of command */ 123 125 } Sg_scatter_hold; ··· 135 137 char orphan; /* 1 -> drop on sight, 0 -> normal */ 136 138 char sg_io_owned; /* 1 -> packet belongs to SG_IO */ 137 139 volatile char done; /* 0->before bh, 1->before read, 2->read */ 140 + struct request *rq; 141 + struct bio *bio; 138 142 } Sg_request; 139 143 140 144 typedef struct sg_fd { /* holds the state of a file descriptor */ ··· 175 175 176 176 static int sg_fasync(int fd, struct file *filp, int mode); 177 177 /* tasklet or soft irq callback */ 178 - static void sg_cmd_done(void *data, char *sense, int result, int resid); 179 - static int sg_start_req(Sg_request * srp); 178 + static void sg_rq_end_io(struct request *rq, int uptodate); 179 + static int sg_start_req(Sg_request *srp, unsigned char *cmd); 180 180 static void sg_finish_rem_req(Sg_request * srp); 181 181 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); 182 182 static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, ··· 188 188 int read_only, Sg_request **o_srp); 189 189 static int sg_common_write(Sg_fd * sfp, Sg_request * srp, 
190 190 unsigned char *cmnd, int timeout, int blocking); 191 - static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, 192 - int wr_xf, int *countp, unsigned char __user **up); 193 - static int sg_write_xfer(Sg_request * srp); 194 - static int sg_read_xfer(Sg_request * srp); 195 191 static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); 196 192 static void sg_remove_scat(Sg_scatter_hold * schp); 197 193 static void sg_build_reserve(Sg_fd * sfp, int req_size); 198 194 static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); 199 195 static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); 200 - static struct page *sg_page_malloc(int rqSz, int lowDma, int *retSzp); 201 - static void sg_page_free(struct page *page, int size); 202 196 static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev); 203 197 static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); 204 198 static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); ··· 200 206 static Sg_request *sg_add_request(Sg_fd * sfp); 201 207 static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); 202 208 static int sg_res_in_use(Sg_fd * sfp); 203 - static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len); 204 209 static Sg_device *sg_get_dev(int dev); 205 210 #ifdef CONFIG_SCSI_PROC_FS 206 211 static int sg_last_dev(void); ··· 522 529 err = -EFAULT; 523 530 goto err_out; 524 531 } 525 - err = sg_read_xfer(srp); 526 - err_out: 532 + err_out: 527 533 sg_finish_rem_req(srp); 528 534 return (0 == err) ? count : err; 529 535 } ··· 604 612 else 605 613 hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE; 606 614 hp->dxfer_len = mxsize; 607 - hp->dxferp = (char __user *)buf + cmd_size; 615 + if (hp->dxfer_direction == SG_DXFER_TO_DEV) 616 + hp->dxferp = (char __user *)buf + cmd_size; 617 + else 618 + hp->dxferp = NULL; 608 619 hp->sbp = NULL; 609 620 hp->timeout = old_hdr.reply_len; /* structure abuse ... 
*/ 610 621 hp->flags = input_size; /* structure abuse ... */ ··· 727 732 SCSI_LOG_TIMEOUT(4, printk("sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", 728 733 (int) cmnd[0], (int) hp->cmd_len)); 729 734 730 - if ((k = sg_start_req(srp))) { 735 + k = sg_start_req(srp, cmnd); 736 + if (k) { 731 737 SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k)); 732 738 sg_finish_rem_req(srp); 733 739 return k; /* probably out of space --> ENOMEM */ 734 - } 735 - if ((k = sg_write_xfer(srp))) { 736 - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n")); 737 - sg_finish_rem_req(srp); 738 - return k; 739 740 } 740 741 if (sdp->detached) { 741 742 sg_finish_rem_req(srp); ··· 754 763 break; 755 764 } 756 765 hp->duration = jiffies_to_msecs(jiffies); 757 - /* Now send everything of to mid-level. The next time we hear about this 758 - packet is when sg_cmd_done() is called (i.e. a callback). */ 759 - if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer, 760 - hp->dxfer_len, srp->data.k_use_sg, timeout, 761 - SG_DEFAULT_RETRIES, srp, sg_cmd_done, 762 - GFP_ATOMIC)) { 763 - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n")); 764 - /* 765 - * most likely out of mem, but could also be a bad map 766 - */ 767 - sg_finish_rem_req(srp); 768 - return -ENOMEM; 769 - } else 770 - return 0; 766 + 767 + srp->rq->timeout = timeout; 768 + blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, 769 + srp->rq, 1, sg_rq_end_io); 770 + return 0; 771 771 } 772 772 773 773 static int ··· 1174 1192 Sg_fd *sfp; 1175 1193 unsigned long offset, len, sa; 1176 1194 Sg_scatter_hold *rsv_schp; 1177 - struct scatterlist *sg; 1178 - int k; 1195 + int k, length; 1179 1196 1180 1197 if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data))) 1181 1198 return VM_FAULT_SIGBUS; ··· 1184 1203 return VM_FAULT_SIGBUS; 1185 1204 SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n", 1186 1205 offset, 
rsv_schp->k_use_sg)); 1187 - sg = rsv_schp->buffer; 1188 1206 sa = vma->vm_start; 1189 - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); 1190 - ++k, sg = sg_next(sg)) { 1207 + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); 1208 + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { 1191 1209 len = vma->vm_end - sa; 1192 - len = (len < sg->length) ? len : sg->length; 1210 + len = (len < length) ? len : length; 1193 1211 if (offset < len) { 1194 - struct page *page; 1195 - page = virt_to_page(page_address(sg_page(sg)) + offset); 1212 + struct page *page = nth_page(rsv_schp->pages[k], 1213 + offset >> PAGE_SHIFT); 1196 1214 get_page(page); /* increment page count */ 1197 1215 vmf->page = page; 1198 1216 return 0; /* success */ ··· 1213 1233 Sg_fd *sfp; 1214 1234 unsigned long req_sz, len, sa; 1215 1235 Sg_scatter_hold *rsv_schp; 1216 - int k; 1217 - struct scatterlist *sg; 1236 + int k, length; 1218 1237 1219 1238 if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) 1220 1239 return -ENXIO; ··· 1227 1248 return -ENOMEM; /* cannot map more than reserved buffer */ 1228 1249 1229 1250 sa = vma->vm_start; 1230 - sg = rsv_schp->buffer; 1231 - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); 1232 - ++k, sg = sg_next(sg)) { 1251 + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); 1252 + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { 1233 1253 len = vma->vm_end - sa; 1234 - len = (len < sg->length) ? len : sg->length; 1254 + len = (len < length) ? len : length; 1235 1255 sa += len; 1236 1256 } 1237 1257 ··· 1241 1263 return 0; 1242 1264 } 1243 1265 1244 - /* This function is a "bottom half" handler that is called by the 1245 - * mid level when a command is completed (or has failed). 
*/ 1246 - static void 1247 - sg_cmd_done(void *data, char *sense, int result, int resid) 1266 + /* 1267 + * This function is a "bottom half" handler that is called by the mid 1268 + * level when a command is completed (or has failed). 1269 + */ 1270 + static void sg_rq_end_io(struct request *rq, int uptodate) 1248 1271 { 1249 - Sg_request *srp = data; 1272 + struct sg_request *srp = rq->end_io_data; 1250 1273 Sg_device *sdp = NULL; 1251 1274 Sg_fd *sfp; 1252 1275 unsigned long iflags; 1253 1276 unsigned int ms; 1277 + char *sense; 1278 + int result, resid; 1254 1279 1255 1280 if (NULL == srp) { 1256 1281 printk(KERN_ERR "sg_cmd_done: NULL request\n"); ··· 1267 1286 return; 1268 1287 } 1269 1288 1289 + sense = rq->sense; 1290 + result = rq->errors; 1291 + resid = rq->data_len; 1270 1292 1271 1293 SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n", 1272 1294 sdp->disk->disk_name, srp->header.pack_id, result)); ··· 1280 1296 if (0 != result) { 1281 1297 struct scsi_sense_hdr sshdr; 1282 1298 1283 - memcpy(srp->sense_b, sense, sizeof (srp->sense_b)); 1284 1299 srp->header.status = 0xff & result; 1285 1300 srp->header.masked_status = status_byte(result); 1286 1301 srp->header.msg_status = msg_byte(result); ··· 1617 1634 idr_destroy(&sg_index_idr); 1618 1635 } 1619 1636 1620 - static int 1621 - sg_start_req(Sg_request * srp) 1637 + static int sg_start_req(Sg_request *srp, unsigned char *cmd) 1622 1638 { 1623 1639 int res; 1640 + struct request *rq; 1624 1641 Sg_fd *sfp = srp->parentfp; 1625 1642 sg_io_hdr_t *hp = &srp->header; 1626 1643 int dxfer_len = (int) hp->dxfer_len; 1627 1644 int dxfer_dir = hp->dxfer_direction; 1645 + unsigned int iov_count = hp->iovec_count; 1628 1646 Sg_scatter_hold *req_schp = &srp->data; 1629 1647 Sg_scatter_hold *rsv_schp = &sfp->reserve; 1648 + struct request_queue *q = sfp->parentdp->device->request_queue; 1649 + struct rq_map_data *md, map_data; 1650 + int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? 
WRITE : READ; 1630 1651 1631 - SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); 1652 + SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n", 1653 + dxfer_len)); 1654 + 1655 + rq = blk_get_request(q, rw, GFP_ATOMIC); 1656 + if (!rq) 1657 + return -ENOMEM; 1658 + 1659 + memcpy(rq->cmd, cmd, hp->cmd_len); 1660 + 1661 + rq->cmd_len = hp->cmd_len; 1662 + rq->cmd_type = REQ_TYPE_BLOCK_PC; 1663 + 1664 + srp->rq = rq; 1665 + rq->end_io_data = srp; 1666 + rq->sense = srp->sense_b; 1667 + rq->retries = SG_DEFAULT_RETRIES; 1668 + 1632 1669 if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) 1633 1670 return 0; 1634 - if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && 1635 - (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && 1636 - (!sfp->parentdp->device->host->unchecked_isa_dma)) { 1637 - res = sg_build_direct(srp, sfp, dxfer_len); 1638 - if (res <= 0) /* -ve -> error, 0 -> done, 1 -> try indirect */ 1639 - return res; 1671 + 1672 + if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO && 1673 + dxfer_dir != SG_DXFER_UNKNOWN && !iov_count && 1674 + !sfp->parentdp->device->host->unchecked_isa_dma && 1675 + blk_rq_aligned(q, hp->dxferp, dxfer_len)) 1676 + md = NULL; 1677 + else 1678 + md = &map_data; 1679 + 1680 + if (md) { 1681 + if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen) 1682 + sg_link_reserve(sfp, srp, dxfer_len); 1683 + else { 1684 + res = sg_build_indirect(req_schp, sfp, dxfer_len); 1685 + if (res) 1686 + return res; 1687 + } 1688 + 1689 + md->pages = req_schp->pages; 1690 + md->page_order = req_schp->page_order; 1691 + md->nr_entries = req_schp->k_use_sg; 1640 1692 } 1641 - if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) 1642 - sg_link_reserve(sfp, srp, dxfer_len); 1643 - else { 1644 - res = sg_build_indirect(req_schp, sfp, dxfer_len); 1645 - if (res) { 1646 - sg_remove_scat(req_schp); 1647 - return res; 1693 + 1694 + if (iov_count) 1695 + res = blk_rq_map_user_iov(q, rq, md, hp->dxferp, 
iov_count, 1696 + hp->dxfer_len, GFP_ATOMIC); 1697 + else 1698 + res = blk_rq_map_user(q, rq, md, hp->dxferp, 1699 + hp->dxfer_len, GFP_ATOMIC); 1700 + 1701 + if (!res) { 1702 + srp->bio = rq->bio; 1703 + 1704 + if (!md) { 1705 + req_schp->dio_in_use = 1; 1706 + hp->info |= SG_INFO_DIRECT_IO; 1648 1707 } 1649 1708 } 1650 - return 0; 1709 + return res; 1651 1710 } 1652 1711 1653 1712 static void ··· 1703 1678 sg_unlink_reserve(sfp, srp); 1704 1679 else 1705 1680 sg_remove_scat(req_schp); 1681 + 1682 + if (srp->rq) { 1683 + if (srp->bio) 1684 + blk_rq_unmap_user(srp->bio); 1685 + 1686 + blk_put_request(srp->rq); 1687 + } 1688 + 1706 1689 sg_remove_request(sfp, srp); 1707 1690 } 1708 1691 1709 1692 static int 1710 1693 sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) 1711 1694 { 1712 - int sg_bufflen = tablesize * sizeof(struct scatterlist); 1695 + int sg_bufflen = tablesize * sizeof(struct page *); 1713 1696 gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN; 1714 1697 1715 - /* 1716 - * TODO: test without low_dma, we should not need it since 1717 - * the block layer will bounce the buffer for us 1718 - * 1719 - * XXX(hch): we shouldn't need GFP_DMA for the actual S/G list. 1720 - */ 1721 - if (sfp->low_dma) 1722 - gfp_flags |= GFP_DMA; 1723 - schp->buffer = kzalloc(sg_bufflen, gfp_flags); 1724 - if (!schp->buffer) 1698 + schp->pages = kzalloc(sg_bufflen, gfp_flags); 1699 + if (!schp->pages) 1725 1700 return -ENOMEM; 1726 - sg_init_table(schp->buffer, tablesize); 1727 1701 schp->sglist_len = sg_bufflen; 1728 1702 return tablesize; /* number of scat_gath elements allocated */ 1729 - } 1730 - 1731 - #ifdef SG_ALLOW_DIO_CODE 1732 - /* vvvvvvvv following code borrowed from st driver's direct IO vvvvvvvvv */ 1733 - /* TODO: hopefully we can use the generic block layer code */ 1734 - 1735 - /* Pin down user pages and put them into a scatter gather list. 
Returns <= 0 if 1736 - - mapping of all pages not successful 1737 - (i.e., either completely successful or fails) 1738 - */ 1739 - static int 1740 - st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, 1741 - unsigned long uaddr, size_t count, int rw) 1742 - { 1743 - unsigned long end = (uaddr + count + PAGE_SIZE - 1) >> PAGE_SHIFT; 1744 - unsigned long start = uaddr >> PAGE_SHIFT; 1745 - const int nr_pages = end - start; 1746 - int res, i, j; 1747 - struct page **pages; 1748 - 1749 - /* User attempted Overflow! */ 1750 - if ((uaddr + count) < uaddr) 1751 - return -EINVAL; 1752 - 1753 - /* Too big */ 1754 - if (nr_pages > max_pages) 1755 - return -ENOMEM; 1756 - 1757 - /* Hmm? */ 1758 - if (count == 0) 1759 - return 0; 1760 - 1761 - if ((pages = kmalloc(max_pages * sizeof(*pages), GFP_ATOMIC)) == NULL) 1762 - return -ENOMEM; 1763 - 1764 - /* Try to fault in all of the necessary pages */ 1765 - down_read(&current->mm->mmap_sem); 1766 - /* rw==READ means read from drive, write into memory area */ 1767 - res = get_user_pages( 1768 - current, 1769 - current->mm, 1770 - uaddr, 1771 - nr_pages, 1772 - rw == READ, 1773 - 0, /* don't force */ 1774 - pages, 1775 - NULL); 1776 - up_read(&current->mm->mmap_sem); 1777 - 1778 - /* Errors and no page mapped should return here */ 1779 - if (res < nr_pages) 1780 - goto out_unmap; 1781 - 1782 - for (i=0; i < nr_pages; i++) { 1783 - /* FIXME: flush superflous for rw==READ, 1784 - * probably wrong function for rw==WRITE 1785 - */ 1786 - flush_dcache_page(pages[i]); 1787 - /* ?? Is locking needed? I don't think so */ 1788 - /* if (!trylock_page(pages[i])) 1789 - goto out_unlock; */ 1790 - } 1791 - 1792 - sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK); 1793 - if (nr_pages > 1) { 1794 - sgl[0].length = PAGE_SIZE - sgl[0].offset; 1795 - count -= sgl[0].length; 1796 - for (i=1; i < nr_pages ; i++) 1797 - sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? 
count : PAGE_SIZE, 0); 1798 - } 1799 - else { 1800 - sgl[0].length = count; 1801 - } 1802 - 1803 - kfree(pages); 1804 - return nr_pages; 1805 - 1806 - out_unmap: 1807 - if (res > 0) { 1808 - for (j=0; j < res; j++) 1809 - page_cache_release(pages[j]); 1810 - res = 0; 1811 - } 1812 - kfree(pages); 1813 - return res; 1814 - } 1815 - 1816 - 1817 - /* And unmap them... */ 1818 - static int 1819 - st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages, 1820 - int dirtied) 1821 - { 1822 - int i; 1823 - 1824 - for (i=0; i < nr_pages; i++) { 1825 - struct page *page = sg_page(&sgl[i]); 1826 - 1827 - if (dirtied) 1828 - SetPageDirty(page); 1829 - /* unlock_page(page); */ 1830 - /* FIXME: cache flush missing for rw==READ 1831 - * FIXME: call the correct reference counting function 1832 - */ 1833 - page_cache_release(page); 1834 - } 1835 - 1836 - return 0; 1837 - } 1838 - 1839 - /* ^^^^^^^^ above code borrowed from st driver's direct IO ^^^^^^^^^ */ 1840 - #endif 1841 - 1842 - 1843 - /* Returns: -ve -> error, 0 -> done, 1 -> try indirect */ 1844 - static int 1845 - sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) 1846 - { 1847 - #ifdef SG_ALLOW_DIO_CODE 1848 - sg_io_hdr_t *hp = &srp->header; 1849 - Sg_scatter_hold *schp = &srp->data; 1850 - int sg_tablesize = sfp->parentdp->sg_tablesize; 1851 - int mx_sc_elems, res; 1852 - struct scsi_device *sdev = sfp->parentdp->device; 1853 - 1854 - if (((unsigned long)hp->dxferp & 1855 - queue_dma_alignment(sdev->request_queue)) != 0) 1856 - return 1; 1857 - 1858 - mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize); 1859 - if (mx_sc_elems <= 0) { 1860 - return 1; 1861 - } 1862 - res = st_map_user_pages(schp->buffer, mx_sc_elems, 1863 - (unsigned long)hp->dxferp, dxfer_len, 1864 - (SG_DXFER_TO_DEV == hp->dxfer_direction) ? 
1 : 0); 1865 - if (res <= 0) { 1866 - sg_remove_scat(schp); 1867 - return 1; 1868 - } 1869 - schp->k_use_sg = res; 1870 - schp->dio_in_use = 1; 1871 - hp->info |= SG_INFO_DIRECT_IO; 1872 - return 0; 1873 - #else 1874 - return 1; 1875 - #endif 1876 1703 } 1877 1704 1878 1705 static int 1879 1706 sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) 1880 1707 { 1881 - struct scatterlist *sg; 1882 - int ret_sz = 0, k, rem_sz, num, mx_sc_elems; 1708 + int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems; 1883 1709 int sg_tablesize = sfp->parentdp->sg_tablesize; 1884 - int blk_size = buff_size; 1885 - struct page *p = NULL; 1710 + int blk_size = buff_size, order; 1711 + gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; 1886 1712 1887 1713 if (blk_size < 0) 1888 1714 return -EFAULT; ··· 1757 1881 } else 1758 1882 scatter_elem_sz_prev = num; 1759 1883 } 1760 - for (k = 0, sg = schp->buffer, rem_sz = blk_size; 1761 - (rem_sz > 0) && (k < mx_sc_elems); 1762 - ++k, rem_sz -= ret_sz, sg = sg_next(sg)) { 1763 - 1884 + 1885 + if (sfp->low_dma) 1886 + gfp_mask |= GFP_DMA; 1887 + 1888 + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 1889 + gfp_mask |= __GFP_ZERO; 1890 + 1891 + order = get_order(num); 1892 + retry: 1893 + ret_sz = 1 << (PAGE_SHIFT + order); 1894 + 1895 + for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems; 1896 + k++, rem_sz -= ret_sz) { 1897 + 1764 1898 num = (rem_sz > scatter_elem_sz_prev) ? 1765 - scatter_elem_sz_prev : rem_sz; 1766 - p = sg_page_malloc(num, sfp->low_dma, &ret_sz); 1767 - if (!p) 1768 - return -ENOMEM; 1899 + scatter_elem_sz_prev : rem_sz; 1900 + 1901 + schp->pages[k] = alloc_pages(gfp_mask, order); 1902 + if (!schp->pages[k]) 1903 + goto out; 1769 1904 1770 1905 if (num == scatter_elem_sz_prev) { 1771 1906 if (unlikely(ret_sz > scatter_elem_sz_prev)) { ··· 1784 1897 scatter_elem_sz_prev = ret_sz; 1785 1898 } 1786 1899 } 1787 - sg_set_page(sg, p, (ret_sz > num) ? 
num : ret_sz, 0); 1788 1900 1789 1901 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, " 1790 1902 "ret_sz=%d\n", k, num, ret_sz)); 1791 1903 } /* end of for loop */ 1792 1904 1905 + schp->page_order = order; 1793 1906 schp->k_use_sg = k; 1794 1907 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, " 1795 1908 "rem_sz=%d\n", k, rem_sz)); ··· 1797 1910 schp->bufflen = blk_size; 1798 1911 if (rem_sz > 0) /* must have failed */ 1799 1912 return -ENOMEM; 1800 - 1801 1913 return 0; 1802 - } 1914 + out: 1915 + for (i = 0; i < k; i++) 1916 + __free_pages(schp->pages[k], order); 1803 1917 1804 - static int 1805 - sg_write_xfer(Sg_request * srp) 1806 - { 1807 - sg_io_hdr_t *hp = &srp->header; 1808 - Sg_scatter_hold *schp = &srp->data; 1809 - struct scatterlist *sg = schp->buffer; 1810 - int num_xfer = 0; 1811 - int j, k, onum, usglen, ksglen, res; 1812 - int iovec_count = (int) hp->iovec_count; 1813 - int dxfer_dir = hp->dxfer_direction; 1814 - unsigned char *p; 1815 - unsigned char __user *up; 1816 - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; 1918 + if (--order >= 0) 1919 + goto retry; 1817 1920 1818 - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) || 1819 - (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { 1820 - num_xfer = (int) (new_interface ? 
hp->dxfer_len : hp->flags); 1821 - if (schp->bufflen < num_xfer) 1822 - num_xfer = schp->bufflen; 1823 - } 1824 - if ((num_xfer <= 0) || (schp->dio_in_use) || 1825 - (new_interface 1826 - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) 1827 - return 0; 1828 - 1829 - SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", 1830 - num_xfer, iovec_count, schp->k_use_sg)); 1831 - if (iovec_count) { 1832 - onum = iovec_count; 1833 - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) 1834 - return -EFAULT; 1835 - } else 1836 - onum = 1; 1837 - 1838 - ksglen = sg->length; 1839 - p = page_address(sg_page(sg)); 1840 - for (j = 0, k = 0; j < onum; ++j) { 1841 - res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up); 1842 - if (res) 1843 - return res; 1844 - 1845 - for (; p; sg = sg_next(sg), ksglen = sg->length, 1846 - p = page_address(sg_page(sg))) { 1847 - if (usglen <= 0) 1848 - break; 1849 - if (ksglen > usglen) { 1850 - if (usglen >= num_xfer) { 1851 - if (__copy_from_user(p, up, num_xfer)) 1852 - return -EFAULT; 1853 - return 0; 1854 - } 1855 - if (__copy_from_user(p, up, usglen)) 1856 - return -EFAULT; 1857 - p += usglen; 1858 - ksglen -= usglen; 1859 - break; 1860 - } else { 1861 - if (ksglen >= num_xfer) { 1862 - if (__copy_from_user(p, up, num_xfer)) 1863 - return -EFAULT; 1864 - return 0; 1865 - } 1866 - if (__copy_from_user(p, up, ksglen)) 1867 - return -EFAULT; 1868 - up += ksglen; 1869 - usglen -= ksglen; 1870 - } 1871 - ++k; 1872 - if (k >= schp->k_use_sg) 1873 - return 0; 1874 - } 1875 - } 1876 - 1877 - return 0; 1878 - } 1879 - 1880 - static int 1881 - sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, 1882 - int wr_xf, int *countp, unsigned char __user **up) 1883 - { 1884 - int num_xfer = (int) hp->dxfer_len; 1885 - unsigned char __user *p = hp->dxferp; 1886 - int count; 1887 - 1888 - if (0 == sg_num) { 1889 - if (wr_xf && ('\0' == hp->interface_id)) 1890 - count = (int) hp->flags; /* holds "old" input_size */ 
1891 - else 1892 - count = num_xfer; 1893 - } else { 1894 - sg_iovec_t iovec; 1895 - if (__copy_from_user(&iovec, p + ind*SZ_SG_IOVEC, SZ_SG_IOVEC)) 1896 - return -EFAULT; 1897 - p = iovec.iov_base; 1898 - count = (int) iovec.iov_len; 1899 - } 1900 - if (!access_ok(wr_xf ? VERIFY_READ : VERIFY_WRITE, p, count)) 1901 - return -EFAULT; 1902 - if (up) 1903 - *up = p; 1904 - if (countp) 1905 - *countp = count; 1906 - return 0; 1921 + return -ENOMEM; 1907 1922 } 1908 1923 1909 1924 static void 1910 1925 sg_remove_scat(Sg_scatter_hold * schp) 1911 1926 { 1912 1927 SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg)); 1913 - if (schp->buffer && (schp->sglist_len > 0)) { 1914 - struct scatterlist *sg = schp->buffer; 1915 - 1916 - if (schp->dio_in_use) { 1917 - #ifdef SG_ALLOW_DIO_CODE 1918 - st_unmap_user_pages(sg, schp->k_use_sg, TRUE); 1919 - #endif 1920 - } else { 1928 + if (schp->pages && schp->sglist_len > 0) { 1929 + if (!schp->dio_in_use) { 1921 1930 int k; 1922 1931 1923 - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); 1924 - ++k, sg = sg_next(sg)) { 1932 + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { 1925 1933 SCSI_LOG_TIMEOUT(5, printk( 1926 - "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", 1927 - k, sg_page(sg), sg->length)); 1928 - sg_page_free(sg_page(sg), sg->length); 1934 + "sg_remove_scat: k=%d, pg=0x%p\n", 1935 + k, schp->pages[k])); 1936 + __free_pages(schp->pages[k], schp->page_order); 1929 1937 } 1938 + 1939 + kfree(schp->pages); 1930 1940 } 1931 - kfree(schp->buffer); 1932 1941 } 1933 1942 memset(schp, 0, sizeof (*schp)); 1934 - } 1935 - 1936 - static int 1937 - sg_read_xfer(Sg_request * srp) 1938 - { 1939 - sg_io_hdr_t *hp = &srp->header; 1940 - Sg_scatter_hold *schp = &srp->data; 1941 - struct scatterlist *sg = schp->buffer; 1942 - int num_xfer = 0; 1943 - int j, k, onum, usglen, ksglen, res; 1944 - int iovec_count = (int) hp->iovec_count; 1945 - int dxfer_dir = hp->dxfer_direction; 1946 - unsigned char *p; 1947 - 
unsigned char __user *up; 1948 - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; 1949 - 1950 - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir) 1951 - || (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { 1952 - num_xfer = hp->dxfer_len; 1953 - if (schp->bufflen < num_xfer) 1954 - num_xfer = schp->bufflen; 1955 - } 1956 - if ((num_xfer <= 0) || (schp->dio_in_use) || 1957 - (new_interface 1958 - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) 1959 - return 0; 1960 - 1961 - SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", 1962 - num_xfer, iovec_count, schp->k_use_sg)); 1963 - if (iovec_count) { 1964 - onum = iovec_count; 1965 - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) 1966 - return -EFAULT; 1967 - } else 1968 - onum = 1; 1969 - 1970 - p = page_address(sg_page(sg)); 1971 - ksglen = sg->length; 1972 - for (j = 0, k = 0; j < onum; ++j) { 1973 - res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up); 1974 - if (res) 1975 - return res; 1976 - 1977 - for (; p; sg = sg_next(sg), ksglen = sg->length, 1978 - p = page_address(sg_page(sg))) { 1979 - if (usglen <= 0) 1980 - break; 1981 - if (ksglen > usglen) { 1982 - if (usglen >= num_xfer) { 1983 - if (__copy_to_user(up, p, num_xfer)) 1984 - return -EFAULT; 1985 - return 0; 1986 - } 1987 - if (__copy_to_user(up, p, usglen)) 1988 - return -EFAULT; 1989 - p += usglen; 1990 - ksglen -= usglen; 1991 - break; 1992 - } else { 1993 - if (ksglen >= num_xfer) { 1994 - if (__copy_to_user(up, p, num_xfer)) 1995 - return -EFAULT; 1996 - return 0; 1997 - } 1998 - if (__copy_to_user(up, p, ksglen)) 1999 - return -EFAULT; 2000 - up += ksglen; 2001 - usglen -= ksglen; 2002 - } 2003 - ++k; 2004 - if (k >= schp->k_use_sg) 2005 - return 0; 2006 - } 2007 - } 2008 - 2009 - return 0; 2010 1943 } 2011 1944 2012 1945 static int 2013 1946 sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) 2014 1947 { 2015 1948 Sg_scatter_hold *schp = &srp->data; 2016 
- struct scatterlist *sg = schp->buffer; 2017 1949 int k, num; 2018 1950 2019 1951 SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n", ··· 1840 2134 if ((!outp) || (num_read_xfer <= 0)) 1841 2135 return 0; 1842 2136 1843 - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) { 1844 - num = sg->length; 2137 + num = 1 << (PAGE_SHIFT + schp->page_order); 2138 + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { 1845 2139 if (num > num_read_xfer) { 1846 - if (__copy_to_user(outp, page_address(sg_page(sg)), 2140 + if (__copy_to_user(outp, page_address(schp->pages[k]), 1847 2141 num_read_xfer)) 1848 2142 return -EFAULT; 1849 2143 break; 1850 2144 } else { 1851 - if (__copy_to_user(outp, page_address(sg_page(sg)), 2145 + if (__copy_to_user(outp, page_address(schp->pages[k]), 1852 2146 num)) 1853 2147 return -EFAULT; 1854 2148 num_read_xfer -= num; ··· 1883 2177 { 1884 2178 Sg_scatter_hold *req_schp = &srp->data; 1885 2179 Sg_scatter_hold *rsv_schp = &sfp->reserve; 1886 - struct scatterlist *sg = rsv_schp->buffer; 1887 2180 int k, num, rem; 1888 2181 1889 2182 srp->res_used = 1; 1890 2183 SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size)); 1891 2184 rem = size; 1892 2185 1893 - for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) { 1894 - num = sg->length; 2186 + num = 1 << (PAGE_SHIFT + rsv_schp->page_order); 2187 + for (k = 0; k < rsv_schp->k_use_sg; k++) { 1895 2188 if (rem <= num) { 1896 - sfp->save_scat_len = num; 1897 - sg->length = rem; 1898 2189 req_schp->k_use_sg = k + 1; 1899 2190 req_schp->sglist_len = rsv_schp->sglist_len; 1900 - req_schp->buffer = rsv_schp->buffer; 2191 + req_schp->pages = rsv_schp->pages; 1901 2192 1902 2193 req_schp->bufflen = size; 1903 - req_schp->b_malloc_len = rsv_schp->b_malloc_len; 2194 + req_schp->page_order = rsv_schp->page_order; 1904 2195 break; 1905 2196 } else 1906 2197 rem -= num; ··· 1911 2208 sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) 1912 2209 { 1913 2210 
Sg_scatter_hold *req_schp = &srp->data; 1914 - Sg_scatter_hold *rsv_schp = &sfp->reserve; 1915 2211 1916 2212 SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n", 1917 2213 (int) req_schp->k_use_sg)); 1918 - if ((rsv_schp->k_use_sg > 0) && (req_schp->k_use_sg > 0)) { 1919 - struct scatterlist *sg = rsv_schp->buffer; 1920 - 1921 - if (sfp->save_scat_len > 0) 1922 - (sg + (req_schp->k_use_sg - 1))->length = 1923 - (unsigned) sfp->save_scat_len; 1924 - else 1925 - SCSI_LOG_TIMEOUT(1, printk ("sg_unlink_reserve: BAD save_scat_len\n")); 1926 - } 1927 2214 req_schp->k_use_sg = 0; 1928 2215 req_schp->bufflen = 0; 1929 - req_schp->buffer = NULL; 2216 + req_schp->pages = NULL; 2217 + req_schp->page_order = 0; 1930 2218 req_schp->sglist_len = 0; 1931 2219 sfp->save_scat_len = 0; 1932 2220 srp->res_used = 0; ··· 2173 2479 break; 2174 2480 read_unlock_irqrestore(&sfp->rq_list_lock, iflags); 2175 2481 return srp ? 1 : 0; 2176 - } 2177 - 2178 - /* The size fetched (value output via retSzp) set when non-NULL return */ 2179 - static struct page * 2180 - sg_page_malloc(int rqSz, int lowDma, int *retSzp) 2181 - { 2182 - struct page *resp = NULL; 2183 - gfp_t page_mask; 2184 - int order, a_size; 2185 - int resSz; 2186 - 2187 - if ((rqSz <= 0) || (NULL == retSzp)) 2188 - return resp; 2189 - 2190 - if (lowDma) 2191 - page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN; 2192 - else 2193 - page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; 2194 - 2195 - for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; 2196 - order++, a_size <<= 1) ; 2197 - resSz = a_size; /* rounded up if necessary */ 2198 - resp = alloc_pages(page_mask, order); 2199 - while ((!resp) && order) { 2200 - --order; 2201 - a_size >>= 1; /* divide by 2, until PAGE_SIZE */ 2202 - resp = alloc_pages(page_mask, order); /* try half */ 2203 - resSz = a_size; 2204 - } 2205 - if (resp) { 2206 - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2207 - memset(page_address(resp), 0, resSz); 2208 - 
*retSzp = resSz; 2209 - } 2210 - return resp; 2211 - } 2212 - 2213 - static void 2214 - sg_page_free(struct page *page, int size) 2215 - { 2216 - int order, a_size; 2217 - 2218 - if (!page) 2219 - return; 2220 - for (order = 0, a_size = PAGE_SIZE; a_size < size; 2221 - order++, a_size <<= 1) ; 2222 - __free_pages(page, order); 2223 2482 } 2224 2483 2225 2484 #ifdef CONFIG_SCSI_PROC_FS
+4 -3
drivers/scsi/sr.c
··· 331 331 332 332 static int sr_prep_fn(struct request_queue *q, struct request *rq) 333 333 { 334 - int block=0, this_count, s_size, timeout = SR_TIMEOUT; 334 + int block = 0, this_count, s_size; 335 335 struct scsi_cd *cd; 336 336 struct scsi_cmnd *SCpnt; 337 337 struct scsi_device *sdp = q->queuedata; ··· 461 461 SCpnt->transfersize = cd->device->sector_size; 462 462 SCpnt->underflow = this_count << 9; 463 463 SCpnt->allowed = MAX_RETRIES; 464 - SCpnt->timeout_per_command = timeout; 465 464 466 465 /* 467 466 * This indicates that the command is ready from our end to be ··· 618 619 sprintf(disk->disk_name, "sr%d", minor); 619 620 disk->fops = &sr_bdops; 620 621 disk->flags = GENHD_FL_CD; 622 + 623 + blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); 621 624 622 625 cd->device = sdev; 623 626 cd->disk = disk; ··· 879 878 struct gendisk *disk = cd->disk; 880 879 881 880 spin_lock(&sr_index_lock); 882 - clear_bit(disk->first_minor, sr_index_bits); 881 + clear_bit(MINOR(disk_devt(disk)), sr_index_bits); 883 882 spin_unlock(&sr_index_lock); 884 883 885 884 unregister_cdrom(&cd->cdi);
+2 -2
drivers/scsi/sym53c8xx_2/sym_glue.c
··· 519 519 * Shorten our settle_time if needed for 520 520 * this command not to time out. 521 521 */ 522 - if (np->s.settle_time_valid && cmd->timeout_per_command) { 523 - unsigned long tlimit = jiffies + cmd->timeout_per_command; 522 + if (np->s.settle_time_valid && cmd->request->timeout) { 523 + unsigned long tlimit = jiffies + cmd->request->timeout; 524 524 tlimit -= SYM_CONF_TIMER_INTERVAL*2; 525 525 if (time_after(np->s.settle_time, tlimit)) { 526 526 np->s.settle_time = tlimit;
+28 -1
fs/bio-integrity.c
··· 107 107 BUG_ON(bip == NULL); 108 108 109 109 /* A cloned bio doesn't own the integrity metadata */ 110 - if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) 110 + if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) 111 + && bip->bip_buf != NULL) 111 112 kfree(bip->bip_buf); 112 113 113 114 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); ··· 150 149 return len; 151 150 } 152 151 EXPORT_SYMBOL(bio_integrity_add_page); 152 + 153 + static int bdev_integrity_enabled(struct block_device *bdev, int rw) 154 + { 155 + struct blk_integrity *bi = bdev_get_integrity(bdev); 156 + 157 + if (bi == NULL) 158 + return 0; 159 + 160 + if (rw == READ && bi->verify_fn != NULL && 161 + (bi->flags & INTEGRITY_FLAG_READ)) 162 + return 1; 163 + 164 + if (rw == WRITE && bi->generate_fn != NULL && 165 + (bi->flags & INTEGRITY_FLAG_WRITE)) 166 + return 1; 167 + 168 + return 0; 169 + } 153 170 154 171 /** 155 172 * bio_integrity_enabled - Check whether integrity can be passed ··· 330 311 331 312 kunmap_atomic(kaddr, KM_USER0); 332 313 } 314 + } 315 + 316 + static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) 317 + { 318 + if (bi) 319 + return bi->tuple_size; 320 + 321 + return 0; 333 322 } 334 323 335 324 /**
+179 -118
fs/bio.c
··· 30 30 31 31 static struct kmem_cache *bio_slab __read_mostly; 32 32 33 - mempool_t *bio_split_pool __read_mostly; 33 + static mempool_t *bio_split_pool __read_mostly; 34 34 35 35 /* 36 36 * if you change this list, also change bvec_alloc or things will ··· 60 60 struct bio_vec *bvl; 61 61 62 62 /* 63 - * see comment near bvec_array define! 63 + * If 'bs' is given, lookup the pool and do the mempool alloc. 64 + * If not, this is a bio_kmalloc() allocation and just do a 65 + * kzalloc() for the exact number of vecs right away. 64 66 */ 65 - switch (nr) { 66 - case 1 : *idx = 0; break; 67 - case 2 ... 4: *idx = 1; break; 68 - case 5 ... 16: *idx = 2; break; 69 - case 17 ... 64: *idx = 3; break; 70 - case 65 ... 128: *idx = 4; break; 71 - case 129 ... BIO_MAX_PAGES: *idx = 5; break; 67 + if (bs) { 68 + /* 69 + * see comment near bvec_array define! 70 + */ 71 + switch (nr) { 72 + case 1: 73 + *idx = 0; 74 + break; 75 + case 2 ... 4: 76 + *idx = 1; 77 + break; 78 + case 5 ... 16: 79 + *idx = 2; 80 + break; 81 + case 17 ... 64: 82 + *idx = 3; 83 + break; 84 + case 65 ... 128: 85 + *idx = 4; 86 + break; 87 + case 129 ... 
BIO_MAX_PAGES: 88 + *idx = 5; 89 + break; 72 90 default: 73 91 return NULL; 74 - } 75 - /* 76 - * idx now points to the pool we want to allocate from 77 - */ 92 + } 78 93 79 - bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 80 - if (bvl) 81 - memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); 94 + /* 95 + * idx now points to the pool we want to allocate from 96 + */ 97 + bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 98 + if (bvl) 99 + memset(bvl, 0, 100 + bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); 101 + } else 102 + bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); 82 103 83 104 return bvl; 84 105 } ··· 128 107 bio_free(bio, fs_bio_set); 129 108 } 130 109 110 + static void bio_kmalloc_destructor(struct bio *bio) 111 + { 112 + kfree(bio->bi_io_vec); 113 + kfree(bio); 114 + } 115 + 131 116 void bio_init(struct bio *bio) 132 117 { 133 118 memset(bio, 0, sizeof(*bio)); 134 119 bio->bi_flags = 1 << BIO_UPTODATE; 120 + bio->bi_comp_cpu = -1; 135 121 atomic_set(&bio->bi_cnt, 1); 136 122 } 137 123 ··· 146 118 * bio_alloc_bioset - allocate a bio for I/O 147 119 * @gfp_mask: the GFP_ mask given to the slab allocator 148 120 * @nr_iovecs: number of iovecs to pre-allocate 149 - * @bs: the bio_set to allocate from 121 + * @bs: the bio_set to allocate from. If %NULL, just use kmalloc 150 122 * 151 123 * Description: 152 - * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. 124 + * bio_alloc_bioset will first try its own mempool to satisfy the allocation. 153 125 * If %__GFP_WAIT is set then we will block on the internal pool waiting 154 - * for a &struct bio to become free. 126 + * for a &struct bio to become free. If a %NULL @bs is passed in, we will 127 + * fall back to just using @kmalloc to allocate the required memory. 155 128 * 156 129 * allocate bio and iovecs from the memory pools specified by the 157 - * bio_set structure. 130 + * bio_set structure, or @kmalloc if none given. 
158 131 **/ 159 132 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 160 133 { 161 - struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); 134 + struct bio *bio; 135 + 136 + if (bs) 137 + bio = mempool_alloc(bs->bio_pool, gfp_mask); 138 + else 139 + bio = kmalloc(sizeof(*bio), gfp_mask); 162 140 163 141 if (likely(bio)) { 164 142 struct bio_vec *bvl = NULL; ··· 175 141 176 142 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 177 143 if (unlikely(!bvl)) { 178 - mempool_free(bio, bs->bio_pool); 144 + if (bs) 145 + mempool_free(bio, bs->bio_pool); 146 + else 147 + kfree(bio); 179 148 bio = NULL; 180 149 goto out; 181 150 } ··· 197 160 198 161 if (bio) 199 162 bio->bi_destructor = bio_fs_destructor; 163 + 164 + return bio; 165 + } 166 + 167 + /* 168 + * Like bio_alloc(), but doesn't use a mempool backing. This means that 169 + * it CAN fail, but while bio_alloc() can only be used for allocations 170 + * that have a short (finite) life span, bio_kmalloc() should be used 171 + * for more permanent bio allocations (like allocating some bio's for 172 + * initalization or setup purposes). 
173 + */ 174 + struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) 175 + { 176 + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); 177 + 178 + if (bio) 179 + bio->bi_destructor = bio_kmalloc_destructor; 200 180 201 181 return bio; 202 182 } ··· 260 206 blk_recount_segments(q, bio); 261 207 262 208 return bio->bi_phys_segments; 263 - } 264 - 265 - inline int bio_hw_segments(struct request_queue *q, struct bio *bio) 266 - { 267 - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 268 - blk_recount_segments(q, bio); 269 - 270 - return bio->bi_hw_segments; 271 209 } 272 210 273 211 /** ··· 396 350 */ 397 351 398 352 while (bio->bi_phys_segments >= q->max_phys_segments 399 - || bio->bi_hw_segments >= q->max_hw_segments 400 - || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { 353 + || bio->bi_phys_segments >= q->max_hw_segments) { 401 354 402 355 if (retried_segments) 403 356 return 0; ··· 440 395 } 441 396 442 397 /* If we may be able to merge these biovecs, force a recount */ 443 - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || 444 - BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) 398 + if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) 445 399 bio->bi_flags &= ~(1 << BIO_SEG_VALID); 446 400 447 401 bio->bi_vcnt++; 448 402 bio->bi_phys_segments++; 449 - bio->bi_hw_segments++; 450 403 done: 451 404 bio->bi_size += len; 452 405 return len; ··· 492 449 493 450 struct bio_map_data { 494 451 struct bio_vec *iovecs; 495 - int nr_sgvecs; 496 452 struct sg_iovec *sgvecs; 453 + int nr_sgvecs; 454 + int is_our_pages; 497 455 }; 498 456 499 457 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, 500 - struct sg_iovec *iov, int iov_count) 458 + struct sg_iovec *iov, int iov_count, 459 + int is_our_pages) 501 460 { 502 461 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); 503 462 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); 504 463 bmd->nr_sgvecs = iov_count; 464 + bmd->is_our_pages = is_our_pages; 505 
465 bio->bi_private = bmd; 506 466 } 507 467 ··· 539 493 } 540 494 541 495 static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, 542 - struct sg_iovec *iov, int iov_count, int uncopy) 496 + struct sg_iovec *iov, int iov_count, int uncopy, 497 + int do_free_page) 543 498 { 544 499 int ret = 0, i; 545 500 struct bio_vec *bvec; ··· 583 536 } 584 537 } 585 538 586 - if (uncopy) 539 + if (do_free_page) 587 540 __free_page(bvec->bv_page); 588 541 } 589 542 ··· 600 553 int bio_uncopy_user(struct bio *bio) 601 554 { 602 555 struct bio_map_data *bmd = bio->bi_private; 603 - int ret; 556 + int ret = 0; 604 557 605 - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); 606 - 558 + if (!bio_flagged(bio, BIO_NULL_MAPPED)) 559 + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, 560 + bmd->nr_sgvecs, 1, bmd->is_our_pages); 607 561 bio_free_map_data(bmd); 608 562 bio_put(bio); 609 563 return ret; ··· 613 565 /** 614 566 * bio_copy_user_iov - copy user data to bio 615 567 * @q: destination block queue 568 + * @map_data: pointer to the rq_map_data holding pages (if necessary) 616 569 * @iov: the iovec. 617 570 * @iov_count: number of elements in the iovec 618 571 * @write_to_vm: bool indicating writing to pages or not 572 + * @gfp_mask: memory allocation flags 619 573 * 620 574 * Prepares and returns a bio for indirect user io, bouncing data 621 575 * to/from kernel pages as necessary. Must be paired with 622 576 * call bio_uncopy_user() on io completion. 
623 577 */ 624 - struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, 625 - int iov_count, int write_to_vm) 578 + struct bio *bio_copy_user_iov(struct request_queue *q, 579 + struct rq_map_data *map_data, 580 + struct sg_iovec *iov, int iov_count, 581 + int write_to_vm, gfp_t gfp_mask) 626 582 { 627 583 struct bio_map_data *bmd; 628 584 struct bio_vec *bvec; ··· 649 597 len += iov[i].iov_len; 650 598 } 651 599 652 - bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); 600 + bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); 653 601 if (!bmd) 654 602 return ERR_PTR(-ENOMEM); 655 603 656 604 ret = -ENOMEM; 657 - bio = bio_alloc(GFP_KERNEL, nr_pages); 605 + bio = bio_alloc(gfp_mask, nr_pages); 658 606 if (!bio) 659 607 goto out_bmd; 660 608 661 609 bio->bi_rw |= (!write_to_vm << BIO_RW); 662 610 663 611 ret = 0; 612 + i = 0; 664 613 while (len) { 665 - unsigned int bytes = PAGE_SIZE; 614 + unsigned int bytes; 615 + 616 + if (map_data) 617 + bytes = 1U << (PAGE_SHIFT + map_data->page_order); 618 + else 619 + bytes = PAGE_SIZE; 666 620 667 621 if (bytes > len) 668 622 bytes = len; 669 623 670 - page = alloc_page(q->bounce_gfp | GFP_KERNEL); 624 + if (map_data) { 625 + if (i == map_data->nr_entries) { 626 + ret = -ENOMEM; 627 + break; 628 + } 629 + page = map_data->pages[i++]; 630 + } else 631 + page = alloc_page(q->bounce_gfp | gfp_mask); 671 632 if (!page) { 672 633 ret = -ENOMEM; 673 634 break; ··· 699 634 * success 700 635 */ 701 636 if (!write_to_vm) { 702 - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); 637 + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); 703 638 if (ret) 704 639 goto cleanup; 705 640 } 706 641 707 - bio_set_map_data(bmd, bio, iov, iov_count); 642 + bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 
0 : 1); 708 643 return bio; 709 644 cleanup: 710 - bio_for_each_segment(bvec, bio, i) 711 - __free_page(bvec->bv_page); 645 + if (!map_data) 646 + bio_for_each_segment(bvec, bio, i) 647 + __free_page(bvec->bv_page); 712 648 713 649 bio_put(bio); 714 650 out_bmd: ··· 720 654 /** 721 655 * bio_copy_user - copy user data to bio 722 656 * @q: destination block queue 657 + * @map_data: pointer to the rq_map_data holding pages (if necessary) 723 658 * @uaddr: start of user address 724 659 * @len: length in bytes 725 660 * @write_to_vm: bool indicating writing to pages or not 661 + * @gfp_mask: memory allocation flags 726 662 * 727 663 * Prepares and returns a bio for indirect user io, bouncing data 728 664 * to/from kernel pages as necessary. Must be paired with 729 665 * call bio_uncopy_user() on io completion. 730 666 */ 731 - struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, 732 - unsigned int len, int write_to_vm) 667 + struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, 668 + unsigned long uaddr, unsigned int len, 669 + int write_to_vm, gfp_t gfp_mask) 733 670 { 734 671 struct sg_iovec iov; 735 672 736 673 iov.iov_base = (void __user *)uaddr; 737 674 iov.iov_len = len; 738 675 739 - return bio_copy_user_iov(q, &iov, 1, write_to_vm); 676 + return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); 740 677 } 741 678 742 679 static struct bio *__bio_map_user_iov(struct request_queue *q, 743 680 struct block_device *bdev, 744 681 struct sg_iovec *iov, int iov_count, 745 - int write_to_vm) 682 + int write_to_vm, gfp_t gfp_mask) 746 683 { 747 684 int i, j; 748 685 int nr_pages = 0; ··· 771 702 if (!nr_pages) 772 703 return ERR_PTR(-EINVAL); 773 704 774 - bio = bio_alloc(GFP_KERNEL, nr_pages); 705 + bio = bio_alloc(gfp_mask, nr_pages); 775 706 if (!bio) 776 707 return ERR_PTR(-ENOMEM); 777 708 778 709 ret = -ENOMEM; 779 - pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 710 + pages = 
kcalloc(nr_pages, sizeof(struct page *), gfp_mask); 780 711 if (!pages) 781 712 goto out; 782 713 ··· 855 786 * @uaddr: start of user address 856 787 * @len: length in bytes 857 788 * @write_to_vm: bool indicating writing to pages or not 789 + * @gfp_mask: memory allocation flags 858 790 * 859 791 * Map the user space address into a bio suitable for io to a block 860 792 * device. Returns an error pointer in case of error. 861 793 */ 862 794 struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, 863 - unsigned long uaddr, unsigned int len, int write_to_vm) 795 + unsigned long uaddr, unsigned int len, int write_to_vm, 796 + gfp_t gfp_mask) 864 797 { 865 798 struct sg_iovec iov; 866 799 867 800 iov.iov_base = (void __user *)uaddr; 868 801 iov.iov_len = len; 869 802 870 - return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); 803 + return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); 871 804 } 872 805 873 806 /** ··· 879 808 * @iov: the iovec. 880 809 * @iov_count: number of elements in the iovec 881 810 * @write_to_vm: bool indicating writing to pages or not 811 + * @gfp_mask: memory allocation flags 882 812 * 883 813 * Map the user space address into a bio suitable for io to a block 884 814 * device. Returns an error pointer in case of error. 
885 815 */ 886 816 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, 887 817 struct sg_iovec *iov, int iov_count, 888 - int write_to_vm) 818 + int write_to_vm, gfp_t gfp_mask) 889 819 { 890 820 struct bio *bio; 891 821 892 - bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); 893 - 822 + bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, 823 + gfp_mask); 894 824 if (IS_ERR(bio)) 895 825 return bio; 896 826 ··· 1048 976 struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, 1049 977 gfp_t gfp_mask, int reading) 1050 978 { 1051 - unsigned long kaddr = (unsigned long)data; 1052 - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1053 - unsigned long start = kaddr >> PAGE_SHIFT; 1054 - const int nr_pages = end - start; 1055 979 struct bio *bio; 1056 980 struct bio_vec *bvec; 1057 - struct bio_map_data *bmd; 1058 - int i, ret; 1059 - struct sg_iovec iov; 981 + int i; 1060 982 1061 - iov.iov_base = data; 1062 - iov.iov_len = len; 1063 - 1064 - bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); 1065 - if (!bmd) 1066 - return ERR_PTR(-ENOMEM); 1067 - 1068 - ret = -ENOMEM; 1069 - bio = bio_alloc(gfp_mask, nr_pages); 1070 - if (!bio) 1071 - goto out_bmd; 1072 - 1073 - while (len) { 1074 - struct page *page; 1075 - unsigned int bytes = PAGE_SIZE; 1076 - 1077 - if (bytes > len) 1078 - bytes = len; 1079 - 1080 - page = alloc_page(q->bounce_gfp | gfp_mask); 1081 - if (!page) { 1082 - ret = -ENOMEM; 1083 - goto cleanup; 1084 - } 1085 - 1086 - if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { 1087 - ret = -EINVAL; 1088 - goto cleanup; 1089 - } 1090 - 1091 - len -= bytes; 1092 - } 983 + bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); 984 + if (IS_ERR(bio)) 985 + return bio; 1093 986 1094 987 if (!reading) { 1095 988 void *p = data; ··· 1067 1030 } 1068 1031 } 1069 1032 1070 - bio->bi_private = bmd; 1071 1033 bio->bi_end_io = bio_copy_kern_endio; 1072 1034 1073 - 
bio_set_map_data(bmd, bio, &iov, 1); 1074 1035 return bio; 1075 - cleanup: 1076 - bio_for_each_segment(bvec, bio, i) 1077 - __free_page(bvec->bv_page); 1078 - 1079 - bio_put(bio); 1080 - out_bmd: 1081 - bio_free_map_data(bmd); 1082 - 1083 - return ERR_PTR(ret); 1084 1036 } 1085 1037 1086 1038 /* ··· 1256 1230 * split a bio - only worry about a bio with a single page 1257 1231 * in it's iovec 1258 1232 */ 1259 - struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) 1233 + struct bio_pair *bio_split(struct bio *bi, int first_sectors) 1260 1234 { 1261 - struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); 1235 + struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); 1262 1236 1263 1237 if (!bp) 1264 1238 return bp; ··· 1292 1266 bp->bio2.bi_end_io = bio_pair_end_2; 1293 1267 1294 1268 bp->bio1.bi_private = bi; 1295 - bp->bio2.bi_private = pool; 1269 + bp->bio2.bi_private = bio_split_pool; 1296 1270 1297 1271 if (bio_integrity(bi)) 1298 1272 bio_integrity_split(bi, bp, first_sectors); ··· 1300 1274 return bp; 1301 1275 } 1302 1276 1277 + /** 1278 + * bio_sector_offset - Find hardware sector offset in bio 1279 + * @bio: bio to inspect 1280 + * @index: bio_vec index 1281 + * @offset: offset in bv_page 1282 + * 1283 + * Return the number of hardware sectors between beginning of bio 1284 + * and an end point indicated by a bio_vec index and an offset 1285 + * within that vector's page. 
1286 + */ 1287 + sector_t bio_sector_offset(struct bio *bio, unsigned short index, 1288 + unsigned int offset) 1289 + { 1290 + unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); 1291 + struct bio_vec *bv; 1292 + sector_t sectors; 1293 + int i; 1294 + 1295 + sectors = 0; 1296 + 1297 + if (index >= bio->bi_idx) 1298 + index = bio->bi_vcnt - 1; 1299 + 1300 + __bio_for_each_segment(bv, bio, i, 0) { 1301 + if (i == index) { 1302 + if (offset > bv->bv_offset) 1303 + sectors += (offset - bv->bv_offset) / sector_sz; 1304 + break; 1305 + } 1306 + 1307 + sectors += bv->bv_len / sector_sz; 1308 + } 1309 + 1310 + return sectors; 1311 + } 1312 + EXPORT_SYMBOL(bio_sector_offset); 1303 1313 1304 1314 /* 1305 1315 * create memory pools for biovec's in a bio_set. ··· 1438 1376 subsys_initcall(init_bio); 1439 1377 1440 1378 EXPORT_SYMBOL(bio_alloc); 1379 + EXPORT_SYMBOL(bio_kmalloc); 1441 1380 EXPORT_SYMBOL(bio_put); 1442 1381 EXPORT_SYMBOL(bio_free); 1443 1382 EXPORT_SYMBOL(bio_endio); ··· 1446 1383 EXPORT_SYMBOL(__bio_clone); 1447 1384 EXPORT_SYMBOL(bio_clone); 1448 1385 EXPORT_SYMBOL(bio_phys_segments); 1449 - EXPORT_SYMBOL(bio_hw_segments); 1450 1386 EXPORT_SYMBOL(bio_add_page); 1451 1387 EXPORT_SYMBOL(bio_add_pc_page); 1452 1388 EXPORT_SYMBOL(bio_get_nr_vecs); ··· 1455 1393 EXPORT_SYMBOL(bio_copy_kern); 1456 1394 EXPORT_SYMBOL(bio_pair_release); 1457 1395 EXPORT_SYMBOL(bio_split); 1458 - EXPORT_SYMBOL(bio_split_pool); 1459 1396 EXPORT_SYMBOL(bio_copy_user); 1460 1397 EXPORT_SYMBOL(bio_uncopy_user); 1461 1398 EXPORT_SYMBOL(bioset_create);
+123 -59
fs/block_dev.c
··· 540 540 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 541 541 */ 542 542 543 - static struct kobject *bdev_get_kobj(struct block_device *bdev) 544 - { 545 - if (bdev->bd_contains != bdev) 546 - return kobject_get(&bdev->bd_part->dev.kobj); 547 - else 548 - return kobject_get(&bdev->bd_disk->dev.kobj); 549 - } 550 - 551 - static struct kobject *bdev_get_holder(struct block_device *bdev) 552 - { 553 - if (bdev->bd_contains != bdev) 554 - return kobject_get(bdev->bd_part->holder_dir); 555 - else 556 - return kobject_get(bdev->bd_disk->holder_dir); 557 - } 558 - 559 543 static int add_symlink(struct kobject *from, struct kobject *to) 560 544 { 561 545 if (!from || !to) ··· 588 604 if (!bo->hdev) 589 605 goto fail_put_sdir; 590 606 591 - bo->sdev = bdev_get_kobj(bdev); 607 + bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); 592 608 if (!bo->sdev) 593 609 goto fail_put_hdev; 594 610 595 - bo->hdir = bdev_get_holder(bdev); 611 + bo->hdir = kobject_get(bdev->bd_part->holder_dir); 596 612 if (!bo->hdir) 597 613 goto fail_put_sdev; 598 614 ··· 852 868 853 869 EXPORT_SYMBOL(open_by_devnum); 854 870 871 + /** 872 + * flush_disk - invalidates all buffer-cache entries on a disk 873 + * 874 + * @bdev: struct block device to be flushed 875 + * 876 + * Invalidates all buffer-cache entries on a disk. It should be called 877 + * when a disk has been changed -- either by a media change or online 878 + * resize. 879 + */ 880 + static void flush_disk(struct block_device *bdev) 881 + { 882 + if (__invalidate_device(bdev)) { 883 + char name[BDEVNAME_SIZE] = ""; 884 + 885 + if (bdev->bd_disk) 886 + disk_name(bdev->bd_disk, 0, name); 887 + printk(KERN_WARNING "VFS: busy inodes on changed media or " 888 + "resized disk %s\n", name); 889 + } 890 + 891 + if (!bdev->bd_disk) 892 + return; 893 + if (disk_partitionable(bdev->bd_disk)) 894 + bdev->bd_invalidated = 1; 895 + } 896 + 897 + /** 898 + * check_disk_size_change - checks for disk size change and adjusts bdev size. 
899 + * @disk: struct gendisk to check 900 + * @bdev: struct bdev to adjust. 901 + * 902 + * This routine checks to see if the bdev size does not match the disk size 903 + * and adjusts it if it differs. 904 + */ 905 + void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) 906 + { 907 + loff_t disk_size, bdev_size; 908 + 909 + disk_size = (loff_t)get_capacity(disk) << 9; 910 + bdev_size = i_size_read(bdev->bd_inode); 911 + if (disk_size != bdev_size) { 912 + char name[BDEVNAME_SIZE]; 913 + 914 + disk_name(disk, 0, name); 915 + printk(KERN_INFO 916 + "%s: detected capacity change from %lld to %lld\n", 917 + name, bdev_size, disk_size); 918 + i_size_write(bdev->bd_inode, disk_size); 919 + flush_disk(bdev); 920 + } 921 + } 922 + EXPORT_SYMBOL(check_disk_size_change); 923 + 924 + /** 925 + * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back 926 + * @disk: struct gendisk to be revalidated 927 + * 928 + * This routine is a wrapper for lower-level driver's revalidate_disk 929 + * call-backs. It is used to do common pre and post operations needed 930 + * for all revalidate_disk operations. 931 + */ 932 + int revalidate_disk(struct gendisk *disk) 933 + { 934 + struct block_device *bdev; 935 + int ret = 0; 936 + 937 + if (disk->fops->revalidate_disk) 938 + ret = disk->fops->revalidate_disk(disk); 939 + 940 + bdev = bdget_disk(disk, 0); 941 + if (!bdev) 942 + return ret; 943 + 944 + mutex_lock(&bdev->bd_mutex); 945 + check_disk_size_change(disk, bdev); 946 + mutex_unlock(&bdev->bd_mutex); 947 + bdput(bdev); 948 + return ret; 949 + } 950 + EXPORT_SYMBOL(revalidate_disk); 951 + 855 952 /* 856 953 * This routine checks whether a removable media has been changed, 857 954 * and invalidates all buffer-cache-entries in that case. 
This ··· 952 887 if (!bdops->media_changed(bdev->bd_disk)) 953 888 return 0; 954 889 955 - if (__invalidate_device(bdev)) 956 - printk("VFS: busy inodes on changed media.\n"); 957 - 890 + flush_disk(bdev); 958 891 if (bdops->revalidate_disk) 959 892 bdops->revalidate_disk(bdev->bd_disk); 960 - if (bdev->bd_disk->minors > 1) 961 - bdev->bd_invalidated = 1; 962 893 return 1; 963 894 } 964 895 ··· 988 927 989 928 static int do_open(struct block_device *bdev, struct file *file, int for_part) 990 929 { 991 - struct module *owner = NULL; 992 930 struct gendisk *disk; 931 + struct hd_struct *part = NULL; 993 932 int ret; 994 - int part; 933 + int partno; 995 934 int perm = 0; 996 935 997 936 if (file->f_mode & FMODE_READ) ··· 1009 948 1010 949 ret = -ENXIO; 1011 950 file->f_mapping = bdev->bd_inode->i_mapping; 951 + 1012 952 lock_kernel(); 1013 - disk = get_gendisk(bdev->bd_dev, &part); 1014 - if (!disk) { 1015 - unlock_kernel(); 1016 - bdput(bdev); 1017 - return ret; 1018 - } 1019 - owner = disk->fops->owner; 953 + 954 + disk = get_gendisk(bdev->bd_dev, &partno); 955 + if (!disk) 956 + goto out_unlock_kernel; 957 + part = disk_get_part(disk, partno); 958 + if (!part) 959 + goto out_unlock_kernel; 1020 960 1021 961 mutex_lock_nested(&bdev->bd_mutex, for_part); 1022 962 if (!bdev->bd_openers) { 1023 963 bdev->bd_disk = disk; 964 + bdev->bd_part = part; 1024 965 bdev->bd_contains = bdev; 1025 - if (!part) { 966 + if (!partno) { 1026 967 struct backing_dev_info *bdi; 1027 968 if (disk->fops->open) { 1028 969 ret = disk->fops->open(bdev->bd_inode, file); 1029 970 if (ret) 1030 - goto out_first; 971 + goto out_clear; 1031 972 } 1032 973 if (!bdev->bd_openers) { 1033 974 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); ··· 1041 978 if (bdev->bd_invalidated) 1042 979 rescan_partitions(disk, bdev); 1043 980 } else { 1044 - struct hd_struct *p; 1045 981 struct block_device *whole; 1046 982 whole = bdget_disk(disk, 0); 1047 983 ret = -ENOMEM; 1048 984 if (!whole) 1049 - goto 
out_first; 985 + goto out_clear; 1050 986 BUG_ON(for_part); 1051 987 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); 1052 988 if (ret) 1053 - goto out_first; 989 + goto out_clear; 1054 990 bdev->bd_contains = whole; 1055 - p = disk->part[part - 1]; 1056 991 bdev->bd_inode->i_data.backing_dev_info = 1057 992 whole->bd_inode->i_data.backing_dev_info; 1058 - if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { 993 + if (!(disk->flags & GENHD_FL_UP) || 994 + !part || !part->nr_sects) { 1059 995 ret = -ENXIO; 1060 - goto out_first; 996 + goto out_clear; 1061 997 } 1062 - kobject_get(&p->dev.kobj); 1063 - bdev->bd_part = p; 1064 - bd_set_size(bdev, (loff_t) p->nr_sects << 9); 998 + bd_set_size(bdev, (loff_t)part->nr_sects << 9); 1065 999 } 1066 1000 } else { 1001 + disk_put_part(part); 1067 1002 put_disk(disk); 1068 - module_put(owner); 1003 + module_put(disk->fops->owner); 1004 + part = NULL; 1005 + disk = NULL; 1069 1006 if (bdev->bd_contains == bdev) { 1070 1007 if (bdev->bd_disk->fops->open) { 1071 1008 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); 1072 1009 if (ret) 1073 - goto out; 1010 + goto out_unlock_bdev; 1074 1011 } 1075 1012 if (bdev->bd_invalidated) 1076 1013 rescan_partitions(bdev->bd_disk, bdev); ··· 1083 1020 unlock_kernel(); 1084 1021 return 0; 1085 1022 1086 - out_first: 1023 + out_clear: 1087 1024 bdev->bd_disk = NULL; 1025 + bdev->bd_part = NULL; 1088 1026 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1089 1027 if (bdev != bdev->bd_contains) 1090 1028 __blkdev_put(bdev->bd_contains, 1); 1091 1029 bdev->bd_contains = NULL; 1092 - put_disk(disk); 1093 - module_put(owner); 1094 - out: 1030 + out_unlock_bdev: 1095 1031 mutex_unlock(&bdev->bd_mutex); 1032 + out_unlock_kernel: 1096 1033 unlock_kernel(); 1097 - if (ret) 1098 - bdput(bdev); 1034 + 1035 + disk_put_part(part); 1036 + if (disk) 1037 + module_put(disk->fops->owner); 1038 + put_disk(disk); 1039 + bdput(bdev); 1040 + 1099 1041 return ret; 1100 1042 } 
1101 1043 ··· 1185 1117 1186 1118 put_disk(disk); 1187 1119 module_put(owner); 1188 - 1189 - if (bdev->bd_contains != bdev) { 1190 - kobject_put(&bdev->bd_part->dev.kobj); 1191 - bdev->bd_part = NULL; 1192 - } 1120 + disk_put_part(bdev->bd_part); 1121 + bdev->bd_part = NULL; 1193 1122 bdev->bd_disk = NULL; 1194 1123 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1195 1124 if (bdev != bdev->bd_contains) ··· 1262 1197 1263 1198 /** 1264 1199 * lookup_bdev - lookup a struct block_device by name 1200 + * @pathname: special file representing the block device 1265 1201 * 1266 - * @path: special file representing the block device 1267 - * 1268 - * Get a reference to the blockdevice at @path in the current 1202 + * Get a reference to the blockdevice at @pathname in the current 1269 1203 * namespace if possible and return it. Return ERR_PTR(error) 1270 1204 * otherwise. 1271 1205 */
+14
fs/fat/fatent.c
··· 6 6 #include <linux/module.h> 7 7 #include <linux/fs.h> 8 8 #include <linux/msdos_fs.h> 9 + #include <linux/blkdev.h> 9 10 10 11 struct fatent_operations { 11 12 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); ··· 536 535 struct fat_entry fatent; 537 536 struct buffer_head *bhs[MAX_BUF_PER_PAGE]; 538 537 int i, err, nr_bhs; 538 + int first_cl = cluster; 539 539 540 540 nr_bhs = 0; 541 541 fatent_init(&fatent); ··· 551 549 __func__); 552 550 err = -EIO; 553 551 goto error; 552 + } 553 + 554 + /* 555 + * Issue discard for the sectors we no longer care about, 556 + * batching contiguous clusters into one request 557 + */ 558 + if (cluster != fatent.entry + 1) { 559 + int nr_clus = fatent.entry - first_cl + 1; 560 + 561 + sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), 562 + nr_clus * sbi->sec_per_clus); 563 + first_cl = cluster; 554 564 } 555 565 556 566 ops->ent_put(&fatent, FAT_ENT_FREE);
+157 -111
fs/partitions/check.c
··· 120 120 * a pointer to that same buffer (for convenience). 121 121 */ 122 122 123 - char *disk_name(struct gendisk *hd, int part, char *buf) 123 + char *disk_name(struct gendisk *hd, int partno, char *buf) 124 124 { 125 - if (!part) 125 + if (!partno) 126 126 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 127 127 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 128 - snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); 128 + snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); 129 129 else 130 - snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); 130 + snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); 131 131 132 132 return buf; 133 133 } 134 134 135 135 const char *bdevname(struct block_device *bdev, char *buf) 136 136 { 137 - int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; 138 - return disk_name(bdev->bd_disk, part, buf); 137 + return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); 139 138 } 140 139 141 140 EXPORT_SYMBOL(bdevname); ··· 168 169 if (isdigit(state->name[strlen(state->name)-1])) 169 170 sprintf(state->name, "p"); 170 171 171 - state->limit = hd->minors; 172 + state->limit = disk_max_parts(hd); 172 173 i = res = err = 0; 173 174 while (!res && check_part[i]) { 174 175 memset(&state->parts, 0, sizeof(state->parts)); ··· 203 204 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 204 205 } 205 206 206 - static ssize_t part_size_show(struct device *dev, 207 - struct device_attribute *attr, char *buf) 207 + ssize_t part_size_show(struct device *dev, 208 + struct device_attribute *attr, char *buf) 208 209 { 209 210 struct hd_struct *p = dev_to_part(dev); 210 211 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 211 212 } 212 213 213 - static ssize_t part_stat_show(struct device *dev, 214 - struct device_attribute *attr, char *buf) 214 + ssize_t part_stat_show(struct device *dev, 215 + struct device_attribute *attr, char *buf) 215 216 { 216 217 struct hd_struct 
*p = dev_to_part(dev); 218 + int cpu; 217 219 218 - preempt_disable(); 219 - part_round_stats(p); 220 - preempt_enable(); 220 + cpu = part_stat_lock(); 221 + part_round_stats(cpu, p); 222 + part_stat_unlock(); 221 223 return sprintf(buf, 222 224 "%8lu %8lu %8llu %8u " 223 225 "%8lu %8lu %8llu %8u " ··· 238 238 } 239 239 240 240 #ifdef CONFIG_FAIL_MAKE_REQUEST 241 - static ssize_t part_fail_show(struct device *dev, 242 - struct device_attribute *attr, char *buf) 241 + ssize_t part_fail_show(struct device *dev, 242 + struct device_attribute *attr, char *buf) 243 243 { 244 244 struct hd_struct *p = dev_to_part(dev); 245 245 246 246 return sprintf(buf, "%d\n", p->make_it_fail); 247 247 } 248 248 249 - static ssize_t part_fail_store(struct device *dev, 250 - struct device_attribute *attr, 251 - const char *buf, size_t count) 249 + ssize_t part_fail_store(struct device *dev, 250 + struct device_attribute *attr, 251 + const char *buf, size_t count) 252 252 { 253 253 struct hd_struct *p = dev_to_part(dev); 254 254 int i; ··· 300 300 .release = part_release, 301 301 }; 302 302 303 - static inline void partition_sysfs_add_subdir(struct hd_struct *p) 303 + static void delete_partition_rcu_cb(struct rcu_head *head) 304 304 { 305 - struct kobject *k; 305 + struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); 306 306 307 - k = kobject_get(&p->dev.kobj); 308 - p->holder_dir = kobject_create_and_add("holders", k); 309 - kobject_put(k); 307 + part->start_sect = 0; 308 + part->nr_sects = 0; 309 + part_stat_set_all(part, 0); 310 + put_device(part_to_dev(part)); 310 311 } 311 312 312 - static inline void disk_sysfs_add_subdirs(struct gendisk *disk) 313 + void delete_partition(struct gendisk *disk, int partno) 313 314 { 314 - struct kobject *k; 315 + struct disk_part_tbl *ptbl = disk->part_tbl; 316 + struct hd_struct *part; 315 317 316 - k = kobject_get(&disk->dev.kobj); 317 - disk->holder_dir = kobject_create_and_add("holders", k); 318 - disk->slave_dir = 
kobject_create_and_add("slaves", k); 319 - kobject_put(k); 320 - } 321 - 322 - void delete_partition(struct gendisk *disk, int part) 323 - { 324 - struct hd_struct *p = disk->part[part-1]; 325 - 326 - if (!p) 318 + if (partno >= ptbl->len) 327 319 return; 328 - if (!p->nr_sects) 320 + 321 + part = ptbl->part[partno]; 322 + if (!part) 329 323 return; 330 - disk->part[part-1] = NULL; 331 - p->start_sect = 0; 332 - p->nr_sects = 0; 333 - part_stat_set_all(p, 0); 334 - kobject_put(p->holder_dir); 335 - device_del(&p->dev); 336 - put_device(&p->dev); 324 + 325 + blk_free_devt(part_devt(part)); 326 + rcu_assign_pointer(ptbl->part[partno], NULL); 327 + kobject_put(part->holder_dir); 328 + device_del(part_to_dev(part)); 329 + 330 + call_rcu(&part->rcu_head, delete_partition_rcu_cb); 337 331 } 338 332 339 333 static ssize_t whole_disk_show(struct device *dev, ··· 338 344 static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 339 345 whole_disk_show, NULL); 340 346 341 - int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 347 + int add_partition(struct gendisk *disk, int partno, 348 + sector_t start, sector_t len, int flags) 342 349 { 343 350 struct hd_struct *p; 351 + dev_t devt = MKDEV(0, 0); 352 + struct device *ddev = disk_to_dev(disk); 353 + struct device *pdev; 354 + struct disk_part_tbl *ptbl; 355 + const char *dname; 344 356 int err; 357 + 358 + err = disk_expand_part_tbl(disk, partno); 359 + if (err) 360 + return err; 361 + ptbl = disk->part_tbl; 362 + 363 + if (ptbl->part[partno]) 364 + return -EBUSY; 345 365 346 366 p = kzalloc(sizeof(*p), GFP_KERNEL); 347 367 if (!p) ··· 363 355 364 356 if (!init_part_stats(p)) { 365 357 err = -ENOMEM; 366 - goto out0; 358 + goto out_free; 367 359 } 360 + pdev = part_to_dev(p); 361 + 368 362 p->start_sect = start; 369 363 p->nr_sects = len; 370 - p->partno = part; 371 - p->policy = disk->policy; 364 + p->partno = partno; 365 + p->policy = get_disk_ro(disk); 372 366 373 - if 
(isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) 374 - snprintf(p->dev.bus_id, BUS_ID_SIZE, 375 - "%sp%d", disk->dev.bus_id, part); 367 + dname = dev_name(ddev); 368 + if (isdigit(dname[strlen(dname) - 1])) 369 + snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); 376 370 else 377 - snprintf(p->dev.bus_id, BUS_ID_SIZE, 378 - "%s%d", disk->dev.bus_id, part); 371 + snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); 379 372 380 - device_initialize(&p->dev); 381 - p->dev.devt = MKDEV(disk->major, disk->first_minor + part); 382 - p->dev.class = &block_class; 383 - p->dev.type = &part_type; 384 - p->dev.parent = &disk->dev; 385 - disk->part[part-1] = p; 373 + device_initialize(pdev); 374 + pdev->class = &block_class; 375 + pdev->type = &part_type; 376 + pdev->parent = ddev; 377 + 378 + err = blk_alloc_devt(p, &devt); 379 + if (err) 380 + goto out_free; 381 + pdev->devt = devt; 386 382 387 383 /* delay uevent until 'holders' subdir is created */ 388 - p->dev.uevent_suppress = 1; 389 - err = device_add(&p->dev); 384 + pdev->uevent_suppress = 1; 385 + err = device_add(pdev); 390 386 if (err) 391 - goto out1; 392 - partition_sysfs_add_subdir(p); 393 - p->dev.uevent_suppress = 0; 387 + goto out_put; 388 + 389 + err = -ENOMEM; 390 + p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); 391 + if (!p->holder_dir) 392 + goto out_del; 393 + 394 + pdev->uevent_suppress = 0; 394 395 if (flags & ADDPART_FLAG_WHOLEDISK) { 395 - err = device_create_file(&p->dev, &dev_attr_whole_disk); 396 + err = device_create_file(pdev, &dev_attr_whole_disk); 396 397 if (err) 397 - goto out2; 398 + goto out_del; 398 399 } 399 400 401 + /* everything is up and running, commence */ 402 + INIT_RCU_HEAD(&p->rcu_head); 403 + rcu_assign_pointer(ptbl->part[partno], p); 404 + 400 405 /* suppress uevent if the disk supresses it */ 401 - if (!disk->dev.uevent_suppress) 402 - kobject_uevent(&p->dev.kobj, KOBJ_ADD); 406 + if (!ddev->uevent_suppress) 407 + 
kobject_uevent(&pdev->kobj, KOBJ_ADD); 403 408 404 409 return 0; 405 410 406 - out2: 407 - device_del(&p->dev); 408 - out1: 409 - put_device(&p->dev); 410 - free_part_stats(p); 411 - out0: 411 + out_free: 412 412 kfree(p); 413 + return err; 414 + out_del: 415 + kobject_put(p->holder_dir); 416 + device_del(pdev); 417 + out_put: 418 + put_device(pdev); 419 + blk_free_devt(devt); 413 420 return err; 414 421 } 415 422 416 423 /* Not exported, helper to add_disk(). */ 417 424 void register_disk(struct gendisk *disk) 418 425 { 426 + struct device *ddev = disk_to_dev(disk); 419 427 struct block_device *bdev; 428 + struct disk_part_iter piter; 429 + struct hd_struct *part; 420 430 char *s; 421 - int i; 422 - struct hd_struct *p; 423 431 int err; 424 432 425 - disk->dev.parent = disk->driverfs_dev; 426 - disk->dev.devt = MKDEV(disk->major, disk->first_minor); 433 + ddev->parent = disk->driverfs_dev; 427 434 428 - strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); 435 + strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); 429 436 /* ewww... some of these buggers have / in the name... 
*/ 430 - s = strchr(disk->dev.bus_id, '/'); 437 + s = strchr(ddev->bus_id, '/'); 431 438 if (s) 432 439 *s = '!'; 433 440 434 441 /* delay uevents, until we scanned partition table */ 435 - disk->dev.uevent_suppress = 1; 442 + ddev->uevent_suppress = 1; 436 443 437 - if (device_add(&disk->dev)) 444 + if (device_add(ddev)) 438 445 return; 439 446 #ifndef CONFIG_SYSFS_DEPRECATED 440 - err = sysfs_create_link(block_depr, &disk->dev.kobj, 441 - kobject_name(&disk->dev.kobj)); 447 + err = sysfs_create_link(block_depr, &ddev->kobj, 448 + kobject_name(&ddev->kobj)); 442 449 if (err) { 443 - device_del(&disk->dev); 450 + device_del(ddev); 444 451 return; 445 452 } 446 453 #endif 447 - disk_sysfs_add_subdirs(disk); 454 + disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); 455 + disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); 448 456 449 457 /* No minors to use for partitions */ 450 - if (disk->minors == 1) 458 + if (!disk_partitionable(disk)) 451 459 goto exit; 452 460 453 461 /* No such device (e.g., media were just removed) */ ··· 482 458 483 459 exit: 484 460 /* announce disk after possible partitions are created */ 485 - disk->dev.uevent_suppress = 0; 486 - kobject_uevent(&disk->dev.kobj, KOBJ_ADD); 461 + ddev->uevent_suppress = 0; 462 + kobject_uevent(&ddev->kobj, KOBJ_ADD); 487 463 488 464 /* announce possible partitions */ 489 - for (i = 1; i < disk->minors; i++) { 490 - p = disk->part[i-1]; 491 - if (!p || !p->nr_sects) 492 - continue; 493 - kobject_uevent(&p->dev.kobj, KOBJ_ADD); 494 - } 465 + disk_part_iter_init(&piter, disk, 0); 466 + while ((part = disk_part_iter_next(&piter))) 467 + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); 468 + disk_part_iter_exit(&piter); 495 469 } 496 470 497 471 int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 498 472 { 473 + struct disk_part_iter piter; 474 + struct hd_struct *part; 499 475 struct parsed_partitions *state; 500 - int p, res; 476 + int p, highest, res; 
501 477 502 478 if (bdev->bd_part_count) 503 479 return -EBUSY; 504 480 res = invalidate_partition(disk, 0); 505 481 if (res) 506 482 return res; 507 - bdev->bd_invalidated = 0; 508 - for (p = 1; p < disk->minors; p++) 509 - delete_partition(disk, p); 483 + 484 + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 485 + while ((part = disk_part_iter_next(&piter))) 486 + delete_partition(disk, part->partno); 487 + disk_part_iter_exit(&piter); 488 + 510 489 if (disk->fops->revalidate_disk) 511 490 disk->fops->revalidate_disk(disk); 491 + check_disk_size_change(disk, bdev); 492 + bdev->bd_invalidated = 0; 512 493 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 513 494 return 0; 514 495 if (IS_ERR(state)) /* I/O error reading the partition table */ 515 496 return -EIO; 516 497 517 498 /* tell userspace that the media / partition table may have changed */ 518 - kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); 499 + kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 519 500 501 + /* Detect the highest partition number and preallocate 502 + * disk->part_tbl. This is an optimization and not strictly 503 + * necessary. 
504 + */ 505 + for (p = 1, highest = 0; p < state->limit; p++) 506 + if (state->parts[p].size) 507 + highest = p; 508 + 509 + disk_expand_part_tbl(disk, highest); 510 + 511 + /* add partitions */ 520 512 for (p = 1; p < state->limit; p++) { 521 513 sector_t size = state->parts[p].size; 522 514 sector_t from = state->parts[p].from; ··· 581 541 582 542 void del_gendisk(struct gendisk *disk) 583 543 { 584 - int p; 544 + struct disk_part_iter piter; 545 + struct hd_struct *part; 585 546 586 547 /* invalidate stuff */ 587 - for (p = disk->minors - 1; p > 0; p--) { 588 - invalidate_partition(disk, p); 589 - delete_partition(disk, p); 548 + disk_part_iter_init(&piter, disk, 549 + DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); 550 + while ((part = disk_part_iter_next(&piter))) { 551 + invalidate_partition(disk, part->partno); 552 + delete_partition(disk, part->partno); 590 553 } 554 + disk_part_iter_exit(&piter); 555 + 591 556 invalidate_partition(disk, 0); 592 - disk->capacity = 0; 557 + blk_free_devt(disk_to_dev(disk)->devt); 558 + set_capacity(disk, 0); 593 559 disk->flags &= ~GENHD_FL_UP; 594 560 unlink_gendisk(disk); 595 - disk_stat_set_all(disk, 0); 596 - disk->stamp = 0; 561 + part_stat_set_all(&disk->part0, 0); 562 + disk->part0.stamp = 0; 597 563 598 - kobject_put(disk->holder_dir); 564 + kobject_put(disk->part0.holder_dir); 599 565 kobject_put(disk->slave_dir); 600 566 disk->driverfs_dev = NULL; 601 567 #ifndef CONFIG_SYSFS_DEPRECATED 602 - sysfs_remove_link(block_depr, disk->dev.bus_id); 568 + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); 603 569 #endif 604 - device_del(&disk->dev); 570 + device_del(disk_to_dev(disk)); 605 571 }
+1 -3
fs/partitions/check.h
··· 5 5 * add_gd_partition adds a partitions details to the devices partition 6 6 * description. 7 7 */ 8 - enum { MAX_PART = 256 }; 9 - 10 8 struct parsed_partitions { 11 9 char name[BDEVNAME_SIZE]; 12 10 struct { 13 11 sector_t from; 14 12 sector_t size; 15 13 int flags; 16 - } parts[MAX_PART]; 14 + } parts[DISK_MAX_PARTS]; 17 15 int next; 18 16 int limit; 19 17 };
+1
include/linux/Kbuild
··· 180 180 unifdef-y += auto_fs.h 181 181 unifdef-y += auxvec.h 182 182 unifdef-y += binfmts.h 183 + unifdef-y += blktrace_api.h 183 184 unifdef-y += capability.h 184 185 unifdef-y += capi.h 185 186 unifdef-y += cciss_ioctl.h
+6
include/linux/ata.h
··· 88 88 ATA_ID_DLF = 128, 89 89 ATA_ID_CSFO = 129, 90 90 ATA_ID_CFA_POWER = 160, 91 + ATA_ID_ROT_SPEED = 217, 91 92 ATA_ID_PIO4 = (1 << 1), 92 93 93 94 ATA_ID_SERNO_LEN = 20, ··· 699 698 (id[ATA_ID_COMMAND_SET_1] & (1 << 2))) 700 699 return 1; 701 700 return 0; 701 + } 702 + 703 + static inline int ata_id_is_ssd(const u16 *id) 704 + { 705 + return id[ATA_ID_ROT_SPEED] == 0x01; 702 706 } 703 707 704 708 static inline int ata_drive_40wire(const u16 *dev_id)
+55 -53
include/linux/bio.h
··· 26 26 27 27 #ifdef CONFIG_BLOCK 28 28 29 - /* Platforms may set this to teach the BIO layer about IOMMU hardware. */ 30 29 #include <asm/io.h> 31 - 32 - #if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) 33 - #define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) 34 - #define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) 35 - #else 36 - #define BIOVEC_VIRT_START_SIZE(x) 0 37 - #define BIOVEC_VIRT_OVERSIZE(x) 0 38 - #endif 39 - 40 - #ifndef BIO_VMERGE_BOUNDARY 41 - #define BIO_VMERGE_BOUNDARY 0 42 - #endif 43 30 44 31 #define BIO_DEBUG 45 32 ··· 75 88 /* Number of segments in this BIO after 76 89 * physical address coalescing is performed. 77 90 */ 78 - unsigned short bi_phys_segments; 79 - 80 - /* Number of segments after physical and DMA remapping 81 - * hardware coalescing is performed. 82 - */ 83 - unsigned short bi_hw_segments; 91 + unsigned int bi_phys_segments; 84 92 85 93 unsigned int bi_size; /* residual I/O count */ 86 94 87 - /* 88 - * To keep track of the max hw size, we account for the 89 - * sizes of the first and last virtually mergeable segments 90 - * in this bio 91 - */ 92 - unsigned int bi_hw_front_size; 93 - unsigned int bi_hw_back_size; 94 - 95 95 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ 96 + 97 + unsigned int bi_comp_cpu; /* completion CPU */ 96 98 97 99 struct bio_vec *bi_io_vec; /* the actual vec list */ 98 100 ··· 102 126 #define BIO_UPTODATE 0 /* ok after I/O completion */ 103 127 #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ 104 128 #define BIO_EOF 2 /* out-out-bounds error */ 105 - #define BIO_SEG_VALID 3 /* nr_hw_seg valid */ 129 + #define BIO_SEG_VALID 3 /* bi_phys_segments valid */ 106 130 #define BIO_CLONED 4 /* doesn't own data */ 107 131 #define BIO_BOUNCED 5 /* bio is a bounce bio */ 108 132 #define BIO_USER_MAPPED 6 /* contains user pages */ 109 133 #define BIO_EOPNOTSUPP 7 /* not supported */ 134 + #define BIO_CPU_AFFINE 8 /* complete bio on 
same CPU as submitted */ 135 + #define BIO_NULL_MAPPED 9 /* contains invalid user pages */ 136 + #define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ 110 137 #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) 111 138 112 139 /* ··· 123 144 /* 124 145 * bio bi_rw flags 125 146 * 126 - * bit 0 -- read (not set) or write (set) 147 + * bit 0 -- data direction 148 + * If not set, bio is a read from device. If set, it's a write to device. 127 149 * bit 1 -- rw-ahead when set 128 150 * bit 2 -- barrier 151 + * Insert a serialization point in the IO queue, forcing previously 152 + * submitted IO to be completed before this oen is issued. 129 153 * bit 3 -- fail fast, don't want low level driver retries 130 154 * bit 4 -- synchronous I/O hint: the block layer will unplug immediately 155 + * Note that this does NOT indicate that the IO itself is sync, just 156 + * that the block layer will not postpone issue of this IO by plugging. 157 + * bit 5 -- metadata request 158 + * Used for tracing to differentiate metadata and data IO. May also 159 + * get some preferential treatment in the IO scheduler 160 + * bit 6 -- discard sectors 161 + * Informs the lower level device that this range of sectors is no longer 162 + * used by the file system and may thus be freed by the device. Used 163 + * for flash based storage. 
131 164 */ 132 - #define BIO_RW 0 133 - #define BIO_RW_AHEAD 1 165 + #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ 166 + #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ 134 167 #define BIO_RW_BARRIER 2 135 168 #define BIO_RW_FAILFAST 3 136 169 #define BIO_RW_SYNC 4 137 170 #define BIO_RW_META 5 171 + #define BIO_RW_DISCARD 6 138 172 139 173 /* 140 174 * upper 16 bits of bi_rw define the io priority of this bio ··· 177 185 #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) 178 186 #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) 179 187 #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) 180 - #define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) 188 + #define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) 189 + #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) 181 190 182 191 static inline unsigned int bio_cur_sectors(struct bio *bio) 183 192 { 184 193 if (bio->bi_vcnt) 185 194 return bio_iovec(bio)->bv_len >> 9; 186 - 187 - return 0; 195 + else /* dataless requests such as discard */ 196 + return bio->bi_size >> 9; 188 197 } 189 198 190 199 static inline void *bio_data(struct bio *bio) ··· 229 236 ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) 230 237 #endif 231 238 232 - #define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ 233 - ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) 234 239 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ 235 240 (((addr1) | (mask)) == (((addr2) - 1) | (mask))) 236 241 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ ··· 310 319 atomic_t cnt; 311 320 int error; 312 321 }; 313 - extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, 314 - int first_sectors); 315 - extern mempool_t *bio_split_pool; 322 + extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); 316 323 extern void bio_pair_release(struct bio_pair *dbio); 317 324 
318 325 extern struct bio_set *bioset_create(int, int); 319 326 extern void bioset_free(struct bio_set *); 320 327 321 328 extern struct bio *bio_alloc(gfp_t, int); 329 + extern struct bio *bio_kmalloc(gfp_t, int); 322 330 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); 323 331 extern void bio_put(struct bio *); 324 332 extern void bio_free(struct bio *, struct bio_set *); ··· 325 335 extern void bio_endio(struct bio *, int); 326 336 struct request_queue; 327 337 extern int bio_phys_segments(struct request_queue *, struct bio *); 328 - extern int bio_hw_segments(struct request_queue *, struct bio *); 329 338 330 339 extern void __bio_clone(struct bio *, struct bio *); 331 340 extern struct bio *bio_clone(struct bio *, gfp_t); ··· 335 346 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, 336 347 unsigned int, unsigned int); 337 348 extern int bio_get_nr_vecs(struct block_device *); 349 + extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int); 338 350 extern struct bio *bio_map_user(struct request_queue *, struct block_device *, 339 - unsigned long, unsigned int, int); 351 + unsigned long, unsigned int, int, gfp_t); 340 352 struct sg_iovec; 353 + struct rq_map_data; 341 354 extern struct bio *bio_map_user_iov(struct request_queue *, 342 355 struct block_device *, 343 - struct sg_iovec *, int, int); 356 + struct sg_iovec *, int, int, gfp_t); 344 357 extern void bio_unmap_user(struct bio *); 345 358 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, 346 359 gfp_t); ··· 350 359 gfp_t, int); 351 360 extern void bio_set_pages_dirty(struct bio *bio); 352 361 extern void bio_check_pages_dirty(struct bio *bio); 353 - extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); 354 - extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, 355 - int, int); 362 + extern struct bio *bio_copy_user(struct request_queue *, struct 
rq_map_data *, 363 + unsigned long, unsigned int, int, gfp_t); 364 + extern struct bio *bio_copy_user_iov(struct request_queue *, 365 + struct rq_map_data *, struct sg_iovec *, 366 + int, int, gfp_t); 356 367 extern int bio_uncopy_user(struct bio *); 357 368 void zero_fill_bio(struct bio *bio); 358 369 extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); 359 370 extern unsigned int bvec_nr_vecs(unsigned short idx); 371 + 372 + /* 373 + * Allow queuer to specify a completion CPU for this bio 374 + */ 375 + static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) 376 + { 377 + bio->bi_comp_cpu = cpu; 378 + } 360 379 361 380 /* 362 381 * bio_set is used to allow other portions of the IO system to ··· 446 445 __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) 447 446 #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) 448 447 448 + /* 449 + * Check whether this bio carries any data or not. A NULL bio is allowed. 450 + */ 451 + static inline int bio_has_data(struct bio *bio) 452 + { 453 + return bio && bio->bi_io_vec != NULL; 454 + } 455 + 449 456 #if defined(CONFIG_BLK_DEV_INTEGRITY) 450 457 451 458 #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) ··· 467 458 #define bip_for_each_vec(bvl, bip, i) \ 468 459 __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) 469 460 470 - static inline int bio_integrity(struct bio *bio) 471 - { 472 - #if defined(CONFIG_BLK_DEV_INTEGRITY) 473 - return bio->bi_integrity != NULL; 474 - #else 475 - return 0; 476 - #endif 477 - } 461 + #define bio_integrity(bio) (bio->bi_integrity != NULL) 478 462 479 463 extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); 480 464 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
+94 -57
include/linux/blkdev.h
··· 16 16 #include <linux/bio.h> 17 17 #include <linux/module.h> 18 18 #include <linux/stringify.h> 19 + #include <linux/gfp.h> 19 20 #include <linux/bsg.h> 21 + #include <linux/smp.h> 20 22 21 23 #include <asm/scatterlist.h> 22 24 ··· 56 54 REQ_TYPE_PM_SUSPEND, /* suspend request */ 57 55 REQ_TYPE_PM_RESUME, /* resume request */ 58 56 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 59 - REQ_TYPE_FLUSH, /* flush request */ 60 57 REQ_TYPE_SPECIAL, /* driver defined type */ 61 58 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ 62 59 /* ··· 77 76 * 78 77 */ 79 78 enum { 80 - /* 81 - * just examples for now 82 - */ 83 79 REQ_LB_OP_EJECT = 0x40, /* eject request */ 84 - REQ_LB_OP_FLUSH = 0x41, /* flush device */ 80 + REQ_LB_OP_FLUSH = 0x41, /* flush request */ 81 + REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ 85 82 }; 86 83 87 84 /* 88 - * request type modified bits. first three bits match BIO_RW* bits, important 85 + * request type modified bits. first two bits match BIO_RW* bits, important 89 86 */ 90 87 enum rq_flag_bits { 91 88 __REQ_RW, /* not set, read. 
set, write */ 92 89 __REQ_FAILFAST, /* no low level driver retries */ 90 + __REQ_DISCARD, /* request to discard sectors */ 93 91 __REQ_SORTED, /* elevator knows about this request */ 94 92 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 95 93 __REQ_HARDBARRIER, /* may not be passed by drive either */ ··· 111 111 }; 112 112 113 113 #define REQ_RW (1 << __REQ_RW) 114 + #define REQ_DISCARD (1 << __REQ_DISCARD) 114 115 #define REQ_FAILFAST (1 << __REQ_FAILFAST) 115 116 #define REQ_SORTED (1 << __REQ_SORTED) 116 117 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) ··· 141 140 */ 142 141 struct request { 143 142 struct list_head queuelist; 144 - struct list_head donelist; 143 + struct call_single_data csd; 144 + int cpu; 145 145 146 146 struct request_queue *q; 147 147 148 148 unsigned int cmd_flags; 149 149 enum rq_cmd_type_bits cmd_type; 150 + unsigned long atomic_flags; 150 151 151 152 /* Maintain bio traversal state for part by part I/O submission. 152 153 * hard_* are block layer internals, no driver should touch them! ··· 193 190 */ 194 191 unsigned short nr_phys_segments; 195 192 196 - /* Number of scatter-gather addr+len pairs after 197 - * physical and DMA remapping hardware coalescing is performed. 198 - * This is the number of scatter-gather entries the driver 199 - * will actually have to deal with after DMA mapping is done. 
200 - */ 201 - unsigned short nr_hw_segments; 202 - 203 193 unsigned short ioprio; 204 194 205 195 void *special; ··· 216 220 void *data; 217 221 void *sense; 218 222 223 + unsigned long deadline; 224 + struct list_head timeout_list; 219 225 unsigned int timeout; 220 226 int retries; 221 227 ··· 230 232 /* for bidi */ 231 233 struct request *next_rq; 232 234 }; 235 + 236 + static inline unsigned short req_get_ioprio(struct request *req) 237 + { 238 + return req->ioprio; 239 + } 233 240 234 241 /* 235 242 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME ··· 255 252 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 256 253 typedef int (prep_rq_fn) (struct request_queue *, struct request *); 257 254 typedef void (unplug_fn) (struct request_queue *); 255 + typedef int (prepare_discard_fn) (struct request_queue *, struct request *); 258 256 259 257 struct bio_vec; 260 258 struct bvec_merge_data { ··· 269 265 typedef void (prepare_flush_fn) (struct request_queue *, struct request *); 270 266 typedef void (softirq_done_fn)(struct request *); 271 267 typedef int (dma_drain_needed_fn)(struct request *); 268 + typedef int (lld_busy_fn) (struct request_queue *q); 269 + 270 + enum blk_eh_timer_return { 271 + BLK_EH_NOT_HANDLED, 272 + BLK_EH_HANDLED, 273 + BLK_EH_RESET_TIMER, 274 + }; 275 + 276 + typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); 272 277 273 278 enum blk_queue_state { 274 279 Queue_down, ··· 320 307 make_request_fn *make_request_fn; 321 308 prep_rq_fn *prep_rq_fn; 322 309 unplug_fn *unplug_fn; 310 + prepare_discard_fn *prepare_discard_fn; 323 311 merge_bvec_fn *merge_bvec_fn; 324 312 prepare_flush_fn *prepare_flush_fn; 325 313 softirq_done_fn *softirq_done_fn; 314 + rq_timed_out_fn *rq_timed_out_fn; 326 315 dma_drain_needed_fn *dma_drain_needed; 316 + lld_busy_fn *lld_busy_fn; 327 317 328 318 /* 329 319 * Dispatch queue sorting ··· 401 385 unsigned int nr_sorted; 402 386 unsigned int 
in_flight; 403 387 388 + unsigned int rq_timeout; 389 + struct timer_list timeout; 390 + struct list_head timeout_list; 391 + 404 392 /* 405 393 * sg stuff 406 394 */ ··· 441 421 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 442 422 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 443 423 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ 424 + #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ 425 + #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ 426 + #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ 427 + #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ 444 428 445 429 static inline int queue_is_locked(struct request_queue *q) 446 430 { ··· 550 526 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 551 527 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 552 528 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 529 + #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) 553 530 #define blk_queue_flushing(q) ((q)->ordseq) 531 + #define blk_queue_stackable(q) \ 532 + test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) 554 533 555 534 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) 556 535 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) ··· 563 536 #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) 564 537 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 565 538 566 - #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) 539 + #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 567 540 568 541 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) 569 542 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) 570 543 #define blk_pm_request(rq) \ 571 544 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) 572 545 546 + #define 
blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 573 547 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 574 548 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 575 549 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 550 + #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) 576 551 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 577 552 #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) 578 553 /* rq->queuelist of dequeued request must be list_empty() */ ··· 621 592 #define RQ_NOMERGE_FLAGS \ 622 593 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 623 594 #define rq_mergeable(rq) \ 624 - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) 595 + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ 596 + (blk_discard_rq(rq) || blk_fs_request((rq)))) 625 597 626 598 /* 627 599 * q->prep_rq_fn return values ··· 667 637 } 668 638 #endif /* CONFIG_MMU */ 669 639 640 + struct rq_map_data { 641 + struct page **pages; 642 + int page_order; 643 + int nr_entries; 644 + }; 645 + 670 646 struct req_iterator { 671 647 int i; 672 648 struct bio *bio; ··· 700 664 extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 701 665 extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 702 666 extern void blk_requeue_request(struct request_queue *, struct request *); 667 + extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); 668 + extern int blk_lld_busy(struct request_queue *q); 669 + extern int blk_insert_cloned_request(struct request_queue *q, 670 + struct request *rq); 703 671 extern void blk_plug_device(struct request_queue *); 704 672 extern void blk_plug_device_unlocked(struct request_queue *); 705 673 extern int blk_remove_plug(struct request_queue *); ··· 745 705 extern void __blk_run_queue(struct request_queue *); 746 706 extern void blk_run_queue(struct request_queue *); 747 707 extern void 
blk_start_queueing(struct request_queue *); 748 - extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); 708 + extern int blk_rq_map_user(struct request_queue *, struct request *, 709 + struct rq_map_data *, void __user *, unsigned long, 710 + gfp_t); 749 711 extern int blk_rq_unmap_user(struct bio *); 750 712 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 751 713 extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 752 - struct sg_iovec *, int, unsigned int); 714 + struct rq_map_data *, struct sg_iovec *, int, 715 + unsigned int, gfp_t); 753 716 extern int blk_execute_rq(struct request_queue *, struct gendisk *, 754 717 struct request *, int); 755 718 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, ··· 793 750 extern int blk_end_bidi_request(struct request *rq, int error, 794 751 unsigned int nr_bytes, unsigned int bidi_bytes); 795 752 extern void end_request(struct request *, int); 796 - extern void end_queued_request(struct request *, int); 797 - extern void end_dequeued_request(struct request *, int); 798 753 extern int blk_end_request_callback(struct request *rq, int error, 799 754 unsigned int nr_bytes, 800 755 int (drv_callback)(struct request *)); 801 756 extern void blk_complete_request(struct request *); 757 + extern void __blk_complete_request(struct request *); 758 + extern void blk_abort_request(struct request *); 759 + extern void blk_abort_queue(struct request_queue *); 760 + extern void blk_update_request(struct request *rq, int error, 761 + unsigned int nr_bytes); 802 762 803 763 /* 804 764 * blk_end_request() takes bytes instead of sectors as a complete size. 
··· 836 790 extern int blk_queue_dma_drain(struct request_queue *q, 837 791 dma_drain_needed_fn *dma_drain_needed, 838 792 void *buf, unsigned int size); 793 + extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); 839 794 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 840 795 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); 841 796 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); 842 797 extern void blk_queue_dma_alignment(struct request_queue *, int); 843 798 extern void blk_queue_update_dma_alignment(struct request_queue *, int); 844 799 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 800 + extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); 801 + extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); 802 + extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 845 803 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 846 804 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 847 805 extern int blk_do_ordered(struct request_queue *, struct request **); ··· 887 837 } 888 838 889 839 extern int blkdev_issue_flush(struct block_device *, sector_t *); 840 + extern int blkdev_issue_discard(struct block_device *, 841 + sector_t sector, sector_t nr_sects, gfp_t); 842 + 843 + static inline int sb_issue_discard(struct super_block *sb, 844 + sector_t block, sector_t nr_blocks) 845 + { 846 + block <<= (sb->s_blocksize_bits - 9); 847 + nr_blocks <<= (sb->s_blocksize_bits - 9); 848 + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); 849 + } 890 850 891 851 /* 892 852 * command filter functions ··· 934 874 return q ? 
q->dma_alignment : 511; 935 875 } 936 876 877 + static inline int blk_rq_aligned(struct request_queue *q, void *addr, 878 + unsigned int len) 879 + { 880 + unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; 881 + return !((unsigned long)addr & alignment) && !(len & alignment); 882 + } 883 + 937 884 /* assumes size > 256 */ 938 885 static inline unsigned int blksize_bits(unsigned int size) 939 886 { ··· 967 900 } 968 901 969 902 struct work_struct; 970 - int kblockd_schedule_work(struct work_struct *work); 903 + int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 971 904 void kblockd_flush_work(struct work_struct *work); 972 905 973 906 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ ··· 1012 945 1013 946 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); 1014 947 extern void blk_integrity_unregister(struct gendisk *); 1015 - extern int blk_integrity_compare(struct block_device *, struct block_device *); 948 + extern int blk_integrity_compare(struct gendisk *, struct gendisk *); 1016 949 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); 1017 950 extern int blk_rq_count_integrity_sg(struct request *); 1018 951 1019 - static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) 1020 - { 1021 - if (bi) 1022 - return bi->tuple_size; 1023 - 1024 - return 0; 1025 - } 1026 - 1027 - static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) 952 + static inline 953 + struct blk_integrity *bdev_get_integrity(struct block_device *bdev) 1028 954 { 1029 955 return bdev->bd_disk->integrity; 1030 956 } 1031 957 1032 - static inline unsigned int bdev_get_tag_size(struct block_device *bdev) 958 + static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) 1033 959 { 1034 - struct blk_integrity *bi = bdev_get_integrity(bdev); 1035 - 1036 - if (bi) 1037 - return bi->tag_size; 1038 - 1039 - return 0; 1040 - } 1041 - 1042 - static inline 
int bdev_integrity_enabled(struct block_device *bdev, int rw) 1043 - { 1044 - struct blk_integrity *bi = bdev_get_integrity(bdev); 1045 - 1046 - if (bi == NULL) 1047 - return 0; 1048 - 1049 - if (rw == READ && bi->verify_fn != NULL && 1050 - (bi->flags & INTEGRITY_FLAG_READ)) 1051 - return 1; 1052 - 1053 - if (rw == WRITE && bi->generate_fn != NULL && 1054 - (bi->flags & INTEGRITY_FLAG_WRITE)) 1055 - return 1; 1056 - 1057 - return 0; 960 + return disk->integrity; 1058 961 } 1059 962 1060 963 static inline int blk_integrity_rq(struct request *rq) ··· 1041 1004 #define blk_rq_count_integrity_sg(a) (0) 1042 1005 #define blk_rq_map_integrity_sg(a, b) (0) 1043 1006 #define bdev_get_integrity(a) (0) 1044 - #define bdev_get_tag_size(a) (0) 1007 + #define blk_get_integrity(a) (0) 1045 1008 #define blk_integrity_compare(a, b) (0) 1046 1009 #define blk_integrity_register(a, b) (0) 1047 1010 #define blk_integrity_unregister(a) do { } while (0);
+36 -26
include/linux/blktrace_api.h
··· 1 1 #ifndef BLKTRACE_H 2 2 #define BLKTRACE_H 3 3 4 + #ifdef __KERNEL__ 4 5 #include <linux/blkdev.h> 5 6 #include <linux/relay.h> 7 + #endif 6 8 7 9 /* 8 10 * Trace categories ··· 23 21 BLK_TC_NOTIFY = 1 << 10, /* special message */ 24 22 BLK_TC_AHEAD = 1 << 11, /* readahead */ 25 23 BLK_TC_META = 1 << 12, /* metadata */ 24 + BLK_TC_DISCARD = 1 << 13, /* discard requests */ 26 25 27 26 BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ 28 27 }; ··· 50 47 __BLK_TA_SPLIT, /* bio was split */ 51 48 __BLK_TA_BOUNCE, /* bio was bounced */ 52 49 __BLK_TA_REMAP, /* bio was remapped */ 50 + __BLK_TA_ABORT, /* request aborted */ 53 51 }; 54 52 55 53 /* ··· 81 77 #define BLK_TA_SPLIT (__BLK_TA_SPLIT) 82 78 #define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) 83 79 #define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) 80 + #define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE)) 84 81 85 82 #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) 86 83 #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) ··· 94 89 * The trace itself 95 90 */ 96 91 struct blk_io_trace { 97 - u32 magic; /* MAGIC << 8 | version */ 98 - u32 sequence; /* event number */ 99 - u64 time; /* in microseconds */ 100 - u64 sector; /* disk offset */ 101 - u32 bytes; /* transfer length */ 102 - u32 action; /* what happened */ 103 - u32 pid; /* who did it */ 104 - u32 device; /* device number */ 105 - u32 cpu; /* on what cpu did it happen */ 106 - u16 error; /* completion error */ 107 - u16 pdu_len; /* length of data after this trace */ 92 + __u32 magic; /* MAGIC << 8 | version */ 93 + __u32 sequence; /* event number */ 94 + __u64 time; /* in microseconds */ 95 + __u64 sector; /* disk offset */ 96 + __u32 bytes; /* transfer length */ 97 + __u32 action; /* what happened */ 98 + __u32 pid; /* who did it */ 99 + __u32 device; /* device number */ 100 + __u32 cpu; /* on what cpu did it happen */ 101 + __u16 error; /* completion error */ 102 + __u16 pdu_len; /* length 
of data after this trace */ 108 103 }; 109 104 110 105 /* ··· 122 117 Blktrace_stopped, 123 118 }; 124 119 120 + #define BLKTRACE_BDEV_SIZE 32 121 + 122 + /* 123 + * User setup structure passed with BLKTRACESTART 124 + */ 125 + struct blk_user_trace_setup { 126 + char name[BLKTRACE_BDEV_SIZE]; /* output */ 127 + __u16 act_mask; /* input */ 128 + __u32 buf_size; /* input */ 129 + __u32 buf_nr; /* input */ 130 + __u64 start_lba; 131 + __u64 end_lba; 132 + __u32 pid; 133 + }; 134 + 135 + #ifdef __KERNEL__ 136 + #if defined(CONFIG_BLK_DEV_IO_TRACE) 125 137 struct blk_trace { 126 138 int trace_state; 127 139 struct rchan *rchan; ··· 155 133 atomic_t dropped; 156 134 }; 157 135 158 - /* 159 - * User setup structure passed with BLKTRACESTART 160 - */ 161 - struct blk_user_trace_setup { 162 - char name[BDEVNAME_SIZE]; /* output */ 163 - u16 act_mask; /* input */ 164 - u32 buf_size; /* input */ 165 - u32 buf_nr; /* input */ 166 - u64 start_lba; 167 - u64 end_lba; 168 - u32 pid; 169 - }; 170 - 171 - #ifdef __KERNEL__ 172 - #if defined(CONFIG_BLK_DEV_IO_TRACE) 173 136 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 174 137 extern void blk_trace_shutdown(struct request_queue *); 175 138 extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); ··· 201 194 202 195 if (likely(!bt)) 203 196 return; 197 + 198 + if (blk_discard_rq(rq)) 199 + rw |= (1 << BIO_RW_DISCARD); 204 200 205 201 if (blk_pc_request(rq)) { 206 202 what |= BLK_TC_ACT(BLK_TC_PC);
+13 -1
include/linux/device.h
··· 199 199 struct class_private *p; 200 200 }; 201 201 202 + struct class_dev_iter { 203 + struct klist_iter ki; 204 + const struct device_type *type; 205 + }; 206 + 202 207 extern struct kobject *sysfs_dev_block_kobj; 203 208 extern struct kobject *sysfs_dev_char_kobj; 204 209 extern int __must_check __class_register(struct class *class, ··· 217 212 static struct lock_class_key __key; \ 218 213 __class_register(class, &__key); \ 219 214 }) 215 + 216 + extern void class_dev_iter_init(struct class_dev_iter *iter, 217 + struct class *class, 218 + struct device *start, 219 + const struct device_type *type); 220 + extern struct device *class_dev_iter_next(struct class_dev_iter *iter); 221 + extern void class_dev_iter_exit(struct class_dev_iter *iter); 220 222 221 223 extern int class_for_each_device(struct class *class, struct device *start, 222 224 void *data, ··· 408 396 spinlock_t devres_lock; 409 397 struct list_head devres_head; 410 398 411 - struct list_head node; 399 + struct klist_node knode_class; 412 400 struct class *class; 413 401 dev_t devt; /* dev_t, creates the sysfs "dev" */ 414 402 struct attribute_group **groups; /* optional groups */
+5 -4
include/linux/elevator.h
··· 112 112 extern int elv_register_queue(struct request_queue *q); 113 113 extern void elv_unregister_queue(struct request_queue *q); 114 114 extern int elv_may_queue(struct request_queue *, int); 115 + extern void elv_abort_queue(struct request_queue *); 115 116 extern void elv_completed_request(struct request_queue *, struct request *); 116 117 extern int elv_set_request(struct request_queue *, struct request *, gfp_t); 117 118 extern void elv_put_request(struct request_queue *, struct request *); ··· 174 173 #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) 175 174 176 175 /* 177 - * Hack to reuse the donelist list_head as the fifo time holder while 176 + * Hack to reuse the csd.list list_head as the fifo time holder while 178 177 * the request is in the io scheduler. Saves an unsigned long in rq. 179 178 */ 180 - #define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) 181 - #define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) 179 + #define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) 180 + #define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) 182 181 #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) 183 182 #define rq_fifo_clear(rq) do { \ 184 183 list_del_init(&(rq)->queuelist); \ 185 - INIT_LIST_HEAD(&(rq)->donelist); \ 184 + INIT_LIST_HEAD(&(rq)->csd.list); \ 186 185 } while (0) 187 186 188 187 /*
+7 -1
include/linux/fd.h
··· 15 15 sect, /* sectors per track */ 16 16 head, /* nr of heads */ 17 17 track, /* nr of tracks */ 18 - stretch; /* !=0 means double track steps */ 18 + stretch; /* bit 0 !=0 means double track steps */ 19 + /* bit 1 != 0 means swap sides */ 20 + /* bits 2..9 give the first sector */ 21 + /* number (the LSB is flipped) */ 19 22 #define FD_STRETCH 1 20 23 #define FD_SWAPSIDES 2 21 24 #define FD_ZEROBASED 4 25 + #define FD_SECTBASEMASK 0x3FC 26 + #define FD_MKSECTBASE(s) (((s) ^ 1) << 2) 27 + #define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1) 22 28 23 29 unsigned char gap, /* gap1 size */ 24 30
+8 -1
include/linux/fs.h
··· 86 86 #define READ_META (READ | (1 << BIO_RW_META)) 87 87 #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) 88 88 #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) 89 - #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) 89 + #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) 90 + #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) 91 + #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) 90 92 91 93 #define SEL_IN 1 92 94 #define SEL_OUT 2 ··· 224 222 #define BLKTRACESTART _IO(0x12,116) 225 223 #define BLKTRACESTOP _IO(0x12,117) 226 224 #define BLKTRACETEARDOWN _IO(0x12,118) 225 + #define BLKDISCARD _IO(0x12,119) 227 226 228 227 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ 229 228 #define FIBMAP _IO(0x00,1) /* bmap access */ ··· 1685 1682 1686 1683 /* fs/block_dev.c */ 1687 1684 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 1685 + #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ 1688 1686 1689 1687 #ifdef CONFIG_BLOCK 1690 1688 #define BLKDEV_MAJOR_HASH_SIZE 255 ··· 1722 1718 */ 1723 1719 #define bio_data_dir(bio) ((bio)->bi_rw & 1) 1724 1720 1721 + extern void check_disk_size_change(struct gendisk *disk, 1722 + struct block_device *bdev); 1723 + extern int revalidate_disk(struct gendisk *); 1725 1724 extern int check_disk_change(struct block_device *); 1726 1725 extern int __invalidate_device(struct block_device *); 1727 1726 extern int invalidate_partition(struct gendisk *, int);
+186 -185
include/linux/genhd.h
··· 11 11 12 12 #include <linux/types.h> 13 13 #include <linux/kdev_t.h> 14 + #include <linux/rcupdate.h> 14 15 15 16 #ifdef CONFIG_BLOCK 16 17 17 - #define kobj_to_dev(k) container_of(k, struct device, kobj) 18 - #define dev_to_disk(device) container_of(device, struct gendisk, dev) 19 - #define dev_to_part(device) container_of(device, struct hd_struct, dev) 18 + #define kobj_to_dev(k) container_of((k), struct device, kobj) 19 + #define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) 20 + #define dev_to_part(device) container_of((device), struct hd_struct, __dev) 21 + #define disk_to_dev(disk) (&(disk)->part0.__dev) 22 + #define part_to_dev(part) (&((part)->__dev)) 20 23 21 24 extern struct device_type part_type; 22 25 extern struct kobject *block_depr; ··· 58 55 UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ 59 56 }; 60 57 58 + #define DISK_MAX_PARTS 256 59 + #define DISK_NAME_LEN 32 60 + 61 61 #include <linux/major.h> 62 62 #include <linux/device.h> 63 63 #include <linux/smp.h> ··· 93 87 struct hd_struct { 94 88 sector_t start_sect; 95 89 sector_t nr_sects; 96 - struct device dev; 90 + struct device __dev; 97 91 struct kobject *holder_dir; 98 92 int policy, partno; 99 93 #ifdef CONFIG_FAIL_MAKE_REQUEST ··· 106 100 #else 107 101 struct disk_stats dkstats; 108 102 #endif 103 + struct rcu_head rcu_head; 109 104 }; 110 105 111 106 #define GENHD_FL_REMOVABLE 1 ··· 115 108 #define GENHD_FL_CD 8 116 109 #define GENHD_FL_UP 16 117 110 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 118 - #define GENHD_FL_FAIL 64 111 + #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ 112 + 113 + #define BLK_SCSI_MAX_CMDS (256) 114 + #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 115 + 116 + struct blk_scsi_cmd_filter { 117 + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; 118 + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; 119 + struct kobject kobj; 120 + }; 121 + 122 + struct disk_part_tbl { 123 + struct rcu_head rcu_head; 
124 + int len; 125 + struct hd_struct *part[]; 126 + }; 119 127 120 128 struct gendisk { 129 + /* major, first_minor and minors are input parameters only, 130 + * don't use directly. Use disk_devt() and disk_max_parts(). 131 + */ 121 132 int major; /* major number of driver */ 122 133 int first_minor; 123 134 int minors; /* maximum number of minors, =1 for 124 135 * disks that can't be partitioned. */ 125 - char disk_name[32]; /* name of major driver */ 126 - struct hd_struct **part; /* [indexed by minor] */ 136 + 137 + char disk_name[DISK_NAME_LEN]; /* name of major driver */ 138 + 139 + /* Array of pointers to partitions indexed by partno. 140 + * Protected with matching bdev lock but stat and other 141 + * non-critical accesses use RCU. Always access through 142 + * helpers. 143 + */ 144 + struct disk_part_tbl *part_tbl; 145 + struct hd_struct part0; 146 + 127 147 struct block_device_operations *fops; 128 148 struct request_queue *queue; 129 149 void *private_data; 130 - sector_t capacity; 131 150 132 151 int flags; 133 152 struct device *driverfs_dev; // FIXME: remove 134 - struct device dev; 135 - struct kobject *holder_dir; 136 153 struct kobject *slave_dir; 137 154 138 155 struct timer_rand_state *random; 139 - int policy; 140 156 141 157 atomic_t sync_io; /* RAID */ 142 - unsigned long stamp; 143 - int in_flight; 144 - #ifdef CONFIG_SMP 145 - struct disk_stats *dkstats; 146 - #else 147 - struct disk_stats dkstats; 148 - #endif 149 158 struct work_struct async_notify; 150 159 #ifdef CONFIG_BLK_DEV_INTEGRITY 151 160 struct blk_integrity *integrity; 152 161 #endif 162 + int node_id; 153 163 }; 154 164 155 - /* 156 - * Macros to operate on percpu disk statistics: 157 - * 158 - * The __ variants should only be called in critical sections. The full 159 - * variants disable/enable preemption. 
160 - */ 161 - static inline struct hd_struct *get_part(struct gendisk *gendiskp, 162 - sector_t sector) 165 + static inline struct gendisk *part_to_disk(struct hd_struct *part) 163 166 { 164 - struct hd_struct *part; 165 - int i; 166 - for (i = 0; i < gendiskp->minors - 1; i++) { 167 - part = gendiskp->part[i]; 168 - if (part && part->start_sect <= sector 169 - && sector < part->start_sect + part->nr_sects) 170 - return part; 167 + if (likely(part)) { 168 + if (part->partno) 169 + return dev_to_disk(part_to_dev(part)->parent); 170 + else 171 + return dev_to_disk(part_to_dev(part)); 171 172 } 172 173 return NULL; 173 174 } 174 175 176 + static inline int disk_max_parts(struct gendisk *disk) 177 + { 178 + if (disk->flags & GENHD_FL_EXT_DEVT) 179 + return DISK_MAX_PARTS; 180 + return disk->minors; 181 + } 182 + 183 + static inline bool disk_partitionable(struct gendisk *disk) 184 + { 185 + return disk_max_parts(disk) > 1; 186 + } 187 + 188 + static inline dev_t disk_devt(struct gendisk *disk) 189 + { 190 + return disk_to_dev(disk)->devt; 191 + } 192 + 193 + static inline dev_t part_devt(struct hd_struct *part) 194 + { 195 + return part_to_dev(part)->devt; 196 + } 197 + 198 + extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); 199 + 200 + static inline void disk_put_part(struct hd_struct *part) 201 + { 202 + if (likely(part)) 203 + put_device(part_to_dev(part)); 204 + } 205 + 206 + /* 207 + * Smarter partition iterator without context limits. 
208 + */ 209 + #define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ 210 + #define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ 211 + #define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ 212 + 213 + struct disk_part_iter { 214 + struct gendisk *disk; 215 + struct hd_struct *part; 216 + int idx; 217 + unsigned int flags; 218 + }; 219 + 220 + extern void disk_part_iter_init(struct disk_part_iter *piter, 221 + struct gendisk *disk, unsigned int flags); 222 + extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); 223 + extern void disk_part_iter_exit(struct disk_part_iter *piter); 224 + 225 + extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, 226 + sector_t sector); 227 + 228 + /* 229 + * Macros to operate on percpu disk statistics: 230 + * 231 + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters 232 + * and should be called between disk_stat_lock() and 233 + * disk_stat_unlock(). 234 + * 235 + * part_stat_read() can be called at any time. 236 + * 237 + * part_stat_{add|set_all}() and {init|free}_part_stats are for 238 + * internal use only. 
239 + */ 175 240 #ifdef CONFIG_SMP 176 - #define __disk_stat_add(gendiskp, field, addnd) \ 177 - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) 241 + #define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) 242 + #define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) 178 243 179 - #define disk_stat_read(gendiskp, field) \ 180 - ({ \ 181 - typeof(gendiskp->dkstats->field) res = 0; \ 182 - int i; \ 183 - for_each_possible_cpu(i) \ 184 - res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ 185 - res; \ 186 - }) 187 - 188 - static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { 189 - int i; 190 - 191 - for_each_possible_cpu(i) 192 - memset(per_cpu_ptr(gendiskp->dkstats, i), value, 193 - sizeof(struct disk_stats)); 194 - } 195 - 196 - #define __part_stat_add(part, field, addnd) \ 197 - (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) 198 - 199 - #define __all_stat_add(gendiskp, part, field, addnd, sector) \ 200 - ({ \ 201 - if (part) \ 202 - __part_stat_add(part, field, addnd); \ 203 - __disk_stat_add(gendiskp, field, addnd); \ 204 - }) 244 + #define __part_stat_add(cpu, part, field, addnd) \ 245 + (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) 205 246 206 247 #define part_stat_read(part, field) \ 207 248 ({ \ 208 - typeof(part->dkstats->field) res = 0; \ 249 + typeof((part)->dkstats->field) res = 0; \ 209 250 int i; \ 210 251 for_each_possible_cpu(i) \ 211 - res += per_cpu_ptr(part->dkstats, i)->field; \ 252 + res += per_cpu_ptr((part)->dkstats, i)->field; \ 212 253 res; \ 213 254 }) 214 255 ··· 267 212 for_each_possible_cpu(i) 268 213 memset(per_cpu_ptr(part->dkstats, i), value, 269 214 sizeof(struct disk_stats)); 270 - } 271 - 272 - #else /* !CONFIG_SMP */ 273 - #define __disk_stat_add(gendiskp, field, addnd) \ 274 - (gendiskp->dkstats.field += addnd) 275 - #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) 276 - 277 - static inline void disk_stat_set_all(struct 
gendisk *gendiskp, int value) 278 - { 279 - memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); 280 - } 281 - 282 - #define __part_stat_add(part, field, addnd) \ 283 - (part->dkstats.field += addnd) 284 - 285 - #define __all_stat_add(gendiskp, part, field, addnd, sector) \ 286 - ({ \ 287 - if (part) \ 288 - part->dkstats.field += addnd; \ 289 - __disk_stat_add(gendiskp, field, addnd); \ 290 - }) 291 - 292 - #define part_stat_read(part, field) (part->dkstats.field) 293 - 294 - static inline void part_stat_set_all(struct hd_struct *part, int value) 295 - { 296 - memset(&part->dkstats, value, sizeof(struct disk_stats)); 297 - } 298 - 299 - #endif /* CONFIG_SMP */ 300 - 301 - #define disk_stat_add(gendiskp, field, addnd) \ 302 - do { \ 303 - preempt_disable(); \ 304 - __disk_stat_add(gendiskp, field, addnd); \ 305 - preempt_enable(); \ 306 - } while (0) 307 - 308 - #define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1) 309 - #define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1) 310 - 311 - #define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1) 312 - #define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1) 313 - 314 - #define __disk_stat_sub(gendiskp, field, subnd) \ 315 - __disk_stat_add(gendiskp, field, -subnd) 316 - #define disk_stat_sub(gendiskp, field, subnd) \ 317 - disk_stat_add(gendiskp, field, -subnd) 318 - 319 - #define part_stat_add(gendiskp, field, addnd) \ 320 - do { \ 321 - preempt_disable(); \ 322 - __part_stat_add(gendiskp, field, addnd);\ 323 - preempt_enable(); \ 324 - } while (0) 325 - 326 - #define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) 327 - #define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) 328 - 329 - #define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) 330 - #define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) 331 - 332 - #define __part_stat_sub(gendiskp, 
field, subnd) \ 333 - __part_stat_add(gendiskp, field, -subnd) 334 - #define part_stat_sub(gendiskp, field, subnd) \ 335 - part_stat_add(gendiskp, field, -subnd) 336 - 337 - #define all_stat_add(gendiskp, part, field, addnd, sector) \ 338 - do { \ 339 - preempt_disable(); \ 340 - __all_stat_add(gendiskp, part, field, addnd, sector); \ 341 - preempt_enable(); \ 342 - } while (0) 343 - 344 - #define __all_stat_dec(gendiskp, field, sector) \ 345 - __all_stat_add(gendiskp, field, -1, sector) 346 - #define all_stat_dec(gendiskp, field, sector) \ 347 - all_stat_add(gendiskp, field, -1, sector) 348 - 349 - #define __all_stat_inc(gendiskp, part, field, sector) \ 350 - __all_stat_add(gendiskp, part, field, 1, sector) 351 - #define all_stat_inc(gendiskp, part, field, sector) \ 352 - all_stat_add(gendiskp, part, field, 1, sector) 353 - 354 - #define __all_stat_sub(gendiskp, part, field, subnd, sector) \ 355 - __all_stat_add(gendiskp, part, field, -subnd, sector) 356 - #define all_stat_sub(gendiskp, part, field, subnd, sector) \ 357 - all_stat_add(gendiskp, part, field, -subnd, sector) 358 - 359 - /* Inlines to alloc and free disk stats in struct gendisk */ 360 - #ifdef CONFIG_SMP 361 - static inline int init_disk_stats(struct gendisk *disk) 362 - { 363 - disk->dkstats = alloc_percpu(struct disk_stats); 364 - if (!disk->dkstats) 365 - return 0; 366 - return 1; 367 - } 368 - 369 - static inline void free_disk_stats(struct gendisk *disk) 370 - { 371 - free_percpu(disk->dkstats); 372 215 } 373 216 374 217 static inline int init_part_stats(struct hd_struct *part) ··· 282 329 free_percpu(part->dkstats); 283 330 } 284 331 285 - #else /* CONFIG_SMP */ 286 - static inline int init_disk_stats(struct gendisk *disk) 287 - { 288 - return 1; 289 - } 332 + #else /* !CONFIG_SMP */ 333 + #define part_stat_lock() ({ rcu_read_lock(); 0; }) 334 + #define part_stat_unlock() rcu_read_unlock() 290 335 291 - static inline void free_disk_stats(struct gendisk *disk) 336 + #define __part_stat_add(cpu, 
part, field, addnd) \ 337 + ((part)->dkstats.field += addnd) 338 + 339 + #define part_stat_read(part, field) ((part)->dkstats.field) 340 + 341 + static inline void part_stat_set_all(struct hd_struct *part, int value) 292 342 { 343 + memset(&part->dkstats, value, sizeof(struct disk_stats)); 293 344 } 294 345 295 346 static inline int init_part_stats(struct hd_struct *part) ··· 304 347 static inline void free_part_stats(struct hd_struct *part) 305 348 { 306 349 } 307 - #endif /* CONFIG_SMP */ 350 + 351 + #endif /* CONFIG_SMP */ 352 + 353 + #define part_stat_add(cpu, part, field, addnd) do { \ 354 + __part_stat_add((cpu), (part), field, addnd); \ 355 + if ((part)->partno) \ 356 + __part_stat_add((cpu), &part_to_disk((part))->part0, \ 357 + field, addnd); \ 358 + } while (0) 359 + 360 + #define part_stat_dec(cpu, gendiskp, field) \ 361 + part_stat_add(cpu, gendiskp, field, -1) 362 + #define part_stat_inc(cpu, gendiskp, field) \ 363 + part_stat_add(cpu, gendiskp, field, 1) 364 + #define part_stat_sub(cpu, gendiskp, field, subnd) \ 365 + part_stat_add(cpu, gendiskp, field, -subnd) 366 + 367 + static inline void part_inc_in_flight(struct hd_struct *part) 368 + { 369 + part->in_flight++; 370 + if (part->partno) 371 + part_to_disk(part)->part0.in_flight++; 372 + } 373 + 374 + static inline void part_dec_in_flight(struct hd_struct *part) 375 + { 376 + part->in_flight--; 377 + if (part->partno) 378 + part_to_disk(part)->part0.in_flight--; 379 + } 308 380 309 381 /* drivers/block/ll_rw_blk.c */ 310 - extern void disk_round_stats(struct gendisk *disk); 311 - extern void part_round_stats(struct hd_struct *part); 382 + extern void part_round_stats(int cpu, struct hd_struct *part); 312 383 313 384 /* drivers/block/genhd.c */ 314 385 extern int get_blkdev_list(char *, int); 315 386 extern void add_disk(struct gendisk *disk); 316 387 extern void del_gendisk(struct gendisk *gp); 317 388 extern void unlink_gendisk(struct gendisk *gp); 318 - extern struct gendisk *get_gendisk(dev_t 
dev, int *part); 389 + extern struct gendisk *get_gendisk(dev_t dev, int *partno); 390 + extern struct block_device *bdget_disk(struct gendisk *disk, int partno); 319 391 320 392 extern void set_device_ro(struct block_device *bdev, int flag); 321 393 extern void set_disk_ro(struct gendisk *disk, int flag); 394 + 395 + static inline int get_disk_ro(struct gendisk *disk) 396 + { 397 + return disk->part0.policy; 398 + } 322 399 323 400 /* drivers/char/random.c */ 324 401 extern void add_disk_randomness(struct gendisk *disk); ··· 360 369 361 370 static inline sector_t get_start_sect(struct block_device *bdev) 362 371 { 363 - return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect; 372 + return bdev->bd_part->start_sect; 364 373 } 365 374 static inline sector_t get_capacity(struct gendisk *disk) 366 375 { 367 - return disk->capacity; 376 + return disk->part0.nr_sects; 368 377 } 369 378 static inline void set_capacity(struct gendisk *disk, sector_t size) 370 379 { 371 - disk->capacity = size; 380 + disk->part0.nr_sects = size; 372 381 } 373 382 374 383 #ifdef CONFIG_SOLARIS_X86_PARTITION ··· 518 527 #define ADDPART_FLAG_RAID 1 519 528 #define ADDPART_FLAG_WHOLEDISK 2 520 529 521 - extern dev_t blk_lookup_devt(const char *name, int part); 522 - extern char *disk_name (struct gendisk *hd, int part, char *buf); 530 + extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); 531 + extern void blk_free_devt(dev_t devt); 532 + extern dev_t blk_lookup_devt(const char *name, int partno); 533 + extern char *disk_name (struct gendisk *hd, int partno, char *buf); 523 534 535 + extern int disk_expand_part_tbl(struct gendisk *disk, int target); 524 536 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); 525 537 extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); 526 538 extern void delete_partition(struct gendisk *, int); ··· 540 546 void *data); 541 547 extern void blk_unregister_region(dev_t devt, unsigned 
long range); 542 548 543 - static inline struct block_device *bdget_disk(struct gendisk *disk, int index) 544 - { 545 - return bdget(MKDEV(disk->major, disk->first_minor) + index); 546 - } 549 + extern ssize_t part_size_show(struct device *dev, 550 + struct device_attribute *attr, char *buf); 551 + extern ssize_t part_stat_show(struct device *dev, 552 + struct device_attribute *attr, char *buf); 553 + #ifdef CONFIG_FAIL_MAKE_REQUEST 554 + extern ssize_t part_fail_show(struct device *dev, 555 + struct device_attribute *attr, char *buf); 556 + extern ssize_t part_fail_store(struct device *dev, 557 + struct device_attribute *attr, 558 + const char *buf, size_t count); 559 + #endif /* CONFIG_FAIL_MAKE_REQUEST */ 547 560 548 561 #else /* CONFIG_BLOCK */ 549 562 550 563 static inline void printk_all_partitions(void) { } 551 564 552 - static inline dev_t blk_lookup_devt(const char *name, int part) 565 + static inline dev_t blk_lookup_devt(const char *name, int partno) 553 566 { 554 567 dev_t devt = MKDEV(0, 0); 555 568 return devt;
+1 -2
include/linux/klist.h
··· 38 38 void (*put)(struct klist_node *)); 39 39 40 40 struct klist_node { 41 - struct klist *n_klist; 41 + void *n_klist; /* never access directly */ 42 42 struct list_head n_node; 43 43 struct kref n_ref; 44 44 struct completion n_removed; ··· 57 57 58 58 struct klist_iter { 59 59 struct klist *i_klist; 60 - struct list_head *i_head; 61 60 struct klist_node *i_cur; 62 61 }; 63 62
+2
include/linux/major.h
··· 170 170 171 171 #define VIOTAPE_MAJOR 230 172 172 173 + #define BLOCK_EXT_MAJOR 259 174 + 173 175 #endif
+2
include/linux/mtd/blktrans.h
··· 41 41 unsigned long block, char *buffer); 42 42 int (*writesect)(struct mtd_blktrans_dev *dev, 43 43 unsigned long block, char *buffer); 44 + int (*discard)(struct mtd_blktrans_dev *dev, 45 + unsigned long block, unsigned nr_blocks); 44 46 45 47 /* Block layer ioctls */ 46 48 int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);
-3
include/scsi/scsi_cmnd.h
··· 75 75 76 76 int retries; 77 77 int allowed; 78 - int timeout_per_command; 79 78 80 79 unsigned char prot_op; 81 80 unsigned char prot_type; ··· 85 86 /* These elements define the operation we are about to perform */ 86 87 unsigned char *cmnd; 87 88 88 - struct timer_list eh_timeout; /* Used to time out the command. */ 89 89 90 90 /* These elements define the operation we ultimately want to perform */ 91 91 struct scsi_data_buffer sdb; ··· 137 139 extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *, 138 140 struct device *); 139 141 extern void scsi_finish_command(struct scsi_cmnd *cmd); 140 - extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd); 141 142 142 143 extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, 143 144 size_t *offset, size_t *len);
+1 -8
include/scsi/scsi_host.h
··· 43 43 #define DISABLE_CLUSTERING 0 44 44 #define ENABLE_CLUSTERING 1 45 45 46 - enum scsi_eh_timer_return { 47 - EH_NOT_HANDLED, 48 - EH_HANDLED, 49 - EH_RESET_TIMER, 50 - }; 51 - 52 - 53 46 struct scsi_host_template { 54 47 struct module *module; 55 48 const char *name; ··· 340 347 * 341 348 * Status: OPTIONAL 342 349 */ 343 - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); 350 + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); 344 351 345 352 /* 346 353 * Name of proc directory
+2 -1
include/scsi/scsi_transport.h
··· 21 21 #define SCSI_TRANSPORT_H 22 22 23 23 #include <linux/transport_class.h> 24 + #include <linux/blkdev.h> 24 25 #include <scsi/scsi_host.h> 25 26 #include <scsi/scsi_device.h> 26 27 ··· 65 64 * begin counting again 66 65 * EH_NOT_HANDLED Begin normal error recovery 67 66 */ 68 - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); 67 + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); 69 68 70 69 /* 71 70 * Used as callback for the completion of i_t_nexus request
+4
init/do_mounts.c
··· 263 263 printk("Please append a correct \"root=\" boot option; here are the available partitions:\n"); 264 264 265 265 printk_all_partitions(); 266 + #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT 267 + printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify " 268 + "explicit textual name for \"root=\" boot option.\n"); 269 + #endif 266 270 panic("VFS: Unable to mount root fs on %s", b); 267 271 } 268 272
+34 -1
lib/Kconfig.debug
··· 624 624 625 625 Say N if you are unsure. 626 626 627 + config DEBUG_BLOCK_EXT_DEVT 628 + bool "Force extended block device numbers and spread them" 629 + depends on DEBUG_KERNEL 630 + depends on BLOCK 631 + default n 632 + help 633 + Conventionally, block device numbers are allocated from 634 + predetermined contiguous area. However, extended block area 635 + may introduce non-contiguous block device numbers. This 636 + option forces most block device numbers to be allocated from 637 + the extended space and spreads them to discover kernel or 638 + userland code paths which assume predetermined contiguous 639 + device number allocation. 640 + 641 + Note that turning on this debug option shuffles all the 642 + device numbers for all IDE and SCSI devices including libata 643 + ones, so root partition specified using device number 644 + directly (via rdev or root=MAJ:MIN) won't work anymore. 645 + Textual device names (root=/dev/sdXn) will continue to work. 646 + 647 + Say N if you are unsure. 648 + 627 649 config LKDTM 628 650 tristate "Linux Kernel Dump Test Tool Module" 629 651 depends on DEBUG_KERNEL ··· 683 661 684 662 config FAIL_MAKE_REQUEST 685 663 bool "Fault-injection capability for disk IO" 686 - depends on FAULT_INJECTION 664 + depends on FAULT_INJECTION && BLOCK 687 665 help 688 666 Provide fault-injection capability for disk IO. 667 + 668 + config FAIL_IO_TIMEOUT 669 + bool "Faul-injection capability for faking disk interrupts" 670 + depends on FAULT_INJECTION && BLOCK 671 + help 672 + Provide fault-injection capability on end IO handling. This 673 + will make the block layer "forget" an interrupt as configured, 674 + thus exercising the error handling. 675 + 676 + Only works with drivers that use the generic timeout handling, 677 + for others it wont do anything. 689 678 690 679 config FAULT_INJECTION_DEBUG_FS 691 680 bool "Debugfs entries for fault-injection capabilities"
+73 -29
lib/klist.c
··· 37 37 #include <linux/klist.h> 38 38 #include <linux/module.h> 39 39 40 + /* 41 + * Use the lowest bit of n_klist to mark deleted nodes and exclude 42 + * dead ones from iteration. 43 + */ 44 + #define KNODE_DEAD 1LU 45 + #define KNODE_KLIST_MASK ~KNODE_DEAD 46 + 47 + static struct klist *knode_klist(struct klist_node *knode) 48 + { 49 + return (struct klist *) 50 + ((unsigned long)knode->n_klist & KNODE_KLIST_MASK); 51 + } 52 + 53 + static bool knode_dead(struct klist_node *knode) 54 + { 55 + return (unsigned long)knode->n_klist & KNODE_DEAD; 56 + } 57 + 58 + static void knode_set_klist(struct klist_node *knode, struct klist *klist) 59 + { 60 + knode->n_klist = klist; 61 + /* no knode deserves to start its life dead */ 62 + WARN_ON(knode_dead(knode)); 63 + } 64 + 65 + static void knode_kill(struct klist_node *knode) 66 + { 67 + /* and no knode should die twice ever either, see we're very humane */ 68 + WARN_ON(knode_dead(knode)); 69 + *(unsigned long *)&knode->n_klist |= KNODE_DEAD; 70 + } 40 71 41 72 /** 42 73 * klist_init - Initialize a klist structure. 
··· 110 79 INIT_LIST_HEAD(&n->n_node); 111 80 init_completion(&n->n_removed); 112 81 kref_init(&n->n_ref); 113 - n->n_klist = k; 82 + knode_set_klist(n, k); 114 83 if (k->get) 115 84 k->get(n); 116 85 } ··· 146 115 */ 147 116 void klist_add_after(struct klist_node *n, struct klist_node *pos) 148 117 { 149 - struct klist *k = pos->n_klist; 118 + struct klist *k = knode_klist(pos); 150 119 151 120 klist_node_init(k, n); 152 121 spin_lock(&k->k_lock); ··· 162 131 */ 163 132 void klist_add_before(struct klist_node *n, struct klist_node *pos) 164 133 { 165 - struct klist *k = pos->n_klist; 134 + struct klist *k = knode_klist(pos); 166 135 167 136 klist_node_init(k, n); 168 137 spin_lock(&k->k_lock); ··· 175 144 { 176 145 struct klist_node *n = container_of(kref, struct klist_node, n_ref); 177 146 147 + WARN_ON(!knode_dead(n)); 178 148 list_del(&n->n_node); 179 149 complete(&n->n_removed); 180 - n->n_klist = NULL; 150 + knode_set_klist(n, NULL); 181 151 } 182 152 183 153 static int klist_dec_and_del(struct klist_node *n) 184 154 { 185 155 return kref_put(&n->n_ref, klist_release); 156 + } 157 + 158 + static void klist_put(struct klist_node *n, bool kill) 159 + { 160 + struct klist *k = knode_klist(n); 161 + void (*put)(struct klist_node *) = k->put; 162 + 163 + spin_lock(&k->k_lock); 164 + if (kill) 165 + knode_kill(n); 166 + if (!klist_dec_and_del(n)) 167 + put = NULL; 168 + spin_unlock(&k->k_lock); 169 + if (put) 170 + put(n); 186 171 } 187 172 188 173 /** ··· 207 160 */ 208 161 void klist_del(struct klist_node *n) 209 162 { 210 - struct klist *k = n->n_klist; 211 - void (*put)(struct klist_node *) = k->put; 212 - 213 - spin_lock(&k->k_lock); 214 - if (!klist_dec_and_del(n)) 215 - put = NULL; 216 - spin_unlock(&k->k_lock); 217 - if (put) 218 - put(n); 163 + klist_put(n, true); 219 164 } 220 165 EXPORT_SYMBOL_GPL(klist_del); 221 166 ··· 245 206 struct klist_node *n) 246 207 { 247 208 i->i_klist = k; 248 - i->i_head = &k->k_list; 249 209 i->i_cur = n; 250 210 if (n) 251 
211 kref_get(&n->n_ref); ··· 275 237 void klist_iter_exit(struct klist_iter *i) 276 238 { 277 239 if (i->i_cur) { 278 - klist_del(i->i_cur); 240 + klist_put(i->i_cur, false); 279 241 i->i_cur = NULL; 280 242 } 281 243 } ··· 296 258 */ 297 259 struct klist_node *klist_next(struct klist_iter *i) 298 260 { 299 - struct list_head *next; 300 - struct klist_node *lnode = i->i_cur; 301 - struct klist_node *knode = NULL; 302 261 void (*put)(struct klist_node *) = i->i_klist->put; 262 + struct klist_node *last = i->i_cur; 263 + struct klist_node *next; 303 264 304 265 spin_lock(&i->i_klist->k_lock); 305 - if (lnode) { 306 - next = lnode->n_node.next; 307 - if (!klist_dec_and_del(lnode)) 266 + 267 + if (last) { 268 + next = to_klist_node(last->n_node.next); 269 + if (!klist_dec_and_del(last)) 308 270 put = NULL; 309 271 } else 310 - next = i->i_head->next; 272 + next = to_klist_node(i->i_klist->k_list.next); 311 273 312 - if (next != i->i_head) { 313 - knode = to_klist_node(next); 314 - kref_get(&knode->n_ref); 274 + i->i_cur = NULL; 275 + while (next != to_klist_node(&i->i_klist->k_list)) { 276 + if (likely(!knode_dead(next))) { 277 + kref_get(&next->n_ref); 278 + i->i_cur = next; 279 + break; 280 + } 281 + next = to_klist_node(next->n_node.next); 315 282 } 316 - i->i_cur = knode; 283 + 317 284 spin_unlock(&i->i_klist->k_lock); 318 - if (put && lnode) 319 - put(lnode); 320 - return knode; 285 + 286 + if (put && last) 287 + put(last); 288 + return i->i_cur; 321 289 } 322 290 EXPORT_SYMBOL_GPL(klist_next);
+1 -1
mm/bounce.c
··· 267 267 /* 268 268 * Data-less bio, nothing to bounce 269 269 */ 270 - if (bio_empty_barrier(*bio_orig)) 270 + if (!bio_has_data(*bio_orig)) 271 271 return; 272 272 273 273 /*