Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
md: Fix handling for devices from 2TB to 4TB in 0.90 metadata.
md/raid1,10: Remove use-after-free bug in make_request.
md/raid10: unify handling of write completion.
Avoid dereferencing a 'request_queue' after last close.

+48 -32
+10 -2
drivers/md/md.c
··· 1138 1138 ret = 0; 1139 1139 } 1140 1140 rdev->sectors = rdev->sb_start; 1141 + /* Limit to 4TB as metadata cannot record more than that */ 1142 + if (rdev->sectors >= (2ULL << 32)) 1143 + rdev->sectors = (2ULL << 32) - 2; 1141 1144 1142 - if (rdev->sectors < sb->size * 2 && sb->level > 1) 1145 + if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) 1143 1146 /* "this cannot possibly happen" ... */ 1144 1147 ret = -EINVAL; 1145 1148 ··· 1176 1173 mddev->clevel[0] = 0; 1177 1174 mddev->layout = sb->layout; 1178 1175 mddev->raid_disks = sb->raid_disks; 1179 - mddev->dev_sectors = sb->size * 2; 1176 + mddev->dev_sectors = ((sector_t)sb->size) * 2; 1180 1177 mddev->events = ev1; 1181 1178 mddev->bitmap_info.offset = 0; 1182 1179 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; ··· 1418 1415 rdev->sb_start = calc_dev_sboffset(rdev); 1419 1416 if (!num_sectors || num_sectors > rdev->sb_start) 1420 1417 num_sectors = rdev->sb_start; 1418 + /* Limit to 4TB as metadata cannot record more than that. 1419 + * 4TB == 2^32 KB, or 2*2^32 sectors. 1420 + */ 1421 + if (num_sectors >= (2ULL << 32)) 1422 + num_sectors = (2ULL << 32) - 2; 1421 1423 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, 1422 1424 rdev->sb_page); 1423 1425 md_super_wait(rdev->mddev);
+9 -5
drivers/md/raid1.c
··· 1099 1099 bio_list_add(&conf->pending_bio_list, mbio); 1100 1100 spin_unlock_irqrestore(&conf->device_lock, flags); 1101 1101 } 1102 - r1_bio_write_done(r1_bio); 1103 - 1104 - /* In case raid1d snuck in to freeze_array */ 1105 - wake_up(&conf->wait_barrier); 1106 - 1102 + /* Mustn't call r1_bio_write_done before this next test, 1103 + * as it could result in the bio being freed. 1104 + */ 1107 1105 if (sectors_handled < (bio->bi_size >> 9)) { 1106 + r1_bio_write_done(r1_bio); 1108 1107 /* We need another r1_bio. It has already been counted 1109 1108 * in bio->bi_phys_segments 1110 1109 */ ··· 1115 1116 r1_bio->sector = bio->bi_sector + sectors_handled; 1116 1117 goto retry_write; 1117 1118 } 1119 + 1120 + r1_bio_write_done(r1_bio); 1121 + 1122 + /* In case raid1d snuck in to freeze_array */ 1123 + wake_up(&conf->wait_barrier); 1118 1124 1119 1125 if (do_sync || !bitmap || !plugged) 1120 1126 md_wakeup_thread(mddev->thread);
+24 -23
drivers/md/raid10.c
··· 337 337 md_write_end(r10_bio->mddev); 338 338 } 339 339 340 + static void one_write_done(r10bio_t *r10_bio) 341 + { 342 + if (atomic_dec_and_test(&r10_bio->remaining)) { 343 + if (test_bit(R10BIO_WriteError, &r10_bio->state)) 344 + reschedule_retry(r10_bio); 345 + else { 346 + close_write(r10_bio); 347 + if (test_bit(R10BIO_MadeGood, &r10_bio->state)) 348 + reschedule_retry(r10_bio); 349 + else 350 + raid_end_bio_io(r10_bio); 351 + } 352 + } 353 + } 354 + 340 355 static void raid10_end_write_request(struct bio *bio, int error) 341 356 { 342 357 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); ··· 402 387 * Let's see if all mirrored write operations have finished 403 388 * already. 404 389 */ 405 - if (atomic_dec_and_test(&r10_bio->remaining)) { 406 - if (test_bit(R10BIO_WriteError, &r10_bio->state)) 407 - reschedule_retry(r10_bio); 408 - else { 409 - close_write(r10_bio); 410 - if (test_bit(R10BIO_MadeGood, &r10_bio->state)) 411 - reschedule_retry(r10_bio); 412 - else 413 - raid_end_bio_io(r10_bio); 414 - } 415 - } 390 + one_write_done(r10_bio); 416 391 if (dec_rdev) 417 392 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); 418 393 } ··· 1132 1127 spin_unlock_irqrestore(&conf->device_lock, flags); 1133 1128 } 1134 1129 1135 - if (atomic_dec_and_test(&r10_bio->remaining)) { 1136 - /* This matches the end of raid10_end_write_request() */ 1137 - bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, 1138 - r10_bio->sectors, 1139 - !test_bit(R10BIO_Degraded, &r10_bio->state), 1140 - 0); 1141 - md_write_end(mddev); 1142 - raid_end_bio_io(r10_bio); 1143 - } 1144 - 1145 - /* In case raid10d snuck in to freeze_array */ 1146 - wake_up(&conf->wait_barrier); 1130 + /* Don't remove the bias on 'remaining' (one_write_done) until 1131 + * after checking if we need to go around again. 1132 + */ 1147 1133 1148 1134 if (sectors_handled < (bio->bi_size >> 9)) { 1135 + one_write_done(r10_bio); 1149 1136 /* We need another r10_bio. It has already been counted 1150 1137 * in bio->bi_phys_segments. 1151 1138 */ ··· 1151 1154 r10_bio->state = 0; 1152 1155 goto retry_write; 1153 1156 } 1157 + one_write_done(r10_bio); 1158 + 1159 + /* In case raid10d snuck in to freeze_array */ 1160 + wake_up(&conf->wait_barrier); 1154 1161 1155 1162 if (do_sync || !mddev->bitmap || !plugged) 1156 1163 md_wakeup_thread(mddev->thread);
+5 -2
fs/block_dev.c
··· 1429 1429 WARN_ON_ONCE(bdev->bd_holders); 1430 1430 sync_blockdev(bdev); 1431 1431 kill_bdev(bdev); 1432 + /* ->release can cause the old bdi to disappear, 1433 + * so must switch it out first 1434 + */ 1435 + bdev_inode_switch_bdi(bdev->bd_inode, 1436 + &default_backing_dev_info); 1432 1437 } 1433 1438 if (bdev->bd_contains == bdev) { 1434 1439 if (disk->fops->release) ··· 1447 1442 disk_put_part(bdev->bd_part); 1448 1443 bdev->bd_part = NULL; 1449 1444 bdev->bd_disk = NULL; 1450 - bdev_inode_switch_bdi(bdev->bd_inode, 1451 - &default_backing_dev_info); 1452 1445 if (bdev != bdev->bd_contains) 1453 1446 victim = bdev->bd_contains; 1454 1447 bdev->bd_contains = NULL;