Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bcache: use bio cloning for detached device requests

Previously, bcache hijacked the bi_end_io and bi_private fields of
the incoming bio when the backing device was in a detached state.
This is fragile and breaks if the bio is needed to be processed by
other layers.

This patch transitions to using a cloned bio embedded within a private
structure. This ensures the original bio's metadata remains untouched.

Fixes: 53280e398471 ("bcache: fix improper use of bi_end_io")
Co-developed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Shida Zhang <zhangshida@kylinos.cn>
Acked-by: Coly Li <colyli@fnnas.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Shida Zhang and committed by
Jens Axboe
3ef825df 046be7e5

+55 -47
+9
drivers/md/bcache/bcache.h
··· 273 273 274 274 struct bio_set bio_split; 275 275 276 + struct bio_set bio_detached; 277 + 276 278 unsigned int data_csum:1; 277 279 278 280 int (*cache_miss)(struct btree *b, struct search *s, ··· 753 751 */ 754 752 }; 755 753 struct bio bio; 754 + }; 755 + 756 + struct detached_dev_io_private { 757 + struct bcache_device *d; 758 + unsigned long start_time; 759 + struct bio *orig_bio; 760 + struct bio bio; 756 761 }; 757 762 758 763 #define BTREE_PRIO USHRT_MAX
+36 -45
drivers/md/bcache/request.c
··· 1077 1077 continue_at(cl, cached_dev_bio_complete, NULL); 1078 1078 } 1079 1079 1080 - struct detached_dev_io_private { 1081 - struct bcache_device *d; 1082 - unsigned long start_time; 1083 - bio_end_io_t *bi_end_io; 1084 - void *bi_private; 1085 - struct block_device *orig_bdev; 1086 - }; 1087 - 1088 1080 static void detached_dev_end_io(struct bio *bio) 1089 1081 { 1090 - struct detached_dev_io_private *ddip; 1091 - 1092 - ddip = bio->bi_private; 1093 - bio->bi_end_io = ddip->bi_end_io; 1094 - bio->bi_private = ddip->bi_private; 1082 + struct detached_dev_io_private *ddip = 1083 + container_of(bio, struct detached_dev_io_private, bio); 1084 + struct bio *orig_bio = ddip->orig_bio; 1095 1085 1096 1086 /* Count on the bcache device */ 1097 - bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev); 1087 + bio_end_io_acct(orig_bio, ddip->start_time); 1098 1088 1099 1089 if (bio->bi_status) { 1100 - struct cached_dev *dc = container_of(ddip->d, 1101 - struct cached_dev, disk); 1090 + struct cached_dev *dc = bio->bi_private; 1091 + 1102 1092 /* should count I/O error for backing device here */ 1103 1093 bch_count_backing_io_errors(dc, bio); 1094 + orig_bio->bi_status = bio->bi_status; 1104 1095 } 1105 1096 1106 - kfree(ddip); 1107 - bio_endio(bio); 1097 + bio_put(bio); 1098 + bio_endio(orig_bio); 1108 1099 } 1109 1100 1110 - static void detached_dev_do_request(struct bcache_device *d, struct bio *bio, 1111 - struct block_device *orig_bdev, unsigned long start_time) 1101 + static void detached_dev_do_request(struct bcache_device *d, 1102 + struct bio *orig_bio, unsigned long start_time) 1112 1103 { 1113 1104 struct detached_dev_io_private *ddip; 1114 1105 struct cached_dev *dc = container_of(d, struct cached_dev, disk); 1106 + struct bio *clone_bio; 1115 1107 1116 - /* 1117 - * no need to call closure_get(&dc->disk.cl), 1118 - * because upper layer had already opened bcache device, 1119 - * which would call closure_get(&dc->disk.cl) 1120 - */ 1121 - ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); 1122 - if (!ddip) { 1123 - bio->bi_status = BLK_STS_RESOURCE; 1124 - bio_endio(bio); 1108 + if (bio_op(orig_bio) == REQ_OP_DISCARD && 1109 + !bdev_max_discard_sectors(dc->bdev)) { 1110 + bio_endio(orig_bio); 1125 1111 return; 1126 1112 } 1127 1113 1128 - ddip->d = d; 1129 - /* Count on the bcache device */ 1130 - ddip->orig_bdev = orig_bdev; 1131 - ddip->start_time = start_time; 1132 - ddip->bi_end_io = bio->bi_end_io; 1133 - ddip->bi_private = bio->bi_private; 1134 - bio->bi_end_io = detached_dev_end_io; 1135 - bio->bi_private = ddip; 1114 + clone_bio = bio_alloc_clone(dc->bdev, orig_bio, GFP_NOIO, 1115 + &d->bio_detached); 1116 + if (!clone_bio) { 1117 + orig_bio->bi_status = BLK_STS_RESOURCE; 1118 + bio_endio(orig_bio); 1119 + return; 1120 + } 1136 1121 1137 - if ((bio_op(bio) == REQ_OP_DISCARD) && 1138 - !bdev_max_discard_sectors(dc->bdev)) 1139 - detached_dev_end_io(bio); 1140 - else 1141 - submit_bio_noacct(bio); 1122 + ddip = container_of(clone_bio, struct detached_dev_io_private, bio); 1123 + /* Count on the bcache device */ 1124 + ddip->d = d; 1125 + ddip->start_time = start_time; 1126 + ddip->orig_bio = orig_bio; 1127 + 1128 + clone_bio->bi_end_io = detached_dev_end_io; 1129 + clone_bio->bi_private = dc; 1130 + 1131 + submit_bio_noacct(clone_bio); 1142 1132 } 1143 1133 1144 1134 static void quit_max_writeback_rate(struct cache_set *c, ··· 1204 1214 1205 1215 start_time = bio_start_io_acct(bio); 1206 1216 1207 - bio_set_dev(bio, dc->bdev); 1208 1217 bio->bi_iter.bi_sector += dc->sb.data_offset; 1209 1218 1210 1219 if (cached_dev_get(dc)) { 1220 + bio_set_dev(bio, dc->bdev); 1211 1221 s = search_alloc(bio, d, orig_bdev, start_time); 1212 1222 trace_bcache_request_start(s->d, bio); 1213 1223 ··· 1227 1237 else 1228 1238 cached_dev_read(dc, s); 1229 1239 } 1230 - } else 1240 + } else { 1231 1241 /* I/O request sent to backing device */ 1232 - detached_dev_do_request(d, bio, orig_bdev, start_time); 1242 + detached_dev_do_request(d, bio, start_time); 1243 + } 1233 1244 } 1234 1245 1235 1246 static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode,
+10 -2
drivers/md/bcache/super.c
··· 887 887 } 888 888 889 889 bioset_exit(&d->bio_split); 890 + bioset_exit(&d->bio_detached); 890 891 kvfree(d->full_dirty_stripes); 891 892 kvfree(d->stripe_sectors_dirty); 892 893 ··· 950 949 BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) 951 950 goto out_ida_remove; 952 951 952 + if (bioset_init(&d->bio_detached, 4, 953 + offsetof(struct detached_dev_io_private, bio), 954 + BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) 955 + goto out_bioset_split_exit; 956 + 953 957 if (lim.logical_block_size > PAGE_SIZE && cached_bdev) { 954 958 /* 955 959 * This should only happen with BCACHE_SB_VERSION_BDEV. ··· 970 964 971 965 d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); 972 966 if (IS_ERR(d->disk)) 973 - goto out_bioset_exit; 967 + goto out_bioset_detach_exit; 974 968 975 969 set_capacity(d->disk, sectors); 976 970 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx); ··· 982 976 d->disk->private_data = d; 983 977 return 0; 984 978 985 - out_bioset_exit: 979 + out_bioset_detach_exit: 980 + bioset_exit(&d->bio_detached); 981 + out_bioset_split_exit: 986 982 bioset_exit(&d->bio_split); 987 983 out_ida_remove: 988 984 ida_free(&bcache_device_idx, idx);