Merge tag 'block-6.19-20260122' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull block fixes from Jens Axboe:

- A set of selftest fixes for ublk

- Fix for a PID mismatch in ublk, where PIDs from different namespaces
were compared when the server runs inside a PID namespace

- Fix for a polling regression introduced in this release, where the
NVMe over TCP connect code would spin forever

- Zoned device error path fix

- Tweak the blkzoned uapi additions from this kernel release, making
them more easily discoverable

- Fix for a bio endio handling regression in bcache introduced in this
release

* tag 'block-6.19-20260122' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
bcache: use bio cloning for detached device requests
blk-mq: use BLK_POLL_ONESHOT for synchronous poll completion
selftests/ublk: fix garbage output in foreground mode
selftests/ublk: fix error handling for starting device
selftests/ublk: fix IO thread idle check
block: make the new blkzoned UAPI constants discoverable
ublk: fix ublksrv pid handling for pid namespaces
block: Fix an error path in disk_update_zone_resources()

+102 -59
+1 -1
block/blk-mq.c
···
 static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
 {
 	do {
-		blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0);
+		blk_hctx_poll(rq->q, rq->mq_hctx, NULL, BLK_POLL_ONESHOT);
 		cond_resched();
 	} while (!completion_done(wait));
 }
+1
block/blk-zoned.c
···
 
 	disk->nr_zones = args->nr_zones;
 	if (args->nr_conv_zones >= disk->nr_zones) {
+		queue_limits_cancel_update(q);
 		pr_warn("%s: Invalid number of conventional zones %u / %u\n",
 			disk->disk_name, args->nr_conv_zones, disk->nr_zones);
 		ret = -ENODEV;
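The error path needs the cancel because a queue limits update was started earlier, and queue_limits_start_update() keeps the queue's limits lock held until the update is either committed or cancelled. A minimal, illustrative sketch of that pairing (hypothetical driver code, not taken from this patch):

#include <linux/blkdev.h>

/*
 * Illustration only: every queue_limits_start_update() must be balanced by
 * queue_limits_commit_update() or queue_limits_cancel_update(), on error
 * paths too, because the start call takes the queue's limits lock.
 */
static int example_set_max_sectors(struct request_queue *q, unsigned int max)
{
	struct queue_limits lim = queue_limits_start_update(q);

	if (!max) {
		/* keep the old limits and drop the lock */
		queue_limits_cancel_update(q);
		return -EINVAL;
	}

	lim.max_hw_sectors = max;
	return queue_limits_commit_update(q, &lim);
}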
+34 -5
drivers/block/ublk_drv.c
···
 	return ub;
 }
 
+static bool ublk_validate_user_pid(struct ublk_device *ub, pid_t ublksrv_pid)
+{
+	rcu_read_lock();
+	ublksrv_pid = pid_nr(find_vpid(ublksrv_pid));
+	rcu_read_unlock();
+
+	return ub->ublksrv_tgid == ublksrv_pid;
+}
+
 static int ublk_ctrl_start_dev(struct ublk_device *ub,
 		const struct ublksrv_ctrl_cmd *header)
 {
···
 	if (wait_for_completion_interruptible(&ub->completion) != 0)
 		return -EINTR;
 
-	if (ub->ublksrv_tgid != ublksrv_pid)
+	if (!ublk_validate_user_pid(ub, ublksrv_pid))
 		return -EINVAL;
 
 	mutex_lock(&ub->mutex);
···
 	disk->fops = &ub_fops;
 	disk->private_data = ub;
 
-	ub->dev_info.ublksrv_pid = ublksrv_pid;
+	ub->dev_info.ublksrv_pid = ub->ublksrv_tgid;
 	ub->ub_disk = disk;
 
 	ublk_apply_params(ub);
···
 static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
 		const struct ublksrv_ctrl_cmd *header)
 {
+	struct task_struct *p;
+	struct pid *pid;
+	struct ublksrv_ctrl_dev_info dev_info;
+	pid_t init_ublksrv_tgid = ub->dev_info.ublksrv_pid;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 
 	if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
 		return -EINVAL;
 
-	if (copy_to_user(argp, &ub->dev_info, sizeof(ub->dev_info)))
+	memcpy(&dev_info, &ub->dev_info, sizeof(dev_info));
+	dev_info.ublksrv_pid = -1;
+
+	if (init_ublksrv_tgid > 0) {
+		rcu_read_lock();
+		pid = find_pid_ns(init_ublksrv_tgid, &init_pid_ns);
+		p = pid_task(pid, PIDTYPE_TGID);
+		if (p) {
+			int vnr = task_tgid_vnr(p);
+
+			if (vnr)
+				dev_info.ublksrv_pid = vnr;
+		}
+		rcu_read_unlock();
+	}
+
+	if (copy_to_user(argp, &dev_info, sizeof(dev_info)))
 		return -EFAULT;
 
 	return 0;
···
 	pr_devel("%s: All FETCH_REQs received, dev id %d\n", __func__,
 			header->dev_id);
 
-	if (ub->ublksrv_tgid != ublksrv_pid)
+	if (!ublk_validate_user_pid(ub, ublksrv_pid))
 		return -EINVAL;
 
 	mutex_lock(&ub->mutex);
···
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	ub->dev_info.ublksrv_pid = ublksrv_pid;
+	ub->dev_info.ublksrv_pid = ub->ublksrv_tgid;
 	ub->dev_info.state = UBLK_S_DEV_LIVE;
 	pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
 			__func__, ublksrv_pid, header->dev_id);
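For readers less familiar with the pid APIs used above: ub->ublksrv_tgid holds a global (init namespace) TGID, so a PID number handed in from userspace has to be resolved in the caller's own namespace before the two can be compared, which is what the new ublk_validate_user_pid() helper does via find_vpid()/pid_nr(). A condensed illustration of that idiom (function names here are hypothetical, not from the driver):

#include <linux/pid.h>
#include <linux/sched.h>

/*
 * Illustration only: record the attaching task's global TGID, then compare
 * a userspace-supplied number by first resolving it in the caller's own
 * PID namespace.
 */
static pid_t example_record_tgid(void)
{
	return task_tgid_nr(current);		/* init-namespace TGID */
}

static bool example_pid_matches(pid_t recorded_global, pid_t user_supplied)
{
	pid_t global;

	rcu_read_lock();
	/* find_vpid() resolves the number in current's PID namespace */
	global = pid_nr(find_vpid(user_supplied));
	rcu_read_unlock();

	return global && global == recorded_global;
}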
+9
drivers/md/bcache/bcache.h
···
 
 	struct bio_set		bio_split;
 
+	struct bio_set		bio_detached;
+
 	unsigned int		data_csum:1;
 
 	int (*cache_miss)(struct btree *b, struct search *s,
···
 	 */
 	};
 	struct bio		bio;
+};
+
+struct detached_dev_io_private {
+	struct bcache_device	*d;
+	unsigned long		start_time;
+	struct bio		*orig_bio;
+	struct bio		bio;
 };
 
 #define BTREE_PRIO		USHRT_MAX
+36 -45
drivers/md/bcache/request.c
···
 	continue_at(cl, cached_dev_bio_complete, NULL);
 }
 
-struct detached_dev_io_private {
-	struct bcache_device	*d;
-	unsigned long		start_time;
-	bio_end_io_t		*bi_end_io;
-	void			*bi_private;
-	struct block_device	*orig_bdev;
-};
-
 static void detached_dev_end_io(struct bio *bio)
 {
-	struct detached_dev_io_private *ddip;
-
-	ddip = bio->bi_private;
-	bio->bi_end_io = ddip->bi_end_io;
-	bio->bi_private = ddip->bi_private;
+	struct detached_dev_io_private *ddip =
+		container_of(bio, struct detached_dev_io_private, bio);
+	struct bio *orig_bio = ddip->orig_bio;
 
 	/* Count on the bcache device */
-	bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
+	bio_end_io_acct(orig_bio, ddip->start_time);
 
 	if (bio->bi_status) {
-		struct cached_dev *dc = container_of(ddip->d,
-						     struct cached_dev, disk);
+		struct cached_dev *dc = bio->bi_private;
+
 		/* should count I/O error for backing device here */
 		bch_count_backing_io_errors(dc, bio);
+		orig_bio->bi_status = bio->bi_status;
 	}
 
-	kfree(ddip);
-	bio_endio(bio);
+	bio_put(bio);
+	bio_endio(orig_bio);
 }
 
-static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
-		struct block_device *orig_bdev, unsigned long start_time)
+static void detached_dev_do_request(struct bcache_device *d,
+		struct bio *orig_bio, unsigned long start_time)
 {
 	struct detached_dev_io_private *ddip;
 	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+	struct bio *clone_bio;
 
-	/*
-	 * no need to call closure_get(&dc->disk.cl),
-	 * because upper layer had already opened bcache device,
-	 * which would call closure_get(&dc->disk.cl)
-	 */
-	ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-	if (!ddip) {
-		bio->bi_status = BLK_STS_RESOURCE;
-		bio_endio(bio);
+	if (bio_op(orig_bio) == REQ_OP_DISCARD &&
+	    !bdev_max_discard_sectors(dc->bdev)) {
+		bio_endio(orig_bio);
 		return;
 	}
 
-	ddip->d = d;
-	/* Count on the bcache device */
-	ddip->orig_bdev = orig_bdev;
-	ddip->start_time = start_time;
-	ddip->bi_end_io = bio->bi_end_io;
-	ddip->bi_private = bio->bi_private;
-	bio->bi_end_io = detached_dev_end_io;
-	bio->bi_private = ddip;
+	clone_bio = bio_alloc_clone(dc->bdev, orig_bio, GFP_NOIO,
+			&d->bio_detached);
+	if (!clone_bio) {
+		orig_bio->bi_status = BLK_STS_RESOURCE;
+		bio_endio(orig_bio);
+		return;
+	}
 
-	if ((bio_op(bio) == REQ_OP_DISCARD) &&
-	    !bdev_max_discard_sectors(dc->bdev))
-		detached_dev_end_io(bio);
-	else
-		submit_bio_noacct(bio);
+	ddip = container_of(clone_bio, struct detached_dev_io_private, bio);
+	/* Count on the bcache device */
+	ddip->d = d;
+	ddip->start_time = start_time;
+	ddip->orig_bio = orig_bio;
+
+	clone_bio->bi_end_io = detached_dev_end_io;
+	clone_bio->bi_private = dc;
+
+	submit_bio_noacct(clone_bio);
 }
 
 static void quit_max_writeback_rate(struct cache_set *c,
···
 
 	start_time = bio_start_io_acct(bio);
 
-	bio_set_dev(bio, dc->bdev);
 	bio->bi_iter.bi_sector += dc->sb.data_offset;
 
 	if (cached_dev_get(dc)) {
+		bio_set_dev(bio, dc->bdev);
 		s = search_alloc(bio, d, orig_bdev, start_time);
 		trace_bcache_request_start(s->d, bio);
···
 		else
 			cached_dev_read(dc, s);
 		}
-	} else
+	} else {
 		/* I/O request sent to backing device */
-		detached_dev_do_request(d, bio, orig_bdev, start_time);
+		detached_dev_do_request(d, bio, start_time);
+	}
 }
 
 static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode,
+10 -2
drivers/md/bcache/super.c
···
 	}
 
 	bioset_exit(&d->bio_split);
+	bioset_exit(&d->bio_detached);
 	kvfree(d->full_dirty_stripes);
 	kvfree(d->stripe_sectors_dirty);
 
···
 			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
 		goto out_ida_remove;
 
+	if (bioset_init(&d->bio_detached, 4,
+			offsetof(struct detached_dev_io_private, bio),
+			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
+		goto out_bioset_split_exit;
+
 	if (lim.logical_block_size > PAGE_SIZE && cached_bdev) {
 		/*
 		 * This should only happen with BCACHE_SB_VERSION_BDEV.
···
 
 	d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
 	if (IS_ERR(d->disk))
-		goto out_bioset_exit;
+		goto out_bioset_detach_exit;
 
 	set_capacity(d->disk, sectors);
 	snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
···
 	d->disk->private_data = d;
 	return 0;
 
-out_bioset_exit:
+out_bioset_detach_exit:
+	bioset_exit(&d->bio_detached);
+out_bioset_split_exit:
 	bioset_exit(&d->bio_split);
 out_ida_remove:
 	ida_free(&bcache_device_idx, idx);
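Taken together, the bcache.h, request.c and super.c hunks move detached-device requests onto the common front-pad bioset pattern: the per-I/O private struct embeds the clone bio as its last member, the bioset is initialised with offsetof() as the front pad, and container_of() recovers the private data at completion time. A self-contained sketch of that pattern (names, the pool size and the accounting calls are illustrative, not copied from bcache):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Illustration of the front-pad bioset pattern used by the fix. */
struct clone_priv {
	unsigned long	start_time;
	struct bio	*orig_bio;
	struct bio	bio;		/* must be the last member */
};

static struct bio_set clone_bioset;

static int clone_bioset_setup(void)
{
	/* front_pad reserves space for the fields that precede 'bio' */
	return bioset_init(&clone_bioset, 4,
			   offsetof(struct clone_priv, bio), BIOSET_NEED_BVECS);
}

static void clone_end_io(struct bio *clone)
{
	struct clone_priv *priv = container_of(clone, struct clone_priv, bio);
	struct bio *orig = priv->orig_bio;

	orig->bi_status = clone->bi_status;
	bio_end_io_acct(orig, priv->start_time);
	bio_put(clone);			/* frees the embedding allocation */
	bio_endio(orig);
}

static void clone_and_submit(struct block_device *bdev, struct bio *orig)
{
	struct bio *clone;
	struct clone_priv *priv;

	clone = bio_alloc_clone(bdev, orig, GFP_NOIO, &clone_bioset);
	if (!clone) {
		orig->bi_status = BLK_STS_RESOURCE;
		bio_endio(orig);
		return;
	}

	priv = container_of(clone, struct clone_priv, bio);
	priv->orig_bio = orig;
	priv->start_time = bio_start_io_acct(orig);

	clone->bi_end_io = clone_end_io;
	submit_bio_noacct(clone);
}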
+4 -2
include/uapi/linux/blkzoned.h
···
 	BLK_ZONE_COND_FULL	= 0xE,
 	BLK_ZONE_COND_OFFLINE	= 0xF,
 
-	BLK_ZONE_COND_ACTIVE	= 0xFF,
+	BLK_ZONE_COND_ACTIVE	= 0xFF, /* added in Linux 6.19 */
+#define BLK_ZONE_COND_ACTIVE BLK_ZONE_COND_ACTIVE
 };
 
 /**
···
 	BLK_ZONE_REP_CAPACITY	= (1U << 0),
 
 	/* Input flags */
-	BLK_ZONE_REP_CACHED	= (1U << 31),
+	BLK_ZONE_REP_CACHED	= (1U << 31), /* added in Linux 6.19 */
+#define BLK_ZONE_REP_CACHED BLK_ZONE_REP_CACHED
 };
 
 /**
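The self-referencing #defines follow the usual UAPI convention for making new enum values visible to the preprocessor, so applications can probe for them at compile time and still build against older headers. A possible userspace usage (hypothetical application code, not part of this patch):

#include <stdbool.h>
#include <linux/blkzoned.h>

/* Hypothetical application code: compile-time probing of the new constants. */
static bool zone_cond_active_known(void)
{
#ifdef BLK_ZONE_COND_ACTIVE
	return true;
#else
	return false;
#endif
}

static unsigned int zone_report_flags(bool want_cached)
{
	unsigned int flags = 0;

#ifdef BLK_ZONE_REP_CACHED
	if (want_cached)
		flags |= BLK_ZONE_REP_CACHED;	/* input flag from the hunk above */
#endif
	return flags;
}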
+7 -4
tools/testing/selftests/ublk/kublk.c
···
 
 static int ublk_thread_is_done(struct ublk_thread *t)
 {
-	return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t);
+	return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t) && !t->cmd_inflight;
 }
 
 static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t,
···
 	}
 	if (ret < 0) {
 		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
-		goto fail;
+		/* stop device so that inflight uring_cmd can be cancelled */
+		ublk_ctrl_stop_dev(dev);
+		goto fail_start;
 	}
 
 	ublk_ctrl_get_info(dev);
···
 		ublk_ctrl_dump(dev);
 	else
 		ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
-
+fail_start:
 	/* wait until we are terminated */
 	for (i = 0; i < dev->nthreads; i++)
 		pthread_join(tinfo[i].thread, &thread_ret);
···
 	}
 
 	ret = ublk_start_daemon(ctx, dev);
-	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret);
+	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
 	if (ret < 0)
 		ublk_ctrl_del_dev(dev);
···
 	int option_idx, opt;
 	const char *cmd = argv[1];
 	struct dev_ctx ctx = {
+		._evtfd		= -1,
 		.queue_depth	= 128,
 		.nr_hw_queues	= 2,
 		.dev_id		= -1,