Merge tag 'block-6.19-20260122' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull block fixes from Jens Axboe:

- A set of selftest fixes for ublk

- Fix for a PID mismatch in ublk, where PIDs from different namespaces
  were compared when the ublk server runs inside a PID namespace

- Fix for a polling regression added in this release, where the NVMe
  TCP connect code would spin forever

- Zoned device error path fix

- Tweak the blkzoned UAPI additions from this kernel release, making
  them more easily discoverable

- Fix for a bio endio handling regression in bcache introduced in this
  release

* tag 'block-6.19-20260122' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
bcache: use bio cloning for detached device requests
blk-mq: use BLK_POLL_ONESHOT for synchronous poll completion
selftests/ublk: fix garbage output in foreground mode
selftests/ublk: fix error handling for starting device
selftests/ublk: fix IO thread idle check
block: make the new blkzoned UAPI constants discoverable
ublk: fix ublksrv pid handling for pid namespaces
block: Fix an error path in disk_update_zone_resources()

+102 -59
+1 -1
block/blk-mq.c
···
 static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
 {
 	do {
-		blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0);
+		blk_hctx_poll(rq->q, rq->mq_hctx, NULL, BLK_POLL_ONESHOT);
 		cond_resched();
 	} while (!completion_done(wait));
 }
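
This is the whole fix, but the flag is load-bearing. BLK_POLL_ONESHOT (from include/linux/blkdev.h) asks the poll path to check the hardware once and return, rather than letting the core poll loop spin internally until it finds a completion. An annotated restatement of the fixed loop; the comment on the flags == 0 behavior is my reading of the regression, not text from the patch (blk_hctx_poll() is internal to blk-mq.c):

static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
{
	do {
		/*
		 * With flags == 0 the core poll loop can busy-wait
		 * (cpu_relax()) until a completion turns up.  NVMe over
		 * TCP completes the connect request from a different
		 * context, so that inner loop may never see it and
		 * spins forever.  BLK_POLL_ONESHOT polls once and
		 * returns here instead.
		 */
		blk_hctx_poll(rq->q, rq->mq_hctx, NULL, BLK_POLL_ONESHOT);
		cond_resched();		/* let the completing context run */
	} while (!completion_done(wait));
}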
+1
block/blk-zoned.c
···
 
 	disk->nr_zones = args->nr_zones;
 	if (args->nr_conv_zones >= disk->nr_zones) {
+		queue_limits_cancel_update(q);
 		pr_warn("%s: Invalid number of conventional zones %u / %u\n",
 			disk->disk_name, args->nr_conv_zones, disk->nr_zones);
 		ret = -ENODEV;
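
The added line restores the locking contract of the queue-limits update API. A minimal sketch of the invariant, assuming an update has been started (queue_limits_start_update(), queue_limits_commit_update() and queue_limits_cancel_update() are the real block-layer API; the validation condition is illustrative):

	struct queue_limits lim = queue_limits_start_update(q);

	/* ... fill in lim ... */
	if (validation_fails) {
		/*
		 * start_update() took q->limits_lock; every error path
		 * must cancel, or the queue stays locked forever.
		 */
		queue_limits_cancel_update(q);
		return -ENODEV;
	}
	return queue_limits_commit_update(q, &lim);	/* commits and unlocks */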
+34 -5
drivers/block/ublk_drv.c
···
 	return ub;
 }
 
+static bool ublk_validate_user_pid(struct ublk_device *ub, pid_t ublksrv_pid)
+{
+	rcu_read_lock();
+	ublksrv_pid = pid_nr(find_vpid(ublksrv_pid));
+	rcu_read_unlock();
+
+	return ub->ublksrv_tgid == ublksrv_pid;
+}
+
 static int ublk_ctrl_start_dev(struct ublk_device *ub,
 		const struct ublksrv_ctrl_cmd *header)
 {
···
 	if (wait_for_completion_interruptible(&ub->completion) != 0)
 		return -EINTR;
 
-	if (ub->ublksrv_tgid != ublksrv_pid)
+	if (!ublk_validate_user_pid(ub, ublksrv_pid))
 		return -EINVAL;
 
 	mutex_lock(&ub->mutex);
···
 	disk->fops = &ub_fops;
 	disk->private_data = ub;
 
-	ub->dev_info.ublksrv_pid = ublksrv_pid;
+	ub->dev_info.ublksrv_pid = ub->ublksrv_tgid;
 	ub->ub_disk = disk;
 
 	ublk_apply_params(ub);
···
 static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
 		const struct ublksrv_ctrl_cmd *header)
 {
+	struct task_struct *p;
+	struct pid *pid;
+	struct ublksrv_ctrl_dev_info dev_info;
+	pid_t init_ublksrv_tgid = ub->dev_info.ublksrv_pid;
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 
 	if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
 		return -EINVAL;
 
-	if (copy_to_user(argp, &ub->dev_info, sizeof(ub->dev_info)))
+	memcpy(&dev_info, &ub->dev_info, sizeof(dev_info));
+	dev_info.ublksrv_pid = -1;
+
+	if (init_ublksrv_tgid > 0) {
+		rcu_read_lock();
+		pid = find_pid_ns(init_ublksrv_tgid, &init_pid_ns);
+		p = pid_task(pid, PIDTYPE_TGID);
+		if (p) {
+			int vnr = task_tgid_vnr(p);
+
+			if (vnr)
+				dev_info.ublksrv_pid = vnr;
+		}
+		rcu_read_unlock();
+	}
+
+	if (copy_to_user(argp, &dev_info, sizeof(dev_info)))
 		return -EFAULT;
 
 	return 0;
···
 	pr_devel("%s: All FETCH_REQs received, dev id %d\n", __func__,
 			header->dev_id);
 
-	if (ub->ublksrv_tgid != ublksrv_pid)
+	if (!ublk_validate_user_pid(ub, ublksrv_pid))
 		return -EINVAL;
 
 	mutex_lock(&ub->mutex);
···
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	ub->dev_info.ublksrv_pid = ublksrv_pid;
+	ub->dev_info.ublksrv_pid = ub->ublksrv_tgid;
 	ub->dev_info.state = UBLK_S_DEV_LIVE;
 	pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
 			__func__, ublksrv_pid, header->dev_id);
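
Both translation directions matter above: control commands carry the daemon PID as the caller's namespace sees it, while ub->ublksrv_tgid is recorded in the initial namespace. A hedged sketch of the round trip (the helper names are illustrative; find_vpid(), pid_nr(), find_pid_ns(), pid_task() and task_tgid_vnr() are the real primitives the patch uses):

#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

/* PID as the caller sees it -> PID in the initial namespace */
static pid_t vpid_to_global(pid_t vnr)
{
	pid_t nr;

	rcu_read_lock();
	nr = pid_nr(find_vpid(vnr));	/* 0 if unknown in this ns */
	rcu_read_unlock();

	return nr;
}

/* PID in the initial namespace -> PID as the caller sees it */
static pid_t global_to_vpid(pid_t nr)
{
	struct task_struct *p;
	pid_t vnr = 0;

	rcu_read_lock();
	p = pid_task(find_pid_ns(nr, &init_pid_ns), PIDTYPE_TGID);
	if (p)
		vnr = task_tgid_vnr(p);	/* 0 if not visible here */
	rcu_read_unlock();

	return vnr;
}

The visible effect is that UBLK_CMD_GET_DEV_INFO now reports the daemon PID in the requester's namespace, or -1 when the daemon is not visible there.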
+9
drivers/md/bcache/bcache.h
···
 
 	struct bio_set		bio_split;
 
+	struct bio_set		bio_detached;
+
 	unsigned int		data_csum:1;
 
 	int (*cache_miss)(struct btree *b, struct search *s,
···
 	 */
 	};
 	struct bio		bio;
+};
+
+struct detached_dev_io_private {
+	struct bcache_device	*d;
+	unsigned long		start_time;
+	struct bio		*orig_bio;
+	struct bio		bio;
 };
 
 #define BTREE_PRIO		USHRT_MAX
+36 -45
drivers/md/bcache/request.c
···
 	continue_at(cl, cached_dev_bio_complete, NULL);
 }
 
-struct detached_dev_io_private {
-	struct bcache_device	*d;
-	unsigned long		start_time;
-	bio_end_io_t		*bi_end_io;
-	void			*bi_private;
-	struct block_device	*orig_bdev;
-};
-
 static void detached_dev_end_io(struct bio *bio)
 {
-	struct detached_dev_io_private *ddip;
-
-	ddip = bio->bi_private;
-	bio->bi_end_io = ddip->bi_end_io;
-	bio->bi_private = ddip->bi_private;
+	struct detached_dev_io_private *ddip =
+		container_of(bio, struct detached_dev_io_private, bio);
+	struct bio *orig_bio = ddip->orig_bio;
 
 	/* Count on the bcache device */
-	bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
+	bio_end_io_acct(orig_bio, ddip->start_time);
 
 	if (bio->bi_status) {
-		struct cached_dev *dc = container_of(ddip->d,
-						     struct cached_dev, disk);
+		struct cached_dev *dc = bio->bi_private;
+
 		/* should count I/O error for backing device here */
 		bch_count_backing_io_errors(dc, bio);
+		orig_bio->bi_status = bio->bi_status;
 	}
 
-	kfree(ddip);
-	bio_endio(bio);
+	bio_put(bio);
+	bio_endio(orig_bio);
 }
 
-static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
-		struct block_device *orig_bdev, unsigned long start_time)
+static void detached_dev_do_request(struct bcache_device *d,
+		struct bio *orig_bio, unsigned long start_time)
 {
 	struct detached_dev_io_private *ddip;
 	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+	struct bio *clone_bio;
 
-	/*
-	 * no need to call closure_get(&dc->disk.cl),
-	 * because upper layer had already opened bcache device,
-	 * which would call closure_get(&dc->disk.cl)
-	 */
-	ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-	if (!ddip) {
-		bio->bi_status = BLK_STS_RESOURCE;
-		bio_endio(bio);
+	if (bio_op(orig_bio) == REQ_OP_DISCARD &&
+	    !bdev_max_discard_sectors(dc->bdev)) {
+		bio_endio(orig_bio);
 		return;
 	}
 
-	ddip->d = d;
-	/* Count on the bcache device */
-	ddip->orig_bdev = orig_bdev;
-	ddip->start_time = start_time;
-	ddip->bi_end_io = bio->bi_end_io;
-	ddip->bi_private = bio->bi_private;
-	bio->bi_end_io = detached_dev_end_io;
-	bio->bi_private = ddip;
+	clone_bio = bio_alloc_clone(dc->bdev, orig_bio, GFP_NOIO,
+				    &d->bio_detached);
+	if (!clone_bio) {
+		orig_bio->bi_status = BLK_STS_RESOURCE;
+		bio_endio(orig_bio);
+		return;
+	}
 
-	if ((bio_op(bio) == REQ_OP_DISCARD) &&
-	    !bdev_max_discard_sectors(dc->bdev))
-		detached_dev_end_io(bio);
-	else
-		submit_bio_noacct(bio);
+	ddip = container_of(clone_bio, struct detached_dev_io_private, bio);
+	/* Count on the bcache device */
+	ddip->d = d;
+	ddip->start_time = start_time;
+	ddip->orig_bio = orig_bio;
+
+	clone_bio->bi_end_io = detached_dev_end_io;
+	clone_bio->bi_private = dc;
+
+	submit_bio_noacct(clone_bio);
 }
 
 static void quit_max_writeback_rate(struct cache_set *c,
···
 
 	start_time = bio_start_io_acct(bio);
 
-	bio_set_dev(bio, dc->bdev);
 	bio->bi_iter.bi_sector += dc->sb.data_offset;
 
 	if (cached_dev_get(dc)) {
+		bio_set_dev(bio, dc->bdev);
 		s = search_alloc(bio, d, orig_bdev, start_time);
 		trace_bcache_request_start(s->d, bio);
···
 			else
 				cached_dev_read(dc, s);
 		}
-	} else
+	} else {
 		/* I/O request sent to backing device */
-		detached_dev_do_request(d, bio, orig_bdev, start_time);
+		detached_dev_do_request(d, bio, start_time);
+	}
 }
 
 static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode,
+10 -2
drivers/md/bcache/super.c
···
 	}
 
 	bioset_exit(&d->bio_split);
+	bioset_exit(&d->bio_detached);
 	kvfree(d->full_dirty_stripes);
 	kvfree(d->stripe_sectors_dirty);
 
···
 			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
 		goto out_ida_remove;
 
+	if (bioset_init(&d->bio_detached, 4,
+			offsetof(struct detached_dev_io_private, bio),
+			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
+		goto out_bioset_split_exit;
+
 	if (lim.logical_block_size > PAGE_SIZE && cached_bdev) {
 		/*
 		 * This should only happen with BCACHE_SB_VERSION_BDEV.
···
 
 	d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
 	if (IS_ERR(d->disk))
-		goto out_bioset_exit;
+		goto out_bioset_detach_exit;
 
 	set_capacity(d->disk, sectors);
 	snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
···
 	d->disk->private_data = d;
 	return 0;
 
-out_bioset_exit:
+out_bioset_detach_exit:
+	bioset_exit(&d->bio_detached);
+out_bioset_split_exit:
 	bioset_exit(&d->bio_split);
 out_ida_remove:
 	ida_free(&bcache_device_idx, idx);
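
The new bioset is what makes the container_of() in detached_dev_end_io() legal: front_pad is set to offsetof(struct detached_dev_io_private, bio), so every bio allocated from it is embedded at the tail of a private struct. The general shape of the pattern, with illustrative names (bioset_init(), bio_alloc_clone() and container_of() are used exactly this way by the patch):

struct my_io {
	void		*state;		/* per-I/O driver state */
	struct bio	bio;		/* must be the last member */
};

/* init: reserve the bytes that precede the embedded bio */
static int my_io_setup(struct bio_set *bs)
{
	return bioset_init(bs, 4, offsetof(struct my_io, bio),
			   BIOSET_NEED_BVECS);
}

static struct my_io *my_io_clone(struct bio *orig, struct block_device *bdev,
				 struct bio_set *bs)
{
	struct bio *clone = bio_alloc_clone(bdev, orig, GFP_NOIO, bs);

	if (!clone)
		return NULL;
	/* the clone sits inside a struct my_io from the bioset */
	return container_of(clone, struct my_io, bio);
}

Cloning is also the design point: the old code hijacked the original bio's bi_end_io/bi_private and restored them by hand on completion, which is the endio handling this fix replaces.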
+4 -2
include/uapi/linux/blkzoned.h
···
 	BLK_ZONE_COND_FULL	= 0xE,
 	BLK_ZONE_COND_OFFLINE	= 0xF,
 
-	BLK_ZONE_COND_ACTIVE	= 0xFF,
+	BLK_ZONE_COND_ACTIVE	= 0xFF,	/* added in Linux 6.19 */
+#define BLK_ZONE_COND_ACTIVE BLK_ZONE_COND_ACTIVE
 };
 
 /**
···
 	BLK_ZONE_REP_CAPACITY	= (1U << 0),
 
 	/* Input flags */
-	BLK_ZONE_REP_CACHED	= (1U << 31),
+	BLK_ZONE_REP_CACHED	= (1U << 31),	/* added in Linux 6.19 */
+#define BLK_ZONE_REP_CACHED BLK_ZONE_REP_CACHED
 };
 
 /**
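
The self-referential #define is the standard UAPI idiom for making new enum constants visible to the preprocessor, since enum members themselves cannot be tested with #ifdef. A hedged userspace sketch (only the feature test is the point; the constants are exactly the ones added above):

#include <linux/blkzoned.h>

static unsigned int zone_report_flags(void)
{
	unsigned int flags = 0;

#ifdef BLK_ZONE_REP_CACHED
	flags |= BLK_ZONE_REP_CACHED;	/* accept a cached zone report */
#endif
	return flags;
}

The same #ifdef works for BLK_ZONE_COND_ACTIVE, so applications can handle the new zone condition without resorting to kernel version checks.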
+7 -4
tools/testing/selftests/ublk/kublk.c
···
 
 static int ublk_thread_is_done(struct ublk_thread *t)
 {
-	return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t);
+	return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t) && !t->cmd_inflight;
 }
 
 static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t,
···
 	}
 	if (ret < 0) {
 		ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
-		goto fail;
+		/* stop device so that inflight uring_cmd can be cancelled */
+		ublk_ctrl_stop_dev(dev);
+		goto fail_start;
 	}
 
 	ublk_ctrl_get_info(dev);
···
 		ublk_ctrl_dump(dev);
 	else
 		ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
-
+ fail_start:
 	/* wait until we are terminated */
 	for (i = 0; i < dev->nthreads; i++)
 		pthread_join(tinfo[i].thread, &thread_ret);
···
 	}
 
 	ret = ublk_start_daemon(ctx, dev);
-	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret);
+	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
 	if (ret < 0)
 		ublk_ctrl_del_dev(dev);
 
···
 	int option_idx, opt;
 	const char *cmd = argv[1];
 	struct dev_ctx ctx = {
+		._evtfd = -1,
 		.queue_depth = 128,
 		.nr_hw_queues = 2,
 		.dev_id = -1,
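
Note the ublk_dbg() change above: it is a classic printf-argument mismatch, where "%s: daemon exit %d\b" was given only ret, so %s consumed ret as a pointer and %d read whatever followed on the stack, topped off with a stray backspace. Annotated restatement of the two lines in the hunk:

	/* before: %s reads ret as a char *, %d reads stack garbage */
	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret);

	/* after: arguments match the format, and \b becomes \n */
	ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);

(The ._evtfd = -1 initializer looks related: with the previous implicit default of 0, an unused event fd would presumably alias stdin/stdout in foreground mode.)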