Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'block-5.7-2020-04-10' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"Here's a set of fixes that should go into this merge window. This
contains:

- NVMe pull request from Christoph with various fixes

- Better discard support for loop (Evan)

- Only call ->commit_rqs() if we have queued IO (Keith)

- blkcg offlining fixes (Tejun)

- fix (and fix the fix) for busy partitions"

* tag 'block-5.7-2020-04-10' of git://git.kernel.dk/linux-block:
block: fix busy device checking in blk_drop_partitions again
block: fix busy device checking in blk_drop_partitions
nvmet-rdma: fix double free of rdma queue
blk-mq: don't commit_rqs() if none were queued
nvme-fc: Revert "add module to ops template to allow module references"
nvme: fix deadlock caused by ANA update wrong locking
nvmet-rdma: fix bonding failover possible NULL deref
loop: Better discard support for block devices
loop: Report EOPNOTSUPP properly
nvmet: fix NULL dereference when removing a referral
nvme: inherit stable pages constraint in the mpath stack device
blkcg: don't offline parent blkcg first
blkcg: rename blkcg->cgwb_refcnt to ->online_pin and always use it
nvme-tcp: fix possible crash in recv error flow
nvme-tcp: don't poll a non-live queue
nvme-tcp: fix possible crash in write_zeroes processing
nvmet-fc: fix typo in comment
nvme-rdma: Replace comma with a semicolon
nvme-fcloop: fix deallocation of working context
nvme: fix compat address handling in several ioctls

+324 -180
+19 -3
block/blk-cgroup.c
···
 	/* this prevents anyone from attaching or migrating to this blkcg */
 	wb_blkcg_offline(blkcg);
 
-	/* put the base cgwb reference allowing step 2 to be triggered */
-	blkcg_cgwb_put(blkcg);
+	/* put the base online pin allowing step 2 to be triggered */
+	blkcg_unpin_online(blkcg);
 }
 
 /**
···
 	}
 
 	spin_lock_init(&blkcg->lock);
+	refcount_set(&blkcg->online_pin, 1);
 	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&blkcg->cgwb_list);
-	refcount_set(&blkcg->cgwb_refcnt, 1);
 #endif
 	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
 
···
 unlock:
 	mutex_unlock(&blkcg_pol_mutex);
 	return ret;
+}
+
+static int blkcg_css_online(struct cgroup_subsys_state *css)
+{
+	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *parent = blkcg_parent(blkcg);
+
+	/*
+	 * blkcg_pin_online() is used to delay blkcg offline so that blkgs
+	 * don't go offline while cgwbs are still active on them. Pin the
+	 * parent so that offline always happens towards the root.
+	 */
+	if (parent)
+		blkcg_pin_online(parent);
+	return 0;
 }
 
 /**
···
 
 struct cgroup_subsys io_cgrp_subsys = {
 	.css_alloc = blkcg_css_alloc,
+	.css_online = blkcg_css_online,
 	.css_offline = blkcg_css_offline,
 	.css_free = blkcg_css_free,
 	.can_attach = blkcg_can_attach,
+6 -3
block/blk-mq.c
···
 	 * the driver there was more coming, but that turned out to
 	 * be a lie.
 	 */
-	if (q->mq_ops->commit_rqs)
+	if (q->mq_ops->commit_rqs && queued)
 		q->mq_ops->commit_rqs(hctx);
 
 	spin_lock(&hctx->lock);
···
 void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		struct list_head *list)
 {
+	int queued = 0;
+
 	while (!list_empty(list)) {
 		blk_status_t ret;
 		struct request *rq = list_first_entry(list, struct request,
···
 				break;
 			}
 			blk_mq_end_request(rq, ret);
-		}
+		} else
+			queued++;
 	}
 
 	/*
···
 	 * the driver there was more coming, but that turned out to
 	 * be a lie.
 	 */
-	if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
+	if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued)
 		hctx->queue->mq_ops->commit_rqs(hctx);
 }
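Illustration (not part of the pull): the blk-mq hunk above adds a queued counter so ->commit_rqs() is only rung when at least one request was actually handed to the driver. A minimal user-space C sketch of the same guard; all toy_* names are invented for the example.

/* sketch: skip the "commit" doorbell when nothing was queued */
#include <stdio.h>

struct toy_queue {
	int depth;
};

/* pretend driver hook: returns 0 on success, -1 if the queue is full */
static int toy_queue_rq(struct toy_queue *q, int rq)
{
	if (q->depth >= 2)
		return -1;
	q->depth++;
	printf("queued request %d\n", rq);
	return 0;
}

/* pretend ->commit_rqs(): make previously queued requests visible */
static void toy_commit_rqs(struct toy_queue *q)
{
	printf("commit: %d request(s) made visible to hardware\n", q->depth);
}

int main(void)
{
	struct toy_queue q = { 0 };
	int queued = 0;

	for (int rq = 1; rq <= 4; rq++)
		if (toy_queue_rq(&q, rq) == 0)
			queued++;

	/* the guard from the patch: no doorbell if nothing was queued */
	if (queued)
		toy_commit_rqs(&q);
	return 0;
}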
+1 -1
block/partitions/core.c
···
 
 	if (!disk_part_scan_enabled(disk))
 		return 0;
-	if (bdev->bd_part_count || bdev->bd_super)
+	if (bdev->bd_part_count || bdev->bd_openers > 1)
 		return -EBUSY;
 	res = invalidate_partition(disk, 0);
 	if (res)
+36 -13
drivers/block/loop.c
···
 	 * information.
 	 */
 	struct file *file = lo->lo_backing_file;
+	struct request_queue *q = lo->lo_queue;
 	int ret;
 
 	mode |= FALLOC_FL_KEEP_SIZE;
 
-	if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
+	if (!blk_queue_discard(q)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
···
 	if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
 	    req_op(rq) != REQ_OP_READ) {
 		if (cmd->ret < 0)
-			ret = BLK_STS_IOERR;
+			ret = errno_to_blk_status(cmd->ret);
 		goto end_io;
 	}
···
 	struct request_queue *q = lo->lo_queue;
 
 	/*
+	 * If the backing device is a block device, mirror its zeroing
+	 * capability. Set the discard sectors to the block device's zeroing
+	 * capabilities because loop discards result in blkdev_issue_zeroout(),
+	 * not blkdev_issue_discard(). This maintains consistent behavior with
+	 * file-backed loop devices: discarded regions read back as zero.
+	 */
+	if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) {
+		struct request_queue *backingq;
+
+		backingq = bdev_get_queue(inode->i_bdev);
+		blk_queue_max_discard_sectors(q,
+			backingq->limits.max_write_zeroes_sectors);
+
+		blk_queue_max_write_zeroes_sectors(q,
+			backingq->limits.max_write_zeroes_sectors);
+
+	/*
 	 * We use punch hole to reclaim the free space used by the
 	 * image a.k.a. discard. However we do not support discard if
 	 * encryption is enabled, because it may give an attacker
 	 * useful information.
 	 */
-	if ((!file->f_op->fallocate) ||
-	    lo->lo_encrypt_key_size) {
+	} else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
 		q->limits.discard_granularity = 0;
 		q->limits.discard_alignment = 0;
 		blk_queue_max_discard_sectors(q, 0);
 		blk_queue_max_write_zeroes_sectors(q, 0);
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
-		return;
+
+	} else {
+		q->limits.discard_granularity = inode->i_sb->s_blocksize;
+		q->limits.discard_alignment = 0;
+
+		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
 	}
 
-	q->limits.discard_granularity = inode->i_sb->s_blocksize;
-	q->limits.discard_alignment = 0;
-
-	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	if (q->limits.max_write_zeroes_sectors)
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
 }
 
 static void loop_unprepare_queue(struct loop_device *lo)
···
 failed:
 	/* complete non-aio request */
 	if (!cmd->use_aio || ret) {
-		cmd->ret = ret ? -EIO : 0;
+		if (ret == -EOPNOTSUPP)
+			cmd->ret = ret;
+		else
+			cmd->ret = ret ? -EIO : 0;
 		blk_mq_complete_request(rq);
 	}
 }
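Illustration (not part of the pull): the loop hunk above advertises discard on a block-backed loop device only when the backing queue can write zeroes, since loop implements discard as zeroing there. A minimal user-space C sketch of that capability-mirroring rule; the toy_* names are invented for the example.

/* sketch: mirror the backing device's write-zeroes limit into the loop queue */
#include <stdbool.h>
#include <stdio.h>

struct toy_limits {
	unsigned int max_write_zeroes_sectors;
	unsigned int max_discard_sectors;
	bool discard_enabled;
};

static void toy_config_discard(struct toy_limits *loopq,
			       const struct toy_limits *backingq)
{
	/* loop discards become zeroing, so copy the zeroing capability */
	loopq->max_discard_sectors = backingq->max_write_zeroes_sectors;
	loopq->max_write_zeroes_sectors = backingq->max_write_zeroes_sectors;
	loopq->discard_enabled = loopq->max_write_zeroes_sectors != 0;
}

int main(void)
{
	struct toy_limits backing = { .max_write_zeroes_sectors = 0 };
	struct toy_limits loopq = { 0 };

	toy_config_discard(&loopq, &backing);
	printf("discard %s\n", loopq.discard_enabled ? "enabled" : "disabled");
	return 0;
}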
+27 -7
drivers/nvme/host/core.c
···
 
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/compat.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hdreg.h>
···
 	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 
+/*
+ * Convert integer values from ioctl structures to user pointers, silently
+ * ignoring the upper bits in the compat case to match behaviour of 32-bit
+ * kernels.
+ */
+static void __user *nvme_to_user_ptr(uintptr_t ptrval)
+{
+	if (in_compat_syscall())
+		ptrval = (compat_uptr_t)ptrval;
+	return (void __user *)ptrval;
+}
+
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_user_io io;
···
 
 	length = (io.nblocks + 1) << ns->lba_shift;
 	meta_len = (io.nblocks + 1) * ns->ms;
-	metadata = (void __user *)(uintptr_t)io.metadata;
+	metadata = nvme_to_user_ptr(io.metadata);
 
 	if (ns->ext) {
 		length += meta_len;
···
 	c.rw.appmask = cpu_to_le16(io.appmask);
 
 	return nvme_submit_user_cmd(ns->queue, &c,
-			(void __user *)(uintptr_t)io.addr, length,
+			nvme_to_user_ptr(io.addr), length,
 			metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
 }
 
···
 
 	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
-			(void __user *)(uintptr_t)cmd.metadata,
-			cmd.metadata_len, 0, &result, timeout);
+			nvme_to_user_ptr(cmd.addr), cmd.data_len,
+			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
+			0, &result, timeout);
 	nvme_passthru_end(ctrl, effects);
 
 	if (status >= 0) {
···
 
 	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
-			(void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len,
+			nvme_to_user_ptr(cmd.addr), cmd.data_len,
+			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
 			0, &cmd.result, timeout);
 	nvme_passthru_end(ctrl, effects);
···
 	if (ns->head->disk) {
 		nvme_update_disk_info(ns->head->disk, ns, id);
 		blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+		if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
+			struct backing_dev_info *info =
+				ns->head->disk->queue->backing_dev_info;
+
+			info->capabilities |= BDI_CAP_STABLE_WRITES;
+		}
+
 		revalidate_disk(ns->head->disk);
 	}
 #endif
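Illustration (not part of the pull): the nvme core hunk above adds nvme_to_user_ptr(), which drops the stale upper 32 bits of a pointer-carrying u64 ioctl field when the caller is a 32-bit (compat) process. A minimal user-space C sketch of the narrowing idea; the is_compat flag stands in for in_compat_syscall() and all other names are invented for the example.

/* sketch: narrow a 64-bit ioctl field to a 32-bit pointer value in compat mode */
#include <stdint.h>
#include <stdio.h>

static uintptr_t to_user_ptr(uint64_t ptrval, int is_compat)
{
	if (is_compat)
		ptrval = (uint32_t)ptrval;	/* ignore garbage in the high half */
	return (uintptr_t)ptrval;
}

int main(void)
{
	uint64_t field = 0xdeadbeef00001000ULL;	/* high half never set by a 32-bit caller */

	printf("native: %#lx\n", (unsigned long)to_user_ptr(field, 0));
	printf("compat: %#lx\n", (unsigned long)to_user_ptr(field, 1));
	return 0;
}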
+2 -12
drivers/nvme/host/fc.c
···
 	    !template->ls_req || !template->fcp_io ||
 	    !template->ls_abort || !template->fcp_abort ||
 	    !template->max_hw_queues || !template->max_sgl_segments ||
-	    !template->max_dif_sgl_segments || !template->dma_boundary ||
-	    !template->module) {
+	    !template->max_dif_sgl_segments || !template->dma_boundary) {
 		ret = -EINVAL;
 		goto out_reghost_failed;
 	}
···
 {
 	struct nvme_fc_ctrl *ctrl =
 		container_of(ref, struct nvme_fc_ctrl, ref);
-	struct nvme_fc_lport *lport = ctrl->lport;
 	unsigned long flags;
 
 	if (ctrl->ctrl.tagset) {
···
 	if (ctrl->ctrl.opts)
 		nvmf_free_options(ctrl->ctrl.opts);
 	kfree(ctrl);
-	module_put(lport->ops->module);
 }
 
 static void
···
 		goto out_fail;
 	}
 
-	if (!try_module_get(lport->ops->module)) {
-		ret = -EUNATCH;
-		goto out_free_ctrl;
-	}
-
 	idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
-		goto out_mod_put;
+		goto out_free_ctrl;
 	}
 
 	ctrl->ctrl.opts = opts;
···
 out_free_ida:
 	put_device(ctrl->dev);
 	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
-out_mod_put:
-	module_put(lport->ops->module);
 out_free_ctrl:
 	kfree(ctrl);
 out_fail:
+2 -2
drivers/nvme/host/multipath.c
···
 	if (!nr_nsids)
 		return 0;
 
-	down_write(&ctrl->namespaces_rwsem);
+	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		unsigned nsid = le32_to_cpu(desc->nsids[n]);
 
···
 		if (++n == nr_nsids)
 			break;
 	}
-	up_write(&ctrl->namespaces_rwsem);
+	up_read(&ctrl->namespaces_rwsem);
 	return 0;
 }
 
+1 -1
drivers/nvme/host/rdma.c
···
 	int ret;
 
 	sge->addr = qe->dma;
-	sge->length = sizeof(struct nvme_command),
+	sge->length = sizeof(struct nvme_command);
 	sge->lkey = queue->device->pd->local_dma_lkey;
 
 	wr.next = NULL;
+11 -7
drivers/nvme/host/tcp.c
···
 static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
 {
 	struct request *rq;
-	unsigned int bytes;
 
 	if (unlikely(nvme_tcp_async_req(req)))
 		return false; /* async events don't have a request */
 
 	rq = blk_mq_rq_from_pdu(req);
-	bytes = blk_rq_payload_bytes(rq);
 
-	return rq_data_dir(rq) == WRITE && bytes &&
-		bytes <= nvme_tcp_inline_data_size(req->queue);
+	return rq_data_dir(rq) == WRITE && req->data_len &&
+		req->data_len <= nvme_tcp_inline_data_size(req->queue);
 }
 
 static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
···
 		if (result > 0)
 			pending = true;
 		else if (unlikely(result < 0))
-			break;
+			return;
 
 		if (!pending)
 			return;
···
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
-	if (rq_data_dir(rq) == WRITE && req->data_len &&
+	if (!blk_rq_nr_phys_segments(rq))
+		nvme_tcp_set_sg_null(c);
+	else if (rq_data_dir(rq) == WRITE &&
 	    req->data_len <= nvme_tcp_inline_data_size(queue))
 		nvme_tcp_set_sg_inline(queue, c, req->data_len);
 	else
···
 	req->data_sent = 0;
 	req->pdu_len = 0;
 	req->pdu_sent = 0;
-	req->data_len = blk_rq_payload_bytes(rq);
+	req->data_len = blk_rq_nr_phys_segments(rq) ?
+				blk_rq_payload_bytes(rq) : 0;
 	req->curr_bio = rq->bio;
 
 	if (rq_data_dir(rq) == WRITE &&
···
 {
 	struct nvme_tcp_queue *queue = hctx->driver_data;
 	struct sock *sk = queue->sock->sk;
 
+	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+		return 0;
 
 	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
 		sk_busy_loop(sk, true);
+9 -1
drivers/nvme/target/configfs.c
···
 	NULL,
 };
 
-static void nvmet_referral_release(struct config_item *item)
+static void nvmet_referral_notify(struct config_group *group,
+		struct config_item *item)
 {
 	struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent);
 	struct nvmet_port *port = to_nvmet_port(item);
 
 	nvmet_referral_disable(parent, port);
+}
+
+static void nvmet_referral_release(struct config_item *item)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+
 	kfree(port);
 }
 
···
 
 static struct configfs_group_operations nvmet_referral_group_ops = {
 	.make_group = nvmet_referral_make,
+	.disconnect_notify = nvmet_referral_notify,
 };
 
 static const struct config_item_type nvmet_referrals_type = {
+1 -1
drivers/nvme/target/fc.c
···
 	disconnect = atomic_xchg(&queue->connected, 0);
 
 	spin_lock_irqsave(&queue->qlock, flags);
-	/* about outstanding io's */
+	/* abort outstanding io's */
 	for (i = 0; i < queue->sqsize; fod++, i++) {
 		if (fod->active) {
 			spin_lock(&fod->flock);
+52 -25
drivers/nvme/target/fcloop.c
···
 };
 
 struct fcloop_rport {
-	struct nvme_fc_remote_port *remoteport;
-	struct nvmet_fc_target_port *targetport;
-	struct fcloop_nport *nport;
-	struct fcloop_lport *lport;
+	struct nvme_fc_remote_port	*remoteport;
+	struct nvmet_fc_target_port	*targetport;
+	struct fcloop_nport		*nport;
+	struct fcloop_lport		*lport;
+	spinlock_t			lock;
+	struct list_head		ls_list;
+	struct work_struct		ls_work;
 };
 
 struct fcloop_tport {
···
 };
 
 struct fcloop_lsreq {
-	struct fcloop_tport		*tport;
 	struct nvmefc_ls_req		*lsreq;
-	struct work_struct		work;
 	struct nvmefc_tgt_ls_req	tgt_ls_req;
 	int				status;
+	struct list_head		ls_list; /* fcloop_rport->ls_list */
 };
 
 struct fcloop_rscn {
···
 {
 }
 
-
-/*
- * Transmit of LS RSP done (e.g. buffers all set). call back up
- * initiator "done" flows.
- */
 static void
-fcloop_tgt_lsrqst_done_work(struct work_struct *work)
+fcloop_rport_lsrqst_work(struct work_struct *work)
 {
-	struct fcloop_lsreq *tls_req =
-		container_of(work, struct fcloop_lsreq, work);
-	struct fcloop_tport *tport = tls_req->tport;
-	struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+	struct fcloop_rport *rport =
+		container_of(work, struct fcloop_rport, ls_work);
+	struct fcloop_lsreq *tls_req;
 
-	if (!tport || tport->remoteport)
-		lsreq->done(lsreq, tls_req->status);
+	spin_lock(&rport->lock);
+	for (;;) {
+		tls_req = list_first_entry_or_null(&rport->ls_list,
+				struct fcloop_lsreq, ls_list);
+		if (!tls_req)
+			break;
+
+		list_del(&tls_req->ls_list);
+		spin_unlock(&rport->lock);
+
+		tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
+		/*
+		 * callee may free memory containing tls_req.
+		 * do not reference lsreq after this.
+		 */
+
+		spin_lock(&rport->lock);
+	}
+	spin_unlock(&rport->lock);
 }
 
 static int
···
 	int ret = 0;
 
 	tls_req->lsreq = lsreq;
-	INIT_WORK(&tls_req->work, fcloop_tgt_lsrqst_done_work);
+	INIT_LIST_HEAD(&tls_req->ls_list);
 
 	if (!rport->targetport) {
 		tls_req->status = -ECONNREFUSED;
-		tls_req->tport = NULL;
-		schedule_work(&tls_req->work);
+		spin_lock(&rport->lock);
+		list_add_tail(&rport->ls_list, &tls_req->ls_list);
+		spin_unlock(&rport->lock);
+		schedule_work(&rport->ls_work);
 		return ret;
 	}
 
 	tls_req->status = 0;
-	tls_req->tport = rport->targetport->private;
 	ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req,
 				 lsreq->rqstaddr, lsreq->rqstlen);
 
···
 }
 
 static int
-fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport,
+fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
 			struct nvmefc_tgt_ls_req *tgt_lsreq)
 {
 	struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq);
 	struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+	struct fcloop_tport *tport = targetport->private;
+	struct nvme_fc_remote_port *remoteport = tport->remoteport;
+	struct fcloop_rport *rport;
 
 	memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf,
 		((lsreq->rsplen < tgt_lsreq->rsplen) ?
 			lsreq->rsplen : tgt_lsreq->rsplen));
+
 	tgt_lsreq->done(tgt_lsreq);
 
-	schedule_work(&tls_req->work);
+	if (remoteport) {
+		rport = remoteport->private;
+		spin_lock(&rport->lock);
+		list_add_tail(&rport->ls_list, &tls_req->ls_list);
+		spin_unlock(&rport->lock);
+		schedule_work(&rport->ls_work);
+	}
 
 	return 0;
 }
···
 {
 	struct fcloop_rport *rport = remoteport->private;
 
+	flush_work(&rport->ls_work);
 	fcloop_nport_put(rport->nport);
 }
 
···
 #define FCLOOP_DMABOUND_4G 0xFFFFFFFF
 
 static struct nvme_fc_port_template fctemplate = {
-	.module = THIS_MODULE,
 	.localport_delete = fcloop_localport_delete,
 	.remoteport_delete = fcloop_remoteport_delete,
 	.create_queue = fcloop_create_queue,
···
 	rport->nport = nport;
 	rport->lport = nport->lport;
 	nport->rport = rport;
+	spin_lock_init(&rport->lock);
+	INIT_WORK(&rport->ls_work, fcloop_rport_lsrqst_work);
+	INIT_LIST_HEAD(&rport->ls_list);
 
 	return count;
 }
+137 -68
drivers/nvme/target/rdma.c
···
 
 struct nvmet_rdma_queue {
 	struct rdma_cm_id *cm_id;
+	struct ib_qp *qp;
 	struct nvmet_port *port;
 	struct ib_cq *cq;
 	atomic_t sq_wr_avail;
···
 	int send_queue_size;
 
 	struct list_head queue_list;
+};
+
+struct nvmet_rdma_port {
+	struct nvmet_port *nport;
+	struct sockaddr_storage addr;
+	struct rdma_cm_id *cm_id;
+	struct delayed_work repair_work;
 };
 
 struct nvmet_rdma_device {
···
 	if (ndev->srq)
 		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
 	else
-		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
+		ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
 
 	if (unlikely(ret))
 		pr_err("post_recv cmd failed\n");
···
 	atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
 
 	if (rsp->n_rdma) {
-		rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+		rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, rsp->req.sg,
 				rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	}
···
 
 	WARN_ON(rsp->n_rdma <= 0);
 	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
-	rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+	rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 			queue->cm_id->port_num, rsp->req.sg,
 			rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	rsp->n_rdma = 0;
···
 	}
 
 	if (nvmet_rdma_need_data_in(rsp)) {
-		if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp,
+		if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, &rsp->read_cqe, NULL))
 			nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
 	} else {
···
 static struct nvmet_rdma_device *
 nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 {
-	struct nvmet_port *port = cm_id->context;
+	struct nvmet_rdma_port *port = cm_id->context;
+	struct nvmet_port *nport = port->nport;
 	struct nvmet_rdma_device *ndev;
 	int inline_page_count;
 	int inline_sge_count;
···
 	if (!ndev)
 		goto out_err;
 
-	inline_page_count = num_pages(port->inline_data_size);
+	inline_page_count = num_pages(nport->inline_data_size);
 	inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
 				cm_id->device->attrs.max_recv_sge) - 1;
 	if (inline_page_count > inline_sge_count) {
 		pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
-			port->inline_data_size, cm_id->device->name,
+			nport->inline_data_size, cm_id->device->name,
 			inline_sge_count * PAGE_SIZE);
-		port->inline_data_size = inline_sge_count * PAGE_SIZE;
+		nport->inline_data_size = inline_sge_count * PAGE_SIZE;
 		inline_page_count = inline_sge_count;
 	}
-	ndev->inline_data_size = port->inline_data_size;
+	ndev->inline_data_size = nport->inline_data_size;
 	ndev->inline_page_count = inline_page_count;
 	ndev->device = cm_id->device;
 	kref_init(&ndev->ref);
···
 		pr_err("failed to create_qp ret= %d\n", ret);
 		goto err_destroy_cq;
 	}
+	queue->qp = queue->cm_id->qp;
 
 	atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
···
 
 static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
 {
-	struct ib_qp *qp = queue->cm_id->qp;
-
-	ib_drain_qp(qp);
-	rdma_destroy_id(queue->cm_id);
-	ib_destroy_qp(qp);
+	ib_drain_qp(queue->qp);
+	if (queue->cm_id)
+		rdma_destroy_id(queue->cm_id);
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->cq);
 }
···
 static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
+	struct nvmet_rdma_port *port = cm_id->context;
 	struct nvmet_rdma_device *ndev;
 	struct nvmet_rdma_queue *queue;
 	int ret = -EINVAL;
···
 		ret = -ENOMEM;
 		goto put_device;
 	}
-	queue->port = cm_id->context;
+	queue->port = port->nport;
 
 	if (queue->host_qid == 0) {
 		/* Let inflight controller teardown complete */
···
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret) {
-		schedule_work(&queue->release_work);
-		/* Destroying rdma_cm id is not needed here */
-		return 0;
+		/*
+		 * Don't destroy the cm_id in free path, as we implicitly
+		 * destroy the cm_id here with non-zero ret code.
+		 */
+		queue->cm_id = NULL;
+		goto free_queue;
 	}
 
 	mutex_lock(&nvmet_rdma_queue_mutex);
···
 
 	return 0;
 
+free_queue:
+	nvmet_rdma_free_queue(queue);
 put_device:
 	kref_put(&ndev->ref, nvmet_rdma_free_dev);
 
···
 static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
 		struct nvmet_rdma_queue *queue)
 {
-	struct nvmet_port *port;
+	struct nvmet_rdma_port *port;
 
 	if (queue) {
 		/*
···
 	 * cm_id destroy. use atomic xchg to make sure
 	 * we don't compete with remove_port.
 	 */
-	if (xchg(&port->priv, NULL) != cm_id)
+	if (xchg(&port->cm_id, NULL) != cm_id)
 		return 0;
 
 	/*
···
 		nvmet_rdma_queue_established(queue);
 		break;
 	case RDMA_CM_EVENT_ADDR_CHANGE:
+		if (!queue) {
+			struct nvmet_rdma_port *port = cm_id->context;
+
+			schedule_delayed_work(&port->repair_work, 0);
+			break;
+		}
+		/* FALLTHROUGH */
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
 		nvmet_rdma_queue_disconnect(queue);
···
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 }
 
-static int nvmet_rdma_add_port(struct nvmet_port *port)
+static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
 {
+	struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);
+
+	if (cm_id)
+		rdma_destroy_id(cm_id);
+}
+
+static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
+{
+	struct sockaddr *addr = (struct sockaddr *)&port->addr;
 	struct rdma_cm_id *cm_id;
-	struct sockaddr_storage addr = { };
-	__kernel_sa_family_t af;
 	int ret;
-
-	switch (port->disc_addr.adrfam) {
-	case NVMF_ADDR_FAMILY_IP4:
-		af = AF_INET;
-		break;
-	case NVMF_ADDR_FAMILY_IP6:
-		af = AF_INET6;
-		break;
-	default:
-		pr_err("address family %d not supported\n",
-				port->disc_addr.adrfam);
-		return -EINVAL;
-	}
-
-	if (port->inline_data_size < 0) {
-		port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
-	} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
-		pr_warn("inline_data_size %u is too large, reducing to %u\n",
-			port->inline_data_size,
-			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
-		port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
-	}
-
-	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
-			port->disc_addr.trsvcid, &addr);
-	if (ret) {
-		pr_err("malformed ip/port passed: %s:%s\n",
-			port->disc_addr.traddr, port->disc_addr.trsvcid);
-		return ret;
-	}
 
 	cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
 			RDMA_PS_TCP, IB_QPT_RC);
···
 		goto out_destroy_id;
 	}
 
-	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr);
+	ret = rdma_bind_addr(cm_id, addr);
 	if (ret) {
-		pr_err("binding CM ID to %pISpcs failed (%d)\n",
-			(struct sockaddr *)&addr, ret);
+		pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret);
 		goto out_destroy_id;
 	}
 
 	ret = rdma_listen(cm_id, 128);
 	if (ret) {
-		pr_err("listening to %pISpcs failed (%d)\n",
-			(struct sockaddr *)&addr, ret);
+		pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
 		goto out_destroy_id;
 	}
 
-	pr_info("enabling port %d (%pISpcs)\n",
-		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
-	port->priv = cm_id;
+	port->cm_id = cm_id;
 	return 0;
 
 out_destroy_id:
···
 	return ret;
 }
 
-static void nvmet_rdma_remove_port(struct nvmet_port *port)
+static void nvmet_rdma_repair_port_work(struct work_struct *w)
 {
-	struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
+	struct nvmet_rdma_port *port = container_of(to_delayed_work(w),
+			struct nvmet_rdma_port, repair_work);
+	int ret;
 
-	if (cm_id)
-		rdma_destroy_id(cm_id);
+	nvmet_rdma_disable_port(port);
+	ret = nvmet_rdma_enable_port(port);
+	if (ret)
+		schedule_delayed_work(&port->repair_work, 5 * HZ);
+}
+
+static int nvmet_rdma_add_port(struct nvmet_port *nport)
+{
+	struct nvmet_rdma_port *port;
+	__kernel_sa_family_t af;
+	int ret;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	nport->priv = port;
+	port->nport = nport;
+	INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work);
+
+	switch (nport->disc_addr.adrfam) {
+	case NVMF_ADDR_FAMILY_IP4:
+		af = AF_INET;
+		break;
+	case NVMF_ADDR_FAMILY_IP6:
+		af = AF_INET6;
+		break;
+	default:
+		pr_err("address family %d not supported\n",
+				nport->disc_addr.adrfam);
+		ret = -EINVAL;
+		goto out_free_port;
+	}
+
+	if (nport->inline_data_size < 0) {
+		nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
+	} else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
+		pr_warn("inline_data_size %u is too large, reducing to %u\n",
+			nport->inline_data_size,
+			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
+		nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
+	}
+
+	ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
+			nport->disc_addr.trsvcid, &port->addr);
+	if (ret) {
+		pr_err("malformed ip/port passed: %s:%s\n",
+			nport->disc_addr.traddr, nport->disc_addr.trsvcid);
+		goto out_free_port;
+	}
+
+	ret = nvmet_rdma_enable_port(port);
+	if (ret)
+		goto out_free_port;
+
+	pr_info("enabling port %d (%pISpcs)\n",
+		le16_to_cpu(nport->disc_addr.portid),
+		(struct sockaddr *)&port->addr);
+
+	return 0;
+
+out_free_port:
+	kfree(port);
+	return ret;
+}
+
+static void nvmet_rdma_remove_port(struct nvmet_port *nport)
+{
+	struct nvmet_rdma_port *port = nport->priv;
+
+	cancel_delayed_work_sync(&port->repair_work);
+	nvmet_rdma_disable_port(port);
+	kfree(port);
 }
 
 static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
-		struct nvmet_port *port, char *traddr)
+		struct nvmet_port *nport, char *traddr)
 {
-	struct rdma_cm_id *cm_id = port->priv;
+	struct nvmet_rdma_port *port = nport->priv;
+	struct rdma_cm_id *cm_id = port->cm_id;
 
 	if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
 		struct nvmet_rdma_rsp *rsp =
···
 
 		sprintf(traddr, "%pISc", addr);
 	} else {
-		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+		memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
 	}
 }
 
-2
drivers/scsi/lpfc/lpfc_nvme.c
···
 
 /* Declare and initialization an instance of the FC NVME template. */
 static struct nvme_fc_port_template lpfc_nvme_template = {
-	.module = THIS_MODULE,
-
 	/* initiator-based functions */
 	.localport_delete = lpfc_nvme_localport_delete,
 	.remoteport_delete = lpfc_nvme_remoteport_delete,
-1
drivers/scsi/qla2xxx/qla_nvme.c
···
 }
 
 static struct nvme_fc_port_template qla_nvme_fc_transport = {
-	.module = THIS_MODULE,
 	.localport_delete = qla_nvme_localport_delete,
 	.remoteport_delete = qla_nvme_remoteport_delete,
 	.create_queue = qla_nvme_alloc_queue,
+17 -26
include/linux/blk-cgroup.h
···
 struct blkcg {
 	struct cgroup_subsys_state css;
 	spinlock_t lock;
+	refcount_t online_pin;
 
 	struct radix_tree_root blkg_tree;
 	struct blkcg_gq __rcu *blkg_hint;
···
 	struct list_head all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head cgwb_list;
-	refcount_t cgwb_refcnt;
 #endif
 };
···
 
 extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
 
-#ifdef CONFIG_CGROUP_WRITEBACK
-
 /**
- * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
+ * blkcg_pin_online - pin online state
  * @blkcg: blkcg of interest
  *
- * This is used to track the number of active wb's related to a blkcg.
+ * While pinned, a blkcg is kept online. This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
  */
-static inline void blkcg_cgwb_get(struct blkcg *blkcg)
+static inline void blkcg_pin_online(struct blkcg *blkcg)
 {
-	refcount_inc(&blkcg->cgwb_refcnt);
+	refcount_inc(&blkcg->online_pin);
 }
 
 /**
- * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
+ * blkcg_unpin_online - unpin online state
  * @blkcg: blkcg of interest
  *
- * This is used to track the number of active wb's related to a blkcg.
- * When this count goes to zero, all active wb has finished so the
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
  * blkcg can continue destruction by calling blkcg_destroy_blkgs().
- * This work may occur in cgwb_release_workfn() on the cgwb_release
- * workqueue.
  */
-static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+static inline void blkcg_unpin_online(struct blkcg *blkcg)
 {
-	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
+	do {
+		if (!refcount_dec_and_test(&blkcg->online_pin))
+			break;
 		blkcg_destroy_blkgs(blkcg);
+		blkcg = blkcg_parent(blkcg);
+	} while (blkcg);
 }
-
-#else
-
-static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
-
-static inline void blkcg_cgwb_put(struct blkcg *blkcg)
-{
-	/* wb isn't being accounted, so trigger destruction right away */
-	blkcg_destroy_blkgs(blkcg);
-}
-
-#endif
 
 /**
  * blkg_path - format cgroup path of blkg
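Illustration (not part of the pull): the header hunk above makes blkcg_unpin_online() walk up the parent chain, so dropping the last online pin on a child also drops the pin it holds on its parent and teardown always proceeds leaf-to-root. A minimal user-space C sketch of that loop; plain ints stand in for refcount_t and every name is invented for the example.

/* sketch: unpin a node and cascade to its parent when the count hits zero */
#include <stdio.h>

struct node {
	const char *name;
	int online_pin;
	struct node *parent;
};

static void destroy(struct node *n)
{
	printf("tearing down %s\n", n->name);
}

static void unpin_online(struct node *n)
{
	do {
		if (--n->online_pin != 0)
			break;
		destroy(n);
		n = n->parent;
	} while (n);
}

int main(void)
{
	struct node root  = { "root",  1, NULL };
	struct node child = { "child", 1, &root };

	/* root holds its base pin plus one pin taken on behalf of the child */
	root.online_pin++;

	unpin_online(&child);	/* destroys child, then drops root's extra pin */
	unpin_online(&root);	/* dropping the base pin now destroys root */
	return 0;
}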
-4
include/linux/nvme-fc-driver.h
···
  *
  * Host/Initiator Transport Entrypoints/Parameters:
  *
- * @module: The LLDD module using the interface
- *
  * @localport_delete: The LLDD initiates deletion of a localport via
  *       nvme_fc_deregister_localport(). However, the teardown is
  *       asynchronous. This routine is called upon the completion of the
···
  *       Value is Mandatory. Allowed to be zero.
  */
 struct nvme_fc_port_template {
-	struct module *module;
-
 	/* initiator-based functions */
 	void (*localport_delete)(struct nvme_fc_local_port *);
 	void (*remoteport_delete)(struct nvme_fc_remote_port *);
+3 -3
mm/backing-dev.c
···
 	css_put(wb->blkcg_css);
 	mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
-	/* triggers blkg destruction if cgwb_refcnt becomes zero */
-	blkcg_cgwb_put(blkcg);
+	/* triggers blkg destruction if no online users left */
+	blkcg_unpin_online(blkcg);
 
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
···
 		list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
 		list_add(&wb->memcg_node, memcg_cgwb_list);
 		list_add(&wb->blkcg_node, blkcg_cgwb_list);
-		blkcg_cgwb_get(blkcg);
+		blkcg_pin_online(blkcg);
 		css_get(memcg_css);
 		css_get(blkcg_css);
 	}