Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
"Quite a few small bug fixes old and new, also Doug Ledford is retiring
now, we thank him for his work. Details:

- Use after free in rxe

- mlx5 DM regression

- hns bugs triggered by device reset

- Two fixes for CONFIG_DEBUG_PREEMPT

- Several longstanding corner case bugs in hfi1

- Two irdma data path bugs in rare cases and some memory issues"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/irdma: Don't arm the CQ more than two times if no CE for this CQ
RDMA/irdma: Report correct WC errors
RDMA/irdma: Fix a potential memory allocation issue in 'irdma_prm_add_pble_mem()'
RDMA/irdma: Fix a use-after-free in add_pble_prm
IB/hfi1: Fix leak of rcvhdrtail_dummy_kvaddr
IB/hfi1: Fix early init panic
IB/hfi1: Ensure use of smp_processor_id() is preempt disabled
IB/hfi1: Correct guard on eager buffer deallocation
RDMA/rtrs: Call {get,put}_cpu_ptr to silence a debug kernel warning
RDMA/hns: Do not destroy QP resources in the hw resetting phase
RDMA/hns: Do not halt commands during reset until later
Remove Doug Ledford from MAINTAINERS
RDMA/mlx5: Fix releasing unallocated memory in dereg MR flow
RDMA: Fix use-after-free in rxe_queue_cleanup

+102 -67
-1
MAINTAINERS
··· 9329 9329 F: drivers/iio/pressure/dps310.c 9330 9330 9331 9331 INFINIBAND SUBSYSTEM 9332 - M: Doug Ledford <dledford@redhat.com> 9333 9332 M: Jason Gunthorpe <jgg@nvidia.com> 9334 9333 L: linux-rdma@vger.kernel.org 9335 9334 S: Supported
+2
drivers/infiniband/hw/hfi1/chip.c
··· 8415 8415 */ 8416 8416 static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) 8417 8417 { 8418 + if (!rcd->rcvhdrq) 8419 + return; 8418 8420 clear_recv_intr(rcd); 8419 8421 if (check_packet_present(rcd)) 8420 8422 force_recv_intr(rcd);
+2
drivers/infiniband/hw/hfi1/driver.c
··· 1012 1012 struct hfi1_packet packet; 1013 1013 int skip_pkt = 0; 1014 1014 1015 + if (!rcd->rcvhdrq) 1016 + return RCV_PKT_OK; 1015 1017 /* Control context will always use the slow path interrupt handler */ 1016 1018 needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1; 1017 1019
+17 -23
drivers/infiniband/hw/hfi1/init.c
··· 113 113 rcd->fast_handler = get_dma_rtail_setting(rcd) ? 114 114 handle_receive_interrupt_dma_rtail : 115 115 handle_receive_interrupt_nodma_rtail; 116 - rcd->slow_handler = handle_receive_interrupt; 117 116 118 117 hfi1_set_seq_cnt(rcd, 1); 119 118 ··· 333 334 rcd->numa_id = numa; 334 335 rcd->rcv_array_groups = dd->rcv_entries.ngroups; 335 336 rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; 337 + rcd->slow_handler = handle_receive_interrupt; 338 + rcd->do_interrupt = rcd->slow_handler; 336 339 rcd->msix_intr = CCE_NUM_MSIX_VECTORS; 337 340 338 341 mutex_init(&rcd->exp_mutex); ··· 875 874 if (ret) 876 875 goto done; 877 876 878 - /* allocate dummy tail memory for all receive contexts */ 879 - dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev, 880 - sizeof(u64), 881 - &dd->rcvhdrtail_dummy_dma, 882 - GFP_KERNEL); 883 - 884 - if (!dd->rcvhdrtail_dummy_kvaddr) { 885 - dd_dev_err(dd, "cannot allocate dummy tail memory\n"); 886 - ret = -ENOMEM; 887 - goto done; 888 - } 889 - 890 877 /* dd->rcd can be NULL if early initialization failed */ 891 878 for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { 892 879 /* ··· 886 897 rcd = hfi1_rcd_get_by_index(dd, i); 887 898 if (!rcd) 888 899 continue; 889 - 890 - rcd->do_interrupt = &handle_receive_interrupt; 891 900 892 901 lastfail = hfi1_create_rcvhdrq(dd, rcd); 893 902 if (!lastfail) ··· 1107 1120 rcd->egrbufs.rcvtids = NULL; 1108 1121 1109 1122 for (e = 0; e < rcd->egrbufs.alloced; e++) { 1110 - if (rcd->egrbufs.buffers[e].dma) 1123 + if (rcd->egrbufs.buffers[e].addr) 1111 1124 dma_free_coherent(&dd->pcidev->dev, 1112 1125 rcd->egrbufs.buffers[e].len, 1113 1126 rcd->egrbufs.buffers[e].addr, ··· 1188 1201 dd->tx_opstats = NULL; 1189 1202 kfree(dd->comp_vect); 1190 1203 dd->comp_vect = NULL; 1204 + if (dd->rcvhdrtail_dummy_kvaddr) 1205 + dma_free_coherent(&dd->pcidev->dev, sizeof(u64), 1206 + (void *)dd->rcvhdrtail_dummy_kvaddr, 1207 + dd->rcvhdrtail_dummy_dma); 1208 + 
dd->rcvhdrtail_dummy_kvaddr = NULL; 1191 1209 sdma_clean(dd, dd->num_sdma); 1192 1210 rvt_dealloc_device(&dd->verbs_dev.rdi); 1193 1211 } ··· 1286 1294 1287 1295 dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL); 1288 1296 if (!dd->comp_vect) { 1297 + ret = -ENOMEM; 1298 + goto bail; 1299 + } 1300 + 1301 + /* allocate dummy tail memory for all receive contexts */ 1302 + dd->rcvhdrtail_dummy_kvaddr = 1303 + dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64), 1304 + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); 1305 + if (!dd->rcvhdrtail_dummy_kvaddr) { 1289 1306 ret = -ENOMEM; 1290 1307 goto bail; 1291 1308 } ··· 1505 1504 } 1506 1505 1507 1506 free_credit_return(dd); 1508 - 1509 - if (dd->rcvhdrtail_dummy_kvaddr) { 1510 - dma_free_coherent(&dd->pcidev->dev, sizeof(u64), 1511 - (void *)dd->rcvhdrtail_dummy_kvaddr, 1512 - dd->rcvhdrtail_dummy_dma); 1513 - dd->rcvhdrtail_dummy_kvaddr = NULL; 1514 - } 1515 1507 1516 1508 /* 1517 1509 * Free any resources still in use (usually just kernel contexts)
+1 -1
drivers/infiniband/hw/hfi1/sdma.c
··· 838 838 if (current->nr_cpus_allowed != 1) 839 839 goto out; 840 840 841 - cpu_id = smp_processor_id(); 842 841 rcu_read_lock(); 842 + cpu_id = smp_processor_id(); 843 843 rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id, 844 844 sdma_rht_params); 845 845
+11 -3
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 33 33 #include <linux/acpi.h> 34 34 #include <linux/etherdevice.h> 35 35 #include <linux/interrupt.h> 36 + #include <linux/iopoll.h> 36 37 #include <linux/kernel.h> 37 38 #include <linux/types.h> 38 39 #include <net/addrconf.h> ··· 1051 1050 unsigned long instance_stage, 1052 1051 unsigned long reset_stage) 1053 1052 { 1053 + #define HW_RESET_TIMEOUT_US 1000000 1054 + #define HW_RESET_SLEEP_US 1000 1055 + 1054 1056 struct hns_roce_v2_priv *priv = hr_dev->priv; 1055 1057 struct hnae3_handle *handle = priv->handle; 1056 1058 const struct hnae3_ae_ops *ops = handle->ae_algo->ops; 1059 + unsigned long val; 1060 + int ret; 1057 1061 1058 1062 /* When hardware reset is detected, we should stop sending mailbox&cmq& 1059 1063 * doorbell to hardware. If now in .init_instance() function, we should ··· 1070 1064 * again. 1071 1065 */ 1072 1066 hr_dev->dis_db = true; 1073 - if (!ops->get_hw_reset_stat(handle)) 1067 + 1068 + ret = read_poll_timeout(ops->ae_dev_reset_cnt, val, 1069 + val > hr_dev->reset_cnt, HW_RESET_SLEEP_US, 1070 + HW_RESET_TIMEOUT_US, false, handle); 1071 + if (!ret) 1074 1072 hr_dev->is_reset = true; 1075 1073 1076 1074 if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT || ··· 6397 6387 if (!hr_dev) 6398 6388 return 0; 6399 6389 6400 - hr_dev->is_reset = true; 6401 6390 hr_dev->active = false; 6402 6391 hr_dev->dis_db = true; 6403 - 6404 6392 hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN; 6405 6393 6406 6394 return 0;
+6 -1
drivers/infiniband/hw/irdma/hw.c
··· 60 60 { 61 61 struct irdma_cq *cq = iwcq->back_cq; 62 62 63 + if (!cq->user_mode) 64 + cq->armed = false; 63 65 if (cq->ibcq.comp_handler) 64 66 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 65 67 } ··· 148 146 qp->flush_code = FLUSH_PROT_ERR; 149 147 break; 150 148 case IRDMA_AE_AMP_BAD_QP: 149 + case IRDMA_AE_WQE_UNEXPECTED_OPCODE: 151 150 qp->flush_code = FLUSH_LOC_QP_OP_ERR; 152 151 break; 153 152 case IRDMA_AE_AMP_BAD_STAG_KEY: ··· 159 156 case IRDMA_AE_PRIV_OPERATION_DENIED: 160 157 case IRDMA_AE_IB_INVALID_REQUEST: 161 158 case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: 162 - case IRDMA_AE_IB_REMOTE_OP_ERROR: 163 159 qp->flush_code = FLUSH_REM_ACCESS_ERR; 164 160 qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; 165 161 break; ··· 185 183 case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: 186 184 case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: 187 185 qp->flush_code = FLUSH_MW_BIND_ERR; 186 + break; 187 + case IRDMA_AE_IB_REMOTE_OP_ERROR: 188 + qp->flush_code = FLUSH_REM_OP_ERR; 188 189 break; 189 190 default: 190 191 qp->flush_code = FLUSH_FATAL_ERR;
+1
drivers/infiniband/hw/irdma/main.h
··· 542 542 void (*callback_fcn)(struct irdma_cqp_request *cqp_request), 543 543 void *cb_param); 544 544 void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request); 545 + bool irdma_cq_empty(struct irdma_cq *iwcq); 545 546 int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event, 546 547 void *ptr); 547 548 int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+3 -5
drivers/infiniband/hw/irdma/pble.c
··· 25 25 list_del(&chunk->list); 26 26 if (chunk->type == PBLE_SD_PAGED) 27 27 irdma_pble_free_paged_mem(chunk); 28 - if (chunk->bitmapbuf) 29 - kfree(chunk->bitmapmem.va); 28 + bitmap_free(chunk->bitmapbuf); 30 29 kfree(chunk->chunkmem.va); 31 30 } 32 31 } ··· 282 283 "PBLE: next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n", 283 284 pble_rsrc->next_fpm_addr, chunk->size, chunk->size); 284 285 pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3); 285 - list_add(&chunk->list, &pble_rsrc->pinfo.clist); 286 286 sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ? 287 287 sd_entry->u.pd_table.pd_page_addr.pa : 288 288 sd_entry->u.bp.addr.pa; ··· 293 295 goto error; 294 296 } 295 297 298 + list_add(&chunk->list, &pble_rsrc->pinfo.clist); 296 299 sd_entry->valid = true; 297 300 return 0; 298 301 299 302 error: 300 - if (chunk->bitmapbuf) 301 - kfree(chunk->bitmapmem.va); 303 + bitmap_free(chunk->bitmapbuf); 302 304 kfree(chunk->chunkmem.va); 303 305 304 306 return ret_code;
-1
drivers/infiniband/hw/irdma/pble.h
··· 78 78 u32 pg_cnt; 79 79 enum irdma_alloc_type type; 80 80 struct irdma_sc_dev *dev; 81 - struct irdma_virt_mem bitmapmem; 82 81 struct irdma_virt_mem chunkmem; 83 82 }; 84 83
+17 -7
drivers/infiniband/hw/irdma/utils.c
··· 2239 2239 2240 2240 sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift; 2241 2241 2242 - pchunk->bitmapmem.size = sizeofbitmap >> 3; 2243 - pchunk->bitmapmem.va = kzalloc(pchunk->bitmapmem.size, GFP_KERNEL); 2244 - 2245 - if (!pchunk->bitmapmem.va) 2242 + pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL); 2243 + if (!pchunk->bitmapbuf) 2246 2244 return IRDMA_ERR_NO_MEMORY; 2247 - 2248 - pchunk->bitmapbuf = pchunk->bitmapmem.va; 2249 - bitmap_zero(pchunk->bitmapbuf, sizeofbitmap); 2250 2245 2251 2246 pchunk->sizeofbitmap = sizeofbitmap; 2252 2247 /* each pble is 8 bytes hence shift by 3 */ ··· 2485 2490 ibevent.device = iwqp->ibqp.device; 2486 2491 ibevent.element.qp = &iwqp->ibqp; 2487 2492 iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context); 2493 + } 2494 + 2495 + bool irdma_cq_empty(struct irdma_cq *iwcq) 2496 + { 2497 + struct irdma_cq_uk *ukcq; 2498 + u64 qword3; 2499 + __le64 *cqe; 2500 + u8 polarity; 2501 + 2502 + ukcq = &iwcq->sc_cq.cq_uk; 2503 + cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); 2504 + get_64bit_val(cqe, 24, &qword3); 2505 + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); 2506 + 2507 + return polarity != ukcq->polarity; 2488 2508 }
+18 -5
drivers/infiniband/hw/irdma/verbs.c
··· 3584 3584 struct irdma_cq *iwcq; 3585 3585 struct irdma_cq_uk *ukcq; 3586 3586 unsigned long flags; 3587 - enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; 3587 + enum irdma_cmpl_notify cq_notify; 3588 + bool promo_event = false; 3589 + int ret = 0; 3588 3590 3591 + cq_notify = notify_flags == IB_CQ_SOLICITED ? 3592 + IRDMA_CQ_COMPL_SOLICITED : IRDMA_CQ_COMPL_EVENT; 3589 3593 iwcq = to_iwcq(ibcq); 3590 3594 ukcq = &iwcq->sc_cq.cq_uk; 3591 - if (notify_flags == IB_CQ_SOLICITED) 3592 - cq_notify = IRDMA_CQ_COMPL_SOLICITED; 3593 3595 3594 3596 spin_lock_irqsave(&iwcq->lock, flags); 3595 - irdma_uk_cq_request_notification(ukcq, cq_notify); 3597 + /* Only promote to arm the CQ for any event if the last arm event was solicited. */ 3598 + if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED) 3599 + promo_event = true; 3600 + 3601 + if (!iwcq->armed || promo_event) { 3602 + iwcq->armed = true; 3603 + iwcq->last_notify = cq_notify; 3604 + irdma_uk_cq_request_notification(ukcq, cq_notify); 3605 + } 3606 + 3607 + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq)) 3608 + ret = 1; 3596 3609 spin_unlock_irqrestore(&iwcq->lock, flags); 3597 3610 3598 - return 0; 3611 + return ret; 3599 3612 } 3600 3613 3601 3614 static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
+2
drivers/infiniband/hw/irdma/verbs.h
··· 110 110 u16 cq_size; 111 111 u16 cq_num; 112 112 bool user_mode; 113 + bool armed; 114 + enum irdma_cmpl_notify last_notify; 113 115 u32 polled_cmpls; 114 116 u32 cq_mem_size; 115 117 struct irdma_dma_mem kmem;
+3 -3
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 664 664 665 665 /* User MR data */ 666 666 struct mlx5_cache_ent *cache_ent; 667 - struct ib_umem *umem; 668 667 669 668 /* This is zero'd when the MR is allocated */ 670 669 union { ··· 675 676 struct list_head list; 676 677 }; 677 678 678 - /* Used only by kernel MRs (umem == NULL) */ 679 + /* Used only by kernel MRs */ 679 680 struct { 680 681 void *descs; 681 682 void *descs_alloc; ··· 696 697 int data_length; 697 698 }; 698 699 699 - /* Used only by User MRs (umem != NULL) */ 700 + /* Used only by User MRs */ 700 701 struct { 702 + struct ib_umem *umem; 701 703 unsigned int page_shift; 702 704 /* Current access_flags */ 703 705 int access_flags;
+12 -14
drivers/infiniband/hw/mlx5/mr.c
··· 1904 1904 return ret; 1905 1905 } 1906 1906 1907 - static void 1908 - mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 1907 + static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 1909 1908 { 1910 - if (!mr->umem && mr->descs) { 1911 - struct ib_device *device = mr->ibmr.device; 1912 - int size = mr->max_descs * mr->desc_size; 1913 - struct mlx5_ib_dev *dev = to_mdev(device); 1909 + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1910 + int size = mr->max_descs * mr->desc_size; 1914 1911 1915 - dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, 1916 - DMA_TO_DEVICE); 1917 - kfree(mr->descs_alloc); 1918 - mr->descs = NULL; 1919 - } 1912 + if (!mr->descs) 1913 + return; 1914 + 1915 + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, 1916 + DMA_TO_DEVICE); 1917 + kfree(mr->descs_alloc); 1918 + mr->descs = NULL; 1920 1919 } 1921 1920 1922 1921 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) ··· 1991 1992 if (mr->cache_ent) { 1992 1993 mlx5_mr_cache_free(dev, mr); 1993 1994 } else { 1994 - mlx5_free_priv_descs(mr); 1995 + if (!udata) 1996 + mlx5_free_priv_descs(mr); 1995 1997 kfree(mr); 1996 1998 } 1997 1999 return 0; ··· 2079 2079 if (err) 2080 2080 goto err_free_in; 2081 2081 2082 - mr->umem = NULL; 2083 2082 kfree(in); 2084 2083 2085 2084 return mr; ··· 2205 2206 } 2206 2207 2207 2208 mr->ibmr.device = pd->device; 2208 - mr->umem = NULL; 2209 2209 2210 2210 switch (mr_type) { 2211 2211 case IB_MR_TYPE_MEM_REG:
+1
drivers/infiniband/sw/rxe/rxe_qp.c
··· 359 359 360 360 err2: 361 361 rxe_queue_cleanup(qp->sq.queue); 362 + qp->sq.queue = NULL; 362 363 err1: 363 364 qp->pd = NULL; 364 365 qp->rcq = NULL;
+6 -3
drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
··· 19 19 int cpu; 20 20 21 21 cpu = raw_smp_processor_id(); 22 - s = this_cpu_ptr(stats->pcpu_stats); 22 + s = get_cpu_ptr(stats->pcpu_stats); 23 23 if (con->cpu != cpu) { 24 24 s->cpu_migr.to++; 25 25 ··· 27 27 s = per_cpu_ptr(stats->pcpu_stats, con->cpu); 28 28 atomic_inc(&s->cpu_migr.from); 29 29 } 30 + put_cpu_ptr(stats->pcpu_stats); 30 31 } 31 32 32 33 void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats) 33 34 { 34 35 struct rtrs_clt_stats_pcpu *s; 35 36 36 - s = this_cpu_ptr(stats->pcpu_stats); 37 + s = get_cpu_ptr(stats->pcpu_stats); 37 38 s->rdma.failover_cnt++; 39 + put_cpu_ptr(stats->pcpu_stats); 38 40 } 39 41 40 42 int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) ··· 171 169 { 172 170 struct rtrs_clt_stats_pcpu *s; 173 171 174 - s = this_cpu_ptr(stats->pcpu_stats); 172 + s = get_cpu_ptr(stats->pcpu_stats); 175 173 s->rdma.dir[d].cnt++; 176 174 s->rdma.dir[d].size_total += size; 175 + put_cpu_ptr(stats->pcpu_stats); 177 176 } 178 177 179 178 void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir)