Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'cxgb4'

Hariprasad Shenai says:

====================
Adds support for CIQ and other misc. fixes for rdma/cxgb4

This patch series adds support to allocate and use IQs specifically for
indirect interrupts, adds fixes to align ISS for iWARP connections & fixes
related to tcp snd/rcv window for Chelsio T4/T5 adapters on iw_cxgb4.
Also changes Interrupt Holdoff Packet Count threshold of response queues for
cxgb4 driver.

The patch series is created against the 'net-next' tree.
And includes patches on cxgb4 and iw_cxgb4 driver.

Since this patch-series contains cxgb4 and iw_cxgb4 patches, we would like to
request this patch series to get merged via David Miller's 'net-next' tree.

We have included all the maintainers of respective drivers. Kindly review the
change and let us know in case of any review comments.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+357 -58
+112 -15
drivers/infiniband/hw/cxgb4/cm.c
··· 232 232 233 233 static void set_emss(struct c4iw_ep *ep, u16 opt) 234 234 { 235 - ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40; 235 + ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 236 + sizeof(struct iphdr) - sizeof(struct tcphdr); 236 237 ep->mss = ep->emss; 237 238 if (GET_TCPOPT_TSTAMP(opt)) 238 239 ep->emss -= 12; 239 240 if (ep->emss < 128) 240 241 ep->emss = 128; 242 + if (ep->emss & 7) 243 + PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n", 244 + GET_TCPOPT_MSS(opt), ep->mss, ep->emss); 241 245 PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt), 242 246 ep->mss, ep->emss); 243 247 } ··· 472 468 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; 473 469 flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); 474 470 flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; 475 - flowc->mnemval[6].val = cpu_to_be32(snd_win); 471 + flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); 476 472 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 477 473 flowc->mnemval[7].val = cpu_to_be32(ep->emss); 478 474 /* Pad WR to 16 byte boundary */ ··· 532 528 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 533 529 } 534 530 531 + static void best_mtu(const unsigned short *mtus, unsigned short mtu, 532 + unsigned int *idx, int use_ts) 533 + { 534 + unsigned short hdr_size = sizeof(struct iphdr) + 535 + sizeof(struct tcphdr) + 536 + (use_ts ? 12 : 0); 537 + unsigned short data_size = mtu - hdr_size; 538 + 539 + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); 540 + } 541 + 535 542 static int send_connect(struct c4iw_ep *ep) 536 543 { 537 544 struct cpl_act_open_req *req; ··· 565 550 struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr; 566 551 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr; 567 552 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr; 553 + int win; 568 554 569 555 wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? 
570 556 roundup(sizev4, 16) : ··· 581 565 } 582 566 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); 583 567 584 - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); 568 + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 569 + enable_tcp_timestamps); 585 570 wscale = compute_wscale(rcv_win); 571 + 572 + /* 573 + * Specify the largest window that will fit in opt0. The 574 + * remainder will be specified in the rx_data_ack. 575 + */ 576 + win = ep->rcv_win >> 10; 577 + if (win > RCV_BUFSIZ_MASK) 578 + win = RCV_BUFSIZ_MASK; 579 + 586 580 opt0 = (nocong ? NO_CONG(1) : 0) | 587 581 KEEP_ALIVE(1) | 588 582 DELACK(1) | ··· 603 577 SMAC_SEL(ep->smac_idx) | 604 578 DSCP(ep->tos) | 605 579 ULP_MODE(ULP_MODE_TCPDDP) | 606 - RCV_BUFSIZ(rcv_win>>10); 580 + RCV_BUFSIZ(win); 607 581 opt2 = RX_CHANNEL(0) | 608 582 CCTRL_ECN(enable_ecn) | 609 583 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); ··· 659 633 req6->opt2 = cpu_to_be32(opt2); 660 634 } 661 635 } else { 636 + u32 isn = (prandom_u32() & ~7UL) - 1; 637 + 638 + opt2 |= T5_OPT_2_VALID; 639 + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ 640 + if (peer2peer) 641 + isn += 4; 642 + 662 643 if (ep->com.remote_addr.ss_family == AF_INET) { 663 644 t5_req = (struct cpl_t5_act_open_req *) 664 645 skb_put(skb, wrlen); ··· 682 649 cxgb4_select_ntuple( 683 650 ep->com.dev->rdev.lldi.ports[0], 684 651 ep->l2t))); 652 + t5_req->rsvd = cpu_to_be32(isn); 653 + PDBG("%s snd_isn %u\n", __func__, 654 + be32_to_cpu(t5_req->rsvd)); 685 655 t5_req->opt2 = cpu_to_be32(opt2); 686 656 } else { 687 657 t5_req6 = (struct cpl_t5_act_open_req6 *) ··· 708 672 cxgb4_select_ntuple( 709 673 ep->com.dev->rdev.lldi.ports[0], 710 674 ep->l2t)); 675 + t5_req6->rsvd = cpu_to_be32(isn); 676 + PDBG("%s snd_isn %u\n", __func__, 677 + be32_to_cpu(t5_req6->rsvd)); 711 678 t5_req6->opt2 = cpu_to_be32(opt2); 712 679 } 713 680 } ··· 1183 1144 printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n"); 1184 1145 return 0; 1185 1146 } 1147 + 1148 
+ /* 1149 + * If we couldn't specify the entire rcv window at connection setup 1150 + * due to the limit in the number of bits in the RCV_BUFSIZ field, 1151 + * then add the overage in to the credits returned. 1152 + */ 1153 + if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024) 1154 + credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024; 1186 1155 1187 1156 req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); 1188 1157 memset(req, 0, wrlen); ··· 1665 1618 unsigned int mtu_idx; 1666 1619 int wscale; 1667 1620 struct sockaddr_in *sin; 1621 + int win; 1668 1622 1669 1623 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); 1670 1624 req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); ··· 1688 1640 htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); 1689 1641 req->tcb.tx_max = (__force __be32) jiffies; 1690 1642 req->tcb.rcv_adv = htons(1); 1691 - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); 1643 + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 1644 + enable_tcp_timestamps); 1692 1645 wscale = compute_wscale(rcv_win); 1646 + 1647 + /* 1648 + * Specify the largest window that will fit in opt0. The 1649 + * remainder will be specified in the rx_data_ack. 1650 + */ 1651 + win = ep->rcv_win >> 10; 1652 + if (win > RCV_BUFSIZ_MASK) 1653 + win = RCV_BUFSIZ_MASK; 1654 + 1693 1655 req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | 1694 1656 (nocong ? 
NO_CONG(1) : 0) | 1695 1657 KEEP_ALIVE(1) | ··· 1711 1653 SMAC_SEL(ep->smac_idx) | 1712 1654 DSCP(ep->tos) | 1713 1655 ULP_MODE(ULP_MODE_TCPDDP) | 1714 - RCV_BUFSIZ(rcv_win >> 10)); 1656 + RCV_BUFSIZ(win)); 1715 1657 req->tcb.opt2 = (__force __be32) (PACE(1) | 1716 1658 TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | 1717 1659 RX_CHANNEL(0) | ··· 1746 1688 return status == CPL_ERR_RTX_NEG_ADVICE || 1747 1689 status == CPL_ERR_PERSIST_NEG_ADVICE || 1748 1690 status == CPL_ERR_KEEPALV_NEG_ADVICE; 1691 + } 1692 + 1693 + static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) 1694 + { 1695 + ep->snd_win = snd_win; 1696 + ep->rcv_win = rcv_win; 1697 + PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win); 1749 1698 } 1750 1699 1751 1700 #define ACT_OPEN_RETRY_COUNT 2 ··· 1803 1738 ep->ctrlq_idx = cxgb4_port_idx(pdev); 1804 1739 ep->rss_qid = cdev->rdev.lldi.rxq_ids[ 1805 1740 cxgb4_port_idx(pdev) * step]; 1741 + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); 1806 1742 dev_put(pdev); 1807 1743 } else { 1808 1744 pdev = get_real_dev(n->dev); ··· 1822 1756 cdev->rdev.lldi.nchan; 1823 1757 ep->rss_qid = cdev->rdev.lldi.rxq_ids[ 1824 1758 cxgb4_port_idx(n->dev) * step]; 1759 + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); 1825 1760 1826 1761 if (clear_mpa_v1) { 1827 1762 ep->retry_with_mpa_v1 = 0; ··· 2053 1986 u64 opt0; 2054 1987 u32 opt2; 2055 1988 int wscale; 1989 + struct cpl_t5_pass_accept_rpl *rpl5 = NULL; 1990 + int win; 2056 1991 2057 1992 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2058 1993 BUG_ON(skb_cloned(skb)); 2059 - skb_trim(skb, sizeof(*rpl)); 1994 + 2060 1995 skb_get(skb); 2061 - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); 1996 + rpl = cplhdr(skb); 1997 + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 1998 + skb_trim(skb, roundup(sizeof(*rpl5), 16)); 1999 + rpl5 = (void *)rpl; 2000 + INIT_TP_WR(rpl5, ep->hwtid); 2001 + } else { 2002 + skb_trim(skb, sizeof(*rpl)); 
2003 + INIT_TP_WR(rpl, ep->hwtid); 2004 + } 2005 + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, 2006 + ep->hwtid)); 2007 + 2008 + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, 2009 + enable_tcp_timestamps && req->tcpopt.tstamp); 2062 2010 wscale = compute_wscale(rcv_win); 2011 + 2012 + /* 2013 + * Specify the largest window that will fit in opt0. The 2014 + * remainder will be specified in the rx_data_ack. 2015 + */ 2016 + win = ep->rcv_win >> 10; 2017 + if (win > RCV_BUFSIZ_MASK) 2018 + win = RCV_BUFSIZ_MASK; 2063 2019 opt0 = (nocong ? NO_CONG(1) : 0) | 2064 2020 KEEP_ALIVE(1) | 2065 2021 DELACK(1) | ··· 2093 2003 SMAC_SEL(ep->smac_idx) | 2094 2004 DSCP(ep->tos >> 2) | 2095 2005 ULP_MODE(ULP_MODE_TCPDDP) | 2096 - RCV_BUFSIZ(rcv_win>>10); 2006 + RCV_BUFSIZ(win); 2097 2007 opt2 = RX_CHANNEL(0) | 2098 2008 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); 2099 2009 ··· 2113 2023 opt2 |= CCTRL_ECN(1); 2114 2024 } 2115 2025 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 2026 + u32 isn = (prandom_u32() & ~7UL) - 1; 2116 2027 opt2 |= T5_OPT_2_VALID; 2117 2028 opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); 2029 + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ 2030 + rpl5 = (void *)rpl; 2031 + memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); 2032 + if (peer2peer) 2033 + isn += 4; 2034 + rpl5->iss = cpu_to_be32(isn); 2035 + PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss)); 2118 2036 } 2119 2037 2120 - rpl = cplhdr(skb); 2121 - INIT_TP_WR(rpl, ep->hwtid); 2122 - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, 2123 - ep->hwtid)); 2124 2038 rpl->opt0 = cpu_to_be64(opt0); 2125 2039 rpl->opt2 = cpu_to_be32(opt2); 2126 2040 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); ··· 2189 2095 int err; 2190 2096 u16 peer_mss = ntohs(req->tcpopt.mss); 2191 2097 int iptype; 2098 + unsigned short hdrs; 2192 2099 2193 2100 parent_ep = lookup_stid(t, stid); 2194 2101 if (!parent_ep) { ··· 2247 2152 goto reject; 2248 2153 } 2249 2154 
2250 - if (peer_mss && child_ep->mtu > (peer_mss + 40)) 2251 - child_ep->mtu = peer_mss + 40; 2155 + hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) + 2156 + ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0); 2157 + if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) 2158 + child_ep->mtu = peer_mss + hdrs; 2252 2159 2253 2160 state_set(&child_ep->com, CONNECTING); 2254 2161 child_ep->com.dev = dev;
+6 -1
drivers/infiniband/hw/cxgb4/cq.c
··· 134 134 V_FW_RI_RES_WR_IQANUS(0) | 135 135 V_FW_RI_RES_WR_IQANUD(1) | 136 136 F_FW_RI_RES_WR_IQANDST | 137 - V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids)); 137 + V_FW_RI_RES_WR_IQANDSTINDEX( 138 + rdev->lldi.ciq_ids[cq->vector])); 138 139 res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( 139 140 F_FW_RI_RES_WR_IQDROPRSS | 140 141 V_FW_RI_RES_WR_IQPCIECH(2) | ··· 871 870 872 871 rhp = to_c4iw_dev(ibdev); 873 872 873 + if (vector >= rhp->rdev.lldi.nciq) 874 + return ERR_PTR(-EINVAL); 875 + 874 876 chp = kzalloc(sizeof(*chp), GFP_KERNEL); 875 877 if (!chp) 876 878 return ERR_PTR(-ENOMEM); ··· 919 915 } 920 916 chp->cq.size = hwentries; 921 917 chp->cq.memsize = memsize; 918 + chp->cq.vector = vector; 922 919 923 920 ret = create_cq(&rhp->rdev, &chp->cq, 924 921 ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+2
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
··· 805 805 u8 retry_with_mpa_v1; 806 806 u8 tried_with_mpa_v1; 807 807 unsigned int retry_count; 808 + int snd_win; 809 + int rcv_win; 808 810 }; 809 811 810 812 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
+1 -1
drivers/infiniband/hw/cxgb4/provider.c
··· 499 499 dev->ibdev.node_type = RDMA_NODE_RNIC; 500 500 memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); 501 501 dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; 502 - dev->ibdev.num_comp_vectors = 1; 502 + dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; 503 503 dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev); 504 504 dev->ibdev.query_device = c4iw_query_device; 505 505 dev->ibdev.query_port = c4iw_query_port;
+1
drivers/infiniband/hw/cxgb4/t4.h
··· 542 542 size_t memsize; 543 543 __be64 bits_type_ts; 544 544 u32 cqid; 545 + int vector; 545 546 u16 size; /* including status page */ 546 547 u16 cidx; 547 548 u16 sw_pidx;
+1
drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
··· 848 848 #define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) 849 849 #define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) 850 850 851 + #define CONG_CNTRL_VALID (1 << 18) 851 852 #define T5_OPT_2_VALID (1 << 31) 852 853 853 854 #endif /* _T4FW_RI_API_H_ */
+12 -2
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
··· 357 357 MAX_OFLD_QSETS = 16, /* # of offload Tx/Rx queue sets */ 358 358 MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ 359 359 MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ 360 + MAX_RDMA_CIQS = NCHAN, /* # of RDMA concentrator IQs */ 361 + MAX_ISCSI_QUEUES = NCHAN, /* # of streaming iSCSI Rx queues */ 360 362 }; 361 363 362 364 enum { 363 - MAX_EGRQ = 128, /* max # of egress queues, including FLs */ 364 - MAX_INGQ = 64 /* max # of interrupt-capable ingress queues */ 365 + INGQ_EXTRAS = 2, /* firmware event queue and */ 366 + /* forwarded interrupts */ 367 + MAX_EGRQ = MAX_ETH_QSETS*2 + MAX_OFLD_QSETS*2 368 + + MAX_CTRL_QUEUES + MAX_RDMA_QUEUES + MAX_ISCSI_QUEUES, 369 + MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES 370 + + MAX_RDMA_CIQS + MAX_ISCSI_QUEUES + INGQ_EXTRAS, 365 371 }; 366 372 367 373 struct adapter; ··· 544 538 struct sge_eth_rxq ethrxq[MAX_ETH_QSETS]; 545 539 struct sge_ofld_rxq ofldrxq[MAX_OFLD_QSETS]; 546 540 struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES]; 541 + struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS]; 547 542 struct sge_rspq fw_evtq ____cacheline_aligned_in_smp; 548 543 549 544 struct sge_rspq intrq ____cacheline_aligned_in_smp; ··· 555 548 u16 ethtxq_rover; /* Tx queue to clean up next */ 556 549 u16 ofldqsets; /* # of active offload queue sets */ 557 550 u16 rdmaqs; /* # of available RDMA Rx queues */ 551 + u16 rdmaciqs; /* # of available RDMA concentrator IQs */ 558 552 u16 ofld_rxq[MAX_OFLD_QSETS]; 559 553 u16 rdma_rxq[NCHAN]; 554 + u16 rdma_ciq[NCHAN]; 560 555 u16 timer_val[SGE_NTIMERS]; 561 556 u8 counter_val[SGE_NCOUNTERS]; 562 557 u32 fl_pg_order; /* large page allocation size */ ··· 586 577 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++) 587 578 #define for_each_ofldrxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++) 588 579 #define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++) 580 + #define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++) 589 581 
590 582 struct l2t_data; 591 583
+200 -38
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
··· 818 818 for_each_rdmarxq(&adap->sge, i) 819 819 snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d", 820 820 adap->port[0]->name, i); 821 + 822 + for_each_rdmaciq(&adap->sge, i) 823 + snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d", 824 + adap->port[0]->name, i); 821 825 } 822 826 823 827 static int request_msix_queue_irqs(struct adapter *adap) 824 828 { 825 829 struct sge *s = &adap->sge; 826 - int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi_index = 2; 830 + int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0; 831 + int msi_index = 2; 827 832 828 833 err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, 829 834 adap->msix_info[1].desc, &s->fw_evtq); ··· 862 857 goto unwind; 863 858 msi_index++; 864 859 } 860 + for_each_rdmaciq(s, rdmaciqqidx) { 861 + err = request_irq(adap->msix_info[msi_index].vec, 862 + t4_sge_intr_msix, 0, 863 + adap->msix_info[msi_index].desc, 864 + &s->rdmaciq[rdmaciqqidx].rspq); 865 + if (err) 866 + goto unwind; 867 + msi_index++; 868 + } 865 869 return 0; 866 870 867 871 unwind: 872 + while (--rdmaciqqidx >= 0) 873 + free_irq(adap->msix_info[--msi_index].vec, 874 + &s->rdmaciq[rdmaciqqidx].rspq); 868 875 while (--rdmaqidx >= 0) 869 876 free_irq(adap->msix_info[--msi_index].vec, 870 877 &s->rdmarxq[rdmaqidx].rspq); ··· 902 885 free_irq(adap->msix_info[msi_index++].vec, &s->ofldrxq[i].rspq); 903 886 for_each_rdmarxq(s, i) 904 887 free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); 888 + for_each_rdmaciq(s, i) 889 + free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq); 905 890 } 906 891 907 892 /** ··· 1066 1047 if (msi_idx > 0) 1067 1048 msi_idx++; 1068 1049 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx, 1069 - &q->fl, uldrx_handler); 1050 + q->fl.size ? 
&q->fl : NULL, 1051 + uldrx_handler); 1070 1052 if (err) 1071 1053 goto freeout; 1072 1054 memset(&q->stats, 0, sizeof(q->stats)); ··· 1084 1064 if (msi_idx > 0) 1085 1065 msi_idx++; 1086 1066 err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i], 1087 - msi_idx, &q->fl, uldrx_handler); 1067 + msi_idx, q->fl.size ? &q->fl : NULL, 1068 + uldrx_handler); 1088 1069 if (err) 1089 1070 goto freeout; 1090 1071 memset(&q->stats, 0, sizeof(q->stats)); 1091 1072 s->rdma_rxq[i] = q->rspq.abs_id; 1073 + } 1074 + 1075 + for_each_rdmaciq(s, i) { 1076 + struct sge_ofld_rxq *q = &s->rdmaciq[i]; 1077 + 1078 + if (msi_idx > 0) 1079 + msi_idx++; 1080 + err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i], 1081 + msi_idx, q->fl.size ? &q->fl : NULL, 1082 + uldrx_handler); 1083 + if (err) 1084 + goto freeout; 1085 + memset(&q->stats, 0, sizeof(q->stats)); 1086 + s->rdma_ciq[i] = q->rspq.abs_id; 1092 1087 } 1093 1088 1094 1089 for_each_port(adap, i) { ··· 2503 2468 } 2504 2469 2505 2470 /** 2506 - * set_rxq_intr_params - set a queue's interrupt holdoff parameters 2507 - * @adap: the adapter 2471 + * set_rspq_intr_params - set a queue's interrupt holdoff parameters 2508 2472 * @q: the Rx queue 2509 2473 * @us: the hold-off time in us, or 0 to disable timer 2510 2474 * @cnt: the hold-off packet count, or 0 to disable counter ··· 2511 2477 * Sets an Rx queue's interrupt hold-off time and packet count. At least 2512 2478 * one of the two needs to be enabled for the queue to generate interrupts. 2513 2479 */ 2514 - static int set_rxq_intr_params(struct adapter *adap, struct sge_rspq *q, 2515 - unsigned int us, unsigned int cnt) 2480 + static int set_rspq_intr_params(struct sge_rspq *q, 2481 + unsigned int us, unsigned int cnt) 2516 2482 { 2483 + struct adapter *adap = q->adap; 2484 + 2517 2485 if ((us | cnt) == 0) 2518 2486 cnt = 1; 2519 2487 ··· 2542 2506 return 0; 2543 2507 } 2544 2508 2509 + /** 2510 + * set_rx_intr_params - set a net devices's RX interrupt holdoff paramete! 
2511 + * @dev: the network device 2512 + * @us: the hold-off time in us, or 0 to disable timer 2513 + * @cnt: the hold-off packet count, or 0 to disable counter 2514 + * 2515 + * Set the RX interrupt hold-off parameters for a network device. 2516 + */ 2517 + static int set_rx_intr_params(struct net_device *dev, 2518 + unsigned int us, unsigned int cnt) 2519 + { 2520 + int i, err; 2521 + struct port_info *pi = netdev_priv(dev); 2522 + struct adapter *adap = pi->adapter; 2523 + struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset]; 2524 + 2525 + for (i = 0; i < pi->nqsets; i++, q++) { 2526 + err = set_rspq_intr_params(&q->rspq, us, cnt); 2527 + if (err) 2528 + return err; 2529 + } 2530 + return 0; 2531 + } 2532 + 2545 2533 static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) 2546 2534 { 2547 - const struct port_info *pi = netdev_priv(dev); 2548 - struct adapter *adap = pi->adapter; 2549 - struct sge_rspq *q; 2550 - int i; 2551 - int r = 0; 2552 - 2553 - for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++) { 2554 - q = &adap->sge.ethrxq[i].rspq; 2555 - r = set_rxq_intr_params(adap, q, c->rx_coalesce_usecs, 2556 - c->rx_max_coalesced_frames); 2557 - if (r) { 2558 - dev_err(&dev->dev, "failed to set coalesce %d\n", r); 2559 - break; 2560 - } 2561 - } 2562 - return r; 2535 + return set_rx_intr_params(dev, c->rx_coalesce_usecs, 2536 + c->rx_max_coalesced_frames); 2563 2537 } 2564 2538 2565 2539 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) ··· 3439 3393 EXPORT_SYMBOL(cxgb4_best_mtu); 3440 3394 3441 3395 /** 3396 + * cxgb4_best_aligned_mtu - find best MTU, [hopefully] data size aligned 3397 + * @mtus: the HW MTU table 3398 + * @header_size: Header Size 3399 + * @data_size_max: maximum Data Segment Size 3400 + * @data_size_align: desired Data Segment Size Alignment (2^N) 3401 + * @mtu_idxp: HW MTU Table Index return value pointer (possibly NULL) 3402 + * 3403 + * Similar to cxgb4_best_mtu() but instead of 
searching the Hardware 3404 + * MTU Table based solely on a Maximum MTU parameter, we break that 3405 + * parameter up into a Header Size and Maximum Data Segment Size, and 3406 + * provide a desired Data Segment Size Alignment. If we find an MTU in 3407 + * the Hardware MTU Table which will result in a Data Segment Size with 3408 + * the requested alignment _and_ that MTU isn't "too far" from the 3409 + * closest MTU, then we'll return that rather than the closest MTU. 3410 + */ 3411 + unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus, 3412 + unsigned short header_size, 3413 + unsigned short data_size_max, 3414 + unsigned short data_size_align, 3415 + unsigned int *mtu_idxp) 3416 + { 3417 + unsigned short max_mtu = header_size + data_size_max; 3418 + unsigned short data_size_align_mask = data_size_align - 1; 3419 + int mtu_idx, aligned_mtu_idx; 3420 + 3421 + /* Scan the MTU Table till we find an MTU which is larger than our 3422 + * Maximum MTU or we reach the end of the table. Along the way, 3423 + * record the last MTU found, if any, which will result in a Data 3424 + * Segment Length matching the requested alignment. 3425 + */ 3426 + for (mtu_idx = 0, aligned_mtu_idx = -1; mtu_idx < NMTUS; mtu_idx++) { 3427 + unsigned short data_size = mtus[mtu_idx] - header_size; 3428 + 3429 + /* If this MTU minus the Header Size would result in a 3430 + * Data Segment Size of the desired alignment, remember it. 3431 + */ 3432 + if ((data_size & data_size_align_mask) == 0) 3433 + aligned_mtu_idx = mtu_idx; 3434 + 3435 + /* If we're not at the end of the Hardware MTU Table and the 3436 + * next element is larger than our Maximum MTU, drop out of 3437 + * the loop. 3438 + */ 3439 + if (mtu_idx+1 < NMTUS && mtus[mtu_idx+1] > max_mtu) 3440 + break; 3441 + } 3442 + 3443 + /* If we fell out of the loop because we ran to the end of the table, 3444 + * then we just have to use the last [largest] entry. 
3445 + */ 3446 + if (mtu_idx == NMTUS) 3447 + mtu_idx--; 3448 + 3449 + /* If we found an MTU which resulted in the requested Data Segment 3450 + * Length alignment and that's "not far" from the largest MTU which is 3451 + * less than or equal to the maximum MTU, then use that. 3452 + */ 3453 + if (aligned_mtu_idx >= 0 && 3454 + mtu_idx - aligned_mtu_idx <= 1) 3455 + mtu_idx = aligned_mtu_idx; 3456 + 3457 + /* If the caller has passed in an MTU Index pointer, pass the 3458 + * MTU Index back. Return the MTU value. 3459 + */ 3460 + if (mtu_idxp) 3461 + *mtu_idxp = mtu_idx; 3462 + return mtus[mtu_idx]; 3463 + } 3464 + EXPORT_SYMBOL(cxgb4_best_aligned_mtu); 3465 + 3466 + /** 3442 3467 * cxgb4_port_chan - get the HW channel of a port 3443 3468 * @dev: the net device for the port 3444 3469 * ··· 3906 3789 lli.mtus = adap->params.mtus; 3907 3790 if (uld == CXGB4_ULD_RDMA) { 3908 3791 lli.rxq_ids = adap->sge.rdma_rxq; 3792 + lli.ciq_ids = adap->sge.rdma_ciq; 3909 3793 lli.nrxq = adap->sge.rdmaqs; 3794 + lli.nciq = adap->sge.rdmaciqs; 3910 3795 } else if (uld == CXGB4_ULD_ISCSI) { 3911 3796 lli.rxq_ids = adap->sge.ofld_rxq; 3912 3797 lli.nrxq = adap->sge.ofldqsets; ··· 5654 5535 #undef FW_PARAM_PFVF 5655 5536 #undef FW_PARAM_DEV 5656 5537 5657 - /* 5658 - * These are finalized by FW initialization, load their values now. 5538 + /* The MTU/MSS Table is initialized by now, so load their values. If 5539 + * we're initializing the adapter, then we'll make any modifications 5540 + * we want to the MTU/MSS Table and also initialize the congestion 5541 + * parameters. 5659 5542 */ 5660 5543 t4_read_mtu_tbl(adap, adap->params.mtus, NULL); 5661 - t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd, 5662 - adap->params.b_wnd); 5544 + if (state != DEV_STATE_INIT) { 5545 + int i; 5663 5546 5547 + /* The default MTU Table contains values 1492 and 1500. 
5548 + * However, for TCP, it's better to have two values which are 5549 + * a multiple of 8 +/- 4 bytes apart near this popular MTU. 5550 + * This allows us to have a TCP Data Payload which is a 5551 + * multiple of 8 regardless of what combination of TCP Options 5552 + * are in use (always a multiple of 4 bytes) which is 5553 + * important for performance reasons. For instance, if no 5554 + * options are in use, then we have a 20-byte IP header and a 5555 + * 20-byte TCP header. In this case, a 1500-byte MSS would 5556 + * result in a TCP Data Payload of 1500 - 40 == 1460 bytes 5557 + * which is not a multiple of 8. So using an MSS of 1488 in 5558 + * this case results in a TCP Data Payload of 1448 bytes which 5559 + * is a multiple of 8. On the other hand, if 12-byte TCP Time 5560 + * Stamps have been negotiated, then an MTU of 1500 bytes 5561 + * results in a TCP Data Payload of 1448 bytes which, as 5562 + * above, is a multiple of 8 bytes ... 5563 + */ 5564 + for (i = 0; i < NMTUS; i++) 5565 + if (adap->params.mtus[i] == 1492) { 5566 + adap->params.mtus[i] = 1488; 5567 + break; 5568 + } 5569 + 5570 + t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd, 5571 + adap->params.b_wnd); 5572 + } 5664 5573 t4_init_tp_params(adap); 5665 5574 adap->flags |= FW_OK; 5666 5575 return 0; ··· 5823 5676 (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; 5824 5677 } 5825 5678 5826 - static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx, 5679 + static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, 5680 + unsigned int us, unsigned int cnt, 5827 5681 unsigned int size, unsigned int iqe_size) 5828 5682 { 5829 - q->intr_params = QINTR_TIMER_IDX(timer_idx) | 5830 - (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0); 5831 - q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? 
pkt_cnt_idx : 0; 5683 + q->adap = adap; 5684 + set_rspq_intr_params(q, us, cnt); 5832 5685 q->iqe_len = iqe_size; 5833 5686 q->size = size; 5834 5687 } ··· 5842 5695 { 5843 5696 struct sge *s = &adap->sge; 5844 5697 int i, q10g = 0, n10g = 0, qidx = 0; 5698 + int ciq_size; 5845 5699 5846 5700 for_each_port(adap, i) 5847 5701 n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); ··· 5881 5733 s->ofldqsets = adap->params.nports; 5882 5734 /* For RDMA one Rx queue per channel suffices */ 5883 5735 s->rdmaqs = adap->params.nports; 5736 + s->rdmaciqs = adap->params.nports; 5884 5737 } 5885 5738 5886 5739 for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { 5887 5740 struct sge_eth_rxq *r = &s->ethrxq[i]; 5888 5741 5889 - init_rspq(&r->rspq, 0, 0, 1024, 64); 5742 + init_rspq(adap, &r->rspq, 5, 10, 1024, 64); 5890 5743 r->fl.size = 72; 5891 5744 } 5892 5745 ··· 5903 5754 for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) { 5904 5755 struct sge_ofld_rxq *r = &s->ofldrxq[i]; 5905 5756 5906 - init_rspq(&r->rspq, 0, 0, 1024, 64); 5757 + init_rspq(adap, &r->rspq, 5, 1, 1024, 64); 5907 5758 r->rspq.uld = CXGB4_ULD_ISCSI; 5908 5759 r->fl.size = 72; 5909 5760 } ··· 5911 5762 for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) { 5912 5763 struct sge_ofld_rxq *r = &s->rdmarxq[i]; 5913 5764 5914 - init_rspq(&r->rspq, 0, 0, 511, 64); 5765 + init_rspq(adap, &r->rspq, 5, 1, 511, 64); 5915 5766 r->rspq.uld = CXGB4_ULD_RDMA; 5916 5767 r->fl.size = 72; 5917 5768 } 5918 5769 5919 - init_rspq(&s->fw_evtq, 6, 0, 512, 64); 5920 - init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64); 5770 + ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; 5771 + if (ciq_size > SGE_MAX_IQ_SIZE) { 5772 + CH_WARN(adap, "CIQ size too small for available IQs\n"); 5773 + ciq_size = SGE_MAX_IQ_SIZE; 5774 + } 5775 + 5776 + for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) { 5777 + struct sge_ofld_rxq *r = &s->rdmaciq[i]; 5778 + 5779 + init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); 5780 + r->rspq.uld = CXGB4_ULD_RDMA; 5781 + } 5782 + 5783 + 
init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64); 5784 + init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64); 5921 5785 } 5922 5786 5923 5787 /* ··· 5977 5815 5978 5816 want = s->max_ethqsets + EXTRA_VECS; 5979 5817 if (is_offload(adap)) { 5980 - want += s->rdmaqs + s->ofldqsets; 5818 + want += s->rdmaqs + s->rdmaciqs + s->ofldqsets; 5981 5819 /* need nchan for each possible ULD */ 5982 - ofld_need = 2 * nchan; 5820 + ofld_need = 3 * nchan; 5983 5821 } 5984 5822 need = adap->params.nports + EXTRA_VECS + ofld_need; 5985 5823
+7
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
··· 232 232 const struct cxgb4_virt_res *vr; /* assorted HW resources */ 233 233 const unsigned short *mtus; /* MTU table */ 234 234 const unsigned short *rxq_ids; /* the ULD's Rx queue ids */ 235 + const unsigned short *ciq_ids; /* the ULD's concentrator IQ ids */ 235 236 unsigned short nrxq; /* # of Rx queues */ 236 237 unsigned short ntxq; /* # of Tx queues */ 238 + unsigned short nciq; /* # of concentrator IQ */ 237 239 unsigned char nchan:4; /* # of channels */ 238 240 unsigned char nports:4; /* # of ports */ 239 241 unsigned char wr_cred; /* WR 16-byte credits */ ··· 276 274 unsigned int cxgb4_port_idx(const struct net_device *dev); 277 275 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu, 278 276 unsigned int *idx); 277 + unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus, 278 + unsigned short header_size, 279 + unsigned short data_size_max, 280 + unsigned short data_size_align, 281 + unsigned int *mtu_idxp); 279 282 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4, 280 283 struct tp_tcp_stats *v6); 281 284 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
+4 -1
drivers/net/ethernet/chelsio/cxgb4/sge.c
··· 2215 2215 iq->cntxt_id = ntohs(c.iqid); 2216 2216 iq->abs_id = ntohs(c.physiqid); 2217 2217 iq->size--; /* subtract status entry */ 2218 - iq->adap = adap; 2219 2218 iq->netdev = dev; 2220 2219 iq->handler = hnd; 2221 2220 ··· 2511 2512 free_rspq_fl(adap, &oq->rspq, &oq->fl); 2512 2513 } 2513 2514 for (i = 0, oq = adap->sge.rdmarxq; i < adap->sge.rdmaqs; i++, oq++) { 2515 + if (oq->rspq.desc) 2516 + free_rspq_fl(adap, &oq->rspq, &oq->fl); 2517 + } 2518 + for (i = 0, oq = adap->sge.rdmaciq; i < adap->sge.rdmaciqs; i++, oq++) { 2514 2519 if (oq->rspq.desc) 2515 2520 free_rspq_fl(adap, &oq->rspq, &oq->fl); 2516 2521 }
+1
drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
··· 68 68 SGE_MAX_WR_LEN = 512, /* max WR size in bytes */ 69 69 SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */ 70 70 SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */ 71 + SGE_MAX_IQ_SIZE = 65520, 71 72 72 73 SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */ 73 74 SGE_TIMER_UPD_CIDX = 7, /* update cidx only */
+10
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
··· 227 227 #define DELACK(x) ((x) << 5) 228 228 #define ULP_MODE(x) ((x) << 8) 229 229 #define RCV_BUFSIZ(x) ((x) << 12) 230 + #define RCV_BUFSIZ_MASK 0x3FFU 230 231 #define DSCP(x) ((x) << 22) 231 232 #define SMAC_SEL(x) ((u64)(x) << 28) 232 233 #define L2T_IDX(x) ((u64)(x) << 36) ··· 277 276 #define TSTAMPS_EN(x) ((x) << 29) 278 277 #define SACK_EN(x) ((x) << 30) 279 278 __be64 opt0; 279 + }; 280 + 281 + struct cpl_t5_pass_accept_rpl { 282 + WR_HDR; 283 + union opcode_tid ot; 284 + __be32 opt2; 285 + __be64 opt0; 286 + __be32 iss; 287 + __be32 rsvd; 280 288 }; 281 289 282 290 struct cpl_act_open_req {