Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'cxgb4-next'

Hariprasad Shenai says:

====================
cxgb4: Misc. fixes for sge

Increases the value of MAX_IMM_TX_PKT_LEN to improve latency, sets the freelist
starving threshold based on adapter type, adds comments for the Tx flits and SGE
length code, and avoids calling t4_slow_intr_handler when we are not the master PF.

This patch series has been created against the net-next tree and includes patches
for the cxgb4 driver.

We have included all the maintainers of the respective drivers. Kindly review the
changes and let us know in case of any review comments.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+62 -17
+2 -1
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
··· 724 724 adap->swintr = 1; 725 725 t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE_A), v); 726 726 } 727 - t4_slow_intr_handler(adap); 727 + if (adap->flags & MASTER_PF) 728 + t4_slow_intr_handler(adap); 728 729 return IRQ_HANDLED; 729 730 } 730 731
+60 -16
drivers/net/ethernet/chelsio/cxgb4/sge.c
··· 121 121 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1) 122 122 123 123 /* 124 - * An FL with <= FL_STARVE_THRES buffers is starving and a periodic timer will 125 - * attempt to refill it. 126 - */ 127 - #define FL_STARVE_THRES 4 128 - 129 - /* 130 124 * Suspend an Ethernet Tx queue with fewer available descriptors than this. 131 125 * This is the same as calc_tx_descs() for a TSO packet with 132 126 * nr_frags == MAX_SKB_FRAGS. ··· 138 144 * Max Tx descriptor space we allow for an Ethernet packet to be inlined 139 145 * into a WR. 140 146 */ 141 - #define MAX_IMM_TX_PKT_LEN 128 147 + #define MAX_IMM_TX_PKT_LEN 256 142 148 143 149 /* 144 150 * Max size of a WR sent through a control Tx queue. ··· 242 248 return fl->size - 8; /* 1 descriptor = 8 buffers */ 243 249 } 244 250 245 - static inline bool fl_starving(const struct sge_fl *fl) 251 + /** 252 + * fl_starving - return whether a Free List is starving. 253 + * @adapter: pointer to the adapter 254 + * @fl: the Free List 255 + * 256 + * Tests specified Free List to see whether the number of buffers 257 + * available to the hardware has falled below our "starvation" 258 + * threshold. 
259 + */ 260 + static inline bool fl_starving(const struct adapter *adapter, 261 + const struct sge_fl *fl) 246 262 { 247 - return fl->avail - fl->pend_cred <= FL_STARVE_THRES; 263 + const struct sge *s = &adapter->sge; 264 + 265 + return fl->avail - fl->pend_cred <= s->fl_starve_thres; 248 266 } 249 267 250 268 static int map_skb(struct device *dev, const struct sk_buff *skb, ··· 592 586 unsigned int cred = q->avail; 593 587 __be64 *d = &q->desc[q->pidx]; 594 588 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 589 + int node; 595 590 596 591 gfp |= __GFP_NOWARN; 592 + node = dev_to_node(adap->pdev_dev); 597 593 598 594 if (s->fl_pg_order == 0) 599 595 goto alloc_small_pages; ··· 604 596 * Prefer large buffers 605 597 */ 606 598 while (n) { 607 - pg = __dev_alloc_pages(gfp, s->fl_pg_order); 599 + pg = alloc_pages_node(node, gfp | __GFP_COMP, s->fl_pg_order); 608 600 if (unlikely(!pg)) { 609 601 q->large_alloc_failed++; 610 602 break; /* fall back to single pages */ ··· 634 626 635 627 alloc_small_pages: 636 628 while (n--) { 637 - pg = __dev_alloc_page(gfp); 629 + pg = alloc_pages_node(node, gfp, 0); 638 630 if (unlikely(!pg)) { 639 631 q->alloc_failed++; 640 632 break; ··· 663 655 q->pend_cred += cred; 664 656 ring_fl_db(adap, q); 665 657 666 - if (unlikely(fl_starving(q))) { 658 + if (unlikely(fl_starving(adap, q))) { 667 659 smp_wmb(); 668 660 set_bit(q->cntxt_id - adap->sge.egr_start, 669 661 adap->sge.starving_fl); ··· 730 722 */ 731 723 static inline unsigned int sgl_len(unsigned int n) 732 724 { 725 + /* A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA 726 + * addresses. The DSGL Work Request starts off with a 32-bit DSGL 727 + * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N, 728 + * repeated sequences of { Length[i], Length[i+1], Address[i], 729 + * Address[i+1] } (this ensures that all addresses are on 64-bit 730 + * boundaries). If N is even, then Length[N+1] should be set to 0 and 731 + * Address[N+1] is omitted. 
732 + * 733 + * The following calculation incorporates all of the above. It's 734 + * somewhat hard to follow but, briefly: the "+2" accounts for the 735 + * first two flits which include the DSGL header, Length0 and 736 + * Address0; the "(3*(n-1))/2" covers the main body of list entries (3 737 + * flits for every pair of the remaining N) +1 if (n-1) is odd; and 738 + * finally the "+((n-1)&1)" adds the one remaining flit needed if 739 + * (n-1) is odd ... 740 + */ 733 741 n--; 734 742 return (3 * n) / 2 + (n & 1) + 2; 735 743 } ··· 793 769 unsigned int flits; 794 770 int hdrlen = is_eth_imm(skb); 795 771 772 + /* If the skb is small enough, we can pump it out as a work request 773 + * with only immediate data. In that case we just have to have the 774 + * TX Packet header plus the skb data in the Work Request. 775 + */ 776 + 796 777 if (hdrlen) 797 778 return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64)); 798 779 780 + /* Otherwise, we're going to have to construct a Scatter gather list 781 + * of the skb body and fragments. We also include the flits necessary 782 + * for the TX Packet Work Request and CPL. We always have a firmware 783 + * Write Header (incorporated as part of the cpl_tx_pkt_lso and 784 + * cpl_tx_pkt structures), followed by either a TX Packet Write CPL 785 + * message or, if we're doing a Large Send Offload, an LSO CPL message 786 + * with an embedded TX Packet Write CPL message. 
787 + */ 799 788 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4; 800 789 if (skb_shinfo(skb)->gso_size) 801 - flits += 2; 790 + flits += (sizeof(struct fw_eth_tx_pkt_wr) + 791 + sizeof(struct cpl_tx_pkt_lso_core) + 792 + sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); 793 + else 794 + flits += (sizeof(struct fw_eth_tx_pkt_wr) + 795 + sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64); 802 796 return flits; 803 797 } 804 798 ··· 2238 2196 { 2239 2197 struct adapter *adap = cookie; 2240 2198 2241 - t4_slow_intr_handler(adap); 2199 + if (adap->flags & MASTER_PF) 2200 + t4_slow_intr_handler(adap); 2242 2201 process_intrq(adap); 2243 2202 return IRQ_HANDLED; 2244 2203 } ··· 2254 2211 struct adapter *adap = cookie; 2255 2212 2256 2213 t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI_A), 0); 2257 - if (t4_slow_intr_handler(adap) | process_intrq(adap)) 2214 + if (((adap->flags & MASTER_PF) && t4_slow_intr_handler(adap)) | 2215 + process_intrq(adap)) 2258 2216 return IRQ_HANDLED; 2259 2217 return IRQ_NONE; /* probably shared interrupt */ 2260 2218 } ··· 2292 2248 clear_bit(id, s->starving_fl); 2293 2249 smp_mb__after_atomic(); 2294 2250 2295 - if (fl_starving(fl)) { 2251 + if (fl_starving(adap, fl)) { 2296 2252 rxq = container_of(fl, struct sge_eth_rxq, fl); 2297 2253 if (napi_reschedule(&rxq->rspq.napi)) 2298 2254 fl->starving++;