Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 's390-qeth-next'

Julian Wiedmann says:

====================
s390/qeth: updates 2019-08-23

Please apply one more round of qeth patches. These implement support for
a bunch of TX-related features - namely TX NAPI, BQL (Byte Queue Limits)
and xmit_more.

Note that this includes two qdio patches which lay the necessary
groundwork, and have been acked by Vasily.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+415 -158
+5 -1
arch/s390/include/asm/qdio.h
··· 16 16 #define QDIO_MAX_QUEUES_PER_IRQ 4 17 17 #define QDIO_MAX_BUFFERS_PER_Q 128 18 18 #define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1) 19 + #define QDIO_BUFNR(num) ((num) & QDIO_MAX_BUFFERS_MASK) 19 20 #define QDIO_MAX_ELEMENTS_PER_BUFFER 16 20 21 #define QDIO_SBAL_SIZE 256 21 22 ··· 360 359 qdio_handler_t *output_handler; 361 360 void (**queue_start_poll_array) (struct ccw_device *, int, 362 361 unsigned long); 363 - int scan_threshold; 362 + unsigned int scan_threshold; 364 363 unsigned long int_parm; 365 364 struct qdio_buffer **input_sbal_addr_array; 366 365 struct qdio_buffer **output_sbal_addr_array; ··· 417 416 extern int qdio_start_irq(struct ccw_device *, int); 418 417 extern int qdio_stop_irq(struct ccw_device *, int); 419 418 extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); 419 + extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr, 420 + bool is_input, unsigned int *bufnr, 421 + unsigned int *error); 420 422 extern int qdio_shutdown(struct ccw_device *, int); 421 423 extern int qdio_free(struct ccw_device *); 422 424 extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+1 -2
drivers/s390/cio/qdio.h
··· 206 206 struct qdio_outbuf_state *sbal_state; 207 207 /* timer to check for more outbound work */ 208 208 struct timer_list timer; 209 - /* used SBALs before tasklet schedule */ 210 - int scan_threshold; 211 209 }; 212 210 213 211 /* ··· 293 295 struct qdio_ssqd_desc ssqd_desc; 294 296 void (*orig_handler) (struct ccw_device *, unsigned long, struct irb *); 295 297 298 + unsigned int scan_threshold; /* used SBALs before tasklet schedule */ 296 299 int perf_stat_enabled; 297 300 298 301 struct qdr *qdr;
+51 -24
drivers/s390/cio/qdio_main.c
··· 647 647 qperf_inc(q, outbound_handler); 648 648 DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "koh: s:%02x c:%02x", 649 649 start, count); 650 - if (q->u.out.use_cq) 651 - qdio_handle_aobs(q, start, count); 652 650 } 653 651 654 652 q->handler(q->irq_ptr->cdev, q->qdio_error, q->nr, start, count, ··· 772 774 773 775 count = get_outbound_buffer_frontier(q, start); 774 776 775 - if (count) 777 + if (count) { 776 778 DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr); 779 + if (q->u.out.use_cq) 780 + qdio_handle_aobs(q, start, count); 781 + } 777 782 778 783 return count; 779 784 } ··· 880 879 struct qdio_q *out; 881 880 int i; 882 881 883 - if (!pci_out_supported(irq)) 882 + if (!pci_out_supported(irq) || !irq->scan_threshold) 884 883 return; 885 884 886 885 for_each_output_queue(irq, out, i) ··· 973 972 } 974 973 } 975 974 976 - if (!pci_out_supported(irq_ptr)) 975 + if (!pci_out_supported(irq_ptr) || !irq_ptr->scan_threshold) 977 976 return; 978 977 979 978 for_each_output_queue(irq_ptr, q, i) { ··· 1528 1527 static int handle_outbound(struct qdio_q *q, unsigned int callflags, 1529 1528 int bufnr, int count) 1530 1529 { 1530 + const unsigned int scan_threshold = q->irq_ptr->scan_threshold; 1531 1531 unsigned char state = 0; 1532 1532 int used, rc = 0; 1533 1533 ··· 1567 1565 rc = qdio_kick_outbound_q(q, 0); 1568 1566 } 1569 1567 1568 + /* Let drivers implement their own completion scanning: */ 1569 + if (!scan_threshold) 1570 + return rc; 1571 + 1570 1572 /* in case of SIGA errors we must process the error immediately */ 1571 - if (used >= q->u.out.scan_threshold || rc) 1573 + if (used >= scan_threshold || rc) 1572 1574 qdio_tasklet_schedule(q); 1573 1575 else 1574 1576 /* free the SBALs in case of no further traffic */ ··· 1661 1655 } 1662 1656 EXPORT_SYMBOL(qdio_start_irq); 1663 1657 1658 + static int __qdio_inspect_queue(struct qdio_q *q, unsigned int *bufnr, 1659 + unsigned int *error) 1660 + { 1661 + unsigned int start = q->first_to_check; 1662 + int count; 
1663 + 1664 + count = q->is_input_q ? qdio_inbound_q_moved(q, start) : 1665 + qdio_outbound_q_moved(q, start); 1666 + if (count == 0) 1667 + return 0; 1668 + 1669 + *bufnr = start; 1670 + *error = q->qdio_error; 1671 + 1672 + /* for the next time */ 1673 + q->first_to_check = add_buf(start, count); 1674 + q->qdio_error = 0; 1675 + 1676 + return count; 1677 + } 1678 + 1679 + int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr, bool is_input, 1680 + unsigned int *bufnr, unsigned int *error) 1681 + { 1682 + struct qdio_irq *irq_ptr = cdev->private->qdio_data; 1683 + struct qdio_q *q; 1684 + 1685 + if (!irq_ptr) 1686 + return -ENODEV; 1687 + q = is_input ? irq_ptr->input_qs[nr] : irq_ptr->output_qs[nr]; 1688 + 1689 + if (need_siga_sync(q)) 1690 + qdio_siga_sync_q(q); 1691 + 1692 + return __qdio_inspect_queue(q, bufnr, error); 1693 + } 1694 + EXPORT_SYMBOL_GPL(qdio_inspect_queue); 1695 + 1664 1696 /** 1665 1697 * qdio_get_next_buffers - process input buffers 1666 1698 * @cdev: associated ccw_device for the qdio subchannel ··· 1716 1672 { 1717 1673 struct qdio_q *q; 1718 1674 struct qdio_irq *irq_ptr = cdev->private->qdio_data; 1719 - unsigned int start; 1720 - int count; 1721 1675 1722 1676 if (!irq_ptr) 1723 1677 return -ENODEV; 1724 1678 q = irq_ptr->input_qs[nr]; 1725 - start = q->first_to_check; 1726 1679 1727 1680 /* 1728 1681 * Cannot rely on automatic sync after interrupt since queues may ··· 1730 1689 1731 1690 qdio_check_outbound_pci_queues(irq_ptr); 1732 1691 1733 - count = qdio_inbound_q_moved(q, start); 1734 - if (count == 0) 1735 - return 0; 1736 - 1737 - start = add_buf(start, count); 1738 - q->first_to_check = start; 1739 - 1740 1692 /* Note: upper-layer MUST stop processing immediately here ... 
*/ 1741 1693 if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) 1742 1694 return -EIO; 1743 1695 1744 - *bufnr = q->first_to_kick; 1745 - *error = q->qdio_error; 1746 - 1747 - /* for the next time */ 1748 - q->first_to_kick = add_buf(q->first_to_kick, count); 1749 - q->qdio_error = 0; 1750 - 1751 - return count; 1696 + return __qdio_inspect_queue(q, bufnr, error); 1752 1697 } 1753 1698 EXPORT_SYMBOL(qdio_get_next_buffers); 1754 1699
+1 -1
drivers/s390/cio/qdio_setup.c
··· 248 248 output_sbal_state_array += QDIO_MAX_BUFFERS_PER_Q; 249 249 250 250 q->is_input_q = 0; 251 - q->u.out.scan_threshold = qdio_init->scan_threshold; 252 251 setup_storage_lists(q, irq_ptr, output_sbal_array, i); 253 252 output_sbal_array += QDIO_MAX_BUFFERS_PER_Q; 254 253 ··· 473 474 irq_ptr->nr_input_qs = init_data->no_input_qs; 474 475 irq_ptr->nr_output_qs = init_data->no_output_qs; 475 476 irq_ptr->cdev = init_data->cdev; 477 + irq_ptr->scan_threshold = init_data->scan_threshold; 476 478 ccw_device_get_schid(irq_ptr->cdev, &irq_ptr->schid); 477 479 setup_queues(irq_ptr, init_data); 478 480
+52
drivers/s390/net/qeth_core.h
··· 22 22 #include <linux/hashtable.h> 23 23 #include <linux/ip.h> 24 24 #include <linux/refcount.h> 25 + #include <linux/timer.h> 25 26 #include <linux/wait.h> 26 27 #include <linux/workqueue.h> 27 28 ··· 31 30 #include <net/ipv6.h> 32 31 #include <net/if_inet6.h> 33 32 #include <net/addrconf.h> 33 + #include <net/sch_generic.h> 34 34 #include <net/tcp.h> 35 35 36 36 #include <asm/debug.h> ··· 378 376 #define QETH_HDR_EXT_CSUM_TRANSP_REQ 0x20 379 377 #define QETH_HDR_EXT_UDP 0x40 /*bit off for TCP*/ 380 378 379 + static inline bool qeth_l2_same_vlan(struct qeth_hdr_layer2 *h1, 380 + struct qeth_hdr_layer2 *h2) 381 + { 382 + return !((h1->flags[2] ^ h2->flags[2]) & QETH_LAYER2_FLAG_VLAN) && 383 + h1->vlan_id == h2->vlan_id; 384 + } 385 + 386 + static inline bool qeth_l3_iqd_same_vlan(struct qeth_hdr_layer3 *h1, 387 + struct qeth_hdr_layer3 *h2) 388 + { 389 + return !((h1->ext_flags ^ h2->ext_flags) & QETH_HDR_EXT_VLAN_FRAME) && 390 + h1->vlan_id == h2->vlan_id; 391 + } 392 + 393 + static inline bool qeth_l3_same_next_hop(struct qeth_hdr_layer3 *h1, 394 + struct qeth_hdr_layer3 *h2) 395 + { 396 + return !((h1->flags ^ h2->flags) & QETH_HDR_IPV6) && 397 + ipv6_addr_equal(&h1->next_hop.ipv6_addr, 398 + &h2->next_hop.ipv6_addr); 399 + } 400 + 381 401 enum qeth_qdio_info_states { 382 402 QETH_QDIO_UNINITIALIZED, 383 403 QETH_QDIO_ALLOCATED, ··· 448 424 struct qdio_buffer *buffer; 449 425 atomic_t state; 450 426 int next_element_to_fill; 427 + unsigned int bytes; 451 428 struct sk_buff_head skb_list; 452 429 int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER]; 453 430 ··· 498 473 u64 tso_bytes; 499 474 u64 packing_mode_switch; 500 475 u64 stopped; 476 + u64 completion_yield; 477 + u64 completion_timer; 501 478 502 479 /* rtnl_link_stats64 */ 503 480 u64 tx_packets; ··· 507 480 u64 tx_errors; 508 481 u64 tx_dropped; 509 482 }; 483 + 484 + #define QETH_TX_TIMER_USECS 500 510 485 511 486 struct qeth_qdio_out_q { 512 487 struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; ··· 528 
499 atomic_t used_buffers; 529 500 /* indicates whether PCI flag must be set (or if one is outstanding) */ 530 501 atomic_t set_pci_flags_count; 502 + struct napi_struct napi; 503 + struct timer_list timer; 504 + struct qeth_hdr *prev_hdr; 505 + u8 bulk_start; 531 506 }; 507 + 508 + #define qeth_for_each_output_queue(card, q, i) \ 509 + for (i = 0; i < card->qdio.no_out_queues && \ 510 + (q = card->qdio.out_qs[i]); i++) 511 + 512 + #define qeth_napi_to_out_queue(n) container_of(n, struct qeth_qdio_out_q, napi) 513 + 514 + static inline void qeth_tx_arm_timer(struct qeth_qdio_out_q *queue) 515 + { 516 + if (timer_pending(&queue->timer)) 517 + return; 518 + mod_timer(&queue->timer, usecs_to_jiffies(QETH_TX_TIMER_USECS) + 519 + jiffies); 520 + } 532 521 533 522 static inline bool qeth_out_queue_is_full(struct qeth_qdio_out_q *queue) 534 523 { 535 524 return atomic_read(&queue->used_buffers) >= QDIO_MAX_BUFFERS_PER_Q; 525 + } 526 + 527 + static inline bool qeth_out_queue_is_empty(struct qeth_qdio_out_q *queue) 528 + { 529 + return atomic_read(&queue->used_buffers) == 0; 536 530 } 537 531 538 532 struct qeth_qdio_info {
+296 -116
drivers/s390/net/qeth_core_main.c
··· 71 71 static void qeth_notify_skbs(struct qeth_qdio_out_q *queue, 72 72 struct qeth_qdio_out_buffer *buf, 73 73 enum iucv_tx_notify notification); 74 - static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); 74 + static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error, 75 + int budget); 75 76 static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); 76 77 77 78 static void qeth_close_dev_handler(struct work_struct *work) ··· 412 411 /* release here to avoid interleaving between 413 412 outbound tasklet and inbound tasklet 414 413 regarding notifications and lifecycle */ 415 - qeth_release_skbs(c); 414 + qeth_tx_complete_buf(c, forced_cleanup, 0); 416 415 417 416 c = f->next_pending; 418 417 WARN_ON_ONCE(head->next_pending != f); ··· 1078 1077 } 1079 1078 } 1080 1079 1081 - static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf) 1080 + static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error, 1081 + int budget) 1082 1082 { 1083 + struct qeth_qdio_out_q *queue = buf->q; 1083 1084 struct sk_buff *skb; 1084 1085 1085 1086 /* release may never happen from within CQ tasklet scope */ 1086 1087 WARN_ON_ONCE(atomic_read(&buf->state) == QETH_QDIO_BUF_IN_CQ); 1087 1088 1088 1089 if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING) 1089 - qeth_notify_skbs(buf->q, buf, TX_NOTIFY_GENERALERROR); 1090 + qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR); 1090 1091 1091 - while ((skb = __skb_dequeue(&buf->skb_list)) != NULL) 1092 - consume_skb(skb); 1092 + /* Empty buffer? */ 1093 + if (buf->next_element_to_fill == 0) 1094 + return; 1095 + 1096 + QETH_TXQ_STAT_INC(queue, bufs); 1097 + QETH_TXQ_STAT_ADD(queue, buf_elements, buf->next_element_to_fill); 1098 + while ((skb = __skb_dequeue(&buf->skb_list)) != NULL) { 1099 + unsigned int bytes = qdisc_pkt_len(skb); 1100 + bool is_tso = skb_is_gso(skb); 1101 + unsigned int packets; 1102 + 1103 + packets = is_tso ? 
skb_shinfo(skb)->gso_segs : 1; 1104 + if (error) { 1105 + QETH_TXQ_STAT_ADD(queue, tx_errors, packets); 1106 + } else { 1107 + QETH_TXQ_STAT_ADD(queue, tx_packets, packets); 1108 + QETH_TXQ_STAT_ADD(queue, tx_bytes, bytes); 1109 + if (skb->ip_summed == CHECKSUM_PARTIAL) 1110 + QETH_TXQ_STAT_ADD(queue, skbs_csum, packets); 1111 + if (skb_is_nonlinear(skb)) 1112 + QETH_TXQ_STAT_INC(queue, skbs_sg); 1113 + if (is_tso) { 1114 + QETH_TXQ_STAT_INC(queue, skbs_tso); 1115 + QETH_TXQ_STAT_ADD(queue, tso_bytes, bytes); 1116 + } 1117 + } 1118 + 1119 + napi_consume_skb(skb, budget); 1120 + } 1093 1121 } 1094 1122 1095 1123 static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, 1096 - struct qeth_qdio_out_buffer *buf) 1124 + struct qeth_qdio_out_buffer *buf, 1125 + bool error, int budget) 1097 1126 { 1098 1127 int i; 1099 1128 ··· 1131 1100 if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ) 1132 1101 atomic_dec(&queue->set_pci_flags_count); 1133 1102 1134 - qeth_release_skbs(buf); 1103 + qeth_tx_complete_buf(buf, error, budget); 1135 1104 1136 1105 for (i = 0; i < queue->max_elements; ++i) { 1137 1106 if (buf->buffer->element[i].addr && buf->is_header[i]) ··· 1142 1111 1143 1112 qeth_scrub_qdio_buffer(buf->buffer, queue->max_elements); 1144 1113 buf->next_element_to_fill = 0; 1114 + buf->bytes = 0; 1145 1115 atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY); 1146 1116 } 1147 1117 ··· 1154 1122 if (!q->bufs[j]) 1155 1123 continue; 1156 1124 qeth_cleanup_handled_pending(q, j, 1); 1157 - qeth_clear_output_buffer(q, q->bufs[j]); 1125 + qeth_clear_output_buffer(q, q->bufs[j], true, 0); 1158 1126 if (free) { 1159 1127 kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]); 1160 1128 q->bufs[j] = NULL; ··· 2287 2255 return q; 2288 2256 } 2289 2257 2258 + static void qeth_tx_completion_timer(struct timer_list *timer) 2259 + { 2260 + struct qeth_qdio_out_q *queue = from_timer(queue, timer, timer); 2261 + 2262 + napi_schedule(&queue->napi); 2263 + QETH_TXQ_STAT_INC(queue, 
completion_timer); 2264 + } 2265 + 2290 2266 static int qeth_alloc_qdio_queues(struct qeth_card *card) 2291 2267 { 2292 2268 int i, j; ··· 2316 2276 2317 2277 /* outbound */ 2318 2278 for (i = 0; i < card->qdio.no_out_queues; ++i) { 2319 - card->qdio.out_qs[i] = qeth_alloc_output_queue(); 2320 - if (!card->qdio.out_qs[i]) 2279 + struct qeth_qdio_out_q *queue; 2280 + 2281 + queue = qeth_alloc_output_queue(); 2282 + if (!queue) 2321 2283 goto out_freeoutq; 2322 2284 QETH_CARD_TEXT_(card, 2, "outq %i", i); 2323 - QETH_CARD_HEX(card, 2, &card->qdio.out_qs[i], sizeof(void *)); 2324 - card->qdio.out_qs[i]->card = card; 2325 - card->qdio.out_qs[i]->queue_no = i; 2285 + QETH_CARD_HEX(card, 2, &queue, sizeof(void *)); 2286 + card->qdio.out_qs[i] = queue; 2287 + queue->card = card; 2288 + queue->queue_no = i; 2289 + timer_setup(&queue->timer, qeth_tx_completion_timer, 0); 2290 + 2326 2291 /* give outbound qeth_qdio_buffers their qdio_buffers */ 2327 2292 for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { 2328 - WARN_ON(card->qdio.out_qs[i]->bufs[j] != NULL); 2329 - if (qeth_init_qdio_out_buf(card->qdio.out_qs[i], j)) 2293 + WARN_ON(queue->bufs[j]); 2294 + if (qeth_init_qdio_out_buf(queue, j)) 2330 2295 goto out_freeoutqbufs; 2331 2296 } 2332 2297 } ··· 2671 2626 queue->max_elements = QETH_MAX_BUFFER_ELEMENTS(card); 2672 2627 queue->next_buf_to_fill = 0; 2673 2628 queue->do_pack = 0; 2629 + queue->prev_hdr = NULL; 2630 + queue->bulk_start = 0; 2674 2631 atomic_set(&queue->used_buffers, 0); 2675 2632 atomic_set(&queue->set_pci_flags_count, 0); 2676 2633 atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); 2634 + netdev_tx_reset_queue(netdev_get_tx_queue(card->dev, i)); 2677 2635 } 2678 2636 return 0; 2679 2637 } ··· 3245 3197 static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, 3246 3198 int count) 3247 3199 { 3200 + struct qeth_card *card = queue->card; 3248 3201 struct qeth_qdio_out_buffer *buf; 3249 3202 int rc; 3250 3203 int i; ··· 3289 3240 } 3290 3241 } 3291 
3242 3292 - QETH_TXQ_STAT_ADD(queue, bufs, count); 3293 3243 qdio_flags = QDIO_FLAG_SYNC_OUTPUT; 3294 3244 if (atomic_read(&queue->set_pci_flags_count)) 3295 3245 qdio_flags |= QDIO_FLAG_PCI_OUT; 3296 3246 rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, 3297 3247 queue->queue_no, index, count); 3248 + 3249 + /* Fake the TX completion interrupt: */ 3250 + if (IS_IQD(card)) 3251 + napi_schedule(&queue->napi); 3252 + 3298 3253 if (rc) { 3299 - QETH_TXQ_STAT_ADD(queue, tx_errors, count); 3300 3254 /* ignore temporary SIGA errors without busy condition */ 3301 3255 if (rc == -ENOBUFS) 3302 3256 return; ··· 3314 3262 qeth_schedule_recovery(queue->card); 3315 3263 return; 3316 3264 } 3265 + } 3266 + 3267 + static void qeth_flush_queue(struct qeth_qdio_out_q *queue) 3268 + { 3269 + qeth_flush_buffers(queue, queue->bulk_start, 1); 3270 + 3271 + queue->bulk_start = QDIO_BUFNR(queue->bulk_start + 1); 3272 + queue->prev_hdr = NULL; 3317 3273 } 3318 3274 3319 3275 static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) ··· 3485 3425 int bidx = i % QDIO_MAX_BUFFERS_PER_Q; 3486 3426 buffer = queue->bufs[bidx]; 3487 3427 qeth_handle_send_error(card, buffer, qdio_error); 3488 - 3489 - if (queue->bufstates && 3490 - (queue->bufstates[bidx].flags & 3491 - QDIO_OUTBUF_STATE_FLAG_PENDING) != 0) { 3492 - WARN_ON_ONCE(card->options.cq != QETH_CQ_ENABLED); 3493 - 3494 - if (atomic_cmpxchg(&buffer->state, 3495 - QETH_QDIO_BUF_PRIMED, 3496 - QETH_QDIO_BUF_PENDING) == 3497 - QETH_QDIO_BUF_PRIMED) { 3498 - qeth_notify_skbs(queue, buffer, 3499 - TX_NOTIFY_PENDING); 3500 - } 3501 - QETH_CARD_TEXT_(queue->card, 5, "pel%d", bidx); 3502 - 3503 - /* prepare the queue slot for re-use: */ 3504 - qeth_scrub_qdio_buffer(buffer->buffer, 3505 - queue->max_elements); 3506 - if (qeth_init_qdio_out_buf(queue, bidx)) { 3507 - QETH_CARD_TEXT(card, 2, "outofbuf"); 3508 - qeth_schedule_recovery(card); 3509 - } 3510 - } else { 3511 - if (card->options.cq == QETH_CQ_ENABLED) { 3512 - enum 
iucv_tx_notify n; 3513 - 3514 - n = qeth_compute_cq_notification( 3515 - buffer->buffer->element[15].sflags, 0); 3516 - qeth_notify_skbs(queue, buffer, n); 3517 - } 3518 - 3519 - qeth_clear_output_buffer(queue, buffer); 3520 - } 3521 - qeth_cleanup_handled_pending(queue, bidx, 0); 3428 + qeth_clear_output_buffer(queue, buffer, qdio_error, 0); 3522 3429 } 3523 - atomic_sub(count, &queue->used_buffers); 3524 - /* check if we need to do something on this outbound queue */ 3525 - if (!IS_IQD(card)) 3526 - qeth_check_outbound_queue(queue); 3527 3430 3528 - if (IS_IQD(card)) 3529 - __queue = qeth_iqd_translate_txq(dev, __queue); 3431 + atomic_sub(count, &queue->used_buffers); 3432 + qeth_check_outbound_queue(queue); 3433 + 3530 3434 txq = netdev_get_tx_queue(dev, __queue); 3531 3435 /* xmit may have observed the full-condition, but not yet stopped the 3532 3436 * txq. In which case the code below won't trigger. So before returning, ··· 3679 3655 return 0; 3680 3656 } 3681 3657 3682 - static void __qeth_fill_buffer(struct sk_buff *skb, 3683 - struct qeth_qdio_out_buffer *buf, 3684 - bool is_first_elem, unsigned int offset) 3658 + static bool qeth_iqd_may_bulk(struct qeth_qdio_out_q *queue, 3659 + struct qeth_qdio_out_buffer *buffer, 3660 + struct sk_buff *curr_skb, 3661 + struct qeth_hdr *curr_hdr) 3662 + { 3663 + struct qeth_hdr *prev_hdr = queue->prev_hdr; 3664 + 3665 + if (!prev_hdr) 3666 + return true; 3667 + 3668 + /* All packets must have the same target: */ 3669 + if (curr_hdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) { 3670 + struct sk_buff *prev_skb = skb_peek(&buffer->skb_list); 3671 + 3672 + return ether_addr_equal(eth_hdr(prev_skb)->h_dest, 3673 + eth_hdr(curr_skb)->h_dest) && 3674 + qeth_l2_same_vlan(&prev_hdr->hdr.l2, &curr_hdr->hdr.l2); 3675 + } 3676 + 3677 + return qeth_l3_same_next_hop(&prev_hdr->hdr.l3, &curr_hdr->hdr.l3) && 3678 + qeth_l3_iqd_same_vlan(&prev_hdr->hdr.l3, &curr_hdr->hdr.l3); 3679 + } 3680 + 3681 + static unsigned int 
__qeth_fill_buffer(struct sk_buff *skb, 3682 + struct qeth_qdio_out_buffer *buf, 3683 + bool is_first_elem, unsigned int offset) 3685 3684 { 3686 3685 struct qdio_buffer *buffer = buf->buffer; 3687 3686 int element = buf->next_element_to_fill; ··· 3761 3714 if (buffer->element[element - 1].eflags) 3762 3715 buffer->element[element - 1].eflags = SBAL_EFLAGS_LAST_FRAG; 3763 3716 buf->next_element_to_fill = element; 3717 + return element; 3764 3718 } 3765 3719 3766 3720 /** 3767 3721 * qeth_fill_buffer() - map skb into an output buffer 3768 - * @queue: QDIO queue to submit the buffer on 3769 3722 * @buf: buffer to transport the skb 3770 3723 * @skb: skb to map into the buffer 3771 3724 * @hdr: qeth_hdr for this skb. Either at skb->data, or allocated 3772 3725 * from qeth_core_header_cache. 3773 3726 * @offset: when mapping the skb, start at skb->data + offset 3774 3727 * @hd_len: if > 0, build a dedicated header element of this size 3775 - * flush: Prepare the buffer to be flushed, regardless of its fill level. 3776 3728 */ 3777 - static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, 3778 - struct qeth_qdio_out_buffer *buf, 3779 - struct sk_buff *skb, struct qeth_hdr *hdr, 3780 - unsigned int offset, unsigned int hd_len, 3781 - bool flush) 3729 + static unsigned int qeth_fill_buffer(struct qeth_qdio_out_buffer *buf, 3730 + struct sk_buff *skb, struct qeth_hdr *hdr, 3731 + unsigned int offset, unsigned int hd_len) 3782 3732 { 3783 3733 struct qdio_buffer *buffer = buf->buffer; 3784 3734 bool is_first_elem = true; ··· 3795 3751 buf->next_element_to_fill++; 3796 3752 } 3797 3753 3798 - __qeth_fill_buffer(skb, buf, is_first_elem, offset); 3799 - 3800 - if (!queue->do_pack) { 3801 - QETH_CARD_TEXT(queue->card, 6, "fillbfnp"); 3802 - } else { 3803 - QETH_CARD_TEXT(queue->card, 6, "fillbfpa"); 3804 - 3805 - QETH_TXQ_STAT_INC(queue, skbs_pack); 3806 - /* If the buffer still has free elements, keep using it. 
*/ 3807 - if (!flush && 3808 - buf->next_element_to_fill < queue->max_elements) 3809 - return 0; 3810 - } 3811 - 3812 - /* flush out the buffer */ 3813 - atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED); 3814 - queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) % 3815 - QDIO_MAX_BUFFERS_PER_Q; 3816 - return 1; 3754 + return __qeth_fill_buffer(skb, buf, is_first_elem, offset); 3817 3755 } 3818 3756 3819 - static int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, 3820 - struct sk_buff *skb, struct qeth_hdr *hdr, 3821 - unsigned int offset, unsigned int hd_len) 3757 + static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue, 3758 + struct sk_buff *skb, unsigned int elements, 3759 + struct qeth_hdr *hdr, unsigned int offset, 3760 + unsigned int hd_len) 3822 3761 { 3823 - int index = queue->next_buf_to_fill; 3824 - struct qeth_qdio_out_buffer *buffer = queue->bufs[index]; 3762 + struct qeth_qdio_out_buffer *buffer = queue->bufs[queue->bulk_start]; 3763 + unsigned int bytes = qdisc_pkt_len(skb); 3764 + unsigned int next_element; 3825 3765 struct netdev_queue *txq; 3826 3766 bool stopped = false; 3767 + bool flush; 3768 + 3769 + txq = netdev_get_tx_queue(card->dev, skb_get_queue_mapping(skb)); 3827 3770 3828 3771 /* Just a sanity check, the wake/stop logic should ensure that we always 3829 3772 * get a free buffer. 
··· 3818 3787 if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) 3819 3788 return -EBUSY; 3820 3789 3821 - txq = netdev_get_tx_queue(queue->card->dev, skb_get_queue_mapping(skb)); 3790 + if ((buffer->next_element_to_fill + elements > queue->max_elements) || 3791 + !qeth_iqd_may_bulk(queue, buffer, skb, hdr)) { 3792 + atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); 3793 + qeth_flush_queue(queue); 3794 + buffer = queue->bufs[queue->bulk_start]; 3822 3795 3823 - if (atomic_inc_return(&queue->used_buffers) >= QDIO_MAX_BUFFERS_PER_Q) { 3796 + /* Sanity-check again: */ 3797 + if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) 3798 + return -EBUSY; 3799 + } 3800 + 3801 + if (buffer->next_element_to_fill == 0 && 3802 + atomic_inc_return(&queue->used_buffers) >= QDIO_MAX_BUFFERS_PER_Q) { 3824 3803 /* If a TX completion happens right _here_ and misses to wake 3825 3804 * the txq, then our re-check below will catch the race. 3826 3805 */ ··· 3839 3798 stopped = true; 3840 3799 } 3841 3800 3842 - qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len, stopped); 3843 - qeth_flush_buffers(queue, index, 1); 3801 + next_element = qeth_fill_buffer(buffer, skb, hdr, offset, hd_len); 3802 + buffer->bytes += bytes; 3803 + queue->prev_hdr = hdr; 3804 + 3805 + flush = __netdev_tx_sent_queue(txq, bytes, 3806 + !stopped && netdev_xmit_more()); 3807 + 3808 + if (flush || next_element >= queue->max_elements) { 3809 + atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); 3810 + qeth_flush_queue(queue); 3811 + } 3844 3812 3845 3813 if (stopped && !qeth_out_queue_is_full(queue)) 3846 3814 netif_tx_start_queue(txq); ··· 3862 3812 int elements_needed) 3863 3813 { 3864 3814 struct qeth_qdio_out_buffer *buffer; 3815 + unsigned int next_element; 3865 3816 struct netdev_queue *txq; 3866 3817 bool stopped = false; 3867 3818 int start_index; ··· 3925 3874 stopped = true; 3926 3875 } 3927 3876 3928 - flush_count += qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len, 3929 - stopped); 
3877 + next_element = qeth_fill_buffer(buffer, skb, hdr, offset, hd_len); 3878 + 3879 + if (queue->do_pack) 3880 + QETH_TXQ_STAT_INC(queue, skbs_pack); 3881 + if (!queue->do_pack || stopped || next_element >= queue->max_elements) { 3882 + flush_count++; 3883 + atomic_set(&buffer->state, QETH_QDIO_BUF_PRIMED); 3884 + queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) % 3885 + QDIO_MAX_BUFFERS_PER_Q; 3886 + } 3887 + 3930 3888 if (flush_count) 3931 3889 qeth_flush_buffers(queue, start_index, flush_count); 3932 3890 else if (!atomic_read(&queue->set_pci_flags_count)) ··· 4002 3942 unsigned int hd_len = 0; 4003 3943 unsigned int elements; 4004 3944 int push_len, rc; 4005 - bool is_sg; 4006 3945 4007 3946 if (is_tso) { 4008 3947 hw_hdr_len = sizeof(struct qeth_hdr_tso); ··· 4030 3971 qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr, 4031 3972 frame_len - proto_len, skb, proto_len); 4032 3973 4033 - is_sg = skb_is_nonlinear(skb); 4034 3974 if (IS_IQD(card)) { 4035 - rc = qeth_do_send_packet_fast(queue, skb, hdr, data_offset, 4036 - hd_len); 3975 + rc = __qeth_xmit(card, queue, skb, elements, hdr, data_offset, 3976 + hd_len); 4037 3977 } else { 4038 3978 /* TODO: drop skb_orphan() once TX completion is fast enough */ 4039 3979 skb_orphan(skb); ··· 4040 3982 hd_len, elements); 4041 3983 } 4042 3984 4043 - if (!rc) { 4044 - QETH_TXQ_STAT_ADD(queue, buf_elements, elements); 4045 - if (is_sg) 4046 - QETH_TXQ_STAT_INC(queue, skbs_sg); 4047 - if (is_tso) { 4048 - QETH_TXQ_STAT_INC(queue, skbs_tso); 4049 - QETH_TXQ_STAT_ADD(queue, tso_bytes, frame_len); 4050 - } 4051 - } else { 4052 - if (!push_len) 4053 - kmem_cache_free(qeth_core_header_cache, hdr); 4054 - } 3985 + if (rc && !push_len) 3986 + kmem_cache_free(qeth_core_header_cache, hdr); 3987 + 4055 3988 return rc; 4056 3989 } 4057 3990 EXPORT_SYMBOL_GPL(qeth_xmit); ··· 4773 4724 init_data.input_sbal_addr_array = in_sbal_ptrs; 4774 4725 init_data.output_sbal_addr_array = out_sbal_ptrs; 4775 4726 
init_data.output_sbal_state_array = card->qdio.out_bufstates; 4776 - init_data.scan_threshold = IS_IQD(card) ? 1 : 32; 4727 + init_data.scan_threshold = IS_IQD(card) ? 0 : 32; 4777 4728 4778 4729 if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED, 4779 4730 QETH_QDIO_ESTABLISHED) == QETH_QDIO_ALLOCATED) { ··· 5186 5137 return work_done; 5187 5138 } 5188 5139 EXPORT_SYMBOL_GPL(qeth_poll); 5140 + 5141 + static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue, 5142 + unsigned int bidx, bool error, int budget) 5143 + { 5144 + struct qeth_qdio_out_buffer *buffer = queue->bufs[bidx]; 5145 + u8 sflags = buffer->buffer->element[15].sflags; 5146 + struct qeth_card *card = queue->card; 5147 + 5148 + if (queue->bufstates && (queue->bufstates[bidx].flags & 5149 + QDIO_OUTBUF_STATE_FLAG_PENDING)) { 5150 + WARN_ON_ONCE(card->options.cq != QETH_CQ_ENABLED); 5151 + 5152 + if (atomic_cmpxchg(&buffer->state, QETH_QDIO_BUF_PRIMED, 5153 + QETH_QDIO_BUF_PENDING) == 5154 + QETH_QDIO_BUF_PRIMED) 5155 + qeth_notify_skbs(queue, buffer, TX_NOTIFY_PENDING); 5156 + 5157 + QETH_CARD_TEXT_(card, 5, "pel%u", bidx); 5158 + 5159 + /* prepare the queue slot for re-use: */ 5160 + qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements); 5161 + if (qeth_init_qdio_out_buf(queue, bidx)) { 5162 + QETH_CARD_TEXT(card, 2, "outofbuf"); 5163 + qeth_schedule_recovery(card); 5164 + } 5165 + 5166 + return; 5167 + } 5168 + 5169 + if (card->options.cq == QETH_CQ_ENABLED) 5170 + qeth_notify_skbs(queue, buffer, 5171 + qeth_compute_cq_notification(sflags, 0)); 5172 + qeth_clear_output_buffer(queue, buffer, error, budget); 5173 + } 5174 + 5175 + static int qeth_tx_poll(struct napi_struct *napi, int budget) 5176 + { 5177 + struct qeth_qdio_out_q *queue = qeth_napi_to_out_queue(napi); 5178 + unsigned int queue_no = queue->queue_no; 5179 + struct qeth_card *card = queue->card; 5180 + struct net_device *dev = card->dev; 5181 + unsigned int work_done = 0; 5182 + struct netdev_queue *txq; 5183 + 5184 + 
txq = netdev_get_tx_queue(dev, qeth_iqd_translate_txq(dev, queue_no)); 5185 + 5186 + while (1) { 5187 + unsigned int start, error, i; 5188 + unsigned int packets = 0; 5189 + unsigned int bytes = 0; 5190 + int completed; 5191 + 5192 + if (qeth_out_queue_is_empty(queue)) { 5193 + napi_complete(napi); 5194 + return 0; 5195 + } 5196 + 5197 + /* Give the CPU a breather: */ 5198 + if (work_done >= QDIO_MAX_BUFFERS_PER_Q) { 5199 + QETH_TXQ_STAT_INC(queue, completion_yield); 5200 + if (napi_complete_done(napi, 0)) 5201 + napi_schedule(napi); 5202 + return 0; 5203 + } 5204 + 5205 + completed = qdio_inspect_queue(CARD_DDEV(card), queue_no, false, 5206 + &start, &error); 5207 + if (completed <= 0) { 5208 + /* Ensure we see TX completion for pending work: */ 5209 + if (napi_complete_done(napi, 0)) 5210 + qeth_tx_arm_timer(queue); 5211 + return 0; 5212 + } 5213 + 5214 + for (i = start; i < start + completed; i++) { 5215 + struct qeth_qdio_out_buffer *buffer; 5216 + unsigned int bidx = QDIO_BUFNR(i); 5217 + 5218 + buffer = queue->bufs[bidx]; 5219 + packets += skb_queue_len(&buffer->skb_list); 5220 + bytes += buffer->bytes; 5221 + 5222 + qeth_handle_send_error(card, buffer, error); 5223 + qeth_iqd_tx_complete(queue, bidx, error, budget); 5224 + qeth_cleanup_handled_pending(queue, bidx, false); 5225 + } 5226 + 5227 + netdev_tx_completed_queue(txq, packets, bytes); 5228 + atomic_sub(completed, &queue->used_buffers); 5229 + work_done += completed; 5230 + 5231 + /* xmit may have observed the full-condition, but not yet 5232 + * stopped the txq. In which case the code below won't trigger. 5233 + * So before returning, xmit will re-check the txq's fill level 5234 + * and wake it up if needed. 
5235 + */ 5236 + if (netif_tx_queue_stopped(txq) && 5237 + !qeth_out_queue_is_full(queue)) 5238 + netif_tx_wake_queue(txq); 5239 + } 5240 + } 5189 5241 5190 5242 static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd) 5191 5243 { ··· 6234 6084 napi_enable(&card->napi); 6235 6085 local_bh_disable(); 6236 6086 napi_schedule(&card->napi); 6087 + if (IS_IQD(card)) { 6088 + struct qeth_qdio_out_q *queue; 6089 + unsigned int i; 6090 + 6091 + qeth_for_each_output_queue(card, queue, i) { 6092 + netif_tx_napi_add(dev, &queue->napi, qeth_tx_poll, 6093 + QETH_NAPI_WEIGHT); 6094 + napi_enable(&queue->napi); 6095 + napi_schedule(&queue->napi); 6096 + } 6097 + } 6237 6098 /* kick-start the NAPI softirq: */ 6238 6099 local_bh_enable(); 6239 6100 return 0; ··· 6256 6095 struct qeth_card *card = dev->ml_priv; 6257 6096 6258 6097 QETH_CARD_TEXT(card, 4, "qethstop"); 6259 - netif_tx_disable(dev); 6098 + if (IS_IQD(card)) { 6099 + struct qeth_qdio_out_q *queue; 6100 + unsigned int i; 6101 + 6102 + /* Quiesce the NAPI instances: */ 6103 + qeth_for_each_output_queue(card, queue, i) { 6104 + napi_disable(&queue->napi); 6105 + del_timer_sync(&queue->timer); 6106 + } 6107 + 6108 + /* Stop .ndo_start_xmit, might still access queue->napi. */ 6109 + netif_tx_disable(dev); 6110 + 6111 + /* Queues may get re-allocated, so remove the NAPIs here. */ 6112 + qeth_for_each_output_queue(card, queue, i) 6113 + netif_napi_del(&queue->napi); 6114 + } else { 6115 + netif_tx_disable(dev); 6116 + } 6117 + 6260 6118 napi_disable(&card->napi); 6261 6119 return 0; 6262 6120 }
+2
drivers/s390/net/qeth_ethtool.c
··· 39 39 QETH_TXQ_STAT("TSO bytes", tso_bytes), 40 40 QETH_TXQ_STAT("Packing mode switches", packing_mode_switch), 41 41 QETH_TXQ_STAT("Queue stopped", stopped), 42 + QETH_TXQ_STAT("Completion yield", completion_yield), 43 + QETH_TXQ_STAT("Completion timer", completion_timer), 42 44 }; 43 45 44 46 static const struct qeth_stats card_stats[] = {
+4 -8
drivers/s390/net/qeth_l2_main.c
··· 175 175 hdr->hdr.l2.id = QETH_HEADER_TYPE_L2_TSO; 176 176 } else { 177 177 hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2; 178 - if (skb->ip_summed == CHECKSUM_PARTIAL) { 178 + if (skb->ip_summed == CHECKSUM_PARTIAL) 179 179 qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv); 180 - QETH_TXQ_STAT_INC(queue, skbs_csum); 181 - } 182 180 } 183 181 184 182 /* set byte byte 3 to casting flags */ ··· 586 588 struct qeth_card *card = dev->ml_priv; 587 589 u16 txq = skb_get_queue_mapping(skb); 588 590 struct qeth_qdio_out_q *queue; 589 - int tx_bytes = skb->len; 590 591 int rc; 591 592 593 + if (!skb_is_gso(skb)) 594 + qdisc_skb_cb(skb)->pkt_len = skb->len; 592 595 if (IS_IQD(card)) 593 596 txq = qeth_iqd_translate_txq(dev, txq); 594 597 queue = card->qdio.out_qs[txq]; ··· 600 601 rc = qeth_xmit(card, skb, queue, qeth_get_ip_version(skb), 601 602 qeth_l2_fill_header); 602 603 603 - if (!rc) { 604 - QETH_TXQ_STAT_INC(queue, tx_packets); 605 - QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes); 604 + if (!rc) 606 605 return NETDEV_TX_OK; 607 - } 608 606 609 607 QETH_TXQ_STAT_INC(queue, tx_dropped); 610 608 kfree_skb(skb);
+3 -6
drivers/s390/net/qeth_l3_main.c
··· 1957 1957 /* some HW requires combined L3+L4 csum offload: */ 1958 1958 if (ipv == 4) 1959 1959 hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_HDR_REQ; 1960 - QETH_TXQ_STAT_INC(queue, skbs_csum); 1961 1960 } 1962 1961 } 1963 1962 ··· 2043 2044 u16 txq = skb_get_queue_mapping(skb); 2044 2045 int ipv = qeth_get_ip_version(skb); 2045 2046 struct qeth_qdio_out_q *queue; 2046 - int tx_bytes = skb->len; 2047 2047 int rc; 2048 2048 2049 + if (!skb_is_gso(skb)) 2050 + qdisc_skb_cb(skb)->pkt_len = skb->len; 2049 2051 if (IS_IQD(card)) { 2050 2052 queue = card->qdio.out_qs[qeth_iqd_translate_txq(dev, txq)]; 2051 2053 ··· 2069 2069 else 2070 2070 rc = qeth_xmit(card, skb, queue, ipv, qeth_l3_fill_header); 2071 2071 2072 - if (!rc) { 2073 - QETH_TXQ_STAT_INC(queue, tx_packets); 2074 - QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes); 2072 + if (!rc) 2075 2073 return NETDEV_TX_OK; 2076 - } 2077 2074 2078 2075 tx_drop: 2079 2076 QETH_TXQ_STAT_INC(queue, tx_dropped);