
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

1) The wireless rate info fix from Johannes Berg.

2) When a RAW socket is in hdrincl mode, we need to make sure that the
user provided at least a minimally sized ipv4/ipv6 header. Fix from
Alexander Potapenko. (A userspace sketch of the failure mode follows
this list.)

3) We must emit IFLA_PHYS_PORT_NAME netlink attributes using
nla_put_string() so that the string is NUL-terminated. (See the
nla_put() vs nla_put_string() sketch after this list.)

4) Fix a bug in TCP fastopen handling, wherein child sockets
erroneously inherit the fastopen_req from the parent, and later can
end up dereferencing freed memory or doing a double free. From Eric
Dumazet.

5) Don't clear out netdev stats at close time in tg3 driver, from
YueHaibing.

6) Fix refcount leak in xt_CT, from Gao Feng.

7) In nft_set_bitmap() don't leak dummy elements, from Liping Zhang.

8) Fix deadlock due to taking the expectation lock twice, also from
Liping Zhang.

9) Make xt_socket work again with ipv6, from Peter Tirsek.

10) Don't allow IPv6 to be used with IPVS if ipv6.disable=1, from Paolo
Abeni.

11) Make the BPF loader more flexible wrt. changes to the bpf MAP entry
layout. From Jesper Dangaard Brouer. (A sketch of the size-tolerant
copy idea follows this list.)

12) Fix ethtool reported device name in aquantia driver, from Pavel
Belous.

13) Fix build failures due to the compile time size test not working in
netfilter conntrack. From Geert Uytterhoeven.
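
For reference on (2), a minimal userspace sketch of the failure mode; this
is illustrative and not part of the series. With the fix applied, the runt
send fails with EINVAL instead of letting the stack parse IP header fields
that were never supplied:

/* Needs CAP_NET_RAW; error handling trimmed for brevity. */
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/ip.h>
#include <sys/socket.h>

int main(void)
{
	int one = 1;
	char runt[4] = { 0x45, 0, 0, 0 };	/* < sizeof(struct iphdr) == 20 */
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (fd < 0)
		return 1;
	/* hdrincl mode: the caller promises to supply the full IPv4 header */
	setsockopt(fd, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));

	/* 4 bytes cannot hold a complete header; patched kernels reject it */
	if (sendto(fd, runt, sizeof(runt), 0,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("sendto");
	close(fd);
	return 0;
}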
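
For (3), the subtlety in one short sketch: nla_put() copies exactly the
length it is given, so passing strlen(name) emits the string bytes without
the trailing NUL, whereas nla_put_string() copies strlen(name) + 1 bytes.
The snippet below is illustrative; the real change is in the rtnetlink diff
further down:

/* Buggy: payload omits the terminating NUL, so a consumer that treats
 * the attribute as a C string can read past its end.
 */
if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
	return -EMSGSIZE;

/* Fixed: payload is strlen(name) + 1 bytes, NUL included. */
if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
	return -EMSGSIZE;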
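
And for (11), a sketch of the size-tolerant copy idea: instead of requiring
the ELF map definition to match the loader's struct byte-for-byte, copy the
smaller of the two sizes and zero-fill the rest, so both older (smaller) and
newer (larger) layouts keep loading. The names below are illustrative, not
the exact samples/bpf symbols:

#include <string.h>

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	/* newer fields may be appended here */
};

static void read_map_def(struct bpf_map_def *dst,
			 const void *elf_def, size_t elf_size)
{
	size_t n = elf_size < sizeof(*dst) ? elf_size : sizeof(*dst);

	memset(dst, 0, sizeof(*dst));	/* absent fields default to zero */
	memcpy(dst, elf_def, n);	/* tolerate older, smaller layouts */
}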

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (52 commits)
cfg80211: make RATE_INFO_BW_20 the default
ipv6: initialize route null entry in addrconf_init()
qede: Fix possible misconfiguration of advertised autoneg value.
qed: Fix overriding of supported autoneg value.
qed*: Fix possible overflow for status block id field.
rtnetlink: NUL-terminate IFLA_PHYS_PORT_NAME string
netvsc: make sure napi enabled before vmbus_open
aquantia: Fix driver name reported by ethtool
ipv4, ipv6: ensure raw socket message is big enough to hold an IP header
net/sched: remove redundant null check on head
tcp: do not inherit fastopen_req from parent
forcedeth: remove unnecessary carrier status check
ibmvnic: Move queue restarting in ibmvnic_tx_complete
ibmvnic: Record SKB RX queue during poll
ibmvnic: Continue skb processing after skb completion error
ibmvnic: Check for driver reset first in ibmvnic_xmit
ibmvnic: Wait for any pending scrqs entries at driver close
ibmvnic: Clean up tx pools when closing
ibmvnic: Whitespace correction in release_rx_pools
ibmvnic: Delete napi's when releasing driver resources
...

Diffstat: +1177 -440
MAINTAINERS | +3 -1
@@ -8782 +8782 @@
 NETFILTER
 M:	Pablo Neira Ayuso <pablo@netfilter.org>
 M:	Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+M:	Florian Westphal <fw@strlen.de>
 L:	netfilter-devel@vger.kernel.org
 L:	coreteam@netfilter.org
 W:	http://www.netfilter.org/
 W:	http://www.iptables.org/
+W:	http://www.nftables.org/
 Q:	http://patchwork.ozlabs.org/project/netfilter-devel/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
-S:	Supported
+S:	Maintained
 F:	include/linux/netfilter*
 F:	include/linux/netfilter/
 F:	include/net/netfilter/
drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | +1 -1
@@ -68 +68 @@
 
 #define AQ_CFG_DRV_AUTHOR      "aQuantia"
 #define AQ_CFG_DRV_DESC        "aQuantia Corporation(R) Network Driver"
-#define AQ_CFG_DRV_NAME        "aquantia"
+#define AQ_CFG_DRV_NAME        "atlantic"
 #define AQ_CFG_DRV_VERSION	__stringify(NIC_MAJOR_DRIVER_VERSION)"."\
 			__stringify(NIC_MINOR_DRIVER_VERSION)"."\
 			__stringify(NIC_BUILD_DRIVER_VERSION)"."\
drivers/net/ethernet/broadcom/tg3.c | -4
@@ -11729 +11729 @@
 
	tg3_stop(tp);
 
-	/* Clear stats across close / open calls */
-	memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev));
-	memset(&tp->estats_prev, 0, sizeof(tp->estats_prev));
-
	if (pci_device_is_present(tp->pdev)) {
		tg3_power_down_prepare(tp);
 
drivers/net/ethernet/ibm/ibmvnic.c | +369 -204
@@ -194 +194 @@
	if (!ltb->buff)
		return;
 
-	if (!adapter->failover)
+	if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
+	    adapter->reset_reason != VNIC_RESET_MOBILITY)
		send_request_unmap(adapter, ltb->map_id);
	dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
 }
@@ -293 +292 @@
 {
	int i;
 
-	if (adapter->migrated)
-		return;
-
	adapter->replenish_task_cycles++;
	for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
	     i++) {
@@ -348 +350 @@
		free_long_term_buff(adapter, &rx_pool->long_term_buff);
 
		if (!rx_pool->rx_buff)
-		continue;
+			continue;
 
		for (j = 0; j < rx_pool->size; j++) {
			if (rx_pool->rx_buff[j].skb) {
@@ -552 +554 @@
 
 static void release_resources(struct ibmvnic_adapter *adapter)
 {
+	int i;
+
	release_tx_pools(adapter);
	release_rx_pools(adapter);
 
	release_stats_token(adapter);
	release_error_buffers(adapter);
+
+	if (adapter->napi) {
+		for (i = 0; i < adapter->req_rx_queues; i++) {
+			if (&adapter->napi[i])
+				netif_napi_del(&adapter->napi[i]);
+		}
+	}
 }
 
 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
@@ -575 +568 @@
	union ibmvnic_crq crq;
	bool resend;
	int rc;
-
-	if (adapter->logical_link_state == link_state) {
-		netdev_dbg(netdev, "Link state already %d\n", link_state);
-		return 0;
-	}
 
	netdev_err(netdev, "setting link state %d\n", link_state);
	memset(&crq, 0, sizeof(crq));
@@ -626 +624 @@
	return rc;
 }
 
-static int ibmvnic_open(struct net_device *netdev)
+static int init_resources(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	struct device *dev = &adapter->vdev->dev;
-	int rc = 0;
-	int i;
-
-	if (adapter->is_closed) {
-		rc = ibmvnic_init(adapter);
-		if (rc)
-			return rc;
-	}
-
-	rc = ibmvnic_login(netdev);
-	if (rc)
-		return rc;
+	struct net_device *netdev = adapter->netdev;
+	int i, rc;
 
	rc = set_real_num_queues(netdev);
	if (rc)
@@ -637 +647 @@
 
	rc = init_sub_crq_irqs(adapter);
	if (rc) {
-		dev_err(dev, "failed to initialize sub crq irqs\n");
+		netdev_err(netdev, "failed to initialize sub crq irqs\n");
		return -1;
	}
 
@@ -649 +659 @@
	adapter->napi = kcalloc(adapter->req_rx_queues,
				sizeof(struct napi_struct), GFP_KERNEL);
	if (!adapter->napi)
-		goto ibmvnic_open_fail;
+		return -ENOMEM;
+
	for (i = 0; i < adapter->req_rx_queues; i++) {
		netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
			       NAPI_POLL_WEIGHT);
-		napi_enable(&adapter->napi[i]);
	}
 
	send_map_query(adapter);
 
	rc = init_rx_pools(netdev);
	if (rc)
-		goto ibmvnic_open_fail;
+		return rc;
 
	rc = init_tx_pools(netdev);
-	if (rc)
-		goto ibmvnic_open_fail;
+	return rc;
+}
 
+static int __ibmvnic_open(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	enum vnic_state prev_state = adapter->state;
+	int i, rc;
+
+	adapter->state = VNIC_OPENING;
	replenish_pools(adapter);
+
+	for (i = 0; i < adapter->req_rx_queues; i++)
+		napi_enable(&adapter->napi[i]);
 
	/* We're ready to receive frames, enable the sub-crq interrupts and
	 * set the logical link state to up
	 */
-	for (i = 0; i < adapter->req_rx_queues; i++)
-		enable_scrq_irq(adapter, adapter->rx_scrq[i]);
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		if (prev_state == VNIC_CLOSED)
+			enable_irq(adapter->rx_scrq[i]->irq);
+		else
+			enable_scrq_irq(adapter, adapter->rx_scrq[i]);
+	}
 
-	for (i = 0; i < adapter->req_tx_queues; i++)
-		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+	for (i = 0; i < adapter->req_tx_queues; i++) {
+		if (prev_state == VNIC_CLOSED)
+			enable_irq(adapter->tx_scrq[i]->irq);
+		else
+			enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+	}
 
	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
-	if (rc)
-		goto ibmvnic_open_fail;
+	if (rc) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			napi_disable(&adapter->napi[i]);
+		release_resources(adapter);
+		return rc;
+	}
 
	netif_tx_start_all_queues(netdev);
-	adapter->is_closed = false;
 
-	return 0;
-
-ibmvnic_open_fail:
-	for (i = 0; i < adapter->req_rx_queues; i++)
-		napi_disable(&adapter->napi[i]);
-	release_resources(adapter);
-	return -ENOMEM;
-}
-
-static void disable_sub_crqs(struct ibmvnic_adapter *adapter)
-{
-	int i;
-
-	if (adapter->tx_scrq) {
-		for (i = 0; i < adapter->req_tx_queues; i++)
-			if (adapter->tx_scrq[i])
-				disable_irq(adapter->tx_scrq[i]->irq);
-	}
-
-	if (adapter->rx_scrq) {
+	if (prev_state == VNIC_CLOSED) {
		for (i = 0; i < adapter->req_rx_queues; i++)
-			if (adapter->rx_scrq[i])
-				disable_irq(adapter->rx_scrq[i]->irq);
+			napi_schedule(&adapter->napi[i]);
+	}
+
+	adapter->state = VNIC_OPEN;
+	return rc;
+}
+
+static int ibmvnic_open(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	mutex_lock(&adapter->reset_lock);
+
+	if (adapter->state != VNIC_CLOSED) {
+		rc = ibmvnic_login(netdev);
+		if (rc) {
+			mutex_unlock(&adapter->reset_lock);
+			return rc;
+		}
+
+		rc = init_resources(adapter);
+		if (rc) {
+			netdev_err(netdev, "failed to initialize resources\n");
+			release_resources(adapter);
+			mutex_unlock(&adapter->reset_lock);
+			return rc;
+		}
+	}
+
+	rc = __ibmvnic_open(netdev);
+	mutex_unlock(&adapter->reset_lock);
+
+	return rc;
+}
+
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_tx_pool *tx_pool;
+	u64 tx_entries;
+	int tx_scrqs;
+	int i, j;
+
+	if (!adapter->tx_pool)
+		return;
+
+	tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+	tx_entries = adapter->req_tx_entries_per_subcrq;
+
+	/* Free any remaining skbs in the tx buffer pools */
+	for (i = 0; i < tx_scrqs; i++) {
+		tx_pool = &adapter->tx_pool[i];
+		if (!tx_pool)
+			continue;
+
+		for (j = 0; j < tx_entries; j++) {
+			if (tx_pool->tx_buff[j].skb) {
+				dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
+				tx_pool->tx_buff[j].skb = NULL;
+			}
+		}
	}
 }
 
-static int ibmvnic_close(struct net_device *netdev)
+static int __ibmvnic_close(struct net_device *netdev)
 {
	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
	int rc = 0;
	int i;
 
-	adapter->closing = true;
-	disable_sub_crqs(adapter);
+	adapter->state = VNIC_CLOSING;
+	netif_tx_stop_all_queues(netdev);
 
	if (adapter->napi) {
		for (i = 0; i < adapter->req_rx_queues; i++)
			napi_disable(&adapter->napi[i]);
	}
 
-	if (!adapter->failover)
-		netif_tx_stop_all_queues(netdev);
+	clean_tx_pools(adapter);
+
+	if (adapter->tx_scrq) {
+		for (i = 0; i < adapter->req_tx_queues; i++)
+			if (adapter->tx_scrq[i]->irq)
+				disable_irq(adapter->tx_scrq[i]->irq);
+	}
 
	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+	if (rc)
+		return rc;
 
-	release_resources(adapter);
+	if (adapter->rx_scrq) {
+		for (i = 0; i < adapter->req_rx_queues; i++) {
+			int retries = 10;
 
-	adapter->is_closed = true;
-	adapter->closing = false;
+			while (pending_scrq(adapter, adapter->rx_scrq[i])) {
+				retries--;
+				mdelay(100);
+
+				if (retries == 0)
+					break;
+			}
+
+			if (adapter->rx_scrq[i]->irq)
+				disable_irq(adapter->rx_scrq[i]->irq);
+		}
+	}
+
+	adapter->state = VNIC_CLOSED;
+	return rc;
+}
+
+static int ibmvnic_close(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	mutex_lock(&adapter->reset_lock);
+	rc = __ibmvnic_close(netdev);
+	mutex_unlock(&adapter->reset_lock);
+
	return rc;
 }
 
@@ -985 +901 @@
	int index = 0;
	int ret = 0;
 
-	tx_pool = &adapter->tx_pool[queue_num];
-	tx_scrq = adapter->tx_scrq[queue_num];
-	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
-	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
-			       be32_to_cpu(adapter->login_rsp_buf->
-					   off_txsubm_subcrqs));
-	if (adapter->migrated) {
+	if (adapter->resetting) {
		if (!netif_subqueue_stopped(netdev, skb))
			netif_stop_subqueue(netdev, queue_num);
		dev_kfree_skb_any(skb);
@@ -995 +917 @@
		ret = NETDEV_TX_OK;
		goto out;
	}
+
+	tx_pool = &adapter->tx_pool[queue_num];
+	tx_scrq = adapter->tx_scrq[queue_num];
+	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
+	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+		be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
 
	index = tx_pool->free_map[tx_pool->consumer_index];
	offset = index * adapter->req_mtu;
@@ -1183 +1099 @@
	return 0;
 }
 
+/**
+ * do_reset returns zero if we are able to keep processing reset events, or
+ * non-zero if we hit a fatal error and must halt.
+ */
+static int do_reset(struct ibmvnic_adapter *adapter,
+		    struct ibmvnic_rwi *rwi, u32 reset_state)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i, rc;
+
+	netif_carrier_off(netdev);
+	adapter->reset_reason = rwi->reset_reason;
+
+	if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
+		rc = ibmvnic_reenable_crq_queue(adapter);
+		if (rc)
+			return 0;
+	}
+
+	rc = __ibmvnic_close(netdev);
+	if (rc)
+		return rc;
+
+	/* remove the closed state so when we call open it appears
+	 * we are coming from the probed state.
+	 */
+	adapter->state = VNIC_PROBED;
+
+	release_resources(adapter);
+	release_sub_crqs(adapter);
+	release_crq_queue(adapter);
+
+	rc = ibmvnic_init(adapter);
+	if (rc)
+		return 0;
+
+	/* If the adapter was in PROBE state prior to the reset, exit here. */
+	if (reset_state == VNIC_PROBED)
+		return 0;
+
+	rc = ibmvnic_login(netdev);
+	if (rc) {
+		adapter->state = VNIC_PROBED;
+		return 0;
+	}
+
+	rtnl_lock();
+	rc = init_resources(adapter);
+	rtnl_unlock();
+	if (rc)
+		return rc;
+
+	if (reset_state == VNIC_CLOSED)
+		return 0;
+
+	rc = __ibmvnic_open(netdev);
+	if (rc) {
+		if (list_empty(&adapter->rwi_list))
+			adapter->state = VNIC_CLOSED;
+		else
+			adapter->state = reset_state;
+
+		return 0;
+	}
+
+	netif_carrier_on(netdev);
+
+	/* kick napi */
+	for (i = 0; i < adapter->req_rx_queues; i++)
+		napi_schedule(&adapter->napi[i]);
+
+	return 0;
+}
+
+static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_rwi *rwi;
+
+	mutex_lock(&adapter->rwi_lock);
+
+	if (!list_empty(&adapter->rwi_list)) {
+		rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi,
+				       list);
+		list_del(&rwi->list);
+	} else {
+		rwi = NULL;
+	}
+
+	mutex_unlock(&adapter->rwi_lock);
+	return rwi;
+}
+
+static void free_all_rwi(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_rwi *rwi;
+
+	rwi = get_next_rwi(adapter);
+	while (rwi) {
+		kfree(rwi);
+		rwi = get_next_rwi(adapter);
+	}
+}
+
+static void __ibmvnic_reset(struct work_struct *work)
+{
+	struct ibmvnic_rwi *rwi;
+	struct ibmvnic_adapter *adapter;
+	struct net_device *netdev;
+	u32 reset_state;
+	int rc;
+
+	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+	netdev = adapter->netdev;
+
+	mutex_lock(&adapter->reset_lock);
+	adapter->resetting = true;
+	reset_state = adapter->state;
+
+	rwi = get_next_rwi(adapter);
+	while (rwi) {
+		rc = do_reset(adapter, rwi, reset_state);
+		kfree(rwi);
+		if (rc)
+			break;
+
+		rwi = get_next_rwi(adapter);
+	}
+
+	if (rc) {
+		free_all_rwi(adapter);
+		return;
+	}
+
+	adapter->resetting = false;
+	mutex_unlock(&adapter->reset_lock);
+}
+
+static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
+			  enum ibmvnic_reset_reason reason)
+{
+	struct ibmvnic_rwi *rwi, *tmp;
+	struct net_device *netdev = adapter->netdev;
+	struct list_head *entry;
+
+	if (adapter->state == VNIC_REMOVING ||
+	    adapter->state == VNIC_REMOVED) {
+		netdev_dbg(netdev, "Adapter removing, skipping reset\n");
+		return;
+	}
+
+	mutex_lock(&adapter->rwi_lock);
+
+	list_for_each(entry, &adapter->rwi_list) {
+		tmp = list_entry(entry, struct ibmvnic_rwi, list);
+		if (tmp->reset_reason == reason) {
+			netdev_err(netdev, "Matching reset found, skipping\n");
+			mutex_unlock(&adapter->rwi_lock);
+			return;
+		}
+	}
+
+	rwi = kzalloc(sizeof(*rwi), GFP_KERNEL);
+	if (!rwi) {
+		mutex_unlock(&adapter->rwi_lock);
+		ibmvnic_close(netdev);
+		return;
+	}
+
+	rwi->reset_reason = reason;
+	list_add_tail(&rwi->list, &adapter->rwi_list);
+	mutex_unlock(&adapter->rwi_lock);
+	schedule_work(&adapter->ibmvnic_reset);
+}
+
 static void ibmvnic_tx_timeout(struct net_device *dev)
 {
	struct ibmvnic_adapter *adapter = netdev_priv(dev);
-	int rc;
 
-	/* Adapter timed out, resetting it */
-	release_sub_crqs(adapter);
-	rc = ibmvnic_reset_crq(adapter);
-	if (rc)
-		dev_err(&adapter->vdev->dev, "Adapter timeout, reset failed\n");
-	else
-		ibmvnic_send_crq_init(adapter);
+	ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT);
 }
 
 static void remove_buff_from_pool(struct ibmvnic_adapter *adapter,
@@ -1404 +1153 @@
			/* free the entry */
			next->rx_comp.first = 0;
			remove_buff_from_pool(adapter, rx_buff);
-			break;
+			continue;
		}
@@ -1428 +1177 @@
 
		skb_put(skb, length);
		skb->protocol = eth_type_trans(skb, netdev);
+		skb_record_rx_queue(skb, scrq_num);
 
		if (flags & IBMVNIC_IP_CHKSUM_GOOD &&
		    flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) {
@@ -1809 +1557 @@
			}
 
			if (txbuff->last_frag) {
-				if (atomic_sub_return(next->tx_comp.num_comps,
-						      &scrq->used) <=
-				    (adapter->req_tx_entries_per_subcrq / 2) &&
-				    netif_subqueue_stopped(adapter->netdev,
-							   txbuff->skb)) {
-					netif_wake_subqueue(adapter->netdev,
-							    scrq->pool_index);
-					netdev_dbg(adapter->netdev,
-						   "Started queue %d\n",
-						   scrq->pool_index);
-				}
-
				dev_kfree_skb_any(txbuff->skb);
+				txbuff->skb = NULL;
			}
 
			adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
@@ -1821 +1580 @@
		}
		/* remove tx_comp scrq*/
		next->tx_comp.first = 0;
+
+		if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+		    (adapter->req_tx_entries_per_subcrq / 2) &&
+		    __netif_subqueue_stopped(adapter->netdev,
+					     scrq->pool_index)) {
+			netif_wake_subqueue(adapter->netdev, scrq->pool_index);
+			netdev_info(adapter->netdev, "Started queue %d\n",
+				    scrq->pool_index);
+		}
	}
 
	enable_scrq_irq(adapter, scrq);
@@ -2103 +1853 @@
 {
	union sub_crq *entry = &scrq->msgs[scrq->cur];
 
-	if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP || adapter->closing)
+	if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP ||
+	    adapter->state == VNIC_CLOSING)
		return 1;
	else
		return 0;
@@ -2238 +1987 @@
	crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
	crq.generic.cmd = IBMVNIC_CRQ_INIT;
	netdev_dbg(adapter->netdev, "Sending CRQ init\n");
-
-	return ibmvnic_send_crq(adapter, &crq);
-}
-
-static int ibmvnic_send_crq_init_complete(struct ibmvnic_adapter *adapter)
-{
-	union ibmvnic_crq crq;
-
-	memset(&crq, 0, sizeof(crq));
-	crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
-	crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE;
-	netdev_dbg(adapter->netdev, "Sending CRQ init complete\n");
 
	return ibmvnic_send_crq(adapter, &crq);
 }
@@ -2739 +2500 @@
 
	if (be32_to_cpu(crq->error_indication.error_id))
		request_error_information(adapter, crq);
+
+	if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR)
+		ibmvnic_reset(adapter, VNIC_RESET_FATAL);
 }
 
 static void handle_change_mac_rsp(union ibmvnic_crq *crq,
@@ -3130 +2888 @@
	}
 }
 
-static void ibmvnic_xport_event(struct work_struct *work)
-{
-	struct ibmvnic_adapter *adapter = container_of(work,
-						       struct ibmvnic_adapter,
-						       ibmvnic_xport);
-	struct device *dev = &adapter->vdev->dev;
-	long rc;
-
-	release_sub_crqs(adapter);
-	if (adapter->migrated) {
-		rc = ibmvnic_reenable_crq_queue(adapter);
-		if (rc)
-			dev_err(dev, "Error after enable rc=%ld\n", rc);
-		adapter->migrated = false;
-		rc = ibmvnic_send_crq_init(adapter);
-		if (rc)
-			dev_err(dev, "Error sending init rc=%ld\n", rc);
-	}
-}
-
 static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
			       struct ibmvnic_adapter *adapter)
 {
@@ -3147 +2925 @@
		switch (gen_crq->cmd) {
		case IBMVNIC_CRQ_INIT:
			dev_info(dev, "Partner initialized\n");
-			/* Send back a response */
-			rc = ibmvnic_send_crq_init_complete(adapter);
-			if (!rc)
-				schedule_work(&adapter->vnic_crq_init);
-			else
-				dev_err(dev, "Can't send initrsp rc=%ld\n", rc);
			break;
		case IBMVNIC_CRQ_INIT_COMPLETE:
			dev_info(dev, "Partner initialization complete\n");
@@ -3157 +2941 @@
		}
		return;
	case IBMVNIC_CRQ_XPORT_EVENT:
+		netif_carrier_off(netdev);
		if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
-			dev_info(dev, "Re-enabling adapter\n");
-			adapter->migrated = true;
-			schedule_work(&adapter->ibmvnic_xport);
+			dev_info(dev, "Migrated, re-enabling adapter\n");
+			ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
		} else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
			dev_info(dev, "Backing device failover detected\n");
-			netif_carrier_off(netdev);
-			adapter->failover = true;
+			ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
		} else {
			/* The adapter lost the connection */
			dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
				gen_crq->cmd);
-			schedule_work(&adapter->ibmvnic_xport);
+			ibmvnic_reset(adapter, VNIC_RESET_FATAL);
		}
		return;
	case IBMVNIC_CRQ_CMD_RSP:
@@ -3449 +3234 @@
	return retrc;
 }
 
-static void handle_crq_init_rsp(struct work_struct *work)
-{
-	struct ibmvnic_adapter *adapter = container_of(work,
-						       struct ibmvnic_adapter,
-						       vnic_crq_init);
-	struct device *dev = &adapter->vdev->dev;
-	struct net_device *netdev = adapter->netdev;
-	unsigned long timeout = msecs_to_jiffies(30000);
-	bool restart = false;
-	int rc;
-
-	if (adapter->failover) {
-		release_sub_crqs(adapter);
-		if (netif_running(netdev)) {
-			netif_tx_disable(netdev);
-			ibmvnic_close(netdev);
-			restart = true;
-		}
-	}
-
-	reinit_completion(&adapter->init_done);
-	send_version_xchg(adapter);
-	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
-		dev_err(dev, "Passive init timeout\n");
-		goto task_failed;
-	}
-
-	netdev->mtu = adapter->req_mtu - ETH_HLEN;
-
-	if (adapter->failover) {
-		adapter->failover = false;
-		if (restart) {
-			rc = ibmvnic_open(netdev);
-			if (rc)
-				goto restart_failed;
-		}
-		netif_carrier_on(netdev);
-		return;
-	}
-
-	rc = register_netdev(netdev);
-	if (rc) {
-		dev_err(dev,
-			"failed to register netdev rc=%d\n", rc);
-		goto register_failed;
-	}
-	dev_info(dev, "ibmvnic registered\n");
-
-	return;
-
-restart_failed:
-	dev_err(dev, "Failed to restart ibmvnic, rc=%d\n", rc);
-register_failed:
-	release_sub_crqs(adapter);
-task_failed:
-	dev_err(dev, "Passive initialization was not successful\n");
-}
-
 static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 {
	struct device *dev = &adapter->vdev->dev;
@@ -3503 +3346 @@
		return -ENOMEM;
 
	adapter = netdev_priv(netdev);
+	adapter->state = VNIC_PROBING;
	dev_set_drvdata(&dev->dev, netdev);
	adapter->vdev = dev;
	adapter->netdev = netdev;
-	adapter->failover = false;
 
	ether_addr_copy(adapter->mac_addr, mac_addr_p);
	ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
@@ -3515 +3358 @@
	netdev->ethtool_ops = &ibmvnic_ethtool_ops;
	SET_NETDEV_DEV(netdev, &dev->dev);
 
-	INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp);
-	INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event);
-
	spin_lock_init(&adapter->stats_lock);
 
	INIT_LIST_HEAD(&adapter->errors);
	spin_lock_init(&adapter->error_list_lock);
+
+	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+	INIT_LIST_HEAD(&adapter->rwi_list);
+	mutex_init(&adapter->reset_lock);
+	mutex_init(&adapter->rwi_lock);
+	adapter->resetting = false;
 
	rc = ibmvnic_init(adapter);
	if (rc) {
@@ -3533 +3373 @@
	}
 
	netdev->mtu = adapter->req_mtu - ETH_HLEN;
-	adapter->is_closed = false;
 
	rc = register_netdev(netdev);
	if (rc) {
@@ -3542 +3383 @@
	}
	dev_info(&dev->dev, "ibmvnic registered\n");
 
+	adapter->state = VNIC_PROBED;
	return 0;
 }
@@ -3551 +3391 @@
	struct net_device *netdev = dev_get_drvdata(&dev->dev);
	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
+	adapter->state = VNIC_REMOVING;
	unregister_netdev(netdev);
+	mutex_lock(&adapter->reset_lock);
 
	release_resources(adapter);
	release_sub_crqs(adapter);
	release_crq_queue(adapter);
 
+	adapter->state = VNIC_REMOVED;
+
+	mutex_unlock(&adapter->reset_lock);
	free_netdev(netdev);
	dev_set_drvdata(&dev->dev, NULL);
drivers/net/ethernet/ibm/ibmvnic.h | +25 -6
@@ -913 +913 @@
	__be32 error_id;
 };
 
+enum vnic_state {VNIC_PROBING = 1,
+		 VNIC_PROBED,
+		 VNIC_OPENING,
+		 VNIC_OPEN,
+		 VNIC_CLOSING,
+		 VNIC_CLOSED,
+		 VNIC_REMOVING,
+		 VNIC_REMOVED};
+
+enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
+			   VNIC_RESET_MOBILITY,
+			   VNIC_RESET_FATAL,
+			   VNIC_RESET_TIMEOUT};
+
+struct ibmvnic_rwi {
+	enum ibmvnic_reset_reason reset_reason;
+	struct list_head list;
+};
+
 struct ibmvnic_adapter {
	struct vio_dev *vdev;
	struct net_device *netdev;
@@ -941 +922 @@
	dma_addr_t ip_offload_tok;
	struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
	dma_addr_t ip_offload_ctrl_tok;
-	bool migrated;
	u32 msg_enable;
 
	/* Statistics */
@@ -980 +962 @@
	u64 promisc;
 
	struct ibmvnic_tx_pool *tx_pool;
-	bool closing;
	struct completion init_done;
	int init_done_rc;
 
@@ -1024 +1007 @@
	__be64 tx_rx_desc_req;
	u8 map_id;
 
-	struct work_struct vnic_crq_init;
-	struct work_struct ibmvnic_xport;
	struct tasklet_struct tasklet;
-	bool failover;
-	bool is_closed;
+	enum vnic_state state;
+	enum ibmvnic_reset_reason reset_reason;
+	struct mutex reset_lock, rwi_lock;
+	struct list_head rwi_list;
+	struct work_struct ibmvnic_reset;
+	bool resetting;
 };
drivers/net/ethernet/netronome/nfp/nfp_net_common.c | +2 -2
@@ -2532 +2532 @@
	if (!dp->xdp_prog)
		return 0;
	if (dp->fl_bufsz > PAGE_SIZE) {
-		NL_MOD_TRY_SET_ERR_MSG(extack, "MTU too large w/ XDP enabled");
+		NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled");
		return -EINVAL;
	}
	if (dp->num_tx_rings > nn->max_tx_rings) {
-		NL_MOD_TRY_SET_ERR_MSG(extack, "Insufficient number of TX rings w/ XDP enabled");
+		NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled");
		return -EINVAL;
	}
 
drivers/net/ethernet/nvidia/forcedeth.c | +2 -4
@@ -4248 +4248 @@
	/* We do not track link speed / duplex setting if the
	 * interface is disabled. Force a link check */
	if (nv_update_linkspeed(dev)) {
-		if (!netif_carrier_ok(dev))
-			netif_carrier_on(dev);
+		netif_carrier_on(dev);
	} else {
-		if (netif_carrier_ok(dev))
-			netif_carrier_off(dev);
+		netif_carrier_off(dev);
	}
 }
 
drivers/net/ethernet/qlogic/qed/qed_dev.c | +7 -4
@@ -2536 +2536 @@
		DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", link_temp);
	}
 
+	p_hwfn->mcp_info->link_capabilities.default_speed_autoneg =
+		link->speed.autoneg;
+
	link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK;
	link_temp >>= NVM_CFG1_PORT_DRV_FLOW_CONTROL_OFFSET;
	link->pause.autoneg = !!(link_temp &
@@ -3589 +3586 @@
 }
 
 int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u8 qid, u16 sb_id)
+			 u16 coalesce, u16 qid, u16 sb_id)
 {
	struct ustorm_eth_queue_zone eth_qzone;
	u8 timeset, timer_res;
@@ -3610 +3607 @@
	}
	timeset = (u8)(coalesce >> timer_res);
 
-	rc = qed_fw_l2_queue(p_hwfn, (u16)qid, &fw_qid);
+	rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
	if (rc)
		return rc;
 
@@ -3631 +3628 @@
 }
 
 int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u8 qid, u16 sb_id)
+			 u16 coalesce, u16 qid, u16 sb_id)
 {
	struct xstorm_eth_queue_zone eth_qzone;
	u8 timeset, timer_res;
@@ -3652 +3649 @@
	}
	timeset = (u8)(coalesce >> timer_res);
 
-	rc = qed_fw_l2_queue(p_hwfn, (u16)qid, &fw_qid);
+	rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
	if (rc)
		return rc;
 
drivers/net/ethernet/qlogic/qed/qed_dev_api.h | +2 -2
@@ -454 +454 @@
 * @return int
 */
 int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u8 qid, u16 sb_id);
+			 u16 coalesce, u16 qid, u16 sb_id);
 
 /**
 * @brief qed_set_txq_coalesce - Configure coalesce parameters for a Tx queue
@@ -471 +471 @@
 * @return int
 */
 int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 u16 coalesce, u8 qid, u16 sb_id);
+			 u16 coalesce, u16 qid, u16 sb_id);
 
 const char *qed_hw_get_resc_name(enum qed_resources res_id);
 #endif
drivers/net/ethernet/qlogic/qed/qed_main.c | +6 -2
@@ -1372 +1372 @@
 
	/* TODO - at the moment assume supported and advertised speed equal */
	if_link->supported_caps = QED_LM_FIBRE_BIT;
-	if (params.speed.autoneg)
+	if (link_caps.default_speed_autoneg)
		if_link->supported_caps |= QED_LM_Autoneg_BIT;
	if (params.pause.autoneg ||
	    (params.pause.forced_rx && params.pause.forced_tx))
@@ -1382 +1382 @@
		if_link->supported_caps |= QED_LM_Pause_BIT;
 
	if_link->advertised_caps = if_link->supported_caps;
+	if (params.speed.autoneg)
+		if_link->advertised_caps |= QED_LM_Autoneg_BIT;
+	else
+		if_link->advertised_caps &= ~QED_LM_Autoneg_BIT;
	if (params.speed.advertised_speeds &
	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
		if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
@@ -1525 +1521 @@
 }
 
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
-			    u8 qid, u16 sb_id)
+			    u16 qid, u16 sb_id)
 {
	struct qed_hwfn *hwfn;
	struct qed_ptt *ptt;
drivers/net/ethernet/qlogic/qed/qed_mcp.h | +1
@@ -61 +61 @@
 
 struct qed_mcp_link_capabilities {
	u32 speed_capabilities;
+	bool default_speed_autoneg;
 };
 
 struct qed_mcp_link_state {
drivers/net/ethernet/qlogic/qede/qede_ethtool.c | +7 -3
@@ -493 +493 @@
	params.override_flags |= QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS;
	params.override_flags |= QED_LINK_OVERRIDE_SPEED_AUTONEG;
	if (base->autoneg == AUTONEG_ENABLE) {
+		if (!(current_link.supported_caps & QED_LM_Autoneg_BIT)) {
+			DP_INFO(edev, "Auto negotiation is not supported\n");
+			return -EOPNOTSUPP;
+		}
+
		params.autoneg = true;
		params.forced_speed = 0;
		QEDE_ETHTOOL_TO_DRV_CAPS(params.adv_speeds, cmd, advertising)
@@ -711 +706 @@
 {
	struct qede_dev *edev = netdev_priv(dev);
	int i, rc = 0;
-	u16 rxc, txc;
-	u8 sb_id;
+	u16 rxc, txc, sb_id;
 
	if (!netif_running(dev)) {
		DP_INFO(edev, "Interface is down\n");
@@ -733 +729 @@
	for_each_queue(i) {
		sb_id = edev->fp_array[i].sb_info->igu_sb_id;
		rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc,
-						     (u8)i, sb_id);
+						     (u16)i, sb_id);
		if (rc) {
			DP_INFO(edev, "Set coalesce error, rc = %d\n", rc);
			return rc;
drivers/net/ethernet/smsc/smsc911x.c | +30 -19
@@ -25 +25 @@
 *   LAN9215, LAN9216, LAN9217, LAN9218
 *   LAN9210, LAN9211
 *   LAN9220, LAN9221
- *   LAN89218
+ *   LAN89218,LAN9250
 *
 */
@@ -1450 +1450 @@
	unsigned int timeout;
	unsigned int temp;
	int ret;
+	unsigned int reset_offset = HW_CFG;
+	unsigned int reset_mask = HW_CFG_SRST_;
 
	/*
	 * Make sure to power-up the PHY chip before doing a reset, otherwise
@@ -1478 +1476 @@
		}
	}
 
+	if ((pdata->idrev & 0xFFFF0000) == LAN9250) {
+		/* special reset for LAN9250 */
+		reset_offset = RESET_CTL;
+		reset_mask = RESET_CTL_DIGITAL_RST_;
+	}
+
	/* Reset the LAN911x */
-	smsc911x_reg_write(pdata, HW_CFG, HW_CFG_SRST_);
+	smsc911x_reg_write(pdata, reset_offset, reset_mask);
+
+	/* verify reset bit is cleared */
	timeout = 10;
	do {
		udelay(10);
-		temp = smsc911x_reg_read(pdata, HW_CFG);
-	} while ((--timeout) && (temp & HW_CFG_SRST_));
+		temp = smsc911x_reg_read(pdata, reset_offset);
+	} while ((--timeout) && (temp & reset_mask));
 
-	if (unlikely(temp & HW_CFG_SRST_)) {
+	if (unlikely(temp & reset_mask)) {
		SMSC_WARN(pdata, drv, "Failed to complete reset");
		return -EIO;
	}
@@ -2263 +2253 @@
 
	pdata->idrev = smsc911x_reg_read(pdata, ID_REV);
	switch (pdata->idrev & 0xFFFF0000) {
-	case 0x01180000:
-	case 0x01170000:
-	case 0x01160000:
-	case 0x01150000:
-	case 0x218A0000:
+	case LAN9118:
+	case LAN9117:
+	case LAN9116:
+	case LAN9115:
+	case LAN89218:
		/* LAN911[5678] family */
		pdata->generation = pdata->idrev & 0x0000FFFF;
		break;
 
-	case 0x118A0000:
-	case 0x117A0000:
-	case 0x116A0000:
-	case 0x115A0000:
+	case LAN9218:
+	case LAN9217:
+	case LAN9216:
+	case LAN9215:
		/* LAN921[5678] family */
		pdata->generation = 3;
		break;
 
-	case 0x92100000:
-	case 0x92110000:
-	case 0x92200000:
-	case 0x92210000:
-		/* LAN9210/LAN9211/LAN9220/LAN9221 */
+	case LAN9210:
+	case LAN9211:
+	case LAN9220:
+	case LAN9221:
+	case LAN9250:
+		/* LAN9210/LAN9211/LAN9220/LAN9221/LAN9250 */
		pdata->generation = 4;
		break;
 
drivers/net/ethernet/smsc/smsc911x.h | +19
@@ -20 +20 @@
 #ifndef __SMSC911X_H__
 #define __SMSC911X_H__
 
+/*Chip ID*/
+#define LAN9115	0x01150000
+#define LAN9116	0x01160000
+#define LAN9117	0x01170000
+#define LAN9118	0x01180000
+#define LAN9215	0x115A0000
+#define LAN9216	0x116A0000
+#define LAN9217	0x117A0000
+#define LAN9218	0x118A0000
+#define LAN9210	0x92100000
+#define LAN9211	0x92110000
+#define LAN9220	0x92200000
+#define LAN9221	0x92210000
+#define LAN9250	0x92500000
+#define LAN89218	0x218A0000
+
 #define TX_FIFO_LOW_THRESHOLD	((u32)1600)
 #define SMSC911X_EEPROM_SIZE	((u32)128)
 #define USE_DEBUG		0
@@ -318 +302 @@
 #define E2P_DATA			0xB4
 #define E2P_DATA_EEPROM_DATA_		0x000000FF
 #define LAN_REGISTER_EXTENT		0x00000100
+
+#define RESET_CTL			0x1F8
+#define RESET_CTL_DIGITAL_RST_		0x00000001
 
 /*
 * MAC Control and Status Register (Indirect Address)
drivers/net/hyperv/netvsc.c | +5 -3
@@ -1322 +1322 @@
		nvchan->channel = device->channel;
	}
 
+	/* Enable NAPI handler before init callbacks */
+	netif_napi_add(ndev, &net_device->chan_table[0].napi,
+		       netvsc_poll, NAPI_POLL_WEIGHT);
+
	/* Open the channel */
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
@@ -1333 +1329 @@
			 net_device->chan_table);
 
	if (ret != 0) {
+		netif_napi_del(&net_device->chan_table[0].napi);
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}
@@ -1341 +1336 @@
	/* Channel is opened */
	netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
 
-	/* Enable NAPI handler for init callbacks */
-	netif_napi_add(ndev, &net_device->chan_table[0].napi,
-		       netvsc_poll, NAPI_POLL_WEIGHT);
	napi_enable(&net_device->chan_table[0].napi);
 
	/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
drivers/net/hyperv/rndis_filter.c | +1 -1
@@ -1018 +1018 @@
	if (ret == 0)
		napi_enable(&nvchan->napi);
	else
-		netdev_err(ndev, "sub channel open failed (%d)\n", ret);
+		netif_napi_del(&nvchan->napi);
 
	if (refcount_dec_and_test(&nvscdev->sc_offered))
		complete(&nvscdev->channel_init_wait);
drivers/net/usb/qmi_wwan.c | +1
@@ -1201 +1201 @@
	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */
	{QMI_FIXED_INTF(0x2357, 0x9000, 4)},	/* TP-LINK MA260 */
	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)},	/* Telit LE922A */
+	{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},	/* Telit ME910 */
	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)},	/* Telit LE920, LE920A4 */
	{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},	/* XS Stick W100-2 from 4G Systems */
drivers/net/virtio_net.c | +4 -4
@@ -1891 +1891 @@
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
-		NL_SET_ERR_MSG(extack, "can't set XDP while host is implementing LRO, disable LRO first");
+		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
		return -EOPNOTSUPP;
	}
 
	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
-		NL_SET_ERR_MSG(extack, "XDP expects header/data in single page, any_header_sg required");
+		NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
		return -EINVAL;
	}
 
	if (dev->mtu > max_sz) {
-		NL_SET_ERR_MSG(extack, "MTU too large to enable XDP");
+		NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
		netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
		return -EINVAL;
	}
@@ -1912 +1912 @@
 
	/* XDP requires extra queues for XDP_TX */
	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
-		NL_SET_ERR_MSG(extack, "Too few free TX rings available");
+		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
		netdev_warn(dev, "request %i queues but max is %i\n",
			    curr_qp + xdp_qp, vi->max_queue_pairs);
		return -ENOMEM;
include/linux/netlink.h | +8 -11
@@ -86 +86 @@
 *	Currently string formatting is not supported (due
 *	to the lack of an output buffer.)
 */
-#define NL_SET_ERR_MSG(extack, msg) do {	\
-	static const char _msg[] = (msg);	\
-						\
-	(extack)->_msg = _msg;			\
+#define NL_SET_ERR_MSG(extack, msg) do {		\
+	static const char __msg[] = (msg);		\
+	struct netlink_ext_ack *__extack = (extack);	\
+							\
+	if (__extack)					\
+		__extack->_msg = __msg;			\
 } while (0)
 
-#define NL_MOD_TRY_SET_ERR_MSG(extack, msg) do {		\
-	static const char _msg[] = KBUILD_MODNAME ": " msg;	\
-	struct netlink_ext_ack *_extack = (extack);		\
-								\
-	if (_extack)						\
-		_extack->_msg = _msg;				\
-} while (0)
+#define NL_SET_ERR_MSG_MOD(extack, msg)			\
+	NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
 
 extern void netlink_kernel_release(struct sock *sk);
 extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
include/linux/qed/qed_if.h | +1 -1
@@ -635 +635 @@
 * @return 0 on success, error otherwise.
 */
	int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
-			    u8 qid, u16 sb_id);
+			    u16 qid, u16 sb_id);
 
 /**
 * @brief set_led - Configure LED mode
include/net/cfg80211.h | +1 -1
@@ -1013 +1013 @@
 * @RATE_INFO_BW_160: 160 MHz bandwidth
 */
 enum rate_info_bw {
+	RATE_INFO_BW_20 = 0,
	RATE_INFO_BW_5,
	RATE_INFO_BW_10,
-	RATE_INFO_BW_20,
	RATE_INFO_BW_40,
	RATE_INFO_BW_80,
	RATE_INFO_BW_160,
include/net/ip6_route.h | +1
@@ -84 +84 @@
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int ifindex, struct flowi6 *fl6, int flags);
 
+void ip6_route_init_special_entries(void);
 int ip6_route_init(void);
 void ip6_route_cleanup(void);
 
include/uapi/linux/netfilter/nf_conntrack_common.h | +9 -4
@@ -84 +84 @@
	IPS_DYING_BIT = 9,
	IPS_DYING = (1 << IPS_DYING_BIT),
 
-	/* Bits that cannot be altered from userland. */
-	IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
-				 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING),
-
	/* Connection has fixed timeout. */
	IPS_FIXED_TIMEOUT_BIT = 10,
	IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
@@ -99 +103 @@
	/* Conntrack got a helper explicitly attached via CT target. */
	IPS_HELPER_BIT = 13,
	IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+	/* Be careful here, modifying these bits can make things messy,
+	 * so don't let users modify them directly.
+	 */
+	IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+				 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+				 IPS_SEQ_ADJUST | IPS_TEMPLATE),
+
+	__IPS_MAX_BIT = 14,
 };
 
 /* Connection tracking event types */
lib/test_bpf.c | +5 -5
@@ -4769 +4769 @@
			BPF_LD_IMM64(R1, 3),
			BPF_LD_IMM64(R2, 2),
			BPF_JMP_REG(BPF_JGE, R1, R2, 2),
-			BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
-			BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL),
+			BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
+			BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
@@ -4784 +4784 @@
			BPF_LD_IMM64(R1, 3),
			BPF_LD_IMM64(R2, 2),
			BPF_JMP_REG(BPF_JGE, R1, R2, 0),
-			BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
+			BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
@@ -4798 +4798 @@
			BPF_LD_IMM64(R1, 3),
			BPF_LD_IMM64(R2, 2),
			BPF_JMP_REG(BPF_JGE, R1, R2, 4),
-			BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
-			BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL),
+			BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
+			BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
net/bridge/netfilter/ebt_dnat.c | +20
@@ -9 +9 @@
 */
 #include <linux/module.h>
 #include <net/sock.h>
+#include "../br_private.h"
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
@@ -19 +18 @@
 ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
	const struct ebt_nat_info *info = par->targinfo;
+	struct net_device *dev;
 
	if (!skb_make_writable(skb, 0))
		return EBT_DROP;
 
	ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
+
+	if (is_multicast_ether_addr(info->mac)) {
+		if (is_broadcast_ether_addr(info->mac))
+			skb->pkt_type = PACKET_BROADCAST;
+		else
+			skb->pkt_type = PACKET_MULTICAST;
+	} else {
+		if (xt_hooknum(par) != NF_BR_BROUTING)
+			dev = br_port_get_rcu(xt_in(par))->br->dev;
+		else
+			dev = xt_in(par);
+
+		if (ether_addr_equal(info->mac, dev->dev_addr))
+			skb->pkt_type = PACKET_HOST;
+		else
+			skb->pkt_type = PACKET_OTHERHOST;
+	}
+
	return info->target;
 }
 
net/core/rtnetlink.c | +1 -1
@@ -1054 +1054 @@
		return err;
	}
 
-	if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+	if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
		return -EMSGSIZE;
 
	return 0;
net/ipv4/raw.c | +3
@@ -358 +358 @@
			       rt->dst.dev->mtu);
		return -EMSGSIZE;
	}
+	if (length < sizeof(struct iphdr))
+		return -EINVAL;
+
	if (flags&MSG_PROBE)
		goto out;
 
net/ipv4/tcp_minisocks.c | +1
@@ -523 +523 @@
		newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
		newtp->rx_opt.mss_clamp = req->mss;
		tcp_ecn_openreq_child(newtp, req);
+		newtp->fastopen_req = NULL;
		newtp->fastopen_rsk = NULL;
		newtp->syn_data_acked = 0;
		newtp->rack.mstamp.v64 = 0;
net/ipv6/addrconf.c | +5 -2
@@ -3328 +3328 @@
					      idev->dev, 0, 0);
	}
 
-	addrconf_dad_start(ifp);
+	if (ifp->state == INET6_IFADDR_STATE_PREDAD)
+		addrconf_dad_start(ifp);
 
	return 0;
 }
@@ -3684 +3683 @@
	if (keep) {
		/* set state to skip the notifier below */
		state = INET6_IFADDR_STATE_DEAD;
-		ifa->state = 0;
+		ifa->state = INET6_IFADDR_STATE_PREDAD;
		if (!(ifa->flags & IFA_F_NODAD))
			ifa->flags |= IFA_F_TENTATIVE;
 
@@ -6572 +6571 @@
		err = PTR_ERR(idev);
		goto errlo;
	}
+
+	ip6_route_init_special_entries();
 
	for (i = 0; i < IN6_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet6_addr_lst[i]);
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | +1 -1
@@ -235 +235 @@
		inside->icmp6.icmp6_cksum =
			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					skb->len - hdrlen, IPPROTO_ICMPV6,
-					csum_partial(&inside->icmp6,
+					skb_checksum(skb, hdrlen,
						     skb->len - hdrlen, 0));
	}
 
net/ipv6/raw.c | +2
@@ -632 +632 @@
		ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
		return -EMSGSIZE;
	}
+	if (length < sizeof(struct ipv6hdr))
+		return -EINVAL;
	if (flags&MSG_PROBE)
		goto out;
 
net/ipv6/route.c | +15 -11
@@ -4027 +4027 @@
	.priority = 0,
 };
 
+void __init ip6_route_init_special_entries(void)
+{
+	/* Registering of the loopback is done before this portion of code,
+	 * the loopback reference in rt6_info will not be taken, do it
+	 * manually for init_net */
+	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+#endif
+}
+
 int __init ip6_route_init(void)
 {
	int ret;
@@ -4068 +4053 @@
 
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
 
-	/* Registering of the loopback is done before this portion of code,
-	 * the loopback reference in rt6_info will not be taken, do it
-	 * manually for init_net */
-	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
-	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
-	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
-	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
-	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
-#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;
net/netfilter/ipvs/ip_vs_ctl.c | +17 -5
@@ -3078 +3078 @@
	return skb->len;
 }
 
+static bool ip_vs_is_af_valid(int af)
+{
+	if (af == AF_INET)
+		return true;
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6 && ipv6_mod_enabled())
+		return true;
+#endif
+	return false;
+}
+
 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
				    struct ip_vs_service_user_kern *usvc,
				    struct nlattr *nla, int full_entry,
@@ -3116 +3105 @@
	memset(usvc, 0, sizeof(*usvc));
 
	usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
-	if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
-	if (usvc->af != AF_INET)
-#endif
+	if (!ip_vs_is_af_valid(usvc->af))
		return -EAFNOSUPPORT;
 
	if (nla_fwmark) {
@@ -3618 +3611 @@
	 */
	if (udest.af == 0)
		udest.af = svc->af;
+
+	if (!ip_vs_is_af_valid(udest.af)) {
+		ret = -EAFNOSUPPORT;
+		goto out;
+	}
 
	if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
		/* The synchronization protocol is incompatible
net/netfilter/nf_conntrack_core.c | +1 -1
@@ -1853 +1853 @@
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);
 
-static unsigned int total_extension_size(void)
+static __always_inline unsigned int total_extension_size(void)
 {
	/* remember to add new extensions below */
	BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
net/netfilter/nf_conntrack_helper.c | +21 -5
@@ -385 +385 @@
	struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
	unsigned int h = helper_hash(&me->tuple);
	struct nf_conntrack_helper *cur;
-	int ret = 0;
+	int ret = 0, i;
 
	BUG_ON(me->expect_policy == NULL);
	BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
@@ -395 +395 @@
		return -EINVAL;
 
	mutex_lock(&nf_ct_helper_mutex);
-	hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
-		if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
-			ret = -EEXIST;
-			goto out;
+	for (i = 0; i < nf_ct_helper_hsize; i++) {
+		hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+			if (!strcmp(cur->name, me->name) &&
+			    (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+			     cur->tuple.src.l3num == me->tuple.src.l3num) &&
+			    cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+				ret = -EEXIST;
+				goto out;
+			}
+		}
+	}
+
+	/* avoid unpredictable behaviour for auto_assign_helper */
+	if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+		hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+			if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+						     &mask)) {
+				ret = -EEXIST;
+				goto out;
+			}
		}
	}
	hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
net/netfilter/nf_conntrack_netlink.c | +49 -40
@@ -417 +417 @@
	return -1;
 }
 
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
-				     const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
 {
	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
	struct nf_ct_seqadj *seq;
@@ -425 +426 @@
	if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
		return 0;
 
+	spin_lock_bh(&ct->lock);
	seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
-		return -1;
+		goto err;
 
	seq = &seqadj->seq[IP_CT_DIR_REPLY];
	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
-		return -1;
+		goto err;
 
+	spin_unlock_bh(&ct->lock);
	return 0;
+err:
+	spin_unlock_bh(&ct->lock);
+	return -1;
 }
 
 static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -1421 +1417 @@
 }
 #endif
 
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+			  unsigned long off)
+{
+	unsigned int bit;
+
+	/* Ignore these unchangable bits */
+	on &= ~IPS_UNCHANGEABLE_MASK;
+	off &= ~IPS_UNCHANGEABLE_MASK;
+
+	for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+		if (on & (1 << bit))
+			set_bit(bit, &ct->status);
+		else if (off & (1 << bit))
+			clear_bit(bit, &ct->status);
+	}
+}
+
 static int
 ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
 {
@@ -1458 +1436 @@
		/* ASSURED bit can only be set */
		return -EBUSY;
 
-	/* Be careful here, modifying NAT bits can screw up things,
-	 * so don't let users modify them directly if they don't pass
-	 * nf_nat_range. */
-	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+	__ctnetlink_change_status(ct, status, 0);
	return 0;
 }
 
@@ -1527 +1508 @@
		return 0;
	}
 
+	rcu_read_lock();
	helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
					    nf_ct_protonum(ct));
	if (helper == NULL) {
-#ifdef CONFIG_MODULES
-		spin_unlock_bh(&nf_conntrack_expect_lock);
-
-		if (request_module("nfct-helper-%s", helpname) < 0) {
-			spin_lock_bh(&nf_conntrack_expect_lock);
-			return -EOPNOTSUPP;
-		}
-
-		spin_lock_bh(&nf_conntrack_expect_lock);
-		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
-						    nf_ct_protonum(ct));
-		if (helper)
-			return -EAGAIN;
-#endif
+		rcu_read_unlock();
		return -EOPNOTSUPP;
	}
 
@@ -1540 +1533 @@
			/* update private helper data if allowed. */
			if (helper->from_nlattr)
				helper->from_nlattr(helpinfo, ct);
-			return 0;
+			err = 0;
		} else
-			return -EBUSY;
+			err = -EBUSY;
+	} else {
+		/* we cannot set a helper for an existing conntrack */
+		err = -EOPNOTSUPP;
	}
 
-	/* we cannot set a helper for an existing conntrack */
-	return -EOPNOTSUPP;
+	rcu_read_unlock();
+	return err;
 }
 
 static int ctnetlink_change_timeout(struct nf_conn *ct,
@@ -1640 +1630 @@
	if (!seqadj)
		return 0;
 
+	spin_lock_bh(&ct->lock);
	if (cda[CTA_SEQ_ADJ_ORIG]) {
		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
				     cda[CTA_SEQ_ADJ_ORIG]);
		if (ret < 0)
-			return ret;
+			goto err;
 
-		ct->status |= IPS_SEQ_ADJUST;
+		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
	}
 
	if (cda[CTA_SEQ_ADJ_REPLY]) {
		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
				     cda[CTA_SEQ_ADJ_REPLY]);
		if (ret < 0)
-			return ret;
+			goto err;
 
-		ct->status |= IPS_SEQ_ADJUST;
+		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
	}
 
+	spin_unlock_bh(&ct->lock);
	return 0;
+err:
+	spin_unlock_bh(&ct->lock);
+	return ret;
 }
 
 static int
@@ -1974 +1959 @@
		err = -EEXIST;
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-			spin_lock_bh(&nf_conntrack_expect_lock);
			err = ctnetlink_change_conntrack(ct, cda);
-			spin_unlock_bh(&nf_conntrack_expect_lock);
			if (err == 0) {
				nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
							      (1 << IPCT_ASSURED) |
@@ -2307 +2294 @@
	/* This check is less strict than ctnetlink_change_status()
	 * because callers often flip IPS_EXPECTED bits when sending
	 * an NFQA_CT attribute to the kernel. So ignore the
-	 * unchangeable bits but do not error out.
+	 * unchangeable bits but do not error out. Also user programs
+	 * are allowed to clear the bits that they are allowed to change.
	 */
-	ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
-		     (ct->status & IPS_UNCHANGEABLE_MASK);
+	__ctnetlink_change_status(ct, status, ~status);
	return 0;
 }
 
@@ -2364 +2351 @@
	if (ret < 0)
		return ret;
 
-	spin_lock_bh(&nf_conntrack_expect_lock);
-	ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
-	spin_unlock_bh(&nf_conntrack_expect_lock);
-
-	return ret;
+	return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
 }
 
 static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
+5
net/netfilter/nf_tables_api.c
··· 3778 3778 err = set->ops->insert(ctx->net, set, &elem, &ext2);
3779 3779 if (err) {
3780 3780 if (err == -EEXIST) {
3781 + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
3782 + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
3783 + nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
3784 + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
3785 + return -EBUSY;
3781 3786 if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
3782 3787 nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
3783 3788 memcmp(nft_set_ext_data(ext),
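
The guard added above handles an -EEXIST collision between elements of different kinds: both elements must either carry or lack the DATA and OBJREF extensions before their values may be compared, and a mismatch now yields -EBUSY. A standalone sketch of the XOR presence test, with a hypothetical struct in place of the nft_set_ext API:

    #include <stdbool.h>
    #include <stdio.h>

    struct elem { bool has_data; bool has_objref; };

    /* Mirror of the new check: XOR flags a "one has it, one does not"
     * mismatch for either extension; only matching shapes fall through
     * to the comparison of the values. */
    static int on_eexist(const struct elem *a, const struct elem *b)
    {
        if ((a->has_data ^ b->has_data) || (a->has_objref ^ b->has_objref))
            return -1;          /* -EBUSY in the kernel */
        return 0;               /* go on to compare the payloads */
    }

    int main(void)
    {
        struct elem plain = { false, false }, mapped = { true, false };

        printf("%d\n", on_eexist(&plain, &mapped));  /* -1: mixed kinds */
        printf("%d\n", on_eexist(&mapped, &mapped)); /*  0: comparable */
        return 0;
    }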
+2 -3
net/netfilter/nft_dynset.c
··· 82 82 nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
83 83 timeout = priv->timeout ? : set->timeout;
84 84 *nft_set_ext_expiration(ext) = jiffies + timeout;
85 - } else if (sexpr == NULL)
86 - goto out;
85 + }
87 86 
88 87 if (sexpr != NULL)
89 88 sexpr->ops->eval(sexpr, regs, pkt);
··· 91 92 regs->verdict.code = NFT_BREAK;
92 93 return;
93 94 }
94 - out:
95 + 
95 96 if (!priv->invert)
96 97 regs->verdict.code = NFT_BREAK;
97 98 }
+5
net/netfilter/nft_set_bitmap.c
··· 257 257 
258 258 static void nft_bitmap_destroy(const struct nft_set *set)
259 259 {
260 + struct nft_bitmap *priv = nft_set_priv(set);
261 + struct nft_bitmap_elem *be, *n;
262 + 
263 + list_for_each_entry_safe(be, n, &priv->list, head)
264 + nft_set_elem_destroy(set, be, true);
260 265 }
261 266 
262 267 static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
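
The destructor above must use the _safe list iterator because nft_set_elem_destroy() frees the node the cursor sits on. A generic user-space sketch of the same discipline, with a hand-rolled singly linked list standing in for the kernel list API:

    #include <stdlib.h>

    struct elem { struct elem *next; };

    static void destroy_all(struct elem *head)
    {
        struct elem *be, *n;

        for (be = head; be; be = n) {
            n = be->next;   /* cache the successor before freeing */
            free(be);
        }
    }

    int main(void)
    {
        struct elem *a = calloc(1, sizeof(*a));
        struct elem *b = calloc(1, sizeof(*b));

        a->next = b;        /* sketch only: allocation checks omitted */
        destroy_all(a);
        return 0;
    }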
+3 -1
net/netfilter/x_tables.c
··· 1051 1051 list_for_each_entry(t, &init_net.xt.tables[af], list) {
1052 1052 if (strcmp(t->name, name))
1053 1053 continue;
1054 - if (!try_module_get(t->me))
1054 + if (!try_module_get(t->me)) {
1055 + mutex_unlock(&xt[af].mutex);
1055 1056 return NULL;
1057 + }
1056 1058 
1057 1059 mutex_unlock(&xt[af].mutex);
1058 1060 if (t->table_init(net) != 0) {
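
The fix above is the classic lock-leak pattern: the try_module_get() failure path returned with xt[af].mutex still held, stalling the next lookup. A small pthread sketch of the corrected shape, every exit path releasing the lock (names and the failure condition are invented):

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    static pthread_mutex_t table_mutex = PTHREAD_MUTEX_INITIALIZER;

    static int try_get_ref(const char *name)   /* stand-in; may fail */
    {
        return strcmp(name, "gone") != 0;
    }

    static const char *find_table(const char *name)
    {
        pthread_mutex_lock(&table_mutex);
        if (!try_get_ref(name)) {
            pthread_mutex_unlock(&table_mutex); /* the unlock the patch adds */
            return NULL;
        }
        pthread_mutex_unlock(&table_mutex);
        return name;
    }

    int main(void)
    {
        printf("%s\n", find_table("filter") ? "found" : "missing");
        printf("%s\n", find_table("gone") ? "found" : "missing");
        return 0;
    }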
+9 -2
net/netfilter/xt_CT.c
··· 168 168 goto err_put_timeout;
169 169 }
170 170 timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
171 - if (timeout_ext == NULL)
171 + if (!timeout_ext) {
172 172 ret = -ENOMEM;
173 + goto err_put_timeout;
174 + }
173 175 
174 176 rcu_read_unlock();
175 177 return ret;
··· 203 201 struct xt_ct_target_info_v1 *info)
204 202 {
205 203 struct nf_conntrack_zone zone;
204 + struct nf_conn_help *help;
206 205 struct nf_conn *ct;
207 206 int ret = -EOPNOTSUPP;
208 207 
··· 252 249 if (info->timeout[0]) {
253 250 ret = xt_ct_set_timeout(ct, par, info->timeout);
254 251 if (ret < 0)
255 - goto err3;
252 + goto err4;
256 253 }
257 254 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
258 255 nf_conntrack_get(&ct->ct_general);
··· 260 257 info->ct = ct;
261 258 return 0;
262 259 
260 + err4:
261 + help = nfct_help(ct);
262 + if (help)
263 + module_put(help->helper->me);
263 264 err3:
264 265 nf_ct_tmpl_free(ct);
265 266 err2:
+1 -1
net/netfilter/xt_socket.c
··· 152 152 switch (family) {
153 153 case NFPROTO_IPV4:
154 154 return nf_defrag_ipv4_enable(net);
155 - #ifdef XT_SOCKET_HAVE_IPV6
155 + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
156 156 case NFPROTO_IPV6:
157 157 return nf_defrag_ipv6_enable(net);
158 158 #endif
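
The one-liner above replaces a file-local guard macro with IS_ENABLED(), which is true when the config option is built in or modular and does not care where (or whether) a private #define such as XT_SOCKET_HAVE_IPV6 was set up. A simplified user-space re-implementation of the kernel's macro trick, for illustration only (the real IS_ENABLED() additionally checks the CONFIG_*_MODULE spelling):

    #include <stdio.h>

    /* A config option set to 'y' expands to 1, so the placeholder macro
     * below turns it into "0," and the second argument (1) is selected;
     * an undefined option leaves junk in the first slot and 0 is selected. */
    #define __ARG_PLACEHOLDER_1 0,
    #define __take_second_arg(_ignored, val, ...) val
    #define __is_defined(x) ___is_defined(x)
    #define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
    #define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)

    #define CONFIG_IP6_NF_IPTABLES 1    /* pretend the option is built in */
    #define MY_IS_ENABLED(option) __is_defined(option)

    int main(void)
    {
        printf("%d\n", MY_IS_ENABLED(CONFIG_IP6_NF_IPTABLES)); /* 1 */
        printf("%d\n", MY_IS_ENABLED(CONFIG_DOES_NOT_EXIST));  /* 0 */
        return 0;
    }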
+29 -1
net/openvswitch/conntrack.c
··· 516 516 u16 proto, const struct sk_buff *skb)
517 517 {
518 518 struct nf_conntrack_tuple tuple;
519 + struct nf_conntrack_expect *exp;
519 520 
520 521 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
521 522 return NULL;
522 - return __nf_ct_expect_find(net, zone, &tuple);
523 + 
524 + exp = __nf_ct_expect_find(net, zone, &tuple);
525 + if (exp) {
526 + struct nf_conntrack_tuple_hash *h;
527 + 
528 + /* Delete existing conntrack entry, if it clashes with the
529 + * expectation. This can happen since conntrack ALGs do not
530 + * check for clashes between (new) expectations and existing
531 + * conntrack entries. nf_conntrack_in() will check the
532 + * expectations only if a conntrack entry cannot be found,
533 + * which can lead to OVS finding the expectation (here) in the
534 + * init direction, but which will not be removed by the
535 + * nf_conntrack_in() call, if a matching conntrack entry is
536 + * found instead. In this case all init direction packets
537 + * would be reported as new related packets, while reply
538 + * direction packets would be reported as unrelated
539 + * established packets.
540 + */
541 + h = nf_conntrack_find_get(net, zone, &tuple);
542 + if (h) {
543 + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
544 + 
545 + nf_ct_delete(ct, 0, 0);
546 + nf_conntrack_put(&ct->ct_general);
547 + }
548 + }
549 + 
550 + return exp;
523 551 }
524 552 
525 553 /* This replicates logic from nf_conntrack_core.c that is not exported. */
+1 -2
net/sched/cls_matchall.c
··· 203 203 
204 204 *arg = (unsigned long) head;
205 205 rcu_assign_pointer(tp->root, new);
206 - if (head)
207 - call_rcu(&head->rcu, mall_destroy_rcu);
206 + call_rcu(&head->rcu, mall_destroy_rcu);
208 207 return 0;
209 208 
210 209 err_replace_hw_filter:
+154 -69
samples/bpf/bpf_load.c
··· 39 39 int prog_cnt;
40 40 int prog_array_fd = -1;
41 41 
42 + struct bpf_map_data map_data[MAX_MAPS];
43 + int map_data_count = 0;
44 + 
42 45 static int populate_prog_array(const char *event, int prog_fd)
43 46 {
44 47 int ind = atoi(event), err;
··· 189 186 return 0;
190 187 }
191 188 
192 - static int load_maps(struct bpf_map_def *maps, int nr_maps,
193 - const char **map_names, fixup_map_cb fixup_map)
189 + static int load_maps(struct bpf_map_data *maps, int nr_maps,
190 + fixup_map_cb fixup_map)
194 191 {
195 192 int i;
196 - /*
197 - * Warning: Using "maps" pointing to ELF data_maps->d_buf as
198 - * an array of struct bpf_map_def is a wrong assumption about
199 - * the ELF maps section format.
200 - */
193 + 
201 194 for (i = 0; i < nr_maps; i++) {
202 - if (fixup_map)
203 - fixup_map(&maps[i], map_names[i], i);
195 + if (fixup_map) {
196 + fixup_map(&maps[i], i);
197 + /* Allow userspace to assign map FD prior to creation */
198 + if (maps[i].fd != -1) {
199 + map_fd[i] = maps[i].fd;
200 + continue;
201 + }
202 + }
204 203 
205 - if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
206 - maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) {
207 - int inner_map_fd = map_fd[maps[i].inner_map_idx];
204 + if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
205 + maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
206 + int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
208 207 
209 - map_fd[i] = bpf_create_map_in_map(maps[i].type,
210 - maps[i].key_size,
211 - inner_map_fd,
212 - maps[i].max_entries,
213 - maps[i].map_flags);
208 + map_fd[i] = bpf_create_map_in_map(maps[i].def.type,
209 + maps[i].def.key_size,
210 + inner_map_fd,
211 + maps[i].def.max_entries,
212 + maps[i].def.map_flags);
214 213 } else {
215 - map_fd[i] = bpf_create_map(maps[i].type,
216 - maps[i].key_size,
217 - maps[i].value_size,
218 - maps[i].max_entries,
219 - maps[i].map_flags);
214 + map_fd[i] = bpf_create_map(maps[i].def.type,
215 + maps[i].def.key_size,
216 + maps[i].def.value_size,
217 + maps[i].def.max_entries,
218 + maps[i].def.map_flags);
220 219 }
221 220 if (map_fd[i] < 0) {
222 221 printf("failed to create a map: %d %s\n",
223 222 errno, strerror(errno));
224 223 return 1;
225 224 }
225 + maps[i].fd = map_fd[i];
226 226 
227 - if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
227 + if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
228 228 prog_array_fd = map_fd[i];
229 229 }
230 230 return 0;
··· 257 251 }
258 252 
259 253 static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
260 - GElf_Shdr *shdr, struct bpf_insn *insn)
254 + GElf_Shdr *shdr, struct bpf_insn *insn,
255 + struct bpf_map_data *maps, int nr_maps)
261 256 {
262 257 int i, nrels;
263 258 
··· 268 261 GElf_Sym sym;
269 262 GElf_Rel rel;
270 263 unsigned int insn_idx;
264 + bool match = false;
265 + int j, map_idx;
271 266 
272 267 gelf_getrel(data, i, &rel);
273 268 
··· 283 274 return 1;
284 275 }
285 276 insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
286 - /*
287 - * Warning: Using sizeof(struct bpf_map_def) here is a
288 - * wrong assumption about ELF maps section format
289 - */
290 - insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
277 + 
278 + /* Match FD relocation against recorded map_data[] offset */
279 + for (map_idx = 0; map_idx < nr_maps; map_idx++) {
280 + if (maps[map_idx].elf_offset == sym.st_value) {
281 + match = true;
282 + break;
283 + }
284 + }
285 + if (match) {
286 + insn[insn_idx].imm = maps[map_idx].fd;
287 + } else {
288 + printf("invalid relo for insn[%d] no map_data match\n",
289 + insn_idx);
290 + return 1;
291 + }
291 292 }
292 293 
293 294 return 0;
··· 316 297 return 0;
317 298 }
318 299 
319 - static int get_sorted_map_names(Elf *elf, Elf_Data *symbols, int maps_shndx,
320 - int strtabidx, char **map_names)
300 + static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
301 + Elf *elf, Elf_Data *symbols, int strtabidx)
321 302 {
322 - GElf_Sym map_symbols[MAX_MAPS];
323 - int i, nr_maps = 0;
303 + int map_sz_elf, map_sz_copy;
304 + bool validate_zero = false;
305 + Elf_Data *data_maps;
306 + int i, nr_maps;
307 + GElf_Sym *sym;
308 + Elf_Scn *scn;
309 + int copy_sz;
324 310 
325 - for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
326 - assert(nr_maps < MAX_MAPS);
327 - if (!gelf_getsym(symbols, i, &map_symbols[nr_maps]))
311 + if (maps_shndx < 0)
312 + return -EINVAL;
313 + if (!symbols)
314 + return -EINVAL;
315 + 
316 + /* Get data for maps section via elf index */
317 + scn = elf_getscn(elf, maps_shndx);
318 + if (scn)
319 + data_maps = elf_getdata(scn, NULL);
320 + if (!scn || !data_maps) {
321 + printf("Failed to get Elf_Data from maps section %d\n",
322 + maps_shndx);
323 + return -EINVAL;
324 + }
325 + 
326 + /* For each map get corresponding symbol table entry */
327 + sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
328 + for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
329 + assert(nr_maps < MAX_MAPS+1);
330 + if (!gelf_getsym(symbols, i, &sym[nr_maps]))
328 331 continue;
329 - if (map_symbols[nr_maps].st_shndx != maps_shndx)
332 + if (sym[nr_maps].st_shndx != maps_shndx)
330 333 continue;
334 + /* Only increment if symbol is in the maps section */
331 335 nr_maps++;
332 336 }
333 337 
334 - qsort(map_symbols, nr_maps, sizeof(GElf_Sym), cmp_symbols);
338 + /* Align to map_fd[] order, via sort on offset in sym.st_value */
339 + qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
335 340 
341 + /* Keeping compatible with ELF maps section changes
342 + * ------------------------------------------------
343 + * The program size of struct bpf_map_def is known by loader
344 + * code, but the struct stored in the ELF file can be different.
345 + *
346 + * Unfortunately sym[i].st_size is zero. To calculate the
347 + * struct size stored in the ELF file, assume all structs have
348 + * the same size, and simply divide by the number of map
349 + * symbols.
350 + */
351 + map_sz_elf = data_maps->d_size / nr_maps;
352 + map_sz_copy = sizeof(struct bpf_map_def);
353 + if (map_sz_elf < map_sz_copy) {
354 + /*
355 + * Backward compat, loading older ELF file with
356 + * smaller struct, keeping remaining bytes zero.
357 + */
358 + map_sz_copy = map_sz_elf;
359 + } else if (map_sz_elf > map_sz_copy) {
360 + /*
361 + * Forward compat, loading newer ELF file with larger
362 + * struct with unknown features. Assume zero means
363 + * feature not used. Thus, validate rest of struct
364 + * data is zero.
365 + */
366 + validate_zero = true;
367 + }
368 + 
369 + /* Memcpy relevant part of ELF maps data to loader maps */
336 370 for (i = 0; i < nr_maps; i++) {
337 - char *map_name;
371 + unsigned char *addr, *end;
372 + struct bpf_map_def *def;
373 + const char *map_name;
374 + size_t offset;
338 375 
339 - map_name = elf_strptr(elf, strtabidx, map_symbols[i].st_name);
340 - if (!map_name) {
341 - printf("cannot get map symbol\n");
342 - return -1;
343 - }
344 - 
345 - map_names[i] = strdup(map_name);
346 - if (!map_names[i]) {
376 + map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
377 + maps[i].name = strdup(map_name);
378 + if (!maps[i].name) {
347 379 printf("strdup(%s): %s(%d)\n", map_name,
348 380 strerror(errno), errno);
349 - return -1;
381 + free(sym);
382 + return -errno;
383 + }
384 + 
385 + /* Symbol value is offset into ELF maps section data area */
386 + offset = sym[i].st_value;
387 + def = (struct bpf_map_def *)(data_maps->d_buf + offset);
388 + maps[i].elf_offset = offset;
389 + memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
390 + memcpy(&maps[i].def, def, map_sz_copy);
391 + 
392 + /* Verify no newer features were requested */
393 + if (validate_zero) {
394 + addr = (unsigned char*) def + map_sz_copy;
395 + end = (unsigned char*) def + map_sz_elf;
396 + for (; addr < end; addr++) {
397 + if (*addr != 0) {
398 + free(sym);
399 + return -EFBIG;
400 + }
401 + }
350 402 }
351 403 }
352 404 
405 + free(sym);
353 406 return nr_maps;
354 407 }
355 408 
··· 432 341 GElf_Ehdr ehdr;
433 342 GElf_Shdr shdr, shdr_prog;
434 343 Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
435 - char *shname, *shname_prog, *map_names[MAX_MAPS] = { NULL };
344 + char *shname, *shname_prog;
345 + int nr_maps = 0;
436 346 
437 347 /* reset global variables */
438 348 kern_version = 0;
··· 481 389 }
482 390 memcpy(&kern_version, data->d_buf, sizeof(int));
483 391 } else if (strcmp(shname, "maps") == 0) {
392 + int j;
393 + 
484 394 maps_shndx = i;
485 395 data_maps = data;
396 + for (j = 0; j < MAX_MAPS; j++)
397 + map_data[j].fd = -1;
486 398 } else if (shdr.sh_type == SHT_SYMTAB) {
487 399 strtabidx = shdr.sh_link;
488 400 symbols = data;
··· 501 405 }
502 406 
503 407 if (data_maps) {
504 - int nr_maps;
505 - int prog_elf_map_sz;
506 - 
507 - nr_maps = get_sorted_map_names(elf, symbols, maps_shndx,
508 - strtabidx, map_names);
509 - if (nr_maps < 0)
510 - goto done;
511 - 
512 - /* Deduce map struct size stored in ELF maps section */
513 - prog_elf_map_sz = data_maps->d_size / nr_maps;
514 - if (prog_elf_map_sz != sizeof(struct bpf_map_def)) {
515 - printf("Error: ELF maps sec wrong size (%d/%lu),"
516 - " old kern.o file?\n",
517 - prog_elf_map_sz, sizeof(struct bpf_map_def));
408 + nr_maps = load_elf_maps_section(map_data, maps_shndx,
409 + elf, symbols, strtabidx);
410 + if (nr_maps < 0) {
411 + printf("Error: Failed loading ELF maps (errno:%d):%s\n",
412 + nr_maps, strerror(-nr_maps));
518 413 ret = 1;
519 414 goto done;
520 415 }
521 - 
522 - if (load_maps(data_maps->d_buf, nr_maps,
523 - (const char **)map_names, fixup_map))
416 + if (load_maps(map_data, nr_maps, fixup_map))
524 417 goto done;
418 + map_data_count = nr_maps;
525 419 
526 420 processed_sec[maps_shndx] = true;
527 421 }
··· 539 453 processed_sec[shdr.sh_info] = true;
540 454 processed_sec[i] = true;
541 455 
542 - if (parse_relo_and_apply(data, symbols, &shdr, insns))
456 + if (parse_relo_and_apply(data, symbols, &shdr, insns,
457 + map_data, nr_maps))
543 458 continue;
544 459 
545 460 if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
··· 575 488 
576 489 ret = 0;
577 490 done:
578 - for (i = 0; i < MAX_MAPS; i++)
579 - free(map_names[i]);
580 491 close(fd);
581 492 return ret;
582 493 }
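
The compatibility rule at the core of the new load_elf_maps_section() is: copy min(ELF record size, loader struct size), zero-fill the remainder for older objects, and accept newer objects only if every unknown trailing byte is zero (-EFBIG otherwise). A condensed user-space sketch of that rule, with a hypothetical struct layout:

    #include <errno.h>
    #include <string.h>

    struct map_def { int type, key_size, value_size, max_entries; };

    static int copy_map_def(struct map_def *dst, const unsigned char *src,
                            size_t elf_sz)
    {
        size_t copy_sz = elf_sz < sizeof(*dst) ? elf_sz : sizeof(*dst);
        size_t i;

        memset(dst, 0, sizeof(*dst));        /* older ELF: tail stays zero */
        memcpy(dst, src, copy_sz);
        for (i = copy_sz; i < elf_sz; i++)   /* newer ELF: unknown tail... */
            if (src[i] != 0)
                return -EFBIG;               /* ...must not request features */
        return 0;
    }

    int main(void)
    {
        unsigned char rec[sizeof(struct map_def) + 4] = { 1 }; /* larger, zero tail */
        struct map_def def;

        return copy_map_def(&def, rec, sizeof(rec)) ? 1 : 0;   /* accepted */
    }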
+15 -3
samples/bpf/bpf_load.h
··· 15 15 unsigned int inner_map_idx;
16 16 };
17 17 
18 - typedef void (*fixup_map_cb)(struct bpf_map_def *map, const char *map_name,
19 - int idx);
18 + struct bpf_map_data {
19 + int fd;
20 + char *name;
21 + size_t elf_offset;
22 + struct bpf_map_def def;
23 + };
20 24 
21 - extern int map_fd[MAX_MAPS];
25 + typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
26 + 
22 27 extern int prog_fd[MAX_PROGS];
23 28 extern int event_fd[MAX_PROGS];
24 29 extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
25 30 extern int prog_cnt;
31 + 
32 + /* There is a one-to-one mapping between map_fd[] and map_data[].
33 + * The map_data[] just carries richer info on the given map.
34 + */
35 + extern int map_fd[MAX_MAPS];
36 + extern struct bpf_map_data map_data[MAX_MAPS];
37 + extern int map_data_count;
26 38 
27 39 /* parses elf file compiled by llvm .c->.o
28 40 * . parses 'maps' section and creates maps via BPF syscall
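
A callback written against this header can now retune a map or hand the loader a pre-existing FD before creation; fd stays -1 when the loader should create the map itself. A hypothetical example (map name invented; assumes the sample loader's load_bpf_file_fixup_map() entry point, which is not part of this hunk):

    #include <string.h>
    #include "bpf_load.h"

    /* Bump one map's size before load_maps() creates it. Assigning a
     * valid FD to map->fd here would make the loader reuse it instead. */
    static void my_fixup(struct bpf_map_data *map, int idx)
    {
        (void)idx;
        if (!strcmp(map->name, "my_hash_map"))   /* invented map name */
            map->def.max_entries = 4096;
    }

    int main(void)
    {
        /* "prog_kern.o" is a placeholder object file name */
        return load_bpf_file_fixup_map("prog_kern.o", my_fixup) ? 1 : 0;
    }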
+7 -7
samples/bpf/map_perf_test_user.c
··· 320 320 assert(!r);
321 321 }
322 322 
323 - static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
323 + static void fixup_map(struct bpf_map_data *map, int idx)
324 324 {
325 325 int i;
326 326 
327 - if (!strcmp("inner_lru_hash_map", name)) {
327 + if (!strcmp("inner_lru_hash_map", map->name)) {
328 328 inner_lru_hash_idx = idx;
329 - inner_lru_hash_size = map->max_entries;
329 + inner_lru_hash_size = map->def.max_entries;
330 330 }
331 331 
332 - if (!strcmp("array_of_lru_hashs", name)) {
332 + if (!strcmp("array_of_lru_hashs", map->name)) {
333 333 if (inner_lru_hash_idx == -1) {
334 334 printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
335 335 exit(1);
336 336 }
337 - map->inner_map_idx = inner_lru_hash_idx;
337 + map->def.inner_map_idx = inner_lru_hash_idx;
338 338 array_of_lru_hashs_idx = idx;
339 339 }
340 340 
··· 345 345 
346 346 /* Only change the max_entries for the enabled test(s) */
347 347 for (i = 0; i < NR_TESTS; i++) {
348 - if (!strcmp(test_map_names[i], name) &&
348 + if (!strcmp(test_map_names[i], map->name) &&
349 349 (check_test_flags(i))) {
350 - map->max_entries = num_map_entries;
350 + map->def.max_entries = num_map_entries;
351 351 }
352 352 }
353 353 }
+7
samples/bpf/tracex2_user.c
··· 4 4 #include <signal.h>
5 5 #include <linux/bpf.h>
6 6 #include <string.h>
7 + #include <sys/resource.h>
7 8 
8 9 #include "libbpf.h"
9 10 #include "bpf_load.h"
··· 113 112 
114 113 int main(int ac, char **argv)
115 114 {
115 + struct rlimit r = {1024*1024, RLIM_INFINITY};
116 116 char filename[256];
117 117 long key, next_key, value;
118 118 FILE *f;
119 119 int i;
120 120 
121 121 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
122 + 
123 + if (setrlimit(RLIMIT_MEMLOCK, &r)) {
124 + perror("setrlimit(RLIMIT_MEMLOCK)");
125 + return 1;
126 + }
122 127 
123 128 signal(SIGINT, int_exit);
124 129 
+7
samples/bpf/tracex3_user.c
··· 11 11 #include <stdbool.h>
12 12 #include <string.h>
13 13 #include <linux/bpf.h>
14 + #include <sys/resource.h>
14 15 
15 16 #include "libbpf.h"
16 17 #include "bpf_load.h"
··· 113 112 
114 113 int main(int ac, char **argv)
115 114 {
115 + struct rlimit r = {1024*1024, RLIM_INFINITY};
116 116 char filename[256];
117 117 int i;
118 118 
119 119 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
120 + 
121 + if (setrlimit(RLIMIT_MEMLOCK, &r)) {
122 + perror("setrlimit(RLIMIT_MEMLOCK)");
123 + return 1;
124 + }
120 125 
121 126 if (load_bpf_file(filename)) {
122 127 printf("%s", bpf_log_buf);
+8
samples/bpf/tracex4_user.c
··· 12 12 #include <string.h>
13 13 #include <time.h>
14 14 #include <linux/bpf.h>
15 + #include <sys/resource.h>
16 + 
15 17 #include "libbpf.h"
16 18 #include "bpf_load.h"
17 19 
··· 52 50 
53 51 int main(int ac, char **argv)
54 52 {
53 + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
55 54 char filename[256];
56 55 int i;
57 56 
58 57 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
58 + 
59 + if (setrlimit(RLIMIT_MEMLOCK, &r)) {
60 + perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
61 + return 1;
62 + }
59 63 
60 64 if (load_bpf_file(filename)) {
61 65 printf("%s", bpf_log_buf);
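
All three tracex samples gain the same preamble: raise RLIMIT_MEMLOCK before any BPF syscalls, since map and program memory is charged against that limit and the default is far too small. The standalone shape of the pattern:

    #include <stdio.h>
    #include <sys/resource.h>

    int main(void)
    {
        struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };

        /* Needs root/CAP_SYS_RESOURCE; do it before creating maps/programs */
        if (setrlimit(RLIMIT_MEMLOCK, &r)) {
            perror("setrlimit(RLIMIT_MEMLOCK)");
            return 1;
        }
        /* ... bpf_create_map()/load_bpf_file() calls go here ... */
        return 0;
    }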
+3 -3
tools/testing/selftests/bpf/Makefile
··· 13 13 
14 14 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
15 15 
16 - TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o
16 + TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o
17 17 
18 18 TEST_PROGS := test_kmod.sh
19 19 
··· 34 34 CLANG ?= clang
35 35 
36 36 %.o: %.c
37 - $(CLANG) -I../../../include/uapi -I../../../../samples/bpf/ \
38 - -D__x86_64__ -Wno-compare-distinct-pointer-types \
37 + $(CLANG) -I. -I../../../include/uapi -I../../../../samples/bpf/ \
38 + -Wno-compare-distinct-pointer-types \
39 39 -O2 -target bpf -c $< -o $@
+1
tools/testing/selftests/bpf/gnu/stubs.h
··· 1 + /* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
+16
tools/testing/selftests/bpf/test_progs.c
··· 268 268 bpf_object__close(obj);
269 269 }
270 270 
271 + static void test_tcp_estats(void)
272 + {
273 + const char *file = "./test_tcp_estats.o";
274 + int err, prog_fd;
275 + struct bpf_object *obj;
276 + __u32 duration = 0;
277 + 
278 + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
279 + CHECK(err, "", "err %d errno %d\n", err, errno);
280 + if (err)
281 + return;
282 + 
283 + bpf_object__close(obj);
284 + }
285 + 
271 286 int main(void)
272 287 {
273 288 struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
··· 292 277 test_pkt_access();
293 278 test_xdp();
294 279 test_l4lb();
280 + test_tcp_estats();
295 281 
296 282 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
297 283 return 0;
+258
tools/testing/selftests/bpf/test_tcp_estats.c
··· 1 + /* Copyright (c) 2017 Facebook
2 + *
3 + * This program is free software; you can redistribute it and/or
4 + * modify it under the terms of version 2 of the GNU General Public
5 + * License as published by the Free Software Foundation.
6 + */
7 + 
8 + /* This program shows clang/llvm is able to generate code patterns
9 + * like:
10 + * _tcp_send_active_reset:
11 + * 0: bf 16 00 00 00 00 00 00 r6 = r1
12 + * ......
13 + * 335: b7 01 00 00 0f 00 00 00 r1 = 15
14 + * 336: 05 00 48 00 00 00 00 00 goto 72
15 + *
16 + * LBB0_3:
17 + * 337: b7 01 00 00 01 00 00 00 r1 = 1
18 + * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1
19 + * 408: b7 01 00 00 03 00 00 00 r1 = 3
20 + *
21 + * LBB0_4:
22 + * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2)
23 + * 410: bf a7 00 00 00 00 00 00 r7 = r10
24 + * 411: 07 07 00 00 b8 ff ff ff r7 += -72
25 + * 412: bf 73 00 00 00 00 00 00 r3 = r7
26 + * 413: 0f 13 00 00 00 00 00 00 r3 += r1
27 + * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2
28 + *
29 + * From the above code snippet, the code generated by the compiler
30 + * is reasonable. The "r1" is assigned to different values in basic
31 + * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4".
32 + * The verifier should be able to handle such code patterns.
33 + */
34 + #include <string.h>
35 + #include <linux/bpf.h>
36 + #include <linux/ipv6.h>
37 + #include <linux/version.h>
38 + #include <sys/socket.h>
39 + #include "bpf_helpers.h"
40 + 
41 + #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
42 + #define TCP_ESTATS_MAGIC 0xBAADBEEF
43 + 
44 + /* This test case needs the "sock" and "pt_regs" data structures.
45 + * Recursively, "sock" needs "sock_common" and "inet_sock".
46 + * However, this is a unit test case for verifier purposes only,
47 + * without bpf program execution.
48 + * We can safely mock much simpler data structures, basically
49 + * only taking the necessary fields from kernel headers.
50 + */
51 + typedef __u32 __bitwise __portpair;
52 + typedef __u64 __bitwise __addrpair;
53 + 
54 + struct sock_common {
55 + unsigned short skc_family;
56 + union {
57 + __addrpair skc_addrpair;
58 + struct {
59 + __be32 skc_daddr;
60 + __be32 skc_rcv_saddr;
61 + };
62 + };
63 + union {
64 + __portpair skc_portpair;
65 + struct {
66 + __be16 skc_dport;
67 + __u16 skc_num;
68 + };
69 + };
70 + struct in6_addr skc_v6_daddr;
71 + struct in6_addr skc_v6_rcv_saddr;
72 + };
73 + 
74 + struct sock {
75 + struct sock_common __sk_common;
76 + #define sk_family __sk_common.skc_family
77 + #define sk_v6_daddr __sk_common.skc_v6_daddr
78 + #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
79 + };
80 + 
81 + struct inet_sock {
82 + struct sock sk;
83 + #define inet_daddr sk.__sk_common.skc_daddr
84 + #define inet_dport sk.__sk_common.skc_dport
85 + __be32 inet_saddr;
86 + __be16 inet_sport;
87 + };
88 + 
89 + struct pt_regs {
90 + long di;
91 + };
92 + 
93 + static inline struct inet_sock *inet_sk(const struct sock *sk)
94 + {
95 + return (struct inet_sock *)sk;
96 + }
97 + 
98 + /* Define various data structures for state recording.
99 + * Some fields are not used due to test simplification.
100 + */
101 + enum tcp_estats_addrtype {
102 + TCP_ESTATS_ADDRTYPE_IPV4 = 1,
103 + TCP_ESTATS_ADDRTYPE_IPV6 = 2
104 + };
105 + 
106 + enum tcp_estats_event_type {
107 + TCP_ESTATS_ESTABLISH,
108 + TCP_ESTATS_PERIODIC,
109 + TCP_ESTATS_TIMEOUT,
110 + TCP_ESTATS_RETRANSMIT_TIMEOUT,
111 + TCP_ESTATS_RETRANSMIT_OTHER,
112 + TCP_ESTATS_SYN_RETRANSMIT,
113 + TCP_ESTATS_SYNACK_RETRANSMIT,
114 + TCP_ESTATS_TERM,
115 + TCP_ESTATS_TX_RESET,
116 + TCP_ESTATS_RX_RESET,
117 + TCP_ESTATS_WRITE_TIMEOUT,
118 + TCP_ESTATS_CONN_TIMEOUT,
119 + TCP_ESTATS_ACK_LATENCY,
120 + TCP_ESTATS_NEVENTS,
121 + };
122 + 
123 + struct tcp_estats_event {
124 + int pid;
125 + int cpu;
126 + unsigned long ts;
127 + unsigned int magic;
128 + enum tcp_estats_event_type event_type;
129 + };
130 + 
131 + /* The below data structure is packed in order for
132 + * the llvm compiler to generate the expected code.
133 + */
134 + struct tcp_estats_conn_id {
135 + unsigned int localaddressType;
136 + struct {
137 + unsigned char data[16];
138 + } localaddress;
139 + struct {
140 + unsigned char data[16];
141 + } remaddress;
142 + unsigned short localport;
143 + unsigned short remport;
144 + } __attribute__((__packed__));
145 + 
146 + struct tcp_estats_basic_event {
147 + struct tcp_estats_event event;
148 + struct tcp_estats_conn_id conn_id;
149 + };
150 + 
151 + struct bpf_map_def SEC("maps") ev_record_map = {
152 + .type = BPF_MAP_TYPE_HASH,
153 + .key_size = sizeof(__u32),
154 + .value_size = sizeof(struct tcp_estats_basic_event),
155 + .max_entries = 1024,
156 + };
157 + 
158 + struct dummy_tracepoint_args {
159 + unsigned long long pad;
160 + struct sock *sock;
161 + };
162 + 
163 + static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event,
164 + enum tcp_estats_event_type type)
165 + {
166 + event->magic = TCP_ESTATS_MAGIC;
167 + event->ts = bpf_ktime_get_ns();
168 + event->event_type = type;
169 + }
170 + 
171 + static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from)
172 + {
173 + to[0] = _(from[0]);
174 + to[1] = _(from[1]);
175 + to[2] = _(from[2]);
176 + to[3] = _(from[3]);
177 + }
178 + 
179 + static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id,
180 + __be32 *saddr, __be32 *daddr)
181 + {
182 + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
183 + 
184 + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
185 + unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
186 + }
187 + 
188 + static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id,
189 + __be32 *saddr, __be32 *daddr)
190 + {
191 + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
192 + 
193 + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
194 + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
195 + (__u8 *)(saddr + 1));
196 + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
197 + (__u8 *)(saddr + 2));
198 + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
199 + (__u8 *)(saddr + 3));
200 + 
201 + unaligned_u32_set(conn_id->remaddress.data,
202 + (__u8 *)(daddr));
203 + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
204 + (__u8 *)(daddr + 1));
205 + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
206 + (__u8 *)(daddr + 2));
207 + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
208 + (__u8 *)(daddr + 3));
209 + }
210 + 
211 + static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id,
212 + struct sock *sk)
213 + {
214 + conn_id->localport = _(inet_sk(sk)->inet_sport);
215 + conn_id->remport = _(inet_sk(sk)->inet_dport);
216 + 
217 + if (_(sk->sk_family) == AF_INET6)
218 + conn_id_ipv6_init(conn_id,
219 + sk->sk_v6_rcv_saddr.s6_addr32,
220 + sk->sk_v6_daddr.s6_addr32);
221 + else
222 + conn_id_ipv4_init(conn_id,
223 + &inet_sk(sk)->inet_saddr,
224 + &inet_sk(sk)->inet_daddr);
225 + }
226 + 
227 + static __always_inline void tcp_estats_init(struct sock *sk,
228 + struct tcp_estats_event *event,
229 + struct tcp_estats_conn_id *conn_id,
230 + enum tcp_estats_event_type type)
231 + {
232 + tcp_estats_ev_init(event, type);
233 + tcp_estats_conn_id_init(conn_id, sk);
234 + }
235 + 
236 + static __always_inline void send_basic_event(struct sock *sk,
237 + enum tcp_estats_event_type type)
238 + {
239 + struct tcp_estats_basic_event ev;
240 + __u32 key = bpf_get_prandom_u32();
241 + 
242 + memset(&ev, 0, sizeof(ev));
243 + tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
244 + bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
245 + }
246 + 
247 + SEC("dummy_tracepoint")
248 + int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
249 + {
250 + if (!arg->sock)
251 + return 0;
252 + 
253 + send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
254 + return 0;
255 + }
256 + 
257 + char _license[] SEC("license") = "GPL";
258 + __u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */