Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'net-6.8.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Paolo Abeni:
"Including fixes from bpf and netfilter.

Current release - regressions:

- af_unix: fix another unix GC hangup

Previous releases - regressions:

- core: fix a possible AF_UNIX deadlock

- bpf: fix NULL pointer dereference in sk_psock_verdict_data_ready()

- netfilter: nft_flow_offload: release dst in case direct xmit path
is used

- bridge: switchdev: ensure MDB events are delivered exactly once

- l2tp: pass correct message length to ip6_append_data

- dccp/tcp: unhash sk from ehash for tb2 alloc failure after
check_established()

- tls: fixes for record type handling with PEEK

- devlink: fix possible use-after-free and memory leaks in
devlink_init()

Previous releases - always broken:

- bpf: fix an oops when attempting to read the vsyscall page through
bpf_probe_read_kernel

- sched: act_mirred: use the backlog for mirred ingress

- netfilter: nft_flow_offload: fix dst refcount underflow

- ipv6: sr: fix possible use-after-free and null-ptr-deref

- mptcp: fix several data races

- phonet: take correct lock to peek at the RX queue

Misc:

- handful of fixes and reliability improvements for selftests"

* tag 'net-6.8.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (72 commits)
l2tp: pass correct message length to ip6_append_data
net: phy: realtek: Fix rtl8211f_config_init() for RTL8211F(D)(I)-VD-CG PHY
selftests: ioam: refactoring to align with the fix
Fix write to cloned skb in ipv6_hop_ioam()
phonet/pep: fix racy skb_queue_empty() use
phonet: take correct lock to peek at the RX queue
net: sparx5: Add spinlock for frame transmission from CPU
net/sched: flower: Add lock protection when remove filter handle
devlink: fix port dump cmd type
net: stmmac: Fix EST offset for dwmac 5.10
tools: ynl: don't leak mcast_groups on init error
tools: ynl: make sure we always pass yarg to mnl_cb_run
net: mctp: put sock on tag allocation failure
netfilter: nf_tables: use kzalloc for hook allocation
netfilter: nf_tables: register hooks last when adding new chain/flowtable
netfilter: nft_flow_offload: release dst in case direct xmit path is used
netfilter: nft_flow_offload: reset dst in route object after setting up flow
netfilter: nf_tables: set dormant flag on hook register failure
selftests: tls: add test for peeking past a record of a different type
selftests: tls: add test for merging of same-type control messages
...

+870 -378
+1 -1
Documentation/process/maintainer-netdev.rst
··· 431 431 Checks in patchwork are mostly simple wrappers around existing kernel 432 432 scripts, the sources are available at: 433 433 434 - https://github.com/kuba-moo/nipa/tree/master/tests 434 + https://github.com/linux-netdev/nipa/tree/master/tests 435 435 436 436 **Do not** post your patches just to run them through the checks. 437 437 You must ensure that your patches are ready by testing them locally
+2
MAINTAINERS
··· 15242 15242 F: Documentation/networking/net_cachelines/ 15243 15243 F: Documentation/process/maintainer-netdev.rst 15244 15244 F: Documentation/userspace-api/netlink/ 15245 + F: include/linux/framer/framer-provider.h 15246 + F: include/linux/framer/framer.h 15245 15247 F: include/linux/in.h 15246 15248 F: include/linux/indirect_call_wrapper.h 15247 15249 F: include/linux/net.h
+10
arch/x86/include/asm/vsyscall.h
··· 4 4 5 5 #include <linux/seqlock.h> 6 6 #include <uapi/asm/vsyscall.h> 7 + #include <asm/page_types.h> 7 8 8 9 #ifdef CONFIG_X86_VSYSCALL_EMULATION 9 10 extern void map_vsyscall(void); ··· 24 23 return false; 25 24 } 26 25 #endif 26 + 27 + /* 28 + * The (legacy) vsyscall page is the long page in the kernel portion 29 + * of the address space that has user-accessible permissions. 30 + */ 31 + static inline bool is_vsyscall_vaddr(unsigned long vaddr) 32 + { 33 + return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); 34 + } 27 35 28 36 #endif /* _ASM_X86_VSYSCALL_H */
-9
arch/x86/mm/fault.c
··· 798 798 show_opcodes(regs, loglvl); 799 799 } 800 800 801 - /* 802 - * The (legacy) vsyscall page is the long page in the kernel portion 803 - * of the address space that has user-accessible permissions. 804 - */ 805 - static bool is_vsyscall_vaddr(unsigned long vaddr) 806 - { 807 - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); 808 - } 809 - 810 801 static void 811 802 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, 812 803 unsigned long address, u32 pkey, int si_code)
+10
arch/x86/mm/maccess.c
··· 3 3 #include <linux/uaccess.h> 4 4 #include <linux/kernel.h> 5 5 6 + #include <asm/vsyscall.h> 7 + 6 8 #ifdef CONFIG_X86_64 7 9 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) 8 10 { ··· 15 13 * normal userspace and the userspace guard page: 16 14 */ 17 15 if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) 16 + return false; 17 + 18 + /* 19 + * Reading from the vsyscall page may cause an unhandled fault in 20 + * certain cases. Though it is at an address above TASK_SIZE_MAX, it is 21 + * usually considered as a user space address. 22 + */ 23 + if (is_vsyscall_vaddr(vaddr)) 18 24 return false; 19 25 20 26 /*
+1
drivers/net/ethernet/adi/Kconfig
··· 7 7 bool "Analog Devices devices" 8 8 default y 9 9 depends on SPI 10 + select PHYLIB 10 11 help 11 12 If you have a network (Ethernet) card belonging to this class, say Y. 12 13
+3 -3
drivers/net/ethernet/broadcom/asp2/bcmasp.c
··· 535 535 int j = 0, i; 536 536 537 537 for (i = 0; i < NUM_NET_FILTERS; i++) { 538 - if (j == *rule_cnt) 539 - return -EMSGSIZE; 540 - 541 538 if (!priv->net_filters[i].claimed || 542 539 priv->net_filters[i].port != intf->port) 543 540 continue; ··· 543 546 priv->net_filters[i].wake_filter && 544 547 priv->net_filters[i - 1].wake_filter) 545 548 continue; 549 + 550 + if (j == *rule_cnt) 551 + return -EMSGSIZE; 546 552 547 553 rule_locs[j++] = priv->net_filters[i].fs.location; 548 554 }
+3
drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
··· 1050 1050 netdev_err(dev, "could not attach to PHY\n"); 1051 1051 goto err_phy_disable; 1052 1052 } 1053 + 1054 + /* Indicate that the MAC is responsible for PHY PM */ 1055 + phydev->mac_managed_pm = true; 1053 1056 } else if (!intf->wolopts) { 1054 1057 ret = phy_resume(dev->phydev); 1055 1058 if (ret)
+2 -1
drivers/net/ethernet/cisco/enic/vnic_vic.c
··· 49 49 50 50 tlv->type = htons(type); 51 51 tlv->length = htons(length); 52 - memcpy(tlv->value, value, length); 52 + unsafe_memcpy(tlv->value, value, length, 53 + /* Flexible array of flexible arrays */); 53 54 54 55 vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1); 55 56 vp->length = htonl(ntohl(vp->length) +
+4
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
··· 415 415 return; 416 416 } 417 417 418 + /* AF modifies given action iff PF/VF has requested for it */ 419 + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) 420 + return; 421 + 418 422 /* copy VF default entry action to the VF mcam entry */ 419 423 rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, 420 424 target_func);
+1
drivers/net/ethernet/microchip/sparx5/sparx5_main.c
··· 757 757 platform_set_drvdata(pdev, sparx5); 758 758 sparx5->pdev = pdev; 759 759 sparx5->dev = &pdev->dev; 760 + spin_lock_init(&sparx5->tx_lock); 760 761 761 762 /* Do switch core reset if available */ 762 763 reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch");
+1
drivers/net/ethernet/microchip/sparx5/sparx5_main.h
··· 280 280 int xtr_irq; 281 281 /* Frame DMA */ 282 282 int fdma_irq; 283 + spinlock_t tx_lock; /* lock for frame transmission */ 283 284 struct sparx5_rx rx; 284 285 struct sparx5_tx tx; 285 286 /* PTP */
+2
drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
··· 244 244 } 245 245 246 246 skb_tx_timestamp(skb); 247 + spin_lock(&sparx5->tx_lock); 247 248 if (sparx5->fdma_irq > 0) 248 249 ret = sparx5_fdma_xmit(sparx5, ifh, skb); 249 250 else 250 251 ret = sparx5_inject(sparx5, ifh, skb, dev); 252 + spin_unlock(&sparx5->tx_lock); 251 253 252 254 if (ret == -EBUSY) 253 255 goto busy;
+1 -1
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
··· 223 223 ionic_unmap_bars(ionic); 224 224 pci_release_regions(ionic->pdev); 225 225 226 - if (atomic_read(&ionic->pdev->enable_cnt) > 0) 226 + if (pci_is_enabled(ionic->pdev)) 227 227 pci_disable_device(ionic->pdev); 228 228 } 229 229
+1 -1
drivers/net/ethernet/stmicro/stmmac/hwif.c
··· 224 224 .regs = { 225 225 .ptp_off = PTP_GMAC4_OFFSET, 226 226 .mmc_off = MMC_GMAC4_OFFSET, 227 - .est_off = EST_XGMAC_OFFSET, 227 + .est_off = EST_GMAC4_OFFSET, 228 228 }, 229 229 .desc = &dwmac4_desc_ops, 230 230 .dma = &dwmac410_dma_ops,
-20
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
··· 6059 6059 struct net_device *dev = (struct net_device *)dev_id; 6060 6060 struct stmmac_priv *priv = netdev_priv(dev); 6061 6061 6062 - if (unlikely(!dev)) { 6063 - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); 6064 - return IRQ_NONE; 6065 - } 6066 - 6067 6062 /* Check if adapter is up */ 6068 6063 if (test_bit(STMMAC_DOWN, &priv->state)) 6069 6064 return IRQ_HANDLED; ··· 6073 6078 { 6074 6079 struct net_device *dev = (struct net_device *)dev_id; 6075 6080 struct stmmac_priv *priv = netdev_priv(dev); 6076 - 6077 - if (unlikely(!dev)) { 6078 - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); 6079 - return IRQ_NONE; 6080 - } 6081 6081 6082 6082 /* Check if adapter is up */ 6083 6083 if (test_bit(STMMAC_DOWN, &priv->state)) ··· 6094 6104 6095 6105 dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); 6096 6106 priv = container_of(dma_conf, struct stmmac_priv, dma_conf); 6097 - 6098 - if (unlikely(!data)) { 6099 - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); 6100 - return IRQ_NONE; 6101 - } 6102 6107 6103 6108 /* Check if adapter is up */ 6104 6109 if (test_bit(STMMAC_DOWN, &priv->state)) ··· 6120 6135 6121 6136 dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); 6122 6137 priv = container_of(dma_conf, struct stmmac_priv, dma_conf); 6123 - 6124 - if (unlikely(!data)) { 6125 - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); 6126 - return IRQ_NONE; 6127 - } 6128 6138 6129 6139 /* Check if adapter is up */ 6130 6140 if (test_bit(STMMAC_DOWN, &priv->state))
+5 -5
drivers/net/gtp.c
··· 1907 1907 if (err < 0) 1908 1908 goto error_out; 1909 1909 1910 - err = genl_register_family(&gtp_genl_family); 1910 + err = register_pernet_subsys(&gtp_net_ops); 1911 1911 if (err < 0) 1912 1912 goto unreg_rtnl_link; 1913 1913 1914 - err = register_pernet_subsys(&gtp_net_ops); 1914 + err = genl_register_family(&gtp_genl_family); 1915 1915 if (err < 0) 1916 - goto unreg_genl_family; 1916 + goto unreg_pernet_subsys; 1917 1917 1918 1918 pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", 1919 1919 sizeof(struct pdp_ctx)); 1920 1920 return 0; 1921 1921 1922 - unreg_genl_family: 1923 - genl_unregister_family(&gtp_genl_family); 1922 + unreg_pernet_subsys: 1923 + unregister_pernet_subsys(&gtp_net_ops); 1924 1924 unreg_rtnl_link: 1925 1925 rtnl_link_unregister(&gtp_link_ops); 1926 1926 error_out:
+1 -1
drivers/net/ipa/ipa_interrupt.c
··· 212 212 u32 unit_count; 213 213 u32 unit; 214 214 215 - unit_count = roundup(ipa->endpoint_count, 32); 215 + unit_count = DIV_ROUND_UP(ipa->endpoint_count, 32); 216 216 for (unit = 0; unit < unit_count; unit++) { 217 217 const struct reg *reg; 218 218 u32 val;
+3 -1
drivers/net/phy/realtek.c
··· 421 421 ERR_PTR(ret)); 422 422 return ret; 423 423 } 424 + 425 + return genphy_soft_reset(phydev); 424 426 } 425 427 426 - return genphy_soft_reset(phydev); 428 + return 0; 427 429 } 428 430 429 431 static int rtl821x_suspend(struct phy_device *phydev)
+1 -1
include/net/netfilter/nf_flow_table.h
··· 276 276 } 277 277 278 278 void flow_offload_route_init(struct flow_offload *flow, 279 - const struct nf_flow_route *route); 279 + struct nf_flow_route *route); 280 280 281 281 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); 282 282 void flow_offload_refresh(struct nf_flowtable *flow_table,
+3
include/net/switchdev.h
··· 308 308 int switchdev_port_attr_set(struct net_device *dev, 309 309 const struct switchdev_attr *attr, 310 310 struct netlink_ext_ack *extack); 311 + bool switchdev_port_obj_act_is_deferred(struct net_device *dev, 312 + enum switchdev_notifier_type nt, 313 + const struct switchdev_obj *obj); 311 314 int switchdev_port_obj_add(struct net_device *dev, 312 315 const struct switchdev_obj *obj, 313 316 struct netlink_ext_ack *extack);
+1 -1
include/net/tcp.h
··· 2506 2506 /* cleanup ulp */ 2507 2507 void (*release)(struct sock *sk); 2508 2508 /* diagnostic */ 2509 - int (*get_info)(const struct sock *sk, struct sk_buff *skb); 2509 + int (*get_info)(struct sock *sk, struct sk_buff *skb); 2510 2510 size_t (*get_info_size)(const struct sock *sk); 2511 2511 /* clone ulp */ 2512 2512 void (*clone)(const struct request_sock *req, struct sock *newsk,
+4 -1
kernel/bpf/helpers.c
··· 1101 1101 struct bpf_prog *prog; 1102 1102 void __rcu *callback_fn; 1103 1103 void *value; 1104 + struct rcu_head rcu; 1104 1105 }; 1105 1106 1106 1107 /* the actual struct hidden inside uapi struct bpf_timer */ ··· 1333 1332 1334 1333 if (in_nmi()) 1335 1334 return -EOPNOTSUPP; 1335 + rcu_read_lock(); 1336 1336 __bpf_spin_lock_irqsave(&timer->lock); 1337 1337 t = timer->timer; 1338 1338 if (!t) { ··· 1355 1353 * if it was running. 1356 1354 */ 1357 1355 ret = ret ?: hrtimer_cancel(&t->timer); 1356 + rcu_read_unlock(); 1358 1357 return ret; 1359 1358 } 1360 1359 ··· 1410 1407 */ 1411 1408 if (this_cpu_read(hrtimer_running) != t) 1412 1409 hrtimer_cancel(&t->timer); 1413 - kfree(t); 1410 + kfree_rcu(t, rcu); 1414 1411 } 1415 1412 1416 1413 BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
+2
kernel/bpf/task_iter.c
··· 978 978 BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != 979 979 __alignof__(struct bpf_iter_task)); 980 980 981 + kit->pos = NULL; 982 + 981 983 switch (flags) { 982 984 case BPF_TASK_ITER_ALL_THREADS: 983 985 case BPF_TASK_ITER_ALL_PROCS:
+2
kernel/bpf/verifier.c
··· 5227 5227 #ifdef CONFIG_CGROUPS 5228 5228 BTF_ID(struct, cgroup) 5229 5229 #endif 5230 + #ifdef CONFIG_BPF_JIT 5230 5231 BTF_ID(struct, bpf_cpumask) 5232 + #endif 5231 5233 BTF_ID(struct, task_struct) 5232 5234 BTF_SET_END(rcu_protected_types) 5233 5235
+57 -29
net/bridge/br_switchdev.c
··· 595 595 } 596 596 597 597 static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, 598 + struct net_device *dev, 599 + unsigned long action, 598 600 enum switchdev_obj_id id, 599 601 const struct net_bridge_mdb_entry *mp, 600 602 struct net_device *orig_dev) 601 603 { 602 - struct switchdev_obj_port_mdb *mdb; 604 + struct switchdev_obj_port_mdb mdb = { 605 + .obj = { 606 + .id = id, 607 + .orig_dev = orig_dev, 608 + }, 609 + }; 610 + struct switchdev_obj_port_mdb *pmdb; 603 611 604 - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); 605 - if (!mdb) 612 + br_switchdev_mdb_populate(&mdb, mp); 613 + 614 + if (action == SWITCHDEV_PORT_OBJ_ADD && 615 + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { 616 + /* This event is already in the deferred queue of 617 + * events, so this replay must be elided, lest the 618 + * driver receives duplicate events for it. This can 619 + * only happen when replaying additions, since 620 + * modifications are always immediately visible in 621 + * br->mdb_list, whereas actual event delivery may be 622 + * delayed. 623 + */ 624 + return 0; 625 + } 626 + 627 + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); 628 + if (!pmdb) 606 629 return -ENOMEM; 607 630 608 - mdb->obj.id = id; 609 - mdb->obj.orig_dev = orig_dev; 610 - br_switchdev_mdb_populate(mdb, mp); 611 - list_add_tail(&mdb->obj.list, mdb_list); 612 - 631 + list_add_tail(&pmdb->obj.list, mdb_list); 613 632 return 0; 614 633 } 615 634 ··· 696 677 if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) 697 678 return 0; 698 679 699 - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, 700 - * because the write-side protection is br->multicast_lock. 
But we 701 - * need to emulate the [ blocking ] calling context of a regular 702 - * switchdev event, so since both br->multicast_lock and RCU read side 703 - * critical sections are atomic, we have no choice but to pick the RCU 704 - * read side lock, queue up all our events, leave the critical section 705 - * and notify switchdev from blocking context. 706 - */ 707 - rcu_read_lock(); 680 + if (adding) 681 + action = SWITCHDEV_PORT_OBJ_ADD; 682 + else 683 + action = SWITCHDEV_PORT_OBJ_DEL; 708 684 709 - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { 685 + /* br_switchdev_mdb_queue_one() will take care to not queue a 686 + * replay of an event that is already pending in the switchdev 687 + * deferred queue. In order to safely determine that, there 688 + * must be no new deferred MDB notifications enqueued for the 689 + * duration of the MDB scan. Therefore, grab the write-side 690 + * lock to avoid racing with any concurrent IGMP/MLD snooping. 691 + */ 692 + spin_lock_bh(&br->multicast_lock); 693 + 694 + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { 710 695 struct net_bridge_port_group __rcu * const *pp; 711 696 const struct net_bridge_port_group *p; 712 697 713 698 if (mp->host_joined) { 714 - err = br_switchdev_mdb_queue_one(&mdb_list, 699 + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, 715 700 SWITCHDEV_OBJ_ID_HOST_MDB, 716 701 mp, br_dev); 717 702 if (err) { 718 - rcu_read_unlock(); 703 + spin_unlock_bh(&br->multicast_lock); 719 704 goto out_free_mdb; 720 705 } 721 706 } 722 707 723 - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; 708 + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; 724 709 pp = &p->next) { 725 710 if (p->key.port->dev != dev) 726 711 continue; 727 712 728 - err = br_switchdev_mdb_queue_one(&mdb_list, 713 + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, 729 714 SWITCHDEV_OBJ_ID_PORT_MDB, 730 715 mp, dev); 731 716 if (err) { 732 - rcu_read_unlock(); 717 + 
spin_unlock_bh(&br->multicast_lock); 733 718 goto out_free_mdb; 734 719 } 735 720 } 736 721 } 737 722 738 - rcu_read_unlock(); 739 - 740 - if (adding) 741 - action = SWITCHDEV_PORT_OBJ_ADD; 742 - else 743 - action = SWITCHDEV_PORT_OBJ_DEL; 723 + spin_unlock_bh(&br->multicast_lock); 744 724 745 725 list_for_each_entry(obj, &mdb_list, list) { 746 726 err = br_switchdev_mdb_replay_one(nb, dev, ··· 804 786 br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); 805 787 806 788 br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); 789 + 790 + /* Make sure that the device leaving this bridge has seen all 791 + * relevant events before it is disassociated. In the normal 792 + * case, when the device is directly attached to the bridge, 793 + * this is covered by del_nbp(). If the association was indirect 794 + * however, e.g. via a team or bond, and the device is leaving 795 + * that intermediate device, then the bridge port remains in 796 + * place. 797 + */ 798 + switchdev_deferred_process(); 807 799 } 808 800 809 801 /* Let the bridge know that this port is offloaded, so that it can assign a
+5 -2
net/core/skmsg.c
··· 1226 1226 1227 1227 rcu_read_lock(); 1228 1228 psock = sk_psock(sk); 1229 - if (psock) 1230 - psock->saved_data_ready(sk); 1229 + if (psock) { 1230 + read_lock_bh(&sk->sk_callback_lock); 1231 + sk_psock_data_ready(sk, psock); 1232 + read_unlock_bh(&sk->sk_callback_lock); 1233 + } 1231 1234 rcu_read_unlock(); 1232 1235 } 1233 1236 }
+11 -12
net/core/sock.c
··· 1188 1188 */ 1189 1189 WRITE_ONCE(sk->sk_txrehash, (u8)val); 1190 1190 return 0; 1191 + case SO_PEEK_OFF: 1192 + { 1193 + int (*set_peek_off)(struct sock *sk, int val); 1194 + 1195 + set_peek_off = READ_ONCE(sock->ops)->set_peek_off; 1196 + if (set_peek_off) 1197 + ret = set_peek_off(sk, val); 1198 + else 1199 + ret = -EOPNOTSUPP; 1200 + return ret; 1201 + } 1191 1202 } 1192 1203 1193 1204 sockopt_lock_sock(sk); ··· 1440 1429 case SO_WIFI_STATUS: 1441 1430 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); 1442 1431 break; 1443 - 1444 - case SO_PEEK_OFF: 1445 - { 1446 - int (*set_peek_off)(struct sock *sk, int val); 1447 - 1448 - set_peek_off = READ_ONCE(sock->ops)->set_peek_off; 1449 - if (set_peek_off) 1450 - ret = set_peek_off(sk, val); 1451 - else 1452 - ret = -EOPNOTSUPP; 1453 - break; 1454 - } 1455 1432 1456 1433 case SO_NOFCS: 1457 1434 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
+9 -3
net/devlink/core.c
··· 529 529 { 530 530 int err; 531 531 532 - err = genl_register_family(&devlink_nl_family); 533 - if (err) 534 - goto out; 535 532 err = register_pernet_subsys(&devlink_pernet_ops); 536 533 if (err) 537 534 goto out; 535 + err = genl_register_family(&devlink_nl_family); 536 + if (err) 537 + goto out_unreg_pernet_subsys; 538 538 err = register_netdevice_notifier(&devlink_port_netdevice_nb); 539 + if (!err) 540 + return 0; 539 541 542 + genl_unregister_family(&devlink_nl_family); 543 + 544 + out_unreg_pernet_subsys: 545 + unregister_pernet_subsys(&devlink_pernet_ops); 540 546 out: 541 547 WARN_ON(err); 542 548 return err;
+1 -1
net/devlink/port.c
··· 583 583 584 584 xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) { 585 585 err = devlink_nl_port_fill(msg, devlink_port, 586 - DEVLINK_CMD_NEW, 586 + DEVLINK_CMD_PORT_NEW, 587 587 NETLINK_CB(cb->skb).portid, 588 588 cb->nlh->nlmsg_seq, flags, 589 589 cb->extack);
+2 -1
net/ipv4/arp.c
··· 1125 1125 if (neigh) { 1126 1126 if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { 1127 1127 read_lock_bh(&neigh->lock); 1128 - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); 1128 + memcpy(r->arp_ha.sa_data, neigh->ha, 1129 + min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); 1129 1130 r->arp_flags = arp_state_to_flags(neigh); 1130 1131 read_unlock_bh(&neigh->lock); 1131 1132 r->arp_ha.sa_family = dev->type;
+17 -4
net/ipv4/devinet.c
··· 1825 1825 return err; 1826 1826 } 1827 1827 1828 + /* Combine dev_addr_genid and dev_base_seq to detect changes. 1829 + */ 1830 + static u32 inet_base_seq(const struct net *net) 1831 + { 1832 + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + 1833 + net->dev_base_seq; 1834 + 1835 + /* Must not return 0 (see nl_dump_check_consistent()). 1836 + * Chose a value far away from 0. 1837 + */ 1838 + if (!res) 1839 + res = 0x80000000; 1840 + return res; 1841 + } 1842 + 1828 1843 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1829 1844 { 1830 1845 const struct nlmsghdr *nlh = cb->nlh; ··· 1891 1876 idx = 0; 1892 1877 head = &tgt_net->dev_index_head[h]; 1893 1878 rcu_read_lock(); 1894 - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ 1895 - tgt_net->dev_base_seq; 1879 + cb->seq = inet_base_seq(tgt_net); 1896 1880 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1897 1881 if (idx < s_idx) 1898 1882 goto cont; ··· 2292 2278 idx = 0; 2293 2279 head = &net->dev_index_head[h]; 2294 2280 rcu_read_lock(); 2295 - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 2296 - net->dev_base_seq; 2281 + cb->seq = inet_base_seq(net); 2297 2282 hlist_for_each_entry_rcu(dev, head, index_hlist) { 2298 2283 if (idx < s_idx) 2299 2284 goto cont;
+24 -1
net/ipv4/inet_hashtables.c
··· 1130 1130 return 0; 1131 1131 1132 1132 error: 1133 + if (sk_hashed(sk)) { 1134 + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); 1135 + 1136 + sock_prot_inuse_add(net, sk->sk_prot, -1); 1137 + 1138 + spin_lock(lock); 1139 + sk_nulls_del_node_init_rcu(sk); 1140 + spin_unlock(lock); 1141 + 1142 + sk->sk_hash = 0; 1143 + inet_sk(sk)->inet_sport = 0; 1144 + inet_sk(sk)->inet_num = 0; 1145 + 1146 + if (tw) 1147 + inet_twsk_bind_unhash(tw, hinfo); 1148 + } 1149 + 1133 1150 spin_unlock(&head2->lock); 1134 1151 if (tb_created) 1135 1152 inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); 1136 - spin_unlock_bh(&head->lock); 1153 + spin_unlock(&head->lock); 1154 + 1155 + if (tw) 1156 + inet_twsk_deschedule_put(tw); 1157 + 1158 + local_bh_enable(); 1159 + 1137 1160 return -ENOMEM; 1138 1161 } 1139 1162
+1 -6
net/ipv4/udp.c
··· 1589 1589 1590 1590 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) 1591 1591 { 1592 - if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { 1593 - bool slow = lock_sock_fast(sk); 1594 - 1595 - sk_peek_offset_bwd(sk, len); 1596 - unlock_sock_fast(sk, slow); 1597 - } 1592 + sk_peek_offset_bwd(sk, len); 1598 1593 1599 1594 if (!skb_unref(skb)) 1600 1595 return;
+18 -3
net/ipv6/addrconf.c
··· 708 708 return err; 709 709 } 710 710 711 + /* Combine dev_addr_genid and dev_base_seq to detect changes. 712 + */ 713 + static u32 inet6_base_seq(const struct net *net) 714 + { 715 + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + 716 + net->dev_base_seq; 717 + 718 + /* Must not return 0 (see nl_dump_check_consistent()). 719 + * Chose a value far away from 0. 720 + */ 721 + if (!res) 722 + res = 0x80000000; 723 + return res; 724 + } 725 + 726 + 711 727 static int inet6_netconf_dump_devconf(struct sk_buff *skb, 712 728 struct netlink_callback *cb) 713 729 { ··· 757 741 idx = 0; 758 742 head = &net->dev_index_head[h]; 759 743 rcu_read_lock(); 760 - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ 761 - net->dev_base_seq; 744 + cb->seq = inet6_base_seq(net); 762 745 hlist_for_each_entry_rcu(dev, head, index_hlist) { 763 746 if (idx < s_idx) 764 747 goto cont; ··· 5377 5362 } 5378 5363 5379 5364 rcu_read_lock(); 5380 - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; 5365 + cb->seq = inet6_base_seq(tgt_net); 5381 5366 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 5382 5367 idx = 0; 5383 5368 head = &tgt_net->dev_index_head[h];
+10
net/ipv6/exthdrs.c
··· 177 177 case IPV6_TLV_IOAM: 178 178 if (!ipv6_hop_ioam(skb, off)) 179 179 return false; 180 + 181 + nh = skb_network_header(skb); 180 182 break; 181 183 case IPV6_TLV_JUMBO: 182 184 if (!ipv6_hop_jumbo(skb, off)) ··· 944 942 945 943 if (!skb_valid_dst(skb)) 946 944 ip6_route_input(skb); 945 + 946 + /* About to mangle packet header */ 947 + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) 948 + goto drop; 949 + 950 + /* Trace pointer may have changed */ 951 + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) 952 + + optoff + sizeof(*hdr)); 947 953 948 954 ioam6_fill_trace_data(skb, ns, trace, true); 949 955 break;
+11 -9
net/ipv6/seg6.c
··· 512 512 { 513 513 int err; 514 514 515 - err = genl_register_family(&seg6_genl_family); 515 + err = register_pernet_subsys(&ip6_segments_ops); 516 516 if (err) 517 517 goto out; 518 518 519 - err = register_pernet_subsys(&ip6_segments_ops); 519 + err = genl_register_family(&seg6_genl_family); 520 520 if (err) 521 - goto out_unregister_genl; 521 + goto out_unregister_pernet; 522 522 523 523 #ifdef CONFIG_IPV6_SEG6_LWTUNNEL 524 524 err = seg6_iptunnel_init(); 525 525 if (err) 526 - goto out_unregister_pernet; 526 + goto out_unregister_genl; 527 527 528 528 err = seg6_local_init(); 529 - if (err) 530 - goto out_unregister_pernet; 529 + if (err) { 530 + seg6_iptunnel_exit(); 531 + goto out_unregister_genl; 532 + } 531 533 #endif 532 534 533 535 #ifdef CONFIG_IPV6_SEG6_HMAC ··· 550 548 #endif 551 549 #endif 552 550 #ifdef CONFIG_IPV6_SEG6_LWTUNNEL 553 - out_unregister_pernet: 554 - unregister_pernet_subsys(&ip6_segments_ops); 555 - #endif 556 551 out_unregister_genl: 557 552 genl_unregister_family(&seg6_genl_family); 553 + #endif 554 + out_unregister_pernet: 555 + unregister_pernet_subsys(&ip6_segments_ops); 558 556 goto out; 559 557 } 560 558
+2 -2
net/iucv/iucv.c
··· 156 156 static LIST_HEAD(iucv_handler_list); 157 157 158 158 /* 159 - * iucv_path_table: an array of iucv_path structures. 159 + * iucv_path_table: array of pointers to iucv_path structures. 160 160 */ 161 161 static struct iucv_path **iucv_path_table; 162 162 static unsigned long iucv_max_pathid; ··· 544 544 545 545 cpus_read_lock(); 546 546 rc = -ENOMEM; 547 - alloc_size = iucv_max_pathid * sizeof(struct iucv_path); 547 + alloc_size = iucv_max_pathid * sizeof(*iucv_path_table); 548 548 iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); 549 549 if (!iucv_path_table) 550 550 goto out;
+1 -1
net/l2tp/l2tp_ip6.c
··· 627 627 628 628 back_from_confirm: 629 629 lock_sock(sk); 630 - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; 630 + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); 631 631 err = ip6_append_data(sk, ip_generic_getfrag, msg, 632 632 ulen, transhdrlen, &ipc6, 633 633 &fl6, (struct rt6_info *)dst,
+1 -1
net/mctp/route.c
··· 663 663 spin_unlock_irqrestore(&mns->keys_lock, flags); 664 664 665 665 if (!tagbits) { 666 - kfree(key); 666 + mctp_key_unref(key); 667 667 return ERR_PTR(-EBUSY); 668 668 } 669 669
+6 -2
net/mptcp/diag.c
··· 13 13 #include <uapi/linux/mptcp.h> 14 14 #include "protocol.h" 15 15 16 - static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) 16 + static int subflow_get_info(struct sock *sk, struct sk_buff *skb) 17 17 { 18 18 struct mptcp_subflow_context *sf; 19 19 struct nlattr *start; 20 20 u32 flags = 0; 21 + bool slow; 21 22 int err; 22 23 23 24 start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); 24 25 if (!start) 25 26 return -EMSGSIZE; 26 27 28 + slow = lock_sock_fast(sk); 27 29 rcu_read_lock(); 28 30 sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); 29 31 if (!sf) { ··· 65 63 sf->map_data_len) || 66 64 nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || 67 65 nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || 68 - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { 66 + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { 69 67 err = -EMSGSIZE; 70 68 goto nla_failure; 71 69 } 72 70 73 71 rcu_read_unlock(); 72 + unlock_sock_fast(sk, slow); 74 73 nla_nest_end(skb, start); 75 74 return 0; 76 75 77 76 nla_failure: 78 77 rcu_read_unlock(); 78 + unlock_sock_fast(sk, slow); 79 79 nla_nest_cancel(skb, start); 80 80 return err; 81 81 }
+43 -26
net/mptcp/pm_netlink.c
··· 396 396 } 397 397 } 398 398 399 - static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, 400 - const struct mptcp_addr_info *addr) 401 - { 402 - int i; 403 - 404 - for (i = 0; i < nr; i++) { 405 - if (addrs[i].id == addr->id) 406 - return true; 407 - } 408 - 409 - return false; 410 - } 411 - 412 399 /* Fill all the remote addresses into the array addrs[], 413 400 * and return the array size. 414 401 */ ··· 427 440 msk->pm.subflows++; 428 441 addrs[i++] = remote; 429 442 } else { 443 + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 444 + 445 + /* Forbid creation of new subflows matching existing 446 + * ones, possibly already created by incoming ADD_ADDR 447 + */ 448 + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 449 + mptcp_for_each_subflow(msk, subflow) 450 + if (READ_ONCE(subflow->local_id) == local->id) 451 + __set_bit(subflow->remote_id, unavail_id); 452 + 430 453 mptcp_for_each_subflow(msk, subflow) { 431 454 ssk = mptcp_subflow_tcp_sock(subflow); 432 455 remote_address((struct sock_common *)ssk, &addrs[i]); 433 - addrs[i].id = subflow->remote_id; 456 + addrs[i].id = READ_ONCE(subflow->remote_id); 434 457 if (deny_id0 && !addrs[i].id) 458 + continue; 459 + 460 + if (test_bit(addrs[i].id, unavail_id)) 435 461 continue; 436 462 437 463 if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) 438 464 continue; 439 465 440 - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && 441 - msk->pm.subflows < subflows_max) { 466 + if (msk->pm.subflows < subflows_max) { 467 + /* forbid creating multiple address towards 468 + * this id 469 + */ 470 + __set_bit(addrs[i].id, unavail_id); 442 471 msk->pm.subflows++; 443 472 i++; 444 473 } ··· 802 799 803 800 mptcp_for_each_subflow_safe(msk, subflow, tmp) { 804 801 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 802 + u8 remote_id = READ_ONCE(subflow->remote_id); 805 803 int how = RCV_SHUTDOWN | SEND_SHUTDOWN; 806 - u8 id = subflow->local_id; 804 + u8 id = 
subflow_get_local_id(subflow); 807 805 808 - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) 806 + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) 809 807 continue; 810 808 if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) 811 809 continue; 812 810 813 811 pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", 814 812 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", 815 - i, rm_id, subflow->local_id, subflow->remote_id, 816 - msk->mpc_endpoint_id); 813 + i, rm_id, id, remote_id, msk->mpc_endpoint_id); 817 814 spin_unlock_bh(&msk->pm.lock); 818 815 mptcp_subflow_shutdown(sk, ssk, how); 819 816 ··· 904 901 } 905 902 906 903 static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 907 - struct mptcp_pm_addr_entry *entry) 904 + struct mptcp_pm_addr_entry *entry, 905 + bool needs_id) 908 906 { 909 907 struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 910 908 unsigned int addr_max; ··· 953 949 } 954 950 } 955 951 956 - if (!entry->addr.id) { 952 + if (!entry->addr.id && needs_id) { 957 953 find_next: 958 954 entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 959 955 MPTCP_PM_MAX_ADDR_ID + 1, ··· 964 960 } 965 961 } 966 962 967 - if (!entry->addr.id) 963 + if (!entry->addr.id && needs_id) 968 964 goto out; 969 965 970 966 __set_bit(entry->addr.id, pernet->id_bitmap); ··· 1096 1092 entry->ifindex = 0; 1097 1093 entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; 1098 1094 entry->lsk = NULL; 1099 - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); 1095 + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); 1100 1096 if (ret < 0) 1101 1097 kfree(entry); 1102 1098 ··· 1289 1285 return 0; 1290 1286 } 1291 1287 1288 + static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, 1289 + struct genl_info *info) 1290 + { 1291 + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 1292 + 1293 + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 1294 + 
mptcp_pm_address_nl_policy, info->extack) && 1295 + tb[MPTCP_PM_ADDR_ATTR_ID]) 1296 + return true; 1297 + return false; 1298 + } 1299 + 1292 1300 int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) 1293 1301 { 1294 1302 struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; ··· 1342 1326 goto out_free; 1343 1327 } 1344 1328 } 1345 - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); 1329 + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, 1330 + !mptcp_pm_has_addr_attr_id(attr, info)); 1346 1331 if (ret < 0) { 1347 1332 GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); 1348 1333 goto out_free; ··· 1997 1980 if (WARN_ON_ONCE(!sf)) 1998 1981 return -EINVAL; 1999 1982 2000 - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) 1983 + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) 2001 1984 return -EMSGSIZE; 2002 1985 2003 1986 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
+8 -7
net/mptcp/pm_userspace.c
··· 26 26 } 27 27 28 28 static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, 29 - struct mptcp_pm_addr_entry *entry) 29 + struct mptcp_pm_addr_entry *entry, 30 + bool needs_id) 30 31 { 31 32 DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 32 33 struct mptcp_pm_addr_entry *match = NULL; ··· 42 41 spin_lock_bh(&msk->pm.lock); 43 42 list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { 44 43 addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); 45 - if (addr_match && entry->addr.id == 0) 44 + if (addr_match && entry->addr.id == 0 && needs_id) 46 45 entry->addr.id = e->addr.id; 47 46 id_match = (e->addr.id == entry->addr.id); 48 47 if (addr_match && id_match) { ··· 65 64 } 66 65 67 66 *e = *entry; 68 - if (!e->addr.id) 67 + if (!e->addr.id && needs_id) 69 68 e->addr.id = find_next_zero_bit(id_bitmap, 70 69 MPTCP_PM_MAX_ADDR_ID + 1, 71 70 1); ··· 154 153 if (new_entry.addr.port == msk_sport) 155 154 new_entry.addr.port = 0; 156 155 157 - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); 156 + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); 158 157 } 159 158 160 159 int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) ··· 199 198 goto announce_err; 200 199 } 201 200 202 - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); 201 + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); 203 202 if (err < 0) { 204 203 GENL_SET_ERR_MSG(info, "did not match address and id"); 205 204 goto announce_err; ··· 234 233 235 234 lock_sock(sk); 236 235 mptcp_for_each_subflow(msk, subflow) { 237 - if (subflow->local_id == 0) { 236 + if (READ_ONCE(subflow->local_id) == 0) { 238 237 has_id_0 = true; 239 238 break; 240 239 } ··· 379 378 } 380 379 381 380 local.addr = addr_l; 382 - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); 381 + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); 383 382 if (err < 0) { 384 383 
GENL_SET_ERR_MSG(info, "did not match address and id"); 385 384 goto create_err;
+1 -1
net/mptcp/protocol.c
··· 85 85 subflow->subflow_id = msk->subflow_id++; 86 86 87 87 /* This is the first subflow, always with id 0 */ 88 - subflow->local_id_valid = 1; 88 + WRITE_ONCE(subflow->local_id, 0); 89 89 mptcp_sock_graft(msk->first, sk->sk_socket); 90 90 iput(SOCK_INODE(ssock)); 91 91
+12 -3
net/mptcp/protocol.h
··· 491 491 remote_key_valid : 1, /* received the peer key from */ 492 492 disposable : 1, /* ctx can be free at ulp release time */ 493 493 stale : 1, /* unable to snd/rcv data, do not use for xmit */ 494 - local_id_valid : 1, /* local_id is correctly initialized */ 495 494 valid_csum_seen : 1, /* at least one csum validated */ 496 495 is_mptfo : 1, /* subflow is doing TFO */ 497 - __unused : 9; 496 + __unused : 10; 498 497 bool data_avail; 499 498 bool scheduled; 500 499 u32 remote_nonce; ··· 504 505 u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ 505 506 u64 iasn; /* initial ack sequence number, MPC subflows only */ 506 507 }; 507 - u8 local_id; 508 + s16 local_id; /* if negative not initialized yet */ 508 509 u8 remote_id; 509 510 u8 reset_seen:1; 510 511 u8 reset_transient:1; ··· 555 556 { 556 557 memset(&subflow->reset, 0, sizeof(subflow->reset)); 557 558 subflow->request_mptcp = 1; 559 + WRITE_ONCE(subflow->local_id, -1); 558 560 } 559 561 560 562 static inline u64 ··· 1021 1021 int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); 1022 1022 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); 1023 1023 int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); 1024 + 1025 + static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) 1026 + { 1027 + int local_id = READ_ONCE(subflow->local_id); 1028 + 1029 + if (local_id < 0) 1030 + return 0; 1031 + return local_id; 1032 + } 1024 1033 1025 1034 void __init mptcp_pm_nl_init(void); 1026 1035 void mptcp_pm_nl_work(struct mptcp_sock *msk);
+8 -7
net/mptcp/subflow.c
··· 535 535 subflow->backup = mp_opt.backup; 536 536 subflow->thmac = mp_opt.thmac; 537 537 subflow->remote_nonce = mp_opt.nonce; 538 - subflow->remote_id = mp_opt.join_id; 538 + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); 539 539 pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", 540 540 subflow, subflow->thmac, subflow->remote_nonce, 541 541 subflow->backup); ··· 577 577 578 578 static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) 579 579 { 580 - subflow->local_id = local_id; 581 - subflow->local_id_valid = 1; 580 + WARN_ON_ONCE(local_id < 0 || local_id > 255); 581 + WRITE_ONCE(subflow->local_id, local_id); 582 582 } 583 583 584 584 static int subflow_chk_local_id(struct sock *sk) ··· 587 587 struct mptcp_sock *msk = mptcp_sk(subflow->conn); 588 588 int err; 589 589 590 - if (likely(subflow->local_id_valid)) 590 + if (likely(subflow->local_id >= 0)) 591 591 return 0; 592 592 593 593 err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); ··· 1567 1567 pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, 1568 1568 remote_token, local_id, remote_id); 1569 1569 subflow->remote_token = remote_token; 1570 - subflow->remote_id = remote_id; 1570 + WRITE_ONCE(subflow->remote_id, remote_id); 1571 1571 subflow->request_join = 1; 1572 1572 subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); 1573 1573 subflow->subflow_id = msk->subflow_id++; ··· 1731 1731 pr_debug("subflow=%p", ctx); 1732 1732 1733 1733 ctx->tcp_sock = sk; 1734 + WRITE_ONCE(ctx->local_id, -1); 1734 1735 1735 1736 return ctx; 1736 1737 } ··· 1967 1966 new_ctx->idsn = subflow_req->idsn; 1968 1967 1969 1968 /* this is the first subflow, id is always 0 */ 1970 - new_ctx->local_id_valid = 1; 1969 + subflow_set_local_id(new_ctx, 0); 1971 1970 } else if (subflow_req->mp_join) { 1972 1971 new_ctx->ssn_offset = subflow_req->ssn_offset; 1973 1972 new_ctx->mp_join = 1; 1974 1973 new_ctx->fully_established = 1; 1975 1974 new_ctx->remote_key_valid 
= 1; 1976 1975 new_ctx->backup = subflow_req->backup; 1977 - new_ctx->remote_id = subflow_req->remote_id; 1976 + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); 1978 1977 new_ctx->token = subflow_req->token; 1979 1978 new_ctx->thmac = subflow_req->thmac; 1980 1979
+14 -3
net/netfilter/nf_flow_table_core.c
··· 87 87 return 0; 88 88 } 89 89 90 + static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, 91 + enum flow_offload_tuple_dir dir) 92 + { 93 + struct dst_entry *dst = route->tuple[dir].dst; 94 + 95 + route->tuple[dir].dst = NULL; 96 + 97 + return dst; 98 + } 99 + 90 100 static int flow_offload_fill_route(struct flow_offload *flow, 91 - const struct nf_flow_route *route, 101 + struct nf_flow_route *route, 92 102 enum flow_offload_tuple_dir dir) 93 103 { 94 104 struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; 95 - struct dst_entry *dst = route->tuple[dir].dst; 105 + struct dst_entry *dst = nft_route_dst_fetch(route, dir); 96 106 int i, j = 0; 97 107 98 108 switch (flow_tuple->l3proto) { ··· 132 122 ETH_ALEN); 133 123 flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; 134 124 flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; 125 + dst_release(dst); 135 126 break; 136 127 case FLOW_OFFLOAD_XMIT_XFRM: 137 128 case FLOW_OFFLOAD_XMIT_NEIGH: ··· 157 146 } 158 147 159 148 void flow_offload_route_init(struct flow_offload *flow, 160 - const struct nf_flow_route *route) 149 + struct nf_flow_route *route) 161 150 { 162 151 flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); 163 152 flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
+42 -39
net/netfilter/nf_tables_api.c
··· 684 684 return err; 685 685 } 686 686 687 - static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, 688 - struct nft_flowtable *flowtable) 687 + static struct nft_trans * 688 + nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, 689 + struct nft_flowtable *flowtable) 689 690 { 690 691 struct nft_trans *trans; 691 692 692 693 trans = nft_trans_alloc(ctx, msg_type, 693 694 sizeof(struct nft_trans_flowtable)); 694 695 if (trans == NULL) 695 - return -ENOMEM; 696 + return ERR_PTR(-ENOMEM); 696 697 697 698 if (msg_type == NFT_MSG_NEWFLOWTABLE) 698 699 nft_activate_next(ctx->net, flowtable); ··· 702 701 nft_trans_flowtable(trans) = flowtable; 703 702 nft_trans_commit_list_add_tail(ctx->net, trans); 704 703 705 - return 0; 704 + return trans; 706 705 } 707 706 708 707 static int nft_delflowtable(struct nft_ctx *ctx, 709 708 struct nft_flowtable *flowtable) 710 709 { 711 - int err; 710 + struct nft_trans *trans; 712 711 713 - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); 714 - if (err < 0) 715 - return err; 712 + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); 713 + if (IS_ERR(trans)) 714 + return PTR_ERR(trans); 716 715 717 716 nft_deactivate_next(ctx->net, flowtable); 718 717 nft_use_dec(&ctx->table->use); 719 718 720 - return err; 719 + return 0; 721 720 } 722 721 723 722 static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) ··· 1252 1251 return 0; 1253 1252 1254 1253 err_register_hooks: 1254 + ctx->table->flags |= NFT_TABLE_F_DORMANT; 1255 1255 nft_trans_destroy(trans); 1256 1256 return ret; 1257 1257 } ··· 2082 2080 struct nft_hook *hook; 2083 2081 int err; 2084 2082 2085 - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); 2083 + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); 2086 2084 if (!hook) { 2087 2085 err = -ENOMEM; 2088 2086 goto err_hook_alloc; ··· 2505 2503 RCU_INIT_POINTER(chain->blob_gen_0, blob); 2506 2504 
RCU_INIT_POINTER(chain->blob_gen_1, blob); 2507 2505 2508 - err = nf_tables_register_hook(net, table, chain); 2509 - if (err < 0) 2510 - goto err_destroy_chain; 2511 - 2512 2506 if (!nft_use_inc(&table->use)) { 2513 2507 err = -EMFILE; 2514 - goto err_use; 2508 + goto err_destroy_chain; 2515 2509 } 2516 2510 2517 2511 trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); 2518 2512 if (IS_ERR(trans)) { 2519 2513 err = PTR_ERR(trans); 2520 - goto err_unregister_hook; 2514 + goto err_trans; 2521 2515 } 2522 2516 2523 2517 nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; ··· 2521 2523 nft_trans_chain_policy(trans) = policy; 2522 2524 2523 2525 err = nft_chain_add(table, chain); 2524 - if (err < 0) { 2525 - nft_trans_destroy(trans); 2526 - goto err_unregister_hook; 2527 - } 2526 + if (err < 0) 2527 + goto err_chain_add; 2528 + 2529 + /* This must be LAST to ensure no packets are walking over this chain. */ 2530 + err = nf_tables_register_hook(net, table, chain); 2531 + if (err < 0) 2532 + goto err_register_hook; 2528 2533 2529 2534 return 0; 2530 2535 2531 - err_unregister_hook: 2536 + err_register_hook: 2537 + nft_chain_del(chain); 2538 + err_chain_add: 2539 + nft_trans_destroy(trans); 2540 + err_trans: 2532 2541 nft_use_dec_restore(&table->use); 2533 - err_use: 2534 - nf_tables_unregister_hook(net, table, chain); 2535 2542 err_destroy_chain: 2536 2543 nf_tables_chain_destroy(ctx); 2537 2544 ··· 8458 8455 u8 family = info->nfmsg->nfgen_family; 8459 8456 const struct nf_flowtable_type *type; 8460 8457 struct nft_flowtable *flowtable; 8461 - struct nft_hook *hook, *next; 8462 8458 struct net *net = info->net; 8463 8459 struct nft_table *table; 8460 + struct nft_trans *trans; 8464 8461 struct nft_ctx ctx; 8465 8462 int err; 8466 8463 ··· 8540 8537 err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable, 8541 8538 extack, true); 8542 8539 if (err < 0) 8543 - goto err4; 8540 + goto err_flowtable_parse_hooks; 8544 8541 8545 8542 
list_splice(&flowtable_hook.list, &flowtable->hook_list); 8546 8543 flowtable->data.priority = flowtable_hook.priority; 8547 8544 flowtable->hooknum = flowtable_hook.num; 8548 8545 8546 + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); 8547 + if (IS_ERR(trans)) { 8548 + err = PTR_ERR(trans); 8549 + goto err_flowtable_trans; 8550 + } 8551 + 8552 + /* This must be LAST to ensure no packets are walking over this flowtable. */ 8549 8553 err = nft_register_flowtable_net_hooks(ctx.net, table, 8550 8554 &flowtable->hook_list, 8551 8555 flowtable); 8552 - if (err < 0) { 8553 - nft_hooks_destroy(&flowtable->hook_list); 8554 - goto err4; 8555 - } 8556 - 8557 - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); 8558 8556 if (err < 0) 8559 - goto err5; 8557 + goto err_flowtable_hooks; 8560 8558 8561 8559 list_add_tail_rcu(&flowtable->list, &table->flowtables); 8562 8560 8563 8561 return 0; 8564 - err5: 8565 - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { 8566 - nft_unregister_flowtable_hook(net, flowtable, hook); 8567 - list_del_rcu(&hook->list); 8568 - kfree_rcu(hook, rcu); 8569 - } 8570 - err4: 8562 + 8563 + err_flowtable_hooks: 8564 + nft_trans_destroy(trans); 8565 + err_flowtable_trans: 8566 + nft_hooks_destroy(&flowtable->hook_list); 8567 + err_flowtable_parse_hooks: 8571 8568 flowtable->data.type->free(&flowtable->data); 8572 8569 err3: 8573 8570 module_put(type->owner);
+2 -2
net/phonet/datagram.c
··· 34 34 35 35 switch (cmd) { 36 36 case SIOCINQ: 37 - lock_sock(sk); 37 + spin_lock_bh(&sk->sk_receive_queue.lock); 38 38 skb = skb_peek(&sk->sk_receive_queue); 39 39 *karg = skb ? skb->len : 0; 40 - release_sock(sk); 40 + spin_unlock_bh(&sk->sk_receive_queue.lock); 41 41 return 0; 42 42 43 43 case SIOCPNADDRESOURCE:
+32 -9
net/phonet/pep.c
··· 917 917 return 0; 918 918 } 919 919 920 + static unsigned int pep_first_packet_length(struct sock *sk) 921 + { 922 + struct pep_sock *pn = pep_sk(sk); 923 + struct sk_buff_head *q; 924 + struct sk_buff *skb; 925 + unsigned int len = 0; 926 + bool found = false; 927 + 928 + if (sock_flag(sk, SOCK_URGINLINE)) { 929 + q = &pn->ctrlreq_queue; 930 + spin_lock_bh(&q->lock); 931 + skb = skb_peek(q); 932 + if (skb) { 933 + len = skb->len; 934 + found = true; 935 + } 936 + spin_unlock_bh(&q->lock); 937 + } 938 + 939 + if (likely(!found)) { 940 + q = &sk->sk_receive_queue; 941 + spin_lock_bh(&q->lock); 942 + skb = skb_peek(q); 943 + if (skb) 944 + len = skb->len; 945 + spin_unlock_bh(&q->lock); 946 + } 947 + 948 + return len; 949 + } 950 + 920 951 static int pep_ioctl(struct sock *sk, int cmd, int *karg) 921 952 { 922 953 struct pep_sock *pn = pep_sk(sk); ··· 960 929 break; 961 930 } 962 931 963 - lock_sock(sk); 964 - if (sock_flag(sk, SOCK_URGINLINE) && 965 - !skb_queue_empty(&pn->ctrlreq_queue)) 966 - *karg = skb_peek(&pn->ctrlreq_queue)->len; 967 - else if (!skb_queue_empty(&sk->sk_receive_queue)) 968 - *karg = skb_peek(&sk->sk_receive_queue)->len; 969 - else 970 - *karg = 0; 971 - release_sock(sk); 932 + *karg = pep_first_packet_length(sk); 972 933 ret = 0; 973 934 break; 974 935
+15 -21
net/sched/act_mirred.c
··· 232 232 return err; 233 233 } 234 234 235 - static bool is_mirred_nested(void) 236 - { 237 - return unlikely(__this_cpu_read(mirred_nest_level) > 1); 238 - } 239 - 240 - static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) 235 + static int 236 + tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) 241 237 { 242 238 int err; 243 239 244 240 if (!want_ingress) 245 241 err = tcf_dev_queue_xmit(skb, dev_queue_xmit); 246 - else if (is_mirred_nested()) 242 + else if (!at_ingress) 247 243 err = netif_rx(skb); 248 244 else 249 245 err = netif_receive_skb(skb); ··· 266 270 if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { 267 271 net_notice_ratelimited("tc mirred to Houston: device %s is down\n", 268 272 dev->name); 269 - err = -ENODEV; 270 - goto out; 273 + goto err_cant_do; 271 274 } 272 275 273 276 /* we could easily avoid the clone only if called by ingress and clsact; ··· 278 283 tcf_mirred_can_reinsert(retval); 279 284 if (!dont_clone) { 280 285 skb_to_send = skb_clone(skb, GFP_ATOMIC); 281 - if (!skb_to_send) { 282 - err = -ENOMEM; 283 - goto out; 284 - } 286 + if (!skb_to_send) 287 + goto err_cant_do; 285 288 } 286 289 287 290 want_ingress = tcf_mirred_act_wants_ingress(m_eaction); ··· 312 319 313 320 skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); 314 321 315 - err = tcf_mirred_forward(want_ingress, skb_to_send); 322 + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); 316 323 } else { 317 - err = tcf_mirred_forward(want_ingress, skb_to_send); 324 + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); 318 325 } 319 - 320 - if (err) { 321 - out: 326 + if (err) 322 327 tcf_action_inc_overlimit_qstats(&m->common); 323 - if (is_redirect) 324 - retval = TC_ACT_SHOT; 325 - } 326 328 329 + return retval; 330 + 331 + err_cant_do: 332 + if (is_redirect) 333 + retval = TC_ACT_SHOT; 334 + tcf_action_inc_overlimit_qstats(&m->common); 327 335 return retval; 328 336 } 329 337
+4 -1
net/sched/cls_flower.c
··· 2460 2460 } 2461 2461 2462 2462 errout_idr: 2463 - if (!fold) 2463 + if (!fold) { 2464 + spin_lock(&tp->lock); 2464 2465 idr_remove(&head->handle_idr, fnew->handle); 2466 + spin_unlock(&tp->lock); 2467 + } 2465 2468 __fl_put(fnew); 2466 2469 errout_tb: 2467 2470 kfree(tb);
+73
net/switchdev/switchdev.c
··· 19 19 #include <linux/rtnetlink.h> 20 20 #include <net/switchdev.h> 21 21 22 + static bool switchdev_obj_eq(const struct switchdev_obj *a, 23 + const struct switchdev_obj *b) 24 + { 25 + const struct switchdev_obj_port_vlan *va, *vb; 26 + const struct switchdev_obj_port_mdb *ma, *mb; 27 + 28 + if (a->id != b->id || a->orig_dev != b->orig_dev) 29 + return false; 30 + 31 + switch (a->id) { 32 + case SWITCHDEV_OBJ_ID_PORT_VLAN: 33 + va = SWITCHDEV_OBJ_PORT_VLAN(a); 34 + vb = SWITCHDEV_OBJ_PORT_VLAN(b); 35 + return va->flags == vb->flags && 36 + va->vid == vb->vid && 37 + va->changed == vb->changed; 38 + case SWITCHDEV_OBJ_ID_PORT_MDB: 39 + case SWITCHDEV_OBJ_ID_HOST_MDB: 40 + ma = SWITCHDEV_OBJ_PORT_MDB(a); 41 + mb = SWITCHDEV_OBJ_PORT_MDB(b); 42 + return ma->vid == mb->vid && 43 + ether_addr_equal(ma->addr, mb->addr); 44 + default: 45 + break; 46 + } 47 + 48 + BUG(); 49 + } 50 + 22 51 static LIST_HEAD(deferred); 23 52 static DEFINE_SPINLOCK(deferred_lock); 24 53 ··· 335 306 return switchdev_port_obj_del_now(dev, obj); 336 307 } 337 308 EXPORT_SYMBOL_GPL(switchdev_port_obj_del); 309 + 310 + /** 311 + * switchdev_port_obj_act_is_deferred - Is object action pending? 312 + * 313 + * @dev: port device 314 + * @nt: type of action; add or delete 315 + * @obj: object to test 316 + * 317 + * Returns true if a deferred item is pending, which is 318 + * equivalent to the action @nt on an object @obj. 319 + * 320 + * rtnl_lock must be held. 
321 + */ 322 + bool switchdev_port_obj_act_is_deferred(struct net_device *dev, 323 + enum switchdev_notifier_type nt, 324 + const struct switchdev_obj *obj) 325 + { 326 + struct switchdev_deferred_item *dfitem; 327 + bool found = false; 328 + 329 + ASSERT_RTNL(); 330 + 331 + spin_lock_bh(&deferred_lock); 332 + 333 + list_for_each_entry(dfitem, &deferred, list) { 334 + if (dfitem->dev != dev) 335 + continue; 336 + 337 + if ((dfitem->func == switchdev_port_obj_add_deferred && 338 + nt == SWITCHDEV_PORT_OBJ_ADD) || 339 + (dfitem->func == switchdev_port_obj_del_deferred && 340 + nt == SWITCHDEV_PORT_OBJ_DEL)) { 341 + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { 342 + found = true; 343 + break; 344 + } 345 + } 346 + } 347 + 348 + spin_unlock_bh(&deferred_lock); 349 + 350 + return found; 351 + } 352 + EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); 338 353 339 354 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); 340 355 static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
+1 -1
net/tls/tls_main.c
··· 1003 1003 return 0; 1004 1004 } 1005 1005 1006 - static int tls_get_info(const struct sock *sk, struct sk_buff *skb) 1006 + static int tls_get_info(struct sock *sk, struct sk_buff *skb) 1007 1007 { 1008 1008 u16 version, cipher_type; 1009 1009 struct tls_context *ctx;
+16 -8
net/tls/tls_sw.c
··· 1772 1772 u8 *control, 1773 1773 size_t skip, 1774 1774 size_t len, 1775 - bool is_peek) 1775 + bool is_peek, 1776 + bool *more) 1776 1777 { 1777 1778 struct sk_buff *skb = skb_peek(&ctx->rx_list); 1778 1779 struct tls_msg *tlm; ··· 1786 1785 1787 1786 err = tls_record_content_type(msg, tlm, control); 1788 1787 if (err <= 0) 1789 - goto out; 1788 + goto more; 1790 1789 1791 1790 if (skip < rxm->full_len) 1792 1791 break; ··· 1804 1803 1805 1804 err = tls_record_content_type(msg, tlm, control); 1806 1805 if (err <= 0) 1807 - goto out; 1806 + goto more; 1808 1807 1809 1808 err = skb_copy_datagram_msg(skb, rxm->offset + skip, 1810 1809 msg, chunk); 1811 1810 if (err < 0) 1812 - goto out; 1811 + goto more; 1813 1812 1814 1813 len = len - chunk; 1815 1814 copied = copied + chunk; ··· 1845 1844 1846 1845 out: 1847 1846 return copied ? : err; 1847 + more: 1848 + if (more) 1849 + *more = true; 1850 + goto out; 1848 1851 } 1849 1852 1850 1853 static bool ··· 1952 1947 int target, err; 1953 1948 bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); 1954 1949 bool is_peek = flags & MSG_PEEK; 1950 + bool rx_more = false; 1955 1951 bool released = true; 1956 1952 bool bpf_strp_enabled; 1957 1953 bool zc_capable; ··· 1972 1966 goto end; 1973 1967 1974 1968 /* Process pending decrypted records. 
It must be non-zero-copy */ 1975 - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); 1969 + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); 1976 1970 if (err < 0) 1977 1971 goto end; 1978 1972 1979 1973 copied = err; 1980 - if (len <= copied) 1974 + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) 1981 1975 goto end; 1982 1976 1983 1977 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); ··· 2070 2064 decrypted += chunk; 2071 2065 len -= chunk; 2072 2066 __skb_queue_tail(&ctx->rx_list, skb); 2067 + if (unlikely(control != TLS_RECORD_TYPE_DATA)) 2068 + break; 2073 2069 continue; 2074 2070 } 2075 2071 ··· 2136 2128 /* Drain records from the rx_list & copy if required */ 2137 2129 if (is_peek || is_kvec) 2138 2130 err = process_rx_list(ctx, msg, &control, copied, 2139 - decrypted, is_peek); 2131 + decrypted, is_peek, NULL); 2140 2132 else 2141 2133 err = process_rx_list(ctx, msg, &control, 0, 2142 - async_copy_bytes, is_peek); 2134 + async_copy_bytes, is_peek, NULL); 2143 2135 } 2144 2136 2145 2137 copied += decrypted;
+3 -16
net/unix/af_unix.c
··· 782 782 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 783 783 int); 784 784 785 - static int unix_set_peek_off(struct sock *sk, int val) 786 - { 787 - struct unix_sock *u = unix_sk(sk); 788 - 789 - if (mutex_lock_interruptible(&u->iolock)) 790 - return -EINTR; 791 - 792 - WRITE_ONCE(sk->sk_peek_off, val); 793 - mutex_unlock(&u->iolock); 794 - 795 - return 0; 796 - } 797 - 798 785 #ifdef CONFIG_PROC_FS 799 786 static int unix_count_nr_fds(struct sock *sk) 800 787 { ··· 849 862 .read_skb = unix_stream_read_skb, 850 863 .mmap = sock_no_mmap, 851 864 .splice_read = unix_stream_splice_read, 852 - .set_peek_off = unix_set_peek_off, 865 + .set_peek_off = sk_set_peek_off, 853 866 .show_fdinfo = unix_show_fdinfo, 854 867 }; 855 868 ··· 873 886 .read_skb = unix_read_skb, 874 887 .recvmsg = unix_dgram_recvmsg, 875 888 .mmap = sock_no_mmap, 876 - .set_peek_off = unix_set_peek_off, 889 + .set_peek_off = sk_set_peek_off, 877 890 .show_fdinfo = unix_show_fdinfo, 878 891 }; 879 892 ··· 896 909 .sendmsg = unix_seqpacket_sendmsg, 897 910 .recvmsg = unix_seqpacket_recvmsg, 898 911 .mmap = sock_no_mmap, 899 - .set_peek_off = unix_set_peek_off, 912 + .set_peek_off = sk_set_peek_off, 900 913 .show_fdinfo = unix_show_fdinfo, 901 914 }; 902 915
+9 -13
net/unix/garbage.c
··· 284 284 * which are creating the cycle(s). 285 285 */ 286 286 skb_queue_head_init(&hitlist); 287 - list_for_each_entry(u, &gc_candidates, link) 287 + list_for_each_entry(u, &gc_candidates, link) { 288 288 scan_children(&u->sk, inc_inflight, &hitlist); 289 + 290 + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 291 + if (u->oob_skb) { 292 + kfree_skb(u->oob_skb); 293 + u->oob_skb = NULL; 294 + } 295 + #endif 296 + } 289 297 290 298 /* not_cycle_list contains those sockets which do not make up a 291 299 * cycle. Restore these to the inflight list. ··· 321 313 322 314 /* Here we are. Hitlist is filled. Die. */ 323 315 __skb_queue_purge(&hitlist); 324 - 325 - #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 326 - while (!list_empty(&gc_candidates)) { 327 - u = list_entry(gc_candidates.next, struct unix_sock, link); 328 - if (u->oob_skb) { 329 - struct sk_buff *skb = u->oob_skb; 330 - 331 - u->oob_skb = NULL; 332 - kfree_skb(skb); 333 - } 334 - } 335 - #endif 336 316 337 317 spin_lock(&unix_gc_lock); 338 318
+2 -1
net/xdp/xsk.c
··· 722 722 memcpy(vaddr, buffer, len); 723 723 kunmap_local(vaddr); 724 724 725 - skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); 725 + skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); 726 + refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); 726 727 } 727 728 728 729 if (first_frag && desc->options & XDP_TX_METADATA) {
+1 -1
scripts/bpf_doc.py
··· 513 513 instructions to the kernel when the programs are loaded. The format for that 514 514 string is identical to the one in use for kernel modules (Dual licenses, such 515 515 as "Dual BSD/GPL", may be used). Some helper functions are only accessible to 516 - programs that are compatible with the GNU Privacy License (GPL). 516 + programs that are compatible with the GNU General Public License (GNU GPL). 517 517 518 518 In order to use such helpers, the eBPF program must be loaded with the correct 519 519 license string passed (via **attr**) to the **bpf**\\ () system call, and this
+15 -4
tools/net/ynl/lib/ynl.c
··· 466 466 467 467 int ynl_recv_ack(struct ynl_sock *ys, int ret) 468 468 { 469 + struct ynl_parse_arg yarg = { .ys = ys, }; 470 + 469 471 if (!ret) { 470 472 yerr(ys, YNL_ERROR_EXPECT_ACK, 471 473 "Expecting an ACK but nothing received"); ··· 480 478 return ret; 481 479 } 482 480 return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid, 483 - ynl_cb_null, ys); 481 + ynl_cb_null, &yarg); 484 482 } 485 483 486 484 int ynl_cb_null(const struct nlmsghdr *nlh, void *data) ··· 588 586 return err; 589 587 } 590 588 591 - return ynl_recv_ack(ys, err); 589 + err = ynl_recv_ack(ys, err); 590 + if (err < 0) { 591 + free(ys->mcast_groups); 592 + return err; 593 + } 594 + 595 + return 0; 592 596 } 593 597 594 598 struct ynl_sock * ··· 749 741 750 742 static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) 751 743 { 752 - return ynl_ntf_parse((struct ynl_sock *)data, nlh); 744 + struct ynl_parse_arg *yarg = data; 745 + 746 + return ynl_ntf_parse(yarg->ys, nlh); 753 747 } 754 748 755 749 int ynl_ntf_check(struct ynl_sock *ys) 756 750 { 751 + struct ynl_parse_arg yarg = { .ys = ys, }; 757 752 ssize_t len; 758 753 int err; 759 754 ··· 778 767 return len; 779 768 780 769 err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, 781 - ynl_ntf_trampoline, ys, 770 + ynl_ntf_trampoline, &yarg, 782 771 ynl_cb_array, NLMSG_MIN_TYPE); 783 772 if (err < 0) 784 773 return err;
+1
tools/testing/selftests/bpf/prog_tests/iters.c
··· 193 193 ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); 194 194 ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); 195 195 ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); 196 + ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); 196 197 pthread_mutex_unlock(&do_nothing_mutex); 197 198 for (int i = 0; i < thread_num; i++) 198 199 ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
+57
tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2024. Huawei Technologies Co., Ltd */ 3 + #include "test_progs.h" 4 + #include "read_vsyscall.skel.h" 5 + 6 + #if defined(__x86_64__) 7 + /* For VSYSCALL_ADDR */ 8 + #include <asm/vsyscall.h> 9 + #else 10 + /* To prevent build failure on non-x86 arch */ 11 + #define VSYSCALL_ADDR 0UL 12 + #endif 13 + 14 + struct read_ret_desc { 15 + const char *name; 16 + int ret; 17 + } all_read[] = { 18 + { .name = "probe_read_kernel", .ret = -ERANGE }, 19 + { .name = "probe_read_kernel_str", .ret = -ERANGE }, 20 + { .name = "probe_read", .ret = -ERANGE }, 21 + { .name = "probe_read_str", .ret = -ERANGE }, 22 + { .name = "probe_read_user", .ret = -EFAULT }, 23 + { .name = "probe_read_user_str", .ret = -EFAULT }, 24 + { .name = "copy_from_user", .ret = -EFAULT }, 25 + { .name = "copy_from_user_task", .ret = -EFAULT }, 26 + }; 27 + 28 + void test_read_vsyscall(void) 29 + { 30 + struct read_vsyscall *skel; 31 + unsigned int i; 32 + int err; 33 + 34 + #if !defined(__x86_64__) 35 + test__skip(); 36 + return; 37 + #endif 38 + skel = read_vsyscall__open_and_load(); 39 + if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load")) 40 + return; 41 + 42 + skel->bss->target_pid = getpid(); 43 + err = read_vsyscall__attach(skel); 44 + if (!ASSERT_EQ(err, 0, "read_vsyscall attach")) 45 + goto out; 46 + 47 + /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE, 48 + * but it doesn't affect the returned error codes. 49 + */ 50 + skel->bss->user_ptr = (void *)VSYSCALL_ADDR; 51 + usleep(1); 52 + 53 + for (i = 0; i < ARRAY_SIZE(all_read); i++) 54 + ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name); 55 + out: 56 + read_vsyscall__destroy(skel); 57 + }
+34 -1
tools/testing/selftests/bpf/prog_tests/timer.c
··· 4 4 #include "timer.skel.h" 5 5 #include "timer_failure.skel.h" 6 6 7 + #define NUM_THR 8 8 + 9 + static void *spin_lock_thread(void *arg) 10 + { 11 + int i, err, prog_fd = *(int *)arg; 12 + LIBBPF_OPTS(bpf_test_run_opts, topts); 13 + 14 + for (i = 0; i < 10000; i++) { 15 + err = bpf_prog_test_run_opts(prog_fd, &topts); 16 + if (!ASSERT_OK(err, "test_run_opts err") || 17 + !ASSERT_OK(topts.retval, "test_run_opts retval")) 18 + break; 19 + } 20 + 21 + pthread_exit(arg); 22 + } 23 + 7 24 static int timer(struct timer *timer_skel) 8 25 { 9 - int err, prog_fd; 26 + int i, err, prog_fd; 10 27 LIBBPF_OPTS(bpf_test_run_opts, topts); 28 + pthread_t thread_id[NUM_THR]; 29 + void *ret; 11 30 12 31 err = timer__attach(timer_skel); 13 32 if (!ASSERT_OK(err, "timer_attach")) ··· 61 42 62 43 /* check that code paths completed */ 63 44 ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); 45 + 46 + prog_fd = bpf_program__fd(timer_skel->progs.race); 47 + for (i = 0; i < NUM_THR; i++) { 48 + err = pthread_create(&thread_id[i], NULL, 49 + &spin_lock_thread, &prog_fd); 50 + if (!ASSERT_OK(err, "pthread_create")) 51 + break; 52 + } 53 + 54 + while (i) { 55 + err = pthread_join(thread_id[--i], &ret); 56 + if (ASSERT_OK(err, "pthread_join")) 57 + ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join"); 58 + } 64 59 65 60 return 0; 66 61 }
+11 -1
tools/testing/selftests/bpf/progs/iters_task.c
··· 10 10 char _license[] SEC("license") = "GPL"; 11 11 12 12 pid_t target_pid; 13 - int procs_cnt, threads_cnt, proc_threads_cnt; 13 + int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt; 14 14 15 15 void bpf_rcu_read_lock(void) __ksym; 16 16 void bpf_rcu_read_unlock(void) __ksym; ··· 26 26 procs_cnt = threads_cnt = proc_threads_cnt = 0; 27 27 28 28 bpf_rcu_read_lock(); 29 + bpf_for_each(task, pos, NULL, ~0U) { 30 + /* Below instructions shouldn't be executed for invalid flags */ 31 + invalid_cnt++; 32 + } 33 + 34 + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) { 35 + /* Below instructions shouldn't be executed for invalid task__nullable */ 36 + invalid_cnt++; 37 + } 38 + 29 39 bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) 30 40 if (pos->pid == target_pid) 31 41 procs_cnt++;
+45
tools/testing/selftests/bpf/progs/read_vsyscall.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2024. Huawei Technologies Co., Ltd */ 3 + #include <linux/types.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + #include "bpf_misc.h" 7 + 8 + int target_pid = 0; 9 + void *user_ptr = 0; 10 + int read_ret[8]; 11 + 12 + char _license[] SEC("license") = "GPL"; 13 + 14 + SEC("fentry/" SYS_PREFIX "sys_nanosleep") 15 + int do_probe_read(void *ctx) 16 + { 17 + char buf[8]; 18 + 19 + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) 20 + return 0; 21 + 22 + read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr); 23 + read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr); 24 + read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr); 25 + read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr); 26 + read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr); 27 + read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); 28 + 29 + return 0; 30 + } 31 + 32 + SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") 33 + int do_copy_from_user(void *ctx) 34 + { 35 + char buf[8]; 36 + 37 + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) 38 + return 0; 39 + 40 + read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr); 41 + read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr, 42 + bpf_get_current_task_btf(), 0); 43 + 44 + return 0; 45 + }
+33 -1
tools/testing/selftests/bpf/progs/timer.c
··· 51 51 __uint(max_entries, 1); 52 52 __type(key, int); 53 53 __type(value, struct elem); 54 - } abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); 54 + } abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"), 55 + race_array SEC(".maps"); 55 56 56 57 __u64 bss_data; 57 58 __u64 abs_data; ··· 388 387 { 389 388 bpf_printk("test5"); 390 389 test_pinned_timer(false); 390 + 391 + return 0; 392 + } 393 + 394 + static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer) 395 + { 396 + bpf_timer_start(timer, 1000000, 0); 397 + return 0; 398 + } 399 + 400 + SEC("syscall") 401 + int race(void *ctx) 402 + { 403 + struct bpf_timer *timer; 404 + int err, race_key = 0; 405 + struct elem init; 406 + 407 + __builtin_memset(&init, 0, sizeof(struct elem)); 408 + bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY); 409 + 410 + timer = bpf_map_lookup_elem(&race_array, &race_key); 411 + if (!timer) 412 + return 1; 413 + 414 + err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC); 415 + if (err && err != -EBUSY) 416 + return 1; 417 + 418 + bpf_timer_set_callback(timer, race_timer_callback); 419 + bpf_timer_start(timer, 0, 0); 420 + bpf_timer_cancel(timer); 391 421 392 422 return 0; 393 423 }
+2
tools/testing/selftests/drivers/net/bonding/bond_options.sh
··· 62 62 63 63 # create bond 64 64 bond_reset "${param}" 65 + # set active_slave to primary eth1 specifically 66 + ip -n ${s_ns} link set bond0 type bond active_slave eth1 65 67 66 68 # check bonding member prio value 67 69 ip -n ${s_ns} link set eth0 type bond_slave prio 0
-3
tools/testing/selftests/net/forwarding/tc_actions.sh
··· 235 235 check_err $? "didn't mirred redirect ICMP" 236 236 tc_check_packets "dev $h1 ingress" 102 10 237 237 check_err $? "didn't drop mirred ICMP" 238 - local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits) 239 - test ${overlimits} = 10 240 - check_err $? "wrong overlimits, expected 10 got ${overlimits}" 241 238 242 239 tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower 243 240 tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
+17 -19
tools/testing/selftests/net/ioam6.sh
··· 367 367 local desc=$2 368 368 local node_src=$3 369 369 local node_dst=$4 370 - local ip6_src=$5 371 - local ip6_dst=$6 372 - local if_dst=$7 373 - local trace_type=$8 374 - local ioam_ns=$9 370 + local ip6_dst=$5 371 + local trace_type=$6 372 + local ioam_ns=$7 373 + local type=$8 375 374 376 - ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \ 377 - $trace_type $ioam_ns & 375 + ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & 378 376 local spid=$! 379 377 sleep 0.1 380 378 ··· 487 489 trace prealloc type 0x800000 ns 0 size 4 dev veth0 488 490 489 491 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ 490 - db01::2 db01::1 veth0 0x800000 0 492 + db01::1 0x800000 0 $1 491 493 492 494 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down 493 495 } ··· 507 509 trace prealloc type 0xc00000 ns 123 size 4 dev veth0 508 510 509 511 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ 510 - db01::2 db01::1 veth0 0xc00000 123 512 + db01::1 0xc00000 123 $1 511 513 512 514 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down 513 515 } ··· 541 543 if [ $cmd_res != 0 ] 542 544 then 543 545 npassed=$((npassed+1)) 544 - log_test_passed "$descr" 546 + log_test_passed "$descr ($1 mode)" 545 547 else 546 548 nfailed=$((nfailed+1)) 547 - log_test_failed "$descr" 549 + log_test_failed "$descr ($1 mode)" 548 550 fi 549 551 else 550 552 run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ 551 - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 553 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 552 554 fi 553 555 done 554 556 ··· 572 574 trace prealloc type 0xfff002 ns 123 size 100 dev veth0 573 575 574 576 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ 575 - db01::2 db01::1 veth0 0xfff002 123 577 + db01::1 0xfff002 123 $1 576 578 577 579 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 
down 578 580 } ··· 602 604 trace prealloc type 0x800000 ns 0 size 4 dev veth0 603 605 604 606 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ 605 - db01::2 db01::1 veth0 0x800000 0 607 + db01::1 0x800000 0 $1 606 608 607 609 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down 608 610 } ··· 622 624 trace prealloc type 0xc00000 ns 123 size 4 dev veth0 623 625 624 626 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ 625 - db01::2 db01::1 veth0 0xc00000 123 627 + db01::1 0xc00000 123 $1 626 628 627 629 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down 628 630 } ··· 649 651 dev veth0 650 652 651 653 run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ 652 - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 654 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 653 655 done 654 656 655 657 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down ··· 677 679 trace prealloc type 0xc00000 ns 123 size 4 dev veth0 678 680 679 681 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ 680 - db01::2 db01::1 veth0 0xc00000 123 682 + db01::1 0xc00000 123 $1 681 683 682 684 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down 683 685 ··· 701 703 trace prealloc type 0xfff002 ns 123 size 80 dev veth0 702 704 703 705 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ 704 - db01::2 db01::1 veth0 0xfff002 123 706 + db01::1 0xfff002 123 $1 705 707 706 708 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down 707 709 } ··· 729 731 trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 730 732 731 733 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ 732 - db01::2 db02::2 veth0 0xfff002 123 734 + db02::2 0xfff002 123 $1 733 735 734 736 [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down 735 737 }
+48 -47
tools/testing/selftests/net/ioam6_parser.c
··· 8 8 #include <errno.h> 9 9 #include <limits.h> 10 10 #include <linux/const.h> 11 - #include <linux/if_ether.h> 12 11 #include <linux/ioam6.h> 13 12 #include <linux/ipv6.h> 14 13 #include <stdlib.h> ··· 511 512 return -1; 512 513 } 513 514 514 - static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) 515 - { 516 - return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | 517 - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | 518 - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | 519 - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; 520 - } 521 - 522 515 static int get_u32(__u32 *val, const char *arg, int base) 523 516 { 524 517 unsigned long res; ··· 594 603 595 604 int main(int argc, char **argv) 596 605 { 597 - int fd, size, hoplen, tid, ret = 1; 598 - struct in6_addr src, dst; 606 + int fd, size, hoplen, tid, ret = 1, on = 1; 599 607 struct ioam6_hdr *opt; 600 - struct ipv6hdr *ip6h; 601 - __u8 buffer[400], *p; 602 - __u16 ioam_ns; 608 + struct cmsghdr *cmsg; 609 + struct msghdr msg; 610 + struct iovec iov; 611 + __u8 buffer[512]; 603 612 __u32 tr_type; 613 + __u16 ioam_ns; 614 + __u8 *ptr; 604 615 605 - if (argc != 7) 616 + if (argc != 5) 606 617 goto out; 607 618 608 - tid = str2id(argv[2]); 619 + tid = str2id(argv[1]); 609 620 if (tid < 0 || !func[tid]) 610 621 goto out; 611 622 612 - if (inet_pton(AF_INET6, argv[3], &src) != 1 || 613 - inet_pton(AF_INET6, argv[4], &dst) != 1) 623 + if (get_u32(&tr_type, argv[2], 16) || 624 + get_u16(&ioam_ns, argv[3], 0)) 614 625 goto out; 615 626 616 - if (get_u32(&tr_type, argv[5], 16) || 617 - get_u16(&ioam_ns, argv[6], 0)) 627 + fd = socket(PF_INET6, SOCK_RAW, 628 + !strcmp(argv[4], "encap") ? 
IPPROTO_IPV6 : IPPROTO_ICMPV6); 629 + if (fd < 0) 618 630 goto out; 619 631 620 - fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); 621 - if (!fd) 622 - goto out; 632 + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); 623 633 624 - if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, 625 - argv[1], strlen(argv[1]))) 634 + iov.iov_len = 1; 635 + iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); 636 + if (!iov.iov_base) 626 637 goto close; 627 - 628 638 recv: 629 - size = recv(fd, buffer, sizeof(buffer), 0); 639 + memset(&msg, 0, sizeof(msg)); 640 + msg.msg_iov = &iov; 641 + msg.msg_iovlen = 1; 642 + msg.msg_control = buffer; 643 + msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); 644 + 645 + size = recvmsg(fd, &msg, 0); 630 646 if (size <= 0) 631 647 goto close; 632 648 633 - ip6h = (struct ipv6hdr *)buffer; 649 + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { 650 + if (cmsg->cmsg_level != IPPROTO_IPV6 || 651 + cmsg->cmsg_type != IPV6_HOPOPTS || 652 + cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) 653 + continue; 634 654 635 - if (!ipv6_addr_equal(&ip6h->saddr, &src) || 636 - !ipv6_addr_equal(&ip6h->daddr, &dst)) 637 - goto recv; 655 + ptr = (__u8 *)CMSG_DATA(cmsg); 638 656 639 - if (ip6h->nexthdr != IPPROTO_HOPOPTS) 640 - goto close; 657 + hoplen = (ptr[1] + 1) << 3; 658 + ptr += sizeof(struct ipv6_hopopt_hdr); 641 659 642 - p = buffer + sizeof(*ip6h); 643 - hoplen = (p[1] + 1) << 3; 644 - p += sizeof(struct ipv6_hopopt_hdr); 660 + while (hoplen > 0) { 661 + opt = (struct ioam6_hdr *)ptr; 645 662 646 - while (hoplen > 0) { 647 - opt = (struct ioam6_hdr *)p; 663 + if (opt->opt_type == IPV6_TLV_IOAM && 664 + opt->type == IOAM6_TYPE_PREALLOC) { 665 + ptr += sizeof(*opt); 666 + ret = func[tid](tid, 667 + (struct ioam6_trace_hdr *)ptr, 668 + tr_type, ioam_ns); 669 + goto close; 670 + } 648 671 649 - if (opt->opt_type == IPV6_TLV_IOAM && 650 - opt->type == IOAM6_TYPE_PREALLOC) { 651 - p += sizeof(*opt); 652 - ret = 
func[tid](tid, (struct ioam6_trace_hdr *)p, 653 - tr_type, ioam_ns); 654 - break; 672 + ptr += opt->opt_len + 2; 673 + hoplen -= opt->opt_len + 2; 655 674 } 656 - 657 - p += opt->opt_len + 2; 658 - hoplen -= opt->opt_len + 2; 659 675 } 676 + 677 + goto recv; 660 678 close: 679 + free(iov.iov_base); 661 680 close(fd); 662 681 out: 663 682 return ret;
+25 -16
tools/testing/selftests/net/mptcp/diag.sh
··· 62 62 nr=$(eval $command) 63 63 64 64 printf "%-50s" "$msg" 65 - if [ $nr != $expected ]; then 66 - if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then 65 + if [ "$nr" != "$expected" ]; then 66 + if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then 67 67 echo "[ skip ] Feature probably not supported" 68 68 mptcp_lib_result_skip "${msg}" 69 69 else ··· 166 166 chk_msk_inuse() 167 167 { 168 168 local expected=$1 169 - local msg="$2" 169 + local msg="....chk ${2:-${expected}} msk in use" 170 170 local listen_nr 171 + 172 + if [ "${expected}" -eq 0 ]; then 173 + msg+=" after flush" 174 + fi 171 175 172 176 listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) 173 177 expected=$((expected + listen_nr)) ··· 183 179 sleep 0.1 184 180 done 185 181 186 - __chk_nr get_msk_inuse $expected "$msg" 0 182 + __chk_nr get_msk_inuse $expected "${msg}" 0 187 183 } 188 184 189 185 # $1: cestab nr 190 186 chk_msk_cestab() 191 187 { 192 - local cestab=$1 188 + local expected=$1 189 + local msg="....chk ${2:-${expected}} cestab" 190 + 191 + if [ "${expected}" -eq 0 ]; then 192 + msg+=" after flush" 193 + fi 193 194 194 195 __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ 195 - "${cestab}" "....chk ${cestab} cestab" "" 196 + "${expected}" "${msg}" "" 196 197 } 197 198 198 199 wait_connected() ··· 236 227 chk_msk_nr 2 "after MPC handshake " 237 228 chk_msk_remote_key_nr 2 "....chk remote_key" 238 229 chk_msk_fallback_nr 0 "....chk no fallback" 239 - chk_msk_inuse 2 "....chk 2 msk in use" 230 + chk_msk_inuse 2 240 231 chk_msk_cestab 2 241 232 flush_pids 242 233 243 - chk_msk_inuse 0 "....chk 0 msk in use after flush" 244 - chk_msk_cestab 0 234 + chk_msk_inuse 0 "2->0" 235 + chk_msk_cestab 0 "2->0" 245 236 246 237 echo "a" | \ 247 238 timeout ${timeout_test} \ ··· 256 247 127.0.0.1 >/dev/null & 257 248 wait_connected $ns 10001 258 249 chk_msk_fallback_nr 1 "check fallback" 259 - chk_msk_inuse 1 "....chk 1 msk in use" 250 + chk_msk_inuse 1 260 251 chk_msk_cestab 
1 261 252 flush_pids 262 253 263 - chk_msk_inuse 0 "....chk 0 msk in use after flush" 264 - chk_msk_cestab 0 254 + chk_msk_inuse 0 "1->0" 255 + chk_msk_cestab 0 "1->0" 265 256 266 257 NR_CLIENTS=100 267 258 for I in `seq 1 $NR_CLIENTS`; do ··· 282 273 done 283 274 284 275 wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" 285 - chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" 286 - chk_msk_cestab $((NR_CLIENTS*2)) 276 + chk_msk_inuse $((NR_CLIENTS*2)) "many" 277 + chk_msk_cestab $((NR_CLIENTS*2)) "many" 287 278 flush_pids 288 279 289 - chk_msk_inuse 0 "....chk 0 msk in use after flush" 290 - chk_msk_cestab 0 280 + chk_msk_inuse 0 "many->0" 281 + chk_msk_cestab 0 "many->0" 291 282 292 283 mptcp_lib_result_print_all_tap 293 284 exit $ret
+7 -1
tools/testing/selftests/net/mptcp/pm_netlink.sh
··· 183 183 subflow 10.0.1.1" " (nobackup)" 184 184 185 185 # fullmesh support has been added later 186 - ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 186 + ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null 187 187 if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || 188 188 mptcp_lib_expect_all_features; then 189 189 check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ ··· 194 194 ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh 195 195 check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ 196 196 subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" 197 + else 198 + for st in fullmesh nofullmesh backup,fullmesh; do 199 + st=" (${st})" 200 + printf "%-50s%s\n" "${st}" "[SKIP]" 201 + mptcp_lib_result_skip "${st}" 202 + done 197 203 fi 198 204 199 205 mptcp_lib_result_print_all_tap
+2 -1
tools/testing/selftests/net/mptcp/simult_flows.sh
··· 250 250 [ $bail -eq 0 ] || exit $ret 251 251 fi 252 252 253 - printf "%-60s" "$msg - reverse direction" 253 + msg+=" - reverse direction" 254 + printf "%-60s" "${msg}" 254 255 do_transfer $large $small $time 255 256 lret=$? 256 257 mptcp_lib_result_code "${lret}" "${msg}"
+2 -2
tools/testing/selftests/net/mptcp/userspace_pm.sh
··· 75 75 { 76 76 test_name="${1}" 77 77 78 - _printf "%-63s" "${test_name}" 78 + _printf "%-68s" "${test_name}" 79 79 } 80 80 81 81 print_results() ··· 542 542 local remid 543 543 local info 544 544 545 - info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})" 545 + info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" 546 546 547 547 if [ "$e_type" = "$SUB_ESTABLISHED" ] 548 548 then
+45
tools/testing/selftests/net/tls.c
··· 1485 1485 EXPECT_EQ(memcmp(buf, test_str, send_len), 0); 1486 1486 } 1487 1487 1488 + TEST_F(tls, control_msg_nomerge) 1489 + { 1490 + char *rec1 = "1111"; 1491 + char *rec2 = "2222"; 1492 + int send_len = 5; 1493 + char buf[15]; 1494 + 1495 + if (self->notls) 1496 + SKIP(return, "no TLS support"); 1497 + 1498 + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len); 1499 + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); 1500 + 1501 + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); 1502 + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); 1503 + 1504 + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); 1505 + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); 1506 + 1507 + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); 1508 + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); 1509 + 1510 + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); 1511 + EXPECT_EQ(memcmp(buf, rec2, send_len), 0); 1512 + } 1513 + 1514 + TEST_F(tls, data_control_data) 1515 + { 1516 + char *rec1 = "1111"; 1517 + char *rec2 = "2222"; 1518 + char *rec3 = "3333"; 1519 + int send_len = 5; 1520 + char buf[15]; 1521 + 1522 + if (self->notls) 1523 + SKIP(return, "no TLS support"); 1524 + 1525 + EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len); 1526 + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); 1527 + EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len); 1528 + 1529 + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); 1530 + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); 1531 + } 1532 + 1488 1533 TEST_F(tls, shutdown) 1489 1534 { 1490 1535 char const *test_str = "test_read";