Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'net-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Jakub Kicinski:
"Including fixes from CAN, netfilter and wireless.

Current release - new code bugs:

- sched: cake: fixup cake_mq rate adjustment for diffserv config

- wifi: fix missing ieee80211_eml_params member initialization

Previous releases - regressions:

- tcp: give up on stronger sk_rcvbuf checks (for now)

Previous releases - always broken:

- net: fix rcu_tasks stall in threaded busypoll

- sched:
    - fq: clear q->band_pkt_count[] in fq_reset()
    - only allow act_ct to bind to clsact/ingress qdiscs and shared
      blocks

- bridge: check relevant per-VLAN options in VLAN range grouping

- xsk: fix fragment node deletion to prevent buffer leak

Misc:

- spring cleanup of inactive maintainers"

* tag 'net-7.0-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (138 commits)
xdp: produce a warning when calculated tailroom is negative
net: enetc: use truesize as XDP RxQ info frag_size
libeth, idpf: use truesize as XDP RxQ info frag_size
i40e: use xdp.frame_sz as XDP RxQ info frag_size
i40e: fix registering XDP RxQ info
ice: change XDP RxQ frag_size from DMA write length to xdp.frame_sz
ice: fix rxq info registering in mbuf packets
xsk: introduce helper to determine rxq->frag_size
xdp: use modulo operation to calculate XDP frag tailroom
selftests/tc-testing: Add tests exercising act_ife metalist replace behaviour
net/sched: act_ife: Fix metalist update behavior
selftests: net: add test for IPv4 route with loopback IPv6 nexthop
net: ipv6: fix panic when IPv4 route references loopback IPv6 nexthop
net: vxlan: fix nd_tbl NULL dereference when IPv6 is disabled
net: bridge: fix nd_tbl NULL dereference when IPv6 is disabled
MAINTAINERS: remove Thomas Falcon from IBM ibmvnic
MAINTAINERS: remove Claudiu Manoil and Alexandre Belloni from Ocelot switch
MAINTAINERS: replace Taras Chornyi with Elad Nachman for Marvell Prestera
MAINTAINERS: remove Jonathan Lemon from OpenCompute PTP
MAINTAINERS: replace Clark Wang with Frank Li for Freescale FEC
...

+2163 -847
+2
.mailmap
··· 353 353 Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com> 354 354 Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com> 355 355 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com> 356 + Jason Xing <kerneljasonxing@gmail.com> <kernelxing@tencent.com> 356 357 <javier@osg.samsung.com> <javier.martinez@collabora.co.uk> 357 358 Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com> 358 359 Jayachandran C <c.jayachandran@gmail.com> <jayachandranc@netlogicmicro.com> ··· 402 401 Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com> 403 402 Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com> 404 403 Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org> 404 + Joe Damato <joe@dama.to> <jdamato@fastly.com> 405 405 Joel Granados <joel.granados@kernel.org> <j.granados@samsung.com> 406 406 Johan Hovold <johan@kernel.org> <jhovold@gmail.com> 407 407 Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
+8
CREDITS
··· 1242 1242 E: vfalico@gmail.com 1243 1243 D: Co-maintainer and co-author of the network bonding driver. 1244 1244 1245 + N: Thomas Falcon 1246 + E: tlfalcon@linux.ibm.com 1247 + D: Initial author of the IBM ibmvnic network driver 1248 + 1245 1249 N: János Farkas 1246 1250 E: chexum@shadow.banki.hu 1247 1251 D: romfs, various (mostly networking) fixes ··· 2418 2414 S: Am Muehlenweg 38 2419 2415 S: D53424 Remagen 2420 2416 S: Germany 2417 + 2418 + N: Jonathan Lemon 2419 + E: jonathan.lemon@gmail.com 2420 + D: OpenCompute PTP clock driver (ptp_ocp) 2421 2421 2422 2422 N: Colin Leroy 2423 2423 E: colin@colino.net
+1
Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml
··· 87 87 88 88 allOf: 89 89 - $ref: can-controller.yaml# 90 + - $ref: /schemas/memory-controllers/mc-peripheral-props.yaml 90 91 - if: 91 92 properties: 92 93 compatible:
+5 -19
MAINTAINERS
··· 993 993 F: drivers/thermal/thermal_mmio.c 994 994 995 995 AMAZON ETHERNET DRIVERS 996 - M: Shay Agroskin <shayagr@amazon.com> 997 996 M: Arthur Kiyanovski <akiyano@amazon.com> 998 - R: David Arinzon <darinzon@amazon.com> 999 - R: Saeed Bishara <saeedb@amazon.com> 997 + M: David Arinzon <darinzon@amazon.com> 1000 998 L: netdev@vger.kernel.org 1001 999 S: Maintained 1002 1000 F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst ··· 4615 4617 4616 4618 BLUETOOTH SUBSYSTEM 4617 4619 M: Marcel Holtmann <marcel@holtmann.org> 4618 - M: Johan Hedberg <johan.hedberg@gmail.com> 4619 4620 M: Luiz Augusto von Dentz <luiz.dentz@gmail.com> 4620 4621 L: linux-bluetooth@vger.kernel.org 4621 4622 S: Supported ··· 10168 10171 10169 10172 FREESCALE IMX / MXC FEC DRIVER 10170 10173 M: Wei Fang <wei.fang@nxp.com> 10174 + R: Frank Li <frank.li@nxp.com> 10171 10175 R: Shenwei Wang <shenwei.wang@nxp.com> 10172 - R: Clark Wang <xiaoning.wang@nxp.com> 10173 10176 L: imx@lists.linux.dev 10174 10177 L: netdev@vger.kernel.org 10175 10178 S: Maintained ··· 12213 12216 M: Haren Myneni <haren@linux.ibm.com> 12214 12217 M: Rick Lindsley <ricklind@linux.ibm.com> 12215 12218 R: Nick Child <nnac123@linux.ibm.com> 12216 - R: Thomas Falcon <tlfalcon@linux.ibm.com> 12217 12219 L: netdev@vger.kernel.org 12218 12220 S: Maintained 12219 12221 F: drivers/net/ethernet/ibm/ibmvnic.* ··· 15371 15375 F: include/linux/soc/marvell/octeontx2/ 15372 15376 15373 15377 MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) 15374 - M: Mirko Lindner <mlindner@marvell.com> 15375 - M: Stephen Hemminger <stephen@networkplumber.org> 15376 15378 L: netdev@vger.kernel.org 15377 - S: Odd fixes 15379 + S: Orphan 15378 15380 F: drivers/net/ethernet/marvell/sk* 15379 15381 15380 15382 MARVELL LIBERTAS WIRELESS DRIVER ··· 15469 15475 M: Sunil Goutham <sgoutham@marvell.com> 15470 15476 M: Linu Cherian <lcherian@marvell.com> 15471 15477 M: Geetha sowjanya <gakula@marvell.com> 15472 - M: Jerin Jacob <jerinj@marvell.com> 
15473 15478 M: hariprasad <hkelam@marvell.com> 15474 15479 M: Subbaraya Sundeep <sbhatta@marvell.com> 15475 15480 L: netdev@vger.kernel.org ··· 15483 15490 F: drivers/perf/marvell_pem_pmu.c 15484 15491 15485 15492 MARVELL PRESTERA ETHERNET SWITCH DRIVER 15486 - M: Taras Chornyi <taras.chornyi@plvision.eu> 15493 + M: Elad Nachman <enachman@marvell.com> 15487 15494 S: Supported 15488 15495 W: https://github.com/Marvell-switching/switchdev-prestera 15489 15496 F: drivers/net/ethernet/marvell/prestera/ ··· 16157 16164 16158 16165 MEDIATEK ETHERNET DRIVER 16159 16166 M: Felix Fietkau <nbd@nbd.name> 16160 - M: Sean Wang <sean.wang@mediatek.com> 16161 16167 M: Lorenzo Bianconi <lorenzo@kernel.org> 16162 16168 L: netdev@vger.kernel.org 16163 16169 S: Maintained ··· 16349 16357 MEDIATEK SWITCH DRIVER 16350 16358 M: Chester A. Unal <chester.a.unal@arinc9.com> 16351 16359 M: Daniel Golle <daniel@makrotopia.org> 16352 - M: DENG Qingfang <dqfext@gmail.com> 16353 - M: Sean Wang <sean.wang@mediatek.com> 16354 16360 L: netdev@vger.kernel.org 16355 16361 S: Maintained 16356 16362 F: drivers/net/dsa/mt7530-mdio.c ··· 19216 19226 19217 19227 OCELOT ETHERNET SWITCH DRIVER 19218 19228 M: Vladimir Oltean <vladimir.oltean@nxp.com> 19219 - M: Claudiu Manoil <claudiu.manoil@nxp.com> 19220 - M: Alexandre Belloni <alexandre.belloni@bootlin.com> 19221 19229 M: UNGLinuxDriver@microchip.com 19222 19230 L: netdev@vger.kernel.org 19223 19231 S: Supported ··· 19801 19813 F: include/dt-bindings/ 19802 19814 19803 19815 OPENCOMPUTE PTP CLOCK DRIVER 19804 - M: Jonathan Lemon <jonathan.lemon@gmail.com> 19805 19816 M: Vadim Fedorenko <vadim.fedorenko@linux.dev> 19806 19817 L: netdev@vger.kernel.org 19807 19818 S: Maintained ··· 21444 21457 F: drivers/scsi/qedi/ 21445 21458 21446 21459 QLOGIC QL4xxx ETHERNET DRIVER 21447 - M: Manish Chopra <manishc@marvell.com> 21448 21460 L: netdev@vger.kernel.org 21449 - S: Maintained 21461 + S: Orphan 21450 21462 F: drivers/net/ethernet/qlogic/qed/ 21451 21463 F: 
drivers/net/ethernet/qlogic/qede/ 21452 21464 F: include/linux/qed/
+7 -2
drivers/net/bonding/bond_main.c
··· 324 324 } 325 325 } 326 326 327 - bool bond_xdp_check(struct bonding *bond, int mode) 327 + bool __bond_xdp_check(int mode, int xmit_policy) 328 328 { 329 329 switch (mode) { 330 330 case BOND_MODE_ROUNDROBIN: ··· 335 335 /* vlan+srcmac is not supported with XDP as in most cases the 802.1q 336 336 * payload is not in the packet due to hardware offload. 337 337 */ 338 - if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) 338 + if (xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) 339 339 return true; 340 340 fallthrough; 341 341 default: 342 342 return false; 343 343 } 344 + } 345 + 346 + bool bond_xdp_check(struct bonding *bond, int mode) 347 + { 348 + return __bond_xdp_check(mode, bond->params.xmit_policy); 344 349 } 345 350 346 351 /*---------------------------------- VLAN -----------------------------------*/
+2
drivers/net/bonding/bond_options.c
··· 1575 1575 static int bond_option_xmit_hash_policy_set(struct bonding *bond, 1576 1576 const struct bond_opt_value *newval) 1577 1577 { 1578 + if (bond->xdp_prog && !__bond_xdp_check(BOND_MODE(bond), newval->value)) 1579 + return -EOPNOTSUPP; 1578 1580 netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n", 1579 1581 newval->string, newval->value); 1580 1582 bond->params.xmit_policy = newval->value;
+1
drivers/net/can/dummy_can.c
··· 241 241 242 242 dev->netdev_ops = &dummy_can_netdev_ops; 243 243 dev->ethtool_ops = &dummy_can_ethtool_ops; 244 + dev->flags |= IFF_ECHO; /* enable echo handling */ 244 245 priv = netdev_priv(dev); 245 246 priv->can.bittiming_const = &dummy_can_bittiming_const; 246 247 priv->can.bitrate_max = 20 * MEGA /* BPS */;
+14 -1
drivers/net/can/spi/mcp251x.c
··· 1214 1214 { 1215 1215 struct mcp251x_priv *priv = netdev_priv(net); 1216 1216 struct spi_device *spi = priv->spi; 1217 + bool release_irq = false; 1217 1218 unsigned long flags = 0; 1218 1219 int ret; 1219 1220 ··· 1258 1257 return 0; 1259 1258 1260 1259 out_free_irq: 1261 - free_irq(spi->irq, priv); 1260 + /* The IRQ handler might be running, and if so it will be waiting 1261 + * for the lock. But free_irq() must wait for the handler to finish 1262 + * so calling it here would deadlock. 1263 + * 1264 + * Setting priv->force_quit will let the handler exit right away 1265 + * without any access to the hardware. This makes it safe to call 1266 + * free_irq() after the lock is released. 1267 + */ 1268 + priv->force_quit = 1; 1269 + release_irq = true; 1270 + 1262 1271 mcp251x_hw_sleep(spi); 1263 1272 out_close: 1264 1273 mcp251x_power_enable(priv->transceiver, 0); 1265 1274 close_candev(net); 1266 1275 mutex_unlock(&priv->mcp_lock); 1276 + if (release_irq) 1277 + free_irq(spi->irq, priv); 1267 1278 return ret; 1268 1279 } 1269 1280
+6 -1
drivers/net/can/usb/ems_usb.c
··· 445 445 start = CPC_HEADER_SIZE; 446 446 447 447 while (msg_count) { 448 + if (start + CPC_MSG_HEADER_LEN > urb->actual_length) { 449 + netdev_err(netdev, "format error\n"); 450 + break; 451 + } 452 + 448 453 msg = (struct ems_cpc_msg *)&ibuf[start]; 449 454 450 455 switch (msg->type) { ··· 479 474 start += CPC_MSG_HEADER_LEN + msg->length; 480 475 msg_count--; 481 476 482 - if (start > urb->transfer_buffer_length) { 477 + if (start > urb->actual_length) { 483 478 netdev_err(netdev, "format error\n"); 484 479 break; 485 480 }
+17 -13
drivers/net/can/usb/esd_usb.c
··· 272 272 273 273 struct usb_anchor rx_submitted; 274 274 275 + unsigned int rx_pipe; 276 + unsigned int tx_pipe; 277 + 275 278 int net_count; 276 279 u32 version; 277 280 int rxinitdone; ··· 540 537 } 541 538 542 539 resubmit_urb: 543 - usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), 540 + usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe, 544 541 urb->transfer_buffer, ESD_USB_RX_BUFFER_SIZE, 545 542 esd_usb_read_bulk_callback, dev); 546 543 ··· 629 626 { 630 627 int actual_length; 631 628 632 - return usb_bulk_msg(dev->udev, 633 - usb_sndbulkpipe(dev->udev, 2), 634 - msg, 629 + return usb_bulk_msg(dev->udev, dev->tx_pipe, msg, 635 630 msg->hdr.len * sizeof(u32), /* convert to # of bytes */ 636 631 &actual_length, 637 632 1000); ··· 640 639 { 641 640 int actual_length; 642 641 643 - return usb_bulk_msg(dev->udev, 644 - usb_rcvbulkpipe(dev->udev, 1), 645 - msg, 646 - sizeof(*msg), 647 - &actual_length, 648 - 1000); 642 + return usb_bulk_msg(dev->udev, dev->rx_pipe, msg, 643 + sizeof(*msg), &actual_length, 1000); 649 644 } 650 645 651 646 static int esd_usb_setup_rx_urbs(struct esd_usb *dev) ··· 674 677 675 678 urb->transfer_dma = buf_dma; 676 679 677 - usb_fill_bulk_urb(urb, dev->udev, 678 - usb_rcvbulkpipe(dev->udev, 1), 680 + usb_fill_bulk_urb(urb, dev->udev, dev->rx_pipe, 679 681 buf, ESD_USB_RX_BUFFER_SIZE, 680 682 esd_usb_read_bulk_callback, dev); 681 683 urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; ··· 899 903 /* hnd must not be 0 - MSB is stripped in txdone handling */ 900 904 msg->tx.hnd = BIT(31) | i; /* returned in TX done message */ 901 905 902 - usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf, 906 + usb_fill_bulk_urb(urb, dev->udev, dev->tx_pipe, buf, 903 907 msg->hdr.len * sizeof(u32), /* convert to # of bytes */ 904 908 esd_usb_write_bulk_callback, context); 905 909 ··· 1294 1298 static int esd_usb_probe(struct usb_interface *intf, 1295 1299 const struct usb_device_id *id) 1296 1300 { 1301 + struct 
usb_endpoint_descriptor *ep_in, *ep_out; 1297 1302 struct esd_usb *dev; 1298 1303 union esd_usb_msg *msg; 1299 1304 int i, err; 1305 + 1306 + err = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, 1307 + NULL, NULL); 1308 + if (err) 1309 + return err; 1300 1310 1301 1311 dev = kzalloc_obj(*dev); 1302 1312 if (!dev) { ··· 1311 1309 } 1312 1310 1313 1311 dev->udev = interface_to_usbdev(intf); 1312 + dev->rx_pipe = usb_rcvbulkpipe(dev->udev, ep_in->bEndpointAddress); 1313 + dev->tx_pipe = usb_sndbulkpipe(dev->udev, ep_out->bEndpointAddress); 1314 1314 1315 1315 init_usb_anchor(&dev->rx_submitted); 1316 1316
+7 -1
drivers/net/can/usb/etas_es58x/es58x_core.c
··· 1461 1461 } 1462 1462 1463 1463 resubmit_urb: 1464 + usb_anchor_urb(urb, &es58x_dev->rx_urbs); 1464 1465 ret = usb_submit_urb(urb, GFP_ATOMIC); 1466 + if (!ret) 1467 + return; 1468 + 1469 + usb_unanchor_urb(urb); 1470 + 1465 1471 if (ret == -ENODEV) { 1466 1472 for (i = 0; i < es58x_dev->num_can_ch; i++) 1467 1473 if (es58x_dev->netdev[i]) 1468 1474 netif_device_detach(es58x_dev->netdev[i]); 1469 - } else if (ret) 1475 + } else 1470 1476 dev_err_ratelimited(dev, 1471 1477 "Failed resubmitting read bulk urb: %pe\n", 1472 1478 ERR_PTR(ret));
+40 -5
drivers/net/can/usb/f81604.c
··· 413 413 { 414 414 struct f81604_can_frame *frame = urb->transfer_buffer; 415 415 struct net_device *netdev = urb->context; 416 + struct f81604_port_priv *priv = netdev_priv(netdev); 416 417 int ret; 417 418 418 419 if (!netif_device_present(netdev)) ··· 446 445 f81604_process_rx_packet(netdev, frame); 447 446 448 447 resubmit_urb: 448 + usb_anchor_urb(urb, &priv->urbs_anchor); 449 449 ret = usb_submit_urb(urb, GFP_ATOMIC); 450 + if (!ret) 451 + return; 452 + usb_unanchor_urb(urb); 453 + 450 454 if (ret == -ENODEV) 451 455 netif_device_detach(netdev); 452 - else if (ret) 456 + else 453 457 netdev_err(netdev, 454 458 "%s: failed to resubmit read bulk urb: %pe\n", 455 459 __func__, ERR_PTR(ret)); ··· 626 620 netdev_info(netdev, "%s: Int URB aborted: %pe\n", __func__, 627 621 ERR_PTR(urb->status)); 628 622 623 + if (urb->actual_length < sizeof(*data)) { 624 + netdev_warn(netdev, "%s: short int URB: %u < %zu\n", 625 + __func__, urb->actual_length, sizeof(*data)); 626 + goto resubmit_urb; 627 + } 628 + 629 629 switch (urb->status) { 630 630 case 0: /* success */ 631 631 break; ··· 658 646 f81604_handle_tx(priv, data); 659 647 660 648 resubmit_urb: 649 + usb_anchor_urb(urb, &priv->urbs_anchor); 661 650 ret = usb_submit_urb(urb, GFP_ATOMIC); 651 + if (!ret) 652 + return; 653 + usb_unanchor_urb(urb); 654 + 662 655 if (ret == -ENODEV) 663 656 netif_device_detach(netdev); 664 - else if (ret) 657 + else 665 658 netdev_err(netdev, "%s: failed to resubmit int urb: %pe\n", 666 659 __func__, ERR_PTR(ret)); 667 660 } ··· 891 874 if (!netif_device_present(netdev)) 892 875 return; 893 876 894 - if (urb->status) 895 - netdev_info(netdev, "%s: Tx URB error: %pe\n", __func__, 896 - ERR_PTR(urb->status)); 877 + if (!urb->status) 878 + return; 879 + 880 + switch (urb->status) { 881 + case -ENOENT: 882 + case -ECONNRESET: 883 + case -ESHUTDOWN: 884 + return; 885 + default: 886 + break; 887 + } 888 + 889 + if (net_ratelimit()) 890 + netdev_err(netdev, "%s: Tx URB error: %pe\n", 
__func__, 891 + ERR_PTR(urb->status)); 892 + 893 + can_free_echo_skb(netdev, 0, NULL); 894 + netdev->stats.tx_dropped++; 895 + netdev->stats.tx_errors++; 896 + 897 + netif_wake_queue(netdev); 897 898 } 898 899 899 900 static void f81604_clear_reg_work(struct work_struct *work)
+16 -6
drivers/net/can/usb/gs_usb.c
··· 772 772 } 773 773 } 774 774 775 - static int gs_usb_set_bittiming(struct net_device *netdev) 775 + static int gs_usb_set_bittiming(struct gs_can *dev) 776 776 { 777 - struct gs_can *dev = netdev_priv(netdev); 778 777 struct can_bittiming *bt = &dev->can.bittiming; 779 778 struct gs_device_bittiming dbt = { 780 779 .prop_seg = cpu_to_le32(bt->prop_seg), ··· 790 791 GFP_KERNEL); 791 792 } 792 793 793 - static int gs_usb_set_data_bittiming(struct net_device *netdev) 794 + static int gs_usb_set_data_bittiming(struct gs_can *dev) 794 795 { 795 - struct gs_can *dev = netdev_priv(netdev); 796 796 struct can_bittiming *bt = &dev->can.fd.data_bittiming; 797 797 struct gs_device_bittiming dbt = { 798 798 .prop_seg = cpu_to_le32(bt->prop_seg), ··· 1054 1056 /* if hardware supports timestamps, enable it */ 1055 1057 if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) 1056 1058 flags |= GS_CAN_MODE_HW_TIMESTAMP; 1059 + 1060 + rc = gs_usb_set_bittiming(dev); 1061 + if (rc) { 1062 + netdev_err(netdev, "failed to set bittiming: %pe\n", ERR_PTR(rc)); 1063 + goto out_usb_kill_anchored_urbs; 1064 + } 1065 + 1066 + if (ctrlmode & CAN_CTRLMODE_FD) { 1067 + rc = gs_usb_set_data_bittiming(dev); 1068 + if (rc) { 1069 + netdev_err(netdev, "failed to set data bittiming: %pe\n", ERR_PTR(rc)); 1070 + goto out_usb_kill_anchored_urbs; 1071 + } 1072 + } 1057 1073 1058 1074 /* finally start device */ 1059 1075 dev->can.state = CAN_STATE_ERROR_ACTIVE; ··· 1382 1370 dev->can.state = CAN_STATE_STOPPED; 1383 1371 dev->can.clock.freq = le32_to_cpu(bt_const.fclk_can); 1384 1372 dev->can.bittiming_const = &dev->bt_const; 1385 - dev->can.do_set_bittiming = gs_usb_set_bittiming; 1386 1373 1387 1374 dev->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC; 1388 1375 ··· 1405 1394 * GS_CAN_FEATURE_BT_CONST_EXT is set. 
1406 1395 */ 1407 1396 dev->can.fd.data_bittiming_const = &dev->bt_const; 1408 - dev->can.fd.do_set_data_bittiming = gs_usb_set_data_bittiming; 1409 1397 } 1410 1398 1411 1399 if (feature & GS_CAN_FEATURE_TERMINATION) {
+1 -1
drivers/net/can/usb/ucan.c
··· 748 748 len = le16_to_cpu(m->len); 749 749 750 750 /* check sanity (length of content) */ 751 - if (urb->actual_length - pos < len) { 751 + if ((len == 0) || (urb->actual_length - pos < len)) { 752 752 netdev_warn(up->netdev, 753 753 "invalid message (short; no data; l:%d)\n", 754 754 urb->actual_length);
+1 -1
drivers/net/dsa/realtek/rtl8365mb.c
··· 769 769 out: 770 770 rtl83xx_unlock(priv); 771 771 772 - return 0; 772 + return ret; 773 773 } 774 774 775 775 static int rtl8365mb_phy_read(struct realtek_priv *priv, int phy, int regnum)
+1 -1
drivers/net/ethernet/amd/xgbe/xgbe-common.h
··· 431 431 #define MAC_SSIR_SSINC_INDEX 16 432 432 #define MAC_SSIR_SSINC_WIDTH 8 433 433 #define MAC_TCR_SS_INDEX 29 434 - #define MAC_TCR_SS_WIDTH 2 434 + #define MAC_TCR_SS_WIDTH 3 435 435 #define MAC_TCR_TE_INDEX 0 436 436 #define MAC_TCR_TE_WIDTH 1 437 437 #define MAC_TCR_VNE_INDEX 24
-10
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
··· 1120 1120 { 1121 1121 struct xgbe_prv_data *pdata = netdev_priv(netdev); 1122 1122 struct xgbe_hw_if *hw_if = &pdata->hw_if; 1123 - unsigned long flags; 1124 1123 1125 1124 DBGPR("-->xgbe_powerdown\n"); 1126 1125 ··· 1129 1130 DBGPR("<--xgbe_powerdown\n"); 1130 1131 return -EINVAL; 1131 1132 } 1132 - 1133 - spin_lock_irqsave(&pdata->lock, flags); 1134 1133 1135 1134 if (caller == XGMAC_DRIVER_CONTEXT) 1136 1135 netif_device_detach(netdev); ··· 1145 1148 1146 1149 pdata->power_down = 1; 1147 1150 1148 - spin_unlock_irqrestore(&pdata->lock, flags); 1149 - 1150 1151 DBGPR("<--xgbe_powerdown\n"); 1151 1152 1152 1153 return 0; ··· 1154 1159 { 1155 1160 struct xgbe_prv_data *pdata = netdev_priv(netdev); 1156 1161 struct xgbe_hw_if *hw_if = &pdata->hw_if; 1157 - unsigned long flags; 1158 1162 1159 1163 DBGPR("-->xgbe_powerup\n"); 1160 1164 ··· 1163 1169 DBGPR("<--xgbe_powerup\n"); 1164 1170 return -EINVAL; 1165 1171 } 1166 - 1167 - spin_lock_irqsave(&pdata->lock, flags); 1168 1172 1169 1173 pdata->power_down = 0; 1170 1174 ··· 1177 1185 netif_tx_start_all_queues(netdev); 1178 1186 1179 1187 xgbe_start_timers(pdata); 1180 - 1181 - spin_unlock_irqrestore(&pdata->lock, flags); 1182 1188 1183 1189 DBGPR("<--xgbe_powerup\n"); 1184 1190
-1
drivers/net/ethernet/amd/xgbe/xgbe-main.c
··· 76 76 pdata->netdev = netdev; 77 77 pdata->dev = dev; 78 78 79 - spin_lock_init(&pdata->lock); 80 79 spin_lock_init(&pdata->xpcs_lock); 81 80 mutex_init(&pdata->rss_mutex); 82 81 spin_lock_init(&pdata->tstamp_lock);
-3
drivers/net/ethernet/amd/xgbe/xgbe.h
··· 1004 1004 unsigned int pp3; 1005 1005 unsigned int pp4; 1006 1006 1007 - /* Overall device lock */ 1008 - spinlock_t lock; 1009 - 1010 1007 /* XPCS indirect addressing lock */ 1011 1008 spinlock_t xpcs_lock; 1012 1009 unsigned int xpcs_window_def_reg;
+2 -1
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
··· 1533 1533 if_id = (status & 0xFFFF0000) >> 16; 1534 1534 if (if_id >= ethsw->sw_attr.num_ifs) { 1535 1535 dev_err(dev, "Invalid if_id %d in IRQ status\n", if_id); 1536 - goto out; 1536 + goto out_clear; 1537 1537 } 1538 1538 port_priv = ethsw->ports[if_id]; 1539 1539 ··· 1553 1553 dpaa2_switch_port_connect_mac(port_priv); 1554 1554 } 1555 1555 1556 + out_clear: 1556 1557 err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, 1557 1558 DPSW_IRQ_INDEX_IF, status); 1558 1559 if (err)
+1 -1
drivers/net/ethernet/freescale/enetc/enetc.c
··· 3467 3467 priv->rx_ring[i] = bdr; 3468 3468 3469 3469 err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0, 3470 - ENETC_RXB_DMA_SIZE_XDP); 3470 + ENETC_RXB_TRUESIZE); 3471 3471 if (err) 3472 3472 goto free_vector; 3473 3473
+1
drivers/net/ethernet/intel/e1000e/defines.h
··· 33 33 34 34 /* Extended Device Control */ 35 35 #define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */ 36 + #define E1000_CTRL_EXT_DPG_EN 0x00000008 /* Dynamic Power Gating Enable */ 36 37 #define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */ 37 38 #define E1000_CTRL_EXT_FORCE_SMBUS 0x00000800 /* Force SMBus mode */ 38 39 #define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */
+3 -1
drivers/net/ethernet/intel/e1000e/e1000.h
··· 117 117 board_pch_cnp, 118 118 board_pch_tgp, 119 119 board_pch_adp, 120 - board_pch_mtp 120 + board_pch_mtp, 121 + board_pch_ptp 121 122 }; 122 123 123 124 struct e1000_ps_page { ··· 528 527 extern const struct e1000_info e1000_pch_tgp_info; 529 528 extern const struct e1000_info e1000_pch_adp_info; 530 529 extern const struct e1000_info e1000_pch_mtp_info; 530 + extern const struct e1000_info e1000_pch_ptp_info; 531 531 extern const struct e1000_info e1000_es2_info; 532 532 533 533 void e1000e_ptp_init(struct e1000_adapter *adapter);
-2
drivers/net/ethernet/intel/e1000e/hw.h
··· 118 118 #define E1000_DEV_ID_PCH_ARL_I219_V24 0x57A1 119 119 #define E1000_DEV_ID_PCH_PTP_I219_LM25 0x57B3 120 120 #define E1000_DEV_ID_PCH_PTP_I219_V25 0x57B4 121 - #define E1000_DEV_ID_PCH_PTP_I219_LM26 0x57B5 122 - #define E1000_DEV_ID_PCH_PTP_I219_V26 0x57B6 123 121 #define E1000_DEV_ID_PCH_PTP_I219_LM27 0x57B7 124 122 #define E1000_DEV_ID_PCH_PTP_I219_V27 0x57B8 125 123 #define E1000_DEV_ID_PCH_NVL_I219_LM29 0x57B9
+30 -1
drivers/net/ethernet/intel/e1000e/ich8lan.c
··· 528 528 529 529 phy->id = e1000_phy_unknown; 530 530 531 - if (hw->mac.type == e1000_pch_mtp) { 531 + if (hw->mac.type == e1000_pch_mtp || hw->mac.type == e1000_pch_ptp) { 532 532 phy->retry_count = 2; 533 533 e1000e_enable_phy_retry(hw); 534 534 } ··· 4932 4932 reg |= E1000_KABGTXD_BGSQLBIAS; 4933 4933 ew32(KABGTXD, reg); 4934 4934 4935 + /* The hardware reset value of the DPG_EN bit is 1. 4936 + * Clear DPG_EN to prevent unexpected autonomous power gating. 4937 + */ 4938 + if (hw->mac.type >= e1000_pch_ptp) { 4939 + reg = er32(CTRL_EXT); 4940 + reg &= ~E1000_CTRL_EXT_DPG_EN; 4941 + ew32(CTRL_EXT, reg); 4942 + } 4943 + 4935 4944 return 0; 4936 4945 } 4937 4946 ··· 6200 6191 6201 6192 const struct e1000_info e1000_pch_mtp_info = { 6202 6193 .mac = e1000_pch_mtp, 6194 + .flags = FLAG_IS_ICH 6195 + | FLAG_HAS_WOL 6196 + | FLAG_HAS_HW_TIMESTAMP 6197 + | FLAG_HAS_CTRLEXT_ON_LOAD 6198 + | FLAG_HAS_AMT 6199 + | FLAG_HAS_FLASH 6200 + | FLAG_HAS_JUMBO_FRAMES 6201 + | FLAG_APME_IN_WUC, 6202 + .flags2 = FLAG2_HAS_PHY_STATS 6203 + | FLAG2_HAS_EEE, 6204 + .pba = 26, 6205 + .max_hw_frame_size = 9022, 6206 + .get_variants = e1000_get_variants_ich8lan, 6207 + .mac_ops = &ich8_mac_ops, 6208 + .phy_ops = &ich8_phy_ops, 6209 + .nvm_ops = &spt_nvm_ops, 6210 + }; 6211 + 6212 + const struct e1000_info e1000_pch_ptp_info = { 6213 + .mac = e1000_pch_ptp, 6203 6214 .flags = FLAG_IS_ICH 6204 6215 | FLAG_HAS_WOL 6205 6216 | FLAG_HAS_HW_TIMESTAMP
+7 -8
drivers/net/ethernet/intel/e1000e/netdev.c
··· 55 55 [board_pch_tgp] = &e1000_pch_tgp_info, 56 56 [board_pch_adp] = &e1000_pch_adp_info, 57 57 [board_pch_mtp] = &e1000_pch_mtp_info, 58 + [board_pch_ptp] = &e1000_pch_ptp_info, 58 59 }; 59 60 60 61 struct e1000_reg_info { ··· 7923 7922 { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_mtp }, 7924 7923 { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_LM24), board_pch_mtp }, 7925 7924 { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_V24), board_pch_mtp }, 7926 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_mtp }, 7927 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_mtp }, 7928 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM26), board_pch_mtp }, 7929 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V26), board_pch_mtp }, 7930 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_mtp }, 7931 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_mtp }, 7932 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_mtp }, 7933 - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_mtp }, 7925 + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_ptp }, 7926 + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_ptp }, 7927 + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_ptp }, 7928 + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_ptp }, 7929 + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_ptp }, 7930 + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_ptp }, 7934 7931 7935 7932 { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ 7936 7933 };
+24 -17
drivers/net/ethernet/intel/i40e/i40e_main.c
··· 3569 3569 u16 pf_q = vsi->base_queue + ring->queue_index; 3570 3570 struct i40e_hw *hw = &vsi->back->hw; 3571 3571 struct i40e_hmc_obj_rxq rx_ctx; 3572 + u32 xdp_frame_sz; 3572 3573 int err = 0; 3573 3574 bool ok; 3574 3575 ··· 3579 3578 memset(&rx_ctx, 0, sizeof(rx_ctx)); 3580 3579 3581 3580 ring->rx_buf_len = vsi->rx_buf_len; 3581 + xdp_frame_sz = i40e_rx_pg_size(ring) / 2; 3582 3582 3583 3583 /* XDP RX-queue info only needed for RX rings exposed to XDP */ 3584 3584 if (ring->vsi->type != I40E_VSI_MAIN) 3585 3585 goto skip; 3586 3586 3587 - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { 3588 - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 3589 - ring->queue_index, 3590 - ring->q_vector->napi.napi_id, 3591 - ring->rx_buf_len); 3592 - if (err) 3593 - return err; 3594 - } 3595 - 3596 3587 ring->xsk_pool = i40e_xsk_pool(ring); 3597 3588 if (ring->xsk_pool) { 3598 - xdp_rxq_info_unreg(&ring->xdp_rxq); 3589 + xdp_frame_sz = xsk_pool_get_rx_frag_step(ring->xsk_pool); 3599 3590 ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); 3600 3591 err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 3601 3592 ring->queue_index, 3602 3593 ring->q_vector->napi.napi_id, 3603 - ring->rx_buf_len); 3594 + xdp_frame_sz); 3604 3595 if (err) 3605 3596 return err; 3606 3597 err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 3607 3598 MEM_TYPE_XSK_BUFF_POOL, 3608 3599 NULL); 3609 3600 if (err) 3610 - return err; 3601 + goto unreg_xdp; 3611 3602 dev_info(&vsi->back->pdev->dev, 3612 3603 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", 3613 3604 ring->queue_index); 3614 3605 3615 3606 } else { 3607 + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 3608 + ring->queue_index, 3609 + ring->q_vector->napi.napi_id, 3610 + xdp_frame_sz); 3611 + if (err) 3612 + return err; 3616 3613 err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 3617 3614 MEM_TYPE_PAGE_SHARED, 3618 3615 NULL); 3619 3616 if (err) 3620 - return err; 3617 + goto unreg_xdp; 3621 3618 } 3622 
3619 3623 3620 skip: 3624 - xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); 3621 + xdp_init_buff(&ring->xdp, xdp_frame_sz, &ring->xdp_rxq); 3625 3622 3626 3623 rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, 3627 3624 BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); ··· 3653 3654 dev_info(&vsi->back->pdev->dev, 3654 3655 "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", 3655 3656 ring->queue_index, pf_q, err); 3656 - return -ENOMEM; 3657 + err = -ENOMEM; 3658 + goto unreg_xdp; 3657 3659 } 3658 3660 3659 3661 /* set the context in the HMC */ ··· 3663 3663 dev_info(&vsi->back->pdev->dev, 3664 3664 "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", 3665 3665 ring->queue_index, pf_q, err); 3666 - return -ENOMEM; 3666 + err = -ENOMEM; 3667 + goto unreg_xdp; 3667 3668 } 3668 3669 3669 3670 /* configure Rx buffer alignment */ ··· 3672 3671 if (I40E_2K_TOO_SMALL_WITH_PADDING) { 3673 3672 dev_info(&vsi->back->pdev->dev, 3674 3673 "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); 3675 - return -EOPNOTSUPP; 3674 + err = -EOPNOTSUPP; 3675 + goto unreg_xdp; 3676 3676 } 3677 3677 clear_ring_build_skb_enabled(ring); 3678 3678 } else { ··· 3703 3701 } 3704 3702 3705 3703 return 0; 3704 + unreg_xdp: 3705 + if (ring->vsi->type == I40E_VSI_MAIN) 3706 + xdp_rxq_info_unreg(&ring->xdp_rxq); 3707 + 3708 + return err; 3706 3709 } 3707 3710 3708 3711 /**
+1 -1
drivers/net/ethernet/intel/i40e/i40e_trace.h
··· 88 88 __entry->rx_clean_complete = rx_clean_complete; 89 89 __entry->tx_clean_complete = tx_clean_complete; 90 90 __entry->irq_num = q->irq_num; 91 - __entry->curr_cpu = get_cpu(); 91 + __entry->curr_cpu = smp_processor_id(); 92 92 __assign_str(qname); 93 93 __assign_str(dev_name); 94 94 __assign_bitmask(irq_affinity, cpumask_bits(&q->affinity_mask),
+3 -2
drivers/net/ethernet/intel/i40e/i40e_txrx.c
··· 1470 1470 if (!rx_ring->rx_bi) 1471 1471 return; 1472 1472 1473 + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 1474 + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 1475 + 1473 1476 if (rx_ring->xsk_pool) { 1474 1477 i40e_xsk_clean_rx_ring(rx_ring); 1475 1478 goto skip_free; ··· 1530 1527 void i40e_free_rx_resources(struct i40e_ring *rx_ring) 1531 1528 { 1532 1529 i40e_clean_rx_ring(rx_ring); 1533 - if (rx_ring->vsi->type == I40E_VSI_MAIN) 1534 - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 1535 1530 rx_ring->xdp_prog = NULL; 1536 1531 kfree(rx_ring->rx_bi); 1537 1532 rx_ring->rx_bi = NULL;
+16 -1
drivers/net/ethernet/intel/iavf/iavf_main.c
··· 2793 2793 netdev->watchdog_timeo = 5 * HZ; 2794 2794 2795 2795 netdev->min_mtu = ETH_MIN_MTU; 2796 - netdev->max_mtu = LIBIE_MAX_MTU; 2796 + 2797 + /* PF/VF API: vf_res->max_mtu is max frame size (not MTU). 2798 + * Convert to MTU. 2799 + */ 2800 + if (!adapter->vf_res->max_mtu) { 2801 + netdev->max_mtu = LIBIE_MAX_MTU; 2802 + } else if (adapter->vf_res->max_mtu < LIBETH_RX_LL_LEN + ETH_MIN_MTU || 2803 + adapter->vf_res->max_mtu > 2804 + LIBETH_RX_LL_LEN + LIBIE_MAX_MTU) { 2805 + netdev_warn_once(adapter->netdev, 2806 + "invalid max frame size %d from PF, using default MTU %d", 2807 + adapter->vf_res->max_mtu, LIBIE_MAX_MTU); 2808 + netdev->max_mtu = LIBIE_MAX_MTU; 2809 + } else { 2810 + netdev->max_mtu = adapter->vf_res->max_mtu - LIBETH_RX_LL_LEN; 2811 + } 2797 2812 2798 2813 if (!is_valid_ether_addr(adapter->hw.mac.addr)) { 2799 2814 dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
+1
drivers/net/ethernet/intel/ice/ice.h
··· 987 987 void ice_print_link_msg(struct ice_vsi *vsi, bool isup); 988 988 int ice_plug_aux_dev(struct ice_pf *pf); 989 989 void ice_unplug_aux_dev(struct ice_pf *pf); 990 + void ice_rdma_finalize_setup(struct ice_pf *pf); 990 991 int ice_init_rdma(struct ice_pf *pf); 991 992 void ice_deinit_rdma(struct ice_pf *pf); 992 993 bool ice_is_wol_supported(struct ice_hw *hw);
+14 -24
drivers/net/ethernet/intel/ice/ice_base.c
··· 124 124 if (vsi->type == ICE_VSI_VF) { 125 125 ice_calc_vf_reg_idx(vsi->vf, q_vector); 126 126 goto out; 127 + } else if (vsi->type == ICE_VSI_LB) { 128 + goto skip_alloc; 127 129 } else if (vsi->type == ICE_VSI_CTRL && vsi->vf) { 128 130 struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); 129 131 ··· 661 659 { 662 660 struct device *dev = ice_pf_to_dev(ring->vsi->back); 663 661 u32 num_bufs = ICE_DESC_UNUSED(ring); 664 - u32 rx_buf_len; 665 662 int err; 666 663 667 - if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { 668 - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { 669 - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 670 - ring->q_index, 671 - ring->q_vector->napi.napi_id, 672 - ring->rx_buf_len); 673 - if (err) 674 - return err; 675 - } 676 - 664 + if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF || 665 + ring->vsi->type == ICE_VSI_LB) { 677 666 ice_rx_xsk_pool(ring); 678 667 err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool); 679 668 if (err) 680 669 return err; 681 670 682 671 if (ring->xsk_pool) { 683 - xdp_rxq_info_unreg(&ring->xdp_rxq); 684 - 685 - rx_buf_len = 686 - xsk_pool_get_rx_frame_size(ring->xsk_pool); 672 + u32 frag_size = 673 + xsk_pool_get_rx_frag_step(ring->xsk_pool); 687 674 err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 688 675 ring->q_index, 689 676 ring->q_vector->napi.napi_id, 690 - rx_buf_len); 677 + frag_size); 691 678 if (err) 692 679 return err; 693 680 err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, ··· 693 702 if (err) 694 703 return err; 695 704 696 - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { 697 - err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 698 - ring->q_index, 699 - ring->q_vector->napi.napi_id, 700 - ring->rx_buf_len); 701 - if (err) 702 - goto err_destroy_fq; 703 - } 705 + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 706 + ring->q_index, 707 + ring->q_vector->napi.napi_id, 708 + ring->truesize); 709 + if (err) 710 + goto err_destroy_fq; 711 + 
704 712 xdp_rxq_info_attach_page_pool(&ring->xdp_rxq, 705 713 ring->pp); 706 714 }
+11 -4
drivers/net/ethernet/intel/ice/ice_common.c
··· 1816 1816 case ice_aqc_opc_lldp_stop: 1817 1817 case ice_aqc_opc_lldp_start: 1818 1818 case ice_aqc_opc_lldp_filter_ctrl: 1819 + case ice_aqc_opc_sff_eeprom: 1819 1820 return true; 1820 1821 } 1821 1822 ··· 1842 1841 { 1843 1842 struct libie_aq_desc desc_cpy; 1844 1843 bool is_cmd_for_retry; 1844 + u8 *buf_cpy = NULL; 1845 1845 u8 idx = 0; 1846 1846 u16 opcode; 1847 1847 int status; ··· 1852 1850 memset(&desc_cpy, 0, sizeof(desc_cpy)); 1853 1851 1854 1852 if (is_cmd_for_retry) { 1855 - /* All retryable cmds are direct, without buf. */ 1856 - WARN_ON(buf); 1853 + if (buf) { 1854 + buf_cpy = kmemdup(buf, buf_size, GFP_KERNEL); 1855 + if (!buf_cpy) 1856 + return -ENOMEM; 1857 + } 1857 1858 1858 1859 memcpy(&desc_cpy, desc, sizeof(desc_cpy)); 1859 1860 } ··· 1868 1863 hw->adminq.sq_last_status != LIBIE_AQ_RC_EBUSY) 1869 1864 break; 1870 1865 1866 + if (buf_cpy) 1867 + memcpy(buf, buf_cpy, buf_size); 1871 1868 memcpy(desc, &desc_cpy, sizeof(desc_cpy)); 1872 - 1873 1869 msleep(ICE_SQ_SEND_DELAY_TIME_MS); 1874 1870 1875 1871 } while (++idx < ICE_SQ_SEND_MAX_EXECUTE); 1876 1872 1873 + kfree(buf_cpy); 1877 1874 return status; 1878 1875 } 1879 1876 ··· 6398 6391 struct ice_aqc_lldp_filter_ctrl *cmd; 6399 6392 struct libie_aq_desc desc; 6400 6393 6401 - if (vsi->type != ICE_VSI_PF || !ice_fw_supports_lldp_fltr_ctrl(hw)) 6394 + if (!ice_fw_supports_lldp_fltr_ctrl(hw)) 6402 6395 return -EOPNOTSUPP; 6403 6396 6404 6397 cmd = libie_aq_raw(&desc);
+28 -23
drivers/net/ethernet/intel/ice/ice_ethtool.c
··· 1289 1289 test_vsi->netdev = netdev; 1290 1290 tx_ring = test_vsi->tx_rings[0]; 1291 1291 rx_ring = test_vsi->rx_rings[0]; 1292 + /* Dummy q_vector and napi. Fill the minimum required for 1293 + * ice_rxq_pp_create(). 1294 + */ 1295 + rx_ring->q_vector->napi.dev = netdev; 1292 1296 1293 1297 if (ice_lbtest_prepare_rings(test_vsi)) { 1294 1298 ret = 2; ··· 3332 3328 rx_rings = kzalloc_objs(*rx_rings, vsi->num_rxq); 3333 3329 if (!rx_rings) { 3334 3330 err = -ENOMEM; 3335 - goto done; 3331 + goto free_xdp; 3336 3332 } 3337 3333 3338 3334 ice_for_each_rxq(vsi, i) { ··· 3342 3338 rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; 3343 3339 rx_rings[i].desc = NULL; 3344 3340 rx_rings[i].xdp_buf = NULL; 3341 + rx_rings[i].xdp_rxq = (struct xdp_rxq_info){ }; 3345 3342 3346 3343 /* this is to allow wr32 to have something to write to 3347 3344 * during early allocation of Rx buffers ··· 3360 3355 } 3361 3356 kfree(rx_rings); 3362 3357 err = -ENOMEM; 3363 - goto free_tx; 3358 + goto free_xdp; 3364 3359 } 3365 3360 } 3366 3361 ··· 3411 3406 ice_up(vsi); 3412 3407 } 3413 3408 goto done; 3409 + 3410 + free_xdp: 3411 + if (xdp_rings) { 3412 + ice_for_each_xdp_txq(vsi, i) 3413 + ice_free_tx_ring(&xdp_rings[i]); 3414 + kfree(xdp_rings); 3415 + } 3414 3416 3415 3417 free_tx: 3416 3418 /* error cleanup if the Rx allocations failed after getting Tx */ ··· 4517 4505 u8 addr = ICE_I2C_EEPROM_DEV_ADDR; 4518 4506 struct ice_hw *hw = &pf->hw; 4519 4507 bool is_sfp = false; 4520 - unsigned int i, j; 4508 + unsigned int i; 4521 4509 u16 offset = 0; 4522 4510 u8 page = 0; 4523 4511 int status; ··· 4559 4547 if (page == 0 || !(data[0x2] & 0x4)) { 4560 4548 u32 copy_len; 4561 4549 4562 - /* If i2c bus is busy due to slow page change or 4563 - * link management access, call can fail. This is normal. 4564 - * So we retry this a few times. 
4565 - */ 4566 - for (j = 0; j < 4; j++) { 4567 - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 4568 - !is_sfp, value, 4569 - SFF_READ_BLOCK_SIZE, 4570 - 0, NULL); 4571 - netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n", 4572 - addr, offset, page, is_sfp, 4573 - value[0], value[1], value[2], value[3], 4574 - value[4], value[5], value[6], value[7], 4575 - status); 4576 - if (status) { 4577 - usleep_range(1500, 2500); 4578 - memset(value, 0, SFF_READ_BLOCK_SIZE); 4579 - continue; 4580 - } 4581 - break; 4550 + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 4551 + !is_sfp, value, 4552 + SFF_READ_BLOCK_SIZE, 4553 + 0, NULL); 4554 + netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%pe)\n", 4555 + addr, offset, page, is_sfp, 4556 + value[0], value[1], value[2], value[3], 4557 + value[4], value[5], value[6], value[7], 4558 + ERR_PTR(status)); 4559 + if (status) { 4560 + netdev_err(netdev, "%s: error reading module EEPROM: status %pe\n", 4561 + __func__, ERR_PTR(status)); 4562 + return status; 4582 4563 } 4583 4564 4584 4565 /* Make sure we have enough room for the new block */
+34 -10
drivers/net/ethernet/intel/ice/ice_idc.c
··· 361 361 } 362 362 363 363 /** 364 + * ice_rdma_finalize_setup - Complete RDMA setup after VSI is ready 365 + * @pf: ptr to ice_pf 366 + * 367 + * Sets VSI-dependent information and plugs aux device. 368 + * Must be called after ice_init_rdma(), ice_vsi_rebuild(), and 369 + * ice_dcb_rebuild() complete. 370 + */ 371 + void ice_rdma_finalize_setup(struct ice_pf *pf) 372 + { 373 + struct device *dev = ice_pf_to_dev(pf); 374 + struct iidc_rdma_priv_dev_info *privd; 375 + int ret; 376 + 377 + if (!ice_is_rdma_ena(pf) || !pf->cdev_info) 378 + return; 379 + 380 + privd = pf->cdev_info->iidc_priv; 381 + if (!privd || !pf->vsi || !pf->vsi[0] || !pf->vsi[0]->netdev) 382 + return; 383 + 384 + /* Assign VSI info now that VSI is valid */ 385 + privd->netdev = pf->vsi[0]->netdev; 386 + privd->vport_id = pf->vsi[0]->vsi_num; 387 + 388 + /* Update QoS info after DCB has been rebuilt */ 389 + ice_setup_dcb_qos_info(pf, &privd->qos_info); 390 + 391 + ret = ice_plug_aux_dev(pf); 392 + if (ret) 393 + dev_warn(dev, "Failed to plug RDMA aux device: %d\n", ret); 394 + } 395 + 396 + /** 364 397 * ice_init_rdma - initializes PF for RDMA use 365 398 * @pf: ptr to ice_pf 366 399 */ ··· 431 398 } 432 399 433 400 cdev->iidc_priv = privd; 434 - privd->netdev = pf->vsi[0]->netdev; 435 401 436 402 privd->hw_addr = (u8 __iomem *)pf->hw.hw_addr; 437 403 cdev->pdev = pf->pdev; 438 - privd->vport_id = pf->vsi[0]->vsi_num; 439 404 440 405 pf->cdev_info->rdma_protocol |= IIDC_RDMA_PROTOCOL_ROCEV2; 441 - ice_setup_dcb_qos_info(pf, &privd->qos_info); 442 - ret = ice_plug_aux_dev(pf); 443 - if (ret) 444 - goto err_plug_aux_dev; 406 + 445 407 return 0; 446 408 447 - err_plug_aux_dev: 448 - pf->cdev_info->adev = NULL; 449 - xa_erase(&ice_aux_id, pf->aux_idx); 450 409 err_alloc_xa: 451 410 kfree(privd); 452 411 err_privd_alloc: ··· 457 432 if (!ice_is_rdma_ena(pf)) 458 433 return; 459 434 460 - ice_unplug_aux_dev(pf); 461 435 xa_erase(&ice_aux_id, pf->aux_idx); 462 436 kfree(pf->cdev_info->iidc_priv); 
463 437 kfree(pf->cdev_info);
+10 -5
drivers/net/ethernet/intel/ice/ice_lib.c
··· 107 107 if (!vsi->rxq_map) 108 108 goto err_rxq_map; 109 109 110 - /* There is no need to allocate q_vectors for a loopback VSI. */ 111 - if (vsi->type == ICE_VSI_LB) 112 - return 0; 113 - 114 110 /* allocate memory for q_vector pointers */ 115 111 vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors, 116 112 sizeof(*vsi->q_vectors), GFP_KERNEL); ··· 237 241 case ICE_VSI_LB: 238 242 vsi->alloc_txq = 1; 239 243 vsi->alloc_rxq = 1; 244 + /* A dummy q_vector, no actual IRQ. */ 245 + vsi->num_q_vectors = 1; 240 246 break; 241 247 default: 242 248 dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type); ··· 2424 2426 } 2425 2427 break; 2426 2428 case ICE_VSI_LB: 2427 - ret = ice_vsi_alloc_rings(vsi); 2429 + ret = ice_vsi_alloc_q_vectors(vsi); 2428 2430 if (ret) 2429 2431 goto unroll_vsi_init; 2432 + 2433 + ret = ice_vsi_alloc_rings(vsi); 2434 + if (ret) 2435 + goto unroll_alloc_q_vector; 2430 2436 2431 2437 ret = ice_vsi_alloc_ring_stats(vsi); 2432 2438 if (ret) 2433 2439 goto unroll_vector_base; 2440 + 2441 + /* Simply map the dummy q_vector to the only rx_ring */ 2442 + vsi->rx_rings[0]->q_vector = vsi->q_vectors[0]; 2434 2443 2435 2444 break; 2436 2445 default:
+6 -1
drivers/net/ethernet/intel/ice/ice_main.c
··· 5138 5138 if (err) 5139 5139 goto err_init_rdma; 5140 5140 5141 + /* Finalize RDMA: VSI already created, assign info and plug device */ 5142 + ice_rdma_finalize_setup(pf); 5143 + 5141 5144 ice_service_task_restart(pf); 5142 5145 5143 5146 clear_bit(ICE_DOWN, pf->state); ··· 5172 5169 5173 5170 devl_assert_locked(priv_to_devlink(pf)); 5174 5171 5172 + ice_unplug_aux_dev(pf); 5175 5173 ice_deinit_rdma(pf); 5176 5174 ice_deinit_features(pf); 5177 5175 ice_tc_indir_block_unregister(vsi); ··· 5599 5595 */ 5600 5596 disabled = ice_service_task_stop(pf); 5601 5597 5598 + ice_unplug_aux_dev(pf); 5602 5599 ice_deinit_rdma(pf); 5603 5600 5604 5601 /* Already suspended?, then there is nothing to do */ ··· 7864 7859 7865 7860 ice_health_clear(pf); 7866 7861 7867 - ice_plug_aux_dev(pf); 7862 + ice_rdma_finalize_setup(pf); 7868 7863 if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) 7869 7864 ice_lag_rebuild(pf); 7870 7865
+3 -1
drivers/net/ethernet/intel/ice/ice_txrx.c
··· 560 560 i = 0; 561 561 } 562 562 563 - if (rx_ring->vsi->type == ICE_VSI_PF && 563 + if ((rx_ring->vsi->type == ICE_VSI_PF || 564 + rx_ring->vsi->type == ICE_VSI_SF || 565 + rx_ring->vsi->type == ICE_VSI_LB) && 564 566 xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { 565 567 xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); 566 568 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+3
drivers/net/ethernet/intel/ice/ice_xsk.c
··· 899 899 u16 ntc = rx_ring->next_to_clean; 900 900 u16 ntu = rx_ring->next_to_use; 901 901 902 + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) 903 + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 904 + 902 905 while (ntc != ntu) { 903 906 struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc); 904 907
-3
drivers/net/ethernet/intel/idpf/idpf_ethtool.c
··· 307 307 vport_config = vport->adapter->vport_config[np->vport_idx]; 308 308 user_config = &vport_config->user_config; 309 309 310 - if (!idpf_sideband_action_ena(vport, fsp)) 311 - return -EOPNOTSUPP; 312 - 313 310 rule = kzalloc_flex(*rule, rule_info, 1); 314 311 if (!rule) 315 312 return -ENOMEM;
+1
drivers/net/ethernet/intel/idpf/idpf_lib.c
··· 1318 1318 1319 1319 free_rss_key: 1320 1320 kfree(rss_data->rss_key); 1321 + rss_data->rss_key = NULL; 1321 1322 free_qreg_chunks: 1322 1323 idpf_vport_deinit_queue_reg_chunks(adapter->vport_config[idx]); 1323 1324 free_vector_idxs:
+10 -4
drivers/net/ethernet/intel/idpf/idpf_txrx.c
··· 1314 1314 struct idpf_txq_group *txq_grp = &rsrc->txq_grps[i]; 1315 1315 1316 1316 for (unsigned int j = 0; j < txq_grp->num_txq; j++) { 1317 + if (!txq_grp->txqs[j]) 1318 + continue; 1319 + 1317 1320 if (idpf_queue_has(FLOW_SCH_EN, txq_grp->txqs[j])) { 1318 1321 kfree(txq_grp->txqs[j]->refillq); 1319 1322 txq_grp->txqs[j]->refillq = NULL; ··· 1342 1339 */ 1343 1340 static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp) 1344 1341 { 1342 + if (!rx_qgrp->splitq.bufq_sets) 1343 + return; 1344 + 1345 1345 for (unsigned int i = 0; i < rx_qgrp->splitq.num_bufq_sets; i++) { 1346 1346 struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i]; 1347 1347 ··· 2342 2336 2343 2337 do { 2344 2338 struct idpf_splitq_4b_tx_compl_desc *tx_desc; 2345 - struct idpf_tx_queue *target; 2339 + struct idpf_tx_queue *target = NULL; 2346 2340 u32 ctype_gen, id; 2347 2341 2348 2342 tx_desc = flow ? &complq->comp[ntc].common : ··· 2362 2356 target = complq->txq_grp->txqs[id]; 2363 2357 2364 2358 idpf_queue_clear(SW_MARKER, target); 2365 - if (target == txq) 2366 - break; 2367 2359 2368 2360 next: 2369 2361 if (unlikely(++ntc == complq->desc_count)) { 2370 2362 ntc = 0; 2371 2363 gen_flag = !gen_flag; 2372 2364 } 2365 + if (target == txq) 2366 + break; 2373 2367 } while (time_before(jiffies, timeout)); 2374 2368 2375 2369 idpf_queue_assign(GEN_CHK, complq, gen_flag); ··· 4065 4059 continue; 4066 4060 4067 4061 name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, 4068 - vec_name, vidx); 4062 + vec_name, vector); 4069 4063 4070 4064 err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, 4071 4065 name, q_vector);
+5 -1
drivers/net/ethernet/intel/idpf/xdp.c
··· 47 47 { 48 48 const struct idpf_vport *vport = rxq->q_vector->vport; 49 49 const struct idpf_q_vec_rsrc *rsrc; 50 + u32 frag_size = 0; 50 51 bool split; 51 52 int err; 52 53 54 + if (idpf_queue_has(XSK, rxq)) 55 + frag_size = rxq->bufq_sets[0].bufq.truesize; 56 + 53 57 err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx, 54 58 rxq->q_vector->napi.napi_id, 55 - rxq->rx_buf_size); 59 + frag_size); 56 60 if (err) 57 61 return err; 58 62
+1
drivers/net/ethernet/intel/idpf/xsk.c
··· 403 403 bufq->pending = fq.pending; 404 404 bufq->thresh = fq.thresh; 405 405 bufq->rx_buf_size = fq.buf_len; 406 + bufq->truesize = fq.truesize; 406 407 407 408 if (!idpf_xskfq_refill(bufq)) 408 409 netdev_err(bufq->pool->netdev,
+30 -8
drivers/net/ethernet/intel/igb/igb_xsk.c
··· 524 524 return nb_pkts < budget; 525 525 } 526 526 527 + static u32 igb_sw_irq_prep(struct igb_q_vector *q_vector) 528 + { 529 + u32 eics = 0; 530 + 531 + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 532 + eics = q_vector->eims_value; 533 + 534 + return eics; 535 + } 536 + 527 537 int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) 528 538 { 529 539 struct igb_adapter *adapter = netdev_priv(dev); ··· 552 542 553 543 ring = adapter->tx_ring[qid]; 554 544 555 - if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags)) 556 - return -ENETDOWN; 557 - 558 545 if (!READ_ONCE(ring->xsk_pool)) 559 546 return -EINVAL; 560 547 561 - if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { 548 + if (flags & XDP_WAKEUP_TX) { 549 + if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags)) 550 + return -ENETDOWN; 551 + 552 + eics |= igb_sw_irq_prep(ring->q_vector); 553 + } 554 + 555 + if (flags & XDP_WAKEUP_RX) { 556 + /* If IGB_FLAG_QUEUE_PAIRS is active, the q_vector 557 + * and NAPI is shared between RX and TX. 558 + * If NAPI is already running it would be marked as missed 559 + * from the TX path, making this RX call a NOP 560 + */ 561 + ring = adapter->rx_ring[qid]; 562 + eics |= igb_sw_irq_prep(ring->q_vector); 563 + } 564 + 565 + if (eics) { 562 566 /* Cause software interrupt */ 563 - if (adapter->flags & IGB_FLAG_HAS_MSIX) { 564 - eics |= ring->q_vector->eims_value; 567 + if (adapter->flags & IGB_FLAG_HAS_MSIX) 565 568 wr32(E1000_EICS, eics); 566 - } else { 569 + else 567 570 wr32(E1000_ICS, E1000_ICS_RXDMT0); 568 - } 569 571 } 570 572 571 573 return 0;
+24 -10
drivers/net/ethernet/intel/igc/igc_main.c
··· 6906 6906 return nxmit; 6907 6907 } 6908 6908 6909 - static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 6910 - struct igc_q_vector *q_vector) 6909 + static u32 igc_sw_irq_prep(struct igc_q_vector *q_vector) 6911 6910 { 6912 - struct igc_hw *hw = &adapter->hw; 6913 6911 u32 eics = 0; 6914 6912 6915 - eics |= q_vector->eims_value; 6916 - wr32(IGC_EICS, eics); 6913 + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6914 + eics = q_vector->eims_value; 6915 + 6916 + return eics; 6917 6917 } 6918 6918 6919 6919 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 6920 6920 { 6921 6921 struct igc_adapter *adapter = netdev_priv(dev); 6922 - struct igc_q_vector *q_vector; 6922 + struct igc_hw *hw = &adapter->hw; 6923 6923 struct igc_ring *ring; 6924 + u32 eics = 0; 6924 6925 6925 6926 if (test_bit(__IGC_DOWN, &adapter->state)) 6926 6927 return -ENETDOWN; 6927 6928 6928 6929 if (!igc_xdp_is_enabled(adapter)) 6929 6930 return -ENXIO; 6930 - 6931 + /* Check if queue_id is valid. Tx and Rx queue numbers are always same */ 6931 6932 if (queue_id >= adapter->num_rx_queues) 6932 6933 return -EINVAL; 6933 6934 ··· 6937 6936 if (!ring->xsk_pool) 6938 6937 return -ENXIO; 6939 6938 6940 - q_vector = adapter->q_vector[queue_id]; 6941 - if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6942 - igc_trigger_rxtxq_interrupt(adapter, q_vector); 6939 + if (flags & XDP_WAKEUP_RX) 6940 + eics |= igc_sw_irq_prep(ring->q_vector); 6941 + 6942 + if (flags & XDP_WAKEUP_TX) { 6943 + /* If IGC_FLAG_QUEUE_PAIRS is active, the q_vector 6944 + * and NAPI is shared between RX and TX. 6945 + * If NAPI is already running it would be marked as missed 6946 + * from the RX path, making this TX call a NOP 6947 + */ 6948 + ring = adapter->tx_ring[queue_id]; 6949 + eics |= igc_sw_irq_prep(ring->q_vector); 6950 + } 6951 + 6952 + if (eics) 6953 + /* Cause software interrupt */ 6954 + wr32(IGC_EICS, eics); 6943 6955 6944 6956 return 0; 6945 6957 }
+2 -1
drivers/net/ethernet/intel/igc/igc_ptp.c
··· 550 550 tstamp->buffer_type = 0; 551 551 552 552 /* Trigger txrx interrupt for transmit completion */ 553 - igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, 0); 553 + igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, 554 + XDP_WAKEUP_TX); 554 555 555 556 return; 556 557 }
+2 -1
drivers/net/ethernet/intel/ixgbevf/vf.c
··· 852 852 if (!mac->get_link_status) 853 853 goto out; 854 854 855 - if (hw->mac.type == ixgbe_mac_e610_vf) { 855 + if (hw->mac.type == ixgbe_mac_e610_vf && 856 + hw->api_version >= ixgbe_mbox_api_16) { 856 857 ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up); 857 858 if (ret_val) 858 859 goto out;
+1
drivers/net/ethernet/intel/libeth/xsk.c
··· 167 167 fq->pending = fq->count; 168 168 fq->thresh = libeth_xdp_queue_threshold(fq->count); 169 169 fq->buf_len = xsk_pool_get_rx_frame_size(fq->pool); 170 + fq->truesize = xsk_pool_get_rx_frag_step(fq->pool); 170 171 171 172 return 0; 172 173 }
+4
drivers/net/ethernet/intel/libie/fwlog.c
··· 1049 1049 { 1050 1050 int status; 1051 1051 1052 + /* if FW logging isn't supported it means no configuration was done */ 1053 + if (!libie_fwlog_supported(fwlog)) 1054 + return; 1055 + 1052 1056 /* make sure FW logging is disabled to not put the FW in a weird state 1053 1057 * for the next driver load 1054 1058 */
+32 -16
drivers/net/ethernet/marvell/octeon_ep/octep_main.c
··· 554 554 } 555 555 556 556 /** 557 + * octep_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. 558 + * 559 + * @iq: Octeon Tx queue data structure. 560 + * @oq: Octeon Rx queue data structure. 561 + */ 562 + static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq) 563 + { 564 + u32 pkts_pend = READ_ONCE(oq->pkts_pending); 565 + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); 566 + u32 pkts_processed = READ_ONCE(iq->pkts_processed); 567 + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); 568 + 569 + netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 570 + if (pkts_processed) { 571 + writel(pkts_processed, iq->inst_cnt_reg); 572 + readl(iq->inst_cnt_reg); 573 + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); 574 + WRITE_ONCE(iq->pkts_processed, 0); 575 + } 576 + if (last_pkt_count - pkts_pend) { 577 + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); 578 + readl(oq->pkts_sent_reg); 579 + WRITE_ONCE(oq->last_pkt_count, pkts_pend); 580 + } 581 + 582 + /* Flush the previous writes before writing to RESEND bit */ 583 + smp_wmb(); 584 + } 585 + 586 + /** 557 587 * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. 558 588 * 559 589 * @iq: Octeon Tx queue data structure. 
··· 591 561 */ 592 562 static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) 593 563 { 594 - u32 pkts_pend = oq->pkts_pending; 595 - 596 - netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 597 - if (iq->pkts_processed) { 598 - writel(iq->pkts_processed, iq->inst_cnt_reg); 599 - iq->pkt_in_done -= iq->pkts_processed; 600 - iq->pkts_processed = 0; 601 - } 602 - if (oq->last_pkt_count - pkts_pend) { 603 - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); 604 - oq->last_pkt_count = pkts_pend; 605 - } 606 - 607 - /* Flush the previous wrties before writing to RESEND bit */ 608 - wmb(); 609 564 writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); 610 565 writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); 611 566 } ··· 616 601 if (tx_pending || rx_done >= budget) 617 602 return budget; 618 603 619 - napi_complete(napi); 604 + octep_update_pkt(ioq_vector->iq, ioq_vector->oq); 605 + napi_complete_done(napi, rx_done); 620 606 octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); 621 607 return rx_done; 622 608 }
+19 -8
drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
··· 324 324 struct octep_oq *oq) 325 325 { 326 326 u32 pkt_count, new_pkts; 327 + u32 last_pkt_count, pkts_pending; 327 328 328 329 pkt_count = readl(oq->pkts_sent_reg); 329 - new_pkts = pkt_count - oq->last_pkt_count; 330 + last_pkt_count = READ_ONCE(oq->last_pkt_count); 331 + new_pkts = pkt_count - last_pkt_count; 330 332 333 + if (pkt_count < last_pkt_count) { 334 + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", 335 + oq->q_no, pkt_count, last_pkt_count); 336 + } 331 337 /* Clear the hardware packets counter register if the rx queue is 332 338 * being processed continuously with-in a single interrupt and 333 339 * reached half its max value. ··· 344 338 pkt_count = readl(oq->pkts_sent_reg); 345 339 new_pkts += pkt_count; 346 340 } 347 - oq->last_pkt_count = pkt_count; 348 - oq->pkts_pending += new_pkts; 341 + WRITE_ONCE(oq->last_pkt_count, pkt_count); 342 + pkts_pending = READ_ONCE(oq->pkts_pending); 343 + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); 349 344 return new_pkts; 350 345 } 351 346 ··· 421 414 u16 rx_ol_flags; 422 415 u32 read_idx; 423 416 424 - read_idx = oq->host_read_idx; 417 + read_idx = READ_ONCE(oq->host_read_idx); 425 418 rx_bytes = 0; 426 419 desc_used = 0; 427 420 for (pkt = 0; pkt < pkts_to_process; pkt++) { ··· 506 499 napi_gro_receive(oq->napi, skb); 507 500 } 508 501 509 - oq->host_read_idx = read_idx; 502 + WRITE_ONCE(oq->host_read_idx, read_idx); 510 503 oq->refill_count += desc_used; 511 504 oq->stats->packets += pkt; 512 505 oq->stats->bytes += rx_bytes; ··· 529 522 { 530 523 u32 pkts_available, pkts_processed, total_pkts_processed; 531 524 struct octep_device *oct = oq->octep_dev; 525 + u32 pkts_pending; 532 526 533 527 pkts_available = 0; 534 528 pkts_processed = 0; 535 529 total_pkts_processed = 0; 536 530 while (total_pkts_processed < budget) { 537 531 /* update pending count only when current one exhausted */ 538 - if (oq->pkts_pending == 0) 532 + pkts_pending = READ_ONCE(oq->pkts_pending); 533 + 
if (pkts_pending == 0) 539 534 octep_oq_check_hw_for_pkts(oct, oq); 535 + pkts_pending = READ_ONCE(oq->pkts_pending); 540 536 pkts_available = min(budget - total_pkts_processed, 541 - oq->pkts_pending); 537 + pkts_pending); 542 538 if (!pkts_available) 543 539 break; 544 540 545 541 pkts_processed = __octep_oq_process_rx(oct, oq, 546 542 pkts_available); 547 - oq->pkts_pending -= pkts_processed; 543 + pkts_pending = READ_ONCE(oq->pkts_pending); 544 + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); 548 545 total_pkts_processed += pkts_processed; 549 546 } 550 547
+34 -16
drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
··· 286 286 } 287 287 288 288 /** 289 + * octep_vf_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. 290 + * 291 + * @iq: Octeon Tx queue data structure. 292 + * @oq: Octeon Rx queue data structure. 293 + */ 294 + 295 + static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq) 296 + { 297 + u32 pkts_pend = READ_ONCE(oq->pkts_pending); 298 + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); 299 + u32 pkts_processed = READ_ONCE(iq->pkts_processed); 300 + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); 301 + 302 + netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 303 + if (pkts_processed) { 304 + writel(pkts_processed, iq->inst_cnt_reg); 305 + readl(iq->inst_cnt_reg); 306 + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); 307 + WRITE_ONCE(iq->pkts_processed, 0); 308 + } 309 + if (last_pkt_count - pkts_pend) { 310 + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); 311 + readl(oq->pkts_sent_reg); 312 + WRITE_ONCE(oq->last_pkt_count, pkts_pend); 313 + } 314 + 315 + /* Flush the previous writes before writing to RESEND bit */ 316 + smp_wmb(); 317 + } 318 + 319 + /** 289 320 * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. 290 321 * 291 322 * @iq: Octeon Tx queue data structure. 292 323 * @oq: Octeon Rx queue data structure. 
293 324 */ 294 - static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq) 325 + static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, 326 + struct octep_vf_oq *oq) 295 327 { 296 - u32 pkts_pend = oq->pkts_pending; 297 - 298 - netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 299 - if (iq->pkts_processed) { 300 - writel(iq->pkts_processed, iq->inst_cnt_reg); 301 - iq->pkt_in_done -= iq->pkts_processed; 302 - iq->pkts_processed = 0; 303 - } 304 - if (oq->last_pkt_count - pkts_pend) { 305 - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); 306 - oq->last_pkt_count = pkts_pend; 307 - } 308 - 309 - /* Flush the previous wrties before writing to RESEND bit */ 310 - smp_wmb(); 311 328 writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); 312 329 writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); 313 330 } ··· 350 333 if (tx_pending || rx_done >= budget) 351 334 return budget; 352 335 336 + octep_vf_update_pkt(ioq_vector->iq, ioq_vector->oq); 353 337 if (likely(napi_complete_done(napi, rx_done))) 354 338 octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); 355 339
+20 -8
drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
··· 325 325 struct octep_vf_oq *oq) 326 326 { 327 327 u32 pkt_count, new_pkts; 328 + u32 last_pkt_count, pkts_pending; 328 329 329 330 pkt_count = readl(oq->pkts_sent_reg); 330 - new_pkts = pkt_count - oq->last_pkt_count; 331 + last_pkt_count = READ_ONCE(oq->last_pkt_count); 332 + new_pkts = pkt_count - last_pkt_count; 333 + 334 + if (pkt_count < last_pkt_count) { 335 + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", 336 + oq->q_no, pkt_count, last_pkt_count); 337 + } 331 338 332 339 /* Clear the hardware packets counter register if the rx queue is 333 340 * being processed continuously with-in a single interrupt and ··· 346 339 pkt_count = readl(oq->pkts_sent_reg); 347 340 new_pkts += pkt_count; 348 341 } 349 - oq->last_pkt_count = pkt_count; 350 - oq->pkts_pending += new_pkts; 342 + WRITE_ONCE(oq->last_pkt_count, pkt_count); 343 + pkts_pending = READ_ONCE(oq->pkts_pending); 344 + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); 351 345 return new_pkts; 352 346 } 353 347 ··· 377 369 struct sk_buff *skb; 378 370 u32 read_idx; 379 371 380 - read_idx = oq->host_read_idx; 372 + read_idx = READ_ONCE(oq->host_read_idx); 381 373 rx_bytes = 0; 382 374 desc_used = 0; 383 375 for (pkt = 0; pkt < pkts_to_process; pkt++) { ··· 471 463 napi_gro_receive(oq->napi, skb); 472 464 } 473 465 474 - oq->host_read_idx = read_idx; 466 + WRITE_ONCE(oq->host_read_idx, read_idx); 475 467 oq->refill_count += desc_used; 476 468 oq->stats->packets += pkt; 477 469 oq->stats->bytes += rx_bytes; ··· 494 486 { 495 487 u32 pkts_available, pkts_processed, total_pkts_processed; 496 488 struct octep_vf_device *oct = oq->octep_vf_dev; 489 + u32 pkts_pending; 497 490 498 491 pkts_available = 0; 499 492 pkts_processed = 0; 500 493 total_pkts_processed = 0; 501 494 while (total_pkts_processed < budget) { 502 495 /* update pending count only when current one exhausted */ 503 - if (oq->pkts_pending == 0) 496 + pkts_pending = READ_ONCE(oq->pkts_pending); 497 + if (pkts_pending == 
0) 504 498 octep_vf_oq_check_hw_for_pkts(oct, oq); 499 + pkts_pending = READ_ONCE(oq->pkts_pending); 505 500 pkts_available = min(budget - total_pkts_processed, 506 - oq->pkts_pending); 501 + pkts_pending); 507 502 if (!pkts_available) 508 503 break; 509 504 510 505 pkts_processed = __octep_vf_oq_process_rx(oct, oq, 511 506 pkts_available); 512 - oq->pkts_pending -= pkts_processed; 507 + pkts_pending = READ_ONCE(oq->pkts_pending); 508 + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); 513 509 total_pkts_processed += pkts_processed; 514 510 } 515 511
+12 -3
drivers/net/ethernet/mediatek/mtk_eth_soc.c
··· 3748 3748 mtk_stop(dev); 3749 3749 3750 3750 old_prog = rcu_replace_pointer(eth->prog, prog, lockdep_rtnl_is_held()); 3751 + 3752 + if (netif_running(dev) && need_update) { 3753 + int err; 3754 + 3755 + err = mtk_open(dev); 3756 + if (err) { 3757 + rcu_assign_pointer(eth->prog, old_prog); 3758 + 3759 + return err; 3760 + } 3761 + } 3762 + 3751 3763 if (old_prog) 3752 3764 bpf_prog_put(old_prog); 3753 - 3754 - if (netif_running(dev) && need_update) 3755 - return mtk_open(dev); 3756 3765 3757 3766 return 0; 3758 3767 }
+18 -5
drivers/net/ethernet/microsoft/mana/mana_en.c
··· 1770 1770 ndev = txq->ndev; 1771 1771 apc = netdev_priv(ndev); 1772 1772 1773 + /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the 1774 + * doorbell can be rung in time for the hardware's requirement 1775 + * of at least one doorbell ring every 8 wraparounds. 1776 + */ 1773 1777 comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, 1774 - CQE_POLLING_BUFFER); 1778 + min((cq->gdma_cq->queue_size / 1779 + COMP_ENTRY_SIZE) * 4, 1780 + CQE_POLLING_BUFFER)); 1775 1781 1776 1782 if (comp_read < 1) 1777 1783 return; ··· 2162 2156 struct mana_rxq *rxq = cq->rxq; 2163 2157 int comp_read, i; 2164 2158 2165 - comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); 2159 + /* Limit CQEs polled to 4 wraparounds of the CQ to ensure the 2160 + * doorbell can be rung in time for the hardware's requirement 2161 + * of at least one doorbell ring every 8 wraparounds. 2162 + */ 2163 + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, 2164 + min((cq->gdma_cq->queue_size / 2165 + COMP_ENTRY_SIZE) * 4, 2166 + CQE_POLLING_BUFFER)); 2166 2167 WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); 2167 2168 2168 2169 rxq->xdp_flush = false; ··· 2214 2201 mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); 2215 2202 cq->work_done_since_doorbell = 0; 2216 2203 napi_complete_done(&cq->napi, w); 2217 - } else if (cq->work_done_since_doorbell > 2218 - cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { 2204 + } else if (cq->work_done_since_doorbell >= 2205 + (cq->gdma_cq->queue_size / COMP_ENTRY_SIZE) * 4) { 2219 2206 /* MANA hardware requires at least one doorbell ring every 8 2220 2207 * wraparounds of CQ even if there is no need to arm the CQ. 2221 - * This driver rings the doorbell as soon as we have exceeded 2208 + * This driver rings the doorbell as soon as it has processed 2222 2209 * 4 wraparounds. 2223 2210 */ 2224 2211 mana_gd_ring_cq(gdma_queue, 0);
+1
drivers/net/ethernet/stmicro/stmmac/stmmac.h
··· 323 323 void __iomem *ptpaddr; 324 324 void __iomem *estaddr; 325 325 unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; 326 + unsigned int num_double_vlans; 326 327 int sfty_irq; 327 328 int sfty_ce_irq; 328 329 int sfty_ue_irq;
+47 -6
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
··· 156 156 static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue); 157 157 static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, 158 158 u32 rxmode, u32 chan); 159 + static int stmmac_vlan_restore(struct stmmac_priv *priv); 159 160 160 161 #ifdef CONFIG_DEBUG_FS 161 162 static const struct net_device_ops stmmac_netdev_ops; ··· 4108 4107 4109 4108 phylink_start(priv->phylink); 4110 4109 4110 + stmmac_vlan_restore(priv); 4111 + 4111 4112 ret = stmmac_request_irq(dev); 4112 4113 if (ret) 4113 4114 goto irq_error; ··· 6769 6766 hash = 0; 6770 6767 } 6771 6768 6769 + if (!netif_running(priv->dev)) 6770 + return 0; 6771 + 6772 6772 return stmmac_update_vlan_hash(priv, priv->hw, hash, pmatch, is_double); 6773 6773 } 6774 6774 ··· 6781 6775 static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) 6782 6776 { 6783 6777 struct stmmac_priv *priv = netdev_priv(ndev); 6778 + unsigned int num_double_vlans; 6784 6779 bool is_double = false; 6785 6780 int ret; 6786 6781 ··· 6793 6786 is_double = true; 6794 6787 6795 6788 set_bit(vid, priv->active_vlans); 6796 - ret = stmmac_vlan_update(priv, is_double); 6789 + num_double_vlans = priv->num_double_vlans + is_double; 6790 + ret = stmmac_vlan_update(priv, num_double_vlans); 6797 6791 if (ret) { 6798 6792 clear_bit(vid, priv->active_vlans); 6799 6793 goto err_pm_put; ··· 6802 6794 6803 6795 if (priv->hw->num_vlan) { 6804 6796 ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); 6805 - if (ret) 6797 + if (ret) { 6798 + clear_bit(vid, priv->active_vlans); 6799 + stmmac_vlan_update(priv, priv->num_double_vlans); 6806 6800 goto err_pm_put; 6801 + } 6807 6802 } 6803 + 6804 + priv->num_double_vlans = num_double_vlans; 6805 + 6808 6806 err_pm_put: 6809 6807 pm_runtime_put(priv->device); 6810 6808 ··· 6823 6809 static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) 6824 6810 { 6825 6811 struct stmmac_priv *priv = 
netdev_priv(ndev); 6812 + unsigned int num_double_vlans; 6826 6813 bool is_double = false; 6827 6814 int ret; 6828 6815 ··· 6835 6820 is_double = true; 6836 6821 6837 6822 clear_bit(vid, priv->active_vlans); 6823 + num_double_vlans = priv->num_double_vlans - is_double; 6824 + ret = stmmac_vlan_update(priv, num_double_vlans); 6825 + if (ret) { 6826 + set_bit(vid, priv->active_vlans); 6827 + goto del_vlan_error; 6828 + } 6838 6829 6839 6830 if (priv->hw->num_vlan) { 6840 6831 ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); 6841 - if (ret) 6832 + if (ret) { 6833 + set_bit(vid, priv->active_vlans); 6834 + stmmac_vlan_update(priv, priv->num_double_vlans); 6842 6835 goto del_vlan_error; 6836 + } 6843 6837 } 6844 6838 6845 - ret = stmmac_vlan_update(priv, is_double); 6839 + priv->num_double_vlans = num_double_vlans; 6846 6840 6847 6841 del_vlan_error: 6848 6842 pm_runtime_put(priv->device); 6843 + 6844 + return ret; 6845 + } 6846 + 6847 + static int stmmac_vlan_restore(struct stmmac_priv *priv) 6848 + { 6849 + int ret; 6850 + 6851 + if (!(priv->dev->features & NETIF_F_VLAN_FEATURES)) 6852 + return 0; 6853 + 6854 + if (priv->hw->num_vlan) 6855 + stmmac_restore_hw_vlan_rx_fltr(priv, priv->dev, priv->hw); 6856 + 6857 + ret = stmmac_vlan_update(priv, priv->num_double_vlans); 6858 + if (ret) 6859 + netdev_err(priv->dev, "Failed to restore VLANs\n"); 6849 6860 6850 6861 return ret; 6851 6862 } ··· 8300 8259 stmmac_init_coalesce(priv); 8301 8260 phylink_rx_clk_stop_block(priv->phylink); 8302 8261 stmmac_set_rx_mode(ndev); 8303 - 8304 - stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw); 8305 8262 phylink_rx_clk_stop_unblock(priv->phylink); 8263 + 8264 + stmmac_vlan_restore(priv); 8306 8265 8307 8266 stmmac_enable_all_queues(priv); 8308 8267 stmmac_enable_all_dma_irq(priv);
+31 -29
drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c
··· 76 76 } 77 77 78 78 hw->vlan_filter[0] = vid; 79 - vlan_write_single(dev, vid); 79 + 80 + if (netif_running(dev)) 81 + vlan_write_single(dev, vid); 80 82 81 83 return 0; 82 84 } ··· 99 97 return -EPERM; 100 98 } 101 99 102 - ret = vlan_write_filter(dev, hw, index, val); 100 + if (netif_running(dev)) { 101 + ret = vlan_write_filter(dev, hw, index, val); 102 + if (ret) 103 + return ret; 104 + } 103 105 104 - if (!ret) 105 - hw->vlan_filter[index] = val; 106 + hw->vlan_filter[index] = val; 106 107 107 - return ret; 108 + return 0; 108 109 } 109 110 110 111 static int vlan_del_hw_rx_fltr(struct net_device *dev, ··· 120 115 if (hw->num_vlan == 1) { 121 116 if ((hw->vlan_filter[0] & VLAN_TAG_VID) == vid) { 122 117 hw->vlan_filter[0] = 0; 123 - vlan_write_single(dev, 0); 118 + 119 + if (netif_running(dev)) 120 + vlan_write_single(dev, 0); 124 121 } 125 122 return 0; 126 123 } ··· 131 124 for (i = 0; i < hw->num_vlan; i++) { 132 125 if ((hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) && 133 126 ((hw->vlan_filter[i] & VLAN_TAG_DATA_VID) == vid)) { 134 - ret = vlan_write_filter(dev, hw, i, 0); 135 127 136 - if (!ret) 137 - hw->vlan_filter[i] = 0; 138 - else 139 - return ret; 128 + if (netif_running(dev)) { 129 + ret = vlan_write_filter(dev, hw, i, 0); 130 + if (ret) 131 + return ret; 132 + } 133 + 134 + hw->vlan_filter[i] = 0; 140 135 } 141 136 } 142 137 143 - return ret; 138 + return 0; 144 139 } 145 140 146 141 static void vlan_restore_hw_rx_fltr(struct net_device *dev, 147 142 struct mac_device_info *hw) 148 143 { 149 - void __iomem *ioaddr = hw->pcsr; 150 - u32 value; 151 - u32 hash; 152 - u32 val; 153 144 int i; 154 145 155 146 /* Single Rx VLAN Filter */ ··· 157 152 } 158 153 159 154 /* Extended Rx VLAN Filter Enable */ 160 - for (i = 0; i < hw->num_vlan; i++) { 161 - if (hw->vlan_filter[i] & VLAN_TAG_DATA_VEN) { 162 - val = hw->vlan_filter[i]; 163 - vlan_write_filter(dev, hw, i, val); 164 - } 165 - } 166 - 167 - hash = readl(ioaddr + VLAN_HASH_TABLE); 168 - if (hash & 
VLAN_VLHT) { 169 - value = readl(ioaddr + VLAN_TAG); 170 - value |= VLAN_VTHM; 171 - writel(value, ioaddr + VLAN_TAG); 172 - } 155 + for (i = 0; i < hw->num_vlan; i++) 156 + vlan_write_filter(dev, hw, i, hw->vlan_filter[i]); 173 157 } 174 158 175 159 static void vlan_update_hash(struct mac_device_info *hw, u32 hash, ··· 177 183 value |= VLAN_EDVLP; 178 184 value |= VLAN_ESVL; 179 185 value |= VLAN_DOVLTC; 186 + } else { 187 + value &= ~VLAN_EDVLP; 188 + value &= ~VLAN_ESVL; 189 + value &= ~VLAN_DOVLTC; 180 190 } 181 191 182 192 writel(value, ioaddr + VLAN_TAG); ··· 191 193 value |= VLAN_EDVLP; 192 194 value |= VLAN_ESVL; 193 195 value |= VLAN_DOVLTC; 196 + } else { 197 + value &= ~VLAN_EDVLP; 198 + value &= ~VLAN_ESVL; 199 + value &= ~VLAN_DOVLTC; 194 200 } 195 201 196 202 writel(value | perfect_match, ioaddr + VLAN_TAG);
+1 -1
drivers/net/ethernet/ti/am65-cpsw-nuss.c
··· 391 391 cpsw_ale_set_allmulti(common->ale, 392 392 ndev->flags & IFF_ALLMULTI, port->port_id); 393 393 394 - port_mask = ALE_PORT_HOST; 394 + port_mask = BIT(port->port_id) | ALE_PORT_HOST; 395 395 /* Clear all mcast from ALE */ 396 396 cpsw_ale_flush_multicast(common->ale, port_mask, -1); 397 397
+4 -5
drivers/net/ethernet/ti/cpsw_ale.c
··· 450 450 ale->port_mask_bits); 451 451 if ((mask & port_mask) == 0) 452 452 return; /* ports dont intersect, not interested */ 453 - mask &= ~port_mask; 453 + mask &= (~port_mask | ALE_PORT_HOST); 454 454 455 - /* free if only remaining port is host port */ 456 - if (mask) 455 + if (mask == 0x0 || mask == ALE_PORT_HOST) 456 + cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); 457 + else 457 458 cpsw_ale_set_port_mask(ale_entry, mask, 458 459 ale->port_mask_bits); 459 - else 460 - cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); 461 460 } 462 461 463 462 int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid)
+8
drivers/net/ethernet/ti/icssg/icssg_prueth.c
··· 273 273 if (ret) 274 274 goto disable_class; 275 275 276 + /* Reset link state to force reconfiguration in 277 + * emac_adjust_link(). Without this, if the link was already up 278 + * before restart, emac_adjust_link() won't detect any state 279 + * change and will skip critical configuration like writing 280 + * speed to firmware. 281 + */ 282 + emac->link = 0; 283 + 276 284 mutex_lock(&emac->ndev->phydev->lock); 277 285 emac_adjust_link(emac->ndev); 278 286 mutex_unlock(&emac->ndev->phydev->lock);
+1 -1
drivers/net/netconsole.c
··· 617 617 bool release_enabled; 618 618 619 619 dynamic_netconsole_mutex_lock(); 620 - release_enabled = !!(nt->sysdata_fields & SYSDATA_TASKNAME); 620 + release_enabled = !!(nt->sysdata_fields & SYSDATA_RELEASE); 621 621 dynamic_netconsole_mutex_unlock(); 622 622 623 623 return sysfs_emit(buf, "%d\n", release_enabled);
+1
drivers/net/usb/r8152.c
··· 10054 10054 { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, 10055 10055 { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, 10056 10056 { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, 10057 + { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) }, 10057 10058 {} 10058 10059 }; 10059 10060
+5
drivers/net/vxlan/vxlan_core.c
··· 2130 2130 { 2131 2131 struct ipv6hdr *pip6; 2132 2132 2133 + /* check if nd_tbl is not initialized due to 2134 + * ipv6.disable=1 set during boot 2135 + */ 2136 + if (!ipv6_stub->nd_tbl) 2137 + return false; 2133 2138 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 2134 2139 return false; 2135 2140 pip6 = ipv6_hdr(skb);

+3 -3
drivers/net/wireless/ath/ath12k/mac.c
··· 5430 5430 ar->last_tx_power_update)) 5431 5431 goto send_tx_power; 5432 5432 5433 - params.pdev_id = ar->pdev->pdev_id; 5433 + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); 5434 5434 params.vdev_id = arvif->vdev_id; 5435 5435 params.stats_id = WMI_REQUEST_PDEV_STAT; 5436 5436 ret = ath12k_mac_get_fw_stats(ar, &params); ··· 13452 13452 /* TODO: Use real NF instead of default one. */ 13453 13453 signal = rate_info.rssi_comb; 13454 13454 13455 - params.pdev_id = ar->pdev->pdev_id; 13455 + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); 13456 13456 params.vdev_id = 0; 13457 13457 params.stats_id = WMI_REQUEST_VDEV_STAT; 13458 13458 ··· 13580 13580 spin_unlock_bh(&ar->ab->dp->dp_lock); 13581 13581 13582 13582 if (!signal && ahsta->ahvif->vdev_type == WMI_VDEV_TYPE_STA) { 13583 - params.pdev_id = ar->pdev->pdev_id; 13583 + params.pdev_id = ath12k_mac_get_target_pdev_id(ar); 13584 13584 params.vdev_id = 0; 13585 13585 params.stats_id = WMI_REQUEST_VDEV_STAT; 13586 13586
+13 -23
drivers/net/wireless/ath/ath12k/wmi.c
··· 8241 8241 struct ath12k_fw_stats *stats = parse->stats; 8242 8242 struct ath12k *ar; 8243 8243 struct ath12k_link_vif *arvif; 8244 - struct ieee80211_sta *sta; 8245 - struct ath12k_sta *ahsta; 8246 8244 struct ath12k_link_sta *arsta; 8247 8245 int i, ret = 0; 8248 8246 const void *data = ptr; ··· 8276 8278 8277 8279 arvif = ath12k_mac_get_arvif(ar, le32_to_cpu(src->vdev_id)); 8278 8280 if (arvif) { 8279 - sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar), 8280 - arvif->bssid, 8281 - NULL); 8282 - if (sta) { 8283 - ahsta = ath12k_sta_to_ahsta(sta); 8284 - arsta = &ahsta->deflink; 8281 + spin_lock_bh(&ab->base_lock); 8282 + arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid); 8283 + if (arsta) { 8285 8284 arsta->rssi_beacon = le32_to_cpu(src->beacon_snr); 8286 8285 ath12k_dbg(ab, ATH12K_DBG_WMI, 8287 8286 "wmi stats vdev id %d snr %d\n", 8288 8287 src->vdev_id, src->beacon_snr); 8289 8288 } else { 8290 - ath12k_dbg(ab, ATH12K_DBG_WMI, 8291 - "not found station bssid %pM for vdev stat\n", 8292 - arvif->bssid); 8289 + ath12k_warn(ab, 8290 + "not found link sta with bssid %pM for vdev stat\n", 8291 + arvif->bssid); 8293 8292 } 8293 + spin_unlock_bh(&ab->base_lock); 8294 8294 } 8295 8295 8296 8296 data += sizeof(*src); ··· 8359 8363 struct ath12k_fw_stats *stats = parse->stats; 8360 8364 struct ath12k_link_vif *arvif; 8361 8365 struct ath12k_link_sta *arsta; 8362 - struct ieee80211_sta *sta; 8363 - struct ath12k_sta *ahsta; 8364 8366 struct ath12k *ar; 8365 8367 int vdev_id; 8366 8368 int j; ··· 8394 8400 "stats bssid %pM vif %p\n", 8395 8401 arvif->bssid, arvif->ahvif->vif); 8396 8402 8397 - sta = ieee80211_find_sta_by_ifaddr(ath12k_ar_to_hw(ar), 8398 - arvif->bssid, 8399 - NULL); 8400 - if (!sta) { 8401 - ath12k_dbg(ab, ATH12K_DBG_WMI, 8402 - "not found station of bssid %pM for rssi chain\n", 8403 - arvif->bssid); 8403 + guard(spinlock_bh)(&ab->base_lock); 8404 + arsta = ath12k_link_sta_find_by_addr(ab, arvif->bssid); 8405 + if (!arsta) { 8406 + 
ath12k_warn(ab, 8407 + "not found link sta with bssid %pM for rssi chain\n", 8408 + arvif->bssid); 8404 8409 return -EPROTO; 8405 8410 } 8406 - 8407 - ahsta = ath12k_sta_to_ahsta(sta); 8408 - arsta = &ahsta->deflink; 8409 8411 8410 8412 BUILD_BUG_ON(ARRAY_SIZE(arsta->chain_signal) > 8411 8413 ARRAY_SIZE(stats_rssi->rssi_avg_beacon));
+1
drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
··· 413 413 u32 val; 414 414 415 415 if (ieee80211_is_action(fc) && 416 + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + 1 + 2 && 416 417 mgmt->u.action.category == WLAN_CATEGORY_BACK && 417 418 mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { 418 419 u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
+1
drivers/net/wireless/mediatek/mt76/mt7925/mac.c
··· 668 668 u32 val; 669 669 670 670 if (ieee80211_is_action(fc) && 671 + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && 671 672 mgmt->u.action.category == WLAN_CATEGORY_BACK && 672 673 mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) 673 674 tid = MT_TX_ADDBA;
+1
drivers/net/wireless/mediatek/mt76/mt7996/mac.c
··· 800 800 u32 val; 801 801 802 802 if (ieee80211_is_action(fc) && 803 + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && 803 804 mgmt->u.action.category == WLAN_CATEGORY_BACK && 804 805 mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { 805 806 if (is_mt7990(&dev->mt76))
+1 -1
drivers/net/wireless/rsi/rsi_91x_mac80211.c
··· 668 668 struct rsi_hw *adapter = hw->priv; 669 669 struct rsi_common *common = adapter->priv; 670 670 struct ieee80211_conf *conf = &hw->conf; 671 - int status = -EOPNOTSUPP; 671 + int status = 0; 672 672 673 673 mutex_lock(&common->mutex); 674 674
+2
drivers/net/wireless/st/cw1200/pm.c
··· 264 264 wiphy_err(priv->hw->wiphy, 265 265 "PM request failed: %d. WoW is disabled.\n", ret); 266 266 cw1200_wow_resume(hw); 267 + mutex_unlock(&priv->conf_mutex); 267 268 return -EBUSY; 268 269 } 269 270 270 271 /* Force resume if event is coming from the device. */ 271 272 if (atomic_read(&priv->bh_rx)) { 272 273 cw1200_wow_resume(hw); 274 + mutex_unlock(&priv->conf_mutex); 273 275 return -EAGAIN; 274 276 } 275 277
+2 -2
drivers/net/wireless/ti/wlcore/main.c
··· 1875 1875 wl->wow_enabled); 1876 1876 WARN_ON(!wl->wow_enabled); 1877 1877 1878 + mutex_lock(&wl->mutex); 1879 + 1878 1880 ret = pm_runtime_force_resume(wl->dev); 1879 1881 if (ret < 0) { 1880 1882 wl1271_error("ELP wakeup failure!"); ··· 1892 1890 if (test_and_clear_bit(WL1271_FLAG_PENDING_WORK, &wl->flags)) 1893 1891 run_irq_work = true; 1894 1892 spin_unlock_irqrestore(&wl->wl_lock, flags); 1895 - 1896 - mutex_lock(&wl->mutex); 1897 1893 1898 1894 /* test the recovery flag before calling any SDIO functions */ 1899 1895 pending_recovery = test_bit(WL1271_FLAG_RECOVERY_IN_PROGRESS,
+11 -7
include/linux/indirect_call_wrapper.h
··· 16 16 */ 17 17 #define INDIRECT_CALL_1(f, f1, ...) \ 18 18 ({ \ 19 - likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \ 19 + typeof(f) __f1 = (f); \ 20 + likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__); \ 20 21 }) 21 22 #define INDIRECT_CALL_2(f, f2, f1, ...) \ 22 23 ({ \ 23 - likely(f == f2) ? f2(__VA_ARGS__) : \ 24 - INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ 24 + typeof(f) __f2 = (f); \ 25 + likely(__f2 == f2) ? f2(__VA_ARGS__) : \ 26 + INDIRECT_CALL_1(__f2, f1, __VA_ARGS__); \ 25 27 }) 26 28 #define INDIRECT_CALL_3(f, f3, f2, f1, ...) \ 27 29 ({ \ 28 - likely(f == f3) ? f3(__VA_ARGS__) : \ 29 - INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \ 30 + typeof(f) __f3 = (f); \ 31 + likely(__f3 == f3) ? f3(__VA_ARGS__) : \ 32 + INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__); \ 30 33 }) 31 34 #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \ 32 35 ({ \ 33 - likely(f == f4) ? f4(__VA_ARGS__) : \ 34 - INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \ 36 + typeof(f) __f4 = (f); \ 37 + likely(__f4 == f4) ? f4(__VA_ARGS__) : \ 38 + INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__); \ 35 39 }) 36 40 37 41 #define INDIRECT_CALLABLE_DECLARE(f) f
+22 -5
include/linux/netdevice.h
··· 4711 4711 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) 4712 4712 { 4713 4713 spin_lock(&txq->_xmit_lock); 4714 - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ 4714 + /* Pairs with READ_ONCE() in netif_tx_owned() */ 4715 4715 WRITE_ONCE(txq->xmit_lock_owner, cpu); 4716 4716 } 4717 4717 ··· 4729 4729 static inline void __netif_tx_lock_bh(struct netdev_queue *txq) 4730 4730 { 4731 4731 spin_lock_bh(&txq->_xmit_lock); 4732 - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ 4732 + /* Pairs with READ_ONCE() in netif_tx_owned() */ 4733 4733 WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); 4734 4734 } 4735 4735 ··· 4738 4738 bool ok = spin_trylock(&txq->_xmit_lock); 4739 4739 4740 4740 if (likely(ok)) { 4741 - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ 4741 + /* Pairs with READ_ONCE() in netif_tx_owned() */ 4742 4742 WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); 4743 4743 } 4744 4744 return ok; ··· 4746 4746 4747 4747 static inline void __netif_tx_unlock(struct netdev_queue *txq) 4748 4748 { 4749 - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ 4749 + /* Pairs with READ_ONCE() in netif_tx_owned() */ 4750 4750 WRITE_ONCE(txq->xmit_lock_owner, -1); 4751 4751 spin_unlock(&txq->_xmit_lock); 4752 4752 } 4753 4753 4754 4754 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) 4755 4755 { 4756 - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ 4756 + /* Pairs with READ_ONCE() in netif_tx_owned() */ 4757 4757 WRITE_ONCE(txq->xmit_lock_owner, -1); 4758 4758 spin_unlock_bh(&txq->_xmit_lock); 4759 4759 } ··· 4845 4845 spin_unlock(&dev->tx_global_lock); 4846 4846 local_bh_enable(); 4847 4847 } 4848 + 4849 + #ifndef CONFIG_PREEMPT_RT 4850 + static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) 4851 + { 4852 + /* Other cpus might concurrently change txq->xmit_lock_owner 4853 + * to -1 or to their cpu id, but not to our id. 
4854 + */ 4855 + return READ_ONCE(txq->xmit_lock_owner) == cpu; 4856 + } 4857 + 4858 + #else 4859 + static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) 4860 + { 4861 + return rt_mutex_owner(&txq->_xmit_lock.lock) == current; 4862 + } 4863 + 4864 + #endif 4848 4865 4849 4866 static inline void netif_addr_lock(struct net_device *dev) 4850 4867 {
+1
include/linux/usb/r8152.h
··· 32 32 #define VENDOR_ID_DLINK 0x2001 33 33 #define VENDOR_ID_DELL 0x413c 34 34 #define VENDOR_ID_ASUS 0x0b05 35 + #define VENDOR_ID_TRENDNET 0x20f4 35 36 36 37 #if IS_REACHABLE(CONFIG_USB_RTL8152) 37 38 extern u8 rtl8152_get_version(struct usb_interface *intf);
+1
include/net/act_api.h
··· 70 70 #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) 71 71 #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) 72 72 #define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) 73 + #define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5)) 73 74 74 75 /* Update lastuse only if needed, to avoid dirtying a cache line. 75 76 * We use a temp variable to avoid fetching jiffies twice.
+1
include/net/bonding.h
··· 699 699 void bond_debug_unregister(struct bonding *bond); 700 700 void bond_debug_reregister(struct bonding *bond); 701 701 const char *bond_mode_name(int mode); 702 + bool __bond_xdp_check(int mode, int xmit_policy); 702 703 bool bond_xdp_check(struct bonding *bond, int mode); 703 704 void bond_setup(struct net_device *bond_dev); 704 705 unsigned int bond_get_num_tx_queues(void);
+1 -1
include/net/inet6_hashtables.h
··· 175 175 { 176 176 if (!net_eq(sock_net(sk), net) || 177 177 sk->sk_family != AF_INET6 || 178 - sk->sk_portpair != ports || 178 + READ_ONCE(sk->sk_portpair) != ports || 179 179 !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || 180 180 !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) 181 181 return false;
+1 -1
include/net/inet_hashtables.h
··· 345 345 int dif, int sdif) 346 346 { 347 347 if (!net_eq(sock_net(sk), net) || 348 - sk->sk_portpair != ports || 348 + READ_ONCE(sk->sk_portpair) != ports || 349 349 sk->sk_addrpair != cookie) 350 350 return false; 351 351
+1 -1
include/net/ip.h
··· 101 101 102 102 ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); 103 103 ipcm->addr = inet->inet_saddr; 104 - ipcm->protocol = inet->inet_num; 104 + ipcm->protocol = READ_ONCE(inet->inet_num); 105 105 } 106 106 107 107 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
+1 -1
include/net/ip_fib.h
··· 559 559 siphash_aligned_key_t hash_key; 560 560 u32 mp_seed; 561 561 562 - mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; 562 + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed); 563 563 fib_multipath_hash_construct_key(&hash_key, mp_seed); 564 564 565 565 return flow_hash_from_keys_seed(keys, &hash_key);
+3
include/net/libeth/xsk.h
··· 597 597 * @pending: current number of XSkFQEs to refill 598 598 * @thresh: threshold below which the queue is refilled 599 599 * @buf_len: HW-writeable length per each buffer 600 + * @truesize: step between consecutive buffers, 0 if none exists 600 601 * @nid: ID of the closest NUMA node with memory 601 602 */ 602 603 struct libeth_xskfq { ··· 615 614 u32 thresh; 616 615 617 616 u32 buf_len; 617 + u32 truesize; 618 + 618 619 int nid; 619 620 }; 620 621
+7
include/net/netfilter/nf_tables.h
··· 320 320 * @NFT_ITER_UNSPEC: unspecified, to catch errors 321 321 * @NFT_ITER_READ: read-only iteration over set elements 322 322 * @NFT_ITER_UPDATE: iteration under mutex to update set element state 323 + * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element 323 324 */ 324 325 enum nft_iter_type { 325 326 NFT_ITER_UNSPEC, 326 327 NFT_ITER_READ, 327 328 NFT_ITER_UPDATE, 329 + NFT_ITER_UPDATE_CLONE, 328 330 }; 329 331 330 332 struct nft_set; ··· 1862 1860 struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT]; 1863 1861 struct rcu_head rcu; 1864 1862 }; 1863 + 1864 + static inline int nft_trans_gc_space(const struct nft_trans_gc *trans) 1865 + { 1866 + return NFT_TRANS_GC_BATCHCOUNT - trans->count; 1867 + } 1865 1868 1866 1869 static inline void nft_ctx_update(struct nft_ctx *ctx, 1867 1870 const struct nft_trans *trans)
+10
include/net/sch_generic.h
··· 778 778 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) 779 779 { 780 780 struct Qdisc *qdisc; 781 + bool nolock; 781 782 782 783 for (; i < dev->num_tx_queues; i++) { 783 784 qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); 784 785 if (qdisc) { 786 + nolock = qdisc->flags & TCQ_F_NOLOCK; 787 + 788 + if (nolock) 789 + spin_lock_bh(&qdisc->seqlock); 785 790 spin_lock_bh(qdisc_lock(qdisc)); 786 791 qdisc_reset(qdisc); 787 792 spin_unlock_bh(qdisc_lock(qdisc)); 793 + if (nolock) { 794 + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); 795 + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); 796 + spin_unlock_bh(&qdisc->seqlock); 797 + } 788 798 } 789 799 } 790 800 }
+38 -7
include/net/secure_seq.h
··· 5 5 #include <linux/types.h> 6 6 7 7 struct net; 8 + extern struct net init_net; 9 + 10 + union tcp_seq_and_ts_off { 11 + struct { 12 + u32 seq; 13 + u32 ts_off; 14 + }; 15 + u64 hash64; 16 + }; 8 17 9 18 u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); 10 19 u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, 11 20 __be16 dport); 12 - u32 secure_tcp_seq(__be32 saddr, __be32 daddr, 13 - __be16 sport, __be16 dport); 14 - u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); 15 - u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, 16 - __be16 sport, __be16 dport); 17 - u32 secure_tcpv6_ts_off(const struct net *net, 18 - const __be32 *saddr, const __be32 *daddr); 21 + union tcp_seq_and_ts_off 22 + secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, 23 + __be16 sport, __be16 dport); 19 24 25 + static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr, 26 + __be16 sport, __be16 dport) 27 + { 28 + union tcp_seq_and_ts_off ts; 29 + 30 + ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr, 31 + sport, dport); 32 + 33 + return ts.seq; 34 + } 35 + 36 + union tcp_seq_and_ts_off 37 + secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, 38 + const __be32 *daddr, 39 + __be16 sport, __be16 dport); 40 + 41 + static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, 42 + __be16 sport, __be16 dport) 43 + { 44 + union tcp_seq_and_ts_off ts; 45 + 46 + ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr, 47 + sport, dport); 48 + 49 + return ts.seq; 50 + } 20 51 #endif /* _NET_SECURE_SEQ */
+26 -7
include/net/tc_act/tc_gate.h
··· 32 32 s32 tcfg_clockid; 33 33 size_t num_entries; 34 34 struct list_head entries; 35 + struct rcu_head rcu; 35 36 }; 36 37 37 38 #define GATE_ACT_GATE_OPEN BIT(0) ··· 40 39 41 40 struct tcf_gate { 42 41 struct tc_action common; 43 - struct tcf_gate_params param; 42 + struct tcf_gate_params __rcu *param; 44 43 u8 current_gate_status; 45 44 ktime_t current_close_time; 46 45 u32 current_entry_octets; ··· 52 51 53 52 #define to_gate(a) ((struct tcf_gate *)a) 54 53 54 + static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a) 55 + { 56 + struct tcf_gate *gact = to_gate(a); 57 + 58 + return rcu_dereference_protected(gact->param, 59 + lockdep_is_held(&gact->tcf_lock)); 60 + } 61 + 55 62 static inline s32 tcf_gate_prio(const struct tc_action *a) 56 63 { 64 + struct tcf_gate_params *p; 57 65 s32 tcfg_prio; 58 66 59 - tcfg_prio = to_gate(a)->param.tcfg_priority; 67 + p = tcf_gate_params_locked(a); 68 + tcfg_prio = p->tcfg_priority; 60 69 61 70 return tcfg_prio; 62 71 } 63 72 64 73 static inline u64 tcf_gate_basetime(const struct tc_action *a) 65 74 { 75 + struct tcf_gate_params *p; 66 76 u64 tcfg_basetime; 67 77 68 - tcfg_basetime = to_gate(a)->param.tcfg_basetime; 78 + p = tcf_gate_params_locked(a); 79 + tcfg_basetime = p->tcfg_basetime; 69 80 70 81 return tcfg_basetime; 71 82 } 72 83 73 84 static inline u64 tcf_gate_cycletime(const struct tc_action *a) 74 85 { 86 + struct tcf_gate_params *p; 75 87 u64 tcfg_cycletime; 76 88 77 - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; 89 + p = tcf_gate_params_locked(a); 90 + tcfg_cycletime = p->tcfg_cycletime; 78 91 79 92 return tcfg_cycletime; 80 93 } 81 94 82 95 static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) 83 96 { 97 + struct tcf_gate_params *p; 84 98 u64 tcfg_cycletimeext; 85 99 86 - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; 100 + p = tcf_gate_params_locked(a); 101 + tcfg_cycletimeext = p->tcfg_cycletime_ext; 87 102 88 103 return tcfg_cycletimeext; 89 104 
} 90 105 91 106 static inline u32 tcf_gate_num_entries(const struct tc_action *a) 92 107 { 108 + struct tcf_gate_params *p; 93 109 u32 num_entries; 94 110 95 - num_entries = to_gate(a)->param.num_entries; 111 + p = tcf_gate_params_locked(a); 112 + num_entries = p->num_entries; 96 113 97 114 return num_entries; 98 115 } ··· 124 105 u32 num_entries; 125 106 int i = 0; 126 107 127 - p = &to_gate(a)->param; 108 + p = tcf_gate_params_locked(a); 128 109 num_entries = p->num_entries; 129 110 130 111 list_for_each_entry(entry, &p->entries, list)
+1 -3
include/net/tc_act/tc_ife.h
··· 13 13 u8 eth_src[ETH_ALEN]; 14 14 u16 eth_type; 15 15 u16 flags; 16 - 16 + struct list_head metalist; 17 17 struct rcu_head rcu; 18 18 }; 19 19 20 20 struct tcf_ife_info { 21 21 struct tc_action common; 22 22 struct tcf_ife_params __rcu *params; 23 - /* list of metaids allowed */ 24 - struct list_head metalist; 25 23 }; 26 24 #define to_ife(a) ((struct tcf_ife_info *)a) 27 25
+4 -2
include/net/tcp.h
··· 43 43 #include <net/dst.h> 44 44 #include <net/mptcp.h> 45 45 #include <net/xfrm.h> 46 + #include <net/secure_seq.h> 46 47 47 48 #include <linux/seq_file.h> 48 49 #include <linux/memcontrol.h> ··· 2465 2464 struct flowi *fl, 2466 2465 struct request_sock *req, 2467 2466 u32 tw_isn); 2468 - u32 (*init_seq)(const struct sk_buff *skb); 2469 - u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); 2467 + union tcp_seq_and_ts_off (*init_seq_and_ts_off)( 2468 + const struct net *net, 2469 + const struct sk_buff *skb); 2470 2470 int (*send_synack)(const struct sock *sk, struct dst_entry *dst, 2471 2471 struct flowi *fl, struct request_sock *req, 2472 2472 struct tcp_fastopen_cookie *foc,
+13 -3
include/net/xdp_sock_drv.h
··· 51 51 return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); 52 52 } 53 53 54 + static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) 55 + { 56 + return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); 57 + } 58 + 54 59 static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, 55 60 struct xdp_rxq_info *rxq) 56 61 { ··· 127 122 goto out; 128 123 129 124 list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { 130 - list_del(&pos->list_node); 125 + list_del_init(&pos->list_node); 131 126 xp_free(pos); 132 127 } 133 128 ··· 162 157 frag = list_first_entry_or_null(&xskb->pool->xskb_list, 163 158 struct xdp_buff_xsk, list_node); 164 159 if (frag) { 165 - list_del(&frag->list_node); 160 + list_del_init(&frag->list_node); 166 161 ret = &frag->xdp; 167 162 } 168 163 ··· 173 168 { 174 169 struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); 175 170 176 - list_del(&xskb->list_node); 171 + list_del_init(&xskb->list_node); 177 172 } 178 173 179 174 static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) ··· 338 333 } 339 334 340 335 static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) 336 + { 337 + return 0; 338 + } 339 + 340 + static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) 341 341 { 342 342 return 0; 343 343 }
+15 -11
net/atm/lec.c
···
 		struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
 		struct net_device *dev = (struct net_device *)vcc->proto_data;

-		vcc->pop = vpriv->old_pop;
-		if (vpriv->xoff)
-			netif_wake_queue(dev);
-		kfree(vpriv);
-		vcc->user_back = NULL;
-		vcc->push = entry->old_push;
-		vcc_release_async(vcc, -EPIPE);
+		if (vpriv) {
+			vcc->pop = vpriv->old_pop;
+			if (vpriv->xoff)
+				netif_wake_queue(dev);
+			kfree(vpriv);
+			vcc->user_back = NULL;
+			vcc->push = entry->old_push;
+			vcc_release_async(vcc, -EPIPE);
+		}
 		entry->vcc = NULL;
 	}
 	if (entry->recv_vcc) {
 		struct atm_vcc *vcc = entry->recv_vcc;
 		struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);

-		kfree(vpriv);
-		vcc->user_back = NULL;
+		if (vpriv) {
+			kfree(vpriv);
+			vcc->user_back = NULL;

-		entry->recv_vcc->push = entry->old_recv_push;
-		vcc_release_async(entry->recv_vcc, -EPIPE);
+			entry->recv_vcc->push = entry->old_recv_push;
+			vcc_release_async(entry->recv_vcc, -EPIPE);
+		}
 		entry->recv_vcc = NULL;
 	}
 }
+9 -1
net/batman-adv/bat_v_elp.c
···
 		/* unsupported WiFi driver version */
 		goto default_throughput;

-	real_netdev = batadv_get_real_netdev(hard_iface->net_dev);
+	/* only use rtnl_trylock because the elp worker will be cancelled while
+	 * the rtnl_lock is held. the cancel_delayed_work_sync() would otherwise
+	 * wait forever when the elp work_item was started and it is then also
+	 * trying to rtnl_lock
+	 */
+	if (!rtnl_trylock())
+		return false;
+	real_netdev = __batadv_get_real_netdev(hard_iface->net_dev);
+	rtnl_unlock();
 	if (!real_netdev)
 		goto default_throughput;
+4 -4
net/batman-adv/hard-interface.c
···
 }

 /**
- * batadv_get_real_netdevice() - check if the given netdev struct is a virtual
+ * __batadv_get_real_netdev() - check if the given netdev struct is a virtual
  * interface on top of another 'real' interface
  * @netdev: the device to check
  *
···
  * Return: the 'real' net device or the original net device and NULL in case
  * of an error.
  */
-static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
+struct net_device *__batadv_get_real_netdev(struct net_device *netdev)
 {
 	struct batadv_hard_iface *hard_iface = NULL;
 	struct net_device *real_netdev = NULL;
···
 	struct net_device *real_netdev;

 	rtnl_lock();
-	real_netdev = batadv_get_real_netdevice(net_device);
+	real_netdev = __batadv_get_real_netdev(net_device);
 	rtnl_unlock();

 	return real_netdev;
···
 	if (batadv_is_cfg80211_netdev(net_device))
 		wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;

-	real_netdev = batadv_get_real_netdevice(net_device);
+	real_netdev = __batadv_get_real_netdev(net_device);
 	if (!real_netdev)
 		return wifi_flags;
+1
net/batman-adv/hard-interface.h
···

 extern struct notifier_block batadv_hard_if_notifier;

+struct net_device *__batadv_get_real_netdev(struct net_device *net_device);
 struct net_device *batadv_get_real_netdev(struct net_device *net_device);
 bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface);
 bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
+1 -1
net/bridge/br_device.c
···
 	     eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
 	    br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
 		br_do_proxy_suppress_arp(skb, br, vid, NULL);
-	} else if (IS_ENABLED(CONFIG_IPV6) &&
+	} else if (ipv6_mod_enabled() &&
 		   skb->protocol == htons(ETH_P_IPV6) &&
 		   br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
 		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+1 -1
net/bridge/br_input.c
···
 	    (skb->protocol == htons(ETH_P_ARP) ||
 	     skb->protocol == htons(ETH_P_RARP))) {
 		br_do_proxy_suppress_arp(skb, br, vid, p);
-	} else if (IS_ENABLED(CONFIG_IPV6) &&
+	} else if (ipv6_mod_enabled() &&
 		   skb->protocol == htons(ETH_P_IPV6) &&
 		   br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
 		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+10
net/bridge/br_private.h
···
 }

 static inline bool
+br_multicast_port_ctx_options_equal(const struct net_bridge_mcast_port *pmctx1,
+				    const struct net_bridge_mcast_port *pmctx2)
+{
+	return br_multicast_ngroups_get(pmctx1) ==
+	       br_multicast_ngroups_get(pmctx2) &&
+	       br_multicast_ngroups_get_max(pmctx1) ==
+	       br_multicast_ngroups_get_max(pmctx2);
+}
+
+static inline bool
 br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx)
 {
 	bool vlan_snooping_enabled;
+23 -3
net/bridge/br_vlan_options.c
···
 	u8 range_mc_rtr = br_vlan_multicast_router(range_end);
 	u8 curr_mc_rtr = br_vlan_multicast_router(v_curr);

-	return v_curr->state == range_end->state &&
-	       __vlan_tun_can_enter_range(v_curr, range_end) &&
-	       curr_mc_rtr == range_mc_rtr;
+	if (v_curr->state != range_end->state)
+		return false;
+
+	if (!__vlan_tun_can_enter_range(v_curr, range_end))
+		return false;
+
+	if (curr_mc_rtr != range_mc_rtr)
+		return false;
+
+	/* Check user-visible priv_flags that affect output */
+	if ((v_curr->priv_flags ^ range_end->priv_flags) &
+	    (BR_VLFLAG_NEIGH_SUPPRESS_ENABLED | BR_VLFLAG_MCAST_ENABLED))
+		return false;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+	if (!br_vlan_is_master(v_curr) &&
+	    !br_multicast_port_ctx_vlan_disabled(&v_curr->port_mcast_ctx) &&
+	    !br_multicast_port_ctx_options_equal(&v_curr->port_mcast_ctx,
+						 &range_end->port_mcast_ctx))
+		return false;
+#endif
+
+	return true;
 }

 bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
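The bridge fix above compares only the user-visible `priv_flags` with the idiom `(a ^ b) & mask`: XOR yields a non-zero bit exactly where the two flag words differ, and ANDing with a mask restricts the comparison to the flags of interest. A small self-contained illustration (the flag values here are hypothetical, not the kernel's actual bit assignments):

```c
#include <assert.h>
#include <stdbool.h>

/* Hypothetical flag bits for illustration only */
#define FLAG_NEIGH_SUPPRESS 0x1	/* user-visible: must match across a range */
#define FLAG_MCAST_ENABLED  0x2	/* user-visible: must match across a range */
#define FLAG_INTERNAL       0x4	/* internal state: ignored by the check */

/* True when a and b agree on every bit selected by mask;
 * bits outside the mask may differ freely. */
static bool flags_equal_in_mask(unsigned int a, unsigned int b,
				unsigned int mask)
{
	return ((a ^ b) & mask) == 0;
}
```

Two VLANs differing only in an internal flag can still be grouped into one range, while a difference in a user-visible flag splits the range.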
+1
net/can/bcm.c
···
 	if (!op)
 		return -ENOMEM;

+	spin_lock_init(&op->bcm_tx_lock);
 	op->can_id = msg_head->can_id;
 	op->nframes = msg_head->nframes;
 	op->cfsiz = CFSIZ(msg_head->flags);
+13 -11
net/core/dev.c
···
 	if (shinfo->nr_frags > 0) {
 		niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0]));
 		if (net_is_devmem_iov(niov) &&
-		    net_devmem_iov_binding(niov)->dev != dev)
+		    READ_ONCE(net_devmem_iov_binding(niov)->dev) != dev)
 			goto out_free;
 	}

···
 	if (dev->flags & IFF_UP) {
 		int cpu = smp_processor_id(); /* ok because BHs are off */

-		/* Other cpus might concurrently change txq->xmit_lock_owner
-		 * to -1 or to their cpu id, but not to our id.
-		 */
-		if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
+		if (!netif_tx_owned(txq, cpu)) {
 			bool is_list = false;

 			if (dev_xmit_recursion())
···
 	return -1;
 }

-static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
+static void napi_threaded_poll_loop(struct napi_struct *napi,
+				    unsigned long *busy_poll_last_qs)
 {
+	unsigned long last_qs = busy_poll_last_qs ? *busy_poll_last_qs : jiffies;
 	struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
 	struct softnet_data *sd;
-	unsigned long last_qs = jiffies;

 	for (;;) {
 		bool repoll = false;
···
 		/* When busy poll is enabled, the old packets are not flushed in
 		 * napi_complete_done. So flush them here.
 		 */
-		if (busy_poll)
+		if (busy_poll_last_qs)
 			gro_flush_normal(&napi->gro, HZ >= 1000);
 		local_bh_enable();

 		/* Call cond_resched here to avoid watchdog warnings. */
-		if (repoll || busy_poll) {
+		if (repoll || busy_poll_last_qs) {
 			rcu_softirq_qs_periodic(last_qs);
 			cond_resched();
 		}
···
 		if (!repoll)
 			break;
 	}
+
+	if (busy_poll_last_qs)
+		*busy_poll_last_qs = last_qs;
 }

 static int napi_threaded_poll(void *data)
 {
 	struct napi_struct *napi = data;
+	unsigned long last_qs = jiffies;
 	bool want_busy_poll;
 	bool in_busy_poll;
 	unsigned long val;
···
 		assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state,
 			   want_busy_poll);

-		napi_threaded_poll_loop(napi, want_busy_poll);
+		napi_threaded_poll_loop(napi, want_busy_poll ? &last_qs : NULL);
 	}

 	return 0;
···
 {
 	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);

-	napi_threaded_poll_loop(&sd->backlog, false);
+	napi_threaded_poll_loop(&sd->backlog, NULL);
 }

 static void backlog_napi_setup(unsigned int cpu)
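The rcu_tasks-stall fix above replaces the `bool busy_poll` parameter with an `unsigned long *busy_poll_last_qs`: a NULL pointer means "not busy polling", while a non-NULL pointer both enables the busy-poll paths and carries the `last_qs` timestamp across successive calls into the loop, so the periodic quiescent-state reporting is not reset on every re-entry. A hypothetical standalone sketch of that pointer-as-flag in/out-parameter pattern (the function and counter here are stand-ins, not kernel code):

```c
#include <assert.h>
#include <stddef.h>

/* If busy_poll_last_qs is NULL, behave as a one-shot call starting from 0;
 * if non-NULL, resume from and persist the caller's saved state, the way
 * napi_threaded_poll_loop() now threads last_qs through repeated calls. */
static long poll_once(long *busy_poll_last_qs)
{
	long last_qs = busy_poll_last_qs ? *busy_poll_last_qs : 0;

	last_qs++;	/* stand-in for updating the quiescent-state stamp */

	if (busy_poll_last_qs)
		*busy_poll_last_qs = last_qs;	/* persist across calls */
	return last_qs;
}
```

One parameter thus replaces the old `bool` plus a would-be extra out-argument, keeping the non-busy-poll callers (`poll_once(NULL)`) unchanged in shape.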
*/ 7833 - if (repoll || busy_poll) { 7835 + if (repoll || busy_poll_last_qs) { 7834 7836 rcu_softirq_qs_periodic(last_qs); 7835 7837 cond_resched(); 7836 7838 } ··· 7838 7840 if (!repoll) 7839 7841 break; 7840 7842 } 7843 + 7844 + if (busy_poll_last_qs) 7845 + *busy_poll_last_qs = last_qs; 7841 7846 } 7842 7847 7843 7848 static int napi_threaded_poll(void *data) 7844 7849 { 7845 7850 struct napi_struct *napi = data; 7851 + unsigned long last_qs = jiffies; 7846 7852 bool want_busy_poll; 7847 7853 bool in_busy_poll; 7848 7854 unsigned long val; ··· 7864 7862 assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state, 7865 7863 want_busy_poll); 7866 7864 7867 - napi_threaded_poll_loop(napi, want_busy_poll); 7865 + napi_threaded_poll_loop(napi, want_busy_poll ? &last_qs : NULL); 7868 7866 } 7869 7867 7870 7868 return 0; ··· 13177 13175 { 13178 13176 struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu); 13179 13177 13180 - napi_threaded_poll_loop(&sd->backlog, false); 13178 + napi_threaded_poll_loop(&sd->backlog, NULL); 13181 13179 } 13182 13180 13183 13181 static void backlog_napi_setup(unsigned int cpu)
+4 -2
net/core/devmem.c
···
 	 * net_device.
 	 */
 	dst_dev = dst_dev_rcu(dst);
-	if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) {
+	if (unlikely(!dst_dev) ||
+	    unlikely(dst_dev != READ_ONCE(binding->dev))) {
 		err = -ENODEV;
 		goto out_unlock;
 	}
···
 		xa_erase(&binding->bound_rxqs, xa_idx);
 		if (xa_empty(&binding->bound_rxqs)) {
 			mutex_lock(&binding->lock);
-			binding->dev = NULL;
+			ASSERT_EXCLUSIVE_WRITER(binding->dev);
+			WRITE_ONCE(binding->dev, NULL);
 			mutex_unlock(&binding->lock);
 		}
 		break;
+4 -2
net/core/filter.c
···
 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
 	skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
 	struct xdp_rxq_info *rxq = xdp->rxq;
-	unsigned int tailroom;
+	int tailroom;

 	if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
 		return -EOPNOTSUPP;

-	tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+	tailroom = rxq->frag_size - skb_frag_size(frag) -
+		   skb_frag_off(frag) % rxq->frag_size;
+	WARN_ON_ONCE(tailroom < 0);
 	if (unlikely(offset > tailroom))
 		return -EINVAL;
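The modulo in the filter.c fix above matters because `skb_frag_off()` is the fragment's offset within its whole backing page, and one page may hold several `frag_size` chunks; only the offset *within the current chunk* consumes tailroom. Without the modulo, a fragment sitting in the second or later chunk made the subtraction go negative, which the old `unsigned int` then wrapped into a huge "tailroom". A minimal arithmetic sketch (hypothetical helper name, plain ints in place of the skb accessors):

```c
#include <assert.h>

/* Tailroom left in the chunk holding a fragment, per the fixed formula:
 * frag_off % frag_size is the fragment's offset inside its own chunk. */
static int xdp_frag_tailroom(int frag_size, int frag_off, int frag_len)
{
	return frag_size - frag_len - frag_off % frag_size;
}
```

For a 1000-byte fragment at offset 256 into the *second* 2048-byte chunk of a page (absolute offset 2304), the old formula `frag_size - frag_len - frag_off` gives -1256 (wrapping when unsigned), while the fixed one correctly gives 792.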
+1 -1
net/core/netpoll.c
···
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

-		if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
+		if (netif_tx_owned(txq, smp_processor_id()))
 			return 1;
 	}

+29 -51
net/core/secure_seq.c
···
 #include <net/tcp.h>

 static siphash_aligned_key_t net_secret;
-static siphash_aligned_key_t ts_secret;

 #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)

 static __always_inline void net_secret_init(void)
 {
 	net_get_random_once(&net_secret, sizeof(net_secret));
-}
-
-static __always_inline void ts_secret_init(void)
-{
-	net_get_random_once(&ts_secret, sizeof(ts_secret));
 }
 #endif
···
 #endif

 #if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_ts_off(const struct net *net,
-			const __be32 *saddr, const __be32 *daddr)
-{
-	const struct {
-		struct in6_addr saddr;
-		struct in6_addr daddr;
-	} __aligned(SIPHASH_ALIGNMENT) combined = {
-		.saddr = *(struct in6_addr *)saddr,
-		.daddr = *(struct in6_addr *)daddr,
-	};
-
-	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
-		return 0;
-
-	ts_secret_init();
-	return siphash(&combined, offsetofend(typeof(combined), daddr),
-		       &ts_secret);
-}
-EXPORT_IPV6_MOD(secure_tcpv6_ts_off);
-
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
-		     __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+			    const __be32 *daddr, __be16 sport, __be16 dport)
 {
 	const struct {
 		struct in6_addr saddr;
···
 		.sport = sport,
 		.dport = dport
 	};
-	u32 hash;
+	union tcp_seq_and_ts_off st;

 	net_secret_init();
-	hash = siphash(&combined, offsetofend(typeof(combined), dport),
-		       &net_secret);
-	return seq_scale(hash);
+
+	st.hash64 = siphash(&combined, offsetofend(typeof(combined), dport),
+			    &net_secret);
+
+	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+		st.ts_off = 0;
+
+	st.seq = seq_scale(st.seq);
+	return st;
 }
-EXPORT_SYMBOL(secure_tcpv6_seq);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_ts_off);

 u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport)
···
 #endif

 #ifdef CONFIG_INET
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
-{
-	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
-		return 0;
-
-	ts_secret_init();
-	return siphash_2u32((__force u32)saddr, (__force u32)daddr,
-			    &ts_secret);
-}
-
 /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
  * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
  * it would be easy enough to have the former function use siphash_4u32, passing
  * the arguments as separate u32.
  */
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
-		   __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+			  __be16 sport, __be16 dport)
 {
-	u32 hash;
+	u32 ports = (__force u32)sport << 16 | (__force u32)dport;
+	union tcp_seq_and_ts_off st;

 	net_secret_init();
-	hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
-			    (__force u32)sport << 16 | (__force u32)dport,
-			    &net_secret);
-	return seq_scale(hash);
+
+	st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+				 ports, &net_secret);
+
+	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+		st.ts_off = 0;
+
+	st.seq = seq_scale(st.seq);
+	return st;
 }
-EXPORT_SYMBOL_GPL(secure_tcp_seq);
+EXPORT_SYMBOL_GPL(secure_tcp_seq_and_ts_off);

 u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
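The secure_seq.c rework above derives both the initial sequence number and the timestamp offset from a single 64-bit siphash output: the patch writes the whole hash into `st.hash64`, then touches `st.ts_off` and `st.seq` as two 32-bit views of it. A standalone sketch of that union trick follows; the field names mirror the patch, but the exact layout of the kernel's `union tcp_seq_and_ts_off` is an assumption here:

```c
#include <assert.h>
#include <stdint.h>

/* One 64-bit hash feeding two independent 32-bit values. Which half is
 * seq and which is ts_off is illustrative; what matters is that writing
 * one member leaves the other's bytes untouched, so the patch can zero
 * ts_off (timestamps disabled) without disturbing the sequence number. */
union seq_and_ts_off {
	uint64_t hash64;
	struct {
		uint32_t seq;
		uint32_t ts_off;
	};
};
```

One siphash call per connection instead of two (the old code kept a separate `ts_secret` keyed hash just for the timestamp offset), which is the point of the consolidated `secure_tcp_seq_and_ts_off()` helpers.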
+7 -7
net/core/skmsg.c
···
 		return;

 	psock->saved_data_ready = sk->sk_data_ready;
-	sk->sk_data_ready = sk_psock_strp_data_ready;
-	sk->sk_write_space = sk_psock_write_space;
+	WRITE_ONCE(sk->sk_data_ready, sk_psock_strp_data_ready);
+	WRITE_ONCE(sk->sk_write_space, sk_psock_write_space);
 }

 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
···
 	if (!psock->saved_data_ready)
 		return;

-	sk->sk_data_ready = psock->saved_data_ready;
-	psock->saved_data_ready = NULL;
+	WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready);
+	WRITE_ONCE(psock->saved_data_ready, NULL);
 	strp_stop(&psock->strp);
 }
···
 		return;

 	psock->saved_data_ready = sk->sk_data_ready;
-	sk->sk_data_ready = sk_psock_verdict_data_ready;
-	sk->sk_write_space = sk_psock_write_space;
+	WRITE_ONCE(sk->sk_data_ready, sk_psock_verdict_data_ready);
+	WRITE_ONCE(sk->sk_write_space, sk_psock_write_space);
 }

 void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
···
 	if (!psock->saved_data_ready)
 		return;

-	sk->sk_data_ready = psock->saved_data_ready;
+	WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready);
 	psock->saved_data_ready = NULL;
 }
+2
net/ipv4/Kconfig
···
 config TCP_AO
 	bool "TCP: Authentication Option (RFC5925)"
 	select CRYPTO
+	select CRYPTO_LIB_UTILS
 	select TCP_SIGPOOL
 	depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64)
 	help
···
 config TCP_MD5SIG
 	bool "TCP: MD5 Signature Option support (RFC2385)"
 	select CRYPTO_LIB_MD5
+	select CRYPTO_LIB_UTILS
 	help
 	  RFC2385 specifies a method of giving MD5 protection to TCP sessions.
 	  Its main (only?) use is to protect BGP sessions between core routers
+4 -4
net/ipv4/inet_hashtables.c
···
 void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 		    struct inet_bind2_bucket *tb2, unsigned short port)
 {
-	inet_sk(sk)->inet_num = port;
+	WRITE_ONCE(inet_sk(sk)->inet_num, port);
 	inet_csk(sk)->icsk_bind_hash = tb;
 	inet_csk(sk)->icsk_bind2_hash = tb2;
 	sk_add_bind_node(sk, &tb2->owners);
···
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	inet_csk(sk)->icsk_bind_hash = NULL;
-	inet_sk(sk)->inet_num = 0;
+	WRITE_ONCE(inet_sk(sk)->inet_num, 0);
 	sk->sk_userlocks &= ~SOCK_CONNECT_BIND;

 	spin_lock(&head2->lock);
···
 {
 	int score = -1;

-	if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
+	if (net_eq(sock_net(sk), net) && READ_ONCE(sk->sk_num) == hnum &&
 	    !ipv6_only_sock(sk)) {
 		if (sk->sk_rcv_saddr != daddr)
 			return -1;
···

 	sk->sk_hash = 0;
 	inet_sk(sk)->inet_sport = 0;
-	inet_sk(sk)->inet_num = 0;
+	WRITE_ONCE(inet_sk(sk)->inet_num, 0);

 	if (tw)
 		inet_twsk_bind_unhash(tw, hinfo);
+8 -3
net/ipv4/syncookies.c
···
 	tcp_parse_options(net, skb, &tcp_opt, 0, NULL);

 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
-		tsoff = secure_tcp_ts_off(net,
-					  ip_hdr(skb)->daddr,
-					  ip_hdr(skb)->saddr);
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcp_seq_and_ts_off(net,
+					       ip_hdr(skb)->daddr,
+					       ip_hdr(skb)->saddr,
+					       tcp_hdr(skb)->dest,
+					       tcp_hdr(skb)->source);
+		tsoff = st.ts_off;
 		tcp_opt.rcv_tsecr -= tsoff;
 	}
+3 -2
net/ipv4/sysctl_net_ipv4.c
···
 				    proc_fib_multipath_hash_rand_seed),
 	};

-	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
+	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed,
+		   new.user_seed);
+	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed,
+		   new.mp_seed);
 }

 static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write,
···
 	int ret;

 	mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
-	user_seed = mphs->user_seed;
+	user_seed = READ_ONCE(mphs->user_seed);

 	tmp = *table;
 	tmp.data = &user_seed;
+4 -3
net/ipv4/tcp.c
···
 #define pr_fmt(fmt) "TCP: " fmt

 #include <crypto/md5.h>
+#include <crypto/utils.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
···
 	err = sk_stream_error(sk, flags, err);
 	/* make sure we wake any epoll edge trigger waiter */
 	if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
-		sk->sk_write_space(sk);
+		READ_ONCE(sk->sk_write_space)(sk);
 		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
 	}
 	if (binding)
···
 		break;
 	case TCP_NOTSENT_LOWAT:
 		WRITE_ONCE(tp->notsent_lowat, val);
-		sk->sk_write_space(sk);
+		READ_ONCE(sk->sk_write_space)(sk);
 		break;
 	case TCP_INQ:
 		if (val > 1 || val < 0)
···
 		tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
 	else
 		tp->af_specific->calc_md5_hash(newhash, key, NULL, skb);
-	if (memcmp(hash_location, newhash, 16) != 0) {
+	if (crypto_memneq(hash_location, newhash, 16)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
 		trace_tcp_hash_md5_mismatch(sk, skb);
 		return SKB_DROP_REASON_TCP_MD5FAILURE;
+2 -1
net/ipv4/tcp_ao.c
···
 #define pr_fmt(fmt) "TCP: " fmt

 #include <crypto/hash.h>
+#include <crypto/utils.h>
 #include <linux/inetdevice.h>
 #include <linux/tcp.h>

···
 	/* XXX: make it per-AF callback? */
 	tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key,
 			(phash - (u8 *)th), sne);
-	if (memcmp(phash, hash_buf, maclen)) {
+	if (crypto_memneq(phash, hash_buf, maclen)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
 		atomic64_inc(&info->counters.pkt_bad);
 		atomic64_inc(&key->pkt_bad);
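The `memcmp` → `crypto_memneq` conversions in tcp.c, tcp_ao.c, and tcp_ipv4.c above are about timing side channels: `memcmp` may return at the first differing byte, so comparison time leaks how many leading MAC bytes an attacker got right. A constant-time comparison ORs all byte differences together before testing. A minimal sketch of the idea (the kernel's `crypto_memneq` is more carefully hardened against compiler optimizations than this):

```c
#include <assert.h>
#include <stddef.h>

/* Returns non-zero iff the buffers differ, visiting every byte regardless
 * of where the first mismatch occurs, so runtime does not depend on the
 * position of the mismatch. */
static int memneq_ct(const void *a, const void *b, size_t n)
{
	const unsigned char *pa = a, *pb = b;
	unsigned char diff = 0;
	size_t i;

	for (i = 0; i < n; i++)
		diff |= pa[i] ^ pb[i];
	return diff != 0;
}
```

Note the inverted sense relative to `memcmp`: non-zero means "not equal", which is why the patch drops the `!= 0` in `if (memcmp(...) != 0)` when converting.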
+1 -1
net/ipv4/tcp_bpf.c
···
 		WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash);
 		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
 	} else {
-		sk->sk_write_space = psock->saved_write_space;
+		WRITE_ONCE(sk->sk_write_space, psock->saved_write_space);
 		/* Pairs with lockless read in sk_clone_lock() */
 		sock_replace_proto(sk, psock->sk_proto);
 	}
+1 -1
net/ipv4/tcp_diag.c
···
 			if (r->sdiag_family != AF_UNSPEC &&
 			    sk->sk_family != r->sdiag_family)
 				goto next_normal;
-			if (r->id.idiag_sport != htons(sk->sk_num) &&
+			if (r->id.idiag_sport != htons(READ_ONCE(sk->sk_num)) &&
 			    r->id.idiag_sport)
 				goto next_normal;
 			if (r->id.idiag_dport != sk->sk_dport &&
+15 -23
net/ipv4/tcp_input.c
···
 static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
 static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);

-/* Check if this incoming skb can be added to socket receive queues
- * while satisfying sk->sk_rcvbuf limit.
- *
- * In theory we should use skb->truesize, but this can cause problems
- * when applications use too small SO_RCVBUF values.
- * When LRO / hw gro is used, the socket might have a high tp->scaling_ratio,
- * allowing RWIN to be close to available space.
- * Whenever the receive queue gets full, we can receive a small packet
- * filling RWIN, but with a high skb->truesize, because most NIC use 4K page
- * plus sk_buff metadata even when receiving less than 1500 bytes of payload.
- *
- * Note that we use skb->len to decide to accept or drop this packet,
- * but sk->sk_rmem_alloc is the sum of all skb->truesize.
- */
 static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb)
 {
 	unsigned int rmem = atomic_read(&sk->sk_rmem_alloc);

-	return rmem + skb->len <= sk->sk_rcvbuf;
+	return rmem <= sk->sk_rcvbuf;
 }

 static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb,
···

 	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
-		sk->sk_data_ready(sk);
+		READ_ONCE(sk->sk_data_ready)(sk);
 		tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
 		return;
 	}
···
 void tcp_data_ready(struct sock *sk)
 {
 	if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE))
-		sk->sk_data_ready(sk);
+		READ_ONCE(sk->sk_data_ready)(sk);
 }

 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
···
 		inet_csk(sk)->icsk_ack.pending |=
 			(ICSK_ACK_NOMEM | ICSK_ACK_NOW);
 		inet_csk_schedule_ack(sk);
-		sk->sk_data_ready(sk);
+		READ_ONCE(sk->sk_data_ready)(sk);

 		if (skb_queue_len(&sk->sk_receive_queue) && skb->len) {
 			reason = SKB_DROP_REASON_PROTO_MEM;
···
 		tp->snd_cwnd_stamp = tcp_jiffies32;
 	}

-	INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
+	INDIRECT_CALL_1(READ_ONCE(sk->sk_write_space),
+			sk_stream_write_space,
+			sk);
 }

 /* Caller made space either from:
···
 			BUG();
 		WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp);
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_data_ready(sk);
+			READ_ONCE(sk->sk_data_ready)(sk);
 	}
 }
···
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct sock *fastopen_sk = NULL;
+	union tcp_seq_and_ts_off st;
 	struct request_sock *req;
 	bool want_cookie = false;
 	struct dst_entry *dst;
···
 	if (!dst)
 		goto drop_and_free;

+	if (tmp_opt.tstamp_ok || (!want_cookie && !isn))
+		st = af_ops->init_seq_and_ts_off(net, skb);
+
 	if (tmp_opt.tstamp_ok) {
 		tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
-		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
+		tcp_rsk(req)->ts_off = st.ts_off;
 	}
 	if (!want_cookie && !isn) {
 		int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
···
 			goto drop_and_release;
 		}

-		isn = af_ops->init_seq(skb);
+		isn = st.seq;
 	}

 	tcp_ecn_create_request(req, skb, sk, dst);
···
 			sock_put(fastopen_sk);
 			goto drop_and_free;
 		}
-		sk->sk_data_ready(sk);
+		READ_ONCE(sk->sk_data_ready)(sk);
 		bh_unlock_sock(fastopen_sk);
 		sock_put(fastopen_sk);
 	} else {
+19 -21
net/ipv4/tcp_ipv4.c
···
 #include <linux/skbuff_ref.h>

 #include <crypto/md5.h>
+#include <crypto/utils.h>

 #include <trace/events/tcp.h>
···

 static DEFINE_MUTEX(tcp_exit_batch_mutex);

-static u32 tcp_v4_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
 {
-	return secure_tcp_seq(ip_hdr(skb)->daddr,
-			      ip_hdr(skb)->saddr,
-			      tcp_hdr(skb)->dest,
-			      tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
-	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+	return secure_tcp_seq_and_ts_off(net,
+					 ip_hdr(skb)->daddr,
+					 ip_hdr(skb)->saddr,
+					 tcp_hdr(skb)->dest,
+					 tcp_hdr(skb)->source);
 }

 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
···
 	rt = NULL;

 	if (likely(!tp->repair)) {
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcp_seq_and_ts_off(net,
+					       inet->inet_saddr,
+					       inet->inet_daddr,
+					       inet->inet_sport,
+					       usin->sin_port);
 		if (!tp->write_seq)
-			WRITE_ONCE(tp->write_seq,
-				   secure_tcp_seq(inet->inet_saddr,
-						  inet->inet_daddr,
-						  inet->inet_sport,
-						  usin->sin_port));
-		WRITE_ONCE(tp->tsoffset,
-			   secure_tcp_ts_off(net, inet->inet_saddr,
-					     inet->inet_daddr));
+			WRITE_ONCE(tp->write_seq, st.seq);
+		WRITE_ONCE(tp->tsoffset, st.ts_off);
 	}

 	atomic_set(&inet->inet_id, get_random_u16());
···
 		goto out;

 	tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
-	if (memcmp(md5_hash_location, newhash, 16) != 0)
+	if (crypto_memneq(md5_hash_location, newhash, 16))
 		goto out;
 }
···
 	.cookie_init_seq =	cookie_v4_init_sequence,
 #endif
 	.route_req	=	tcp_v4_route_req,
-	.init_seq	=	tcp_v4_init_seq,
-	.init_ts_off	=	tcp_v4_init_ts_off,
+	.init_seq_and_ts_off =	tcp_v4_init_seq_and_ts_off,
 	.send_synack	=	tcp_v4_send_synack,
 };
+1 -1
net/ipv4/tcp_minisocks.c
···
 		reason = tcp_rcv_state_process(child, skb);
 		/* Wakeup parent, send SIGIO */
 		if (state == TCP_SYN_RECV && child->sk_state != state)
-			parent->sk_data_ready(parent);
+			READ_ONCE(parent->sk_data_ready)(parent);
 	} else {
 		/* Alas, it is possible again, because we do lookup
 		 * in main socket hash table and lock on listening
+15 -10
net/ipv4/udp.c
···
 	 * using prepare_to_wait_exclusive().
 	 */
 	while (nb) {
-		INDIRECT_CALL_1(sk->sk_data_ready,
+		INDIRECT_CALL_1(READ_ONCE(sk->sk_data_ready),
 				sock_def_readable, sk);
 		nb--;
 	}
···
 			       udp_sk(sk)->udp_port_hash);
 	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
 	nhslot2 = udp_hashslot2(udptable, newhash);
-	udp_sk(sk)->udp_portaddr_hash = newhash;

 	if (hslot2 != nhslot2 ||
 	    rcu_access_pointer(sk->sk_reuseport_cb)) {
···
 		if (udp_hashed4(sk)) {
 			spin_lock_bh(&hslot->lock);

-			udp_rehash4(udptable, sk, newhash4);
-			if (hslot2 != nhslot2) {
-				spin_lock(&hslot2->lock);
-				udp_hash4_dec(hslot2);
-				spin_unlock(&hslot2->lock);
+			if (inet_rcv_saddr_any(sk)) {
+				udp_unhash4(udptable, sk);
+			} else {
+				udp_rehash4(udptable, sk, newhash4);
+				if (hslot2 != nhslot2) {
+					spin_lock(&hslot2->lock);
+					udp_hash4_dec(hslot2);
+					spin_unlock(&hslot2->lock);

-				spin_lock(&nhslot2->lock);
-				udp_hash4_inc(nhslot2);
-				spin_unlock(&nhslot2->lock);
+					spin_lock(&nhslot2->lock);
+					udp_hash4_inc(nhslot2);
+					spin_unlock(&nhslot2->lock);
+				}
 			}

 			spin_unlock_bh(&hslot->lock);
 		}
+
+		udp_sk(sk)->udp_portaddr_hash = newhash;
 	}
 }
 EXPORT_IPV6_MOD(udp_lib_rehash);
+1 -1
net/ipv4/udp_bpf.c
···
 	int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;

 	if (restore) {
-		sk->sk_write_space = psock->saved_write_space;
+		WRITE_ONCE(sk->sk_write_space, psock->saved_write_space);
 		sock_replace_proto(sk, psock->sk_proto);
 		return 0;
 	}
+2 -1
net/ipv6/inet6_hashtables.c
···
 {
 	int score = -1;

-	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
+	if (net_eq(sock_net(sk), net) &&
+	    READ_ONCE(inet_sk(sk)->inet_num) == hnum &&
 	    sk->sk_family == PF_INET6) {
 		if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
 			return -1;
+5 -6
net/ipv6/route.c
···
 	 */
 	if (netif_is_l3_slave(dev) &&
 	    !rt6_need_strict(&res->f6i->fib6_dst.addr))
-		dev = l3mdev_master_dev_rcu(dev);
+		dev = l3mdev_master_dev_rcu(dev) ? :
+		      dev_net(dev)->loopback_dev;
 	else if (!netif_is_l3_master(dev))
 		dev = dev_net(dev)->loopback_dev;
 	/* last case is netif_is_l3_master(dev) is true in which
···
 	netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
-	int addr_type;
 	int err;

 	fib6_nh->fib_nh_family = AF_INET6;
···

 	fib6_nh->fib_nh_weight = 1;

-	/* We cannot add true routes via loopback here,
-	 * they would result in kernel looping; promote them to reject routes
+	/* Reset the nexthop device to the loopback device in case of reject
+	 * routes.
 	 */
-	addr_type = ipv6_addr_type(&cfg->fc_dst);
-	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
+	if (cfg->fc_flags & RTF_REJECT) {
 		/* hold loopback dev/idev if we haven't done so. */
 		if (dev != net->loopback_dev) {
 			if (dev) {
+8 -3
net/ipv6/syncookies.c
···
 	tcp_parse_options(net, skb, &tcp_opt, 0, NULL);

 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
-		tsoff = secure_tcpv6_ts_off(net,
-					    ipv6_hdr(skb)->daddr.s6_addr32,
-					    ipv6_hdr(skb)->saddr.s6_addr32);
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcpv6_seq_and_ts_off(net,
+						 ipv6_hdr(skb)->daddr.s6_addr32,
+						 ipv6_hdr(skb)->saddr.s6_addr32,
+						 tcp_hdr(skb)->dest,
+						 tcp_hdr(skb)->source);
+		tsoff = st.ts_off;
 		tcp_opt.rcv_tsecr -= tsoff;
 	}
+19 -21
net/ipv6/tcp_ipv6.c
···
 #include <linux/seq_file.h>
 
 #include <crypto/md5.h>
+#include <crypto/utils.h>
 
 #include <trace/events/tcp.h>
 
···
 	}
 }
 
-static u32 tcp_v6_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
 {
-	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
-				ipv6_hdr(skb)->saddr.s6_addr32,
-				tcp_hdr(skb)->dest,
-				tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
-	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
-				   ipv6_hdr(skb)->saddr.s6_addr32);
+	return secure_tcpv6_seq_and_ts_off(net,
+					   ipv6_hdr(skb)->daddr.s6_addr32,
+					   ipv6_hdr(skb)->saddr.s6_addr32,
+					   tcp_hdr(skb)->dest,
+					   tcp_hdr(skb)->source);
 }
 
 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
···
 	sk_set_txhash(sk);
 
 	if (likely(!tp->repair)) {
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcpv6_seq_and_ts_off(net,
+						 np->saddr.s6_addr32,
+						 sk->sk_v6_daddr.s6_addr32,
+						 inet->inet_sport,
+						 inet->inet_dport);
 		if (!tp->write_seq)
-			WRITE_ONCE(tp->write_seq,
-				   secure_tcpv6_seq(np->saddr.s6_addr32,
-						    sk->sk_v6_daddr.s6_addr32,
-						    inet->inet_sport,
-						    inet->inet_dport));
-		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
-						   sk->sk_v6_daddr.s6_addr32);
+			WRITE_ONCE(tp->write_seq, st.seq);
+		tp->tsoffset = st.ts_off;
 	}
 
 	if (tcp_fastopen_defer_connect(sk, &err))
···
 	.cookie_init_seq = cookie_v6_init_sequence,
 #endif
 	.route_req = tcp_v6_route_req,
-	.init_seq = tcp_v6_init_seq,
-	.init_ts_off = tcp_v6_init_ts_off,
+	.init_seq_and_ts_off = tcp_v6_init_seq_and_ts_off,
 	.send_synack = tcp_v6_send_synack,
 };
 
···
 		key.type = TCP_KEY_MD5;
 
 		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
-		if (memcmp(md5_hash_location, newhash, 16) != 0)
+		if (crypto_memneq(md5_hash_location, newhash, 16))
 			goto out;
 	}
 #endif
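The tcp_ipv6.c hunk swaps `memcmp()` for `crypto_memneq()` when checking the received MD5 digest, so the comparison takes the same time regardless of where the first mismatching byte sits. A minimal userspace sketch of the idea (the name `ct_memneq` is hypothetical, standing in for the kernel helper):

```c
#include <stddef.h>

/* Constant-time inequality test: OR-accumulate the XOR of every byte
 * pair instead of returning at the first mismatch, so the run time
 * leaks nothing about the position of a difference.
 * Returns nonzero iff the buffers differ. */
static unsigned long ct_memneq(const void *a, const void *b, size_t n)
{
	const unsigned char *pa = a, *pb = b;
	unsigned long neq = 0;
	size_t i;

	for (i = 0; i < n; i++)
		neq |= (unsigned long)(pa[i] ^ pb[i]); /* never short-circuits */

	return neq;
}
```

An early-exit `memcmp()` on a secret digest lets an attacker time how many leading bytes matched; accumulating over the full length removes that signal.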
+1
net/mac80211/eht.c
···
 	u8 *ptr = mgmt->u.action.u.eml_omn.variable;
 	struct ieee80211_eml_params eml_params = {
 		.link_id = status->link_id,
+		.control = control,
 	};
 	struct sta_info *sta;
 	int opt_len = 0;
+43 -12
net/mptcp/pm.c
···
 	spin_lock_bh(&msk->pm.lock);
 }
 
-void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow,
+			       const struct mptcp_rm_list *rm_list)
 {
-	struct mptcp_subflow_context *subflow, *alt = NULL;
+	u8 i, id = subflow_get_local_id(subflow);
+
+	for (i = 0; i < rm_list->nr; i++) {
+		if (rm_list->ids[i] == id)
+			return true;
+	}
+
+	return false;
+}
+
+static void
+mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
+				  const struct mptcp_rm_list *rm_list)
+{
+	struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;
 
 	msk_owned_by_me(msk);
 	lockdep_assert_held(&msk->pm.lock);
···
 		return;
 
 	mptcp_for_each_subflow(msk, subflow) {
-		if (__mptcp_subflow_active(subflow)) {
-			if (!subflow->stale) {
-				mptcp_pm_send_ack(msk, subflow, false, false);
-				return;
-			}
+		if (!__mptcp_subflow_active(subflow))
+			continue;
 
-			if (!alt)
-				alt = subflow;
+		if (unlikely(subflow->stale)) {
+			if (!stale)
+				stale = subflow;
+		} else if (unlikely(rm_list &&
+				    subflow_in_rm_list(subflow, rm_list))) {
+			if (!same_id)
+				same_id = subflow;
+		} else {
+			goto send_ack;
 		}
 	}
 
-	if (alt)
-		mptcp_pm_send_ack(msk, alt, false, false);
+	if (same_id)
+		subflow = same_id;
+	else if (stale)
+		subflow = stale;
+	else
+		return;
+
+send_ack:
+	mptcp_pm_send_ack(msk, subflow, false, false);
+}
+
+void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+{
+	mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
 }
 
 int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
···
 	msk->pm.rm_list_tx = *rm_list;
 	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
 	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
-	mptcp_pm_addr_send_ack(msk);
+	mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
 	return 0;
 }
 
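The pm.c change turns a two-way preference (active vs. stale) into a three-way one: prefer the first subflow that is neither stale nor about to have its local id removed, fall back to one in the removal list, and only then to a stale one. A standalone sketch of that selection order, with illustrative types that are not the MPTCP API:

```c
#include <stddef.h>

/* Hypothetical flattened model of a subflow, for demonstration only. */
struct subflow {
	int active;     /* usable at all */
	int stale;      /* worst fallback */
	int in_rm_list; /* local id scheduled for removal: middle fallback */
};

/* Return the best candidate following the patch's preference order,
 * or NULL when no subflow is active. The scan stops early as soon as a
 * fully usable subflow is found, mirroring the goto in the kernel code. */
static struct subflow *pick_subflow(struct subflow *sf, size_t n)
{
	struct subflow *stale = NULL, *same_id = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		if (!sf[i].active)
			continue;
		if (sf[i].stale) {
			if (!stale)
				stale = &sf[i];
		} else if (sf[i].in_rm_list) {
			if (!same_id)
				same_id = &sf[i];
		} else {
			return &sf[i]; /* best class: stop scanning */
		}
	}

	return same_id ? same_id : stale; /* may be NULL */
}
```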
+9
net/mptcp/pm_kernel.c
···
 	}
 
 exit:
+	/* If an endpoint has both the signal and subflow flags, but it is not
+	 * possible to create subflows -- the 'while' loop body above never
+	 * executed -- then still mark the endp as used, which is somehow the
+	 * case. This avoids issues later when removing the endpoint and calling
+	 * __mark_subflow_endp_available(), which expects the increment here.
+	 */
+	if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id)
+		msk->pm.local_addr_used++;
+
 	mptcp_pm_nl_check_work_pending(msk);
 }
 
+25 -20
net/netfilter/nf_tables_api.c
···
 	}
 }
 
+/* Use NFT_ITER_UPDATE iterator even if this may be called from the preparation
+ * phase, the set clone might already exist from a previous command, or it might
+ * be a set that is going away and does not require a clone. The netns and
+ * netlink release paths also need to work on the live set.
+ */
 static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	struct nft_set_iter iter = {
···
 	struct nft_data_desc desc;
 	enum nft_registers dreg;
 	struct nft_trans *trans;
+	bool set_full = false;
 	u64 expiration;
 	u64 timeout;
 	int err, i;
···
 	if (err < 0)
 		goto err_elem_free;
 
+	if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+		unsigned int max = nft_set_maxsize(set), nelems;
+
+		nelems = atomic_inc_return(&set->nelems);
+		if (nelems > max)
+			set_full = true;
+	}
+
 	trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
 	if (trans == NULL) {
 		err = -ENOMEM;
-		goto err_elem_free;
+		goto err_set_size;
 	}
 
 	ext->genmask = nft_genmask_cur(ctx->net);
···
 
 				ue->priv = elem_priv;
 				nft_trans_commit_list_add_elem(ctx->net, trans);
-				goto err_elem_free;
+				goto err_set_size;
 			}
 		}
 	}
···
 		goto err_element_clash;
 	}
 
-	if (!(flags & NFT_SET_ELEM_CATCHALL)) {
-		unsigned int max = nft_set_maxsize(set);
-
-		if (!atomic_add_unless(&set->nelems, 1, max)) {
-			err = -ENFILE;
-			goto err_set_full;
-		}
-	}
-
 	nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
 	nft_trans_commit_list_add_elem(ctx->net, trans);
-	return 0;
 
-err_set_full:
-	nft_setelem_remove(ctx->net, set, elem.priv);
+	return set_full ? -ENFILE : 0;
+
 err_element_clash:
 	kfree(trans);
+err_set_size:
+	if (!(flags & NFT_SET_ELEM_CATCHALL))
+		atomic_dec(&set->nelems);
 err_elem_free:
 	nf_tables_set_elem_destroy(ctx, set, elem.priv);
 err_parse_data:
···
 
 static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
 {
+	/* The set backend might need to clone the set, do it now from the
+	 * preparation phase, use NFT_ITER_UPDATE_CLONE iterator type.
+	 */
 	struct nft_set_iter iter = {
 		.genmask = genmask,
-		.type = NFT_ITER_UPDATE,
+		.type = NFT_ITER_UPDATE_CLONE,
 		.fn = nft_setelem_flush,
 	};
 
···
 	spin_unlock(&nf_tables_gc_list_lock);
 
 	schedule_work(&trans_gc_work);
-}
-
-static int nft_trans_gc_space(struct nft_trans_gc *trans)
-{
-	return NFT_TRANS_GC_BATCHCOUNT - trans->count;
 }
 
 struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
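The nf_tables hunk moves from `atomic_add_unless()` (refuse the increment when at the cap) to an optimistic `atomic_inc_return()` recorded up front and undone on every error path, because the cap check now has to happen before the transaction allocation. A userspace sketch of that accounting pattern, with illustrative names and plain error codes rather than the kernel API:

```c
#include <stdatomic.h>

/* Element counter shared by all inserters (stands in for set->nelems). */
static atomic_uint nelems;

/* Optimistically count the element, remember whether the cap was
 * exceeded, and roll the counter back if a later step fails. Returns
 * 0 on success, -1 for a later-step failure (counter restored), and
 * -2 when the insert succeeded mechanically but the set is over its
 * cap (the caller is expected to unwind the whole transaction). */
static int set_insert(unsigned int max, int later_step_fails)
{
	int set_full = 0;

	if (atomic_fetch_add(&nelems, 1) + 1 > max)
		set_full = 1;

	if (later_step_fails) {
		atomic_fetch_sub(&nelems, 1); /* roll back the count */
		return -1;                    /* e.g. -ENOMEM */
	}

	return set_full ? -2 : 0;             /* e.g. -ENFILE when full */
}
```

The increment-then-undo shape keeps the counter consistent without needing to re-check the cap under a lock on the error paths.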
+1
net/netfilter/nft_set_hash.c
···
 {
 	switch (iter->type) {
 	case NFT_ITER_UPDATE:
+	case NFT_ITER_UPDATE_CLONE:
 		/* only relevant for netlink dumps which use READ type */
 		WARN_ON_ONCE(iter->skip != 0);
 
+52 -10
net/netfilter/nft_set_pipapo.c
···
 }
 
 /**
- * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * pipapo_gc_scan() - Drop expired entries from set and link them to gc list
  * @set: nftables API set representation
  * @m: Matching data
  */
-static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
 	struct net *net = read_pnet(&set->net);
···
 	gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
 	if (!gc)
 		return;
+
+	list_add(&gc->list, &priv->gc_head);
 
 	while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
 		union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
···
 		 * NFT_SET_ELEM_DEAD_BIT.
 		 */
 		if (__nft_set_elem_expired(&e->ext, tstamp)) {
-			gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
-			if (!gc)
-				return;
+			if (!nft_trans_gc_space(gc)) {
+				gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+				if (!gc)
+					return;
+
+				list_add(&gc->list, &priv->gc_head);
+			}
 
 			nft_pipapo_gc_deactivate(net, set, e);
 			pipapo_drop(m, rulemap);
···
 		}
 	}
 
-	gc = nft_trans_gc_catchall_sync(gc);
+	priv->last_gc = jiffies;
+}
+
+/**
+ * pipapo_gc_queue() - Free expired elements
+ * @set: nftables API set representation
+ */
+static void pipapo_gc_queue(struct nft_set *set)
+{
+	struct nft_pipapo *priv = nft_set_priv(set);
+	struct nft_trans_gc *gc, *next;
+
+	/* always do a catchall cycle: */
+	gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
 	if (gc) {
+		gc = nft_trans_gc_catchall_sync(gc);
+		if (gc)
+			nft_trans_gc_queue_sync_done(gc);
+	}
+
+	/* always purge queued gc elements. */
+	list_for_each_entry_safe(gc, next, &priv->gc_head, list) {
+		list_del(&gc->list);
 		nft_trans_gc_queue_sync_done(gc);
-		priv->last_gc = jiffies;
 	}
 }
 
···
  *
  * We also need to create a new working copy for subsequent insertions and
  * deletions.
+ *
+ * After the live copy has been replaced by the clone, we can safely queue
+ * expired elements that have been collected by pipapo_gc_scan() for
+ * memory reclaim.
  */
 static void nft_pipapo_commit(struct nft_set *set)
 {
···
 		return;
 
 	if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
-		pipapo_gc(set, priv->clone);
+		pipapo_gc_scan(set, priv->clone);
 
 	old = rcu_replace_pointer(priv->match, priv->clone,
 				  nft_pipapo_transaction_mutex_held(set));
···
 
 	if (old)
 		call_rcu(&old->rcu, pipapo_reclaim_match);
+
+	pipapo_gc_queue(set);
 }
 
 static void nft_pipapo_abort(const struct nft_set *set)
···
 	const struct nft_pipapo_match *m;
 
 	switch (iter->type) {
-	case NFT_ITER_UPDATE:
+	case NFT_ITER_UPDATE_CLONE:
 		m = pipapo_maybe_clone(set);
 		if (!m) {
 			iter->err = -ENOMEM;
 			return;
 		}
-
+		nft_pipapo_do_walk(ctx, set, m, iter);
+		break;
+	case NFT_ITER_UPDATE:
+		if (priv->clone)
+			m = priv->clone;
+		else
+			m = rcu_dereference_protected(priv->match,
+						      nft_pipapo_transaction_mutex_held(set));
 		nft_pipapo_do_walk(ctx, set, m, iter);
 		break;
 	case NFT_ITER_READ:
···
 		f->mt = NULL;
 	}
 
+	INIT_LIST_HEAD(&priv->gc_head);
 	rcu_assign_pointer(priv->match, m);
 
 	return 0;
···
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
 	struct nft_pipapo_match *m;
+
+	WARN_ON_ONCE(!list_empty(&priv->gc_head));
 
 	m = rcu_dereference_protected(priv->match, true);
 
+2
net/netfilter/nft_set_pipapo.h
···
  * @clone: Copy where pending insertions and deletions are kept
  * @width: Total bytes to be matched for one packet, including padding
  * @last_gc: Timestamp of last garbage collection run, jiffies
+ * @gc_head: list of nft_trans_gc to queue up for mem reclaim
  */
 struct nft_pipapo {
 	struct nft_pipapo_match __rcu *match;
 	struct nft_pipapo_match *clone;
 	int width;
 	unsigned long last_gc;
+	struct list_head gc_head;
 };
 
 struct nft_pipapo_elem;
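The pipapo change splits GC into a scan phase that only links batches onto `priv->gc_head` and a drain phase (`pipapo_gc_queue()`) that runs after the live match copy has been swapped out. A minimal userspace sketch of that gather-then-drain shape, using a hypothetical hand-rolled singly linked list rather than the kernel's `list_head`:

```c
#include <stdlib.h>

/* One collected batch of to-be-reclaimed items (stand-in for
 * nft_trans_gc). */
struct gc_batch {
	struct gc_batch *next;
	int count; /* items in this batch */
};

struct gc_list {
	struct gc_batch *head; /* stand-in for priv->gc_head */
};

/* Scan phase: record a batch on the list but do NOT free anything yet,
 * because readers may still see the old copy of the structure. */
static void gc_collect(struct gc_list *l, int count)
{
	struct gc_batch *b = malloc(sizeof(*b));

	if (!b)
		return;
	b->count = count;
	b->next = l->head;
	l->head = b;
}

/* Drain phase: once the old copy is unreachable, walk the list and
 * reclaim every queued batch. Returns the total items reclaimed. */
static int gc_drain(struct gc_list *l)
{
	int total = 0;

	while (l->head) {
		struct gc_batch *b = l->head;

		l->head = b->next;
		total += b->count;
		free(b); /* safe only at drain time */
	}
	return total;
}
```

Deferring the `free()` to a single drain point is what lets the scan run while the old match data is still published to readers.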
+5 -3
net/netfilter/nft_set_rbtree.c
···
 	struct nft_rbtree *priv = nft_set_priv(set);
 
 	switch (iter->type) {
-	case NFT_ITER_UPDATE:
-		lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
-
+	case NFT_ITER_UPDATE_CLONE:
 		if (nft_array_may_resize(set) < 0) {
 			iter->err = -ENOMEM;
 			break;
 		}
+		fallthrough;
+	case NFT_ITER_UPDATE:
+		lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
+
 		nft_rbtree_do_walk(ctx, set, iter);
 		break;
 	case NFT_ITER_READ:
+6 -2
net/nfc/digital_core.c
···
 	int rc;
 
 	data_exch = kzalloc_obj(*data_exch);
-	if (!data_exch)
+	if (!data_exch) {
+		kfree_skb(skb);
 		return -ENOMEM;
+	}
 
 	data_exch->cb = cb;
 	data_exch->cb_context = cb_context;
···
 				     data_exch);
 
 exit:
-	if (rc)
+	if (rc) {
+		kfree_skb(skb);
 		kfree(data_exch);
+	}
 
 	return rc;
 }
+27 -3
net/nfc/nci/core.c
···
 		flush_workqueue(ndev->cmd_wq);
 		timer_delete_sync(&ndev->cmd_timer);
 		timer_delete_sync(&ndev->data_timer);
+		if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+			nci_data_exchange_complete(ndev, NULL,
+						   ndev->cur_conn_id,
+						   -ENODEV);
 		mutex_unlock(&ndev->req_lock);
 		return 0;
 	}
···
 	flush_workqueue(ndev->cmd_wq);
 
 	timer_delete_sync(&ndev->cmd_timer);
+	timer_delete_sync(&ndev->data_timer);
+
+	if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+		nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id,
+					   -ENODEV);
 
 	/* Clear flags except NCI_UNREG */
 	ndev->flags &= BIT(NCI_UNREG);
···
 	struct nci_conn_info *conn_info;
 
 	conn_info = ndev->rf_conn_info;
-	if (!conn_info)
+	if (!conn_info) {
+		kfree_skb(skb);
 		return -EPROTO;
+	}
 
 	pr_debug("target_idx %d, len %d\n", target->idx, skb->len);
 
 	if (!ndev->target_active_prot) {
 		pr_err("unable to exchange data, no active target\n");
+		kfree_skb(skb);
 		return -EINVAL;
 	}
 
-	if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags))
+	if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) {
+		kfree_skb(skb);
 		return -EBUSY;
+	}
 
 	/* store cb and context to be used on receiving data */
 	conn_info->data_exchange_cb = cb;
···
 	unsigned int hdr_size = NCI_CTRL_HDR_SIZE;
 
 	if (skb->len < hdr_size ||
-	    !nci_plen(skb->data) ||
 	    skb->len < hdr_size + nci_plen(skb->data)) {
 		return false;
 	}
+
+	if (!nci_plen(skb->data)) {
+		/* Allow zero length in proprietary notifications (0x20 - 0x3F). */
+		if (nci_opcode_oid(nci_opcode(skb->data)) >= 0x20 &&
+		    nci_mt(skb->data) == NCI_MT_NTF_PKT)
+			return true;
+
+		/* Disallow zero length otherwise. */
+		return false;
+	}
+
+	return true;
+}
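The nci/core.c validity check stops rejecting all zero-length payloads and instead allows them only for notification packets whose opcode id falls in the proprietary range. A standalone sketch of that rule, with hypothetical constants and field extraction standing in for the `nci_mt()`/`nci_opcode_oid()` helpers (the exact bit layout here is illustrative, not a transcription of the NCI spec):

```c
#include <stddef.h>

#define HDR_SIZE 3  /* assumed 3-byte control header */
#define MT_NTF   3  /* assumed notification message type value */

/* Return 1 if a packet passes the relaxed length check: declared
 * payload must fit in the buffer, and a zero-length payload is only
 * valid for notifications with a proprietary-range opcode id. */
static int pkt_valid(const unsigned char *data, size_t len)
{
	unsigned char mt, oid, plen;

	if (len < HDR_SIZE)
		return 0;

	mt   = data[0] >> 5;   /* message type, top bits of octet 0 */
	oid  = data[1] & 0x3f; /* opcode id */
	plen = data[2];        /* declared payload length */

	if (len < HDR_SIZE + (size_t)plen)
		return 0;

	if (!plen) /* zero length: proprietary notifications only */
		return mt == MT_NTF && oid >= 0x20;

	return 1;
}
```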
+8 -4
net/nfc/nci/data.c
···
 	conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
 	if (!conn_info) {
 		kfree_skb(skb);
-		goto exit;
+		clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+		return;
 	}
 
 	cb = conn_info->data_exchange_cb;
···
 	timer_delete_sync(&ndev->data_timer);
 	clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags);
 
+	/* Mark the exchange as done before calling the callback.
+	 * The callback (e.g. rawsock_data_exchange_complete) may
+	 * want to immediately queue another data exchange.
+	 */
+	clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
+
 	if (cb) {
 		/* forward skb to nfc core */
 		cb(cb_context, skb, err);
···
 		/* no waiting callback, free skb */
 		kfree_skb(skb);
 	}
-
-exit:
-	clear_bit(NCI_DATA_EXCHANGE, &ndev->flags);
 }
 
 /* ----------------- NCI TX Data ----------------- */
+11
net/nfc/rawsock.c
···
 	if (sock->type == SOCK_RAW)
 		nfc_sock_unlink(&raw_sk_list, sk);
 
+	if (sk->sk_state == TCP_ESTABLISHED) {
+		/* Prevent rawsock_tx_work from starting new transmits and
+		 * wait for any in-progress work to finish. This must happen
+		 * before the socket is orphaned to avoid a race where
+		 * rawsock_tx_work runs after the NCI device has been freed.
+		 */
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+		cancel_work_sync(&nfc_rawsock(sk)->tx_work);
+		rawsock_write_queue_purge(sk);
+	}
+
 	sock_orphan(sk);
 	sock_put(sk);
 
+10 -4
net/rds/tcp.c
···
 	struct rds_tcp_net *rtn;
 
 	tcp_sock_set_nodelay(sock->sk);
-	lock_sock(sk);
 	/* TCP timer functions might access net namespace even after
 	 * a process which created this net namespace terminated.
 	 */
 	if (!sk->sk_net_refcnt) {
-		if (!maybe_get_net(net)) {
-			release_sock(sk);
+		if (!maybe_get_net(net))
 			return false;
-		}
+		/*
+		 * sk_net_refcnt_upgrade() must be called before lock_sock()
+		 * because it does a GFP_KERNEL allocation, which can trigger
+		 * fs_reclaim and create a circular lock dependency with the
+		 * socket lock. The fields it modifies (sk_net_refcnt,
+		 * ns_tracker) are not accessed by any concurrent code path
+		 * at this point.
+		 */
 		sk_net_refcnt_upgrade(sk);
 		put_net(net);
 	}
+	lock_sock(sk);
 	rtn = net_generic(net, rds_tcp_netid);
 	if (rtn->sndbuf_size > 0) {
 		sk->sk_sndbuf = rtn->sndbuf_size;
+6
net/sched/act_ct.c
···
 		return -EINVAL;
 	}
 
+	if (bind && !(flags & TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Attaching ct to a non ingress/clsact qdisc is unsupported");
+		return -EOPNOTSUPP;
+	}
+
 	err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
 	if (err < 0)
 		return err;
+187 -80
net/sched/act_gate.c
···
 	return KTIME_MAX;
 }
 
-static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void tcf_gate_params_free_rcu(struct rcu_head *head);
+
+static void gate_get_start_time(struct tcf_gate *gact,
+				const struct tcf_gate_params *param,
+				ktime_t *start)
 {
-	struct tcf_gate_params *param = &gact->param;
 	ktime_t now, base, cycle;
 	u64 n;
 
···
 {
 	struct tcf_gate *gact = container_of(timer, struct tcf_gate,
 					     hitimer);
-	struct tcf_gate_params *p = &gact->param;
 	struct tcfg_gate_entry *next;
+	struct tcf_gate_params *p;
 	ktime_t close_time, now;
 
 	spin_lock(&gact->tcf_lock);
 
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&gact->tcf_lock));
 	next = gact->next_entry;
 
 	/* cycle start, clear pending bit, clear total octets */
···
 	}
 }
 
+static int tcf_gate_copy_entries(struct tcf_gate_params *dst,
+				 const struct tcf_gate_params *src,
+				 struct netlink_ext_ack *extack)
+{
+	struct tcfg_gate_entry *entry;
+	int i = 0;
+
+	list_for_each_entry(entry, &src->entries, list) {
+		struct tcfg_gate_entry *new;
+
+		new = kzalloc(sizeof(*new), GFP_ATOMIC);
+		if (!new) {
+			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+			return -ENOMEM;
+		}
+
+		new->index = entry->index;
+		new->gate_state = entry->gate_state;
+		new->interval = entry->interval;
+		new->ipv = entry->ipv;
+		new->maxoctets = entry->maxoctets;
+		list_add_tail(&new->list, &dst->entries);
+		i++;
+	}
+
+	dst->num_entries = i;
+	return 0;
+}
+
 static int parse_gate_list(struct nlattr *list_attr,
 			   struct tcf_gate_params *sched,
 			   struct netlink_ext_ack *extack)
···
 	return err;
 }
 
-static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
-			     enum tk_offsets tko, s32 clockid,
-			     bool do_init)
+static bool gate_timer_needs_cancel(u64 basetime, u64 old_basetime,
+				    enum tk_offsets tko,
+				    enum tk_offsets old_tko,
+				    s32 clockid, s32 old_clockid)
 {
-	if (!do_init) {
-		if (basetime == gact->param.tcfg_basetime &&
-		    tko == gact->tk_offset &&
-		    clockid == gact->param.tcfg_clockid)
-			return;
+	return basetime != old_basetime ||
+	       clockid != old_clockid ||
+	       tko != old_tko;
+}
 
-		spin_unlock_bh(&gact->tcf_lock);
-		hrtimer_cancel(&gact->hitimer);
-		spin_lock_bh(&gact->tcf_lock);
+static int gate_clock_resolve(s32 clockid, enum tk_offsets *tko,
+			      struct netlink_ext_ack *extack)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		*tko = TK_OFFS_REAL;
+		return 0;
+	case CLOCK_MONOTONIC:
+		*tko = TK_OFFS_MAX;
+		return 0;
+	case CLOCK_BOOTTIME:
+		*tko = TK_OFFS_BOOT;
+		return 0;
+	case CLOCK_TAI:
+		*tko = TK_OFFS_TAI;
+		return 0;
+	default:
+		NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+		return -EINVAL;
 	}
-	gact->param.tcfg_basetime = basetime;
-	gact->param.tcfg_clockid = clockid;
-	gact->tk_offset = tko;
-	hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT);
+}
+
+static void gate_setup_timer(struct tcf_gate *gact, s32 clockid,
+			     enum tk_offsets tko)
+{
+	WRITE_ONCE(gact->tk_offset, tko);
+	hrtimer_setup(&gact->hitimer, gate_timer_func, clockid,
+		      HRTIMER_MODE_ABS_SOFT);
 }
 
 static int tcf_gate_init(struct net *net, struct nlattr *nla,
···
 			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
-	enum tk_offsets tk_offset = TK_OFFS_TAI;
+	u64 cycletime = 0, basetime = 0, cycletime_ext = 0;
+	struct tcf_gate_params *p = NULL, *old_p = NULL;
+	enum tk_offsets old_tk_offset = TK_OFFS_TAI;
+	const struct tcf_gate_params *cur_p = NULL;
 	bool bind = flags & TCA_ACT_FLAGS_BIND;
 	struct nlattr *tb[TCA_GATE_MAX + 1];
+	enum tk_offsets tko = TK_OFFS_TAI;
 	struct tcf_chain *goto_ch = NULL;
-	u64 cycletime = 0, basetime = 0;
-	struct tcf_gate_params *p;
+	s32 timer_clockid = CLOCK_TAI;
+	bool use_old_entries = false;
+	s32 old_clockid = CLOCK_TAI;
+	bool need_cancel = false;
 	s32 clockid = CLOCK_TAI;
 	struct tcf_gate *gact;
 	struct tc_gate *parm;
+	u64 old_basetime = 0;
 	int ret = 0, err;
 	u32 gflags = 0;
 	s32 prio = -1;
···
 	if (!tb[TCA_GATE_PARMS])
 		return -EINVAL;
 
-	if (tb[TCA_GATE_CLOCKID]) {
+	if (tb[TCA_GATE_CLOCKID])
 		clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
-		switch (clockid) {
-		case CLOCK_REALTIME:
-			tk_offset = TK_OFFS_REAL;
-			break;
-		case CLOCK_MONOTONIC:
-			tk_offset = TK_OFFS_MAX;
-			break;
-		case CLOCK_BOOTTIME:
-			tk_offset = TK_OFFS_BOOT;
-			break;
-		case CLOCK_TAI:
-			tk_offset = TK_OFFS_TAI;
-			break;
-		default:
-			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
-			return -EINVAL;
-		}
-	}
 
 	parm = nla_data(tb[TCA_GATE_PARMS]);
 	index = parm->index;
···
 		return -EEXIST;
 	}
 
+	gact = to_gate(*a);
+
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		err = -ENOMEM;
+		goto chain_put;
+	}
+	INIT_LIST_HEAD(&p->entries);
+
+	use_old_entries = !tb[TCA_GATE_ENTRY_LIST];
+	if (!use_old_entries) {
+		err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+		if (err < 0)
+			goto err_free;
+		use_old_entries = !err;
+	}
+
+	if (ret == ACT_P_CREATED && use_old_entries) {
+		NL_SET_ERR_MSG(extack, "The entry list is empty");
+		err = -EINVAL;
+		goto err_free;
+	}
+
+	if (ret != ACT_P_CREATED) {
+		rcu_read_lock();
+		cur_p = rcu_dereference(gact->param);
+
+		old_basetime = cur_p->tcfg_basetime;
+		old_clockid = cur_p->tcfg_clockid;
+		old_tk_offset = READ_ONCE(gact->tk_offset);
+
+		basetime = old_basetime;
+		cycletime_ext = cur_p->tcfg_cycletime_ext;
+		prio = cur_p->tcfg_priority;
+		gflags = cur_p->tcfg_flags;
+
+		if (!tb[TCA_GATE_CLOCKID])
+			clockid = old_clockid;
+
+		err = 0;
+		if (use_old_entries) {
+			err = tcf_gate_copy_entries(p, cur_p, extack);
+			if (!err && !tb[TCA_GATE_CYCLE_TIME])
+				cycletime = cur_p->tcfg_cycletime;
+		}
+		rcu_read_unlock();
+		if (err)
+			goto err_free;
+	}
+
 	if (tb[TCA_GATE_PRIORITY])
 		prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
 
···
 	if (tb[TCA_GATE_FLAGS])
 		gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
 
-	gact = to_gate(*a);
-	if (ret == ACT_P_CREATED)
-		INIT_LIST_HEAD(&gact->param.entries);
-
-	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
-	if (err < 0)
-		goto release_idr;
-
-	spin_lock_bh(&gact->tcf_lock);
-	p = &gact->param;
-
 	if (tb[TCA_GATE_CYCLE_TIME])
 		cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
 
-	if (tb[TCA_GATE_ENTRY_LIST]) {
-		err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
-		if (err < 0)
-			goto chain_put;
-	}
+	if (tb[TCA_GATE_CYCLE_TIME_EXT])
+		cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+
+	err = gate_clock_resolve(clockid, &tko, extack);
+	if (err)
+		goto err_free;
+	timer_clockid = clockid;
+
+	need_cancel = ret != ACT_P_CREATED &&
+		      gate_timer_needs_cancel(basetime, old_basetime,
+					      tko, old_tk_offset,
+					      timer_clockid, old_clockid);
+
+	if (need_cancel)
+		hrtimer_cancel(&gact->hitimer);
+
+	spin_lock_bh(&gact->tcf_lock);
 
 	if (!cycletime) {
 		struct tcfg_gate_entry *entry;
···
 		list_for_each_entry(entry, &p->entries, list)
 			cycle = ktime_add_ns(cycle, entry->interval);
 		cycletime = cycle;
-		if (!cycletime) {
-			err = -EINVAL;
-			goto chain_put;
-		}
 	}
 	p->tcfg_cycletime = cycletime;
+	p->tcfg_cycletime_ext = cycletime_ext;
 
-	if (tb[TCA_GATE_CYCLE_TIME_EXT])
-		p->tcfg_cycletime_ext =
-			nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
-
-	gate_setup_timer(gact, basetime, tk_offset, clockid,
-			 ret == ACT_P_CREATED);
+	if (need_cancel || ret == ACT_P_CREATED)
+		gate_setup_timer(gact, timer_clockid, tko);
 	p->tcfg_priority = prio;
 	p->tcfg_flags = gflags;
-	gate_get_start_time(gact, &start);
+	p->tcfg_basetime = basetime;
+	p->tcfg_clockid = timer_clockid;
+	gate_get_start_time(gact, p, &start);
+
+	old_p = rcu_replace_pointer(gact->param, p,
+				    lockdep_is_held(&gact->tcf_lock));
 
 	gact->current_close_time = start;
 	gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
···
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
+	if (old_p)
+		call_rcu(&old_p->rcu, tcf_gate_params_free_rcu);
+
 	return ret;
 
+err_free:
+	release_entry_list(&p->entries);
+	kfree(p);
 chain_put:
-	spin_unlock_bh(&gact->tcf_lock);
-
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 release_idr:
···
 	 * without taking tcf_lock.
 	 */
 	if (ret == ACT_P_CREATED)
-		gate_setup_timer(gact, gact->param.tcfg_basetime,
-				 gact->tk_offset, gact->param.tcfg_clockid,
-				 true);
+		gate_setup_timer(gact, timer_clockid, tko);
+
 	tcf_idr_release(*a, bind);
 	return err;
+}
+
+static void tcf_gate_params_free_rcu(struct rcu_head *head)
+{
+	struct tcf_gate_params *p = container_of(head, struct tcf_gate_params, rcu);
+
+	release_entry_list(&p->entries);
+	kfree(p);
 }
 
 static void tcf_gate_cleanup(struct tc_action *a)
···
 	struct tcf_gate *gact = to_gate(a);
 	struct tcf_gate_params *p;
 
-	p = &gact->param;
 	hrtimer_cancel(&gact->hitimer);
-	release_entry_list(&p->entries);
+	p = rcu_dereference_protected(gact->param, 1);
+	if (p)
+		call_rcu(&p->rcu, tcf_gate_params_free_rcu);
 }
 
 static int dumping_entry(struct sk_buff *skb,
···
 	struct nlattr *entry_list;
 	struct tcf_t t;
 
-	spin_lock_bh(&gact->tcf_lock);
-	opt.action = gact->tcf_action;
-
-	p = &gact->param;
+	rcu_read_lock();
+	opt.action = READ_ONCE(gact->tcf_action);
+	p = rcu_dereference(gact->param);
 
 	if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
···
 	tcf_tm_dump(&t, &gact->tcf_tm);
 	if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
 		goto nla_put_failure;
-	spin_unlock_bh(&gact->tcf_lock);
+	rcu_read_unlock();
 
 	return skb->len;
 
 nla_put_failure:
-	spin_unlock_bh(&gact->tcf_lock);
+	rcu_read_unlock();
 	nlmsg_trim(skb, b);
 	return -1;
 }
+44 -49
net/sched/act_ife.c
···
 /* called when adding new meta information
 */
 static int __add_metainfo(const struct tcf_meta_ops *ops,
-			  struct tcf_ife_info *ife, u32 metaid, void *metaval,
-			  int len, bool atomic, bool exists)
+			  struct tcf_ife_params *p, u32 metaid, void *metaval,
+			  int len, bool atomic)
 {
 	struct tcf_meta_info *mi = NULL;
 	int ret = 0;
···
 		}
 	}
 
-	if (exists)
-		spin_lock_bh(&ife->tcf_lock);
-	list_add_tail(&mi->metalist, &ife->metalist);
-	if (exists)
-		spin_unlock_bh(&ife->tcf_lock);
+	list_add_tail(&mi->metalist, &p->metalist);
 
 	return ret;
 }
 
 static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops,
-				    struct tcf_ife_info *ife, u32 metaid,
-				    bool exists)
+				    struct tcf_ife_params *p, u32 metaid)
 {
 	int ret;
 
 	if (!try_module_get(ops->owner))
 		return -ENOENT;
-	ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists);
+	ret = __add_metainfo(ops, p, metaid, NULL, 0, true);
 	if (ret)
 		module_put(ops->owner);
 	return ret;
 }
 
-static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
-			int len, bool exists)
+static int add_metainfo(struct tcf_ife_params *p, u32 metaid, void *metaval,
+			int len)
 {
 	const struct tcf_meta_ops *ops = find_ife_oplist(metaid);
 	int ret;
 
 	if (!ops)
 		return -ENOENT;
-	ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists);
+	ret = __add_metainfo(ops, p, metaid, metaval, len, false);
 	if (ret)
 		/*put back what find_ife_oplist took */
 		module_put(ops->owner);
 	return ret;
 }
 
-static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
+static int use_all_metadata(struct tcf_ife_params *p)
 {
 	struct tcf_meta_ops *o;
 	int rc = 0;
···
 
 	read_lock(&ife_mod_lock);
 	list_for_each_entry(o, &ifeoplist, list) {
-		rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists);
+		rc = add_metainfo_and_get_ops(o, p, o->metaid);
 		if (rc == 0)
 			installed += 1;
 	}
···
 	return -EINVAL;
 }
 
-static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
+static int dump_metalist(struct sk_buff *skb, struct tcf_ife_params *p)
 {
 	struct tcf_meta_info *e;
 	struct nlattr *nest;
···
 	int total_encoded = 0;
 
 	/*can only happen on decode */
-	if (list_empty(&ife->metalist))
+	if (list_empty(&p->metalist))
 		return 0;
 
 	nest = nla_nest_start_noflag(skb, TCA_IFE_METALST);
 	if (!nest)
 		goto out_nlmsg_trim;
 
-	list_for_each_entry(e, &ife->metalist, metalist) {
+	list_for_each_entry(e, &p->metalist, metalist) {
 		if (!e->ops->get(skb, e))
 			total_encoded += 1;
 	}
···
 	return -1;
 }
 
-/* under ife->tcf_lock */
-static void _tcf_ife_cleanup(struct tc_action *a)
+static void __tcf_ife_cleanup(struct tcf_ife_params *p)
 {
-	struct tcf_ife_info *ife = to_ife(a);
 	struct tcf_meta_info *e, *n;
 
-	list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
+	list_for_each_entry_safe(e, n, &p->metalist, metalist) {
 		list_del(&e->metalist);
 		if (e->metaval) {
 			if (e->ops->release)
···
 	}
 }
 
+static void tcf_ife_cleanup_params(struct rcu_head *head)
+{
+	struct tcf_ife_params *p = container_of(head, struct tcf_ife_params,
+						rcu);
+
+	__tcf_ife_cleanup(p);
+	kfree(p);
+}
+
 static void tcf_ife_cleanup(struct tc_action *a)
 {
 	struct tcf_ife_info *ife = to_ife(a);
 	struct tcf_ife_params *p;
 
-	spin_lock_bh(&ife->tcf_lock);
-	_tcf_ife_cleanup(a);
-	spin_unlock_bh(&ife->tcf_lock);
-
 	p = rcu_dereference_protected(ife->params, 1);
 	if (p)
-		kfree_rcu(p, rcu);
+		call_rcu(&p->rcu, tcf_ife_cleanup_params);
 }
 
 static int load_metalist(struct nlattr **tb, bool rtnl_held)
···
 	return 0;
 }
 
-static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
-			     bool exists, bool rtnl_held)
+static int populate_metalist(struct tcf_ife_params *p, struct nlattr **tb)
 {
 	int len = 0;
 	int rc = 0;
···
 			val = nla_data(tb[i]);
 			len = nla_len(tb[i]);
 
-			rc = add_metainfo(ife, i, val, len, exists);
+			rc = add_metainfo(p, i, val, len);
 			if (rc)
 				return rc;
 		}
···
 	p = kzalloc_obj(*p);
 	if (!p)
 		return -ENOMEM;
+	INIT_LIST_HEAD(&p->metalist);
 
 	if (tb[TCA_IFE_METALST]) {
 		err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
···
 	}
 
 	ife = to_ife(*a);
-	if (ret == ACT_P_CREATED)
-		INIT_LIST_HEAD(&ife->metalist);
 
 	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
 	if (err < 0)
···
 	}
 
 	if (tb[TCA_IFE_METALST]) {
-		err = populate_metalist(ife, tb2, exists,
-					!(flags & TCA_ACT_FLAGS_NO_RTNL));
+		err = populate_metalist(p, tb2);
 		if (err)
 			goto metadata_parse_err;
 	} else {
···
 		 * as we can.
You better have at least one else we are 606 611 * going to bail out 607 612 */ 608 - err = use_all_metadata(ife, exists); 613 + err = use_all_metadata(p); 609 614 if (err) 610 615 goto metadata_parse_err; 611 616 } ··· 621 626 if (goto_ch) 622 627 tcf_chain_put_by_act(goto_ch); 623 628 if (p) 624 - kfree_rcu(p, rcu); 629 + call_rcu(&p->rcu, tcf_ife_cleanup_params); 625 630 626 631 return ret; 627 632 metadata_parse_err: 628 633 if (goto_ch) 629 634 tcf_chain_put_by_act(goto_ch); 630 635 release_idr: 636 + __tcf_ife_cleanup(p); 631 637 kfree(p); 632 638 tcf_idr_release(*a, bind); 633 639 return err; ··· 675 679 if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type)) 676 680 goto nla_put_failure; 677 681 678 - if (dump_metalist(skb, ife)) { 682 + if (dump_metalist(skb, p)) { 679 683 /*ignore failure to dump metalist */ 680 684 pr_info("Failed to dump metalist\n"); 681 685 } ··· 689 693 return -1; 690 694 } 691 695 692 - static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, 696 + static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_params *p, 693 697 u16 metaid, u16 mlen, void *mdata) 694 698 { 695 699 struct tcf_meta_info *e; 696 700 697 701 /* XXX: use hash to speed up */ 698 - list_for_each_entry(e, &ife->metalist, metalist) { 702 + list_for_each_entry_rcu(e, &p->metalist, metalist) { 699 703 if (metaid == e->metaid) { 700 704 if (e->ops) { 701 705 /* We check for decode presence already */ ··· 712 716 { 713 717 struct tcf_ife_info *ife = to_ife(a); 714 718 int action = ife->tcf_action; 719 + struct tcf_ife_params *p; 715 720 u8 *ifehdr_end; 716 721 u8 *tlv_data; 717 722 u16 metalen; 723 + 724 + p = rcu_dereference_bh(ife->params); 718 725 719 726 bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); 720 727 tcf_lastuse_update(&ife->tcf_tm); ··· 744 745 return TC_ACT_SHOT; 745 746 } 746 747 747 - if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { 748 + if (find_decode_metaid(skb, p, mtype, dlen, curr_data)) { 748 749 /* 
abuse overlimits to count when we receive metadata 749 750 * but dont have an ops for it 750 751 */ ··· 768 769 /*XXX: check if we can do this at install time instead of current 769 770 * send data path 770 771 **/ 771 - static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife) 772 + static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_params *p) 772 773 { 773 - struct tcf_meta_info *e, *n; 774 + struct tcf_meta_info *e; 774 775 int tot_run_sz = 0, run_sz = 0; 775 776 776 - list_for_each_entry_safe(e, n, &ife->metalist, metalist) { 777 + list_for_each_entry_rcu(e, &p->metalist, metalist) { 777 778 if (e->ops->check_presence) { 778 779 run_sz = e->ops->check_presence(skb, e); 779 780 tot_run_sz += run_sz; ··· 794 795 OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA 795 796 where ORIGDATA = original ethernet header ... 796 797 */ 797 - u16 metalen = ife_get_sz(skb, ife); 798 + u16 metalen = ife_get_sz(skb, p); 798 799 int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; 799 800 unsigned int skboff = 0; 800 801 int new_len = skb->len + hdrm; ··· 832 833 if (!ife_meta) 833 834 goto drop; 834 835 835 - spin_lock(&ife->tcf_lock); 836 - 837 836 /* XXX: we dont have a clever way of telling encode to 838 837 * not repeat some of the computations that are done by 839 838 * ops->presence_check... 840 839 */ 841 - list_for_each_entry(e, &ife->metalist, metalist) { 840 + list_for_each_entry_rcu(e, &p->metalist, metalist) { 842 841 if (e->ops->encode) { 843 842 err = e->ops->encode(skb, (void *)(ife_meta + skboff), 844 843 e); 845 844 } 846 845 if (err < 0) { 847 846 /* too corrupt to keep around if overwritten */ 848 - spin_unlock(&ife->tcf_lock); 849 847 goto drop; 850 848 } 851 849 skboff += err; 852 850 } 853 - spin_unlock(&ife->tcf_lock); 854 851 oethh = (struct ethhdr *)skb->data; 855 852 856 853 if (!is_zero_ether_addr(p->eth_src))
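The act_ife rework above replaces the spinlocked per-action metalist with an RCU-managed tcf_ife_params object: the writer builds a complete new params (populate_metalist fills p->metalist), publishes it with a single pointer swap, and retires the old one via call_rcu(..., tcf_ife_cleanup_params) so in-flight readers finish first. A minimal userspace analogy of that publish-then-retire shape (all names here are illustrative; real RCU defers the free until every reader is done, which this sketch only mimics by handing the old object back to the caller):

```c
#include <assert.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the kernel structures: a params object
 * owns the metadata list, and readers only ever see a fully built
 * object through one pointer swap (the role RCU plays in the patch). */
struct meta { int id; struct meta *next; };
struct params { struct meta *metalist; };

static struct params *params_build(const int *ids, int n)
{
	struct params *p = calloc(1, sizeof(*p));

	/* push front in reverse so the list head is ids[0] */
	for (int i = n - 1; i >= 0; i--) {
		struct meta *m = calloc(1, sizeof(*m));

		m->id = ids[i];
		m->next = p->metalist;
		p->metalist = m;
	}
	return p;
}

/* Analogue of tcf_ife_cleanup_params(): free the list, then the object. */
static void params_free(struct params *p)
{
	struct meta *m = p->metalist;

	while (m) {
		struct meta *n = m->next;

		free(m);
		m = n;
	}
	free(p);
}

/* Updater: publish the new set, return the old one for deferred free.
 * In the kernel the swap is rcu_replace_pointer() and the free is
 * call_rcu(); here the caller just frees it when convenient. */
static struct params *params_replace(struct params **slot, struct params *newp)
{
	struct params *old = *slot;

	*slot = newp;
	return old;
}
```

Readers never observe a half-built list: they either see the old params or the new one, which is what lets the data path drop ife->tcf_lock entirely.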
+7
net/sched/cls_api.c
···
2228 2228 	return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS));
2229 2229 }
2230 2230 
2231 + static bool is_ingress_or_clsact(struct tcf_block *block, struct Qdisc *q)
2232 + {
2233 + 	return tcf_block_shared(block) || (q && !!(q->flags & TCQ_F_INGRESS));
2234 + }
2235 + 
2231 2236 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2232 2237 			  struct netlink_ext_ack *extack)
2233 2238 {
···
2425 2420 		flags |= TCA_ACT_FLAGS_NO_RTNL;
2426 2421 	if (is_qdisc_ingress(parent))
2427 2422 		flags |= TCA_ACT_FLAGS_AT_INGRESS;
2423 + 	if (is_ingress_or_clsact(block, q))
2424 + 		flags |= TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT;
2428 2425 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
2429 2426 			      flags, extack);
2430 2427 	if (err == 0) {
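The new is_ingress_or_clsact() helper lets tc_new_tfilter() tell actions whether the filter is being attached to a shared block or an ingress/clsact qdisc, which act_ct then uses to refuse other attachment points. A simplified model of the check (struct layouts and the flag value are illustrative stand-ins, not the kernel's):

```c
#include <assert.h>
#include <stdbool.h>

#define TCQ_F_INGRESS 0x2U /* placeholder value for the sketch */

struct block { bool shared; };
struct qdisc { unsigned int flags; };

/* Attachment is ingress-or-clsact if the block is shared between
 * qdiscs, or the qdisc itself is an ingress/clsact instance. */
static bool is_ingress_or_clsact(const struct block *b, const struct qdisc *q)
{
	return b->shared || (q && (q->flags & TCQ_F_INGRESS));
}
```

Note the q NULL check: a shared block can be looked up without any owning qdisc, so the qdisc flag test must be guarded.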
+25 -28
net/sched/sch_cake.c
··· 391 391 1239850263, 1191209601, 1147878294, 1108955788 392 392 }; 393 393 394 - static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, 395 - u64 target_ns, u64 rtt_est_ns); 394 + static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust); 395 + 396 396 /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots 397 397 * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) 398 398 * ··· 2013 2013 u64 delay; 2014 2014 u32 len; 2015 2015 2016 - if (q->config->is_shared && now - q->last_checked_active >= q->config->sync_time) { 2016 + if (q->config->is_shared && q->rate_ns && 2017 + now - q->last_checked_active >= q->config->sync_time) { 2017 2018 struct net_device *dev = qdisc_dev(sch); 2018 2019 struct cake_sched_data *other_priv; 2019 2020 u64 new_rate = q->config->rate_bps; ··· 2040 2039 if (num_active_qs > 1) 2041 2040 new_rate = div64_u64(q->config->rate_bps, num_active_qs); 2042 2041 2043 - /* mtu = 0 is used to only update the rate and not mess with cobalt params */ 2044 - cake_set_rate(b, new_rate, 0, 0, 0); 2042 + cake_configure_rates(sch, new_rate, true); 2045 2043 q->last_checked_active = now; 2046 2044 q->active_queues = num_active_qs; 2047 - q->rate_ns = b->tin_rate_ns; 2048 - q->rate_shft = b->tin_rate_shft; 2049 2045 } 2050 2046 2051 2047 begin: ··· 2359 2361 b->cparams.p_dec = 1 << 20; /* 1/4096 */ 2360 2362 } 2361 2363 2362 - static int cake_config_besteffort(struct Qdisc *sch) 2364 + static int cake_config_besteffort(struct Qdisc *sch, u64 rate, u32 mtu) 2363 2365 { 2364 2366 struct cake_sched_data *q = qdisc_priv(sch); 2365 2367 struct cake_tin_data *b = &q->tins[0]; 2366 - u32 mtu = psched_mtu(qdisc_dev(sch)); 2367 - u64 rate = q->config->rate_bps; 2368 2368 2369 2369 q->tin_cnt = 1; 2370 2370 ··· 2376 2380 return 0; 2377 2381 } 2378 2382 2379 - static int cake_config_precedence(struct Qdisc *sch) 2383 + static int cake_config_precedence(struct Qdisc *sch, u64 rate, u32 mtu) 2380 2384 { 2381 2385 /* 
convert high-level (user visible) parameters into internal format */ 2382 2386 struct cake_sched_data *q = qdisc_priv(sch); 2383 - u32 mtu = psched_mtu(qdisc_dev(sch)); 2384 - u64 rate = q->config->rate_bps; 2385 2387 u32 quantum = 256; 2386 2388 u32 i; 2387 2389 ··· 2450 2456 * Total 12 traffic classes. 2451 2457 */ 2452 2458 2453 - static int cake_config_diffserv8(struct Qdisc *sch) 2459 + static int cake_config_diffserv8(struct Qdisc *sch, u64 rate, u32 mtu) 2454 2460 { 2455 2461 /* Pruned list of traffic classes for typical applications: 2456 2462 * ··· 2467 2473 */ 2468 2474 2469 2475 struct cake_sched_data *q = qdisc_priv(sch); 2470 - u32 mtu = psched_mtu(qdisc_dev(sch)); 2471 - u64 rate = q->config->rate_bps; 2472 2476 u32 quantum = 256; 2473 2477 u32 i; 2474 2478 ··· 2496 2504 return 0; 2497 2505 } 2498 2506 2499 - static int cake_config_diffserv4(struct Qdisc *sch) 2507 + static int cake_config_diffserv4(struct Qdisc *sch, u64 rate, u32 mtu) 2500 2508 { 2501 2509 /* Further pruned list of traffic classes for four-class system: 2502 2510 * ··· 2509 2517 */ 2510 2518 2511 2519 struct cake_sched_data *q = qdisc_priv(sch); 2512 - u32 mtu = psched_mtu(qdisc_dev(sch)); 2513 - u64 rate = q->config->rate_bps; 2514 2520 u32 quantum = 1024; 2515 2521 2516 2522 q->tin_cnt = 4; ··· 2536 2546 return 0; 2537 2547 } 2538 2548 2539 - static int cake_config_diffserv3(struct Qdisc *sch) 2549 + static int cake_config_diffserv3(struct Qdisc *sch, u64 rate, u32 mtu) 2540 2550 { 2541 2551 /* Simplified Diffserv structure with 3 tins. 
2542 2552 * Latency Sensitive (CS7, CS6, EF, VA, TOS4) ··· 2544 2554 * Low Priority (LE, CS1) 2545 2555 */ 2546 2556 struct cake_sched_data *q = qdisc_priv(sch); 2547 - u32 mtu = psched_mtu(qdisc_dev(sch)); 2548 - u64 rate = q->config->rate_bps; 2549 2557 u32 quantum = 1024; 2550 2558 2551 2559 q->tin_cnt = 3; ··· 2568 2580 return 0; 2569 2581 } 2570 2582 2571 - static void cake_reconfigure(struct Qdisc *sch) 2583 + static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust) 2572 2584 { 2585 + u32 mtu = likely(rate_adjust) ? 0 : psched_mtu(qdisc_dev(sch)); 2573 2586 struct cake_sched_data *qd = qdisc_priv(sch); 2574 2587 struct cake_sched_config *q = qd->config; 2575 2588 int c, ft; 2576 2589 2577 2590 switch (q->tin_mode) { 2578 2591 case CAKE_DIFFSERV_BESTEFFORT: 2579 - ft = cake_config_besteffort(sch); 2592 + ft = cake_config_besteffort(sch, rate, mtu); 2580 2593 break; 2581 2594 2582 2595 case CAKE_DIFFSERV_PRECEDENCE: 2583 - ft = cake_config_precedence(sch); 2596 + ft = cake_config_precedence(sch, rate, mtu); 2584 2597 break; 2585 2598 2586 2599 case CAKE_DIFFSERV_DIFFSERV8: 2587 - ft = cake_config_diffserv8(sch); 2600 + ft = cake_config_diffserv8(sch, rate, mtu); 2588 2601 break; 2589 2602 2590 2603 case CAKE_DIFFSERV_DIFFSERV4: 2591 - ft = cake_config_diffserv4(sch); 2604 + ft = cake_config_diffserv4(sch, rate, mtu); 2592 2605 break; 2593 2606 2594 2607 case CAKE_DIFFSERV_DIFFSERV3: 2595 2608 default: 2596 - ft = cake_config_diffserv3(sch); 2609 + ft = cake_config_diffserv3(sch, rate, mtu); 2597 2610 break; 2598 2611 } 2599 2612 ··· 2605 2616 2606 2617 qd->rate_ns = qd->tins[ft].tin_rate_ns; 2607 2618 qd->rate_shft = qd->tins[ft].tin_rate_shft; 2619 + } 2620 + 2621 + static void cake_reconfigure(struct Qdisc *sch) 2622 + { 2623 + struct cake_sched_data *qd = qdisc_priv(sch); 2624 + struct cake_sched_config *q = qd->config; 2625 + 2626 + cake_configure_rates(sch, qd->config->rate_bps, false); 2608 2627 2609 2628 if (q->buffer_config_limit) 
{ 2610 2629 qd->buffer_limit = q->buffer_config_limit;
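When several cake instances share one config, the dequeue path above divides the configured rate evenly among the queues seen as active, and the fix routes that through cake_configure_rates() so every tin is updated consistently. The arithmetic itself is a plain 64-bit division (div64_u64 in the kernel, since rate_bps can exceed 32 bits). A sketch of the split with a hypothetical helper name:

```c
#include <assert.h>
#include <stdint.h>

/* Illustrative helper (not a kernel function): share rate_bps evenly
 * across the active queues of a cake_mq instance; a lone queue keeps
 * the full configured rate. */
static uint64_t cake_shared_rate(uint64_t rate_bps, uint32_t active_queues)
{
	if (active_queues > 1)
		return rate_bps / active_queues; /* div64_u64() in-kernel */
	return rate_bps;
}
```

The guard also doubles as divide-by-zero protection when no queue was recently active.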
+8 -4
net/sched/sch_ets.c
···
115 115 	struct ets_sched *q = qdisc_priv(sch);
116 116 	struct tc_ets_qopt_offload qopt;
117 117 	unsigned int w_psum_prev = 0;
118 - 	unsigned int q_psum = 0;
119 - 	unsigned int q_sum = 0;
120 118 	unsigned int quantum;
121 119 	unsigned int w_psum;
122 120 	unsigned int weight;
123 121 	unsigned int i;
122 + 	u64 q_psum = 0;
123 + 	u64 q_sum = 0;
124 124 
125 125 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
126 126 		return;
···
138 138 
139 139 	for (i = 0; i < q->nbands; i++) {
140 140 		quantum = q->classes[i].quantum;
141 - 		q_psum += quantum;
142 - 		w_psum = quantum ? q_psum * 100 / q_sum : 0;
141 + 		if (quantum) {
142 + 			q_psum += quantum;
143 + 			w_psum = div64_u64(q_psum * 100, q_sum);
144 + 		} else {
145 + 			w_psum = 0;
146 + 		}
143 147 		weight = w_psum - w_psum_prev;
144 148 		w_psum_prev = w_psum;
145 149 
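The type change from unsigned int to u64 matters because quantum is a u32 and the intermediate q_psum * 100 can exceed 32 bits well before the sum itself would. A small demonstration of the wraparound the patch avoids (the values are arbitrary illustrations):

```c
#include <assert.h>
#include <stdint.h>

/* Old shape: the multiply happens in 32-bit arithmetic and silently
 * wraps once q_psum * 100 crosses 2^32. */
static unsigned int weight32(uint32_t q_psum, uint32_t q_sum)
{
	return q_psum * 100 / q_sum;
}

/* New shape: widen first, then divide (div64_u64() analogue). */
static unsigned int weight64(uint64_t q_psum, uint64_t q_sum)
{
	return (unsigned int)(q_psum * 100 / q_sum);
}
```

With q_psum = 50,000,000 the product is 5e9, which no longer fits in 32 bits, so the two versions disagree; the u64 path gives the expected 50% weight.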
+1
net/sched/sch_fq.c
···
827 827 	for (idx = 0; idx < FQ_BANDS; idx++) {
828 828 		q->band_flows[idx].new_flows.first = NULL;
829 829 		q->band_flows[idx].old_flows.first = NULL;
830 + 		q->band_pkt_count[idx] = 0;
830 831 	}
831 832 	q->delayed = RB_ROOT;
832 833 	q->flows = 0;
+4 -4
net/unix/af_unix.c
···
1785 1785 	__skb_queue_tail(&other->sk_receive_queue, skb);
1786 1786 	spin_unlock(&other->sk_receive_queue.lock);
1787 1787 	unix_state_unlock(other);
1788 - 	other->sk_data_ready(other);
1788 + 	READ_ONCE(other->sk_data_ready)(other);
1789 1789 	sock_put(other);
1790 1790 	return 0;
···
2278 2278 	scm_stat_add(other, skb);
2279 2279 	skb_queue_tail(&other->sk_receive_queue, skb);
2280 2280 	unix_state_unlock(other);
2281 - 	other->sk_data_ready(other);
2281 + 	READ_ONCE(other->sk_data_ready)(other);
2282 2282 	sock_put(other);
2283 2283 	scm_destroy(&scm);
2284 2284 	return len;
···
2351 2351 
2352 2352 	sk_send_sigurg(other);
2353 2353 	unix_state_unlock(other);
2354 - 	other->sk_data_ready(other);
2354 + 	READ_ONCE(other->sk_data_ready)(other);
2355 2355 
2356 2356 	return 0;
2357 2357 out_unlock:
···
2477 2477 	spin_unlock(&other->sk_receive_queue.lock);
2478 2478 
2479 2479 	unix_state_unlock(other);
2480 - 	other->sk_data_ready(other);
2480 + 	READ_ONCE(other->sk_data_ready)(other);
2481 2481 	sent += size;
2482 2482 
2483 2483 
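READ_ONCE(other->sk_data_ready)(other) forces a single, untorn load of the callback pointer before calling through it, since sk_data_ready can be rewritten concurrently by another thread. A userspace sketch of the idiom using the common volatile-cast definition of READ_ONCE (the struct here is a toy, not the kernel's struct sock):

```c
#include <assert.h>

/* Simplified READ_ONCE(): a volatile access the compiler may not
 * split, duplicate, or re-load. */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

struct sock {
	void (*sk_data_ready)(struct sock *sk);
	int woken;
};

static void default_data_ready(struct sock *sk)
{
	sk->woken++;
}

static void notify(struct sock *other)
{
	/* Load the pointer exactly once, then call through the copy;
	 * re-reading the field between check and call would reopen
	 * the race the patch closes. */
	void (*ready)(struct sock *) = READ_ONCE(other->sk_data_ready);

	ready(other);
}
```

Without the marked access the compiler is free to reload the field, and a concurrent writer could make the two loads disagree.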
+17 -11
net/xdp/xsk.c
··· 167 167 struct xdp_buff_xsk *pos, *tmp; 168 168 struct list_head *xskb_list; 169 169 u32 contd = 0; 170 + u32 num_desc; 170 171 int err; 171 172 172 - if (frags) 173 - contd = XDP_PKT_CONTD; 174 - 175 - err = __xsk_rcv_zc(xs, xskb, len, contd); 176 - if (err) 177 - goto err; 178 - if (likely(!frags)) 173 + if (likely(!frags)) { 174 + err = __xsk_rcv_zc(xs, xskb, len, contd); 175 + if (err) 176 + goto err; 179 177 return 0; 178 + } 180 179 180 + contd = XDP_PKT_CONTD; 181 + num_desc = xdp_get_shared_info_from_buff(xdp)->nr_frags + 1; 182 + if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) { 183 + xs->rx_queue_full++; 184 + err = -ENOBUFS; 185 + goto err; 186 + } 187 + 188 + __xsk_rcv_zc(xs, xskb, len, contd); 181 189 xskb_list = &xskb->pool->xskb_list; 182 190 list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { 183 191 if (list_is_singular(xskb_list)) 184 192 contd = 0; 185 193 len = pos->xdp.data_end - pos->xdp.data; 186 - err = __xsk_rcv_zc(xs, pos, len, contd); 187 - if (err) 188 - goto err; 189 - list_del(&pos->list_node); 194 + __xsk_rcv_zc(xs, pos, len, contd); 195 + list_del_init(&pos->list_node); 190 196 } 191 197 192 198 return 0;
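The fragment fix above converts the receive path to all-or-nothing: it counts the packet's descriptors up front (nr_frags + 1), rejects the whole packet with -ENOBUFS when the RX ring cannot hold them all, and only then commits every fragment, so a mid-packet failure can no longer strand buffers on the xskb list. A toy producer ring showing that reserve-before-commit shape (the ring layout is illustrative, not the xskq implementation):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define RING_SZ 4 /* toy capacity */

struct ring { uint32_t prod, cons; };

/* Free slots seen by the producer (xskq_prod_nb_free() analogue). */
static uint32_t prod_nb_free(const struct ring *r)
{
	return RING_SZ - (r->prod - r->cons);
}

/* Enqueue num_desc fragment descriptors only if all of them fit;
 * otherwise enqueue nothing, mirroring the -ENOBUFS path. */
static bool rcv_frags(struct ring *r, uint32_t num_desc)
{
	if (prod_nb_free(r) < num_desc)
		return false;
	r->prod += num_desc; /* commit every descriptor at once */
	return true;
}
```

Checking capacity first is what makes the later list_del_init() on each fragment safe: by that point every descriptor is guaranteed to land in the ring.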
+58
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
··· 610 610 system("ip link del bond"); 611 611 } 612 612 613 + /* 614 + * Test that changing xmit_hash_policy to vlan+srcmac is rejected when a 615 + * native XDP program is loaded on a bond in 802.3ad or balance-xor mode. 616 + * These modes support XDP only when xmit_hash_policy != vlan+srcmac; freely 617 + * changing the policy creates an inconsistency that triggers a WARNING in 618 + * dev_xdp_uninstall() during device teardown. 619 + */ 620 + static void test_xdp_bonding_xmit_policy_compat(struct skeletons *skeletons) 621 + { 622 + struct nstoken *nstoken = NULL; 623 + int bond_ifindex = -1; 624 + int xdp_fd, err; 625 + 626 + SYS(out, "ip netns add ns_xmit_policy"); 627 + nstoken = open_netns("ns_xmit_policy"); 628 + if (!ASSERT_OK_PTR(nstoken, "open ns_xmit_policy")) 629 + goto out; 630 + 631 + /* 802.3ad with layer2+3 policy: native XDP is supported */ 632 + SYS(out, "ip link add bond0 type bond mode 802.3ad xmit_hash_policy layer2+3"); 633 + SYS(out, "ip link add veth0 type veth peer name veth0p"); 634 + SYS(out, "ip link set veth0 master bond0"); 635 + SYS(out, "ip link set bond0 up"); 636 + 637 + bond_ifindex = if_nametoindex("bond0"); 638 + if (!ASSERT_GT(bond_ifindex, 0, "bond0 ifindex")) 639 + goto out; 640 + 641 + xdp_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog); 642 + if (!ASSERT_GE(xdp_fd, 0, "xdp_dummy fd")) 643 + goto out; 644 + 645 + err = bpf_xdp_attach(bond_ifindex, xdp_fd, XDP_FLAGS_DRV_MODE, NULL); 646 + if (!ASSERT_OK(err, "attach XDP to bond0")) 647 + goto out; 648 + 649 + /* With XDP loaded, switching to vlan+srcmac must be rejected */ 650 + err = system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); 651 + ASSERT_NEQ(err, 0, "vlan+srcmac change with XDP loaded should fail"); 652 + 653 + /* Detach XDP first, then the same change must succeed */ 654 + ASSERT_OK(bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL), 655 + "detach XDP from bond0"); 656 + 657 + bond_ifindex = -1; 658 + err = 
system("ip link set bond0 type bond xmit_hash_policy vlan+srcmac 2>/dev/null"); 659 + ASSERT_OK(err, "vlan+srcmac change without XDP should succeed"); 660 + 661 + out: 662 + if (bond_ifindex > 0) 663 + bpf_xdp_detach(bond_ifindex, XDP_FLAGS_DRV_MODE, NULL); 664 + close_netns(nstoken); 665 + SYS_NOFAIL("ip netns del ns_xmit_policy"); 666 + } 667 + 613 668 static int libbpf_debug_print(enum libbpf_print_level level, 614 669 const char *format, va_list args) 615 670 { ··· 731 676 test_case->mode, 732 677 test_case->xmit_policy); 733 678 } 679 + 680 + if (test__start_subtest("xdp_bonding_xmit_policy_compat")) 681 + test_xdp_bonding_xmit_policy_compat(&skeletons); 734 682 735 683 if (test__start_subtest("xdp_bonding_redirect_multi")) 736 684 test_xdp_bonding_redirect_multi(&skeletons);
+5 -2
tools/testing/selftests/kselftest_harness.h
···
76 76 	memset(s, c, n);
77 77 }
78 78 
79 + #define KSELFTEST_PRIO_TEST_F 20000
80 + #define KSELFTEST_PRIO_XFAIL 20001
81 + 
79 82 #define TEST_TIMEOUT_DEFAULT 30
80 83 
81 84 /* Utilities exposed to the test definitions */
···
468 465 		fixture_name##_teardown(_metadata, self, variant); \
469 466 	} \
470 467 	static struct __test_metadata *_##fixture_name##_##test_name##_object; \
471 - 	static void __attribute__((constructor)) \
468 + 	static void __attribute__((constructor(KSELFTEST_PRIO_TEST_F))) \
472 469 	_register_##fixture_name##_##test_name(void) \
473 470 	{ \
474 471 		struct __test_metadata *object = mmap(NULL, sizeof(*object), \
···
883 880 		.fixture = &_##fixture_name##_fixture_object, \
884 881 		.variant = &_##fixture_name##_##variant_name##_object, \
885 882 	}; \
886 - 	static void __attribute__((constructor)) \
883 + 	static void __attribute__((constructor(KSELFTEST_PRIO_XFAIL))) \
887 884 	_register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
888 885 	{ \
889 886 		_##fixture_name##_##variant_name##_##test_name##_xfail.test = \
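Giving the two constructors explicit priorities (20000 and 20001) pins their execution order: GCC and Clang run constructors with numerically lower priority first, and priorities up to 100 are reserved for the implementation, so TEST_F registration is guaranteed to precede the XFAIL registration that looks the test up. A self-contained illustration of the mechanism:

```c
#include <assert.h>

/* Record the order in which the two constructors fire; both run
 * before main(), lower priority first. */
static int order[2];
static int idx;

static void __attribute__((constructor(20000))) register_test(void)
{
	order[idx++] = 1; /* analogue of the TEST_F registration */
}

static void __attribute__((constructor(20001))) register_xfail(void)
{
	order[idx++] = 2; /* analogue of the XFAIL registration */
}
```

With unprioritized constructors the link order decides, which is exactly the fragility the harness change removes.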
+1
tools/testing/selftests/net/Makefile
···
15 15 	big_tcp.sh \
16 16 	bind_bhash.sh \
17 17 	bpf_offload.py \
18 + 	bridge_vlan_dump.sh \
18 19 	broadcast_ether_dst.sh \
19 20 	broadcast_pmtu.sh \
20 21 	busy_poll_test.sh \
+204
tools/testing/selftests/net/bridge_vlan_dump.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + # 4 + # Test bridge VLAN range grouping. VLANs are collapsed into a range entry in 5 + # the dump if they have the same per-VLAN options. These tests verify that 6 + # VLANs with different per-VLAN option values are not grouped together. 7 + 8 + # shellcheck disable=SC1091,SC2034,SC2154,SC2317 9 + source lib.sh 10 + 11 + ALL_TESTS=" 12 + vlan_range_neigh_suppress 13 + vlan_range_mcast_max_groups 14 + vlan_range_mcast_n_groups 15 + vlan_range_mcast_enabled 16 + " 17 + 18 + setup_prepare() 19 + { 20 + setup_ns NS 21 + defer cleanup_all_ns 22 + 23 + ip -n "$NS" link add name br0 type bridge vlan_filtering 1 \ 24 + vlan_default_pvid 0 mcast_snooping 1 mcast_vlan_snooping 1 25 + ip -n "$NS" link set dev br0 up 26 + 27 + ip -n "$NS" link add name dummy0 type dummy 28 + ip -n "$NS" link set dev dummy0 master br0 29 + ip -n "$NS" link set dev dummy0 up 30 + } 31 + 32 + vlan_range_neigh_suppress() 33 + { 34 + RET=0 35 + 36 + # Add two new consecutive VLANs for range grouping test 37 + bridge -n "$NS" vlan add vid 10 dev dummy0 38 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 39 + 40 + bridge -n "$NS" vlan add vid 11 dev dummy0 41 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 42 + 43 + # Configure different neigh_suppress values and verify no range grouping 44 + bridge -n "$NS" vlan set vid 10 dev dummy0 neigh_suppress on 45 + check_err $? "Failed to set neigh_suppress for VLAN 10" 46 + 47 + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress off 48 + check_err $? "Failed to set neigh_suppress for VLAN 11" 49 + 50 + # Verify VLANs are not shown as a range, but individual entries exist 51 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 52 + check_fail $? "VLANs with different neigh_suppress incorrectly grouped" 53 + 54 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" 55 + check_err $? 
"VLAN 10 individual entry not found" 56 + 57 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" 58 + check_err $? "VLAN 11 individual entry not found" 59 + 60 + # Configure same neigh_suppress value and verify range grouping 61 + bridge -n "$NS" vlan set vid 11 dev dummy0 neigh_suppress on 62 + check_err $? "Failed to set neigh_suppress for VLAN 11" 63 + 64 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 65 + check_err $? "VLANs with same neigh_suppress not grouped" 66 + 67 + log_test "VLAN range grouping with neigh_suppress" 68 + } 69 + 70 + vlan_range_mcast_max_groups() 71 + { 72 + RET=0 73 + 74 + # Add two new consecutive VLANs for range grouping test 75 + bridge -n "$NS" vlan add vid 10 dev dummy0 76 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 77 + 78 + bridge -n "$NS" vlan add vid 11 dev dummy0 79 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 80 + 81 + # Configure different mcast_max_groups values and verify no range grouping 82 + bridge -n "$NS" vlan set vid 10 dev dummy0 mcast_max_groups 100 83 + check_err $? "Failed to set mcast_max_groups for VLAN 10" 84 + 85 + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 200 86 + check_err $? "Failed to set mcast_max_groups for VLAN 11" 87 + 88 + # Verify VLANs are not shown as a range, but individual entries exist 89 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 90 + check_fail $? "VLANs with different mcast_max_groups incorrectly grouped" 91 + 92 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" 93 + check_err $? "VLAN 10 individual entry not found" 94 + 95 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" 96 + check_err $? "VLAN 11 individual entry not found" 97 + 98 + # Configure same mcast_max_groups value and verify range grouping 99 + bridge -n "$NS" vlan set vid 11 dev dummy0 mcast_max_groups 100 100 + check_err $? 
"Failed to set mcast_max_groups for VLAN 11" 101 + 102 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 103 + check_err $? "VLANs with same mcast_max_groups not grouped" 104 + 105 + log_test "VLAN range grouping with mcast_max_groups" 106 + } 107 + 108 + vlan_range_mcast_n_groups() 109 + { 110 + RET=0 111 + 112 + # Add two new consecutive VLANs for range grouping test 113 + bridge -n "$NS" vlan add vid 10 dev dummy0 114 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 115 + 116 + bridge -n "$NS" vlan add vid 11 dev dummy0 117 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 118 + 119 + # Add different numbers of multicast groups to each VLAN 120 + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 10 121 + check_err $? "Failed to add mdb entry to VLAN 10" 122 + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 10 123 + 124 + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 10 125 + check_err $? "Failed to add second mdb entry to VLAN 10" 126 + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 10 127 + 128 + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.1 vid 11 129 + check_err $? "Failed to add mdb entry to VLAN 11" 130 + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.1 vid 11 131 + 132 + # Verify VLANs are not shown as a range due to different mcast_n_groups 133 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 134 + check_fail $? "VLANs with different mcast_n_groups incorrectly grouped" 135 + 136 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" 137 + check_err $? "VLAN 10 individual entry not found" 138 + 139 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" 140 + check_err $? "VLAN 11 individual entry not found" 141 + 142 + # Add another group to VLAN 11 to match VLAN 10's count 143 + bridge -n "$NS" mdb add dev br0 port dummy0 grp 239.1.1.2 vid 11 144 + check_err $? 
"Failed to add second mdb entry to VLAN 11" 145 + defer bridge -n "$NS" mdb del dev br0 port dummy0 grp 239.1.1.2 vid 11 146 + 147 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 148 + check_err $? "VLANs with same mcast_n_groups not grouped" 149 + 150 + log_test "VLAN range grouping with mcast_n_groups" 151 + } 152 + 153 + vlan_range_mcast_enabled() 154 + { 155 + RET=0 156 + 157 + # Add two new consecutive VLANs for range grouping test 158 + bridge -n "$NS" vlan add vid 10 dev br0 self 159 + defer bridge -n "$NS" vlan del vid 10 dev br0 self 160 + 161 + bridge -n "$NS" vlan add vid 11 dev br0 self 162 + defer bridge -n "$NS" vlan del vid 11 dev br0 self 163 + 164 + bridge -n "$NS" vlan add vid 10 dev dummy0 165 + defer bridge -n "$NS" vlan del vid 10 dev dummy0 166 + 167 + bridge -n "$NS" vlan add vid 11 dev dummy0 168 + defer bridge -n "$NS" vlan del vid 11 dev dummy0 169 + 170 + # Configure different mcast_snooping for bridge VLANs 171 + # Port VLANs inherit BR_VLFLAG_MCAST_ENABLED from bridge VLANs 172 + bridge -n "$NS" vlan global set dev br0 vid 10 mcast_snooping 1 173 + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 0 174 + 175 + # Verify port VLANs are not grouped due to different mcast_enabled 176 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 177 + check_fail $? "VLANs with different mcast_enabled incorrectly grouped" 178 + 179 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$" 180 + check_err $? "VLAN 10 individual entry not found" 181 + 182 + bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$" 183 + check_err $? "VLAN 11 individual entry not found" 184 + 185 + # Configure same mcast_snooping and verify range grouping 186 + bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 1 187 + 188 + bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11" 189 + check_err $? 
"VLANs with same mcast_enabled not grouped" 190 + 191 + log_test "VLAN range grouping with mcast_enabled" 192 + } 193 + 194 + # Verify the newest tested option is supported 195 + if ! bridge vlan help 2>&1 | grep -q "neigh_suppress"; then 196 + echo "SKIP: iproute2 too old, missing per-VLAN neighbor suppression support" 197 + exit "$ksft_skip" 198 + fi 199 + 200 + trap defer_scopes_cleanup EXIT 201 + setup_prepare 202 + tests_run 203 + 204 + exit "$EXIT_STATUS"
+11
tools/testing/selftests/net/fib_nexthops.sh
···
1672 1672 
1673 1673 	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1"
1674 1674 	log_test $? 2 "IPv4 route with invalid IPv6 gateway"
1675 + 
1676 + 	# Test IPv4 route with loopback IPv6 nexthop
1677 + 	# Regression test: loopback IPv6 nexthop was misclassified as reject
1678 + 	# route, skipping nhc_pcpu_rth_output allocation, causing panic when
1679 + 	# an IPv4 route references it and triggers __mkroute_output().
1680 + 	run_cmd "$IP -6 nexthop add id 20 dev lo"
1681 + 	run_cmd "$IP ro add 172.20.20.0/24 nhid 20"
1682 + 	run_cmd "ip netns exec $me ping -c1 -W1 172.20.20.1"
1683 + 	log_test $? 1 "IPv4 route with loopback IPv6 nexthop (no crash)"
1684 + 	run_cmd "$IP ro del 172.20.20.0/24"
1685 + 	run_cmd "$IP nexthop del id 20"
1675 1686 }
1676 1687 
1677 1688 ipv4_fcnal_runtime()
+49
tools/testing/selftests/net/mptcp/mptcp_join.sh
···
 	6 0 0 65535,
 	6 0 0 0"
 
+# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption
+# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c &&
+# (ip[66] & 0xf0) == 0x40'"
+CBPF_MPTCP_SUBOPTION_RM_ADDR="13,
+	48 0 0 0,
+	84 0 0 240,
+	21 0 9 64,
+	48 0 0 32,
+	84 0 0 240,
+	21 0 6 192,
+	48 0 0 53,
+	21 0 4 12,
+	48 0 0 66,
+	84 0 0 240,
+	21 0 1 64,
+	6 0 0 65535,
+	6 0 0 0"
+
 init_partial()
 {
 	capout=$(mktemp)
···
 		chk_rst_nr 0 0
 	fi
 
+	# signal+subflow with limits, remove
+	if reset "remove signal+subflow with limits"; then
+		pm_nl_set_limits $ns1 0 0
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow
+		pm_nl_set_limits $ns2 0 0
+		addr_nr_ns1=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 1 1
+		chk_rm_nr 1 0 invert
+		chk_rst_nr 0 0
+	fi
+
 	# addresses remove
 	if reset "remove addresses"; then
 		pm_nl_set_limits $ns1 3 3
···
 	chk_subflow_nr "after no reject" 3
 	chk_mptcp_info subflows 2 subflows 2
 
+	# To make sure RM_ADDR are sent over a different subflow, but
+	# allow the rest to quickly and cleanly close the subflow
+	local ipt=1
+	ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \
+		-p tcp -m tcp --tcp-option 30 \
+		-m bpf --bytecode \
+		"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
+		-j DROP || ipt=0
 	local i
 	for i in $(seq 3); do
 		pm_nl_del_endpoint $ns2 1 10.0.1.2
···
 		chk_subflow_nr "after re-add id 0 ($i)" 3
 		chk_mptcp_info subflows 3 subflows 3
 	done
+	[ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1
 
 	mptcp_lib_kill_group_wait $tests_pid
 
···
 	chk_mptcp_info subflows 2 subflows 2
 	chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
 
+	# To make sure RM_ADDR are sent over a different subflow, but
+	# allow the rest to quickly and cleanly close the subflow
+	local ipt=1
+	ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \
+		-p tcp -m tcp --tcp-option 30 \
+		-m bpf --bytecode \
+		"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
+		-j DROP || ipt=0
 	pm_nl_del_endpoint $ns1 42 10.0.1.1
 	sleep 0.5
 	chk_subflow_nr "after delete ID 0" 2
 	chk_mptcp_info subflows 2 subflows 2
 	chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+	[ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1
 
 	pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
 	wait_mpj 4
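The mptcp_join.sh hunk above embeds a 13-instruction classic-BPF program in the iptables `-m bpf --bytecode` format (`"<count>, <code> <jt> <jf> <k>, ..."`). As a sketch of how that format decodes, assuming the standard cBPF opcode encoding (0x30 = ldb, 0x54 = and, 0x15 = jeq, 0x06 = ret) — `decode_cbpf` is an illustrative helper, not part of the selftest:

```python
# Decode the iptables -m bpf bytecode string into (opname, jt, jf, k) tuples.
# Opcode names assume the classic BPF encoding; anything unknown is shown hex.
OPNAMES = {0x30: "ldb", 0x54: "and", 0x15: "jeq", 0x06: "ret"}

def decode_cbpf(bytecode):
    fields = [f.strip() for f in bytecode.split(",")]
    count = int(fields[0])          # leading field is the instruction count
    insns = []
    for f in fields[1:]:
        code, jt, jf, k = (int(x) for x in f.split())
        insns.append((OPNAMES.get(code, hex(code)), jt, jf, k))
    assert len(insns) == count, "instruction count mismatch"
    return insns

CBPF_MPTCP_SUBOPTION_RM_ADDR = """13,
48 0 0 0, 84 0 0 240, 21 0 9 64,
48 0 0 32, 84 0 0 240, 21 0 6 192,
48 0 0 53, 21 0 4 12,
48 0 0 66, 84 0 0 240, 21 0 1 64,
6 0 0 65535, 6 0 0 0"""

for insn in decode_cbpf(CBPF_MPTCP_SUBOPTION_RM_ADDR):
    print(*insn)
```

The load/mask/compare triples line up with the `nfbpf_compile` expression quoted in the comment: each `ldb`/`and 0xf0`/`jeq` checks one masked byte, and the two `ret` instructions accept (65535) or drop through (0).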
tools/testing/selftests/net/mptcp/simult_flows.sh (+7 -4)
···
 	for dev in ns2eth1 ns2eth2; do
 		tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
 	done
-	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
-	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
-	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
-	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+	# keep the queued pkts number low, or the RTT estimator will see
+	# increasing latency over time.
+	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50
+	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50
+	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50
+	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50
 
 	# time is measured in ms, account for transfer size, aggregated link speed
 	# and header overhead (10%)
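The trailing comment in the simult_flows.sh hunk describes the transfer-time budget: transfer size over the aggregated speed of the two links, plus roughly 10% header overhead. A back-of-the-envelope version of that calculation — `max_transfer_time_ms` is illustrative only, and the script's real budget also adds configured delays and slack:

```python
def max_transfer_time_ms(size_bytes, rate1_mbit, rate2_mbit, overhead=1.10):
    """Rough worst-case time for a transfer over two aggregated links.

    overhead=1.10 models the ~10% header overhead the selftest comment
    mentions; the actual tolerance in simult_flows.sh may differ.
    """
    # 1 mbit/s == 1000 bits/ms, and the two links carry traffic in parallel.
    agg_bits_per_ms = (rate1_mbit + rate2_mbit) * 1000
    return size_bytes * 8 * overhead / agg_bits_per_ms

# e.g. a 16 MiB transfer over 10 mbit + 10 mbit links
print(round(max_transfer_time_ms(16 * 1024 * 1024, 10, 10)))  # ~7382 ms
```

The `limit 50` change matters precisely because netem's default queue is deep: a long standing queue inflates measured RTT over the course of the transfer, which skews this kind of time budget.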
tools/testing/selftests/net/netfilter/nf_queue.c (+8 -2)
···
 struct options {
 	bool count_packets;
 	bool gso_enabled;
+	bool failopen;
 	int verbose;
 	unsigned int queue_num;
 	unsigned int timeout;
···
 
 static void help(const char *p)
 {
-	printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
+	printf("Usage: %s [-c|-v [-vv] ] [-o] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
 }
 
 static int parse_attr_cb(const struct nlattr *attr, void *data)
···
 
 	flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
 	flags |= NFQA_CFG_F_UID_GID;
+	if (opts.failopen)
+		flags |= NFQA_CFG_F_FAIL_OPEN;
 	mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
 	mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
···
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
+	while ((c = getopt(argc, argv, "chvot:q:Q:d:G")) != -1) {
 		switch (c) {
 		case 'c':
 			opts.count_packets = true;
···
 			break;
 		case 'G':
 			opts.gso_enabled = false;
+			break;
+		case 'o':
+			opts.failopen = true;
 			break;
 		case 'v':
 			opts.verbose++;
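The nf_queue.c hunk wires a new `-o` switch to the nfnetlink_queue fail-open flag, so that when the userspace queue overflows, packets are accepted instead of dropped. A small sketch of the flag-building logic, mirroring the C code above — the `NFQA_CFG_F_*` bit values are quoted from the uapi header `linux/netfilter/nfnetlink_queue.h` from memory and are worth re-checking against your tree:

```python
# NFQA_CFG_F_* bits (assumed values; verify against the uapi header).
NFQA_CFG_F_FAIL_OPEN = 1 << 0
NFQA_CFG_F_CONNTRACK = 1 << 1
NFQA_CFG_F_GSO = 1 << 2
NFQA_CFG_F_UID_GID = 1 << 3

def nfq_config_flags(gso_enabled, failopen):
    """Mirror of the flag composition in nf_queue.c above."""
    flags = NFQA_CFG_F_GSO if gso_enabled else 0
    flags |= NFQA_CFG_F_UID_GID
    if failopen:
        flags |= NFQA_CFG_F_FAIL_OPEN
    return flags

print(hex(nfq_config_flags(gso_enabled=True, failopen=True)))   # 0xd
print(hex(nfq_config_flags(gso_enabled=True, failopen=False)))  # 0xc
```

Note the C code puts the same value in both `NFQA_CFG_FLAGS` and `NFQA_CFG_MASK`: the mask tells the kernel which flag bits the request intends to change.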
tools/testing/selftests/net/netfilter/nft_queue.sh (+9 -4)
···
 test_udp_gro_ct()
 {
 	local errprefix="FAIL: test_udp_gro_ct:"
+	local timeout=5
 
 	ip netns exec "$nsrouter" conntrack -F 2>/dev/null
 
···
 	}
 }
 EOF
-	timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
+	timeout "$timeout" ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
 	local rpid=$!
 
-	ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" &
+	ip netns exec "$nsrouter" nice -n -19 ./nf_queue -G -c -q 1 -o -t 2 > "$TMPFILE2" &
 	local nfqpid=$!
 
 	ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on
···
 
 	local bs=512
 	local count=$(((32 * 1024 * 1024) / bs))
-	dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do
-		timeout 5 ip netns exec "$ns1" \
+
+	local nprocs=$(nproc)
+	[ $nprocs -gt 1 ] && nprocs=$((nprocs - 1))
+
+	dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 $nprocs); do
+		timeout "$timeout" nice -n 19 ip netns exec "$ns1" \
 			socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 &
 	done
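Instead of a fixed 16 senders, the nft_queue.sh hunk sizes the sender pool from `nproc`, keeping one CPU free (presumably so the reniced `nf_queue` daemon is not starved). The same computation as a pure function, for illustration:

```python
def sender_count(ncpus):
    """Parallel sender processes: all CPUs but one, minimum one.

    Mirrors the `nproc`-based computation added to nft_queue.sh.
    """
    return ncpus - 1 if ncpus > 1 else ncpus

for n in (1, 2, 8):
    print(n, "->", sender_count(n))
```

On a single-CPU runner this degrades gracefully to one sender rather than zero, which is why the shell guards the decrement with `[ $nprocs -gt 1 ]`.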
tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt (+1 -1)
···
 
 // If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
    +0 < P. 4001:54001(50000) ack 1 win 257
-   +0 > . 1:1(0) ack 54001 win 0
+    * > . 1:1(0) ack 54001 win 0
 
 // Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
    +0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt (deleted, -33)
···
-// SPDX-License-Identifier: GPL-2.0
-
---mss=1000
-
-`./defaults.sh`
-
-    0 `nstat -n`
-
-// Establish a connection.
-   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
-   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
-   +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
-   +0 bind(3, ..., ...) = 0
-   +0 listen(3, 1) = 0
-
-   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
-   +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
-  +.1 < . 1:1(0) ack 1 win 257
-
-   +0 accept(3, ..., ...) = 4
-
-   +0 < P. 1:20001(20000) ack 1 win 257
- +.04 > . 1:1(0) ack 20001 win 18000
-
-   +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
-   +0 < P. 20001:80001(60000) ack 1 win 257
-   +0 > . 1:1(0) ack 20001 win 18000
-
-   +0 read(4, ..., 20000) = 20000
-// A too big packet is accepted if the receive queue is empty
-   +0 < P. 20001:80001(60000) ack 1 win 257
-   +0 > . 1:1(0) ack 80001 win 0
-
tools/testing/selftests/net/tun.c (+6 -6)
···
 	ASSERT_EQ(ret, off);
 
 	ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss);
-	ASSERT_EQ(ret, variant->data_size);
-	ASSERT_EQ(r_num_mss, variant->r_num_mss);
+	EXPECT_EQ(ret, variant->data_size);
+	EXPECT_EQ(r_num_mss, variant->r_num_mss);
 }
 
 TEST_F(tun_vnet_udptnl, recv_gso_packet)
···
 	int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
 
 	ret = send_gso_packet_into_tunnel(self, variant);
-	ASSERT_EQ(ret, variant->data_size);
+	EXPECT_EQ(ret, variant->data_size);
 
 	memset(&vnet_hdr, 0, sizeof(vnet_hdr));
 	ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr);
-	ASSERT_EQ(ret, variant->data_size);
+	EXPECT_EQ(ret, variant->data_size);
 
 	if (!variant->no_gso) {
-		ASSERT_EQ(vh->gso_size, variant->gso_size);
+		EXPECT_EQ(vh->gso_size, variant->gso_size);
 		gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ?
 			(VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) :
 			(VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
-		ASSERT_EQ(vh->gso_type, gso_type);
+		EXPECT_EQ(vh->gso_type, gso_type);
 	}
 }
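The tun.c hunk relaxes `ASSERT_EQ` to `EXPECT_EQ`: in kselftest_harness.h, `ASSERT_*` aborts the test case on the first failure, while `EXPECT_*` records the failure and lets the remaining checks run, so one bad field does not hide the others. A Python analogy of the EXPECT behavior (this is an illustration of the semantics, not the harness itself):

```python
# Miniature EXPECT_*-style checker: failures are recorded, not fatal,
# so every comparison in a test still executes.
class Expecter:
    def __init__(self):
        self.failures = []

    def expect_eq(self, got, want, what=""):
        if got != want:
            self.failures.append(f"{what}: got {got}, want {want}")

t = Expecter()
t.expect_eq(1, 1, "ret")           # passes
t.expect_eq(2, 3, "gso_size")      # recorded, execution continues
t.expect_eq(4, 4, "gso_type")      # still runs despite the failure above
print(len(t.failures))  # 1
```

With ASSERT semantics the `gso_type` check would never have run; with EXPECT semantics a single run reports every mismatched field.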
tools/testing/selftests/tc-testing/tc-tests/actions/ct.json (+159)
···
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
+    },
+    {
+        "id": "8883",
+        "name": "Try to attach act_ct to an ets qdisc",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 root handle 1: ets bands 2"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "2",
+        "verifyCmd": "$TC -j filter ls dev $DEV1 parent 1: prio 1 protocol ip",
+        "matchJSON": [],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root"
+        ]
+    },
+    {
+        "id": "3b10",
+        "name": "Attach act_ct to an ingress qdisc",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j filter ls dev $DEV1 ingress prio 1 protocol ip",
+        "matchJSON": [
+            {
+                "kind": "matchall"
+            },
+            {
+                "options": {
+                    "actions": [
+                        {
+                            "order": 1,
+                            "kind": "ct",
+                            "index": 42,
+                            "ref": 1,
+                            "bind": 1
+                        }
+                    ]
+                }
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0337",
+        "name": "Attach act_ct to a clsact/egress qdisc",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 clsact"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 egress prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j filter ls dev $DEV1 egress prio 1 protocol ip",
+        "matchJSON": [
+            {
+                "kind": "matchall"
+            },
+            {
+                "options": {
+                    "actions": [
+                        {
+                            "order": 1,
+                            "kind": "ct",
+                            "index": 42,
+                            "ref": 1,
+                            "bind": 1
+                        }
+                    ]
+                }
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 clsact"
+        ]
+    },
+    {
+        "id": "4f60",
+        "name": "Attach act_ct to a shared block",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress_block 21 clsact"
+        ],
+        "cmdUnderTest": "$TC filter add block 21 prio 1 protocol ip matchall action ct index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j filter ls block 21 prio 1 protocol ip",
+        "matchJSON": [
+            {
+                "kind": "matchall"
+            },
+            {
+                "options": {
+                    "actions": [
+                        {
+                            "order": 1,
+                            "kind": "ct",
+                            "index": 42,
+                            "ref": 1,
+                            "bind": 1
+                        }
+                    ]
+                }
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress_block 21 clsact"
+        ]
     }
 ]
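The new ct.json entries all follow the usual tdc test-case shape: setup commands (with acceptable exit codes), a command under test, an expected exit code, and a verify command whose JSON output is matched. A quick shape check for a case like the ones above — the `REQUIRED` key set is inferred from these entries, not taken from the tdc source, and the sample case is abridged:

```python
import json

# Keys every tdc case above carries (inferred; tdc may accept more fields).
REQUIRED = {"id", "name", "category", "setup", "cmdUnderTest",
            "expExitCode", "verifyCmd", "teardown"}

case = json.loads("""
{
    "id": "3b10",
    "name": "Attach act_ct to an ingress qdisc",
    "category": ["actions", "ct"],
    "setup": ["$TC qdisc add dev $DEV1 ingress"],
    "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 1 protocol ip matchall action ct index 42",
    "expExitCode": "0",
    "verifyCmd": "$TC -j filter ls dev $DEV1 ingress prio 1 protocol ip",
    "matchJSON": [],
    "teardown": ["$TC qdisc del dev $DEV1 ingress"]
}
""")

missing = REQUIRED - case.keys()
print(sorted(missing))  # [] when the case is well-formed
```

Note how the negative test ("8883") encodes the fix being verified: `expExitCode` is "2" and `matchJSON` is the empty list, i.e. attaching act_ct under an ets root must fail and leave no filter behind.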
tools/testing/selftests/tc-testing/tc-tests/actions/ife.json (+99)
···
         "teardown": [
             "$TC actions flush action ife"
         ]
+    },
+    {
+        "id": "f2a0",
+        "name": "Update decode ife action with encode metadata",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife decode index 10"
+        ],
+        "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j actions get action ife index 10",
+        "matchJSON": [
+            {
+                "total acts": 0
+            },
+            {
+                "actions": [
+                    {
+                        "order": 1,
+                        "kind": "ife",
+                        "mode": "encode",
+                        "control_action": {
+                            "type": "pipe"
+                        },
+                        "type": "0xed3e",
+                        "tcindex": 1,
+                        "index": 10,
+                        "ref": 1,
+                        "bind": 0,
+                        "not_in_hw": true
+                    }
+                ]
+            }
+        ],
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d352",
+        "name": "Update decode ife action into encode with multiple metadata",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife decode index 10"
+        ],
+        "cmdUnderTest": "$TC actions replace action ife encode use tcindex 1 use mark 22 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j actions get action ife index 10",
+        "matchJSON": [
+            {
+                "total acts": 0
+            },
+            {
+                "actions": [
+                    {
+                        "order": 1,
+                        "kind": "ife",
+                        "mode": "encode",
+                        "control_action": {
+                            "type": "pipe"
+                        },
+                        "type": "0xed3e",
+                        "tcindex": 1,
+                        "mark": 22,
+                        "index": 10,
+                        "ref": 1,
+                        "bind": 0,
+                        "not_in_hw": true
+                    }
+                ]
+            }
+        ],
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
     }
 ]
tools/testing/selftests/tc-testing/tdc_helper.py (+7 -3)
···
 
 
 def list_categories(testlist):
-    """ Show all categories that are present in a test case file. """
-    categories = set(map(lambda x: x['category'], testlist))
+    """Show all unique categories present in the test cases."""
+    categories = set()
+    for t in testlist:
+        if 'category' in t:
+            categories.update(t['category'])
+
     print("Available categories:")
-    print(", ".join(str(s) for s in categories))
+    print(", ".join(sorted(categories)))
     print("")
 
 
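The tdc_helper.py rewrite matters because each test's "category" value is a list, and lists are unhashable, so the old `set(map(lambda x: x['category'], testlist))` raised `TypeError` as soon as it ran; accumulating with `set.update()` flattens the per-test lists instead, and `sorted()` makes the output deterministic. A minimal repro of the fixed behavior (the sample test IDs are made up for illustration):

```python
# Sample test list in the tdc shape: "category" is a list per test,
# and one entry deliberately lacks the key, which the fix now tolerates.
testlist = [
    {"id": "8883", "category": ["actions", "ct"]},
    {"id": "f2a0", "category": ["actions", "ife"]},
    {"id": "abcd"},  # no "category" key
]

categories = set()
for t in testlist:
    if 'category' in t:
        categories.update(t['category'])

print(", ".join(sorted(categories)))  # actions, ct, ife
```

The old expression failed because `set()` must hash each element, and `hash(["actions", "ct"])` is a `TypeError`; it never produced wrong output, it simply crashed.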