Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: use NUMA drop counters for softnet_data.dropped

Hosts under DOS attack can suffer from false sharing
in enqueue_to_backlog() : atomic_inc(&sd->dropped).

This is because sd->dropped can be touched from many cpus,
possibly residing on different NUMA nodes.

Generalize the sk_drop_counters infrastructure
added in commit c51613fa276f ("net: add sk->sk_drop_counters")
and use it to replace softnet_data.dropped
with NUMA friendly softnet_data.drop_counters.

This adds 64 bytes per cpu, maybe more in the future
if we increase the number of counters (currently 2)
per 'struct numa_drop_counters'.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet, committed by Jakub Kicinski
fdae0ab6 278289bc

+45 -31
+1 -1
include/linux/ipv6.h
··· 295 295 __u32 offset; /* checksum offset */ 296 296 struct icmp6_filter filter; 297 297 __u32 ip6mr_table; 298 - struct socket_drop_counters drop_counters; 298 + struct numa_drop_counters drop_counters; 299 299 struct ipv6_pinfo inet6; 300 300 }; 301 301
+27 -1
include/linux/netdevice.h
··· 3459 3459 return dev->header_ops && dev->header_ops->create; 3460 3460 } 3461 3461 3462 + struct numa_drop_counters { 3463 + atomic_t drops0 ____cacheline_aligned_in_smp; 3464 + atomic_t drops1 ____cacheline_aligned_in_smp; 3465 + }; 3466 + 3467 + static inline int numa_drop_read(const struct numa_drop_counters *ndc) 3468 + { 3469 + return atomic_read(&ndc->drops0) + atomic_read(&ndc->drops1); 3470 + } 3471 + 3472 + static inline void numa_drop_add(struct numa_drop_counters *ndc, int val) 3473 + { 3474 + int n = numa_node_id() % 2; 3475 + 3476 + if (n) 3477 + atomic_add(val, &ndc->drops1); 3478 + else 3479 + atomic_add(val, &ndc->drops0); 3480 + } 3481 + 3482 + static inline void numa_drop_reset(struct numa_drop_counters *ndc) 3483 + { 3484 + atomic_set(&ndc->drops0, 0); 3485 + atomic_set(&ndc->drops1, 0); 3486 + } 3487 + 3462 3488 /* 3463 3489 * Incoming packets are placed on per-CPU queues 3464 3490 */ ··· 3530 3504 struct sk_buff_head input_pkt_queue; 3531 3505 struct napi_struct backlog; 3532 3506 3533 - atomic_t dropped ____cacheline_aligned_in_smp; 3507 + struct numa_drop_counters drop_counters; 3534 3508 3535 3509 /* Another possibly contended cache line */ 3536 3510 spinlock_t defer_lock ____cacheline_aligned_in_smp;
+1 -1
include/linux/udp.h
··· 108 108 * the last UDP socket cacheline. 109 109 */ 110 110 struct hlist_node tunnel_list; 111 - struct socket_drop_counters drop_counters; 111 + struct numa_drop_counters drop_counters; 112 112 }; 113 113 114 114 #define udp_test_bit(nr, sk) \
+1 -1
include/net/raw.h
··· 81 81 struct inet_sock inet; 82 82 struct icmp_filter filter; 83 83 u32 ipmr_table; 84 - struct socket_drop_counters drop_counters; 84 + struct numa_drop_counters drop_counters; 85 85 }; 86 86 87 87 #define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
+12 -25
include/net/sock.h
··· 102 102 typedef __u32 __bitwise __portpair; 103 103 typedef __u64 __bitwise __addrpair; 104 104 105 - struct socket_drop_counters { 106 - atomic_t drops0 ____cacheline_aligned_in_smp; 107 - atomic_t drops1 ____cacheline_aligned_in_smp; 108 - }; 109 - 110 105 /** 111 106 * struct sock_common - minimal network layer representation of sockets 112 107 * @skc_daddr: Foreign IPv4 addr ··· 282 287 * @sk_err_soft: errors that don't cause failure but are the cause of a 283 288 * persistent failure not just 'timed out' 284 289 * @sk_drops: raw/udp drops counter 285 - * @sk_drop_counters: optional pointer to socket_drop_counters 290 + * @sk_drop_counters: optional pointer to numa_drop_counters 286 291 * @sk_ack_backlog: current listen backlog 287 292 * @sk_max_ack_backlog: listen backlog set in listen() 288 293 * @sk_uid: user id of owner ··· 451 456 #ifdef CONFIG_XFRM 452 457 struct xfrm_policy __rcu *sk_policy[2]; 453 458 #endif 454 - struct socket_drop_counters *sk_drop_counters; 459 + struct numa_drop_counters *sk_drop_counters; 455 460 __cacheline_group_end(sock_read_rxtx); 456 461 457 462 __cacheline_group_begin(sock_write_rxtx); ··· 2693 2698 2694 2699 static inline void sk_drops_add(struct sock *sk, int segs) 2695 2700 { 2696 - struct socket_drop_counters *sdc = sk->sk_drop_counters; 2701 + struct numa_drop_counters *ndc = sk->sk_drop_counters; 2697 2702 2698 - if (sdc) { 2699 - int n = numa_node_id() % 2; 2700 - 2701 - if (n) 2702 - atomic_add(segs, &sdc->drops1); 2703 - else 2704 - atomic_add(segs, &sdc->drops0); 2705 - } else { 2703 + if (ndc) 2704 + numa_drop_add(ndc, segs); 2705 + else 2706 2706 atomic_add(segs, &sk->sk_drops); 2707 - } 2708 2707 } 2709 2708 2710 2709 static inline void sk_drops_inc(struct sock *sk) ··· 2708 2719 2709 2720 static inline int sk_drops_read(const struct sock *sk) 2710 2721 { 2711 - const struct socket_drop_counters *sdc = sk->sk_drop_counters; 2722 + const struct numa_drop_counters *ndc = sk->sk_drop_counters; 2712 2723 2713 - 
if (sdc) { 2724 + if (ndc) { 2714 2725 DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops)); 2715 - return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1); 2726 + return numa_drop_read(ndc); 2716 2727 } 2717 2728 return atomic_read(&sk->sk_drops); 2718 2729 } 2719 2730 2720 2731 static inline void sk_drops_reset(struct sock *sk) 2721 2732 { 2722 - struct socket_drop_counters *sdc = sk->sk_drop_counters; 2733 + struct numa_drop_counters *ndc = sk->sk_drop_counters; 2723 2734 2724 - if (sdc) { 2725 - atomic_set(&sdc->drops0, 0); 2726 - atomic_set(&sdc->drops1, 0); 2727 - } 2735 + if (ndc) 2736 + numa_drop_reset(ndc); 2728 2737 atomic_set(&sk->sk_drops, 0); 2729 2738 } 2730 2739
+1 -1
net/core/dev.c
··· 5248 5248 backlog_unlock_irq_restore(sd, &flags); 5249 5249 5250 5250 cpu_backlog_drop: 5251 - atomic_inc(&sd->dropped); 5251 + numa_drop_add(&sd->drop_counters, 1); 5252 5252 bad_dev: 5253 5253 dev_core_stats_rx_dropped_inc(skb->dev); 5254 5254 kfree_skb_reason(skb, reason);
+2 -1
net/core/net-procfs.c
··· 145 145 seq_printf(seq, 146 146 "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " 147 147 "%08x %08x\n", 148 - READ_ONCE(sd->processed), atomic_read(&sd->dropped), 148 + READ_ONCE(sd->processed), 149 + numa_drop_read(&sd->drop_counters), 149 150 READ_ONCE(sd->time_squeeze), 0, 150 151 0, 0, 0, 0, /* was fastroute */ 151 152 0, /* was cpu_collision */