Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-rps-misc'

Eric Dumazet says:

====================
net: rps: misc changes

Make RPS/RFS a bit more efficient with better cache locality
and heuristics.

Also shrink include/linux/netdevice.h a bit.

v2: fixed a build issue in patch 6/8 with CONFIG_RPS=n
(Jakub and kernel build bots)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+95 -70
+3 -35
include/linux/netdevice.h
··· 3204 3204 struct softnet_data *rps_ipi_list; 3205 3205 #endif 3206 3206 3207 + unsigned int received_rps; 3207 3208 bool in_net_rx_action; 3208 3209 bool in_napi_threaded_poll; 3209 3210 ··· 3237 3236 unsigned int cpu; 3238 3237 unsigned int input_queue_tail; 3239 3238 #endif 3240 - unsigned int received_rps; 3241 - unsigned int dropped; 3242 3239 struct sk_buff_head input_pkt_queue; 3243 3240 struct napi_struct backlog; 3241 + 3242 + atomic_t dropped ____cacheline_aligned_in_smp; 3244 3243 3245 3244 /* Another possibly contended cache line */ 3246 3245 spinlock_t defer_lock ____cacheline_aligned_in_smp; ··· 3250 3249 call_single_data_t defer_csd; 3251 3250 }; 3252 3251 3253 - static inline void input_queue_head_incr(struct softnet_data *sd) 3254 - { 3255 - #ifdef CONFIG_RPS 3256 - sd->input_queue_head++; 3257 - #endif 3258 - } 3259 - 3260 - static inline void input_queue_tail_incr_save(struct softnet_data *sd, 3261 - unsigned int *qtail) 3262 - { 3263 - #ifdef CONFIG_RPS 3264 - *qtail = ++sd->input_queue_tail; 3265 - #endif 3266 - } 3267 - 3268 3252 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); 3269 3253 3270 3254 static inline int dev_recursion_level(void) ··· 3257 3271 return this_cpu_read(softnet_data.xmit.recursion); 3258 3272 } 3259 3273 3260 - #define XMIT_RECURSION_LIMIT 8 3261 - static inline bool dev_xmit_recursion(void) 3262 - { 3263 - return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > 3264 - XMIT_RECURSION_LIMIT); 3265 - } 3266 - 3267 - static inline void dev_xmit_recursion_inc(void) 3268 - { 3269 - __this_cpu_inc(softnet_data.xmit.recursion); 3270 - } 3271 - 3272 - static inline void dev_xmit_recursion_dec(void) 3273 - { 3274 - __this_cpu_dec(softnet_data.xmit.recursion); 3275 - } 3276 - 3277 - void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); 3278 3274 void __netif_schedule(struct Qdisc *q); 3279 3275 void netif_schedule_queue(struct netdev_queue *txq); 3280 3276
+28
include/net/rps.h
··· 122 122 #endif 123 123 } 124 124 125 + static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) 126 + { 127 + #ifdef CONFIG_RPS 128 + return ++sd->input_queue_tail; 129 + #else 130 + return 0; 131 + #endif 132 + } 133 + 134 + static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) 135 + { 136 + #ifdef CONFIG_RPS 137 + WRITE_ONCE(*dest, tail); 138 + #endif 139 + } 140 + 141 + static inline void rps_input_queue_head_add(struct softnet_data *sd, int val) 142 + { 143 + #ifdef CONFIG_RPS 144 + WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val); 145 + #endif 146 + } 147 + 148 + static inline void rps_input_queue_head_incr(struct softnet_data *sd) 149 + { 150 + rps_input_queue_head_add(sd, 1); 151 + } 152 + 125 153 #endif /* _NET_RPS_H */
+42 -31
net/core/dev.c
··· 4528 4528 out: 4529 4529 #endif 4530 4530 rflow->last_qtail = 4531 - per_cpu(softnet_data, next_cpu).input_queue_head; 4531 + READ_ONCE(per_cpu(softnet_data, next_cpu).input_queue_head); 4532 4532 } 4533 4533 4534 4534 rflow->cpu = next_cpu; ··· 4610 4610 */ 4611 4611 if (unlikely(tcpu != next_cpu) && 4612 4612 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || 4613 - ((int)(per_cpu(softnet_data, tcpu).input_queue_head - 4614 - rflow->last_qtail)) >= 0)) { 4613 + ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) - 4614 + READ_ONCE(rflow->last_qtail))) >= 0)) { 4615 4615 tcpu = next_cpu; 4616 4616 rflow = set_rps_cpu(dev, skb, rflow, next_cpu); 4617 4617 } ··· 4665 4665 rflow = &flow_table->flows[flow_id]; 4666 4666 cpu = READ_ONCE(rflow->cpu); 4667 4667 if (rflow->filter == filter_id && cpu < nr_cpu_ids && 4668 - ((int)(per_cpu(softnet_data, cpu).input_queue_head - 4669 - rflow->last_qtail) < 4668 + ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) - 4669 + READ_ONCE(rflow->last_qtail)) < 4670 4670 (int)(10 * flow_table->mask))) 4671 4671 expire = false; 4672 4672 } ··· 4800 4800 struct softnet_data *sd; 4801 4801 unsigned long flags; 4802 4802 unsigned int qlen; 4803 + int max_backlog; 4804 + u32 tail; 4803 4805 4804 - reason = SKB_DROP_REASON_NOT_SPECIFIED; 4806 + reason = SKB_DROP_REASON_DEV_READY; 4807 + if (!netif_running(skb->dev)) 4808 + goto bad_dev; 4809 + 4810 + reason = SKB_DROP_REASON_CPU_BACKLOG; 4805 4811 sd = &per_cpu(softnet_data, cpu); 4806 4812 4813 + qlen = skb_queue_len_lockless(&sd->input_pkt_queue); 4814 + max_backlog = READ_ONCE(net_hotdata.max_backlog); 4815 + if (unlikely(qlen > max_backlog)) 4816 + goto cpu_backlog_drop; 4807 4817 backlog_lock_irq_save(sd, &flags); 4808 - if (!netif_running(skb->dev)) 4809 - goto drop; 4810 4818 qlen = skb_queue_len(&sd->input_pkt_queue); 4811 - if (qlen <= READ_ONCE(net_hotdata.max_backlog) && 4812 - !skb_flow_limit(skb, qlen)) { 4813 - if (qlen) { 4814 - enqueue: 4815 - 
__skb_queue_tail(&sd->input_pkt_queue, skb); 4816 - input_queue_tail_incr_save(sd, qtail); 4817 - backlog_unlock_irq_restore(sd, &flags); 4818 - return NET_RX_SUCCESS; 4819 + if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) { 4820 + if (!qlen) { 4821 + /* Schedule NAPI for backlog device. We can use 4822 + * non atomic operation as we own the queue lock. 4823 + */ 4824 + if (!__test_and_set_bit(NAPI_STATE_SCHED, 4825 + &sd->backlog.state)) 4826 + napi_schedule_rps(sd); 4819 4827 } 4828 + __skb_queue_tail(&sd->input_pkt_queue, skb); 4829 + tail = rps_input_queue_tail_incr(sd); 4830 + backlog_unlock_irq_restore(sd, &flags); 4820 4831 4821 - /* Schedule NAPI for backlog device 4822 - * We can use non atomic operation since we own the queue lock 4823 - */ 4824 - if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) 4825 - napi_schedule_rps(sd); 4826 - goto enqueue; 4832 + /* save the tail outside of the critical section */ 4833 + rps_input_queue_tail_save(qtail, tail); 4834 + return NET_RX_SUCCESS; 4827 4835 } 4828 - reason = SKB_DROP_REASON_CPU_BACKLOG; 4829 4836 4830 - drop: 4831 - sd->dropped++; 4832 4837 backlog_unlock_irq_restore(sd, &flags); 4833 4838 4839 + cpu_backlog_drop: 4840 + atomic_inc(&sd->dropped); 4841 + bad_dev: 4834 4842 dev_core_stats_rx_dropped_inc(skb->dev); 4835 4843 kfree_skb_reason(skb, reason); 4836 4844 return NET_RX_DROP; ··· 5908 5900 if (skb->dev->reg_state == NETREG_UNREGISTERING) { 5909 5901 __skb_unlink(skb, &sd->input_pkt_queue); 5910 5902 dev_kfree_skb_irq(skb); 5911 - input_queue_head_incr(sd); 5903 + rps_input_queue_head_incr(sd); 5912 5904 } 5913 5905 } 5914 5906 backlog_unlock_irq_enable(sd); ··· 5917 5909 if (skb->dev->reg_state == NETREG_UNREGISTERING) { 5918 5910 __skb_unlink(skb, &sd->process_queue); 5919 5911 kfree_skb(skb); 5920 - input_queue_head_incr(sd); 5912 + rps_input_queue_head_incr(sd); 5921 5913 } 5922 5914 } 5923 5915 local_bh_enable(); ··· 6045 6037 rcu_read_lock(); 6046 6038 
__netif_receive_skb(skb); 6047 6039 rcu_read_unlock(); 6048 - input_queue_head_incr(sd); 6049 - if (++work >= quota) 6040 + if (++work >= quota) { 6041 + rps_input_queue_head_add(sd, work); 6050 6042 return work; 6043 + } 6051 6044 6052 6045 } 6053 6046 ··· 6071 6062 backlog_unlock_irq_enable(sd); 6072 6063 } 6073 6064 6065 + if (work) 6066 + rps_input_queue_head_add(sd, work); 6074 6067 return work; 6075 6068 } 6076 6069 ··· 11462 11451 /* Process offline CPU's input_pkt_queue */ 11463 11452 while ((skb = __skb_dequeue(&oldsd->process_queue))) { 11464 11453 netif_rx(skb); 11465 - input_queue_head_incr(oldsd); 11454 + rps_input_queue_head_incr(oldsd); 11466 11455 } 11467 11456 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { 11468 11457 netif_rx(skb); 11469 - input_queue_head_incr(oldsd); 11458 + rps_input_queue_head_incr(oldsd); 11470 11459 } 11471 11460 11472 11461 return 0;
+20 -3
net/core/dev.h
··· 4 4 5 5 #include <linux/types.h> 6 6 #include <linux/rwsem.h> 7 + #include <linux/netdevice.h> 7 8 8 9 struct net; 9 - struct net_device; 10 - struct netdev_bpf; 11 - struct netdev_phys_item_id; 12 10 struct netlink_ext_ack; 13 11 struct cpumask; 14 12 ··· 148 150 #endif 149 151 150 152 struct napi_struct *napi_by_id(unsigned int napi_id); 153 + void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); 154 + 155 + #define XMIT_RECURSION_LIMIT 8 156 + static inline bool dev_xmit_recursion(void) 157 + { 158 + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > 159 + XMIT_RECURSION_LIMIT); 160 + } 161 + 162 + static inline void dev_xmit_recursion_inc(void) 163 + { 164 + __this_cpu_inc(softnet_data.xmit.recursion); 165 + } 166 + 167 + static inline void dev_xmit_recursion_dec(void) 168 + { 169 + __this_cpu_dec(softnet_data.xmit.recursion); 170 + } 171 + 151 172 #endif
+2 -1
net/core/net-procfs.c
··· 144 144 seq_printf(seq, 145 145 "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " 146 146 "%08x %08x\n", 147 - sd->processed, sd->dropped, sd->time_squeeze, 0, 147 + sd->processed, atomic_read(&sd->dropped), 148 + sd->time_squeeze, 0, 148 149 0, 0, 0, 0, /* was fastroute */ 149 150 0, /* was cpu_collision */ 150 151 sd->received_rps, flow_limit_count,