/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _NET_RPS_H
#define _NET_RPS_H

#include <linux/types.h>
#include <linux/static_key.h>
#include <net/sock.h>
#include <net/hotdata.h>

#ifdef CONFIG_RPS

extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;

/*
 * This structure holds an RPS map which can be of variable length. The
 * map is an array of CPUs.
 */
struct rps_map {
        unsigned int len;
        struct rcu_head rcu;
        u16 cpus[];
};
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))

/*
 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
 * tail pointer for that CPU's input queue at the time of last enqueue, a
 * hardware filter index, and the hash of the flow if aRFS is enabled.
 */
struct rps_dev_flow {
        u16 cpu;
        u16 filter;
        unsigned int last_qtail;
#ifdef CONFIG_RFS_ACCEL
        u32 hash;
#endif
};
#define RPS_NO_FILTER 0xffff

/*
 * The rps_dev_flow_table structure contains a table of flow mappings.
 */
struct rps_dev_flow_table {
        u8 log;
        struct rcu_head rcu;
        struct rps_dev_flow flows[];
};
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
    ((_num) * sizeof(struct rps_dev_flow)))

/*
 * The rps_sock_flow_table contains mappings of flows to the last CPU
 * on which they were processed by the application (set in recvmsg).
 * Each entry is a 32bit value. Upper part is the high-order bits
 * of flow hash, lower part is CPU number.
 * rps_cpu_mask is used to partition the space, depending on number of
 * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
 * meaning we use 32-6=26 bits for the hash.
 */
struct rps_sock_flow_table {
        struct rcu_head rcu;
        u32 mask;

        u32 ents[] ____cacheline_aligned_in_smp;
};
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))

#define RPS_NO_CPU 0xffff

static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
                                        u32 hash)
{
        unsigned int index = hash & table->mask;
        u32 val = hash & ~net_hotdata.rps_cpu_mask;

        /* We only give a hint, preemption can change CPU under us */
        val |= raw_smp_processor_id();

        /* The following WRITE_ONCE() is paired with the READ_ONCE()
         * here, and another one in get_rps_cpu().
         */
        if (READ_ONCE(table->ents[index]) != val)
                WRITE_ONCE(table->ents[index], val);
}

static inline void _sock_rps_record_flow_hash(__u32 hash)
{
        struct rps_sock_flow_table *sock_flow_table;

        if (!hash)
                return;
        rcu_read_lock();
        sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
        if (sock_flow_table)
                rps_record_sock_flow(sock_flow_table, hash);
        rcu_read_unlock();
}

static inline void _sock_rps_record_flow(const struct sock *sk)
{
        /* Reading sk->sk_rxhash might incur an expensive cache line
         * miss.
         *
         * TCP_ESTABLISHED does cover almost all states where RFS
         * might be useful, and is cheaper [1] than testing :
         *      IPv4: inet_sk(sk)->inet_daddr
         *      IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
         * OR   an additional socket flag
         * [1] : sk_state and sk_prot are in the same cache line.
         */
        if (sk->sk_state == TCP_ESTABLISHED) {
                /* This READ_ONCE() is paired with the WRITE_ONCE()
                 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
                 */
                _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
        }
}

static inline void _sock_rps_delete_flow(const struct sock *sk)
{
        struct rps_sock_flow_table *table;
        u32 hash, index;

        hash = READ_ONCE(sk->sk_rxhash);
        if (!hash)
                return;

        rcu_read_lock();
        table = rcu_dereference(net_hotdata.rps_sock_flow_table);
        if (table) {
                index = hash & table->mask;
                if (READ_ONCE(table->ents[index]) != RPS_NO_CPU)
                        WRITE_ONCE(table->ents[index], RPS_NO_CPU);
        }
        rcu_read_unlock();
}
#endif /* CONFIG_RPS */

static inline bool rfs_is_needed(void)
{
#ifdef CONFIG_RPS
        return static_branch_unlikely(&rfs_needed);
#else
        return false;
#endif
}

static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
        if (!rfs_is_needed())
                return;

        _sock_rps_record_flow_hash(hash);
#endif
}

static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
        if (!rfs_is_needed())
                return;

        _sock_rps_record_flow(sk);
#endif
}

static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
        if (!rfs_is_needed())
                return;

        _sock_rps_delete_flow(sk);
#endif
}

static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
        return ++sd->input_queue_tail;
#else
        return 0;
#endif
}

static inline void rps_input_queue_tail_save(u32 *dest, u32 tail)
{
#ifdef CONFIG_RPS
        WRITE_ONCE(*dest, tail);
#endif
}

static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
{
#ifdef CONFIG_RPS
        WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val);
#endif
}

static inline void rps_input_queue_head_incr(struct softnet_data *sd)
{
        rps_input_queue_head_add(sd, 1);
}

#endif /* _NET_RPS_H */
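The comment above struct rps_sock_flow_table describes how each 32-bit entry packs the high-order bits of the flow hash together with the CPU number, and rps_record_sock_flow() implements that packing. As a rough illustration of the encoding only, here is a minimal user-space sketch assuming the 64-possible-CPU example from the comment (mask 0x3f, so 6 CPU bits and 26 hash bits); the helper names and the stand-alone program are hypothetical and are not part of the kernel API.

/* Stand-alone sketch of the rps_sock_flow_table entry encoding described
 * above: upper bits of each 32-bit entry = high-order flow-hash bits,
 * lower bits = CPU number. EXAMPLE_CPU_MASK mirrors the 64-CPU example
 * (roundup_pow_of_two(64) - 1 = 0x3f). Names here are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_CPU_MASK 0x3fu

/* Pack an entry the same way rps_record_sock_flow() does:
 * keep the hash bits above the CPU mask, OR in the current CPU.
 */
static uint32_t pack_entry(uint32_t hash, uint32_t cpu)
{
        return (hash & ~EXAMPLE_CPU_MASK) | (cpu & EXAMPLE_CPU_MASK);
}

/* A reader can trust the CPU hint only if the stored high-order hash
 * bits still match the incoming packet's hash (different flows that
 * collide on the table index are detected this way).
 */
static int entry_matches(uint32_t entry, uint32_t hash)
{
        return (entry & ~EXAMPLE_CPU_MASK) == (hash & ~EXAMPLE_CPU_MASK);
}

int main(void)
{
        uint32_t hash = 0x9e3779b9u;            /* arbitrary flow hash */
        uint32_t entry = pack_entry(hash, 17);  /* app last ran on CPU 17 */

        printf("entry=0x%08x cpu=%u match=%d\n",
               entry, entry & EXAMPLE_CPU_MASK, entry_matches(entry, hash));
        return 0;
}

Running this prints entry=0x9e377991 cpu=17 match=1: the low 6 bits carry the CPU hint while the remaining bits preserve enough of the hash to validate the entry, which is the trade-off the header comment spells out (fewer CPU bits leave more hash bits for disambiguation).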