Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'flow_keys_digest'

Tom Herbert says:

====================
net: Eliminate calls to flow_dissector and introduce flow_keys_digest

In this patch set we add skb_get_hash_perturb which gets the skbuff
hash for a packet and perturbs it using a provided key and jhash1.
This function is used in several qdiscs and eliminates many calls
to flow_dissector and jhash3 to get a perturbed hash for a packet.

To handle the sch_choke issue (passes flow_keys in skbuff cb) we
add flow_keys_digest which is a digest of a flow constructed
from a flow_keys structure.

This is the second version of these patches I posted a while ago,
and is prerequisite work to increasing the size of the flow_keys
structure and hashing over it (full IPv6 address, flow label, VLAN ID,
etc.).

Version 2:

- Add keyval parameter to __flow_hash_from_keys which allows caller to
set the initval for jhash
- Perturb always does flow dissection and creates hash based on
input perturb value which acts as the keyval to __flow_hash_from_keys
- Added a _flow_keys_digest_data which is used in make_flow_keys_digest.
This fills out the digest by populating individual fields instead
of copying the whole structure.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+92 -86
+2
include/linux/skbuff.h
··· 927 927 return skb->hash; 928 928 } 929 929 930 + __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); 931 + 930 932 static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) 931 933 { 932 934 return skb->hash;
+16
include/net/flow_keys.h
··· 42 42 u32 flow_hash_from_keys(struct flow_keys *keys); 43 43 unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, 44 44 __be16 protocol); 45 + 46 + /* struct flow_keys_digest: 47 + * 48 + * This structure is used to hold a digest of the full flow keys. This is a 49 + * larger "hash" of a flow to allow definitively matching specific flows where 50 + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so 51 + * that it can by used in CB of skb (see sch_choke for an example). 52 + */ 53 + #define FLOW_KEYS_DIGEST_LEN 16 54 + struct flow_keys_digest { 55 + u8 data[FLOW_KEYS_DIGEST_LEN]; 56 + }; 57 + 58 + void make_flow_keys_digest(struct flow_keys_digest *digest, 59 + const struct flow_keys *flow); 60 + 45 61 #endif
+57 -8
net/core/flow_dissector.c
··· 267 267 net_get_random_once(&hashrnd, sizeof(hashrnd)); 268 268 } 269 269 270 - static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) 270 + static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval) 271 271 { 272 - __flow_hash_secret_init(); 273 - return jhash_3words(a, b, c, hashrnd); 272 + return jhash_3words(a, b, c, keyval); 274 273 } 275 274 276 - static inline u32 __flow_hash_from_keys(struct flow_keys *keys) 275 + static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) 277 276 { 278 277 u32 hash; 279 278 ··· 286 287 287 288 hash = __flow_hash_3words((__force u32)keys->dst, 288 289 (__force u32)keys->src, 289 - (__force u32)keys->ports); 290 + (__force u32)keys->ports, 291 + keyval); 290 292 if (!hash) 291 293 hash = 1; 292 294 ··· 296 296 297 297 u32 flow_hash_from_keys(struct flow_keys *keys) 298 298 { 299 - return __flow_hash_from_keys(keys); 299 + __flow_hash_secret_init(); 300 + return __flow_hash_from_keys(keys, hashrnd); 300 301 } 301 302 EXPORT_SYMBOL(flow_hash_from_keys); 303 + 304 + static inline u32 ___skb_get_hash(const struct sk_buff *skb, 305 + struct flow_keys *keys, u32 keyval) 306 + { 307 + if (!skb_flow_dissect(skb, keys)) 308 + return 0; 309 + 310 + return __flow_hash_from_keys(keys, keyval); 311 + } 312 + 313 + struct _flow_keys_digest_data { 314 + __be16 n_proto; 315 + u8 ip_proto; 316 + u8 padding; 317 + __be32 ports; 318 + __be32 src; 319 + __be32 dst; 320 + }; 321 + 322 + void make_flow_keys_digest(struct flow_keys_digest *digest, 323 + const struct flow_keys *flow) 324 + { 325 + struct _flow_keys_digest_data *data = 326 + (struct _flow_keys_digest_data *)digest; 327 + 328 + BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); 329 + 330 + memset(digest, 0, sizeof(*digest)); 331 + 332 + data->n_proto = flow->n_proto; 333 + data->ip_proto = flow->ip_proto; 334 + data->ports = flow->ports; 335 + data->src = flow->src; 336 + data->dst = flow->dst; 337 + } 338 + 
EXPORT_SYMBOL(make_flow_keys_digest); 302 339 303 340 /* 304 341 * __skb_get_hash: calculate a flow hash based on src/dst addresses ··· 346 309 void __skb_get_hash(struct sk_buff *skb) 347 310 { 348 311 struct flow_keys keys; 312 + u32 hash; 349 313 350 - if (!skb_flow_dissect(skb, &keys)) 314 + __flow_hash_secret_init(); 315 + 316 + hash = ___skb_get_hash(skb, &keys, hashrnd); 317 + if (!hash) 351 318 return; 352 319 353 320 if (keys.ports) ··· 359 318 360 319 skb->sw_hash = 1; 361 320 362 - skb->hash = __flow_hash_from_keys(&keys); 321 + skb->hash = hash; 363 322 } 364 323 EXPORT_SYMBOL(__skb_get_hash); 324 + 325 + __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) 326 + { 327 + struct flow_keys keys; 328 + 329 + return ___skb_get_hash(skb, &keys, perturb); 330 + } 331 + EXPORT_SYMBOL(skb_get_hash_perturb); 365 332 366 333 /* 367 334 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+4 -10
net/sched/sch_choke.c
··· 133 133 --sch->q.qlen; 134 134 } 135 135 136 - /* private part of skb->cb[] that a qdisc is allowed to use 137 - * is limited to QDISC_CB_PRIV_LEN bytes. 138 - * As a flow key might be too large, we store a part of it only. 139 - */ 140 - #define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3) 141 - 142 136 struct choke_skb_cb { 143 137 u16 classid; 144 138 u8 keys_valid; 145 - u8 keys[QDISC_CB_PRIV_LEN - 3]; 139 + struct flow_keys_digest keys; 146 140 }; 147 141 148 142 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) ··· 171 177 if (!choke_skb_cb(skb1)->keys_valid) { 172 178 choke_skb_cb(skb1)->keys_valid = 1; 173 179 skb_flow_dissect(skb1, &temp); 174 - memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN); 180 + make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); 175 181 } 176 182 177 183 if (!choke_skb_cb(skb2)->keys_valid) { 178 184 choke_skb_cb(skb2)->keys_valid = 1; 179 185 skb_flow_dissect(skb2, &temp); 180 - memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN); 186 + make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); 181 187 } 182 188 183 189 return !memcmp(&choke_skb_cb(skb1)->keys, 184 190 &choke_skb_cb(skb2)->keys, 185 - CHOKE_K_LEN); 191 + sizeof(choke_skb_cb(skb1)->keys)); 186 192 } 187 193 188 194 /*
+2 -9
net/sched/sch_fq_codel.c
··· 23 23 #include <linux/vmalloc.h> 24 24 #include <net/netlink.h> 25 25 #include <net/pkt_sched.h> 26 - #include <net/flow_keys.h> 27 26 #include <net/codel.h> 28 27 29 28 /* Fair Queue CoDel. ··· 67 68 }; 68 69 69 70 static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q, 70 - const struct sk_buff *skb) 71 + struct sk_buff *skb) 71 72 { 72 - struct flow_keys keys; 73 - unsigned int hash; 74 - 75 - skb_flow_dissect(skb, &keys); 76 - hash = jhash_3words((__force u32)keys.dst, 77 - (__force u32)keys.src ^ keys.ip_proto, 78 - (__force u32)keys.ports, q->perturbation); 73 + u32 hash = skb_get_hash_perturb(skb, q->perturbation); 79 74 80 75 return reciprocal_scale(hash, q->flows_cnt); 81 76 }
+1 -18
net/sched/sch_hhf.c
··· 9 9 #include <linux/module.h> 10 10 #include <linux/skbuff.h> 11 11 #include <linux/vmalloc.h> 12 - #include <net/flow_keys.h> 13 12 #include <net/pkt_sched.h> 14 13 #include <net/sock.h> 15 14 ··· 175 176 return jiffies; 176 177 } 177 178 178 - static unsigned int skb_hash(const struct hhf_sched_data *q, 179 - const struct sk_buff *skb) 180 - { 181 - struct flow_keys keys; 182 - unsigned int hash; 183 - 184 - if (skb->sk && skb->sk->sk_hash) 185 - return skb->sk->sk_hash; 186 - 187 - skb_flow_dissect(skb, &keys); 188 - hash = jhash_3words((__force u32)keys.dst, 189 - (__force u32)keys.src ^ keys.ip_proto, 190 - (__force u32)keys.ports, q->perturbation); 191 - return hash; 192 - } 193 - 194 179 /* Looks up a heavy-hitter flow in a chaining list of table T. */ 195 180 static struct hh_flow_state *seek_list(const u32 hash, 196 181 struct list_head *head, ··· 263 280 } 264 281 265 282 /* Get hashed flow-id of the skb. */ 266 - hash = skb_hash(q, skb); 283 + hash = skb_get_hash_perturb(skb, q->perturbation); 267 284 268 285 /* Check if this packet belongs to an already established HH flow. */ 269 286 flow_pos = hash & HHF_BIT_MASK;
+8 -16
net/sched/sch_sfb.c
··· 26 26 #include <net/ip.h> 27 27 #include <net/pkt_sched.h> 28 28 #include <net/inet_ecn.h> 29 - #include <net/flow_keys.h> 30 29 31 30 /* 32 31 * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) ··· 284 285 int i; 285 286 u32 p_min = ~0; 286 287 u32 minqlen = ~0; 287 - u32 r, slot, salt, sfbhash; 288 + u32 r, sfbhash; 289 + u32 slot = q->slot; 288 290 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 289 - struct flow_keys keys; 290 291 291 292 if (unlikely(sch->q.qlen >= q->limit)) { 292 293 qdisc_qstats_overlimit(sch); ··· 308 309 309 310 fl = rcu_dereference_bh(q->filter_list); 310 311 if (fl) { 312 + u32 salt; 313 + 311 314 /* If using external classifiers, get result and record it. */ 312 315 if (!sfb_classify(skb, fl, &ret, &salt)) 313 316 goto other_drop; 314 - keys.src = salt; 315 - keys.dst = 0; 316 - keys.ports = 0; 317 + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); 317 318 } else { 318 - skb_flow_dissect(skb, &keys); 319 + sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); 319 320 } 320 321 321 - slot = q->slot; 322 322 323 - sfbhash = jhash_3words((__force u32)keys.dst, 324 - (__force u32)keys.src, 325 - (__force u32)keys.ports, 326 - q->bins[slot].perturbation); 327 323 if (!sfbhash) 328 324 sfbhash = 1; 329 325 sfb_skb_cb(skb)->hashes[slot] = sfbhash; ··· 350 356 if (unlikely(p_min >= SFB_MAX_PROB)) { 351 357 /* Inelastic flow */ 352 358 if (q->double_buffering) { 353 - sfbhash = jhash_3words((__force u32)keys.dst, 354 - (__force u32)keys.src, 355 - (__force u32)keys.ports, 356 - q->bins[slot].perturbation); 359 + sfbhash = skb_get_hash_perturb(skb, 360 + q->bins[slot].perturbation); 357 361 if (!sfbhash) 358 362 sfbhash = 1; 359 363 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
+2 -25
net/sched/sch_sfq.c
··· 23 23 #include <linux/vmalloc.h> 24 24 #include <net/netlink.h> 25 25 #include <net/pkt_sched.h> 26 - #include <net/flow_keys.h> 27 26 #include <net/red.h> 28 27 29 28 ··· 155 156 return &q->dep[val - SFQ_MAX_FLOWS]; 156 157 } 157 158 158 - /* 159 - * In order to be able to quickly rehash our queue when timer changes 160 - * q->perturbation, we store flow_keys in skb->cb[] 161 - */ 162 - struct sfq_skb_cb { 163 - struct flow_keys keys; 164 - }; 165 - 166 - static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) 167 - { 168 - qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb)); 169 - return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; 170 - } 171 - 172 159 static unsigned int sfq_hash(const struct sfq_sched_data *q, 173 160 const struct sk_buff *skb) 174 161 { 175 - const struct flow_keys *keys = &sfq_skb_cb(skb)->keys; 176 - unsigned int hash; 177 - 178 - hash = jhash_3words((__force u32)keys->dst, 179 - (__force u32)keys->src ^ keys->ip_proto, 180 - (__force u32)keys->ports, q->perturbation); 181 - return hash & (q->divisor - 1); 162 + return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); 182 163 } 183 164 184 165 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, ··· 175 196 return TC_H_MIN(skb->priority); 176 197 177 198 fl = rcu_dereference_bh(q->filter_list); 178 - if (!fl) { 179 - skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys); 199 + if (!fl) 180 200 return sfq_hash(q, skb) + 1; 181 - } 182 201 183 202 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 184 203 result = tc_classify(skb, fl, &res);