Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfrm: hash prefixed policies based on preflen thresholds

The idea is an extension of the current policy hashing.

Today only non-prefixed policies are stored in a hash table. This
patch relaxes this constraint, and hashes policies whose prefix
lengths are greater than or equal to a configurable threshold.

Each hash table (one per direction) maintains its own set of IPv4 and
IPv6 thresholds (dbits4, sbits4, dbits6, sbits6), by default (32, 32,
128, 128).

For example, if the output hash table is configured with source
thresholds sbits4=16, sbits6=56 and destination thresholds dbits4=24,
dbits6=64:

ip xfrm policy add dir out src 10.22.0.0/20 dst 10.24.1.0/24 ... => hashed
ip xfrm policy add dir out src 10.22.0.0/16 dst 10.24.1.1/32 ... => hashed
ip xfrm policy add dir out src 10.22.0.0/16 dst 10.24.0.0/16 ... => unhashed

ip xfrm policy add dir out \
src 3ffe:304:124:2200::/60 dst 3ffe:304:124:2401::/64 ... => hashed
ip xfrm policy add dir out \
src 3ffe:304:124:2200::/56 dst 3ffe:304:124:2401::2/128 ... => hashed
ip xfrm policy add dir out \
src 3ffe:304:124:2200::/56 dst 3ffe:304:124:2400::/56 ... => unhashed

The high order bits of the addresses (up to the threshold) are used to
compute the hash key.

Signed-off-by: Christophe Gouault <christophe.gouault@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

authored by

Christophe Gouault and committed by
Steffen Klassert
b58555f1 0244790c

+117 -16
+4
include/net/netns/xfrm.h
··· 13 13 struct xfrm_policy_hash { 14 14 struct hlist_head *table; 15 15 unsigned int hmask; 16 + u8 dbits4; 17 + u8 sbits4; 18 + u8 dbits6; 19 + u8 sbits6; 16 20 }; 17 21 18 22 struct netns_xfrm {
+66 -10
net/xfrm/xfrm_hash.h
··· 3 3 4 4 #include <linux/xfrm.h> 5 5 #include <linux/socket.h> 6 + #include <linux/jhash.h> 6 7 7 8 static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) 8 9 { ··· 27 26 { 28 27 return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ 29 28 saddr->a6[2] ^ saddr->a6[3]); 29 + } 30 + 31 + static inline u32 __bits2mask32(__u8 bits) 32 + { 33 + u32 mask32 = 0xffffffff; 34 + 35 + if (bits == 0) 36 + mask32 = 0; 37 + else if (bits < 32) 38 + mask32 <<= (32 - bits); 39 + 40 + return mask32; 41 + } 42 + 43 + static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr, 44 + const xfrm_address_t *saddr, 45 + __u8 dbits, 46 + __u8 sbits) 47 + { 48 + return jhash_2words(ntohl(daddr->a4) & __bits2mask32(dbits), 49 + ntohl(saddr->a4) & __bits2mask32(sbits), 50 + 0); 51 + } 52 + 53 + static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr, 54 + __u8 prefixlen) 55 + { 56 + int pdw; 57 + int pbi; 58 + u32 initval = 0; 59 + 60 + pdw = prefixlen >> 5; /* num of whole u32 in prefix */ 61 + pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ 62 + 63 + if (pbi) { 64 + __be32 mask; 65 + 66 + mask = htonl((0xffffffff) << (32 - pbi)); 67 + 68 + initval = (__force u32)(addr->a6[pdw] & mask); 69 + } 70 + 71 + return jhash2((__force u32 *)addr->a6, pdw, initval); 72 + } 73 + 74 + static inline unsigned int __xfrm6_dpref_spref_hash(const xfrm_address_t *daddr, 75 + const xfrm_address_t *saddr, 76 + __u8 dbits, 77 + __u8 sbits) 78 + { 79 + return __xfrm6_pref_hash(daddr, dbits) ^ 80 + __xfrm6_pref_hash(saddr, sbits); 30 81 } 31 82 32 83 static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, ··· 137 84 } 138 85 139 86 static inline unsigned int __sel_hash(const struct xfrm_selector *sel, 140 - unsigned short family, unsigned int hmask) 87 + unsigned short family, unsigned int hmask, 88 + u8 dbits, u8 sbits) 141 89 { 142 90 const xfrm_address_t *daddr = &sel->daddr; 143 91 const xfrm_address_t *saddr = &sel->saddr; ··· 
146 92 147 93 switch (family) { 148 94 case AF_INET: 149 - if (sel->prefixlen_d != 32 || 150 - sel->prefixlen_s != 32) 95 + if (sel->prefixlen_d < dbits || 96 + sel->prefixlen_s < sbits) 151 97 return hmask + 1; 152 98 153 - h = __xfrm4_daddr_saddr_hash(daddr, saddr); 99 + h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits); 154 100 break; 155 101 156 102 case AF_INET6: 157 - if (sel->prefixlen_d != 128 || 158 - sel->prefixlen_s != 128) 103 + if (sel->prefixlen_d < dbits || 104 + sel->prefixlen_s < sbits) 159 105 return hmask + 1; 160 106 161 - h = __xfrm6_daddr_saddr_hash(daddr, saddr); 107 + h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits); 162 108 break; 163 109 } 164 110 h ^= (h >> 16); ··· 167 113 168 114 static inline unsigned int __addr_hash(const xfrm_address_t *daddr, 169 115 const xfrm_address_t *saddr, 170 - unsigned short family, unsigned int hmask) 116 + unsigned short family, 117 + unsigned int hmask, 118 + u8 dbits, u8 sbits) 171 119 { 172 120 unsigned int h = 0; 173 121 174 122 switch (family) { 175 123 case AF_INET: 176 - h = __xfrm4_daddr_saddr_hash(daddr, saddr); 124 + h = __xfrm4_dpref_spref_hash(daddr, saddr, dbits, sbits); 177 125 break; 178 126 179 127 case AF_INET6: 180 - h = __xfrm6_daddr_saddr_hash(daddr, saddr); 128 + h = __xfrm6_dpref_spref_hash(daddr, saddr, dbits, sbits); 181 129 break; 182 130 } 183 131 h ^= (h >> 16);
+47 -6
net/xfrm/xfrm_policy.c
··· 344 344 return __idx_hash(index, net->xfrm.policy_idx_hmask); 345 345 } 346 346 347 + /* calculate policy hash thresholds */ 348 + static void __get_hash_thresh(struct net *net, 349 + unsigned short family, int dir, 350 + u8 *dbits, u8 *sbits) 351 + { 352 + switch (family) { 353 + case AF_INET: 354 + *dbits = net->xfrm.policy_bydst[dir].dbits4; 355 + *sbits = net->xfrm.policy_bydst[dir].sbits4; 356 + break; 357 + 358 + case AF_INET6: 359 + *dbits = net->xfrm.policy_bydst[dir].dbits6; 360 + *sbits = net->xfrm.policy_bydst[dir].sbits6; 361 + break; 362 + 363 + default: 364 + *dbits = 0; 365 + *sbits = 0; 366 + } 367 + } 368 + 347 369 static struct hlist_head *policy_hash_bysel(struct net *net, 348 370 const struct xfrm_selector *sel, 349 371 unsigned short family, int dir) 350 372 { 351 373 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 352 - unsigned int hash = __sel_hash(sel, family, hmask); 374 + unsigned int hash; 375 + u8 dbits; 376 + u8 sbits; 377 + 378 + __get_hash_thresh(net, family, dir, &dbits, &sbits); 379 + hash = __sel_hash(sel, family, hmask, dbits, sbits); 353 380 354 381 return (hash == hmask + 1 ? 
355 382 &net->xfrm.policy_inexact[dir] : ··· 389 362 unsigned short family, int dir) 390 363 { 391 364 unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; 392 - unsigned int hash = __addr_hash(daddr, saddr, family, hmask); 365 + unsigned int hash; 366 + u8 dbits; 367 + u8 sbits; 368 + 369 + __get_hash_thresh(net, family, dir, &dbits, &sbits); 370 + hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); 393 371 394 372 return net->xfrm.policy_bydst[dir].table + hash; 395 373 } 396 374 397 - static void xfrm_dst_hash_transfer(struct hlist_head *list, 375 + static void xfrm_dst_hash_transfer(struct net *net, 376 + struct hlist_head *list, 398 377 struct hlist_head *ndsttable, 399 - unsigned int nhashmask) 378 + unsigned int nhashmask, 379 + int dir) 400 380 { 401 381 struct hlist_node *tmp, *entry0 = NULL; 402 382 struct xfrm_policy *pol; 403 383 unsigned int h0 = 0; 384 + u8 dbits; 385 + u8 sbits; 404 386 405 387 redo: 406 388 hlist_for_each_entry_safe(pol, tmp, list, bydst) { 407 389 unsigned int h; 408 390 391 + __get_hash_thresh(net, pol->family, dir, &dbits, &sbits); 409 392 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, 410 - pol->family, nhashmask); 393 + pol->family, nhashmask, dbits, sbits); 411 394 if (!entry0) { 412 395 hlist_del(&pol->bydst); 413 396 hlist_add_head(&pol->bydst, ndsttable+h); ··· 471 434 write_lock_bh(&net->xfrm.xfrm_policy_lock); 472 435 473 436 for (i = hmask; i >= 0; i--) 474 - xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); 437 + xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); 475 438 476 439 net->xfrm.policy_bydst[dir].table = ndst; 477 440 net->xfrm.policy_bydst[dir].hmask = nhashmask; ··· 2867 2830 if (!htab->table) 2868 2831 goto out_bydst; 2869 2832 htab->hmask = hmask; 2833 + htab->dbits4 = 32; 2834 + htab->sbits4 = 32; 2835 + htab->dbits6 = 128; 2836 + htab->sbits6 = 128; 2870 2837 } 2871 2838 2872 2839 INIT_LIST_HEAD(&net->xfrm.policy_all);