Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfrm: add state hashtable keyed by seq

When creating new states with seq set in xfrm_usersa_info, we walk
through all the states already installed in that netns to find a
matching ACQUIRE state (__xfrm_find_acq_byseq, called from
xfrm_state_add). This causes severe slowdowns on systems with a large
number of states.

This patch introduces a hashtable using x->km.seq as key, so that the
corresponding state can be found in a reasonable time.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

Authored by Sabrina Dubroca; committed by Steffen Klassert.
fe9f1d87 335a2a1f

+61 -13
+1
include/net/netns/xfrm.h
··· 42 42 struct hlist_head __rcu *state_bydst; 43 43 struct hlist_head __rcu *state_bysrc; 44 44 struct hlist_head __rcu *state_byspi; 45 + struct hlist_head __rcu *state_byseq; 45 46 unsigned int state_hmask; 46 47 unsigned int state_num; 47 48 struct work_struct state_hash_work;
+1
include/net/xfrm.h
··· 154 154 }; 155 155 struct hlist_node bysrc; 156 156 struct hlist_node byspi; 157 + struct hlist_node byseq; 157 158 158 159 refcount_t refcnt; 159 160 spinlock_t lock;
+7
net/xfrm/xfrm_hash.h
··· 131 131 return (h ^ (h >> 10) ^ (h >> 20)) & hmask; 132 132 } 133 133 134 + static inline unsigned int 135 + __xfrm_seq_hash(u32 seq, unsigned int hmask) 136 + { 137 + unsigned int h = seq; 138 + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; 139 + } 140 + 134 141 static inline unsigned int __idx_hash(u32 index, unsigned int hmask) 135 142 { 136 143 return (index ^ (index >> 8)) & hmask;
+52 -13
net/xfrm/xfrm_state.c
··· 78 78 return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); 79 79 } 80 80 81 + static unsigned int xfrm_seq_hash(struct net *net, u32 seq) 82 + { 83 + return __xfrm_seq_hash(seq, net->xfrm.state_hmask); 84 + } 85 + 81 86 static void xfrm_hash_transfer(struct hlist_head *list, 82 87 struct hlist_head *ndsttable, 83 88 struct hlist_head *nsrctable, 84 89 struct hlist_head *nspitable, 90 + struct hlist_head *nseqtable, 85 91 unsigned int nhashmask) 86 92 { 87 93 struct hlist_node *tmp; ··· 112 106 nhashmask); 113 107 hlist_add_head_rcu(&x->byspi, nspitable + h); 114 108 } 109 + 110 + if (x->km.seq) { 111 + h = __xfrm_seq_hash(x->km.seq, nhashmask); 112 + hlist_add_head_rcu(&x->byseq, nseqtable + h); 113 + } 115 114 } 116 115 } ··· 128 117 static void xfrm_hash_resize(struct work_struct *work) 129 118 { 130 119 struct net *net = container_of(work, struct net, xfrm.state_hash_work); 131 - struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; 120 + struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq; 132 121 unsigned long nsize, osize; 133 122 unsigned int nhashmask, ohashmask; 134 123 int i; ··· 148 137 xfrm_hash_free(nsrc, nsize); 149 138 return; 150 139 } 140 + nseq = xfrm_hash_alloc(nsize); 141 + if (!nseq) { 142 + xfrm_hash_free(ndst, nsize); 143 + xfrm_hash_free(nsrc, nsize); 144 + xfrm_hash_free(nspi, nsize); 145 + return; 146 + } 151 147 152 148 spin_lock_bh(&net->xfrm.xfrm_state_lock); 153 149 write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); ··· 162 144 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; 163 145 odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); 164 146 for (i = net->xfrm.state_hmask; i >= 0; i--) 165 - xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask); 147 + xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask); 166 148 167 149 osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net); 168 150 ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
151 + oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net); 169 152 ohashmask = net->xfrm.state_hmask; 170 153 171 154 rcu_assign_pointer(net->xfrm.state_bydst, ndst); 172 155 rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); 173 156 rcu_assign_pointer(net->xfrm.state_byspi, nspi); 157 + rcu_assign_pointer(net->xfrm.state_byseq, nseq); 174 158 net->xfrm.state_hmask = nhashmask; 175 159 176 160 write_seqcount_end(&net->xfrm.xfrm_state_hash_generation); ··· 185 165 xfrm_hash_free(odst, osize); 186 166 xfrm_hash_free(osrc, osize); 187 167 xfrm_hash_free(ospi, osize); 168 + xfrm_hash_free(oseq, osize); 188 169 } 189 170 190 171 static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); ··· 642 621 INIT_HLIST_NODE(&x->bydst); 643 622 INIT_HLIST_NODE(&x->bysrc); 644 623 INIT_HLIST_NODE(&x->byspi); 624 + INIT_HLIST_NODE(&x->byseq); 645 625 hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); 646 626 x->mtimer.function = xfrm_timer_handler; 647 627 timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); ··· 686 664 list_del(&x->km.all); 687 665 hlist_del_rcu(&x->bydst); 688 666 hlist_del_rcu(&x->bysrc); 667 + if (x->km.seq) 668 + hlist_del_rcu(&x->byseq); 689 669 if (x->id.spi) 690 670 hlist_del_rcu(&x->byspi); 691 671 net->xfrm.state_num--; ··· 1172 1148 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); 1173 1149 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); 1174 1150 } 1151 + if (x->km.seq) { 1152 + h = xfrm_seq_hash(net, x->km.seq); 1153 + hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h); 1154 + } 1175 1155 x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; 1176 1156 hrtimer_start(&x->mtimer, 1177 1157 ktime_set(net->xfrm.sysctl_acq_expires, 0), ··· 1289 1261 x->props.family); 1290 1262 1291 1263 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); 1264 + } 1265 + 1266 + if (x->km.seq) { 1267 + h = xfrm_seq_hash(net, x->km.seq); 1268 + 1269 + hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
1292 1270 } 1293 1271 1294 1272 hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); ··· 1966 1932 1967 1933 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) 1968 1934 { 1969 - int i; 1935 + unsigned int h = xfrm_seq_hash(net, seq); 1936 + struct xfrm_state *x; 1970 1937 1971 - for (i = 0; i <= net->xfrm.state_hmask; i++) { 1972 - struct xfrm_state *x; 1973 - 1974 - hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { 1975 - if (x->km.seq == seq && 1976 - (mark & x->mark.m) == x->mark.v && 1977 - x->km.state == XFRM_STATE_ACQ) { 1978 - xfrm_state_hold(x); 1979 - return x; 1938 + hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) { 1939 + if (x->km.seq == seq && 1940 + (mark & x->mark.m) == x->mark.v && 1941 + x->km.state == XFRM_STATE_ACQ) { 1942 + xfrm_state_hold(x); 1943 + return x; 1981 1944 } 1982 1945 } 1946 + 1983 1947 return NULL; 1984 1948 } 1985 1949 ··· 2692 2660 net->xfrm.state_byspi = xfrm_hash_alloc(sz); 2693 2661 if (!net->xfrm.state_byspi) 2694 2662 goto out_byspi; 2663 + net->xfrm.state_byseq = xfrm_hash_alloc(sz); 2664 + if (!net->xfrm.state_byseq) 2665 + goto out_byseq; 2695 2666 net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); 2696 2667 2697 2668 net->xfrm.state_num = 0; ··· 2704 2669 &net->xfrm.xfrm_state_lock); 2705 2670 return 0; 2706 2671 2672 + out_byseq: 2673 + xfrm_hash_free(net->xfrm.state_byspi, sz); 2707 2674 out_byspi: 2708 2675 xfrm_hash_free(net->xfrm.state_bysrc, sz); 2709 2676 out_bysrc: ··· 2725 2688 WARN_ON(!list_empty(&net->xfrm.state_all)); 2726 2689 2727 2690 sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); 2691 + WARN_ON(!hlist_empty(net->xfrm.state_byseq)); 2692 + xfrm_hash_free(net->xfrm.state_byseq, sz); 2728 2693 WARN_ON(!hlist_empty(net->xfrm.state_byspi)); 2729 2694 xfrm_hash_free(net->xfrm.state_byspi, sz); 2730 2695 WARN_ON(!hlist_empty(net->xfrm.state_bysrc));