Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfrm: Add an inbound percpu state cache.

Now that we can have percpu xfrm states, the number of active
states might increase. To get better lookup performance,
we add a percpu cache that holds the inbound xfrm states in use.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>

+70 -7
+1
include/net/netns/xfrm.h
··· 43 43 struct hlist_head __rcu *state_bysrc; 44 44 struct hlist_head __rcu *state_byspi; 45 45 struct hlist_head __rcu *state_byseq; 46 + struct hlist_head __percpu *state_cache_input; 46 47 unsigned int state_hmask; 47 48 unsigned int state_num; 48 49 struct work_struct state_hash_work;
+5
include/net/xfrm.h
··· 185 185 struct hlist_node byspi; 186 186 struct hlist_node byseq; 187 187 struct hlist_node state_cache; 188 + struct hlist_node state_cache_input; 188 189 189 190 refcount_t refcnt; 190 191 spinlock_t lock; ··· 1651 1650 struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark, 1652 1651 const xfrm_address_t *daddr, __be32 spi, 1653 1652 u8 proto, unsigned short family); 1653 + struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, 1654 + const xfrm_address_t *daddr, 1655 + __be32 spi, u8 proto, 1656 + unsigned short family); 1654 1657 struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, 1655 1658 const xfrm_address_t *daddr, 1656 1659 const xfrm_address_t *saddr,
+3 -3
net/ipv4/esp4_offload.c
··· 53 53 if (sp->len == XFRM_MAX_DEPTH) 54 54 goto out_reset; 55 55 56 - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, 57 - (xfrm_address_t *)&ip_hdr(skb)->daddr, 58 - spi, IPPROTO_ESP, AF_INET); 56 + x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark, 57 + (xfrm_address_t *)&ip_hdr(skb)->daddr, 58 + spi, IPPROTO_ESP, AF_INET); 59 59 60 60 if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) { 61 61 /* non-offload path will record the error and audit log */
+3 -3
net/ipv6/esp6_offload.c
··· 80 80 if (sp->len == XFRM_MAX_DEPTH) 81 81 goto out_reset; 82 82 83 - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, 84 - (xfrm_address_t *)&ipv6_hdr(skb)->daddr, 85 - spi, IPPROTO_ESP, AF_INET6); 83 + x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark, 84 + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, 85 + spi, IPPROTO_ESP, AF_INET6); 86 86 87 87 if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) { 88 88 /* non-offload path will record the error and audit log */
+1 -1
net/xfrm/xfrm_input.c
··· 572 572 goto drop; 573 573 } 574 574 575 - x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family); 575 + x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family); 576 576 if (x == NULL) { 577 577 secpath_reset(skb); 578 578 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+57
net/xfrm/xfrm_state.c
··· 754 754 hlist_del_rcu(&x->byseq); 755 755 if (!hlist_unhashed(&x->state_cache)) 756 756 hlist_del_rcu(&x->state_cache); 757 + if (!hlist_unhashed(&x->state_cache_input)) 758 + hlist_del_rcu(&x->state_cache_input); 759 + 757 760 if (x->id.spi) 758 761 hlist_del_rcu(&x->byspi); 759 762 net->xfrm.state_num--; ··· 1108 1105 1109 1106 return NULL; 1110 1107 } 1108 + 1109 + struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, 1110 + const xfrm_address_t *daddr, 1111 + __be32 spi, u8 proto, 1112 + unsigned short family) 1113 + { 1114 + struct hlist_head *state_cache_input; 1115 + struct xfrm_state *x = NULL; 1116 + int cpu = get_cpu(); 1117 + 1118 + state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu); 1119 + 1120 + rcu_read_lock(); 1121 + hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) { 1122 + if (x->props.family != family || 1123 + x->id.spi != spi || 1124 + x->id.proto != proto || 1125 + !xfrm_addr_equal(&x->id.daddr, daddr, family)) 1126 + continue; 1127 + 1128 + if ((mark & x->mark.m) != x->mark.v) 1129 + continue; 1130 + if (!xfrm_state_hold_rcu(x)) 1131 + continue; 1132 + goto out; 1133 + } 1134 + 1135 + x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); 1136 + 1137 + if (x && x->km.state == XFRM_STATE_VALID) { 1138 + spin_lock_bh(&net->xfrm.xfrm_state_lock); 1139 + if (hlist_unhashed(&x->state_cache_input)) { 1140 + hlist_add_head_rcu(&x->state_cache_input, state_cache_input); 1141 + } else { 1142 + hlist_del_rcu(&x->state_cache_input); 1143 + hlist_add_head_rcu(&x->state_cache_input, state_cache_input); 1144 + } 1145 + spin_unlock_bh(&net->xfrm.xfrm_state_lock); 1146 + } 1147 + 1148 + out: 1149 + rcu_read_unlock(); 1150 + put_cpu(); 1151 + return x; 1152 + } 1153 + EXPORT_SYMBOL(xfrm_input_state_lookup); 1111 1154 1112 1155 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, 1113 1156 const xfrm_address_t *daddr, ··· 3128 3079 net->xfrm.state_byseq = xfrm_hash_alloc(sz); 
3129 3080 if (!net->xfrm.state_byseq) 3130 3081 goto out_byseq; 3082 + 3083 + net->xfrm.state_cache_input = alloc_percpu(struct hlist_head); 3084 + if (!net->xfrm.state_cache_input) 3085 + goto out_state_cache_input; 3086 + 3131 3087 net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); 3132 3088 3133 3089 net->xfrm.state_num = 0; ··· 3142 3088 &net->xfrm.xfrm_state_lock); 3143 3089 return 0; 3144 3090 3091 + out_state_cache_input: 3092 + xfrm_hash_free(net->xfrm.state_byseq, sz); 3145 3093 out_byseq: 3146 3094 xfrm_hash_free(net->xfrm.state_byspi, sz); 3147 3095 out_byspi: ··· 3173 3117 xfrm_hash_free(net->xfrm.state_bysrc, sz); 3174 3118 WARN_ON(!hlist_empty(net->xfrm.state_bydst)); 3175 3119 xfrm_hash_free(net->xfrm.state_bydst, sz); 3120 + free_percpu(net->xfrm.state_cache_input); 3176 3121 } 3177 3122 3178 3123 #ifdef CONFIG_AUDITSYSCALL