Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfrm: configure policy hash table thresholds by netlink

Enable to specify local and remote prefix length thresholds for the
policy hash table via a netlink XFRM_MSG_NEWSPDINFO message.

Prefix length thresholds are specified by the XFRMA_SPD_IPV4_HTHRESH and
XFRMA_SPD_IPV6_HTHRESH optional attributes (struct xfrmu_spdhthresh).

example:

struct xfrmu_spdhthresh thresh4 = {
	.lbits = 0,
	.rbits = 24,
};
struct xfrmu_spdhthresh thresh6 = {
	.lbits = 0,
	.rbits = 56,
};
struct nlmsghdr *hdr;
struct nl_msg *msg;

msg = nlmsg_alloc();
hdr = nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, XFRM_MSG_NEWSPDINFO, sizeof(__u32), NLM_F_REQUEST);
nla_put(msg, XFRMA_SPD_IPV4_HTHRESH, sizeof(thresh4), &thresh4);
nla_put(msg, XFRMA_SPD_IPV6_HTHRESH, sizeof(thresh6), &thresh6);
nla_send_auto(sk, msg);

The numbers are the policy selector minimum prefix lengths to put a
policy in the hash table.

- lbits is the local threshold (source address for out policies,
destination address for in and fwd policies).

- rbits is the remote threshold (destination address for out
policies, source address for in and fwd policies).

The default values are:

XFRMA_SPD_IPV4_HTHRESH: 32 32
XFRMA_SPD_IPV6_HTHRESH: 128 128

Dynamic re-building of the SPD is performed when the threshold values
are changed.

The current thresholds can be read via an XFRM_MSG_GETSPDINFO request:
the kernel replies to XFRM_MSG_GETSPDINFO requests with an
XFRM_MSG_NEWSPDINFO message, carrying both attributes
XFRMA_SPD_IPV4_HTHRESH and XFRMA_SPD_IPV6_HTHRESH.

Signed-off-by: Christophe Gouault <christophe.gouault@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

authored by

Christophe Gouault and committed by
Steffen Klassert
880a6fab b58555f1

+182 -3
+10
include/net/netns/xfrm.h
··· 19 19 u8 sbits6; 20 20 }; 21 21 22 + struct xfrm_policy_hthresh { 23 + struct work_struct work; 24 + seqlock_t lock; 25 + u8 lbits4; 26 + u8 rbits4; 27 + u8 lbits6; 28 + u8 rbits6; 29 + }; 30 + 22 31 struct netns_xfrm { 23 32 struct list_head state_all; 24 33 /* ··· 54 45 struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; 55 46 unsigned int policy_count[XFRM_POLICY_MAX * 2]; 56 47 struct work_struct policy_hash_work; 48 + struct xfrm_policy_hthresh policy_hthresh; 57 49 58 50 59 51 struct sock *nlsk;
+1
include/net/xfrm.h
··· 1591 1591 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir, 1592 1592 u32 id, int delete, int *err); 1593 1593 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); 1594 + void xfrm_policy_hash_rebuild(struct net *net); 1594 1595 u32 xfrm_get_acqseq(void); 1595 1596 int verify_spi_info(u8 proto, u32 min, u32 max); 1596 1597 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
+7
include/uapi/linux/xfrm.h
··· 328 328 XFRMA_SPD_UNSPEC, 329 329 XFRMA_SPD_INFO, 330 330 XFRMA_SPD_HINFO, 331 + XFRMA_SPD_IPV4_HTHRESH, 332 + XFRMA_SPD_IPV6_HTHRESH, 331 333 __XFRMA_SPD_MAX 332 334 333 335 #define XFRMA_SPD_MAX (__XFRMA_SPD_MAX - 1) ··· 347 345 struct xfrmu_spdhinfo { 348 346 __u32 spdhcnt; 349 347 __u32 spdhmcnt; 348 + }; 349 + 350 + struct xfrmu_spdhthresh { 351 + __u8 lbits; 352 + __u8 rbits; 350 353 }; 351 354 352 355 struct xfrm_usersa_info {
+87
net/xfrm/xfrm_policy.c
··· 566 566 mutex_unlock(&hash_resize_mutex); 567 567 } 568 568 569 + static void xfrm_hash_rebuild(struct work_struct *work) 570 + { 571 + struct net *net = container_of(work, struct net, 572 + xfrm.policy_hthresh.work); 573 + unsigned int hmask; 574 + struct xfrm_policy *pol; 575 + struct xfrm_policy *policy; 576 + struct hlist_head *chain; 577 + struct hlist_head *odst; 578 + struct hlist_node *newpos; 579 + int i; 580 + int dir; 581 + unsigned seq; 582 + u8 lbits4, rbits4, lbits6, rbits6; 583 + 584 + mutex_lock(&hash_resize_mutex); 585 + 586 + /* read selector prefixlen thresholds */ 587 + do { 588 + seq = read_seqbegin(&net->xfrm.policy_hthresh.lock); 589 + 590 + lbits4 = net->xfrm.policy_hthresh.lbits4; 591 + rbits4 = net->xfrm.policy_hthresh.rbits4; 592 + lbits6 = net->xfrm.policy_hthresh.lbits6; 593 + rbits6 = net->xfrm.policy_hthresh.rbits6; 594 + } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); 595 + 596 + write_lock_bh(&net->xfrm.xfrm_policy_lock); 597 + 598 + /* reset the bydst and inexact table in all directions */ 599 + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { 600 + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); 601 + hmask = net->xfrm.policy_bydst[dir].hmask; 602 + odst = net->xfrm.policy_bydst[dir].table; 603 + for (i = hmask; i >= 0; i--) 604 + INIT_HLIST_HEAD(odst + i); 605 + if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { 606 + /* dir out => dst = remote, src = local */ 607 + net->xfrm.policy_bydst[dir].dbits4 = rbits4; 608 + net->xfrm.policy_bydst[dir].sbits4 = lbits4; 609 + net->xfrm.policy_bydst[dir].dbits6 = rbits6; 610 + net->xfrm.policy_bydst[dir].sbits6 = lbits6; 611 + } else { 612 + /* dir in/fwd => dst = local, src = remote */ 613 + net->xfrm.policy_bydst[dir].dbits4 = lbits4; 614 + net->xfrm.policy_bydst[dir].sbits4 = rbits4; 615 + net->xfrm.policy_bydst[dir].dbits6 = lbits6; 616 + net->xfrm.policy_bydst[dir].sbits6 = rbits6; 617 + } 618 + } 619 + 620 + /* re-insert all policies by order of creation */ 621 + 
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { 622 + newpos = NULL; 623 + chain = policy_hash_bysel(net, &policy->selector, 624 + policy->family, 625 + xfrm_policy_id2dir(policy->index)); 626 + hlist_for_each_entry(pol, chain, bydst) { 627 + if (policy->priority >= pol->priority) 628 + newpos = &pol->bydst; 629 + else 630 + break; 631 + } 632 + if (newpos) 633 + hlist_add_behind(&policy->bydst, newpos); 634 + else 635 + hlist_add_head(&policy->bydst, chain); 636 + } 637 + 638 + write_unlock_bh(&net->xfrm.xfrm_policy_lock); 639 + 640 + mutex_unlock(&hash_resize_mutex); 641 + } 642 + 643 + void xfrm_policy_hash_rebuild(struct net *net) 644 + { 645 + schedule_work(&net->xfrm.policy_hthresh.work); 646 + } 647 + EXPORT_SYMBOL(xfrm_policy_hash_rebuild); 648 + 569 649 /* Generate new index... KAME seems to generate them ordered by cost 570 650 * of an absolute inpredictability of ordering of rules. This will not pass. */ 571 651 static u32 xfrm_gen_index(struct net *net, int dir, u32 index) ··· 2952 2872 htab->dbits6 = 128; 2953 2873 htab->sbits6 = 128; 2954 2874 } 2875 + net->xfrm.policy_hthresh.lbits4 = 32; 2876 + net->xfrm.policy_hthresh.rbits4 = 32; 2877 + net->xfrm.policy_hthresh.lbits6 = 128; 2878 + net->xfrm.policy_hthresh.rbits6 = 128; 2879 + 2880 + seqlock_init(&net->xfrm.policy_hthresh.lock); 2955 2881 2956 2882 INIT_LIST_HEAD(&net->xfrm.policy_all); 2957 2883 INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); 2884 + INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); 2958 2885 if (net_eq(net, &init_net)) 2959 2886 register_netdevice_notifier(&xfrm_dev_notifier); 2960 2887 return 0;
+77 -3
net/xfrm/xfrm_user.c
··· 964 964 { 965 965 return NLMSG_ALIGN(4) 966 966 + nla_total_size(sizeof(struct xfrmu_spdinfo)) 967 - + nla_total_size(sizeof(struct xfrmu_spdhinfo)); 967 + + nla_total_size(sizeof(struct xfrmu_spdhinfo)) 968 + + nla_total_size(sizeof(struct xfrmu_spdhthresh)) 969 + + nla_total_size(sizeof(struct xfrmu_spdhthresh)); 968 970 } 969 971 970 972 static int build_spdinfo(struct sk_buff *skb, struct net *net, ··· 975 973 struct xfrmk_spdinfo si; 976 974 struct xfrmu_spdinfo spc; 977 975 struct xfrmu_spdhinfo sph; 976 + struct xfrmu_spdhthresh spt4, spt6; 978 977 struct nlmsghdr *nlh; 979 978 int err; 980 979 u32 *f; 980 + unsigned lseq; 981 981 982 982 nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); 983 983 if (nlh == NULL) /* shouldn't really happen ... */ ··· 997 993 sph.spdhcnt = si.spdhcnt; 998 994 sph.spdhmcnt = si.spdhmcnt; 999 995 996 + do { 997 + lseq = read_seqbegin(&net->xfrm.policy_hthresh.lock); 998 + 999 + spt4.lbits = net->xfrm.policy_hthresh.lbits4; 1000 + spt4.rbits = net->xfrm.policy_hthresh.rbits4; 1001 + spt6.lbits = net->xfrm.policy_hthresh.lbits6; 1002 + spt6.rbits = net->xfrm.policy_hthresh.rbits6; 1003 + } while (read_seqretry(&net->xfrm.policy_hthresh.lock, lseq)); 1004 + 1000 1005 err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); 1001 1006 if (!err) 1002 1007 err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); 1008 + if (!err) 1009 + err = nla_put(skb, XFRMA_SPD_IPV4_HTHRESH, sizeof(spt4), &spt4); 1010 + if (!err) 1011 + err = nla_put(skb, XFRMA_SPD_IPV6_HTHRESH, sizeof(spt6), &spt6); 1003 1012 if (err) { 1004 1013 nlmsg_cancel(skb, nlh); 1005 1014 return err; 1006 1015 } 1007 1016 1008 1017 return nlmsg_end(skb, nlh); 1018 + } 1019 + 1020 + static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, 1021 + struct nlattr **attrs) 1022 + { 1023 + struct net *net = sock_net(skb->sk); 1024 + struct xfrmu_spdhthresh *thresh4 = NULL; 1025 + struct xfrmu_spdhthresh *thresh6 = NULL; 1026 + 1027 + /* 
selector prefixlen thresholds to hash policies */ 1028 + if (attrs[XFRMA_SPD_IPV4_HTHRESH]) { 1029 + struct nlattr *rta = attrs[XFRMA_SPD_IPV4_HTHRESH]; 1030 + 1031 + if (nla_len(rta) < sizeof(*thresh4)) 1032 + return -EINVAL; 1033 + thresh4 = nla_data(rta); 1034 + if (thresh4->lbits > 32 || thresh4->rbits > 32) 1035 + return -EINVAL; 1036 + } 1037 + if (attrs[XFRMA_SPD_IPV6_HTHRESH]) { 1038 + struct nlattr *rta = attrs[XFRMA_SPD_IPV6_HTHRESH]; 1039 + 1040 + if (nla_len(rta) < sizeof(*thresh6)) 1041 + return -EINVAL; 1042 + thresh6 = nla_data(rta); 1043 + if (thresh6->lbits > 128 || thresh6->rbits > 128) 1044 + return -EINVAL; 1045 + } 1046 + 1047 + if (thresh4 || thresh6) { 1048 + write_seqlock(&net->xfrm.policy_hthresh.lock); 1049 + if (thresh4) { 1050 + net->xfrm.policy_hthresh.lbits4 = thresh4->lbits; 1051 + net->xfrm.policy_hthresh.rbits4 = thresh4->rbits; 1052 + } 1053 + if (thresh6) { 1054 + net->xfrm.policy_hthresh.lbits6 = thresh6->lbits; 1055 + net->xfrm.policy_hthresh.rbits6 = thresh6->rbits; 1056 + } 1057 + write_sequnlock(&net->xfrm.policy_hthresh.lock); 1058 + 1059 + xfrm_policy_hash_rebuild(net); 1060 + } 1061 + 1062 + return 0; 1009 1063 } 1010 1064 1011 1065 static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, ··· 2336 2274 [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), 2337 2275 [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), 2338 2276 [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), 2277 + [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), 2339 2278 [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), 2340 2279 }; 2341 2280 ··· 2371 2308 [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, 2372 2309 }; 2373 2310 2311 + static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { 2312 + [XFRMA_SPD_IPV4_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) }, 2313 + [XFRMA_SPD_IPV6_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) }, 2314 + }; 2315 + 2374 2316 
static const struct xfrm_link { 2375 2317 int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); 2376 2318 int (*dump)(struct sk_buff *, struct netlink_callback *); 2377 2319 int (*done)(struct netlink_callback *); 2320 + const struct nla_policy *nla_pol; 2321 + int nla_max; 2378 2322 } xfrm_dispatch[XFRM_NR_MSGTYPES] = { 2379 2323 [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, 2380 2324 [XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa }, ··· 2405 2335 [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae }, 2406 2336 [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate }, 2407 2337 [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo }, 2338 + [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_set_spdinfo, 2339 + .nla_pol = xfrma_spd_policy, 2340 + .nla_max = XFRMA_SPD_MAX }, 2408 2341 [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, 2409 2342 }; 2410 2343 ··· 2444 2371 } 2445 2372 } 2446 2373 2447 - err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, 2448 - xfrma_policy); 2374 + err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, 2375 + link->nla_max ? : XFRMA_MAX, 2376 + link->nla_pol ? : xfrma_policy); 2449 2377 if (err < 0) 2450 2378 return err; 2451 2379