Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: ipset: list:set: Decrease refcount synchronously on deletion and replace

Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
when flush/dump set in parallel") postponed decreasing set
reference counters to the RCU callback.

An 'ipset del' command can terminate before the RCU grace period
is elapsed, and if sets are listed before then, the reference
counter shown in userspace will be wrong:

# ipset create h hash:ip; ipset create l list:set; ipset add l
# ipset del l h; ipset list h
Name: h
Type: hash:ip
Revision: 4
Header: family inet hashsize 1024 maxelem 65536
Size in memory: 88
References: 1
Number of entries: 0
Members:
# sleep 1; ipset list h
Name: h
Type: hash:ip
Revision: 4
Header: family inet hashsize 1024 maxelem 65536
Size in memory: 88
References: 0
Number of entries: 0
Members:

Fix this by making the reference count update synchronous again.

As a result, when sets are listed, ip_set_name_byindex() might
now fetch a set whose reference count is already zero. Instead
of relying on the reference count to protect against concurrent
set renaming, grab ip_set_ref_lock as reader and copy the name,
while holding the same lock in ip_set_rename() as writer
instead.

Reported-by: Li Shuang <shuali@redhat.com>
Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Stefano Brivio and committed by
Pablo Neira Ayuso
439cd39e 4269fea7

+23 -19
+1 -1
include/linux/netfilter/ipset/ip_set.h
··· 314 314 extern ip_set_id_t ip_set_get_byname(struct net *net, 315 315 const char *name, struct ip_set **set); 316 316 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); 317 - extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); 317 + extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); 318 318 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); 319 319 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); 320 320
+11 -12
net/netfilter/ipset/ip_set_core.c
··· 693 693 EXPORT_SYMBOL_GPL(ip_set_put_byindex); 694 694 695 695 /* Get the name of a set behind a set index. 696 - * We assume the set is referenced, so it does exist and 697 - * can't be destroyed. The set cannot be renamed due to 698 - * the referencing either. 699 - * 696 + * Set itself is protected by RCU, but its name isn't: to protect against 697 + * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the 698 + * name. 700 699 */ 701 - const char * 702 - ip_set_name_byindex(struct net *net, ip_set_id_t index) 700 + void 701 + ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) 703 702 { 704 - const struct ip_set *set = ip_set_rcu_get(net, index); 703 + struct ip_set *set = ip_set_rcu_get(net, index); 705 704 706 705 BUG_ON(!set); 707 - BUG_ON(set->ref == 0); 708 706 709 - /* Referenced, so it's safe */ 710 - return set->name; 707 + read_lock_bh(&ip_set_ref_lock); 708 + strncpy(name, set->name, IPSET_MAXNAMELEN); 709 + read_unlock_bh(&ip_set_ref_lock); 711 710 } 712 711 EXPORT_SYMBOL_GPL(ip_set_name_byindex); 713 712 ··· 1152 1153 if (!set) 1153 1154 return -ENOENT; 1154 1155 1155 - read_lock_bh(&ip_set_ref_lock); 1156 + write_lock_bh(&ip_set_ref_lock); 1156 1157 if (set->ref != 0) { 1157 1158 ret = -IPSET_ERR_REFERENCED; 1158 1159 goto out; ··· 1169 1170 strncpy(set->name, name2, IPSET_MAXNAMELEN); 1170 1171 1171 1172 out: 1172 - read_unlock_bh(&ip_set_ref_lock); 1173 + write_unlock_bh(&ip_set_ref_lock); 1173 1174 return ret; 1174 1175 } 1175 1176
+11 -6
net/netfilter/ipset/ip_set_list_set.c
··· 148 148 { 149 149 struct set_elem *e = container_of(rcu, struct set_elem, rcu); 150 150 struct ip_set *set = e->set; 151 - struct list_set *map = set->data; 152 151 153 - ip_set_put_byindex(map->net, e->id); 154 152 ip_set_ext_destroy(set, e); 155 153 kfree(e); 156 154 } ··· 156 158 static inline void 157 159 list_set_del(struct ip_set *set, struct set_elem *e) 158 160 { 161 + struct list_set *map = set->data; 162 + 159 163 set->elements--; 160 164 list_del_rcu(&e->list); 165 + ip_set_put_byindex(map->net, e->id); 161 166 call_rcu(&e->rcu, __list_set_del_rcu); 162 167 } 163 168 164 169 static inline void 165 - list_set_replace(struct set_elem *e, struct set_elem *old) 170 + list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) 166 171 { 172 + struct list_set *map = set->data; 173 + 167 174 list_replace_rcu(&old->list, &e->list); 175 + ip_set_put_byindex(map->net, old->id); 168 176 call_rcu(&old->rcu, __list_set_del_rcu); 169 177 } 170 178 ··· 302 298 INIT_LIST_HEAD(&e->list); 303 299 list_set_init_extensions(set, ext, e); 304 300 if (n) 305 - list_set_replace(e, n); 301 + list_set_replace(set, e, n); 306 302 else if (next) 307 303 list_add_tail_rcu(&e->list, &next->list); 308 304 else if (prev) ··· 490 486 const struct list_set *map = set->data; 491 487 struct nlattr *atd, *nested; 492 488 u32 i = 0, first = cb->args[IPSET_CB_ARG0]; 489 + char name[IPSET_MAXNAMELEN]; 493 490 struct set_elem *e; 494 491 int ret = 0; 495 492 ··· 509 504 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 510 505 if (!nested) 511 506 goto nla_put_failure; 512 - if (nla_put_string(skb, IPSET_ATTR_NAME, 513 - ip_set_name_byindex(map->net, e->id))) 507 + ip_set_name_byindex(map->net, e->id, name); 508 + if (nla_put_string(skb, IPSET_ATTR_NAME, name)) 514 509 goto nla_put_failure; 515 510 if (ip_set_put_extensions(skb, set, e, true)) 516 511 goto nla_put_failure;