Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: ipset: Count non-static extension memory for userspace

Non-static (i.e. comment) extension was not counted into the memory
size. A new internal counter is introduced for this. In the case of
the hash types the sizes of the arrays are counted there as well so
that we can avoid scanning the whole set when just the header data
is requested.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

+32 -21
+6 -2
include/linux/netfilter/ipset/ip_set.h
··· 79 79 IPSET_EXT_ID_MAX, 80 80 }; 81 81 82 + struct ip_set; 83 + 82 84 /* Extension type */ 83 85 struct ip_set_ext_type { 84 86 /* Destroy extension private data (can be NULL) */ 85 - void (*destroy)(void *ext); 87 + void (*destroy)(struct ip_set *set, void *ext); 86 88 enum ip_set_extension type; 87 89 enum ipset_cadt_flags flag; 88 90 /* Size and minimal alignment */ ··· 254 252 u32 timeout; 255 253 /* Number of elements (vs timeout) */ 256 254 u32 elements; 255 + /* Size of the dynamic extensions (vs timeout) */ 256 + size_t ext_size; 257 257 /* Element data size */ 258 258 size_t dsize; 259 259 /* Offsets to extensions in elements */ ··· 272 268 */ 273 269 if (SET_WITH_COMMENT(set)) 274 270 ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( 275 - ext_comment(data, set)); 271 + set, ext_comment(data, set)); 276 272 } 277 273 278 274 static inline int
+5 -2
include/linux/netfilter/ipset/ip_set_comment.h
··· 20 20 * The kadt functions don't use the comment extensions in any way. 21 21 */ 22 22 static inline void 23 - ip_set_init_comment(struct ip_set_comment *comment, 23 + ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, 24 24 const struct ip_set_ext *ext) 25 25 { 26 26 struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); 27 27 size_t len = ext->comment ? strlen(ext->comment) : 0; 28 28 29 29 if (unlikely(c)) { 30 + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; 30 31 kfree_rcu(c, rcu); 31 32 rcu_assign_pointer(comment->c, NULL); 32 33 } ··· 39 38 if (unlikely(!c)) 40 39 return; 41 40 strlcpy(c->str, ext->comment, len + 1); 41 + set->ext_size += sizeof(*c) + strlen(c->str) + 1; 42 42 rcu_assign_pointer(comment->c, c); 43 43 } 44 44 ··· 60 58 * of the set data anymore. 61 59 */ 62 60 static inline void 63 - ip_set_comment_free(struct ip_set_comment *comment) 61 + ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment) 64 62 { 65 63 struct ip_set_comment_rcu *c; 66 64 67 65 c = rcu_dereference_protected(comment->c, 1); 68 66 if (unlikely(!c)) 69 67 return; 68 + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; 70 69 kfree_rcu(c, rcu); 71 70 rcu_assign_pointer(comment->c, NULL); 72 71 }
+3 -2
net/netfilter/ipset/ip_set_bitmap_gen.h
··· 84 84 mtype_ext_cleanup(set); 85 85 memset(map->members, 0, map->memsize); 86 86 set->elements = 0; 87 + set->ext_size = 0; 87 88 } 88 89 89 90 /* Calculate the actual memory size of the set data */ ··· 100 99 { 101 100 const struct mtype *map = set->data; 102 101 struct nlattr *nested; 103 - size_t memsize = mtype_memsize(map, set->dsize); 102 + size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size; 104 103 105 104 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 106 105 if (!nested) ··· 174 173 if (SET_WITH_COUNTER(set)) 175 174 ip_set_init_counter(ext_counter(x, set), ext); 176 175 if (SET_WITH_COMMENT(set)) 177 - ip_set_init_comment(ext_comment(x, set), ext); 176 + ip_set_init_comment(set, ext_comment(x, set), ext); 178 177 if (SET_WITH_SKBINFO(set)) 179 178 ip_set_init_skbinfo(ext_skbinfo(x, set), ext); 180 179
+1 -1
net/netfilter/ipset/ip_set_core.c
··· 324 324 } 325 325 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); 326 326 327 - typedef void (*destroyer)(void *); 327 + typedef void (*destroyer)(struct ip_set *, void *); 328 328 /* ipset data extension types, in size order */ 329 329 330 330 const struct ip_set_ext_type ip_set_extensions[] = {
+14 -12
net/netfilter/ipset/ip_set_hash_gen.h
··· 343 343 /* Calculate the actual memory size of the set data */ 344 344 static size_t 345 345 mtype_ahash_memsize(const struct htype *h, const struct htable *t, 346 - u8 nets_length, size_t dsize) 346 + u8 nets_length) 347 347 { 348 - u32 i; 349 - struct hbucket *n; 350 348 size_t memsize = sizeof(*h) + sizeof(*t); 351 349 352 350 #ifdef IP_SET_HASH_WITH_NETS 353 351 memsize += sizeof(struct net_prefixes) * nets_length; 354 352 #endif 355 - for (i = 0; i < jhash_size(t->htable_bits); i++) { 356 - n = rcu_dereference_bh(hbucket(t, i)); 357 - if (!n) 358 - continue; 359 - memsize += sizeof(struct hbucket) + n->size * dsize; 360 - } 361 353 362 354 return memsize; 363 355 } ··· 392 400 memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); 393 401 #endif 394 402 set->elements = 0; 403 + set->ext_size = 0; 395 404 } 396 405 397 406 /* Destroy the hashtable part of the set */ ··· 524 531 d++; 525 532 } 526 533 tmp->pos = d; 534 + set->ext_size -= AHASH_INIT_SIZE * dsize; 527 535 rcu_assign_pointer(hbucket(t, i), tmp); 528 536 kfree_rcu(n, rcu); 529 537 } ··· 556 562 struct htype *h = set->data; 557 563 struct htable *t, *orig; 558 564 u8 htable_bits; 559 - size_t dsize = set->dsize; 565 + size_t extsize, dsize = set->dsize; 560 566 #ifdef IP_SET_HASH_WITH_NETS 561 567 u8 flags; 562 568 struct mtype_elem *tmp; ··· 599 605 /* There can't be another parallel resizing, but dumping is possible */ 600 606 atomic_set(&orig->ref, 1); 601 607 atomic_inc(&orig->uref); 608 + extsize = 0; 602 609 pr_debug("attempt to resize set %s from %u to %u, t %p\n", 603 610 set->name, orig->htable_bits, htable_bits, orig); 604 611 for (i = 0; i < jhash_size(orig->htable_bits); i++) { ··· 630 635 goto cleanup; 631 636 } 632 637 m->size = AHASH_INIT_SIZE; 638 + extsize = sizeof(*m) + AHASH_INIT_SIZE * dsize; 633 639 RCU_INIT_POINTER(hbucket(t, key), m); 634 640 } else if (m->pos >= m->size) { 635 641 struct hbucket *ht; ··· 650 654 memcpy(ht, m, sizeof(struct hbucket) + 651 655 
m->size * dsize); 652 656 ht->size = m->size + AHASH_INIT_SIZE; 657 + extsize += AHASH_INIT_SIZE * dsize; 653 658 kfree(m); 654 659 m = ht; 655 660 RCU_INIT_POINTER(hbucket(t, key), ht); ··· 664 667 } 665 668 } 666 669 rcu_assign_pointer(h->table, t); 670 + set->ext_size = extsize; 667 671 668 672 spin_unlock_bh(&set->lock); 669 673 ··· 738 740 if (!n) 739 741 return -ENOMEM; 740 742 n->size = AHASH_INIT_SIZE; 743 + set->ext_size += sizeof(*n) + AHASH_INIT_SIZE * set->dsize; 741 744 goto copy_elem; 742 745 } 743 746 for (i = 0; i < n->pos; i++) { ··· 802 803 memcpy(n, old, sizeof(struct hbucket) + 803 804 old->size * set->dsize); 804 805 n->size = old->size + AHASH_INIT_SIZE; 806 + set->ext_size += AHASH_INIT_SIZE * set->dsize; 805 807 } 806 808 807 809 copy_elem: ··· 823 823 if (SET_WITH_COUNTER(set)) 824 824 ip_set_init_counter(ext_counter(data, set), ext); 825 825 if (SET_WITH_COMMENT(set)) 826 - ip_set_init_comment(ext_comment(data, set), ext); 826 + ip_set_init_comment(set, ext_comment(data, set), ext); 827 827 if (SET_WITH_SKBINFO(set)) 828 828 ip_set_init_skbinfo(ext_skbinfo(data, set), ext); 829 829 /* Must come last for the case when timed out entry is reused */ ··· 895 895 k++; 896 896 } 897 897 if (n->pos == 0 && k == 0) { 898 + set->ext_size -= sizeof(*n) + n->size * dsize; 898 899 rcu_assign_pointer(hbucket(t, key), NULL); 899 900 kfree_rcu(n, rcu); 900 901 } else if (k >= AHASH_INIT_SIZE) { ··· 914 913 k++; 915 914 } 916 915 tmp->pos = k; 916 + set->ext_size -= AHASH_INIT_SIZE * dsize; 917 917 rcu_assign_pointer(hbucket(t, key), tmp); 918 918 kfree_rcu(n, rcu); 919 919 } ··· 1063 1061 1064 1062 rcu_read_lock_bh(); 1065 1063 t = rcu_dereference_bh_nfnl(h->table); 1066 - memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); 1064 + memsize = mtype_ahash_memsize(h, t, NLEN(set->family)) + set->ext_size; 1067 1065 htable_bits = t->htable_bits; 1068 1066 rcu_read_unlock_bh(); 1069 1067
+3 -2
net/netfilter/ipset/ip_set_list_set.c
··· 228 228 if (SET_WITH_COUNTER(set)) 229 229 ip_set_init_counter(ext_counter(e, set), ext); 230 230 if (SET_WITH_COMMENT(set)) 231 - ip_set_init_comment(ext_comment(e, set), ext); 231 + ip_set_init_comment(set, ext_comment(e, set), ext); 232 232 if (SET_WITH_SKBINFO(set)) 233 233 ip_set_init_skbinfo(ext_skbinfo(e, set), ext); 234 234 /* Update timeout last */ ··· 422 422 list_for_each_entry_safe(e, n, &map->members, list) 423 423 list_set_del(set, e); 424 424 set->elements = 0; 425 + set->ext_size = 0; 425 426 } 426 427 427 428 static void ··· 468 467 { 469 468 const struct list_set *map = set->data; 470 469 struct nlattr *nested; 471 - size_t memsize = list_set_memsize(map, set->dsize); 470 + size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size; 472 471 473 472 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 474 473 if (!nested)