Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'master' of git://blackhole.kfki.hu/nf

Jozsef Kadlecsik says:

====================
ipset patches for nf

The first one is larger than usual, but the issue could not be solved more simply.
Also, it's a resend of the patch I submitted a few days ago, with a one-line
fix on top of it: the size of the comment extensions was not taken into
account when reporting the full size of the set.

- Fix "INFO: rcu detected stall in hash_xxx" reports from syzbot
by introducing region locking and using a workqueue instead of timer-based
gc of timed-out entries in the hash types of sets in ipset.
- Fix the forceadd evaluation path - this bug was also uncovered by syzbot.
====================

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

+472 -204
+10 -1
include/linux/netfilter/ipset/ip_set.h
··· 121 121 u32 timeout; 122 122 u8 packets_op; 123 123 u8 bytes_op; 124 + bool target; 124 125 }; 125 126 126 127 struct ip_set; ··· 188 187 /* Return true if "b" set is the same as "a" 189 188 * according to the create set parameters */ 190 189 bool (*same_set)(const struct ip_set *a, const struct ip_set *b); 190 + /* Region-locking is used */ 191 + bool region_lock; 192 + }; 193 + 194 + struct ip_set_region { 195 + spinlock_t lock; /* Region lock */ 196 + size_t ext_size; /* Size of the dynamic extensions */ 197 + u32 elements; /* Number of elements vs timeout */ 191 198 }; 192 199 193 200 /* The core set type structure */ ··· 510 501 } 511 502 512 503 #define IP_SET_INIT_KEXT(skb, opt, set) \ 513 - { .bytes = (skb)->len, .packets = 1, \ 504 + { .bytes = (skb)->len, .packets = 1, .target = true,\ 514 505 .timeout = ip_set_adt_opt_timeout(opt, set) } 515 506 516 507 #define IP_SET_INIT_UEXT(set) \
+24 -10
net/netfilter/ipset/ip_set_core.c
··· 723 723 return set; 724 724 } 725 725 726 + static inline void 727 + ip_set_lock(struct ip_set *set) 728 + { 729 + if (!set->variant->region_lock) 730 + spin_lock_bh(&set->lock); 731 + } 732 + 733 + static inline void 734 + ip_set_unlock(struct ip_set *set) 735 + { 736 + if (!set->variant->region_lock) 737 + spin_unlock_bh(&set->lock); 738 + } 739 + 726 740 int 727 741 ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 728 742 const struct xt_action_param *par, struct ip_set_adt_opt *opt) ··· 758 744 if (ret == -EAGAIN) { 759 745 /* Type requests element to be completed */ 760 746 pr_debug("element must be completed, ADD is triggered\n"); 761 - spin_lock_bh(&set->lock); 747 + ip_set_lock(set); 762 748 set->variant->kadt(set, skb, par, IPSET_ADD, opt); 763 - spin_unlock_bh(&set->lock); 749 + ip_set_unlock(set); 764 750 ret = 1; 765 751 } else { 766 752 /* --return-nomatch: invert matched element */ ··· 789 775 !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) 790 776 return -IPSET_ERR_TYPE_MISMATCH; 791 777 792 - spin_lock_bh(&set->lock); 778 + ip_set_lock(set); 793 779 ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); 794 - spin_unlock_bh(&set->lock); 780 + ip_set_unlock(set); 795 781 796 782 return ret; 797 783 } ··· 811 797 !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) 812 798 return -IPSET_ERR_TYPE_MISMATCH; 813 799 814 - spin_lock_bh(&set->lock); 800 + ip_set_lock(set); 815 801 ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); 816 - spin_unlock_bh(&set->lock); 802 + ip_set_unlock(set); 817 803 818 804 return ret; 819 805 } ··· 1278 1264 { 1279 1265 pr_debug("set: %s\n", set->name); 1280 1266 1281 - spin_lock_bh(&set->lock); 1267 + ip_set_lock(set); 1282 1268 set->variant->flush(set); 1283 - spin_unlock_bh(&set->lock); 1269 + ip_set_unlock(set); 1284 1270 } 1285 1271 1286 1272 static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, ··· 1727 1713 bool eexist = flags & 
IPSET_FLAG_EXIST, retried = false; 1728 1714 1729 1715 do { 1730 - spin_lock_bh(&set->lock); 1716 + ip_set_lock(set); 1731 1717 ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); 1732 - spin_unlock_bh(&set->lock); 1718 + ip_set_unlock(set); 1733 1719 retried = true; 1734 1720 } while (ret == -EAGAIN && 1735 1721 set->variant->resize &&
+438 -193
net/netfilter/ipset/ip_set_hash_gen.h
··· 7 7 #include <linux/rcupdate.h> 8 8 #include <linux/jhash.h> 9 9 #include <linux/types.h> 10 + #include <linux/netfilter/nfnetlink.h> 10 11 #include <linux/netfilter/ipset/ip_set.h> 11 12 12 - #define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) 13 - #define ipset_dereference_protected(p, set) \ 14 - __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock)) 15 - 16 - #define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) 13 + #define __ipset_dereference(p) \ 14 + rcu_dereference_protected(p, 1) 15 + #define ipset_dereference_nfnl(p) \ 16 + rcu_dereference_protected(p, \ 17 + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) 18 + #define ipset_dereference_set(p, set) \ 19 + rcu_dereference_protected(p, \ 20 + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ 21 + lockdep_is_held(&(set)->lock)) 22 + #define ipset_dereference_bh_nfnl(p) \ 23 + rcu_dereference_bh_check(p, \ 24 + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) 17 25 18 26 /* Hashing which uses arrays to resolve clashing. The hash table is resized 19 27 * (doubled) when searching becomes too long. ··· 80 72 __aligned(__alignof__(u64)); 81 73 }; 82 74 75 + /* Region size for locking == 2^HTABLE_REGION_BITS */ 76 + #define HTABLE_REGION_BITS 10 77 + #define ahash_numof_locks(htable_bits) \ 78 + ((htable_bits) < HTABLE_REGION_BITS ? 1 \ 79 + : jhash_size((htable_bits) - HTABLE_REGION_BITS)) 80 + #define ahash_sizeof_regions(htable_bits) \ 81 + (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) 82 + #define ahash_region(n, htable_bits) \ 83 + ((n) % ahash_numof_locks(htable_bits)) 84 + #define ahash_bucket_start(h, htable_bits) \ 85 + ((htable_bits) < HTABLE_REGION_BITS ? 0 \ 86 + : (h) * jhash_size(HTABLE_REGION_BITS)) 87 + #define ahash_bucket_end(h, htable_bits) \ 88 + ((htable_bits) < HTABLE_REGION_BITS ? 
jhash_size(htable_bits) \ 89 + : ((h) + 1) * jhash_size(HTABLE_REGION_BITS)) 90 + 91 + struct htable_gc { 92 + struct delayed_work dwork; 93 + struct ip_set *set; /* Set the gc belongs to */ 94 + u32 region; /* Last gc run position */ 95 + }; 96 + 83 97 /* The hash table: the table size stored here in order to make resizing easy */ 84 98 struct htable { 85 99 atomic_t ref; /* References for resizing */ 86 - atomic_t uref; /* References for dumping */ 100 + atomic_t uref; /* References for dumping and gc */ 87 101 u8 htable_bits; /* size of hash table == 2^htable_bits */ 102 + u32 maxelem; /* Maxelem per region */ 103 + struct ip_set_region *hregion; /* Region locks and ext sizes */ 88 104 struct hbucket __rcu *bucket[0]; /* hashtable buckets */ 89 105 }; 90 106 ··· 194 162 #define NLEN 0 195 163 #endif /* IP_SET_HASH_WITH_NETS */ 196 164 165 + #define SET_ELEM_EXPIRED(set, d) \ 166 + (SET_WITH_TIMEOUT(set) && \ 167 + ip_set_timeout_expired(ext_timeout(d, set))) 168 + 197 169 #endif /* _IP_SET_HASH_GEN_H */ 198 170 199 171 #ifndef MTYPE ··· 241 205 #undef mtype_test_cidrs 242 206 #undef mtype_test 243 207 #undef mtype_uref 244 - #undef mtype_expire 245 208 #undef mtype_resize 209 + #undef mtype_ext_size 210 + #undef mtype_resize_ad 246 211 #undef mtype_head 247 212 #undef mtype_list 213 + #undef mtype_gc_do 248 214 #undef mtype_gc 249 215 #undef mtype_gc_init 250 216 #undef mtype_variant ··· 285 247 #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) 286 248 #define mtype_test IPSET_TOKEN(MTYPE, _test) 287 249 #define mtype_uref IPSET_TOKEN(MTYPE, _uref) 288 - #define mtype_expire IPSET_TOKEN(MTYPE, _expire) 289 250 #define mtype_resize IPSET_TOKEN(MTYPE, _resize) 251 + #define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size) 252 + #define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad) 290 253 #define mtype_head IPSET_TOKEN(MTYPE, _head) 291 254 #define mtype_list IPSET_TOKEN(MTYPE, _list) 255 + #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) 292 256 #define 
mtype_gc IPSET_TOKEN(MTYPE, _gc) 293 257 #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) 294 258 #define mtype_variant IPSET_TOKEN(MTYPE, _variant) ··· 315 275 /* The generic hash structure */ 316 276 struct htype { 317 277 struct htable __rcu *table; /* the hash table */ 318 - struct timer_list gc; /* garbage collection when timeout enabled */ 319 - struct ip_set *set; /* attached to this ip_set */ 278 + struct htable_gc gc; /* gc workqueue */ 320 279 u32 maxelem; /* max elements in the hash */ 321 280 u32 initval; /* random jhash init value */ 322 281 #ifdef IP_SET_HASH_WITH_MARKMASK ··· 327 288 #ifdef IP_SET_HASH_WITH_NETMASK 328 289 u8 netmask; /* netmask value for subnets to store */ 329 290 #endif 291 + struct list_head ad; /* Resize add|del backlist */ 330 292 struct mtype_elem next; /* temporary storage for uadd */ 331 293 #ifdef IP_SET_HASH_WITH_NETS 332 294 struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ 333 295 #endif 296 + }; 297 + 298 + /* ADD|DEL entries saved during resize */ 299 + struct mtype_resize_ad { 300 + struct list_head list; 301 + enum ipset_adt ad; /* ADD|DEL element */ 302 + struct mtype_elem d; /* Element value */ 303 + struct ip_set_ext ext; /* Extensions for ADD */ 304 + struct ip_set_ext mext; /* Target extensions for ADD */ 305 + u32 flags; /* Flags for ADD */ 334 306 }; 335 307 336 308 #ifdef IP_SET_HASH_WITH_NETS ··· 349 299 * sized networks. cidr == real cidr + 1 to support /0. 
350 300 */ 351 301 static void 352 - mtype_add_cidr(struct htype *h, u8 cidr, u8 n) 302 + mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) 353 303 { 354 304 int i, j; 355 305 306 + spin_lock_bh(&set->lock); 356 307 /* Add in increasing prefix order, so larger cidr first */ 357 308 for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { 358 309 if (j != -1) { ··· 362 311 j = i; 363 312 } else if (h->nets[i].cidr[n] == cidr) { 364 313 h->nets[CIDR_POS(cidr)].nets[n]++; 365 - return; 314 + goto unlock; 366 315 } 367 316 } 368 317 if (j != -1) { ··· 371 320 } 372 321 h->nets[i].cidr[n] = cidr; 373 322 h->nets[CIDR_POS(cidr)].nets[n] = 1; 323 + unlock: 324 + spin_unlock_bh(&set->lock); 374 325 } 375 326 376 327 static void 377 - mtype_del_cidr(struct htype *h, u8 cidr, u8 n) 328 + mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) 378 329 { 379 330 u8 i, j, net_end = NLEN - 1; 380 331 332 + spin_lock_bh(&set->lock); 381 333 for (i = 0; i < NLEN; i++) { 382 334 if (h->nets[i].cidr[n] != cidr) 383 335 continue; 384 336 h->nets[CIDR_POS(cidr)].nets[n]--; 385 337 if (h->nets[CIDR_POS(cidr)].nets[n] > 0) 386 - return; 338 + goto unlock; 387 339 for (j = i; j < net_end && h->nets[j].cidr[n]; j++) 388 340 h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; 389 341 h->nets[j].cidr[n] = 0; 390 - return; 342 + goto unlock; 391 343 } 344 + unlock: 345 + spin_unlock_bh(&set->lock); 392 346 } 393 347 #endif 394 348 ··· 401 345 static size_t 402 346 mtype_ahash_memsize(const struct htype *h, const struct htable *t) 403 347 { 404 - return sizeof(*h) + sizeof(*t); 348 + return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits); 405 349 } 406 350 407 351 /* Get the ith element from the array block n */ ··· 425 369 struct htype *h = set->data; 426 370 struct htable *t; 427 371 struct hbucket *n; 428 - u32 i; 372 + u32 r, i; 429 373 430 - t = ipset_dereference_protected(h->table, set); 431 - for (i = 0; i < jhash_size(t->htable_bits); i++) { 432 - 
n = __ipset_dereference_protected(hbucket(t, i), 1); 433 - if (!n) 434 - continue; 435 - if (set->extensions & IPSET_EXT_DESTROY) 436 - mtype_ext_cleanup(set, n); 437 - /* FIXME: use slab cache */ 438 - rcu_assign_pointer(hbucket(t, i), NULL); 439 - kfree_rcu(n, rcu); 374 + t = ipset_dereference_nfnl(h->table); 375 + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { 376 + spin_lock_bh(&t->hregion[r].lock); 377 + for (i = ahash_bucket_start(r, t->htable_bits); 378 + i < ahash_bucket_end(r, t->htable_bits); i++) { 379 + n = __ipset_dereference(hbucket(t, i)); 380 + if (!n) 381 + continue; 382 + if (set->extensions & IPSET_EXT_DESTROY) 383 + mtype_ext_cleanup(set, n); 384 + /* FIXME: use slab cache */ 385 + rcu_assign_pointer(hbucket(t, i), NULL); 386 + kfree_rcu(n, rcu); 387 + } 388 + t->hregion[r].ext_size = 0; 389 + t->hregion[r].elements = 0; 390 + spin_unlock_bh(&t->hregion[r].lock); 440 391 } 441 392 #ifdef IP_SET_HASH_WITH_NETS 442 393 memset(h->nets, 0, sizeof(h->nets)); 443 394 #endif 444 - set->elements = 0; 445 - set->ext_size = 0; 446 395 } 447 396 448 397 /* Destroy the hashtable part of the set */ ··· 458 397 u32 i; 459 398 460 399 for (i = 0; i < jhash_size(t->htable_bits); i++) { 461 - n = __ipset_dereference_protected(hbucket(t, i), 1); 400 + n = __ipset_dereference(hbucket(t, i)); 462 401 if (!n) 463 402 continue; 464 403 if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) ··· 467 406 kfree(n); 468 407 } 469 408 409 + ip_set_free(t->hregion); 470 410 ip_set_free(t); 471 411 } 472 412 ··· 476 414 mtype_destroy(struct ip_set *set) 477 415 { 478 416 struct htype *h = set->data; 417 + struct list_head *l, *lt; 479 418 480 419 if (SET_WITH_TIMEOUT(set)) 481 - del_timer_sync(&h->gc); 420 + cancel_delayed_work_sync(&h->gc.dwork); 482 421 483 - mtype_ahash_destroy(set, 484 - __ipset_dereference_protected(h->table, 1), true); 422 + mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); 423 + list_for_each_safe(l, lt, &h->ad) { 424 + 
list_del(l); 425 + kfree(l); 426 + } 485 427 kfree(h); 486 428 487 429 set->data = NULL; 488 - } 489 - 490 - static void 491 - mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) 492 - { 493 - struct htype *h = set->data; 494 - 495 - timer_setup(&h->gc, gc, 0); 496 - mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); 497 - pr_debug("gc initialized, run in every %u\n", 498 - IPSET_GC_PERIOD(set->timeout)); 499 430 } 500 431 501 432 static bool ··· 509 454 a->extensions == b->extensions; 510 455 } 511 456 512 - /* Delete expired elements from the hashtable */ 513 457 static void 514 - mtype_expire(struct ip_set *set, struct htype *h) 458 + mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) 515 459 { 516 - struct htable *t; 517 460 struct hbucket *n, *tmp; 518 461 struct mtype_elem *data; 519 462 u32 i, j, d; ··· 519 466 #ifdef IP_SET_HASH_WITH_NETS 520 467 u8 k; 521 468 #endif 469 + u8 htable_bits = t->htable_bits; 522 470 523 - t = ipset_dereference_protected(h->table, set); 524 - for (i = 0; i < jhash_size(t->htable_bits); i++) { 525 - n = __ipset_dereference_protected(hbucket(t, i), 1); 471 + spin_lock_bh(&t->hregion[r].lock); 472 + for (i = ahash_bucket_start(r, htable_bits); 473 + i < ahash_bucket_end(r, htable_bits); i++) { 474 + n = __ipset_dereference(hbucket(t, i)); 526 475 if (!n) 527 476 continue; 528 477 for (j = 0, d = 0; j < n->pos; j++) { ··· 540 485 smp_mb__after_atomic(); 541 486 #ifdef IP_SET_HASH_WITH_NETS 542 487 for (k = 0; k < IPSET_NET_COUNT; k++) 543 - mtype_del_cidr(h, 488 + mtype_del_cidr(set, h, 544 489 NCIDR_PUT(DCIDR_GET(data->cidr, k)), 545 490 k); 546 491 #endif 492 + t->hregion[r].elements--; 547 493 ip_set_ext_destroy(set, data); 548 - set->elements--; 549 494 d++; 550 495 } 551 496 if (d >= AHASH_INIT_SIZE) { 552 497 if (d >= n->size) { 498 + t->hregion[r].ext_size -= 499 + ext_size(n->size, dsize); 553 500 rcu_assign_pointer(hbucket(t, i), NULL); 554 501 kfree_rcu(n, rcu); 555 502 
continue; 556 503 } 557 504 tmp = kzalloc(sizeof(*tmp) + 558 - (n->size - AHASH_INIT_SIZE) * dsize, 559 - GFP_ATOMIC); 505 + (n->size - AHASH_INIT_SIZE) * dsize, 506 + GFP_ATOMIC); 560 507 if (!tmp) 561 - /* Still try to delete expired elements */ 508 + /* Still try to delete expired elements. */ 562 509 continue; 563 510 tmp->size = n->size - AHASH_INIT_SIZE; 564 511 for (j = 0, d = 0; j < n->pos; j++) { 565 512 if (!test_bit(j, n->used)) 566 513 continue; 567 514 data = ahash_data(n, j, dsize); 568 - memcpy(tmp->value + d * dsize, data, dsize); 515 + memcpy(tmp->value + d * dsize, 516 + data, dsize); 569 517 set_bit(d, tmp->used); 570 518 d++; 571 519 } 572 520 tmp->pos = d; 573 - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); 521 + t->hregion[r].ext_size -= 522 + ext_size(AHASH_INIT_SIZE, dsize); 574 523 rcu_assign_pointer(hbucket(t, i), tmp); 575 524 kfree_rcu(n, rcu); 576 525 } 577 526 } 527 + spin_unlock_bh(&t->hregion[r].lock); 578 528 } 579 529 580 530 static void 581 - mtype_gc(struct timer_list *t) 531 + mtype_gc(struct work_struct *work) 582 532 { 583 - struct htype *h = from_timer(h, t, gc); 584 - struct ip_set *set = h->set; 533 + struct htable_gc *gc; 534 + struct ip_set *set; 535 + struct htype *h; 536 + struct htable *t; 537 + u32 r, numof_locks; 538 + unsigned int next_run; 585 539 586 - pr_debug("called\n"); 540 + gc = container_of(work, struct htable_gc, dwork.work); 541 + set = gc->set; 542 + h = set->data; 543 + 587 544 spin_lock_bh(&set->lock); 588 - mtype_expire(set, h); 545 + t = ipset_dereference_set(h->table, set); 546 + atomic_inc(&t->uref); 547 + numof_locks = ahash_numof_locks(t->htable_bits); 548 + r = gc->region++; 549 + if (r >= numof_locks) { 550 + r = gc->region = 0; 551 + } 552 + next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks; 553 + if (next_run < HZ/10) 554 + next_run = HZ/10; 589 555 spin_unlock_bh(&set->lock); 590 556 591 - h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; 592 - add_timer(&h->gc); 
557 + mtype_gc_do(set, h, t, r); 558 + 559 + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 560 + pr_debug("Table destroy after resize by expire: %p\n", t); 561 + mtype_ahash_destroy(set, t, false); 562 + } 563 + 564 + queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run); 565 + 593 566 } 567 + 568 + static void 569 + mtype_gc_init(struct htable_gc *gc) 570 + { 571 + INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc); 572 + queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); 573 + } 574 + 575 + static int 576 + mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, 577 + struct ip_set_ext *mext, u32 flags); 578 + static int 579 + mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, 580 + struct ip_set_ext *mext, u32 flags); 594 581 595 582 /* Resize a hash: create a new hash table with doubling the hashsize 596 583 * and inserting the elements to it. Repeat until we succeed or ··· 644 547 struct htype *h = set->data; 645 548 struct htable *t, *orig; 646 549 u8 htable_bits; 647 - size_t extsize, dsize = set->dsize; 550 + size_t dsize = set->dsize; 648 551 #ifdef IP_SET_HASH_WITH_NETS 649 552 u8 flags; 650 553 struct mtype_elem *tmp; ··· 652 555 struct mtype_elem *data; 653 556 struct mtype_elem *d; 654 557 struct hbucket *n, *m; 655 - u32 i, j, key; 558 + struct list_head *l, *lt; 559 + struct mtype_resize_ad *x; 560 + u32 i, j, r, nr, key; 656 561 int ret; 657 562 658 563 #ifdef IP_SET_HASH_WITH_NETS ··· 662 563 if (!tmp) 663 564 return -ENOMEM; 664 565 #endif 665 - rcu_read_lock_bh(); 666 - orig = rcu_dereference_bh_nfnl(h->table); 566 + orig = ipset_dereference_bh_nfnl(h->table); 667 567 htable_bits = orig->htable_bits; 668 - rcu_read_unlock_bh(); 669 568 670 569 retry: 671 570 ret = 0; ··· 680 583 ret = -ENOMEM; 681 584 goto out; 682 585 } 586 + t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); 587 + if (!t->hregion) { 588 + kfree(t); 589 + ret = -ENOMEM; 590 + goto out; 591 + } 683 
592 t->htable_bits = htable_bits; 593 + t->maxelem = h->maxelem / ahash_numof_locks(htable_bits); 594 + for (i = 0; i < ahash_numof_locks(htable_bits); i++) 595 + spin_lock_init(&t->hregion[i].lock); 684 596 685 - spin_lock_bh(&set->lock); 686 - orig = __ipset_dereference_protected(h->table, 1); 687 - /* There can't be another parallel resizing, but dumping is possible */ 597 + /* There can't be another parallel resizing, 598 + * but dumping, gc, kernel side add/del are possible 599 + */ 600 + orig = ipset_dereference_bh_nfnl(h->table); 688 601 atomic_set(&orig->ref, 1); 689 602 atomic_inc(&orig->uref); 690 - extsize = 0; 691 603 pr_debug("attempt to resize set %s from %u to %u, t %p\n", 692 604 set->name, orig->htable_bits, htable_bits, orig); 693 - for (i = 0; i < jhash_size(orig->htable_bits); i++) { 694 - n = __ipset_dereference_protected(hbucket(orig, i), 1); 695 - if (!n) 696 - continue; 697 - for (j = 0; j < n->pos; j++) { 698 - if (!test_bit(j, n->used)) 605 + for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) { 606 + /* Expire may replace a hbucket with another one */ 607 + rcu_read_lock_bh(); 608 + for (i = ahash_bucket_start(r, orig->htable_bits); 609 + i < ahash_bucket_end(r, orig->htable_bits); i++) { 610 + n = __ipset_dereference(hbucket(orig, i)); 611 + if (!n) 699 612 continue; 700 - data = ahash_data(n, j, dsize); 613 + for (j = 0; j < n->pos; j++) { 614 + if (!test_bit(j, n->used)) 615 + continue; 616 + data = ahash_data(n, j, dsize); 617 + if (SET_ELEM_EXPIRED(set, data)) 618 + continue; 701 619 #ifdef IP_SET_HASH_WITH_NETS 702 - /* We have readers running parallel with us, 703 - * so the live data cannot be modified. 704 - */ 705 - flags = 0; 706 - memcpy(tmp, data, dsize); 707 - data = tmp; 708 - mtype_data_reset_flags(data, &flags); 620 + /* We have readers running parallel with us, 621 + * so the live data cannot be modified. 
622 + */ 623 + flags = 0; 624 + memcpy(tmp, data, dsize); 625 + data = tmp; 626 + mtype_data_reset_flags(data, &flags); 709 627 #endif 710 - key = HKEY(data, h->initval, htable_bits); 711 - m = __ipset_dereference_protected(hbucket(t, key), 1); 712 - if (!m) { 713 - m = kzalloc(sizeof(*m) + 628 + key = HKEY(data, h->initval, htable_bits); 629 + m = __ipset_dereference(hbucket(t, key)); 630 + nr = ahash_region(key, htable_bits); 631 + if (!m) { 632 + m = kzalloc(sizeof(*m) + 714 633 AHASH_INIT_SIZE * dsize, 715 634 GFP_ATOMIC); 716 - if (!m) { 717 - ret = -ENOMEM; 718 - goto cleanup; 719 - } 720 - m->size = AHASH_INIT_SIZE; 721 - extsize += ext_size(AHASH_INIT_SIZE, dsize); 722 - RCU_INIT_POINTER(hbucket(t, key), m); 723 - } else if (m->pos >= m->size) { 724 - struct hbucket *ht; 635 + if (!m) { 636 + ret = -ENOMEM; 637 + goto cleanup; 638 + } 639 + m->size = AHASH_INIT_SIZE; 640 + t->hregion[nr].ext_size += 641 + ext_size(AHASH_INIT_SIZE, 642 + dsize); 643 + RCU_INIT_POINTER(hbucket(t, key), m); 644 + } else if (m->pos >= m->size) { 645 + struct hbucket *ht; 725 646 726 - if (m->size >= AHASH_MAX(h)) { 727 - ret = -EAGAIN; 728 - } else { 729 - ht = kzalloc(sizeof(*ht) + 647 + if (m->size >= AHASH_MAX(h)) { 648 + ret = -EAGAIN; 649 + } else { 650 + ht = kzalloc(sizeof(*ht) + 730 651 (m->size + AHASH_INIT_SIZE) 731 652 * dsize, 732 653 GFP_ATOMIC); 733 - if (!ht) 734 - ret = -ENOMEM; 654 + if (!ht) 655 + ret = -ENOMEM; 656 + } 657 + if (ret < 0) 658 + goto cleanup; 659 + memcpy(ht, m, sizeof(struct hbucket) + 660 + m->size * dsize); 661 + ht->size = m->size + AHASH_INIT_SIZE; 662 + t->hregion[nr].ext_size += 663 + ext_size(AHASH_INIT_SIZE, 664 + dsize); 665 + kfree(m); 666 + m = ht; 667 + RCU_INIT_POINTER(hbucket(t, key), ht); 735 668 } 736 - if (ret < 0) 737 - goto cleanup; 738 - memcpy(ht, m, sizeof(struct hbucket) + 739 - m->size * dsize); 740 - ht->size = m->size + AHASH_INIT_SIZE; 741 - extsize += ext_size(AHASH_INIT_SIZE, dsize); 742 - kfree(m); 743 - m = ht; 
744 - RCU_INIT_POINTER(hbucket(t, key), ht); 745 - } 746 - d = ahash_data(m, m->pos, dsize); 747 - memcpy(d, data, dsize); 748 - set_bit(m->pos++, m->used); 669 + d = ahash_data(m, m->pos, dsize); 670 + memcpy(d, data, dsize); 671 + set_bit(m->pos++, m->used); 672 + t->hregion[nr].elements++; 749 673 #ifdef IP_SET_HASH_WITH_NETS 750 - mtype_data_reset_flags(d, &flags); 674 + mtype_data_reset_flags(d, &flags); 751 675 #endif 676 + } 752 677 } 678 + rcu_read_unlock_bh(); 753 679 } 754 - rcu_assign_pointer(h->table, t); 755 - set->ext_size = extsize; 756 680 757 - spin_unlock_bh(&set->lock); 681 + /* There can't be any other writer. */ 682 + rcu_assign_pointer(h->table, t); 758 683 759 684 /* Give time to other readers of the set */ 760 685 synchronize_rcu(); 761 686 762 687 pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, 763 688 orig->htable_bits, orig, t->htable_bits, t); 764 - /* If there's nobody else dumping the table, destroy it */ 689 + /* Add/delete elements processed by the SET target during resize. 690 + * Kernel-side add cannot trigger a resize and userspace actions 691 + * are serialized by the mutex. 
692 + */ 693 + list_for_each_safe(l, lt, &h->ad) { 694 + x = list_entry(l, struct mtype_resize_ad, list); 695 + if (x->ad == IPSET_ADD) { 696 + mtype_add(set, &x->d, &x->ext, &x->mext, x->flags); 697 + } else { 698 + mtype_del(set, &x->d, NULL, NULL, 0); 699 + } 700 + list_del(l); 701 + kfree(l); 702 + } 703 + /* If there's nobody else using the table, destroy it */ 765 704 if (atomic_dec_and_test(&orig->uref)) { 766 705 pr_debug("Table destroy by resize %p\n", orig); 767 706 mtype_ahash_destroy(set, orig, false); ··· 810 677 return ret; 811 678 812 679 cleanup: 680 + rcu_read_unlock_bh(); 813 681 atomic_set(&orig->ref, 0); 814 682 atomic_dec(&orig->uref); 815 - spin_unlock_bh(&set->lock); 816 683 mtype_ahash_destroy(set, t, false); 817 684 if (ret == -EAGAIN) 818 685 goto retry; 819 686 goto out; 687 + } 688 + 689 + /* Get the current number of elements and ext_size in the set */ 690 + static void 691 + mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) 692 + { 693 + struct htype *h = set->data; 694 + const struct htable *t; 695 + u32 i, j, r; 696 + struct hbucket *n; 697 + struct mtype_elem *data; 698 + 699 + t = rcu_dereference_bh(h->table); 700 + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { 701 + for (i = ahash_bucket_start(r, t->htable_bits); 702 + i < ahash_bucket_end(r, t->htable_bits); i++) { 703 + n = rcu_dereference_bh(hbucket(t, i)); 704 + if (!n) 705 + continue; 706 + for (j = 0; j < n->pos; j++) { 707 + if (!test_bit(j, n->used)) 708 + continue; 709 + data = ahash_data(n, j, set->dsize); 710 + if (!SET_ELEM_EXPIRED(set, data)) 711 + (*elements)++; 712 + } 713 + } 714 + *ext_size += t->hregion[r].ext_size; 715 + } 820 716 } 821 717 822 718 /* Add an element to a hash and update the internal counters when succeeded, ··· 860 698 const struct mtype_elem *d = value; 861 699 struct mtype_elem *data; 862 700 struct hbucket *n, *old = ERR_PTR(-ENOENT); 863 - int i, j = -1; 701 + int i, j = -1, ret; 864 702 bool flag_exist = flags 
& IPSET_FLAG_EXIST; 865 703 bool deleted = false, forceadd = false, reuse = false; 866 - u32 key, multi = 0; 704 + u32 r, key, multi = 0, elements, maxelem; 867 705 868 - if (set->elements >= h->maxelem) { 869 - if (SET_WITH_TIMEOUT(set)) 870 - /* FIXME: when set is full, we slow down here */ 871 - mtype_expire(set, h); 872 - if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set)) 706 + rcu_read_lock_bh(); 707 + t = rcu_dereference_bh(h->table); 708 + key = HKEY(value, h->initval, t->htable_bits); 709 + r = ahash_region(key, t->htable_bits); 710 + atomic_inc(&t->uref); 711 + elements = t->hregion[r].elements; 712 + maxelem = t->maxelem; 713 + if (elements >= maxelem) { 714 + u32 e; 715 + if (SET_WITH_TIMEOUT(set)) { 716 + rcu_read_unlock_bh(); 717 + mtype_gc_do(set, h, t, r); 718 + rcu_read_lock_bh(); 719 + } 720 + maxelem = h->maxelem; 721 + elements = 0; 722 + for (e = 0; e < ahash_numof_locks(t->htable_bits); e++) 723 + elements += t->hregion[e].elements; 724 + if (elements >= maxelem && SET_WITH_FORCEADD(set)) 873 725 forceadd = true; 874 726 } 727 + rcu_read_unlock_bh(); 875 728 876 - t = ipset_dereference_protected(h->table, set); 877 - key = HKEY(value, h->initval, t->htable_bits); 878 - n = __ipset_dereference_protected(hbucket(t, key), 1); 729 + spin_lock_bh(&t->hregion[r].lock); 730 + n = rcu_dereference_bh(hbucket(t, key)); 879 731 if (!n) { 880 - if (forceadd || set->elements >= h->maxelem) 732 + if (forceadd || elements >= maxelem) 881 733 goto set_full; 882 734 old = NULL; 883 735 n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, 884 736 GFP_ATOMIC); 885 - if (!n) 886 - return -ENOMEM; 737 + if (!n) { 738 + ret = -ENOMEM; 739 + goto unlock; 740 + } 887 741 n->size = AHASH_INIT_SIZE; 888 - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); 742 + t->hregion[r].ext_size += 743 + ext_size(AHASH_INIT_SIZE, set->dsize); 889 744 goto copy_elem; 890 745 } 891 746 for (i = 0; i < n->pos; i++) { ··· 916 737 } 917 738 data = ahash_data(n, i, 
set->dsize); 918 739 if (mtype_data_equal(data, d, &multi)) { 919 - if (flag_exist || 920 - (SET_WITH_TIMEOUT(set) && 921 - ip_set_timeout_expired(ext_timeout(data, set)))) { 740 + if (flag_exist || SET_ELEM_EXPIRED(set, data)) { 922 741 /* Just the extensions could be overwritten */ 923 742 j = i; 924 743 goto overwrite_extensions; 925 744 } 926 - return -IPSET_ERR_EXIST; 745 + ret = -IPSET_ERR_EXIST; 746 + goto unlock; 927 747 } 928 748 /* Reuse first timed out entry */ 929 - if (SET_WITH_TIMEOUT(set) && 930 - ip_set_timeout_expired(ext_timeout(data, set)) && 931 - j == -1) { 749 + if (SET_ELEM_EXPIRED(set, data) && j == -1) { 932 750 j = i; 933 751 reuse = true; 934 752 } 935 753 } 936 754 if (reuse || forceadd) { 755 + if (j == -1) 756 + j = 0; 937 757 data = ahash_data(n, j, set->dsize); 938 758 if (!deleted) { 939 759 #ifdef IP_SET_HASH_WITH_NETS 940 760 for (i = 0; i < IPSET_NET_COUNT; i++) 941 - mtype_del_cidr(h, 761 + mtype_del_cidr(set, h, 942 762 NCIDR_PUT(DCIDR_GET(data->cidr, i)), 943 763 i); 944 764 #endif 945 765 ip_set_ext_destroy(set, data); 946 - set->elements--; 766 + t->hregion[r].elements--; 947 767 } 948 768 goto copy_data; 949 769 } 950 - if (set->elements >= h->maxelem) 770 + if (elements >= maxelem) 951 771 goto set_full; 952 772 /* Create a new slot */ 953 773 if (n->pos >= n->size) { ··· 954 776 if (n->size >= AHASH_MAX(h)) { 955 777 /* Trigger rehashing */ 956 778 mtype_data_next(&h->next, d); 957 - return -EAGAIN; 779 + ret = -EAGAIN; 780 + goto resize; 958 781 } 959 782 old = n; 960 783 n = kzalloc(sizeof(*n) + 961 784 (old->size + AHASH_INIT_SIZE) * set->dsize, 962 785 GFP_ATOMIC); 963 - if (!n) 964 - return -ENOMEM; 786 + if (!n) { 787 + ret = -ENOMEM; 788 + goto unlock; 789 + } 965 790 memcpy(n, old, sizeof(struct hbucket) + 966 791 old->size * set->dsize); 967 792 n->size = old->size + AHASH_INIT_SIZE; 968 - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); 793 + t->hregion[r].ext_size += 794 + ext_size(AHASH_INIT_SIZE, 
set->dsize); 969 795 } 970 796 971 797 copy_elem: 972 798 j = n->pos++; 973 799 data = ahash_data(n, j, set->dsize); 974 800 copy_data: 975 - set->elements++; 801 + t->hregion[r].elements++; 976 802 #ifdef IP_SET_HASH_WITH_NETS 977 803 for (i = 0; i < IPSET_NET_COUNT; i++) 978 - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); 804 + mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); 979 805 #endif 980 806 memcpy(data, d, sizeof(struct mtype_elem)); 981 807 overwrite_extensions: ··· 1002 820 if (old) 1003 821 kfree_rcu(old, rcu); 1004 822 } 823 + ret = 0; 824 + resize: 825 + spin_unlock_bh(&t->hregion[r].lock); 826 + if (atomic_read(&t->ref) && ext->target) { 827 + /* Resize is in process and kernel side add, save values */ 828 + struct mtype_resize_ad *x; 1005 829 1006 - return 0; 830 + x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); 831 + if (!x) 832 + /* Don't bother */ 833 + goto out; 834 + x->ad = IPSET_ADD; 835 + memcpy(&x->d, value, sizeof(struct mtype_elem)); 836 + memcpy(&x->ext, ext, sizeof(struct ip_set_ext)); 837 + memcpy(&x->mext, mext, sizeof(struct ip_set_ext)); 838 + x->flags = flags; 839 + spin_lock_bh(&set->lock); 840 + list_add_tail(&x->list, &h->ad); 841 + spin_unlock_bh(&set->lock); 842 + } 843 + goto out; 844 + 1007 845 set_full: 1008 846 if (net_ratelimit()) 1009 847 pr_warn("Set %s is full, maxelem %u reached\n", 1010 - set->name, h->maxelem); 1011 - return -IPSET_ERR_HASH_FULL; 848 + set->name, maxelem); 849 + ret = -IPSET_ERR_HASH_FULL; 850 + unlock: 851 + spin_unlock_bh(&t->hregion[r].lock); 852 + out: 853 + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 854 + pr_debug("Table destroy after resize by add: %p\n", t); 855 + mtype_ahash_destroy(set, t, false); 856 + } 857 + return ret; 1012 858 } 1013 859 1014 860 /* Delete an element from the hash and free up space if possible. 
··· 1050 840 const struct mtype_elem *d = value; 1051 841 struct mtype_elem *data; 1052 842 struct hbucket *n; 1053 - int i, j, k, ret = -IPSET_ERR_EXIST; 843 + struct mtype_resize_ad *x = NULL; 844 + int i, j, k, r, ret = -IPSET_ERR_EXIST; 1054 845 u32 key, multi = 0; 1055 846 size_t dsize = set->dsize; 1056 847 1057 - t = ipset_dereference_protected(h->table, set); 848 + /* Userspace add and resize is excluded by the mutex. 849 + * Kernespace add does not trigger resize. 850 + */ 851 + rcu_read_lock_bh(); 852 + t = rcu_dereference_bh(h->table); 1058 853 key = HKEY(value, h->initval, t->htable_bits); 1059 - n = __ipset_dereference_protected(hbucket(t, key), 1); 854 + r = ahash_region(key, t->htable_bits); 855 + atomic_inc(&t->uref); 856 + rcu_read_unlock_bh(); 857 + 858 + spin_lock_bh(&t->hregion[r].lock); 859 + n = rcu_dereference_bh(hbucket(t, key)); 1060 860 if (!n) 1061 861 goto out; 1062 862 for (i = 0, k = 0; i < n->pos; i++) { ··· 1077 857 data = ahash_data(n, i, dsize); 1078 858 if (!mtype_data_equal(data, d, &multi)) 1079 859 continue; 1080 - if (SET_WITH_TIMEOUT(set) && 1081 - ip_set_timeout_expired(ext_timeout(data, set))) 860 + if (SET_ELEM_EXPIRED(set, data)) 1082 861 goto out; 1083 862 1084 863 ret = 0; ··· 1085 866 smp_mb__after_atomic(); 1086 867 if (i + 1 == n->pos) 1087 868 n->pos--; 1088 - set->elements--; 869 + t->hregion[r].elements--; 1089 870 #ifdef IP_SET_HASH_WITH_NETS 1090 871 for (j = 0; j < IPSET_NET_COUNT; j++) 1091 - mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), 1092 - j); 872 + mtype_del_cidr(set, h, 873 + NCIDR_PUT(DCIDR_GET(d->cidr, j)), j); 1093 874 #endif 1094 875 ip_set_ext_destroy(set, data); 1095 876 877 + if (atomic_read(&t->ref) && ext->target) { 878 + /* Resize is in process and kernel side del, 879 + * save values 880 + */ 881 + x = kzalloc(sizeof(struct mtype_resize_ad), 882 + GFP_ATOMIC); 883 + if (x) { 884 + x->ad = IPSET_DEL; 885 + memcpy(&x->d, value, 886 + sizeof(struct mtype_elem)); 887 + x->flags = flags; 888 
+ } 889 + } 1096 890 for (; i < n->pos; i++) { 1097 891 if (!test_bit(i, n->used)) 1098 892 k++; 1099 893 } 1100 894 if (n->pos == 0 && k == 0) { 1101 - set->ext_size -= ext_size(n->size, dsize); 895 + t->hregion[r].ext_size -= ext_size(n->size, dsize); 1102 896 rcu_assign_pointer(hbucket(t, key), NULL); 1103 897 kfree_rcu(n, rcu); 1104 898 } else if (k >= AHASH_INIT_SIZE) { ··· 1130 898 k++; 1131 899 } 1132 900 tmp->pos = k; 1133 - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); 901 + t->hregion[r].ext_size -= 902 + ext_size(AHASH_INIT_SIZE, dsize); 1134 903 rcu_assign_pointer(hbucket(t, key), tmp); 1135 904 kfree_rcu(n, rcu); 1136 905 } ··· 1139 906 } 1140 907 1141 908 out: 909 + spin_unlock_bh(&t->hregion[r].lock); 910 + if (x) { 911 + spin_lock_bh(&set->lock); 912 + list_add(&x->list, &h->ad); 913 + spin_unlock_bh(&set->lock); 914 + } 915 + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 916 + pr_debug("Table destroy after resize by del: %p\n", t); 917 + mtype_ahash_destroy(set, t, false); 918 + } 1142 919 return ret; 1143 920 } 1144 921 ··· 1234 991 int i, ret = 0; 1235 992 u32 key, multi = 0; 1236 993 994 + rcu_read_lock_bh(); 1237 995 t = rcu_dereference_bh(h->table); 1238 996 #ifdef IP_SET_HASH_WITH_NETS 1239 997 /* If we test an IP address and not a network address, ··· 1266 1022 goto out; 1267 1023 } 1268 1024 out: 1025 + rcu_read_unlock_bh(); 1269 1026 return ret; 1270 1027 } 1271 1028 ··· 1278 1033 const struct htable *t; 1279 1034 struct nlattr *nested; 1280 1035 size_t memsize; 1036 + u32 elements = 0; 1037 + size_t ext_size = 0; 1281 1038 u8 htable_bits; 1282 1039 1283 - /* If any members have expired, set->elements will be wrong 1284 - * mytype_expire function will update it with the right count. 1285 - * we do not hold set->lock here, so grab it first. 1286 - * set->elements can still be incorrect in the case of a huge set, 1287 - * because elements might time out during the listing. 
1288 - */ 1289 - if (SET_WITH_TIMEOUT(set)) { 1290 - spin_lock_bh(&set->lock); 1291 - mtype_expire(set, h); 1292 - spin_unlock_bh(&set->lock); 1293 - } 1294 - 1295 1040 rcu_read_lock_bh(); 1296 - t = rcu_dereference_bh_nfnl(h->table); 1297 - memsize = mtype_ahash_memsize(h, t) + set->ext_size; 1041 + t = rcu_dereference_bh(h->table); 1042 + mtype_ext_size(set, &elements, &ext_size); 1043 + memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size; 1298 1044 htable_bits = t->htable_bits; 1299 1045 rcu_read_unlock_bh(); 1300 1046 ··· 1307 1071 #endif 1308 1072 if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 1309 1073 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || 1310 - nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) 1074 + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements))) 1311 1075 goto nla_put_failure; 1312 1076 if (unlikely(ip_set_put_flags(skb, set))) 1313 1077 goto nla_put_failure; ··· 1327 1091 1328 1092 if (start) { 1329 1093 rcu_read_lock_bh(); 1330 - t = rcu_dereference_bh_nfnl(h->table); 1094 + t = ipset_dereference_bh_nfnl(h->table); 1331 1095 atomic_inc(&t->uref); 1332 1096 cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; 1333 1097 rcu_read_unlock_bh(); 1334 1098 } else if (cb->args[IPSET_CB_PRIVATE]) { 1335 1099 t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; 1336 1100 if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 1337 - /* Resizing didn't destroy the hash table */ 1338 - pr_debug("Table destroy by dump: %p\n", t); 1101 + pr_debug("Table destroy after resize " 1102 + " by dump: %p\n", t); 1339 1103 mtype_ahash_destroy(set, t, false); 1340 1104 } 1341 1105 cb->args[IPSET_CB_PRIVATE] = 0; ··· 1377 1141 if (!test_bit(i, n->used)) 1378 1142 continue; 1379 1143 e = ahash_data(n, i, set->dsize); 1380 - if (SET_WITH_TIMEOUT(set) && 1381 - ip_set_timeout_expired(ext_timeout(e, set))) 1144 + if (SET_ELEM_EXPIRED(set, e)) 1382 1145 continue; 1383 1146 pr_debug("list hash %lu hbucket %p i %u, 
data %p\n", 1384 1147 cb->args[IPSET_CB_ARG0], n, i, e); ··· 1443 1208 .uref = mtype_uref, 1444 1209 .resize = mtype_resize, 1445 1210 .same_set = mtype_same_set, 1211 + .region_lock = true, 1446 1212 }; 1447 1213 1448 1214 #ifdef IP_SET_EMIT_CREATE ··· 1462 1226 size_t hsize; 1463 1227 struct htype *h; 1464 1228 struct htable *t; 1229 + u32 i; 1465 1230 1466 1231 pr_debug("Create set %s with family %s\n", 1467 1232 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); ··· 1531 1294 kfree(h); 1532 1295 return -ENOMEM; 1533 1296 } 1297 + t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); 1298 + if (!t->hregion) { 1299 + kfree(t); 1300 + kfree(h); 1301 + return -ENOMEM; 1302 + } 1303 + h->gc.set = set; 1304 + for (i = 0; i < ahash_numof_locks(hbits); i++) 1305 + spin_lock_init(&t->hregion[i].lock); 1534 1306 h->maxelem = maxelem; 1535 1307 #ifdef IP_SET_HASH_WITH_NETMASK 1536 1308 h->netmask = netmask; ··· 1550 1304 get_random_bytes(&h->initval, sizeof(h->initval)); 1551 1305 1552 1306 t->htable_bits = hbits; 1307 + t->maxelem = h->maxelem / ahash_numof_locks(hbits); 1553 1308 RCU_INIT_POINTER(h->table, t); 1554 1309 1555 - h->set = set; 1310 + INIT_LIST_HEAD(&h->ad); 1556 1311 set->data = h; 1557 1312 #ifndef IP_SET_PROTO_UNDEF 1558 1313 if (set->family == NFPROTO_IPV4) { ··· 1576 1329 #ifndef IP_SET_PROTO_UNDEF 1577 1330 if (set->family == NFPROTO_IPV4) 1578 1331 #endif 1579 - IPSET_TOKEN(HTYPE, 4_gc_init)(set, 1580 - IPSET_TOKEN(HTYPE, 4_gc)); 1332 + IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc); 1581 1333 #ifndef IP_SET_PROTO_UNDEF 1582 1334 else 1583 - IPSET_TOKEN(HTYPE, 6_gc_init)(set, 1584 - IPSET_TOKEN(HTYPE, 6_gc)); 1335 + IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc); 1585 1336 #endif 1586 1337 } 1587 1338 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",