Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_tables: Simplify set backend selection

Drop nft_set_type's ability to act as a container of multiple backend
implementations from which it chooses. Instead, consolidate the whole
selection logic in nft_select_set_ops() and the estimate() callback
provided by each backend.

This turns nf_tables_set_types into a list containing all available
backends, which is traversed to select the one matching the criteria
requested by userspace.

Also, this change allows embedding the nft_set_ops structure into
nft_set_type and pulling the features (flags) field into the latter, as
it is only used during the selection phase.

A crucial part of this change is to make sure the new layout respects
the hash backend constraints formerly enforced by the
nft_hash_select_ops() function. This is achieved by introducing a
specific estimate() callback for nft_hash_fast_ops which returns false
for key lengths != 4. In turn, nft_hash_estimate() is changed to return
false for key lengths == 4 so it won't be chosen by accident. Also, both
callbacks must return false for unbounded sets, as their size estimate
depends on a known maximum element count.

Note that this patch partially reverts commit 4f2921ca21b71 ("netfilter:
nf_tables: meter: pick a set backend that supports updates"):
nft_set_ops_candidate() no longer looks explicitly for an update
callback; instead, NFT_SET_EVAL becomes a regular backend feature flag
which is checked along with the others. This way all feature
requirements are checked in one go.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Phil Sutter and committed by
Pablo Neira Ayuso
71cc0873 36dd1bcc

+142 -146
+16 -20
include/net/netfilter/nf_tables.h
··· 275 275 enum nft_set_class space; 276 276 }; 277 277 278 - /** 279 - * struct nft_set_type - nf_tables set type 280 - * 281 - * @select_ops: function to select nft_set_ops 282 - * @ops: default ops, used when no select_ops functions is present 283 - * @list: used internally 284 - * @owner: module reference 285 - */ 286 - struct nft_set_type { 287 - const struct nft_set_ops *(*select_ops)(const struct nft_ctx *, 288 - const struct nft_set_desc *desc, 289 - u32 flags); 290 - const struct nft_set_ops *ops; 291 - struct list_head list; 292 - struct module *owner; 293 - }; 294 - 295 278 struct nft_set_ext; 296 279 struct nft_expr; 297 280 ··· 293 310 * @init: initialize private data of new set instance 294 311 * @destroy: destroy private data of set instance 295 312 * @elemsize: element private size 296 - * @features: features supported by the implementation 297 313 */ 298 314 struct nft_set_ops { 299 315 bool (*lookup)(const struct net *net, ··· 343 361 void (*destroy)(const struct nft_set *set); 344 362 345 363 unsigned int elemsize; 346 - u32 features; 347 - const struct nft_set_type *type; 348 364 }; 365 + 366 + /** 367 + * struct nft_set_type - nf_tables set type 368 + * 369 + * @ops: set ops for this type 370 + * @list: used internally 371 + * @owner: module reference 372 + * @features: features supported by the implementation 373 + */ 374 + struct nft_set_type { 375 + const struct nft_set_ops ops; 376 + struct list_head list; 377 + struct module *owner; 378 + u32 features; 379 + }; 380 + #define to_set_type(o) container_of(o, struct nft_set_type, ops) 349 381 350 382 int nft_register_set(struct nft_set_type *type); 351 383 void nft_unregister_set(struct nft_set_type *type);
+9 -16
net/netfilter/nf_tables_api.c
··· 2523 2523 EXPORT_SYMBOL_GPL(nft_unregister_set); 2524 2524 2525 2525 #define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ 2526 - NFT_SET_TIMEOUT | NFT_SET_OBJECT) 2526 + NFT_SET_TIMEOUT | NFT_SET_OBJECT | \ 2527 + NFT_SET_EVAL) 2527 2528 2528 - static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags) 2529 + static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) 2529 2530 { 2530 - if ((flags & NFT_SET_EVAL) && !ops->update) 2531 - return false; 2532 - 2533 - return (flags & ops->features) == (flags & NFT_SET_FEATURES); 2531 + return (flags & type->features) == (flags & NFT_SET_FEATURES); 2534 2532 } 2535 2533 2536 2534 /* ··· 2565 2567 best.space = ~0; 2566 2568 2567 2569 list_for_each_entry(type, &nf_tables_set_types, list) { 2568 - if (!type->select_ops) 2569 - ops = type->ops; 2570 - else 2571 - ops = type->select_ops(ctx, desc, flags); 2572 - if (!ops) 2573 - continue; 2570 + ops = &type->ops; 2574 2571 2575 - if (!nft_set_ops_candidate(ops, flags)) 2572 + if (!nft_set_ops_candidate(type, flags)) 2576 2573 continue; 2577 2574 if (!ops->estimate(desc, flags, &est)) 2578 2575 continue; ··· 2598 2605 if (!try_module_get(type->owner)) 2599 2606 continue; 2600 2607 if (bops != NULL) 2601 - module_put(bops->type->owner); 2608 + module_put(to_set_type(bops)->owner); 2602 2609 2603 2610 bops = ops; 2604 2611 best = est; ··· 3240 3247 err2: 3241 3248 kvfree(set); 3242 3249 err1: 3243 - module_put(ops->type->owner); 3250 + module_put(to_set_type(ops)->owner); 3244 3251 return err; 3245 3252 } 3246 3253 3247 3254 static void nft_set_destroy(struct nft_set *set) 3248 3255 { 3249 3256 set->ops->destroy(set); 3250 - module_put(set->ops->type->owner); 3257 + module_put(to_set_type(set->ops)->owner); 3251 3258 kfree(set->name); 3252 3259 kvfree(set); 3253 3260 }
+15 -19
net/netfilter/nft_set_bitmap.c
··· 296 296 return true; 297 297 } 298 298 299 - static struct nft_set_type nft_bitmap_type; 300 - static struct nft_set_ops nft_bitmap_ops __read_mostly = { 301 - .type = &nft_bitmap_type, 302 - .privsize = nft_bitmap_privsize, 303 - .elemsize = offsetof(struct nft_bitmap_elem, ext), 304 - .estimate = nft_bitmap_estimate, 305 - .init = nft_bitmap_init, 306 - .destroy = nft_bitmap_destroy, 307 - .insert = nft_bitmap_insert, 308 - .remove = nft_bitmap_remove, 309 - .deactivate = nft_bitmap_deactivate, 310 - .flush = nft_bitmap_flush, 311 - .activate = nft_bitmap_activate, 312 - .lookup = nft_bitmap_lookup, 313 - .walk = nft_bitmap_walk, 314 - .get = nft_bitmap_get, 315 - }; 316 - 317 299 static struct nft_set_type nft_bitmap_type __read_mostly = { 318 - .ops = &nft_bitmap_ops, 319 300 .owner = THIS_MODULE, 301 + .ops = { 302 + .privsize = nft_bitmap_privsize, 303 + .elemsize = offsetof(struct nft_bitmap_elem, ext), 304 + .estimate = nft_bitmap_estimate, 305 + .init = nft_bitmap_init, 306 + .destroy = nft_bitmap_destroy, 307 + .insert = nft_bitmap_insert, 308 + .remove = nft_bitmap_remove, 309 + .deactivate = nft_bitmap_deactivate, 310 + .flush = nft_bitmap_flush, 311 + .activate = nft_bitmap_activate, 312 + .lookup = nft_bitmap_lookup, 313 + .walk = nft_bitmap_walk, 314 + .get = nft_bitmap_get, 315 + }, 320 316 }; 321 317 322 318 static int __init nft_bitmap_module_init(void)
+86 -71
net/netfilter/nft_set_hash.c
··· 605 605 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, 606 606 struct nft_set_estimate *est) 607 607 { 608 + if (!desc->size) 609 + return false; 610 + 611 + if (desc->klen == 4) 612 + return false; 613 + 608 614 est->size = sizeof(struct nft_hash) + 609 615 nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + 610 616 desc->size * sizeof(struct nft_hash_elem); ··· 620 614 return true; 621 615 } 622 616 623 - static struct nft_set_type nft_hash_type; 624 - static struct nft_set_ops nft_rhash_ops __read_mostly = { 625 - .type = &nft_hash_type, 626 - .privsize = nft_rhash_privsize, 627 - .elemsize = offsetof(struct nft_rhash_elem, ext), 628 - .estimate = nft_rhash_estimate, 629 - .init = nft_rhash_init, 630 - .destroy = nft_rhash_destroy, 631 - .insert = nft_rhash_insert, 632 - .activate = nft_rhash_activate, 633 - .deactivate = nft_rhash_deactivate, 634 - .flush = nft_rhash_flush, 635 - .remove = nft_rhash_remove, 636 - .lookup = nft_rhash_lookup, 637 - .update = nft_rhash_update, 638 - .walk = nft_rhash_walk, 639 - .get = nft_rhash_get, 640 - .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, 641 - }; 642 - 643 - static struct nft_set_ops nft_hash_ops __read_mostly = { 644 - .type = &nft_hash_type, 645 - .privsize = nft_hash_privsize, 646 - .elemsize = offsetof(struct nft_hash_elem, ext), 647 - .estimate = nft_hash_estimate, 648 - .init = nft_hash_init, 649 - .destroy = nft_hash_destroy, 650 - .insert = nft_hash_insert, 651 - .activate = nft_hash_activate, 652 - .deactivate = nft_hash_deactivate, 653 - .flush = nft_hash_flush, 654 - .remove = nft_hash_remove, 655 - .lookup = nft_hash_lookup, 656 - .walk = nft_hash_walk, 657 - .get = nft_hash_get, 658 - .features = NFT_SET_MAP | NFT_SET_OBJECT, 659 - }; 660 - 661 - static struct nft_set_ops nft_hash_fast_ops __read_mostly = { 662 - .type = &nft_hash_type, 663 - .privsize = nft_hash_privsize, 664 - .elemsize = offsetof(struct nft_hash_elem, ext), 665 - .estimate = 
nft_hash_estimate, 666 - .init = nft_hash_init, 667 - .destroy = nft_hash_destroy, 668 - .insert = nft_hash_insert, 669 - .activate = nft_hash_activate, 670 - .deactivate = nft_hash_deactivate, 671 - .flush = nft_hash_flush, 672 - .remove = nft_hash_remove, 673 - .lookup = nft_hash_lookup_fast, 674 - .walk = nft_hash_walk, 675 - .get = nft_hash_get, 676 - .features = NFT_SET_MAP | NFT_SET_OBJECT, 677 - }; 678 - 679 - static const struct nft_set_ops * 680 - nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, 681 - u32 flags) 617 + static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features, 618 + struct nft_set_estimate *est) 682 619 { 683 - if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) { 684 - switch (desc->klen) { 685 - case 4: 686 - return &nft_hash_fast_ops; 687 - default: 688 - return &nft_hash_ops; 689 - } 690 - } 620 + if (!desc->size) 621 + return false; 691 622 692 - return &nft_rhash_ops; 623 + if (desc->klen != 4) 624 + return false; 625 + 626 + est->size = sizeof(struct nft_hash) + 627 + nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + 628 + desc->size * sizeof(struct nft_hash_elem); 629 + est->lookup = NFT_SET_CLASS_O_1; 630 + est->space = NFT_SET_CLASS_O_N; 631 + 632 + return true; 693 633 } 694 634 695 - static struct nft_set_type nft_hash_type __read_mostly = { 696 - .select_ops = nft_hash_select_ops, 635 + static struct nft_set_type nft_rhash_type __read_mostly = { 697 636 .owner = THIS_MODULE, 637 + .features = NFT_SET_MAP | NFT_SET_OBJECT | 638 + NFT_SET_TIMEOUT | NFT_SET_EVAL, 639 + .ops = { 640 + .privsize = nft_rhash_privsize, 641 + .elemsize = offsetof(struct nft_rhash_elem, ext), 642 + .estimate = nft_rhash_estimate, 643 + .init = nft_rhash_init, 644 + .destroy = nft_rhash_destroy, 645 + .insert = nft_rhash_insert, 646 + .activate = nft_rhash_activate, 647 + .deactivate = nft_rhash_deactivate, 648 + .flush = nft_rhash_flush, 649 + .remove = nft_rhash_remove, 650 + 
.lookup = nft_rhash_lookup, 651 + .update = nft_rhash_update, 652 + .walk = nft_rhash_walk, 653 + .get = nft_rhash_get, 654 + }, 655 + }; 656 + 657 + static struct nft_set_type nft_hash_type __read_mostly = { 658 + .owner = THIS_MODULE, 659 + .features = NFT_SET_MAP | NFT_SET_OBJECT, 660 + .ops = { 661 + .privsize = nft_hash_privsize, 662 + .elemsize = offsetof(struct nft_hash_elem, ext), 663 + .estimate = nft_hash_estimate, 664 + .init = nft_hash_init, 665 + .destroy = nft_hash_destroy, 666 + .insert = nft_hash_insert, 667 + .activate = nft_hash_activate, 668 + .deactivate = nft_hash_deactivate, 669 + .flush = nft_hash_flush, 670 + .remove = nft_hash_remove, 671 + .lookup = nft_hash_lookup, 672 + .walk = nft_hash_walk, 673 + .get = nft_hash_get, 674 + }, 675 + }; 676 + 677 + static struct nft_set_type nft_hash_fast_type __read_mostly = { 678 + .owner = THIS_MODULE, 679 + .features = NFT_SET_MAP | NFT_SET_OBJECT, 680 + .ops = { 681 + .privsize = nft_hash_privsize, 682 + .elemsize = offsetof(struct nft_hash_elem, ext), 683 + .estimate = nft_hash_fast_estimate, 684 + .init = nft_hash_init, 685 + .destroy = nft_hash_destroy, 686 + .insert = nft_hash_insert, 687 + .activate = nft_hash_activate, 688 + .deactivate = nft_hash_deactivate, 689 + .flush = nft_hash_flush, 690 + .remove = nft_hash_remove, 691 + .lookup = nft_hash_lookup_fast, 692 + .walk = nft_hash_walk, 693 + .get = nft_hash_get, 694 + }, 698 695 }; 699 696 700 697 static int __init nft_hash_module_init(void) 701 698 { 702 - return nft_register_set(&nft_hash_type); 699 + if (nft_register_set(&nft_hash_fast_type) || 700 + nft_register_set(&nft_hash_type) || 701 + nft_register_set(&nft_rhash_type)) 702 + return 1; 703 + return 0; 703 704 } 704 705 705 706 static void __exit nft_hash_module_exit(void) 706 707 { 708 + nft_unregister_set(&nft_rhash_type); 707 709 nft_unregister_set(&nft_hash_type); 710 + nft_unregister_set(&nft_hash_fast_type); 708 711 } 709 712 710 713 module_init(nft_hash_module_init);
+16 -20
net/netfilter/nft_set_rbtree.c
··· 393 393 return true; 394 394 } 395 395 396 - static struct nft_set_type nft_rbtree_type; 397 - static struct nft_set_ops nft_rbtree_ops __read_mostly = { 398 - .type = &nft_rbtree_type, 399 - .privsize = nft_rbtree_privsize, 400 - .elemsize = offsetof(struct nft_rbtree_elem, ext), 401 - .estimate = nft_rbtree_estimate, 402 - .init = nft_rbtree_init, 403 - .destroy = nft_rbtree_destroy, 404 - .insert = nft_rbtree_insert, 405 - .remove = nft_rbtree_remove, 406 - .deactivate = nft_rbtree_deactivate, 407 - .flush = nft_rbtree_flush, 408 - .activate = nft_rbtree_activate, 409 - .lookup = nft_rbtree_lookup, 410 - .walk = nft_rbtree_walk, 411 - .get = nft_rbtree_get, 412 - .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, 413 - }; 414 - 415 396 static struct nft_set_type nft_rbtree_type __read_mostly = { 416 - .ops = &nft_rbtree_ops, 417 397 .owner = THIS_MODULE, 398 + .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, 399 + .ops = { 400 + .privsize = nft_rbtree_privsize, 401 + .elemsize = offsetof(struct nft_rbtree_elem, ext), 402 + .estimate = nft_rbtree_estimate, 403 + .init = nft_rbtree_init, 404 + .destroy = nft_rbtree_destroy, 405 + .insert = nft_rbtree_insert, 406 + .remove = nft_rbtree_remove, 407 + .deactivate = nft_rbtree_deactivate, 408 + .flush = nft_rbtree_flush, 409 + .activate = nft_rbtree_activate, 410 + .lookup = nft_rbtree_lookup, 411 + .walk = nft_rbtree_walk, 412 + .get = nft_rbtree_get, 413 + }, 418 414 }; 419 415 420 416 static int __init nft_rbtree_module_init(void)