Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next tree.
They are:

* nf_tables set timeout infrastructure from Patrick McHardy.

1) Add support for set timeouts.

2) Add support for set element timeouts using the new set extension
infrastructure.

3) Add garbage collection helper functions to get rid of stale elements.
Elements are accumulated in a batch that are asynchronously released
via RCU when the batch is full.

4) Add garbage collection synchronization helpers. This introduces a new
element busy bit to address concurrent access from the netlink API and the
garbage collector.

5) Add timeout support for the nft_hash set implementation. The garbage
collector periodically checks for stale elements from the workqueue.

* iptables/nftables cgroup fixes:

6) Ignore non full-socket objects from the input path, otherwise cgroup
match may crash, from Daniel Borkmann.

7) Fix cgroup in nf_tables.

8) Save some cycles from xt_socket by skipping packet header parsing when
skb->sk is already set because of early demux. Also from Daniel.

* br_netfilter updates from Florian Westphal.

9) Save frag_max_size and restore it from the forward path too.

10) Use a per-cpu area to restore the original source MAC address when traffic
is DNAT'ed.

11) Add helper functions to access physical devices.

12) Use these new physdev helper function from xt_physdev.

13) Add another nf_bridge_info_get() helper function to fetch the br_netfilter
state information.

14) Annotate original layer 2 protocol number in nf_bridge info, instead of
using kludgy flags.

15) Also annotate the pkttype mangling when the packet travels back and forth
from the IP to the bridge layer, instead of using a flag.

* More nf_tables set enhancement from Patrick:

16) Fix possible usage of set variant that doesn't support timeouts.

17) Avoid spurious "set is full" errors from the Netlink API when there are
pending stale elements scheduled to be released.

18) Restrict loop checks to set maps.

19) Add support for dynamic set updates from the packet path.

20) Add support to store optional user data (eg. comments) per set element.

BTW, I have also pulled net-next into nf-next to anticipate the conflict
resolution between your okfn() signature changes and Florian's br_netfilter
updates.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+976 -183
+23 -5
include/linux/netfilter_bridge.h
··· 2 2 #define __LINUX_BRIDGE_NETFILTER_H 3 3 4 4 #include <uapi/linux/netfilter_bridge.h> 5 - 5 + #include <linux/skbuff.h> 6 6 7 7 enum nf_br_hook_priorities { 8 8 NF_BR_PRI_FIRST = INT_MIN, ··· 17 17 18 18 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 19 19 20 - #define BRNF_PKT_TYPE 0x01 21 20 #define BRNF_BRIDGED_DNAT 0x02 22 21 #define BRNF_NF_BRIDGE_PREROUTING 0x08 23 - #define BRNF_8021Q 0x10 24 - #define BRNF_PPPoE 0x20 25 22 26 23 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 27 24 { 28 - if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) 25 + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) 29 26 return PPPOE_SES_HLEN; 30 27 return 0; 31 28 } ··· 37 40 skb_dst_drop(skb); 38 41 } 39 42 43 + static inline int nf_bridge_get_physinif(const struct sk_buff *skb) 44 + { 45 + return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0; 46 + } 47 + 48 + static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) 49 + { 50 + return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0; 51 + } 52 + 53 + static inline struct net_device * 54 + nf_bridge_get_physindev(const struct sk_buff *skb) 55 + { 56 + return skb->nf_bridge ? skb->nf_bridge->physindev : NULL; 57 + } 58 + 59 + static inline struct net_device * 60 + nf_bridge_get_physoutdev(const struct sk_buff *skb) 61 + { 62 + return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL; 63 + } 40 64 #else 41 65 #define br_drop_fake_rtable(skb) do { } while (0) 42 66 #endif /* CONFIG_BRIDGE_NETFILTER */
+7 -1
include/linux/skbuff.h
··· 166 166 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 167 167 struct nf_bridge_info { 168 168 atomic_t use; 169 + enum { 170 + BRNF_PROTO_UNCHANGED, 171 + BRNF_PROTO_8021Q, 172 + BRNF_PROTO_PPPOE 173 + } orig_proto; 174 + bool pkt_otherhost; 169 175 unsigned int mask; 170 176 struct net_device *physindev; 171 177 struct net_device *physoutdev; 172 - unsigned long data[32 / sizeof(unsigned long)]; 178 + char neigh_header[8]; 173 179 }; 174 180 #endif 175 181
+154 -1
include/net/netfilter/nf_tables.h
··· 195 195 }; 196 196 197 197 struct nft_set_ext; 198 + struct nft_expr; 198 199 199 200 /** 200 201 * struct nft_set_ops - nf_tables set operations ··· 218 217 bool (*lookup)(const struct nft_set *set, 219 218 const struct nft_data *key, 220 219 const struct nft_set_ext **ext); 220 + bool (*update)(struct nft_set *set, 221 + const struct nft_data *key, 222 + void *(*new)(struct nft_set *, 223 + const struct nft_expr *, 224 + struct nft_data []), 225 + const struct nft_expr *expr, 226 + struct nft_data data[], 227 + const struct nft_set_ext **ext); 228 + 221 229 int (*insert)(const struct nft_set *set, 222 230 const struct nft_set_elem *elem); 223 231 void (*activate)(const struct nft_set *set, ··· 267 257 * @dtype: data type (verdict or numeric type defined by userspace) 268 258 * @size: maximum set size 269 259 * @nelems: number of elements 260 + * @ndeact: number of deactivated elements queued for removal 261 + * @timeout: default timeout value in msecs 262 + * @gc_int: garbage collection interval in msecs 270 263 * @policy: set parameterization (see enum nft_set_policies) 271 264 * @ops: set ops 272 265 * @pnet: network namespace ··· 285 272 u32 ktype; 286 273 u32 dtype; 287 274 u32 size; 288 - u32 nelems; 275 + atomic_t nelems; 276 + u32 ndeact; 277 + u64 timeout; 278 + u32 gc_int; 289 279 u16 policy; 290 280 /* runtime data below here */ 291 281 const struct nft_set_ops *ops ____cacheline_aligned; ··· 305 289 return (void *)set->data; 306 290 } 307 291 292 + static inline struct nft_set *nft_set_container_of(const void *priv) 293 + { 294 + return (void *)priv - offsetof(struct nft_set, data); 295 + } 296 + 308 297 struct nft_set *nf_tables_set_lookup(const struct nft_table *table, 309 298 const struct nlattr *nla); 310 299 struct nft_set *nf_tables_set_lookup_byid(const struct net *net, 311 300 const struct nlattr *nla); 301 + 302 + static inline unsigned long nft_set_gc_interval(const struct nft_set *set) 303 + { 304 + return set->gc_int ? 
msecs_to_jiffies(set->gc_int) : HZ; 305 + } 312 306 313 307 /** 314 308 * struct nft_set_binding - nf_tables set binding 315 309 * 316 310 * @list: set bindings list node 317 311 * @chain: chain containing the rule bound to the set 312 + * @flags: set action flags 318 313 * 319 314 * A set binding contains all information necessary for validation 320 315 * of new elements added to a bound set. ··· 333 306 struct nft_set_binding { 334 307 struct list_head list; 335 308 const struct nft_chain *chain; 309 + u32 flags; 336 310 }; 337 311 338 312 int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, ··· 347 319 * @NFT_SET_EXT_KEY: element key 348 320 * @NFT_SET_EXT_DATA: mapping data 349 321 * @NFT_SET_EXT_FLAGS: element flags 322 + * @NFT_SET_EXT_TIMEOUT: element timeout 323 + * @NFT_SET_EXT_EXPIRATION: element expiration time 324 + * @NFT_SET_EXT_USERDATA: user data associated with the element 350 325 * @NFT_SET_EXT_NUM: number of extension types 351 326 */ 352 327 enum nft_set_extensions { 353 328 NFT_SET_EXT_KEY, 354 329 NFT_SET_EXT_DATA, 355 330 NFT_SET_EXT_FLAGS, 331 + NFT_SET_EXT_TIMEOUT, 332 + NFT_SET_EXT_EXPIRATION, 333 + NFT_SET_EXT_USERDATA, 356 334 NFT_SET_EXT_NUM 357 335 }; 358 336 ··· 455 421 return nft_set_ext(ext, NFT_SET_EXT_FLAGS); 456 422 } 457 423 424 + static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) 425 + { 426 + return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); 427 + } 428 + 429 + static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext) 430 + { 431 + return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); 432 + } 433 + 434 + static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) 435 + { 436 + return nft_set_ext(ext, NFT_SET_EXT_USERDATA); 437 + } 438 + 439 + static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) 440 + { 441 + return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && 442 + time_is_before_eq_jiffies(*nft_set_ext_expiration(ext)); 443 
+ } 444 + 458 445 static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, 459 446 void *elem) 460 447 { 461 448 return elem + set->ops->elemsize; 462 449 } 463 450 451 + void *nft_set_elem_init(const struct nft_set *set, 452 + const struct nft_set_ext_tmpl *tmpl, 453 + const struct nft_data *key, 454 + const struct nft_data *data, 455 + u64 timeout, gfp_t gfp); 464 456 void nft_set_elem_destroy(const struct nft_set *set, void *elem); 457 + 458 + /** 459 + * struct nft_set_gc_batch_head - nf_tables set garbage collection batch 460 + * 461 + * @rcu: rcu head 462 + * @set: set the elements belong to 463 + * @cnt: count of elements 464 + */ 465 + struct nft_set_gc_batch_head { 466 + struct rcu_head rcu; 467 + const struct nft_set *set; 468 + unsigned int cnt; 469 + }; 470 + 471 + #define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ 472 + sizeof(struct nft_set_gc_batch_head)) / \ 473 + sizeof(void *)) 474 + 475 + /** 476 + * struct nft_set_gc_batch - nf_tables set garbage collection batch 477 + * 478 + * @head: GC batch head 479 + * @elems: garbage collection elements 480 + */ 481 + struct nft_set_gc_batch { 482 + struct nft_set_gc_batch_head head; 483 + void *elems[NFT_SET_GC_BATCH_SIZE]; 484 + }; 485 + 486 + struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, 487 + gfp_t gfp); 488 + void nft_set_gc_batch_release(struct rcu_head *rcu); 489 + 490 + static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) 491 + { 492 + if (gcb != NULL) 493 + call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); 494 + } 495 + 496 + static inline struct nft_set_gc_batch * 497 + nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, 498 + gfp_t gfp) 499 + { 500 + if (gcb != NULL) { 501 + if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) 502 + return gcb; 503 + nft_set_gc_batch_complete(gcb); 504 + } 505 + return nft_set_gc_batch_alloc(set, gfp); 506 + } 507 + 508 + static inline void nft_set_gc_batch_add(struct 
nft_set_gc_batch *gcb, 509 + void *elem) 510 + { 511 + gcb->elems[gcb->head.cnt++] = elem; 512 + } 465 513 466 514 /** 467 515 * struct nft_expr_type - nf_tables expression type ··· 866 750 return 1 << ACCESS_ONCE(net->nft.gencursor); 867 751 } 868 752 753 + #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) 754 + 869 755 /* 870 756 * Set element transaction helpers 871 757 */ ··· 882 764 struct nft_set_ext *ext) 883 765 { 884 766 ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); 767 + } 768 + 769 + /* 770 + * We use a free bit in the genmask field to indicate the element 771 + * is busy, meaning it is currently being processed either by 772 + * the netlink API or GC. 773 + * 774 + * Even though the genmask is only a single byte wide, this works 775 + * because the extension structure if fully constant once initialized, 776 + * so there are no non-atomic write accesses unless it is already 777 + * marked busy. 778 + */ 779 + #define NFT_SET_ELEM_BUSY_MASK (1 << 2) 780 + 781 + #if defined(__LITTLE_ENDIAN_BITFIELD) 782 + #define NFT_SET_ELEM_BUSY_BIT 2 783 + #elif defined(__BIG_ENDIAN_BITFIELD) 784 + #define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) 785 + #else 786 + #error 787 + #endif 788 + 789 + static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) 790 + { 791 + unsigned long *word = (unsigned long *)ext; 792 + 793 + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); 794 + return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); 795 + } 796 + 797 + static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) 798 + { 799 + unsigned long *word = (unsigned long *)ext; 800 + 801 + clear_bit(NFT_SET_ELEM_BUSY_BIT, word); 885 802 } 886 803 887 804 /**
+3
include/net/netfilter/nf_tables_core.h
··· 31 31 int nft_lookup_module_init(void); 32 32 void nft_lookup_module_exit(void); 33 33 34 + int nft_dynset_module_init(void); 35 + void nft_dynset_module_exit(void); 36 + 34 37 int nft_bitwise_module_init(void); 35 38 void nft_bitwise_module_exit(void); 36 39
+39
include/uapi/linux/netfilter/nf_tables.h
··· 208 208 * @NFT_SET_CONSTANT: set contents may not change while bound 209 209 * @NFT_SET_INTERVAL: set contains intervals 210 210 * @NFT_SET_MAP: set is used as a dictionary 211 + * @NFT_SET_TIMEOUT: set uses timeouts 211 212 */ 212 213 enum nft_set_flags { 213 214 NFT_SET_ANONYMOUS = 0x1, 214 215 NFT_SET_CONSTANT = 0x2, 215 216 NFT_SET_INTERVAL = 0x4, 216 217 NFT_SET_MAP = 0x8, 218 + NFT_SET_TIMEOUT = 0x10, 217 219 }; 218 220 219 221 /** ··· 254 252 * @NFTA_SET_POLICY: selection policy (NLA_U32) 255 253 * @NFTA_SET_DESC: set description (NLA_NESTED) 256 254 * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) 255 + * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) 256 + * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) 257 257 */ 258 258 enum nft_set_attributes { 259 259 NFTA_SET_UNSPEC, ··· 269 265 NFTA_SET_POLICY, 270 266 NFTA_SET_DESC, 271 267 NFTA_SET_ID, 268 + NFTA_SET_TIMEOUT, 269 + NFTA_SET_GC_INTERVAL, 272 270 __NFTA_SET_MAX 273 271 }; 274 272 #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) ··· 290 284 * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) 291 285 * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) 292 286 * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) 287 + * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) 288 + * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) 289 + * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) 293 290 */ 294 291 enum nft_set_elem_attributes { 295 292 NFTA_SET_ELEM_UNSPEC, 296 293 NFTA_SET_ELEM_KEY, 297 294 NFTA_SET_ELEM_DATA, 298 295 NFTA_SET_ELEM_FLAGS, 296 + NFTA_SET_ELEM_TIMEOUT, 297 + NFTA_SET_ELEM_EXPIRATION, 298 + NFTA_SET_ELEM_USERDATA, 299 299 __NFTA_SET_ELEM_MAX 300 300 }; 301 301 #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) ··· 516 504 __NFTA_LOOKUP_MAX 517 505 }; 518 506 #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) 507 + 508 + enum nft_dynset_ops { 509 + NFT_DYNSET_OP_ADD, 510 + NFT_DYNSET_OP_UPDATE, 511 + }; 512 + 513 
+ /** 514 + * enum nft_dynset_attributes - dynset expression attributes 515 + * 516 + * @NFTA_DYNSET_SET_NAME: name of set the to add data to (NLA_STRING) 517 + * @NFTA_DYNSET_SET_ID: uniquely identifier of the set in the transaction (NLA_U32) 518 + * @NFTA_DYNSET_OP: operation (NLA_U32) 519 + * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32) 520 + * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) 521 + * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) 522 + */ 523 + enum nft_dynset_attributes { 524 + NFTA_DYNSET_UNSPEC, 525 + NFTA_DYNSET_SET_NAME, 526 + NFTA_DYNSET_SET_ID, 527 + NFTA_DYNSET_OP, 528 + NFTA_DYNSET_SREG_KEY, 529 + NFTA_DYNSET_SREG_DATA, 530 + NFTA_DYNSET_TIMEOUT, 531 + __NFTA_DYNSET_MAX, 532 + }; 533 + #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) 519 534 520 535 /** 521 536 * enum nft_payload_bases - nf_tables payload expression offset bases
+94 -54
net/bridge/br_netfilter.c
··· 111 111 pppoe_proto(skb) == htons(PPP_IPV6) && \ 112 112 brnf_filter_pppoe_tagged) 113 113 114 + /* largest possible L2 header, see br_nf_dev_queue_xmit() */ 115 + #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) 116 + 117 + #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) 118 + struct brnf_frag_data { 119 + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; 120 + u8 encap_size; 121 + u8 size; 122 + }; 123 + 124 + static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 125 + #endif 126 + 127 + static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) 128 + { 129 + return skb->nf_bridge; 130 + } 131 + 114 132 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) 115 133 { 116 134 struct net_bridge_port *port; ··· 207 189 skb->network_header += len; 208 190 } 209 191 210 - static inline void nf_bridge_save_header(struct sk_buff *skb) 211 - { 212 - int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); 213 - 214 - skb_copy_from_linear_data_offset(skb, -header_size, 215 - skb->nf_bridge->data, header_size); 216 - } 217 - 218 192 /* When handing a packet over to the IP layer 219 193 * check whether we have a skb that is in the 220 194 * expected format ··· 262 252 263 253 static void nf_bridge_update_protocol(struct sk_buff *skb) 264 254 { 265 - if (skb->nf_bridge->mask & BRNF_8021Q) 255 + switch (skb->nf_bridge->orig_proto) { 256 + case BRNF_PROTO_8021Q: 266 257 skb->protocol = htons(ETH_P_8021Q); 267 - else if (skb->nf_bridge->mask & BRNF_PPPoE) 258 + break; 259 + case BRNF_PROTO_PPPOE: 268 260 skb->protocol = htons(ETH_P_PPP_SES); 261 + break; 262 + case BRNF_PROTO_UNCHANGED: 263 + break; 264 + } 269 265 } 270 266 271 267 /* PF_BRIDGE/PRE_ROUTING *********************************************/ ··· 279 263 * bridge PRE_ROUTING hook. 
*/ 280 264 static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) 281 265 { 282 - struct nf_bridge_info *nf_bridge = skb->nf_bridge; 266 + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 283 267 struct rtable *rt; 284 268 285 - if (nf_bridge->mask & BRNF_PKT_TYPE) { 269 + if (nf_bridge->pkt_otherhost) { 286 270 skb->pkt_type = PACKET_OTHERHOST; 287 - nf_bridge->mask ^= BRNF_PKT_TYPE; 271 + nf_bridge->pkt_otherhost = false; 288 272 } 289 273 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; 290 274 ··· 312 296 */ 313 297 static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) 314 298 { 315 - struct nf_bridge_info *nf_bridge = skb->nf_bridge; 316 299 struct neighbour *neigh; 317 300 struct dst_entry *dst; 318 301 ··· 321 306 dst = skb_dst(skb); 322 307 neigh = dst_neigh_lookup_skb(dst, skb); 323 308 if (neigh) { 309 + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 324 310 int ret; 325 311 326 312 if (neigh->hh.hh_len) { ··· 335 319 */ 336 320 skb_copy_from_linear_data_offset(skb, 337 321 -(ETH_HLEN-ETH_ALEN), 338 - skb->nf_bridge->data, 322 + nf_bridge->neigh_header, 339 323 ETH_HLEN-ETH_ALEN); 340 324 /* tell br_dev_xmit to continue with forwarding */ 341 325 nf_bridge->mask |= BRNF_BRIDGED_DNAT; ··· 408 392 { 409 393 struct net_device *dev = skb->dev; 410 394 struct iphdr *iph = ip_hdr(skb); 411 - struct nf_bridge_info *nf_bridge = skb->nf_bridge; 395 + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 412 396 struct rtable *rt; 413 397 int err; 414 398 int frag_max_size; ··· 416 400 frag_max_size = IPCB(skb)->frag_max_size; 417 401 BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; 418 402 419 - if (nf_bridge->mask & BRNF_PKT_TYPE) { 403 + if (nf_bridge->pkt_otherhost) { 420 404 skb->pkt_type = PACKET_OTHERHOST; 421 - nf_bridge->mask ^= BRNF_PKT_TYPE; 405 + nf_bridge->pkt_otherhost = false; 422 406 } 423 407 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; 424 408 if 
(dnat_took_place(skb)) { ··· 501 485 /* Some common code for IPv4/IPv6 */ 502 486 static struct net_device *setup_pre_routing(struct sk_buff *skb) 503 487 { 504 - struct nf_bridge_info *nf_bridge = skb->nf_bridge; 488 + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 505 489 506 490 if (skb->pkt_type == PACKET_OTHERHOST) { 507 491 skb->pkt_type = PACKET_HOST; 508 - nf_bridge->mask |= BRNF_PKT_TYPE; 492 + nf_bridge->pkt_otherhost = true; 509 493 } 510 494 511 495 nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; 512 496 nf_bridge->physindev = skb->dev; 513 497 skb->dev = brnf_get_logical_dev(skb, skb->dev); 498 + 514 499 if (skb->protocol == htons(ETH_P_8021Q)) 515 - nf_bridge->mask |= BRNF_8021Q; 500 + nf_bridge->orig_proto = BRNF_PROTO_8021Q; 516 501 else if (skb->protocol == htons(ETH_P_PPP_SES)) 517 - nf_bridge->mask |= BRNF_PPPoE; 502 + nf_bridge->orig_proto = BRNF_PROTO_PPPOE; 518 503 519 504 /* Must drop socket now because of tproxy. */ 520 505 skb_orphan(skb); ··· 697 680 /* PF_BRIDGE/FORWARD *************************************************/ 698 681 static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) 699 682 { 700 - struct nf_bridge_info *nf_bridge = skb->nf_bridge; 683 + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 701 684 struct net_device *in; 702 685 703 686 if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { 687 + int frag_max_size; 688 + 689 + if (skb->protocol == htons(ETH_P_IP)) { 690 + frag_max_size = IPCB(skb)->frag_max_size; 691 + BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; 692 + } 693 + 704 694 in = nf_bridge->physindev; 705 - if (nf_bridge->mask & BRNF_PKT_TYPE) { 695 + if (nf_bridge->pkt_otherhost) { 706 696 skb->pkt_type = PACKET_OTHERHOST; 707 - nf_bridge->mask ^= BRNF_PKT_TYPE; 697 + nf_bridge->pkt_otherhost = false; 708 698 } 709 699 nf_bridge_update_protocol(skb); 710 700 } else { ··· 746 722 if (!nf_bridge_unshare(skb)) 747 723 return NF_DROP; 748 724 725 + nf_bridge = nf_bridge_info_get(skb); 726 + 
if (!nf_bridge) 727 + return NF_DROP; 728 + 749 729 parent = bridge_parent(state->out); 750 730 if (!parent) 751 731 return NF_DROP; ··· 763 735 764 736 nf_bridge_pull_encap_header(skb); 765 737 766 - nf_bridge = skb->nf_bridge; 767 738 if (skb->pkt_type == PACKET_OTHERHOST) { 768 739 skb->pkt_type = PACKET_HOST; 769 - nf_bridge->mask |= BRNF_PKT_TYPE; 740 + nf_bridge->pkt_otherhost = true; 770 741 } 771 742 772 - if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) 773 - return NF_DROP; 743 + if (pf == NFPROTO_IPV4) { 744 + int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; 745 + 746 + if (br_parse_ip_options(skb)) 747 + return NF_DROP; 748 + 749 + IPCB(skb)->frag_max_size = frag_max; 750 + } 774 751 775 752 nf_bridge->physoutdev = skb->dev; 776 753 if (pf == NFPROTO_IPV4) ··· 825 792 } 826 793 827 794 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) 828 - static bool nf_bridge_copy_header(struct sk_buff *skb) 829 - { 830 - int err; 831 - unsigned int header_size; 832 - 833 - nf_bridge_update_protocol(skb); 834 - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); 835 - err = skb_cow_head(skb, header_size); 836 - if (err) 837 - return false; 838 - 839 - skb_copy_to_linear_data_offset(skb, -header_size, 840 - skb->nf_bridge->data, header_size); 841 - __skb_push(skb, nf_bridge_encap_header_len(skb)); 842 - return true; 843 - } 844 - 845 795 static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) 846 796 { 847 - if (!nf_bridge_copy_header(skb)) { 797 + struct brnf_frag_data *data; 798 + int err; 799 + 800 + data = this_cpu_ptr(&brnf_frag_data_storage); 801 + err = skb_cow_head(skb, data->size); 802 + 803 + if (err) { 848 804 kfree_skb(skb); 849 805 return 0; 850 806 } 807 + 808 + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); 809 + __skb_push(skb, data->encap_size); 851 810 852 811 return br_dev_queue_push_xmit(sk, skb); 853 812 } ··· 858 833 * boundaries by preserving frag_list rather than refragmenting. 
859 834 */ 860 835 if (skb->len + mtu_reserved > skb->dev->mtu) { 836 + struct brnf_frag_data *data; 837 + 861 838 frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; 862 839 if (br_parse_ip_options(skb)) 863 840 /* Drop invalid packet */ 864 841 return NF_DROP; 865 842 IPCB(skb)->frag_max_size = frag_max_size; 843 + 844 + nf_bridge_update_protocol(skb); 845 + 846 + data = this_cpu_ptr(&brnf_frag_data_storage); 847 + data->encap_size = nf_bridge_encap_header_len(skb); 848 + data->size = ETH_HLEN + data->encap_size; 849 + 850 + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, 851 + data->size); 852 + 866 853 ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); 867 - } else 854 + } else { 868 855 ret = br_dev_queue_push_xmit(sk, skb); 856 + } 869 857 870 858 return ret; 871 859 } ··· 894 856 struct sk_buff *skb, 895 857 const struct nf_hook_state *state) 896 858 { 897 - struct nf_bridge_info *nf_bridge = skb->nf_bridge; 859 + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 898 860 struct net_device *realoutdev = bridge_parent(skb->dev); 899 861 u_int8_t pf; 900 862 ··· 920 882 * about the value of skb->pkt_type. 
*/ 921 883 if (skb->pkt_type == PACKET_OTHERHOST) { 922 884 skb->pkt_type = PACKET_HOST; 923 - nf_bridge->mask |= BRNF_PKT_TYPE; 885 + nf_bridge->pkt_otherhost = true; 924 886 } 925 887 926 888 nf_bridge_pull_encap_header(skb); 927 - nf_bridge_save_header(skb); 928 889 if (pf == NFPROTO_IPV4) 929 890 skb->protocol = htons(ETH_P_IP); 930 891 else ··· 962 925 */ 963 926 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) 964 927 { 965 - struct nf_bridge_info *nf_bridge = skb->nf_bridge; 928 + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); 966 929 967 930 skb_pull(skb, ETH_HLEN); 968 931 nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; 969 932 970 - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), 971 - skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); 933 + BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); 934 + 935 + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), 936 + nf_bridge->neigh_header, 937 + ETH_HLEN - ETH_ALEN); 972 938 skb->dev = nf_bridge->physindev; 973 939 br_handle_frame_finish(NULL, skb); 974 940 }
+3 -1
net/ipv4/netfilter/nf_reject_ipv4.c
··· 13 13 #include <net/dst.h> 14 14 #include <net/netfilter/ipv4/nf_reject.h> 15 15 #include <linux/netfilter_ipv4.h> 16 + #include <linux/netfilter_bridge.h> 16 17 #include <net/netfilter/ipv4/nf_reject.h> 17 18 18 19 const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, ··· 147 146 */ 148 147 if (oldskb->nf_bridge) { 149 148 struct ethhdr *oeth = eth_hdr(oldskb); 150 - nskb->dev = oldskb->nf_bridge->physindev; 149 + 150 + nskb->dev = nf_bridge_get_physindev(oldskb); 151 151 niph->tot_len = htons(nskb->len); 152 152 ip_send_check(niph); 153 153 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+3 -1
net/ipv6/netfilter/nf_reject_ipv6.c
··· 13 13 #include <net/ip6_checksum.h> 14 14 #include <net/netfilter/ipv6/nf_reject.h> 15 15 #include <linux/netfilter_ipv6.h> 16 + #include <linux/netfilter_bridge.h> 16 17 #include <net/netfilter/ipv6/nf_reject.h> 17 18 18 19 const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, ··· 196 195 */ 197 196 if (oldskb->nf_bridge) { 198 197 struct ethhdr *oeth = eth_hdr(oldskb); 199 - nskb->dev = oldskb->nf_bridge->physindev; 198 + 199 + nskb->dev = nf_bridge_get_physindev(oldskb); 200 200 nskb->protocol = htons(ETH_P_IPV6); 201 201 ip6h->payload_len = htons(sizeof(struct tcphdr)); 202 202 if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+1 -1
net/netfilter/Makefile
··· 70 70 71 71 # nf_tables 72 72 nf_tables-objs += nf_tables_core.o nf_tables_api.o 73 - nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o 73 + nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o 74 74 nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o 75 75 76 76 obj-$(CONFIG_NF_TABLES) += nf_tables.o
+24 -8
net/netfilter/ipset/ip_set_hash_netiface.c
··· 19 19 #include <net/netlink.h> 20 20 21 21 #include <linux/netfilter.h> 22 + #include <linux/netfilter_bridge.h> 22 23 #include <linux/netfilter/ipset/pfxlen.h> 23 24 #include <linux/netfilter/ipset/ip_set.h> 24 25 #include <linux/netfilter/ipset/ip_set_hash.h> ··· 212 211 #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) 213 212 #include "ip_set_hash_gen.h" 214 213 214 + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 215 + static const char *get_physindev_name(const struct sk_buff *skb) 216 + { 217 + struct net_device *dev = nf_bridge_get_physindev(skb); 218 + 219 + return dev ? dev->name : NULL; 220 + } 221 + 222 + static const char *get_phyoutdev_name(const struct sk_buff *skb) 223 + { 224 + struct net_device *dev = nf_bridge_get_physoutdev(skb); 225 + 226 + return dev ? dev->name : NULL; 227 + } 228 + #endif 229 + 215 230 static int 216 231 hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, 217 232 const struct xt_action_param *par, ··· 251 234 e.ip &= ip_set_netmask(e.cidr); 252 235 253 236 #define IFACE(dir) (par->dir ? par->dir->name : NULL) 254 - #define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL) 255 237 #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) 256 238 257 239 if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { 258 240 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 259 - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; 241 + e.iface = SRCDIR ? get_physindev_name(skb) : 242 + get_phyoutdev_name(skb); 260 243 261 - if (!nf_bridge) 244 + if (!e.iface) 262 245 return -EINVAL; 263 - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); 264 246 e.physdev = 1; 265 247 #else 266 248 e.iface = NULL; ··· 492 476 493 477 if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { 494 478 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 495 - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; 496 - 497 - if (!nf_bridge) 479 + e.iface = SRCDIR ? 
get_physindev_name(skb) : 480 + get_phyoutdev_name(skb); 481 + if (!e.iface) 498 482 return -EINVAL; 499 - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); 483 + 500 484 e.physdev = 1; 501 485 #else 502 486 e.iface = NULL;
+3 -2
net/netfilter/nf_log_common.c
··· 17 17 #include <net/route.h> 18 18 19 19 #include <linux/netfilter.h> 20 + #include <linux/netfilter_bridge.h> 20 21 #include <linux/netfilter/xt_LOG.h> 21 22 #include <net/netfilter/nf_log.h> 22 23 ··· 164 163 const struct net_device *physindev; 165 164 const struct net_device *physoutdev; 166 165 167 - physindev = skb->nf_bridge->physindev; 166 + physindev = nf_bridge_get_physindev(skb); 168 167 if (physindev && in != physindev) 169 168 nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); 170 - physoutdev = skb->nf_bridge->physoutdev; 169 + physoutdev = nf_bridge_get_physoutdev(skb); 171 170 if (physoutdev && out != physoutdev) 172 171 nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); 173 172 }
+10 -8
net/netfilter/nf_queue.c
··· 10 10 #include <linux/proc_fs.h> 11 11 #include <linux/skbuff.h> 12 12 #include <linux/netfilter.h> 13 + #include <linux/netfilter_bridge.h> 13 14 #include <linux/seq_file.h> 14 15 #include <linux/rcupdate.h> 15 16 #include <net/protocol.h> ··· 59 58 sock_put(state->sk); 60 59 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 61 60 if (entry->skb->nf_bridge) { 62 - struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; 61 + struct net_device *physdev; 63 62 64 - if (nf_bridge->physindev) 65 - dev_put(nf_bridge->physindev); 66 - if (nf_bridge->physoutdev) 67 - dev_put(nf_bridge->physoutdev); 63 + physdev = nf_bridge_get_physindev(entry->skb); 64 + if (physdev) 65 + dev_put(physdev); 66 + physdev = nf_bridge_get_physoutdev(entry->skb); 67 + if (physdev) 68 + dev_put(physdev); 68 69 } 69 70 #endif 70 71 /* Drop reference to owner of hook which queued us. */ ··· 90 87 sock_hold(state->sk); 91 88 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 92 89 if (entry->skb->nf_bridge) { 93 - struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; 94 90 struct net_device *physdev; 95 91 96 - physdev = nf_bridge->physindev; 92 + physdev = nf_bridge_get_physindev(entry->skb); 97 93 if (physdev) 98 94 dev_hold(physdev); 99 - physdev = nf_bridge->physoutdev; 95 + physdev = nf_bridge_get_physoutdev(entry->skb); 100 96 if (physdev) 101 97 dev_hold(physdev); 102 98 }
+165 -23
net/netfilter/nf_tables_api.c
··· 2159 2159 features = 0; 2160 2160 if (nla[NFTA_SET_FLAGS] != NULL) { 2161 2161 features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); 2162 - features &= NFT_SET_INTERVAL | NFT_SET_MAP; 2162 + features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT; 2163 2163 } 2164 2164 2165 2165 bops = NULL; ··· 2216 2216 [NFTA_SET_POLICY] = { .type = NLA_U32 }, 2217 2217 [NFTA_SET_DESC] = { .type = NLA_NESTED }, 2218 2218 [NFTA_SET_ID] = { .type = NLA_U32 }, 2219 + [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, 2220 + [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, 2219 2221 }; 2220 2222 2221 2223 static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { ··· 2367 2365 if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) 2368 2366 goto nla_put_failure; 2369 2367 } 2368 + 2369 + if (set->timeout && 2370 + nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout))) 2371 + goto nla_put_failure; 2372 + if (set->gc_int && 2373 + nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) 2374 + goto nla_put_failure; 2370 2375 2371 2376 if (set->policy != NFT_SET_POL_PERFORMANCE) { 2372 2377 if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy))) ··· 2587 2578 char name[IFNAMSIZ]; 2588 2579 unsigned int size; 2589 2580 bool create; 2590 - u32 ktype, dtype, flags, policy; 2581 + u64 timeout; 2582 + u32 ktype, dtype, flags, policy, gc_int; 2591 2583 struct nft_set_desc desc; 2592 2584 int err; 2593 2585 ··· 2615 2605 if (nla[NFTA_SET_FLAGS] != NULL) { 2616 2606 flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); 2617 2607 if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | 2618 - NFT_SET_INTERVAL | NFT_SET_MAP)) 2608 + NFT_SET_INTERVAL | NFT_SET_MAP | 2609 + NFT_SET_TIMEOUT)) 2619 2610 return -EINVAL; 2620 2611 } 2621 2612 ··· 2641 2630 desc.dlen = sizeof(struct nft_data); 2642 2631 } else if (flags & NFT_SET_MAP) 2643 2632 return -EINVAL; 2633 + 2634 + timeout = 0; 2635 + if (nla[NFTA_SET_TIMEOUT] != NULL) { 2636 + if (!(flags & NFT_SET_TIMEOUT)) 2637 + 
return -EINVAL; 2638 + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT])); 2639 + } 2640 + gc_int = 0; 2641 + if (nla[NFTA_SET_GC_INTERVAL] != NULL) { 2642 + if (!(flags & NFT_SET_TIMEOUT)) 2643 + return -EINVAL; 2644 + gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); 2645 + } 2644 2646 2645 2647 policy = NFT_SET_POL_PERFORMANCE; 2646 2648 if (nla[NFTA_SET_POLICY] != NULL) ··· 2723 2699 set->flags = flags; 2724 2700 set->size = desc.size; 2725 2701 set->policy = policy; 2702 + set->timeout = timeout; 2703 + set->gc_int = gc_int; 2726 2704 2727 2705 err = ops->init(set, &desc, nla); 2728 2706 if (err < 0) ··· 2811 2785 if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) 2812 2786 return -EBUSY; 2813 2787 2814 - if (set->flags & NFT_SET_MAP) { 2788 + if (binding->flags & NFT_SET_MAP) { 2815 2789 /* If the set is already bound to the same chain all 2816 2790 * jumps are already validated for that chain. 2817 2791 */ 2818 2792 list_for_each_entry(i, &set->bindings, list) { 2819 - if (i->chain == binding->chain) 2793 + if (binding->flags & NFT_SET_MAP && 2794 + i->chain == binding->chain) 2820 2795 goto bind; 2821 2796 } 2822 2797 ··· 2864 2837 .len = sizeof(u8), 2865 2838 .align = __alignof__(u8), 2866 2839 }, 2840 + [NFT_SET_EXT_TIMEOUT] = { 2841 + .len = sizeof(u64), 2842 + .align = __alignof__(u64), 2843 + }, 2844 + [NFT_SET_EXT_EXPIRATION] = { 2845 + .len = sizeof(unsigned long), 2846 + .align = __alignof__(unsigned long), 2847 + }, 2848 + [NFT_SET_EXT_USERDATA] = { 2849 + .len = sizeof(struct nft_userdata), 2850 + .align = __alignof__(struct nft_userdata), 2851 + }, 2867 2852 }; 2868 2853 EXPORT_SYMBOL_GPL(nft_set_ext_types); 2869 2854 ··· 2887 2848 [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, 2888 2849 [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, 2889 2850 [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, 2851 + [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, 2852 + [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, 2853 + .len = 
NFT_USERDATA_MAXLEN }, 2890 2854 }; 2891 2855 2892 2856 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { ··· 2950 2908 nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, 2951 2909 htonl(*nft_set_ext_flags(ext)))) 2952 2910 goto nla_put_failure; 2911 + 2912 + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && 2913 + nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, 2914 + cpu_to_be64(*nft_set_ext_timeout(ext)))) 2915 + goto nla_put_failure; 2916 + 2917 + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { 2918 + unsigned long expires, now = jiffies; 2919 + 2920 + expires = *nft_set_ext_expiration(ext); 2921 + if (time_before(now, expires)) 2922 + expires -= now; 2923 + else 2924 + expires = 0; 2925 + 2926 + if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, 2927 + cpu_to_be64(jiffies_to_msecs(expires)))) 2928 + goto nla_put_failure; 2929 + } 2930 + 2931 + if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { 2932 + struct nft_userdata *udata; 2933 + 2934 + udata = nft_set_ext_userdata(ext); 2935 + if (nla_put(skb, NFTA_SET_ELEM_USERDATA, 2936 + udata->len + 1, udata->data)) 2937 + goto nla_put_failure; 2938 + } 2953 2939 2954 2940 nla_nest_end(skb, nest); 2955 2941 return 0; ··· 3198 3128 return trans; 3199 3129 } 3200 3130 3201 - static void *nft_set_elem_init(const struct nft_set *set, 3202 - const struct nft_set_ext_tmpl *tmpl, 3203 - const struct nft_data *key, 3204 - const struct nft_data *data, 3205 - gfp_t gfp) 3131 + void *nft_set_elem_init(const struct nft_set *set, 3132 + const struct nft_set_ext_tmpl *tmpl, 3133 + const struct nft_data *key, 3134 + const struct nft_data *data, 3135 + u64 timeout, gfp_t gfp) 3206 3136 { 3207 3137 struct nft_set_ext *ext; 3208 3138 void *elem; ··· 3217 3147 memcpy(nft_set_ext_key(ext), key, set->klen); 3218 3148 if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) 3219 3149 memcpy(nft_set_ext_data(ext), data, set->dlen); 3150 + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) 3151 + 
*nft_set_ext_expiration(ext) = 3152 + jiffies + msecs_to_jiffies(timeout); 3153 + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) 3154 + *nft_set_ext_timeout(ext) = timeout; 3220 3155 3221 3156 return elem; 3222 3157 } ··· 3247 3172 struct nft_set_ext *ext; 3248 3173 struct nft_set_elem elem; 3249 3174 struct nft_set_binding *binding; 3175 + struct nft_userdata *udata; 3250 3176 struct nft_data data; 3251 3177 enum nft_registers dreg; 3252 3178 struct nft_trans *trans; 3179 + u64 timeout; 3253 3180 u32 flags; 3181 + u8 ulen; 3254 3182 int err; 3255 - 3256 - if (set->size && set->nelems == set->size) 3257 - return -ENFILE; 3258 3183 3259 3184 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, 3260 3185 nft_set_elem_policy); ··· 3290 3215 return -EINVAL; 3291 3216 } 3292 3217 3218 + timeout = 0; 3219 + if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { 3220 + if (!(set->flags & NFT_SET_TIMEOUT)) 3221 + return -EINVAL; 3222 + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT])); 3223 + } else if (set->flags & NFT_SET_TIMEOUT) { 3224 + timeout = set->timeout; 3225 + } 3226 + 3293 3227 err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); 3294 3228 if (err < 0) 3295 3229 goto err1; ··· 3307 3223 goto err2; 3308 3224 3309 3225 nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); 3226 + if (timeout > 0) { 3227 + nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); 3228 + if (timeout != set->timeout) 3229 + nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); 3230 + } 3310 3231 3311 3232 if (nla[NFTA_SET_ELEM_DATA] != NULL) { 3312 3233 err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]); ··· 3330 3241 .chain = (struct nft_chain *)binding->chain, 3331 3242 }; 3332 3243 3244 + if (!(binding->flags & NFT_SET_MAP)) 3245 + continue; 3246 + 3333 3247 err = nft_validate_data_load(&bind_ctx, dreg, 3334 3248 &data, d2.type); 3335 3249 if (err < 0) ··· 3342 3250 nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); 3343 3251 } 3344 3252 3253 + /* The full maximum length of userdata can 
exceed the maximum 3254 + * offset value (U8_MAX) for following extensions, therefor it 3255 + * must be the last extension added. 3256 + */ 3257 + ulen = 0; 3258 + if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { 3259 + ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); 3260 + if (ulen > 0) 3261 + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, 3262 + ulen); 3263 + } 3264 + 3345 3265 err = -ENOMEM; 3346 - elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL); 3266 + elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, 3267 + timeout, GFP_KERNEL); 3347 3268 if (elem.priv == NULL) 3348 3269 goto err3; 3349 3270 3350 3271 ext = nft_set_elem_ext(set, elem.priv); 3351 3272 if (flags) 3352 3273 *nft_set_ext_flags(ext) = flags; 3274 + if (ulen > 0) { 3275 + udata = nft_set_ext_userdata(ext); 3276 + udata->len = ulen - 1; 3277 + nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); 3278 + } 3353 3279 3354 3280 trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); 3355 3281 if (trans == NULL) 3356 3282 goto err4; 3357 3283 3358 - ext->genmask = nft_genmask_cur(ctx->net); 3284 + ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; 3359 3285 err = set->ops->insert(set, &elem); 3360 3286 if (err < 0) 3361 3287 goto err5; ··· 3426 3316 return -EBUSY; 3427 3317 3428 3318 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 3429 - err = nft_add_set_elem(&ctx, set, attr); 3430 - if (err < 0) 3431 - break; 3319 + if (set->size && 3320 + !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) 3321 + return -ENFILE; 3432 3322 3433 - set->nelems++; 3323 + err = nft_add_set_elem(&ctx, set, attr); 3324 + if (err < 0) { 3325 + atomic_dec(&set->nelems); 3326 + break; 3327 + } 3434 3328 } 3435 3329 return err; 3436 3330 } ··· 3516 3402 if (err < 0) 3517 3403 break; 3518 3404 3519 - set->nelems--; 3405 + set->ndeact++; 3520 3406 } 3521 3407 return err; 3522 3408 } 3409 + 3410 + void nft_set_gc_batch_release(struct rcu_head 
*rcu) 3411 + { 3412 + struct nft_set_gc_batch *gcb; 3413 + unsigned int i; 3414 + 3415 + gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); 3416 + for (i = 0; i < gcb->head.cnt; i++) 3417 + nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); 3418 + kfree(gcb); 3419 + } 3420 + EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); 3421 + 3422 + struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, 3423 + gfp_t gfp) 3424 + { 3425 + struct nft_set_gc_batch *gcb; 3426 + 3427 + gcb = kzalloc(sizeof(*gcb), gfp); 3428 + if (gcb == NULL) 3429 + return gcb; 3430 + gcb->head.set = set; 3431 + return gcb; 3432 + } 3433 + EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); 3523 3434 3524 3435 static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, 3525 3436 u32 portid, u32 seq) ··· 3849 3710 &te->elem, 3850 3711 NFT_MSG_DELSETELEM, 0); 3851 3712 te->set->ops->remove(te->set, &te->elem); 3713 + atomic_dec(&te->set->nelems); 3714 + te->set->ndeact--; 3852 3715 break; 3853 3716 } 3854 3717 } ··· 3954 3813 nft_trans_destroy(trans); 3955 3814 break; 3956 3815 case NFT_MSG_NEWSETELEM: 3957 - nft_trans_elem_set(trans)->nelems--; 3958 3816 te = (struct nft_trans_elem *)trans->data; 3959 3817 3960 3818 te->set->ops->remove(te->set, &te->elem); 3819 + atomic_dec(&te->set->nelems); 3961 3820 break; 3962 3821 case NFT_MSG_DELSETELEM: 3963 3822 te = (struct nft_trans_elem *)trans->data; 3964 3823 3965 - nft_trans_elem_set(trans)->nelems++; 3966 3824 te->set->ops->activate(te->set, &te->elem); 3825 + te->set->ndeact--; 3967 3826 3968 3827 nft_trans_destroy(trans); 3969 3828 break; ··· 4101 3960 continue; 4102 3961 4103 3962 list_for_each_entry(binding, &set->bindings, list) { 4104 - if (binding->chain != chain) 3963 + if (!(binding->flags & NFT_SET_MAP) || 3964 + binding->chain != chain) 4105 3965 continue; 4106 3966 4107 3967 iter.skip = 0;
+7
net/netfilter/nf_tables_core.c
··· 239 239 if (err < 0) 240 240 goto err6; 241 241 242 + err = nft_dynset_module_init(); 243 + if (err < 0) 244 + goto err7; 245 + 242 246 return 0; 243 247 248 + err7: 249 + nft_payload_module_exit(); 244 250 err6: 245 251 nft_byteorder_module_exit(); 246 252 err5: ··· 263 257 264 258 void nf_tables_core_module_exit(void) 265 259 { 260 + nft_dynset_module_exit(); 266 261 nft_payload_module_exit(); 267 262 nft_byteorder_module_exit(); 268 263 nft_bitwise_module_exit();
+218
net/netfilter/nft_dynset.c
··· 1 + /* 2 + * Copyright (c) 2015 Patrick McHardy <kaber@trash.net> 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + */ 9 + 10 + #include <linux/kernel.h> 11 + #include <linux/module.h> 12 + #include <linux/init.h> 13 + #include <linux/netlink.h> 14 + #include <linux/netfilter.h> 15 + #include <linux/netfilter/nf_tables.h> 16 + #include <net/netfilter/nf_tables.h> 17 + #include <net/netfilter/nf_tables_core.h> 18 + 19 + struct nft_dynset { 20 + struct nft_set *set; 21 + struct nft_set_ext_tmpl tmpl; 22 + enum nft_dynset_ops op:8; 23 + enum nft_registers sreg_key:8; 24 + enum nft_registers sreg_data:8; 25 + u64 timeout; 26 + struct nft_set_binding binding; 27 + }; 28 + 29 + static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, 30 + struct nft_data data[NFT_REG_MAX + 1]) 31 + { 32 + const struct nft_dynset *priv = nft_expr_priv(expr); 33 + u64 timeout; 34 + void *elem; 35 + 36 + if (set->size && !atomic_add_unless(&set->nelems, 1, set->size)) 37 + return NULL; 38 + 39 + timeout = priv->timeout ? : set->timeout; 40 + elem = nft_set_elem_init(set, &priv->tmpl, 41 + &data[priv->sreg_key], &data[priv->sreg_data], 42 + timeout, GFP_ATOMIC); 43 + if (elem == NULL) { 44 + if (set->size) 45 + atomic_dec(&set->nelems); 46 + } 47 + return elem; 48 + } 49 + 50 + static void nft_dynset_eval(const struct nft_expr *expr, 51 + struct nft_data data[NFT_REG_MAX + 1], 52 + const struct nft_pktinfo *pkt) 53 + { 54 + const struct nft_dynset *priv = nft_expr_priv(expr); 55 + struct nft_set *set = priv->set; 56 + const struct nft_set_ext *ext; 57 + u64 timeout; 58 + 59 + if (set->ops->update(set, &data[priv->sreg_key], nft_dynset_new, 60 + expr, data, &ext)) { 61 + if (priv->op == NFT_DYNSET_OP_UPDATE && 62 + nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { 63 + timeout = priv->timeout ? 
: set->timeout; 64 + *nft_set_ext_expiration(ext) = jiffies + timeout; 65 + return; 66 + } 67 + } 68 + 69 + data[NFT_REG_VERDICT].verdict = NFT_BREAK; 70 + } 71 + 72 + static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { 73 + [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING }, 74 + [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 }, 75 + [NFTA_DYNSET_OP] = { .type = NLA_U32 }, 76 + [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, 77 + [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, 78 + [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, 79 + }; 80 + 81 + static int nft_dynset_init(const struct nft_ctx *ctx, 82 + const struct nft_expr *expr, 83 + const struct nlattr * const tb[]) 84 + { 85 + struct nft_dynset *priv = nft_expr_priv(expr); 86 + struct nft_set *set; 87 + u64 timeout; 88 + int err; 89 + 90 + if (tb[NFTA_DYNSET_SET_NAME] == NULL || 91 + tb[NFTA_DYNSET_OP] == NULL || 92 + tb[NFTA_DYNSET_SREG_KEY] == NULL) 93 + return -EINVAL; 94 + 95 + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]); 96 + if (IS_ERR(set)) { 97 + if (tb[NFTA_DYNSET_SET_ID]) 98 + set = nf_tables_set_lookup_byid(ctx->net, 99 + tb[NFTA_DYNSET_SET_ID]); 100 + if (IS_ERR(set)) 101 + return PTR_ERR(set); 102 + } 103 + 104 + if (set->flags & NFT_SET_CONSTANT) 105 + return -EBUSY; 106 + 107 + priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); 108 + switch (priv->op) { 109 + case NFT_DYNSET_OP_ADD: 110 + break; 111 + case NFT_DYNSET_OP_UPDATE: 112 + if (!(set->flags & NFT_SET_TIMEOUT)) 113 + return -EOPNOTSUPP; 114 + break; 115 + default: 116 + return -EOPNOTSUPP; 117 + } 118 + 119 + timeout = 0; 120 + if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { 121 + if (!(set->flags & NFT_SET_TIMEOUT)) 122 + return -EINVAL; 123 + timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); 124 + } 125 + 126 + priv->sreg_key = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_KEY])); 127 + err = nft_validate_input_register(priv->sreg_key); 128 + if (err < 0) 129 + return err; 130 + 131 + if 
(tb[NFTA_DYNSET_SREG_DATA] != NULL) { 132 + if (!(set->flags & NFT_SET_MAP)) 133 + return -EINVAL; 134 + if (set->dtype == NFT_DATA_VERDICT) 135 + return -EOPNOTSUPP; 136 + 137 + priv->sreg_data = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_DATA])); 138 + err = nft_validate_input_register(priv->sreg_data); 139 + if (err < 0) 140 + return err; 141 + } else if (set->flags & NFT_SET_MAP) 142 + return -EINVAL; 143 + 144 + nft_set_ext_prepare(&priv->tmpl); 145 + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); 146 + if (set->flags & NFT_SET_MAP) 147 + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen); 148 + if (set->flags & NFT_SET_TIMEOUT) { 149 + if (timeout || set->timeout) 150 + nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); 151 + } 152 + 153 + priv->timeout = timeout; 154 + 155 + err = nf_tables_bind_set(ctx, set, &priv->binding); 156 + if (err < 0) 157 + return err; 158 + 159 + priv->set = set; 160 + return 0; 161 + } 162 + 163 + static void nft_dynset_destroy(const struct nft_ctx *ctx, 164 + const struct nft_expr *expr) 165 + { 166 + struct nft_dynset *priv = nft_expr_priv(expr); 167 + 168 + nf_tables_unbind_set(ctx, priv->set, &priv->binding); 169 + } 170 + 171 + static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) 172 + { 173 + const struct nft_dynset *priv = nft_expr_priv(expr); 174 + 175 + if (nla_put_be32(skb, NFTA_DYNSET_SREG_KEY, htonl(priv->sreg_key))) 176 + goto nla_put_failure; 177 + if (priv->set->flags & NFT_SET_MAP && 178 + nla_put_be32(skb, NFTA_DYNSET_SREG_DATA, htonl(priv->sreg_data))) 179 + goto nla_put_failure; 180 + if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op))) 181 + goto nla_put_failure; 182 + if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) 183 + goto nla_put_failure; 184 + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) 185 + goto nla_put_failure; 186 + return 0; 187 + 188 + nla_put_failure: 189 + return -1; 190 + } 191 + 192 + static 
struct nft_expr_type nft_dynset_type; 193 + static const struct nft_expr_ops nft_dynset_ops = { 194 + .type = &nft_dynset_type, 195 + .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), 196 + .eval = nft_dynset_eval, 197 + .init = nft_dynset_init, 198 + .destroy = nft_dynset_destroy, 199 + .dump = nft_dynset_dump, 200 + }; 201 + 202 + static struct nft_expr_type nft_dynset_type __read_mostly = { 203 + .name = "dynset", 204 + .ops = &nft_dynset_ops, 205 + .policy = nft_dynset_policy, 206 + .maxattr = NFTA_DYNSET_MAX, 207 + .owner = THIS_MODULE, 208 + }; 209 + 210 + int __init nft_dynset_module_init(void) 211 + { 212 + return nft_register_expr(&nft_dynset_type); 213 + } 214 + 215 + void nft_dynset_module_exit(void) 216 + { 217 + nft_unregister_expr(&nft_dynset_type); 218 + }
+113 -4
net/netfilter/nft_hash.c
··· 15 15 #include <linux/log2.h> 16 16 #include <linux/jhash.h> 17 17 #include <linux/netlink.h> 18 + #include <linux/workqueue.h> 18 19 #include <linux/rhashtable.h> 19 20 #include <linux/netfilter.h> 20 21 #include <linux/netfilter/nf_tables.h> ··· 26 25 27 26 struct nft_hash { 28 27 struct rhashtable ht; 28 + struct delayed_work gc_work; 29 29 }; 30 30 31 31 struct nft_hash_elem { ··· 64 62 65 63 if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) 66 64 return 1; 65 + if (nft_set_elem_expired(&he->ext)) 66 + return 1; 67 67 if (!nft_set_elem_active(&he->ext, x->genmask)) 68 68 return 1; 69 69 return 0; ··· 90 86 return !!he; 91 87 } 92 88 89 + static bool nft_hash_update(struct nft_set *set, const struct nft_data *key, 90 + void *(*new)(struct nft_set *, 91 + const struct nft_expr *, 92 + struct nft_data []), 93 + const struct nft_expr *expr, 94 + struct nft_data data[], 95 + const struct nft_set_ext **ext) 96 + { 97 + struct nft_hash *priv = nft_set_priv(set); 98 + struct nft_hash_elem *he; 99 + struct nft_hash_cmp_arg arg = { 100 + .genmask = NFT_GENMASK_ANY, 101 + .set = set, 102 + .key = key, 103 + }; 104 + 105 + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); 106 + if (he != NULL) 107 + goto out; 108 + 109 + he = new(set, expr, data); 110 + if (he == NULL) 111 + goto err1; 112 + if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, 113 + nft_hash_params)) 114 + goto err2; 115 + out: 116 + *ext = &he->ext; 117 + return true; 118 + 119 + err2: 120 + nft_set_elem_destroy(set, he); 121 + err1: 122 + return false; 123 + } 124 + 93 125 static int nft_hash_insert(const struct nft_set *set, 94 126 const struct nft_set_elem *elem) 95 127 { ··· 147 107 struct nft_hash_elem *he = elem->priv; 148 108 149 109 nft_set_elem_change_active(set, &he->ext); 110 + nft_set_elem_clear_busy(&he->ext); 150 111 } 151 112 152 113 static void *nft_hash_deactivate(const struct nft_set *set, ··· 161 120 .key = &elem->key, 162 121 }; 163 122 123 + 
rcu_read_lock(); 164 124 he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); 165 - if (he != NULL) 166 - nft_set_elem_change_active(set, &he->ext); 125 + if (he != NULL) { 126 + if (!nft_set_elem_mark_busy(&he->ext)) 127 + nft_set_elem_change_active(set, &he->ext); 128 + else 129 + he = NULL; 130 + } 131 + rcu_read_unlock(); 167 132 168 133 return he; 169 134 } ··· 217 170 218 171 if (iter->count < iter->skip) 219 172 goto cont; 173 + if (nft_set_elem_expired(&he->ext)) 174 + goto cont; 220 175 if (!nft_set_elem_active(&he->ext, genmask)) 221 176 goto cont; 222 177 ··· 235 186 out: 236 187 rhashtable_walk_stop(&hti); 237 188 rhashtable_walk_exit(&hti); 189 + } 190 + 191 + static void nft_hash_gc(struct work_struct *work) 192 + { 193 + struct nft_set *set; 194 + struct nft_hash_elem *he; 195 + struct nft_hash *priv; 196 + struct nft_set_gc_batch *gcb = NULL; 197 + struct rhashtable_iter hti; 198 + int err; 199 + 200 + priv = container_of(work, struct nft_hash, gc_work.work); 201 + set = nft_set_container_of(priv); 202 + 203 + err = rhashtable_walk_init(&priv->ht, &hti); 204 + if (err) 205 + goto schedule; 206 + 207 + err = rhashtable_walk_start(&hti); 208 + if (err && err != -EAGAIN) 209 + goto out; 210 + 211 + while ((he = rhashtable_walk_next(&hti))) { 212 + if (IS_ERR(he)) { 213 + if (PTR_ERR(he) != -EAGAIN) 214 + goto out; 215 + continue; 216 + } 217 + 218 + if (!nft_set_elem_expired(&he->ext)) 219 + continue; 220 + if (nft_set_elem_mark_busy(&he->ext)) 221 + continue; 222 + 223 + gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); 224 + if (gcb == NULL) 225 + goto out; 226 + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); 227 + atomic_dec(&set->nelems); 228 + nft_set_gc_batch_add(gcb, he); 229 + } 230 + out: 231 + rhashtable_walk_stop(&hti); 232 + rhashtable_walk_exit(&hti); 233 + 234 + nft_set_gc_batch_complete(gcb); 235 + schedule: 236 + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, 237 + nft_set_gc_interval(set)); 
238 238 } 239 239 240 240 static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) ··· 305 207 { 306 208 struct nft_hash *priv = nft_set_priv(set); 307 209 struct rhashtable_params params = nft_hash_params; 210 + int err; 308 211 309 212 params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; 310 213 params.key_len = set->klen; 311 214 312 - return rhashtable_init(&priv->ht, &params); 215 + err = rhashtable_init(&priv->ht, &params); 216 + if (err < 0) 217 + return err; 218 + 219 + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); 220 + if (set->flags & NFT_SET_TIMEOUT) 221 + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, 222 + nft_set_gc_interval(set)); 223 + return 0; 313 224 } 314 225 315 226 static void nft_hash_elem_destroy(void *ptr, void *arg) ··· 330 223 { 331 224 struct nft_hash *priv = nft_set_priv(set); 332 225 226 + cancel_delayed_work_sync(&priv->gc_work); 333 227 rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, 334 228 (void *)set); 335 229 } ··· 371 263 .deactivate = nft_hash_deactivate, 372 264 .remove = nft_hash_remove, 373 265 .lookup = nft_hash_lookup, 266 + .update = nft_hash_update, 374 267 .walk = nft_hash_walk, 375 - .features = NFT_SET_MAP, 268 + .features = NFT_SET_MAP | NFT_SET_TIMEOUT, 376 269 .owner = THIS_MODULE, 377 270 }; 378 271
+2
net/netfilter/nft_lookup.c
··· 92 92 } else if (set->flags & NFT_SET_MAP) 93 93 return -EINVAL; 94 94 95 + priv->binding.flags = set->flags & NFT_SET_MAP; 96 + 95 97 err = nf_tables_bind_set(ctx, set, &priv->binding); 96 98 if (err < 0) 97 99 return err;
+2 -3
net/netfilter/nft_meta.c
··· 166 166 dest->data[0] = out->group; 167 167 break; 168 168 case NFT_META_CGROUP: 169 - if (skb->sk == NULL) 170 - break; 171 - 169 + if (skb->sk == NULL || !sk_fullsock(skb->sk)) 170 + goto err; 172 171 dest->data[0] = skb->sk->sk_classid; 173 172 break; 174 173 default:
+1 -1
net/netfilter/xt_cgroup.c
··· 39 39 { 40 40 const struct xt_cgroup_info *info = par->matchinfo; 41 41 42 - if (skb->sk == NULL) 42 + if (skb->sk == NULL || !sk_fullsock(skb->sk)) 43 43 return false; 44 44 45 45 return (info->id == skb->sk->sk_classid) ^ info->invert;
+22 -12
net/netfilter/xt_physdev.c
··· 25 25 static bool 26 26 physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) 27 27 { 28 - static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 29 28 const struct xt_physdev_info *info = par->matchinfo; 29 + const struct net_device *physdev; 30 30 unsigned long ret; 31 31 const char *indev, *outdev; 32 - const struct nf_bridge_info *nf_bridge; 33 32 34 33 /* Not a bridged IP packet or no info available yet: 35 34 * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if 36 35 * the destination device will be a bridge. */ 37 - if (!(nf_bridge = skb->nf_bridge)) { 36 + if (!skb->nf_bridge) { 38 37 /* Return MATCH if the invert flags of the used options are on */ 39 38 if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && 40 39 !(info->invert & XT_PHYSDEV_OP_BRIDGED)) ··· 53 54 return true; 54 55 } 55 56 57 + physdev = nf_bridge_get_physoutdev(skb); 58 + outdev = physdev ? physdev->name : NULL; 59 + 56 60 /* This only makes sense in the FORWARD and POSTROUTING chains */ 57 61 if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && 58 - (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) 62 + (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) 59 63 return false; 60 64 65 + physdev = nf_bridge_get_physindev(skb); 66 + indev = physdev ? physdev->name : NULL; 67 + 61 68 if ((info->bitmask & XT_PHYSDEV_OP_ISIN && 62 - (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || 69 + (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || 63 70 (info->bitmask & XT_PHYSDEV_OP_ISOUT && 64 - (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) 71 + (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) 65 72 return false; 66 73 67 74 if (!(info->bitmask & XT_PHYSDEV_OP_IN)) 68 75 goto match_outdev; 69 - indev = nf_bridge->physindev ? 
nf_bridge->physindev->name : nulldevname; 70 - ret = ifname_compare_aligned(indev, info->physindev, info->in_mask); 71 76 72 - if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) 73 - return false; 77 + if (indev) { 78 + ret = ifname_compare_aligned(indev, info->physindev, 79 + info->in_mask); 80 + 81 + if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) 82 + return false; 83 + } 74 84 75 85 match_outdev: 76 86 if (!(info->bitmask & XT_PHYSDEV_OP_OUT)) 77 87 return true; 78 - outdev = nf_bridge->physoutdev ? 79 - nf_bridge->physoutdev->name : nulldevname; 88 + 89 + if (!outdev) 90 + return false; 91 + 80 92 ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask); 81 93 82 94 return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
+50 -45
net/netfilter/xt_socket.c
··· 143 143 } 144 144 } 145 145 146 - static bool 147 - socket_match(const struct sk_buff *skb, struct xt_action_param *par, 148 - const struct xt_socket_mtinfo1 *info) 146 + static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, 147 + const struct net_device *indev) 149 148 { 150 149 const struct iphdr *iph = ip_hdr(skb); 151 - struct udphdr _hdr, *hp = NULL; 152 - struct sock *sk = skb->sk; 153 150 __be32 uninitialized_var(daddr), uninitialized_var(saddr); 154 151 __be16 uninitialized_var(dport), uninitialized_var(sport); 155 152 u8 uninitialized_var(protocol); ··· 156 159 #endif 157 160 158 161 if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { 162 + struct udphdr _hdr, *hp; 163 + 159 164 hp = skb_header_pointer(skb, ip_hdrlen(skb), 160 165 sizeof(_hdr), &_hdr); 161 166 if (hp == NULL) 162 - return false; 167 + return NULL; 163 168 164 169 protocol = iph->protocol; 165 170 saddr = iph->saddr; ··· 171 172 172 173 } else if (iph->protocol == IPPROTO_ICMP) { 173 174 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, 174 - &sport, &dport)) 175 - return false; 175 + &sport, &dport)) 176 + return NULL; 176 177 } else { 177 - return false; 178 + return NULL; 178 179 } 179 180 180 181 #ifdef XT_SOCKET_HAVE_CONNTRACK 181 - /* Do the lookup with the original socket address in case this is a 182 - * reply packet of an established SNAT-ted connection. */ 183 - 182 + /* Do the lookup with the original socket address in 183 + * case this is a reply packet of an established 184 + * SNAT-ted connection. 
185 + */ 184 186 ct = nf_ct_get(skb, &ctinfo); 185 187 if (ct && !nf_ct_is_untracked(ct) && 186 188 ((iph->protocol != IPPROTO_ICMP && ··· 197 197 } 198 198 #endif 199 199 200 + return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, 201 + sport, dport, indev); 202 + } 203 + 204 + static bool 205 + socket_match(const struct sk_buff *skb, struct xt_action_param *par, 206 + const struct xt_socket_mtinfo1 *info) 207 + { 208 + struct sock *sk = skb->sk; 209 + 200 210 if (!sk) 201 - sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol, 202 - saddr, daddr, sport, dport, 203 - par->in); 211 + sk = xt_socket_lookup_slow_v4(skb, par->in); 204 212 if (sk) { 205 213 bool wildcard; 206 214 bool transparent = true; ··· 233 225 sk = NULL; 234 226 } 235 227 236 - pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", 237 - protocol, &saddr, ntohs(sport), 238 - &daddr, ntohs(dport), 239 - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); 240 - 241 - return (sk != NULL); 228 + return sk != NULL; 242 229 } 243 230 244 231 static bool ··· 330 327 return NULL; 331 328 } 332 329 333 - static bool 334 - socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) 330 + static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, 331 + const struct net_device *indev) 335 332 { 336 - struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); 337 - struct udphdr _hdr, *hp = NULL; 338 - struct sock *sk = skb->sk; 339 - const struct in6_addr *daddr = NULL, *saddr = NULL; 340 333 __be16 uninitialized_var(dport), uninitialized_var(sport); 341 - int thoff = 0, uninitialized_var(tproto); 342 - const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 334 + const struct in6_addr *daddr = NULL, *saddr = NULL; 335 + struct ipv6hdr *iph = ipv6_hdr(skb); 336 + int thoff = 0, tproto; 343 337 344 338 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); 345 339 if (tproto < 0) { 346 340 pr_debug("unable to find transport header in IPv6 
packet, dropping\n"); 347 - return NF_DROP; 341 + return NULL; 348 342 } 349 343 350 344 if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { 351 - hp = skb_header_pointer(skb, thoff, 352 - sizeof(_hdr), &_hdr); 345 + struct udphdr _hdr, *hp; 346 + 347 + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); 353 348 if (hp == NULL) 354 - return false; 349 + return NULL; 355 350 356 351 saddr = &iph->saddr; 357 352 sport = hp->source; ··· 357 356 dport = hp->dest; 358 357 359 358 } else if (tproto == IPPROTO_ICMPV6) { 359 + struct ipv6hdr ipv6_var; 360 + 360 361 if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, 361 362 &sport, &dport, &ipv6_var)) 362 - return false; 363 + return NULL; 363 364 } else { 364 - return false; 365 + return NULL; 365 366 } 366 367 368 + return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, 369 + sport, dport, indev); 370 + } 371 + 372 + static bool 373 + socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) 374 + { 375 + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 376 + struct sock *sk = skb->sk; 377 + 367 378 if (!sk) 368 - sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto, 369 - saddr, daddr, sport, dport, 370 - par->in); 379 + sk = xt_socket_lookup_slow_v6(skb, par->in); 371 380 if (sk) { 372 381 bool wildcard; 373 382 bool transparent = true; ··· 402 391 sk = NULL; 403 392 } 404 393 405 - pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " 406 - "(orig %pI6:%hu) sock %p\n", 407 - tproto, saddr, ntohs(sport), 408 - daddr, ntohs(dport), 409 - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); 410 - 411 - return (sk != NULL); 394 + return sk != NULL; 412 395 } 413 396 #endif 414 397