Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next:

1) Add new run_estimation toggle to IPVS to stop the estimation_timer
logic, from Dust Li.

2) Relax superfluous dynset check on NFT_SET_TIMEOUT.

3) Add egress hook, from Lukas Wunner.

4) Nowadays, almost all hook functions in x_table land just call the hook
evaluation loop. Remove remaining hook wrappers from iptables and IPVS.
From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+393 -353
+11
Documentation/networking/ipvs-sysctl.rst
··· 300 300 301 301 Kernels with this sync_version entry are able to receive messages 302 302 of both version 1 and version 2 of the synchronisation protocol. 303 + 304 + run_estimation - BOOLEAN 305 + 0 - disabled 306 + not 0 - enabled (default) 307 + 308 + If disabled, the estimation will be stopped, and you can't see 309 + any update on speed estimation data. 310 + 311 + You can always re-enable estimation by setting this value to 1. 312 + But be careful, the first estimation after re-enabling is not 313 + accurate.
+3
drivers/net/ifb.c
··· 31 31 #include <linux/init.h> 32 32 #include <linux/interrupt.h> 33 33 #include <linux/moduleparam.h> 34 + #include <linux/netfilter_netdev.h> 34 35 #include <net/pkt_sched.h> 35 36 #include <net/net_namespace.h> 36 37 ··· 76 75 } 77 76 78 77 while ((skb = __skb_dequeue(&txp->tq)) != NULL) { 78 + /* Skip tc and netfilter to prevent redirection loop. */ 79 79 skb->redirected = 0; 80 80 skb->tc_skip_classify = 1; 81 + nf_skip_egress(skb, true); 81 82 82 83 u64_stats_update_begin(&txp->tsync); 83 84 txp->tx_packets++;
+4
include/linux/netdevice.h
··· 1861 1861 * @xps_maps: XXX: need comments on this one 1862 1862 * @miniq_egress: clsact qdisc specific data for 1863 1863 * egress processing 1864 + * @nf_hooks_egress: netfilter hooks executed for egress packets 1864 1865 * @qdisc_hash: qdisc hash table 1865 1866 * @watchdog_timeo: Represents the timeout that is used by 1866 1867 * the watchdog (see dev_watchdog()) ··· 2160 2159 #endif 2161 2160 #ifdef CONFIG_NET_CLS_ACT 2162 2161 struct mini_Qdisc __rcu *miniq_egress; 2162 + #endif 2163 + #ifdef CONFIG_NETFILTER_EGRESS 2164 + struct nf_hook_entries __rcu *nf_hooks_egress; 2163 2165 #endif 2164 2166 2165 2167 #ifdef CONFIG_NET_SCHED
+2 -3
include/linux/netfilter_arp/arp_tables.h
··· 54 54 const struct nf_hook_ops *ops); 55 55 void arpt_unregister_table(struct net *net, const char *name); 56 56 void arpt_unregister_table_pre_exit(struct net *net, const char *name); 57 - extern unsigned int arpt_do_table(struct sk_buff *skb, 58 - const struct nf_hook_state *state, 59 - struct xt_table *table); 57 + extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb, 58 + const struct nf_hook_state *state); 60 59 61 60 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT 62 61 #include <net/compat.h>
+2 -3
include/linux/netfilter_bridge/ebtables.h
··· 112 112 const struct nf_hook_ops *ops); 113 113 extern void ebt_unregister_table(struct net *net, const char *tablename); 114 114 void ebt_unregister_table_pre_exit(struct net *net, const char *tablename); 115 - extern unsigned int ebt_do_table(struct sk_buff *skb, 116 - const struct nf_hook_state *state, 117 - struct ebt_table *table); 115 + extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb, 116 + const struct nf_hook_state *state); 118 117 119 118 /* True if the hook mask denotes that the rule is in a base chain, 120 119 * used in the check() functions */
-58
include/linux/netfilter_ingress.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _NETFILTER_INGRESS_H_ 3 - #define _NETFILTER_INGRESS_H_ 4 - 5 - #include <linux/netfilter.h> 6 - #include <linux/netdevice.h> 7 - 8 - #ifdef CONFIG_NETFILTER_INGRESS 9 - static inline bool nf_hook_ingress_active(const struct sk_buff *skb) 10 - { 11 - #ifdef CONFIG_JUMP_LABEL 12 - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) 13 - return false; 14 - #endif 15 - return rcu_access_pointer(skb->dev->nf_hooks_ingress); 16 - } 17 - 18 - /* caller must hold rcu_read_lock */ 19 - static inline int nf_hook_ingress(struct sk_buff *skb) 20 - { 21 - struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); 22 - struct nf_hook_state state; 23 - int ret; 24 - 25 - /* Must recheck the ingress hook head, in the event it became NULL 26 - * after the check in nf_hook_ingress_active evaluated to true. 27 - */ 28 - if (unlikely(!e)) 29 - return 0; 30 - 31 - nf_hook_state_init(&state, NF_NETDEV_INGRESS, 32 - NFPROTO_NETDEV, skb->dev, NULL, NULL, 33 - dev_net(skb->dev), NULL); 34 - ret = nf_hook_slow(skb, &state, e, 0); 35 - if (ret == 0) 36 - return -1; 37 - 38 - return ret; 39 - } 40 - 41 - static inline void nf_hook_ingress_init(struct net_device *dev) 42 - { 43 - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); 44 - } 45 - #else /* CONFIG_NETFILTER_INGRESS */ 46 - static inline int nf_hook_ingress_active(struct sk_buff *skb) 47 - { 48 - return 0; 49 - } 50 - 51 - static inline int nf_hook_ingress(struct sk_buff *skb) 52 - { 53 - return 0; 54 - } 55 - 56 - static inline void nf_hook_ingress_init(struct net_device *dev) {} 57 - #endif /* CONFIG_NETFILTER_INGRESS */ 58 - #endif /* _NETFILTER_INGRESS_H_ */
+3 -3
include/linux/netfilter_ipv4/ip_tables.h
··· 63 63 } 64 64 65 65 extern void *ipt_alloc_initial_table(const struct xt_table *); 66 - extern unsigned int ipt_do_table(struct sk_buff *skb, 67 - const struct nf_hook_state *state, 68 - struct xt_table *table); 66 + extern unsigned int ipt_do_table(void *priv, 67 + struct sk_buff *skb, 68 + const struct nf_hook_state *state); 69 69 70 70 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT 71 71 #include <net/compat.h>
+2 -3
include/linux/netfilter_ipv6/ip6_tables.h
··· 29 29 const struct nf_hook_ops *ops); 30 30 void ip6t_unregister_table_pre_exit(struct net *net, const char *name); 31 31 void ip6t_unregister_table_exit(struct net *net, const char *name); 32 - extern unsigned int ip6t_do_table(struct sk_buff *skb, 33 - const struct nf_hook_state *state, 34 - struct xt_table *table); 32 + extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, 33 + const struct nf_hook_state *state); 35 34 36 35 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT 37 36 #include <net/compat.h>
+146
include/linux/netfilter_netdev.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _NETFILTER_NETDEV_H_ 3 + #define _NETFILTER_NETDEV_H_ 4 + 5 + #include <linux/netfilter.h> 6 + #include <linux/netdevice.h> 7 + 8 + #ifdef CONFIG_NETFILTER_INGRESS 9 + static inline bool nf_hook_ingress_active(const struct sk_buff *skb) 10 + { 11 + #ifdef CONFIG_JUMP_LABEL 12 + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) 13 + return false; 14 + #endif 15 + return rcu_access_pointer(skb->dev->nf_hooks_ingress); 16 + } 17 + 18 + /* caller must hold rcu_read_lock */ 19 + static inline int nf_hook_ingress(struct sk_buff *skb) 20 + { 21 + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); 22 + struct nf_hook_state state; 23 + int ret; 24 + 25 + /* Must recheck the ingress hook head, in the event it became NULL 26 + * after the check in nf_hook_ingress_active evaluated to true. 27 + */ 28 + if (unlikely(!e)) 29 + return 0; 30 + 31 + nf_hook_state_init(&state, NF_NETDEV_INGRESS, 32 + NFPROTO_NETDEV, skb->dev, NULL, NULL, 33 + dev_net(skb->dev), NULL); 34 + ret = nf_hook_slow(skb, &state, e, 0); 35 + if (ret == 0) 36 + return -1; 37 + 38 + return ret; 39 + } 40 + 41 + #else /* CONFIG_NETFILTER_INGRESS */ 42 + static inline int nf_hook_ingress_active(struct sk_buff *skb) 43 + { 44 + return 0; 45 + } 46 + 47 + static inline int nf_hook_ingress(struct sk_buff *skb) 48 + { 49 + return 0; 50 + } 51 + #endif /* CONFIG_NETFILTER_INGRESS */ 52 + 53 + #ifdef CONFIG_NETFILTER_EGRESS 54 + static inline bool nf_hook_egress_active(void) 55 + { 56 + #ifdef CONFIG_JUMP_LABEL 57 + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS])) 58 + return false; 59 + #endif 60 + return true; 61 + } 62 + 63 + /** 64 + * nf_hook_egress - classify packets before transmission 65 + * @skb: packet to be classified 66 + * @rc: result code which shall be returned by __dev_queue_xmit() on failure 67 + * @dev: netdev whose egress hooks shall be applied to @skb 68 + * 69 + * 
Returns @skb on success or %NULL if the packet was consumed or filtered. 70 + * Caller must hold rcu_read_lock. 71 + * 72 + * On ingress, packets are classified first by tc, then by netfilter. 73 + * On egress, the order is reversed for symmetry. Conceptually, tc and 74 + * netfilter can be thought of as layers, with netfilter layered above tc: 75 + * When tc redirects a packet to another interface, netfilter is not applied 76 + * because the packet is on the tc layer. 77 + * 78 + * The nf_skip_egress flag controls whether netfilter is applied on egress. 79 + * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the 80 + * packet passes through tc and netfilter. Because __dev_queue_xmit() may be 81 + * called recursively by tunnel drivers such as vxlan, the flag is reverted to 82 + * false after sch_handle_egress(). This ensures that netfilter is applied 83 + * both on the overlay and underlying network. 84 + */ 85 + static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, 86 + struct net_device *dev) 87 + { 88 + struct nf_hook_entries *e; 89 + struct nf_hook_state state; 90 + int ret; 91 + 92 + #ifdef CONFIG_NETFILTER_SKIP_EGRESS 93 + if (skb->nf_skip_egress) 94 + return skb; 95 + #endif 96 + 97 + e = rcu_dereference(dev->nf_hooks_egress); 98 + if (!e) 99 + return skb; 100 + 101 + nf_hook_state_init(&state, NF_NETDEV_EGRESS, 102 + NFPROTO_NETDEV, dev, NULL, NULL, 103 + dev_net(dev), NULL); 104 + ret = nf_hook_slow(skb, &state, e, 0); 105 + 106 + if (ret == 1) { 107 + return skb; 108 + } else if (ret < 0) { 109 + *rc = NET_XMIT_DROP; 110 + return NULL; 111 + } else { /* ret == 0 */ 112 + *rc = NET_XMIT_SUCCESS; 113 + return NULL; 114 + } 115 + } 116 + #else /* CONFIG_NETFILTER_EGRESS */ 117 + static inline bool nf_hook_egress_active(void) 118 + { 119 + return false; 120 + } 121 + 122 + static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, 123 + struct net_device *dev) 124 + { 125 + return skb; 126 + } 127 + 
#endif /* CONFIG_NETFILTER_EGRESS */ 128 + 129 + static inline void nf_skip_egress(struct sk_buff *skb, bool skip) 130 + { 131 + #ifdef CONFIG_NETFILTER_SKIP_EGRESS 132 + skb->nf_skip_egress = skip; 133 + #endif 134 + } 135 + 136 + static inline void nf_hook_netdev_init(struct net_device *dev) 137 + { 138 + #ifdef CONFIG_NETFILTER_INGRESS 139 + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); 140 + #endif 141 + #ifdef CONFIG_NETFILTER_EGRESS 142 + RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); 143 + #endif 144 + } 145 + 146 + #endif /* _NETFILTER_NETDEV_H_ */
+4
include/linux/skbuff.h
··· 652 652 * @tc_at_ingress: used within tc_classify to distinguish in/egress 653 653 * @redirected: packet was redirected by packet classifier 654 654 * @from_ingress: packet was redirected from the ingress path 655 + * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h 655 656 * @peeked: this packet has been seen already, so stats have been 656 657 * done for it, don't do them again 657 658 * @nf_trace: netfilter packet trace flag ··· 868 867 __u8 redirected:1; 869 868 #ifdef CONFIG_NET_REDIRECT 870 869 __u8 from_ingress:1; 870 + #endif 871 + #ifdef CONFIG_NETFILTER_SKIP_EGRESS 872 + __u8 nf_skip_egress:1; 871 873 #endif 872 874 #ifdef CONFIG_TLS_DEVICE 873 875 __u8 decrypted:1;
+11
include/net/ip_vs.h
··· 931 931 int sysctl_conn_reuse_mode; 932 932 int sysctl_schedule_icmp; 933 933 int sysctl_ignore_tunneled; 934 + int sysctl_run_estimation; 934 935 935 936 /* ip_vs_lblc */ 936 937 int sysctl_lblc_expiration; ··· 1072 1071 return ipvs->sysctl_cache_bypass; 1073 1072 } 1074 1073 1074 + static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) 1075 + { 1076 + return ipvs->sysctl_run_estimation; 1077 + } 1078 + 1075 1079 #else 1076 1080 1077 1081 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) ··· 1167 1161 static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) 1168 1162 { 1169 1163 return 0; 1164 + } 1165 + 1166 + static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) 1167 + { 1168 + return 1; 1170 1169 } 1171 1170 1172 1171 #endif
+1
include/uapi/linux/netfilter.h
··· 51 51 52 52 enum nf_dev_hooks { 53 53 NF_NETDEV_INGRESS, 54 + NF_NETDEV_EGRESS, 54 55 NF_NETDEV_NUMHOOKS 55 56 }; 56 57
+1 -1
net/bridge/netfilter/ebtable_broute.c
··· 66 66 NFPROTO_BRIDGE, s->in, NULL, NULL, 67 67 s->net, NULL); 68 68 69 - ret = ebt_do_table(skb, &state, priv); 69 + ret = ebt_do_table(priv, skb, &state); 70 70 if (ret != NF_DROP) 71 71 return ret; 72 72
+3 -10
net/bridge/netfilter/ebtable_filter.c
··· 58 58 .me = THIS_MODULE, 59 59 }; 60 60 61 - static unsigned int 62 - ebt_filter_hook(void *priv, struct sk_buff *skb, 63 - const struct nf_hook_state *state) 64 - { 65 - return ebt_do_table(skb, state, priv); 66 - } 67 - 68 61 static const struct nf_hook_ops ebt_ops_filter[] = { 69 62 { 70 - .hook = ebt_filter_hook, 63 + .hook = ebt_do_table, 71 64 .pf = NFPROTO_BRIDGE, 72 65 .hooknum = NF_BR_LOCAL_IN, 73 66 .priority = NF_BR_PRI_FILTER_BRIDGED, 74 67 }, 75 68 { 76 - .hook = ebt_filter_hook, 69 + .hook = ebt_do_table, 77 70 .pf = NFPROTO_BRIDGE, 78 71 .hooknum = NF_BR_FORWARD, 79 72 .priority = NF_BR_PRI_FILTER_BRIDGED, 80 73 }, 81 74 { 82 - .hook = ebt_filter_hook, 75 + .hook = ebt_do_table, 83 76 .pf = NFPROTO_BRIDGE, 84 77 .hooknum = NF_BR_LOCAL_OUT, 85 78 .priority = NF_BR_PRI_FILTER_OTHER,
+3 -9
net/bridge/netfilter/ebtable_nat.c
··· 58 58 .me = THIS_MODULE, 59 59 }; 60 60 61 - static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb, 62 - const struct nf_hook_state *state) 63 - { 64 - return ebt_do_table(skb, state, priv); 65 - } 66 - 67 61 static const struct nf_hook_ops ebt_ops_nat[] = { 68 62 { 69 - .hook = ebt_nat_hook, 63 + .hook = ebt_do_table, 70 64 .pf = NFPROTO_BRIDGE, 71 65 .hooknum = NF_BR_LOCAL_OUT, 72 66 .priority = NF_BR_PRI_NAT_DST_OTHER, 73 67 }, 74 68 { 75 - .hook = ebt_nat_hook, 69 + .hook = ebt_do_table, 76 70 .pf = NFPROTO_BRIDGE, 77 71 .hooknum = NF_BR_POST_ROUTING, 78 72 .priority = NF_BR_PRI_NAT_SRC, 79 73 }, 80 74 { 81 - .hook = ebt_nat_hook, 75 + .hook = ebt_do_table, 82 76 .pf = NFPROTO_BRIDGE, 83 77 .hooknum = NF_BR_PRE_ROUTING, 84 78 .priority = NF_BR_PRI_NAT_DST_BRIDGED,
+3 -3
net/bridge/netfilter/ebtables.c
··· 189 189 } 190 190 191 191 /* Do some firewalling */ 192 - unsigned int ebt_do_table(struct sk_buff *skb, 193 - const struct nf_hook_state *state, 194 - struct ebt_table *table) 192 + unsigned int ebt_do_table(void *priv, struct sk_buff *skb, 193 + const struct nf_hook_state *state) 195 194 { 195 + struct ebt_table *table = priv; 196 196 unsigned int hook = state->hook; 197 197 int i, nentries; 198 198 struct ebt_entry *point;
+15 -4
net/core/dev.c
··· 140 140 #include <linux/if_macvlan.h> 141 141 #include <linux/errqueue.h> 142 142 #include <linux/hrtimer.h> 143 - #include <linux/netfilter_ingress.h> 143 + #include <linux/netfilter_netdev.h> 144 144 #include <linux/crash_dump.h> 145 145 #include <linux/sctp.h> 146 146 #include <net/udp_tunnel.h> ··· 3926 3926 static struct sk_buff * 3927 3927 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) 3928 3928 { 3929 + #ifdef CONFIG_NET_CLS_ACT 3929 3930 struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); 3930 3931 struct tcf_result cl_res; 3931 3932 ··· 3962 3961 default: 3963 3962 break; 3964 3963 } 3964 + #endif /* CONFIG_NET_CLS_ACT */ 3965 3965 3966 3966 return skb; 3967 3967 } ··· 4156 4154 qdisc_pkt_len_init(skb); 4157 4155 #ifdef CONFIG_NET_CLS_ACT 4158 4156 skb->tc_at_ingress = 0; 4159 - # ifdef CONFIG_NET_EGRESS 4157 + #endif 4158 + #ifdef CONFIG_NET_EGRESS 4160 4159 if (static_branch_unlikely(&egress_needed_key)) { 4160 + if (nf_hook_egress_active()) { 4161 + skb = nf_hook_egress(skb, &rc, dev); 4162 + if (!skb) 4163 + goto out; 4164 + } 4165 + nf_skip_egress(skb, true); 4161 4166 skb = sch_handle_egress(skb, &rc, dev); 4162 4167 if (!skb) 4163 4168 goto out; 4169 + nf_skip_egress(skb, false); 4164 4170 } 4165 - # endif 4166 4171 #endif 4167 4172 /* If device/qdisc don't need skb->dst, release it right now while 4168 4173 * its hot in this cpu cache. 
··· 5311 5302 if (static_branch_unlikely(&ingress_needed_key)) { 5312 5303 bool another = false; 5313 5304 5305 + nf_skip_egress(skb, true); 5314 5306 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, 5315 5307 &another); 5316 5308 if (another) ··· 5319 5309 if (!skb) 5320 5310 goto out; 5321 5311 5312 + nf_skip_egress(skb, false); 5322 5313 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) 5323 5314 goto out; 5324 5315 } ··· 10881 10870 if (!dev->ethtool_ops) 10882 10871 dev->ethtool_ops = &default_ethtool_ops; 10883 10872 10884 - nf_hook_ingress_init(dev); 10873 + nf_hook_netdev_init(dev); 10885 10874 10886 10875 return dev; 10887 10876
+4 -3
net/ipv4/netfilter/arp_tables.c
··· 179 179 return (void *)entry + entry->next_offset; 180 180 } 181 181 182 - unsigned int arpt_do_table(struct sk_buff *skb, 183 - const struct nf_hook_state *state, 184 - struct xt_table *table) 182 + unsigned int arpt_do_table(void *priv, 183 + struct sk_buff *skb, 184 + const struct nf_hook_state *state) 185 185 { 186 + const struct xt_table *table = priv; 186 187 unsigned int hook = state->hook; 187 188 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 188 189 unsigned int verdict = NF_DROP;
+1 -9
net/ipv4/netfilter/arptable_filter.c
··· 26 26 .priority = NF_IP_PRI_FILTER, 27 27 }; 28 28 29 - /* The work comes in here from netfilter.c */ 30 - static unsigned int 31 - arptable_filter_hook(void *priv, struct sk_buff *skb, 32 - const struct nf_hook_state *state) 33 - { 34 - return arpt_do_table(skb, state, priv); 35 - } 36 - 37 29 static struct nf_hook_ops *arpfilter_ops __read_mostly; 38 30 39 31 static int arptable_filter_table_init(struct net *net) ··· 64 72 if (ret < 0) 65 73 return ret; 66 74 67 - arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); 75 + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); 68 76 if (IS_ERR(arpfilter_ops)) { 69 77 xt_unregister_template(&packet_filter); 70 78 return PTR_ERR(arpfilter_ops);
+4 -3
net/ipv4/netfilter/ip_tables.c
··· 222 222 223 223 /* Returns one of the generic firewall policies, like NF_ACCEPT. */ 224 224 unsigned int 225 - ipt_do_table(struct sk_buff *skb, 226 - const struct nf_hook_state *state, 227 - struct xt_table *table) 225 + ipt_do_table(void *priv, 226 + struct sk_buff *skb, 227 + const struct nf_hook_state *state) 228 228 { 229 + const struct xt_table *table = priv; 229 230 unsigned int hook = state->hook; 230 231 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 231 232 const struct iphdr *ip;
+1 -8
net/ipv4/netfilter/iptable_filter.c
··· 28 28 .priority = NF_IP_PRI_FILTER, 29 29 }; 30 30 31 - static unsigned int 32 - iptable_filter_hook(void *priv, struct sk_buff *skb, 33 - const struct nf_hook_state *state) 34 - { 35 - return ipt_do_table(skb, state, priv); 36 - } 37 - 38 31 static struct nf_hook_ops *filter_ops __read_mostly; 39 32 40 33 /* Default to forward because I got too much mail already. */ ··· 83 90 if (ret < 0) 84 91 return ret; 85 92 86 - filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); 93 + filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); 87 94 if (IS_ERR(filter_ops)) { 88 95 xt_unregister_template(&packet_filter); 89 96 return PTR_ERR(filter_ops);
+4 -4
net/ipv4/netfilter/iptable_mangle.c
··· 34 34 }; 35 35 36 36 static unsigned int 37 - ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv) 37 + ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 38 38 { 39 39 unsigned int ret; 40 40 const struct iphdr *iph; ··· 50 50 daddr = iph->daddr; 51 51 tos = iph->tos; 52 52 53 - ret = ipt_do_table(skb, state, priv); 53 + ret = ipt_do_table(priv, skb, state); 54 54 /* Reroute for ANY change. */ 55 55 if (ret != NF_DROP && ret != NF_STOLEN) { 56 56 iph = ip_hdr(skb); ··· 75 75 const struct nf_hook_state *state) 76 76 { 77 77 if (state->hook == NF_INET_LOCAL_OUT) 78 - return ipt_mangle_out(skb, state, priv); 79 - return ipt_do_table(skb, state, priv); 78 + return ipt_mangle_out(priv, skb, state); 79 + return ipt_do_table(priv, skb, state); 80 80 } 81 81 82 82 static struct nf_hook_ops *mangle_ops __read_mostly;
+4 -11
net/ipv4/netfilter/iptable_nat.c
··· 29 29 .af = NFPROTO_IPV4, 30 30 }; 31 31 32 - static unsigned int iptable_nat_do_chain(void *priv, 33 - struct sk_buff *skb, 34 - const struct nf_hook_state *state) 35 - { 36 - return ipt_do_table(skb, state, priv); 37 - } 38 - 39 32 static const struct nf_hook_ops nf_nat_ipv4_ops[] = { 40 33 { 41 - .hook = iptable_nat_do_chain, 34 + .hook = ipt_do_table, 42 35 .pf = NFPROTO_IPV4, 43 36 .hooknum = NF_INET_PRE_ROUTING, 44 37 .priority = NF_IP_PRI_NAT_DST, 45 38 }, 46 39 { 47 - .hook = iptable_nat_do_chain, 40 + .hook = ipt_do_table, 48 41 .pf = NFPROTO_IPV4, 49 42 .hooknum = NF_INET_POST_ROUTING, 50 43 .priority = NF_IP_PRI_NAT_SRC, 51 44 }, 52 45 { 53 - .hook = iptable_nat_do_chain, 46 + .hook = ipt_do_table, 54 47 .pf = NFPROTO_IPV4, 55 48 .hooknum = NF_INET_LOCAL_OUT, 56 49 .priority = NF_IP_PRI_NAT_DST, 57 50 }, 58 51 { 59 - .hook = iptable_nat_do_chain, 52 + .hook = ipt_do_table, 60 53 .pf = NFPROTO_IPV4, 61 54 .hooknum = NF_INET_LOCAL_IN, 62 55 .priority = NF_IP_PRI_NAT_SRC,
+1 -9
net/ipv4/netfilter/iptable_raw.c
··· 32 32 .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, 33 33 }; 34 34 35 - /* The work comes in here from netfilter.c. */ 36 - static unsigned int 37 - iptable_raw_hook(void *priv, struct sk_buff *skb, 38 - const struct nf_hook_state *state) 39 - { 40 - return ipt_do_table(skb, state, priv); 41 - } 42 - 43 35 static struct nf_hook_ops *rawtable_ops __read_mostly; 44 36 45 37 static int iptable_raw_table_init(struct net *net) ··· 82 90 if (ret < 0) 83 91 return ret; 84 92 85 - rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook); 93 + rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); 86 94 if (IS_ERR(rawtable_ops)) { 87 95 xt_unregister_template(table); 88 96 return PTR_ERR(rawtable_ops);
+1 -8
net/ipv4/netfilter/iptable_security.c
··· 33 33 .priority = NF_IP_PRI_SECURITY, 34 34 }; 35 35 36 - static unsigned int 37 - iptable_security_hook(void *priv, struct sk_buff *skb, 38 - const struct nf_hook_state *state) 39 - { 40 - return ipt_do_table(skb, state, priv); 41 - } 42 - 43 36 static struct nf_hook_ops *sectbl_ops __read_mostly; 44 37 45 38 static int iptable_security_table_init(struct net *net) ··· 71 78 if (ret < 0) 72 79 return ret; 73 80 74 - sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); 81 + sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); 75 82 if (IS_ERR(sectbl_ops)) { 76 83 xt_unregister_template(&security_table); 77 84 return PTR_ERR(sectbl_ops);
+3 -3
net/ipv6/netfilter/ip6_tables.c
··· 247 247 248 248 /* Returns one of the generic firewall policies, like NF_ACCEPT. */ 249 249 unsigned int 250 - ip6t_do_table(struct sk_buff *skb, 251 - const struct nf_hook_state *state, 252 - struct xt_table *table) 250 + ip6t_do_table(void *priv, struct sk_buff *skb, 251 + const struct nf_hook_state *state) 253 252 { 253 + const struct xt_table *table = priv; 254 254 unsigned int hook = state->hook; 255 255 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 256 256 /* Initializing verdict to NF_DROP keeps gcc happy. */
+1 -9
net/ipv6/netfilter/ip6table_filter.c
··· 27 27 .priority = NF_IP6_PRI_FILTER, 28 28 }; 29 29 30 - /* The work comes in here from netfilter.c. */ 31 - static unsigned int 32 - ip6table_filter_hook(void *priv, struct sk_buff *skb, 33 - const struct nf_hook_state *state) 34 - { 35 - return ip6t_do_table(skb, state, priv); 36 - } 37 - 38 30 static struct nf_hook_ops *filter_ops __read_mostly; 39 31 40 32 /* Default to forward because I got too much mail already. */ ··· 82 90 if (ret < 0) 83 91 return ret; 84 92 85 - filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); 93 + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); 86 94 if (IS_ERR(filter_ops)) { 87 95 xt_unregister_template(&packet_filter); 88 96 return PTR_ERR(filter_ops);
+4 -4
net/ipv6/netfilter/ip6table_mangle.c
··· 29 29 }; 30 30 31 31 static unsigned int 32 - ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv) 32 + ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 33 33 { 34 34 unsigned int ret; 35 35 struct in6_addr saddr, daddr; ··· 46 46 /* flowlabel and prio (includes version, which shouldn't change either */ 47 47 flowlabel = *((u_int32_t *)ipv6_hdr(skb)); 48 48 49 - ret = ip6t_do_table(skb, state, priv); 49 + ret = ip6t_do_table(priv, skb, state); 50 50 51 51 if (ret != NF_DROP && ret != NF_STOLEN && 52 52 (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || ··· 68 68 const struct nf_hook_state *state) 69 69 { 70 70 if (state->hook == NF_INET_LOCAL_OUT) 71 - return ip6t_mangle_out(skb, state, priv); 72 - return ip6t_do_table(skb, state, priv); 71 + return ip6t_mangle_out(priv, skb, state); 72 + return ip6t_do_table(priv, skb, state); 73 73 } 74 74 75 75 static struct nf_hook_ops *mangle_ops __read_mostly;
+4 -11
net/ipv6/netfilter/ip6table_nat.c
··· 31 31 .af = NFPROTO_IPV6, 32 32 }; 33 33 34 - static unsigned int ip6table_nat_do_chain(void *priv, 35 - struct sk_buff *skb, 36 - const struct nf_hook_state *state) 37 - { 38 - return ip6t_do_table(skb, state, priv); 39 - } 40 - 41 34 static const struct nf_hook_ops nf_nat_ipv6_ops[] = { 42 35 { 43 - .hook = ip6table_nat_do_chain, 36 + .hook = ip6t_do_table, 44 37 .pf = NFPROTO_IPV6, 45 38 .hooknum = NF_INET_PRE_ROUTING, 46 39 .priority = NF_IP6_PRI_NAT_DST, 47 40 }, 48 41 { 49 - .hook = ip6table_nat_do_chain, 42 + .hook = ip6t_do_table, 50 43 .pf = NFPROTO_IPV6, 51 44 .hooknum = NF_INET_POST_ROUTING, 52 45 .priority = NF_IP6_PRI_NAT_SRC, 53 46 }, 54 47 { 55 - .hook = ip6table_nat_do_chain, 48 + .hook = ip6t_do_table, 56 49 .pf = NFPROTO_IPV6, 57 50 .hooknum = NF_INET_LOCAL_OUT, 58 51 .priority = NF_IP6_PRI_NAT_DST, 59 52 }, 60 53 { 61 - .hook = ip6table_nat_do_chain, 54 + .hook = ip6t_do_table, 62 55 .pf = NFPROTO_IPV6, 63 56 .hooknum = NF_INET_LOCAL_IN, 64 57 .priority = NF_IP6_PRI_NAT_SRC,
+1 -9
net/ipv6/netfilter/ip6table_raw.c
··· 31 31 .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, 32 32 }; 33 33 34 - /* The work comes in here from netfilter.c. */ 35 - static unsigned int 36 - ip6table_raw_hook(void *priv, struct sk_buff *skb, 37 - const struct nf_hook_state *state) 38 - { 39 - return ip6t_do_table(skb, state, priv); 40 - } 41 - 42 34 static struct nf_hook_ops *rawtable_ops __read_mostly; 43 35 44 36 static int ip6table_raw_table_init(struct net *net) ··· 80 88 return ret; 81 89 82 90 /* Register hooks */ 83 - rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); 91 + rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); 84 92 if (IS_ERR(rawtable_ops)) { 85 93 xt_unregister_template(table); 86 94 return PTR_ERR(rawtable_ops);
+1 -8
net/ipv6/netfilter/ip6table_security.c
··· 32 32 .priority = NF_IP6_PRI_SECURITY, 33 33 }; 34 34 35 - static unsigned int 36 - ip6table_security_hook(void *priv, struct sk_buff *skb, 37 - const struct nf_hook_state *state) 38 - { 39 - return ip6t_do_table(skb, state, priv); 40 - } 41 - 42 35 static struct nf_hook_ops *sectbl_ops __read_mostly; 43 36 44 37 static int ip6table_security_table_init(struct net *net) ··· 70 77 if (ret < 0) 71 78 return ret; 72 79 73 - sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); 80 + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); 74 81 if (IS_ERR(sectbl_ops)) { 75 82 xt_unregister_template(&security_table); 76 83 return PTR_ERR(sectbl_ops);
+11
net/netfilter/Kconfig
··· 10 10 This allows you to classify packets from ingress using the Netfilter 11 11 infrastructure. 12 12 13 + config NETFILTER_EGRESS 14 + bool "Netfilter egress support" 15 + default y 16 + select NET_EGRESS 17 + help 18 + This allows you to classify packets before transmission using the 19 + Netfilter infrastructure. 20 + 21 + config NETFILTER_SKIP_EGRESS 22 + def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB) 23 + 13 24 config NETFILTER_NETLINK 14 25 tristate 15 26
+34 -4
net/netfilter/core.c
··· 317 317 return &dev->nf_hooks_ingress; 318 318 } 319 319 #endif 320 + #ifdef CONFIG_NETFILTER_EGRESS 321 + if (hooknum == NF_NETDEV_EGRESS) { 322 + if (dev && dev_net(dev) == net) 323 + return &dev->nf_hooks_egress; 324 + } 325 + #endif 320 326 WARN_ON_ONCE(1); 321 327 return NULL; 322 328 } ··· 341 335 return 0; 342 336 } 343 337 344 - static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) 338 + static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg, 339 + int pf) 345 340 { 346 341 if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) || 347 342 (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS)) 348 343 return true; 349 344 350 345 return false; 346 + } 347 + 348 + static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg, 349 + int pf) 350 + { 351 + return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS; 351 352 } 352 353 353 354 static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf) ··· 396 383 397 384 switch (pf) { 398 385 case NFPROTO_NETDEV: 399 - err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS); 400 - if (err < 0) 401 - return err; 386 + #ifndef CONFIG_NETFILTER_INGRESS 387 + if (reg->hooknum == NF_NETDEV_INGRESS) 388 + return -EOPNOTSUPP; 389 + #endif 390 + #ifndef CONFIG_NETFILTER_EGRESS 391 + if (reg->hooknum == NF_NETDEV_EGRESS) 392 + return -EOPNOTSUPP; 393 + #endif 394 + if ((reg->hooknum != NF_NETDEV_INGRESS && 395 + reg->hooknum != NF_NETDEV_EGRESS) || 396 + !reg->dev || dev_net(reg->dev) != net) 397 + return -EINVAL; 402 398 break; 403 399 case NFPROTO_INET: 404 400 if (reg->hooknum != NF_INET_INGRESS) ··· 439 417 #ifdef CONFIG_NETFILTER_INGRESS 440 418 if (nf_ingress_hook(reg, pf)) 441 419 net_inc_ingress_queue(); 420 + #endif 421 + #ifdef CONFIG_NETFILTER_EGRESS 422 + if (nf_egress_hook(reg, pf)) 423 + net_inc_egress_queue(); 442 424 #endif 443 425 nf_static_key_inc(reg, pf); 444 426 ··· 500 474 #ifdef CONFIG_NETFILTER_INGRESS 501 475 if 
(nf_ingress_hook(reg, pf)) 502 476 net_dec_ingress_queue(); 477 + #endif 478 + #ifdef CONFIG_NETFILTER_EGRESS 479 + if (nf_egress_hook(reg, pf)) 480 + net_dec_egress_queue(); 503 481 #endif 504 482 nf_static_key_dec(reg, pf); 505 483 } else {
+36 -138
net/netfilter/ipvs/ip_vs_core.c
··· 1330 1330 * Check if outgoing packet belongs to the established ip_vs_conn. 1331 1331 */ 1332 1332 static unsigned int 1333 - ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) 1333 + ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 1334 1334 { 1335 + struct netns_ipvs *ipvs = net_ipvs(state->net); 1336 + unsigned int hooknum = state->hook; 1335 1337 struct ip_vs_iphdr iph; 1336 1338 struct ip_vs_protocol *pp; 1337 1339 struct ip_vs_proto_data *pd; 1338 1340 struct ip_vs_conn *cp; 1341 + int af = state->pf; 1339 1342 struct sock *sk; 1340 1343 1341 1344 EnterFunction(11); ··· 1470 1467 "ip_vs_out: packet continues traversal as normal"); 1471 1468 return NF_ACCEPT; 1472 1469 } 1473 - 1474 - /* 1475 - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, 1476 - * used only for VS/NAT. 1477 - * Check if packet is reply for established ip_vs_conn. 1478 - */ 1479 - static unsigned int 1480 - ip_vs_reply4(void *priv, struct sk_buff *skb, 1481 - const struct nf_hook_state *state) 1482 - { 1483 - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); 1484 - } 1485 - 1486 - /* 1487 - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. 1488 - * Check if packet is reply for established ip_vs_conn. 1489 - */ 1490 - static unsigned int 1491 - ip_vs_local_reply4(void *priv, struct sk_buff *skb, 1492 - const struct nf_hook_state *state) 1493 - { 1494 - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); 1495 - } 1496 - 1497 - #ifdef CONFIG_IP_VS_IPV6 1498 - 1499 - /* 1500 - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, 1501 - * used only for VS/NAT. 1502 - * Check if packet is reply for established ip_vs_conn. 
1503 - */ 1504 - static unsigned int 1505 - ip_vs_reply6(void *priv, struct sk_buff *skb, 1506 - const struct nf_hook_state *state) 1507 - { 1508 - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); 1509 - } 1510 - 1511 - /* 1512 - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. 1513 - * Check if packet is reply for established ip_vs_conn. 1514 - */ 1515 - static unsigned int 1516 - ip_vs_local_reply6(void *priv, struct sk_buff *skb, 1517 - const struct nf_hook_state *state) 1518 - { 1519 - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); 1520 - } 1521 - 1522 - #endif 1523 1470 1524 1471 static unsigned int 1525 1472 ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ··· 1910 1957 * and send it on its way... 1911 1958 */ 1912 1959 static unsigned int 1913 - ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) 1960 + ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 1914 1961 { 1962 + struct netns_ipvs *ipvs = net_ipvs(state->net); 1963 + unsigned int hooknum = state->hook; 1915 1964 struct ip_vs_iphdr iph; 1916 1965 struct ip_vs_protocol *pp; 1917 1966 struct ip_vs_proto_data *pd; ··· 1921 1966 int ret, pkts; 1922 1967 int conn_reuse_mode; 1923 1968 struct sock *sk; 1969 + int af = state->pf; 1924 1970 1925 1971 /* Already marked as IPVS request or reply? 
*/ 1926 1972 if (skb->ipvs_property) ··· 2094 2138 } 2095 2139 2096 2140 /* 2097 - * AF_INET handler in NF_INET_LOCAL_IN chain 2098 - * Schedule and forward packets from remote clients 2099 - */ 2100 - static unsigned int 2101 - ip_vs_remote_request4(void *priv, struct sk_buff *skb, 2102 - const struct nf_hook_state *state) 2103 - { 2104 - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); 2105 - } 2106 - 2107 - /* 2108 - * AF_INET handler in NF_INET_LOCAL_OUT chain 2109 - * Schedule and forward packets from local clients 2110 - */ 2111 - static unsigned int 2112 - ip_vs_local_request4(void *priv, struct sk_buff *skb, 2113 - const struct nf_hook_state *state) 2114 - { 2115 - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); 2116 - } 2117 - 2118 - #ifdef CONFIG_IP_VS_IPV6 2119 - 2120 - /* 2121 - * AF_INET6 handler in NF_INET_LOCAL_IN chain 2122 - * Schedule and forward packets from remote clients 2123 - */ 2124 - static unsigned int 2125 - ip_vs_remote_request6(void *priv, struct sk_buff *skb, 2126 - const struct nf_hook_state *state) 2127 - { 2128 - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); 2129 - } 2130 - 2131 - /* 2132 - * AF_INET6 handler in NF_INET_LOCAL_OUT chain 2133 - * Schedule and forward packets from local clients 2134 - */ 2135 - static unsigned int 2136 - ip_vs_local_request6(void *priv, struct sk_buff *skb, 2137 - const struct nf_hook_state *state) 2138 - { 2139 - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); 2140 - } 2141 - 2142 - #endif 2143 - 2144 - 2145 - /* 2146 2141 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP 2147 2142 * related packets destined for 0.0.0.0/0. 
2148 2143 * When fwmark-based virtual service is used, such as transparent ··· 2106 2199 ip_vs_forward_icmp(void *priv, struct sk_buff *skb, 2107 2200 const struct nf_hook_state *state) 2108 2201 { 2109 - int r; 2110 2202 struct netns_ipvs *ipvs = net_ipvs(state->net); 2111 - 2112 - if (ip_hdr(skb)->protocol != IPPROTO_ICMP) 2113 - return NF_ACCEPT; 2203 + int r; 2114 2204 2115 2205 /* ipvs enabled in this netns ? */ 2116 2206 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) 2117 2207 return NF_ACCEPT; 2208 + 2209 + if (state->pf == NFPROTO_IPV4) { 2210 + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) 2211 + return NF_ACCEPT; 2212 + #ifdef CONFIG_IP_VS_IPV6 2213 + } else { 2214 + struct ip_vs_iphdr iphdr; 2215 + 2216 + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); 2217 + 2218 + if (iphdr.protocol != IPPROTO_ICMPV6) 2219 + return NF_ACCEPT; 2220 + 2221 + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); 2222 + #endif 2223 + } 2118 2224 2119 2225 return ip_vs_in_icmp(ipvs, skb, &r, state->hook); 2120 2226 } 2121 2227 2122 - #ifdef CONFIG_IP_VS_IPV6 2123 - static unsigned int 2124 - ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, 2125 - const struct nf_hook_state *state) 2126 - { 2127 - int r; 2128 - struct netns_ipvs *ipvs = net_ipvs(state->net); 2129 - struct ip_vs_iphdr iphdr; 2130 - 2131 - ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); 2132 - if (iphdr.protocol != IPPROTO_ICMPV6) 2133 - return NF_ACCEPT; 2134 - 2135 - /* ipvs enabled in this netns ? 
*/ 2136 - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) 2137 - return NF_ACCEPT; 2138 - 2139 - return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); 2140 - } 2141 - #endif 2142 - 2143 - 2144 2228 static const struct nf_hook_ops ip_vs_ops4[] = { 2145 2229 /* After packet filtering, change source only for VS/NAT */ 2146 2230 { 2147 - .hook = ip_vs_reply4, 2231 + .hook = ip_vs_out_hook, 2148 2232 .pf = NFPROTO_IPV4, 2149 2233 .hooknum = NF_INET_LOCAL_IN, 2150 2234 .priority = NF_IP_PRI_NAT_SRC - 2, ··· 2144 2246 * or VS/NAT(change destination), so that filtering rules can be 2145 2247 * applied to IPVS. */ 2146 2248 { 2147 - .hook = ip_vs_remote_request4, 2249 + .hook = ip_vs_in_hook, 2148 2250 .pf = NFPROTO_IPV4, 2149 2251 .hooknum = NF_INET_LOCAL_IN, 2150 2252 .priority = NF_IP_PRI_NAT_SRC - 1, 2151 2253 }, 2152 2254 /* Before ip_vs_in, change source only for VS/NAT */ 2153 2255 { 2154 - .hook = ip_vs_local_reply4, 2256 + .hook = ip_vs_out_hook, 2155 2257 .pf = NFPROTO_IPV4, 2156 2258 .hooknum = NF_INET_LOCAL_OUT, 2157 2259 .priority = NF_IP_PRI_NAT_DST + 1, 2158 2260 }, 2159 2261 /* After mangle, schedule and forward local requests */ 2160 2262 { 2161 - .hook = ip_vs_local_request4, 2263 + .hook = ip_vs_in_hook, 2162 2264 .pf = NFPROTO_IPV4, 2163 2265 .hooknum = NF_INET_LOCAL_OUT, 2164 2266 .priority = NF_IP_PRI_NAT_DST + 2, ··· 2173 2275 }, 2174 2276 /* After packet filtering, change source only for VS/NAT */ 2175 2277 { 2176 - .hook = ip_vs_reply4, 2278 + .hook = ip_vs_out_hook, 2177 2279 .pf = NFPROTO_IPV4, 2178 2280 .hooknum = NF_INET_FORWARD, 2179 2281 .priority = 100, ··· 2184 2286 static const struct nf_hook_ops ip_vs_ops6[] = { 2185 2287 /* After packet filtering, change source only for VS/NAT */ 2186 2288 { 2187 - .hook = ip_vs_reply6, 2289 + .hook = ip_vs_out_hook, 2188 2290 .pf = NFPROTO_IPV6, 2189 2291 .hooknum = NF_INET_LOCAL_IN, 2190 2292 .priority = NF_IP6_PRI_NAT_SRC - 2, ··· 2193 2295 * or VS/NAT(change destination), so that 
filtering rules can be 2194 2296 * applied to IPVS. */ 2195 2297 { 2196 - .hook = ip_vs_remote_request6, 2298 + .hook = ip_vs_in_hook, 2197 2299 .pf = NFPROTO_IPV6, 2198 2300 .hooknum = NF_INET_LOCAL_IN, 2199 2301 .priority = NF_IP6_PRI_NAT_SRC - 1, 2200 2302 }, 2201 2303 /* Before ip_vs_in, change source only for VS/NAT */ 2202 2304 { 2203 - .hook = ip_vs_local_reply6, 2305 + .hook = ip_vs_out_hook, 2204 2306 .pf = NFPROTO_IPV6, 2205 2307 .hooknum = NF_INET_LOCAL_OUT, 2206 2308 .priority = NF_IP6_PRI_NAT_DST + 1, 2207 2309 }, 2208 2310 /* After mangle, schedule and forward local requests */ 2209 2311 { 2210 - .hook = ip_vs_local_request6, 2312 + .hook = ip_vs_in_hook, 2211 2313 .pf = NFPROTO_IPV6, 2212 2314 .hooknum = NF_INET_LOCAL_OUT, 2213 2315 .priority = NF_IP6_PRI_NAT_DST + 2, ··· 2215 2317 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 2216 2318 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 2217 2319 { 2218 - .hook = ip_vs_forward_icmp_v6, 2320 + .hook = ip_vs_forward_icmp, 2219 2321 .pf = NFPROTO_IPV6, 2220 2322 .hooknum = NF_INET_FORWARD, 2221 2323 .priority = 99, 2222 2324 }, 2223 2325 /* After packet filtering, change source only for VS/NAT */ 2224 2326 { 2225 - .hook = ip_vs_reply6, 2327 + .hook = ip_vs_out_hook, 2226 2328 .pf = NFPROTO_IPV6, 2227 2329 .hooknum = NF_INET_FORWARD, 2228 2330 .priority = 100,
+8
net/netfilter/ipvs/ip_vs_ctl.c
··· 2017 2017 .mode = 0644, 2018 2018 .proc_handler = proc_dointvec, 2019 2019 }, 2020 + { 2021 + .procname = "run_estimation", 2022 + .maxlen = sizeof(int), 2023 + .mode = 0644, 2024 + .proc_handler = proc_dointvec, 2025 + }, 2020 2026 #ifdef CONFIG_IP_VS_DEBUG 2021 2027 { 2022 2028 .procname = "debug_level", ··· 4096 4090 tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; 4097 4091 tbl[idx++].data = &ipvs->sysctl_schedule_icmp; 4098 4092 tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; 4093 + ipvs->sysctl_run_estimation = 1; 4094 + tbl[idx++].data = &ipvs->sysctl_run_estimation; 4099 4095 4100 4096 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); 4101 4097 if (ipvs->sysctl_hdr == NULL) {
+5
net/netfilter/ipvs/ip_vs_est.c
··· 100 100 u64 rate; 101 101 struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); 102 102 103 + if (!sysctl_run_estimation(ipvs)) 104 + goto skip; 105 + 103 106 spin_lock(&ipvs->est_lock); 104 107 list_for_each_entry(e, &ipvs->est_list, list) { 105 108 s = container_of(e, struct ip_vs_stats, est); ··· 134 131 spin_unlock(&s->lock); 135 132 } 136 133 spin_unlock(&ipvs->est_lock); 134 + 135 + skip: 137 136 mod_timer(&ipvs->est_timer, jiffies + 2*HZ); 138 137 } 139 138
+3 -1
net/netfilter/nft_chain_filter.c
··· 310 310 .name = "filter", 311 311 .type = NFT_CHAIN_T_DEFAULT, 312 312 .family = NFPROTO_NETDEV, 313 - .hook_mask = (1 << NF_NETDEV_INGRESS), 313 + .hook_mask = (1 << NF_NETDEV_INGRESS) | 314 + (1 << NF_NETDEV_EGRESS), 314 315 .hooks = { 315 316 [NF_NETDEV_INGRESS] = nft_do_chain_netdev, 317 + [NF_NETDEV_EGRESS] = nft_do_chain_netdev, 316 318 }, 317 319 }; 318 320
+1 -10
net/netfilter/nft_dynset.c
··· 198 198 return -EBUSY; 199 199 200 200 priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); 201 - switch (priv->op) { 202 - case NFT_DYNSET_OP_ADD: 203 - case NFT_DYNSET_OP_DELETE: 204 - break; 205 - case NFT_DYNSET_OP_UPDATE: 206 - if (!(set->flags & NFT_SET_TIMEOUT)) 207 - return -EOPNOTSUPP; 208 - break; 209 - default: 201 + if (priv->op > NFT_DYNSET_OP_DELETE) 210 202 return -EOPNOTSUPP; 211 - } 212 203 213 204 timeout = 0; 214 205 if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+35
net/packet/af_packet.c
··· 91 91 #endif 92 92 #include <linux/bpf.h> 93 93 #include <net/compat.h> 94 + #include <linux/netfilter_netdev.h> 94 95 95 96 #include "internal.h" 96 97 ··· 242 241 static void __fanout_unlink(struct sock *sk, struct packet_sock *po); 243 242 static void __fanout_link(struct sock *sk, struct packet_sock *po); 244 243 244 + #ifdef CONFIG_NETFILTER_EGRESS 245 + static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) 246 + { 247 + struct sk_buff *next, *head = NULL, *tail; 248 + int rc; 249 + 250 + rcu_read_lock(); 251 + for (; skb != NULL; skb = next) { 252 + next = skb->next; 253 + skb_mark_not_on_list(skb); 254 + 255 + if (!nf_hook_egress(skb, &rc, skb->dev)) 256 + continue; 257 + 258 + if (!head) 259 + head = skb; 260 + else 261 + tail->next = skb; 262 + 263 + tail = skb; 264 + } 265 + rcu_read_unlock(); 266 + 267 + return head; 268 + } 269 + #endif 270 + 245 271 static int packet_direct_xmit(struct sk_buff *skb) 246 272 { 273 + #ifdef CONFIG_NETFILTER_EGRESS 274 + if (nf_hook_egress_active()) { 275 + skb = nf_hook_direct_egress(skb); 276 + if (!skb) 277 + return NET_XMIT_DROP; 278 + } 279 + #endif 247 280 return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); 248 281 } 249 282