Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nf-23-08-16' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florian Westphal says:

====================
These are netfilter fixes for the *net* tree.

First patch resolves a false-positive lockdep splat:
rcu_dereference is used outside of rcu read lock. Let lockdep
validate that the transaction mutex is locked.

Second patch fixes a kdoc warning added in previous PR.

Third patch fixes a memory leak:
The catchall element isn't disabled correctly, this allows
userspace to deactivate the element again. This results in refcount
underflow which in turn prevents memory release. This was always
broken since the feature was added in 5.13.

Patch 4 fixes an incorrect change in the previous pull request:
Adding a duplicate key to a set should work if the duplicate key
has expired, restore this behaviour. All from myself.

Patch #5 resolves an old historic artifact in sctp conntrack:
a 300ms timeout for the shutdown_sent and shutdown_recd states.
Increase this to 3s. From Xin Long.

Patch #6 fixes a sysctl data race in ipvs, two threads can clobber the
sysctl value, from Sishuai Gong. This is a day-0 bug that predates git
history.

Patches 7, 8 and 9, from Pablo Neira Ayuso, are also followups
for the previous GC rework in nf_tables: The netlink notifier and the
netns exit path must both increment the gc worker seqcount, else worker
may encounter stale (free'd) pointers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+69 -31
+2 -2
Documentation/networking/nf_conntrack-sysctl.rst
··· 178 178 Default is set to (hb_interval * path_max_retrans + rto_max) 179 179 180 180 nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds) 181 - default 0.3 181 + default 3 182 182 183 183 nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds) 184 - default 0.3 184 + default 3 185 185 186 186 nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds) 187 187 default 3
+1
include/net/netfilter/nf_tables.h
··· 534 534 * @expr: stateful expression 535 535 * @ops: set ops 536 536 * @flags: set flags 537 + * @dead: set will be freed, never cleared 537 538 * @genmask: generation mask 538 539 * @klen: key length 539 540 * @dlen: data length
+4
net/netfilter/ipvs/ip_vs_ctl.c
··· 1876 1876 proc_do_sync_threshold(struct ctl_table *table, int write, 1877 1877 void *buffer, size_t *lenp, loff_t *ppos) 1878 1878 { 1879 + struct netns_ipvs *ipvs = table->extra2; 1879 1880 int *valp = table->data; 1880 1881 int val[2]; 1881 1882 int rc; ··· 1886 1885 .mode = table->mode, 1887 1886 }; 1888 1887 1888 + mutex_lock(&ipvs->sync_mutex); 1889 1889 memcpy(val, valp, sizeof(val)); 1890 1890 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1891 1891 if (write) { ··· 1896 1894 else 1897 1895 memcpy(valp, val, sizeof(val)); 1898 1896 } 1897 + mutex_unlock(&ipvs->sync_mutex); 1899 1898 return rc; 1900 1899 } 1901 1900 ··· 4324 4321 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 4325 4322 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 4326 4323 tbl[idx].data = &ipvs->sysctl_sync_threshold; 4324 + tbl[idx].extra2 = ipvs; 4327 4325 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 4328 4326 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; 4329 4327 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
+3 -3
net/netfilter/nf_conntrack_proto_sctp.c
··· 49 49 [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, 50 50 [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, 51 51 [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, 52 - [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, 53 - [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, 52 + [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS, 53 + [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS, 54 54 [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, 55 55 [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, 56 56 }; ··· 105 105 { 106 106 /* ORIGINAL */ 107 107 /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ 108 - /* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW}, 108 + /* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW}, 109 109 /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, 110 110 /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, 111 111 /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
+39 -5
net/netfilter/nf_tables_api.c
··· 7091 7091 ret = __nft_set_catchall_flush(ctx, set, &elem); 7092 7092 if (ret < 0) 7093 7093 break; 7094 + nft_set_elem_change_active(ctx->net, set, ext); 7094 7095 } 7095 7096 7096 7097 return ret; ··· 9481 9480 if (!trans) 9482 9481 return NULL; 9483 9482 9483 + trans->net = maybe_get_net(net); 9484 + if (!trans->net) { 9485 + kfree(trans); 9486 + return NULL; 9487 + } 9488 + 9484 9489 refcount_inc(&set->refs); 9485 9490 trans->set = set; 9486 - trans->net = get_net(net); 9487 9491 trans->seq = gc_seq; 9488 9492 9489 9493 return trans; ··· 9744 9738 } 9745 9739 } 9746 9740 9741 + static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net) 9742 + { 9743 + unsigned int gc_seq; 9744 + 9745 + /* Bump gc counter, it becomes odd, this is the busy mark. */ 9746 + gc_seq = READ_ONCE(nft_net->gc_seq); 9747 + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); 9748 + 9749 + return gc_seq; 9750 + } 9751 + 9752 + static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) 9753 + { 9754 + WRITE_ONCE(nft_net->gc_seq, ++gc_seq); 9755 + } 9756 + 9747 9757 static int nf_tables_commit(struct net *net, struct sk_buff *skb) 9748 9758 { 9749 9759 struct nftables_pernet *nft_net = nft_pernet(net); ··· 9845 9823 9846 9824 WRITE_ONCE(nft_net->base_seq, base_seq); 9847 9825 9848 - /* Bump gc counter, it becomes odd, this is the busy mark. */ 9849 - gc_seq = READ_ONCE(nft_net->gc_seq); 9850 - WRITE_ONCE(nft_net->gc_seq, ++gc_seq); 9826 + gc_seq = nft_gc_seq_begin(nft_net); 9851 9827 9852 9828 /* step 3. Start new generation, rules_gen_X now in use. 
*/ 9853 9829 net->nft.gencursor = nft_gencursor_next(net); ··· 10058 10038 nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); 10059 10039 nf_tables_commit_audit_log(&adl, nft_net->base_seq); 10060 10040 10061 - WRITE_ONCE(nft_net->gc_seq, ++gc_seq); 10041 + nft_gc_seq_end(nft_net, gc_seq); 10062 10042 nf_tables_commit_release(net); 10063 10043 10064 10044 return 0; ··· 11059 11039 struct net *net = n->net; 11060 11040 unsigned int deleted; 11061 11041 bool restart = false; 11042 + unsigned int gc_seq; 11062 11043 11063 11044 if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) 11064 11045 return NOTIFY_DONE; ··· 11067 11046 nft_net = nft_pernet(net); 11068 11047 deleted = 0; 11069 11048 mutex_lock(&nft_net->commit_mutex); 11049 + 11050 + gc_seq = nft_gc_seq_begin(nft_net); 11051 + 11070 11052 if (!list_empty(&nf_tables_destroy_list)) 11071 11053 rcu_barrier(); 11072 11054 again: ··· 11092 11068 if (restart) 11093 11069 goto again; 11094 11070 } 11071 + nft_gc_seq_end(nft_net, gc_seq); 11072 + 11095 11073 mutex_unlock(&nft_net->commit_mutex); 11096 11074 11097 11075 return NOTIFY_DONE; ··· 11131 11105 static void __net_exit nf_tables_exit_net(struct net *net) 11132 11106 { 11133 11107 struct nftables_pernet *nft_net = nft_pernet(net); 11108 + unsigned int gc_seq; 11134 11109 11135 11110 mutex_lock(&nft_net->commit_mutex); 11111 + 11112 + gc_seq = nft_gc_seq_begin(nft_net); 11113 + 11136 11114 if (!list_empty(&nft_net->commit_list) || 11137 11115 !list_empty(&nft_net->module_list)) 11138 11116 __nf_tables_abort(net, NFNL_ABORT_NONE); 11117 + 11139 11118 __nft_release_tables(net); 11119 + 11120 + nft_gc_seq_end(nft_net, gc_seq); 11121 + 11140 11122 mutex_unlock(&nft_net->commit_mutex); 11141 11123 WARN_ON_ONCE(!list_empty(&nft_net->tables)); 11142 11124 WARN_ON_ONCE(!list_empty(&nft_net->module_list));
+3
net/netfilter/nft_dynset.c
··· 191 191 if (IS_ERR(set)) 192 192 return PTR_ERR(set); 193 193 194 + if (set->flags & NFT_SET_OBJECT) 195 + return -EOPNOTSUPP; 196 + 194 197 if (set->ops->update == NULL) 195 198 return -EOPNOTSUPP; 196 199
+17 -21
net/netfilter/nft_set_pipapo.c
··· 566 566 goto out; 567 567 568 568 if (last) { 569 + if (nft_set_elem_expired(&f->mt[b].e->ext)) 570 + goto next_match; 569 571 if ((genmask && 570 572 !nft_set_elem_active(&f->mt[b].e->ext, genmask))) 571 573 goto next_match; ··· 602 600 static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, 603 601 const struct nft_set_elem *elem, unsigned int flags) 604 602 { 605 - struct nft_pipapo_elem *ret; 606 - 607 - ret = pipapo_get(net, set, (const u8 *)elem->key.val.data, 603 + return pipapo_get(net, set, (const u8 *)elem->key.val.data, 608 604 nft_genmask_cur(net)); 609 - if (IS_ERR(ret)) 610 - return ret; 611 - 612 - if (nft_set_elem_expired(&ret->ext)) 613 - return ERR_PTR(-ENOENT); 614 - 615 - return ret; 616 605 } 617 606 618 607 /** ··· 1542 1549 1543 1550 /** 1544 1551 * pipapo_gc() - Drop expired entries from set, destroy start and end elements 1545 - * @set: nftables API set representation 1552 + * @_set: nftables API set representation 1546 1553 * @m: Matching data 1547 1554 */ 1548 1555 static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) ··· 1690 1697 priv->clone = new_clone; 1691 1698 } 1692 1699 1700 + static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set) 1701 + { 1702 + #ifdef CONFIG_PROVE_LOCKING 1703 + const struct net *net = read_pnet(&set->net); 1704 + 1705 + return lockdep_is_held(&nft_pernet(net)->commit_mutex); 1706 + #else 1707 + return true; 1708 + #endif 1709 + } 1710 + 1693 1711 static void nft_pipapo_abort(const struct nft_set *set) 1694 1712 { 1695 1713 struct nft_pipapo *priv = nft_set_priv(set); ··· 1709 1705 if (!priv->dirty) 1710 1706 return; 1711 1707 1712 - m = rcu_dereference(priv->match); 1708 + m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set)); 1713 1709 1714 1710 new_clone = pipapo_clone(m); 1715 1711 if (IS_ERR(new_clone)) ··· 1736 1732 const struct nft_set *set, 1737 1733 const struct nft_set_elem *elem) 1738 1734 { 1739 - struct 
nft_pipapo_elem *e; 1740 - 1741 - e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0); 1742 - if (IS_ERR(e)) 1743 - return; 1735 + struct nft_pipapo_elem *e = elem->priv; 1744 1736 1745 1737 nft_set_elem_change_active(net, set, &e->ext); 1746 1738 } ··· 1949 1949 const u8 *data; 1950 1950 1951 1951 data = (const u8 *)nft_set_ext_key(&e->ext); 1952 - 1953 - e = pipapo_get(net, set, data, 0); 1954 - if (IS_ERR(e)) 1955 - return; 1956 1952 1957 1953 while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { 1958 1954 union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];