Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: conntrack: Fix data-races around ct mark

nf_conn:mark can be read from and written to in parallel. Use
READ_ONCE()/WRITE_ONCE() for reads and writes to prevent unwanted
compiler optimizations.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Authored by Daniel Xu and committed by Pablo Neira Ayuso
commit 52d1aa8b (parent 40b9d1ab)

+45 -39
+1 -1
net/core/flow_dissector.c
··· 296 296 key->ct_zone = ct->zone.id; 297 297 #endif 298 298 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 299 - key->ct_mark = ct->mark; 299 + key->ct_mark = READ_ONCE(ct->mark); 300 300 #endif 301 301 302 302 cl = nf_ct_labels_find(ct);
+2 -2
net/ipv4/netfilter/ipt_CLUSTERIP.c
··· 435 435 436 436 switch (ctinfo) { 437 437 case IP_CT_NEW: 438 - ct->mark = hash; 438 + WRITE_ONCE(ct->mark, hash); 439 439 break; 440 440 case IP_CT_RELATED: 441 441 case IP_CT_RELATED_REPLY: ··· 452 452 #ifdef DEBUG 453 453 nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 454 454 #endif 455 - pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); 455 + pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); 456 456 if (!clusterip_responsible(cipinfo->config, hash)) { 457 457 pr_debug("not responsible\n"); 458 458 return NF_DROP;
+1 -1
net/netfilter/nf_conntrack_core.c
··· 1781 1781 } 1782 1782 1783 1783 #ifdef CONFIG_NF_CONNTRACK_MARK 1784 - ct->mark = exp->master->mark; 1784 + ct->mark = READ_ONCE(exp->master->mark); 1785 1785 #endif 1786 1786 #ifdef CONFIG_NF_CONNTRACK_SECMARK 1787 1787 ct->secmark = exp->master->secmark;
+14 -10
net/netfilter/nf_conntrack_netlink.c
··· 328 328 } 329 329 330 330 #ifdef CONFIG_NF_CONNTRACK_MARK 331 - static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) 331 + static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) 332 332 { 333 - if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) 333 + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) 334 334 goto nla_put_failure; 335 335 return 0; 336 336 ··· 543 543 static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) 544 544 { 545 545 if (ctnetlink_dump_status(skb, ct) < 0 || 546 - ctnetlink_dump_mark(skb, ct) < 0 || 546 + ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || 547 547 ctnetlink_dump_secctx(skb, ct) < 0 || 548 548 ctnetlink_dump_id(skb, ct) < 0 || 549 549 ctnetlink_dump_use(skb, ct) < 0 || ··· 722 722 struct sk_buff *skb; 723 723 unsigned int type; 724 724 unsigned int flags = 0, group; 725 + u32 mark; 725 726 int err; 726 727 727 728 if (events & (1 << IPCT_DESTROY)) { ··· 827 826 } 828 827 829 828 #ifdef CONFIG_NF_CONNTRACK_MARK 830 - if ((events & (1 << IPCT_MARK) || ct->mark) 831 - && ctnetlink_dump_mark(skb, ct) < 0) 829 + mark = READ_ONCE(ct->mark); 830 + if ((events & (1 << IPCT_MARK) || mark) && 831 + ctnetlink_dump_mark(skb, mark) < 0) 832 832 goto nla_put_failure; 833 833 #endif 834 834 nlmsg_end(skb, nlh); ··· 1156 1154 } 1157 1155 1158 1156 #ifdef CONFIG_NF_CONNTRACK_MARK 1159 - if ((ct->mark & filter->mark.mask) != filter->mark.val) 1157 + if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) 1160 1158 goto ignore_entry; 1161 1159 #endif 1162 1160 status = (u32)READ_ONCE(ct->status); ··· 2004 2002 mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); 2005 2003 2006 2004 mark = ntohl(nla_get_be32(cda[CTA_MARK])); 2007 - newmark = (ct->mark & mask) ^ mark; 2008 - if (newmark != ct->mark) 2009 - ct->mark = newmark; 2005 + newmark = (READ_ONCE(ct->mark) & mask) ^ mark; 2006 + if (newmark != READ_ONCE(ct->mark)) 2007 + WRITE_ONCE(ct->mark, newmark); 2010 2008 } 2011 2009 #endif 2012 
2010 ··· 2671 2669 { 2672 2670 const struct nf_conntrack_zone *zone; 2673 2671 struct nlattr *nest_parms; 2672 + u32 mark; 2674 2673 2675 2674 zone = nf_ct_zone(ct); 2676 2675 ··· 2733 2730 goto nla_put_failure; 2734 2731 2735 2732 #ifdef CONFIG_NF_CONNTRACK_MARK 2736 - if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) 2733 + mark = READ_ONCE(ct->mark); 2734 + if (mark && ctnetlink_dump_mark(skb, mark) < 0) 2737 2735 goto nla_put_failure; 2738 2736 #endif 2739 2737 if (ctnetlink_dump_labels(skb, ct) < 0)
+1 -1
net/netfilter/nf_conntrack_standalone.c
··· 366 366 goto release; 367 367 368 368 #if defined(CONFIG_NF_CONNTRACK_MARK) 369 - seq_printf(s, "mark=%u ", ct->mark); 369 + seq_printf(s, "mark=%u ", READ_ONCE(ct->mark)); 370 370 #endif 371 371 372 372 ct_show_secctx(s, ct);
+3 -3
net/netfilter/nft_ct.c
··· 98 98 return; 99 99 #ifdef CONFIG_NF_CONNTRACK_MARK 100 100 case NFT_CT_MARK: 101 - *dest = ct->mark; 101 + *dest = READ_ONCE(ct->mark); 102 102 return; 103 103 #endif 104 104 #ifdef CONFIG_NF_CONNTRACK_SECMARK ··· 297 297 switch (priv->key) { 298 298 #ifdef CONFIG_NF_CONNTRACK_MARK 299 299 case NFT_CT_MARK: 300 - if (ct->mark != value) { 301 - ct->mark = value; 300 + if (READ_ONCE(ct->mark) != value) { 301 + WRITE_ONCE(ct->mark, value); 302 302 nf_conntrack_event_cache(IPCT_MARK, ct); 303 303 } 304 304 break;
+10 -8
net/netfilter/xt_connmark.c
··· 30 30 u_int32_t new_targetmark; 31 31 struct nf_conn *ct; 32 32 u_int32_t newmark; 33 + u_int32_t oldmark; 33 34 34 35 ct = nf_ct_get(skb, &ctinfo); 35 36 if (ct == NULL) ··· 38 37 39 38 switch (info->mode) { 40 39 case XT_CONNMARK_SET: 41 - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; 40 + oldmark = READ_ONCE(ct->mark); 41 + newmark = (oldmark & ~info->ctmask) ^ info->ctmark; 42 42 if (info->shift_dir == D_SHIFT_RIGHT) 43 43 newmark >>= info->shift_bits; 44 44 else 45 45 newmark <<= info->shift_bits; 46 46 47 - if (ct->mark != newmark) { 48 - ct->mark = newmark; 47 + if (READ_ONCE(ct->mark) != newmark) { 48 + WRITE_ONCE(ct->mark, newmark); 49 49 nf_conntrack_event_cache(IPCT_MARK, ct); 50 50 } 51 51 break; ··· 57 55 else 58 56 new_targetmark <<= info->shift_bits; 59 57 60 - newmark = (ct->mark & ~info->ctmask) ^ 58 + newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^ 61 59 new_targetmark; 62 - if (ct->mark != newmark) { 63 - ct->mark = newmark; 60 + if (READ_ONCE(ct->mark) != newmark) { 61 + WRITE_ONCE(ct->mark, newmark); 64 62 nf_conntrack_event_cache(IPCT_MARK, ct); 65 63 } 66 64 break; 67 65 case XT_CONNMARK_RESTORE: 68 - new_targetmark = (ct->mark & info->ctmask); 66 + new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); 69 67 if (info->shift_dir == D_SHIFT_RIGHT) 70 68 new_targetmark >>= info->shift_bits; 71 69 else ··· 128 126 if (ct == NULL) 129 127 return false; 130 128 131 - return ((ct->mark & info->mask) == info->mark) ^ info->invert; 129 + return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; 132 130 } 133 131 134 132 static int connmark_mt_check(const struct xt_mtchk_param *par)
+4 -4
net/openvswitch/conntrack.c
··· 152 152 static u32 ovs_ct_get_mark(const struct nf_conn *ct) 153 153 { 154 154 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 155 - return ct ? ct->mark : 0; 155 + return ct ? READ_ONCE(ct->mark) : 0; 156 156 #else 157 157 return 0; 158 158 #endif ··· 340 340 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 341 341 u32 new_mark; 342 342 343 - new_mark = ct_mark | (ct->mark & ~(mask)); 344 - if (ct->mark != new_mark) { 345 - ct->mark = new_mark; 343 + new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); 344 + if (READ_ONCE(ct->mark) != new_mark) { 345 + WRITE_ONCE(ct->mark, new_mark); 346 346 if (nf_ct_is_confirmed(ct)) 347 347 nf_conntrack_event_cache(IPCT_MARK, ct); 348 348 key->ct.mark = new_mark;
+2 -2
net/sched/act_connmark.c
··· 61 61 62 62 c = nf_ct_get(skb, &ctinfo); 63 63 if (c) { 64 - skb->mark = c->mark; 64 + skb->mark = READ_ONCE(c->mark); 65 65 /* using overlimits stats to count how many packets marked */ 66 66 ca->tcf_qstats.overlimits++; 67 67 goto out; ··· 81 81 c = nf_ct_tuplehash_to_ctrack(thash); 82 82 /* using overlimits stats to count how many packets marked */ 83 83 ca->tcf_qstats.overlimits++; 84 - skb->mark = c->mark; 84 + skb->mark = READ_ONCE(c->mark); 85 85 nf_ct_put(c); 86 86 87 87 out:
+4 -4
net/sched/act_ct.c
··· 178 178 entry = tcf_ct_flow_table_flow_action_get_next(action); 179 179 entry->id = FLOW_ACTION_CT_METADATA; 180 180 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 181 - entry->ct_metadata.mark = ct->mark; 181 + entry->ct_metadata.mark = READ_ONCE(ct->mark); 182 182 #endif 183 183 ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : 184 184 IP_CT_ESTABLISHED_REPLY; ··· 936 936 if (!mask) 937 937 return; 938 938 939 - new_mark = mark | (ct->mark & ~(mask)); 940 - if (ct->mark != new_mark) { 941 - ct->mark = new_mark; 939 + new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); 940 + if (READ_ONCE(ct->mark) != new_mark) { 941 + WRITE_ONCE(ct->mark, new_mark); 942 942 if (nf_ct_is_confirmed(ct)) 943 943 nf_conntrack_event_cache(IPCT_MARK, ct); 944 944 }
+3 -3
net/sched/act_ctinfo.c
··· 32 32 { 33 33 u8 dscp, newdscp; 34 34 35 - newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & 35 + newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & 36 36 ~INET_ECN_MASK; 37 37 38 38 switch (proto) { ··· 72 72 struct sk_buff *skb) 73 73 { 74 74 ca->stats_cpmark_set++; 75 - skb->mark = ct->mark & cp->cpmarkmask; 75 + skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; 76 76 } 77 77 78 78 static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, ··· 130 130 } 131 131 132 132 if (cp->mode & CTINFO_MODE_DSCP) 133 - if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) 133 + if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) 134 134 tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); 135 135 136 136 if (cp->mode & CTINFO_MODE_CPMARK)