Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: ctnetlink: deliver events for conntracks changed from userspace

As of now, the creation and update of conntracks via ctnetlink do not
propagate an event to userspace. This can result in inconsistent situations
if several userspace processes modify the connection tracking table by means
of ctnetlink at the same time. Specifically, using the conntrack command
line tool and conntrackd at the same time can trigger inconsistencies.

This patch also modifies the event cache infrastructure to pass the
process PID and the ECHO flag to nfnetlink_send() to report back
to userspace if the process that triggered the change needs so.
Based on a suggestion from Patrick McHardy.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>

authored by

Pablo Neira Ayuso and committed by
Patrick McHardy
19abb7b0 226c0c0e

+197 -34
+1 -1
include/net/netfilter/nf_conntrack.h
··· 199 199 200 200 extern void nf_conntrack_hash_insert(struct nf_conn *ct); 201 201 202 - extern void nf_conntrack_flush(struct net *net); 202 + extern void nf_conntrack_flush(struct net *net, u32 pid, int report); 203 203 204 204 extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, 205 205 unsigned int nhoff, u_int16_t l3num,
+53 -4
include/net/netfilter/nf_conntrack_ecache.h
··· 17 17 unsigned int events; 18 18 }; 19 19 20 + /* This structure is passed to event handler */ 21 + struct nf_ct_event { 22 + struct nf_conn *ct; 23 + u32 pid; 24 + int report; 25 + }; 26 + 20 27 extern struct atomic_notifier_head nf_conntrack_chain; 21 28 extern int nf_conntrack_register_notifier(struct notifier_block *nb); 22 29 extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); ··· 46 39 local_bh_enable(); 47 40 } 48 41 49 - static inline void nf_conntrack_event(enum ip_conntrack_events event, 50 - struct nf_conn *ct) 42 + static inline void 43 + nf_conntrack_event_report(enum ip_conntrack_events event, 44 + struct nf_conn *ct, 45 + u32 pid, 46 + int report) 51 47 { 48 + struct nf_ct_event item = { 49 + .ct = ct, 50 + .pid = pid, 51 + .report = report 52 + }; 52 53 if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) 53 - atomic_notifier_call_chain(&nf_conntrack_chain, event, ct); 54 + atomic_notifier_call_chain(&nf_conntrack_chain, event, &item); 54 55 } 56 + 57 + static inline void 58 + nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) 59 + { 60 + nf_conntrack_event_report(event, ct, 0, 0); 61 + } 62 + 63 + struct nf_exp_event { 64 + struct nf_conntrack_expect *exp; 65 + u32 pid; 66 + int report; 67 + }; 55 68 56 69 extern struct atomic_notifier_head nf_ct_expect_chain; 57 70 extern int nf_ct_expect_register_notifier(struct notifier_block *nb); 58 71 extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb); 59 72 60 73 static inline void 74 + nf_ct_expect_event_report(enum ip_conntrack_expect_events event, 75 + struct nf_conntrack_expect *exp, 76 + u32 pid, 77 + int report) 78 + { 79 + struct nf_exp_event item = { 80 + .exp = exp, 81 + .pid = pid, 82 + .report = report 83 + }; 84 + atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item); 85 + } 86 + 87 + static inline void 61 88 nf_ct_expect_event(enum ip_conntrack_expect_events event, 62 89 struct nf_conntrack_expect *exp) 63 90 { 64 - 
atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); 91 + nf_ct_expect_event_report(event, exp, 0, 0); 65 92 } 66 93 67 94 extern int nf_conntrack_ecache_init(struct net *net); ··· 107 66 struct nf_conn *ct) {} 108 67 static inline void nf_conntrack_event(enum ip_conntrack_events event, 109 68 struct nf_conn *ct) {} 69 + static inline void nf_conntrack_event_report(enum ip_conntrack_events event, 70 + struct nf_conn *ct, 71 + u32 pid, 72 + int report) {} 110 73 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} 111 74 static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, 112 75 struct nf_conntrack_expect *exp) {} 76 + static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, 77 + struct nf_conntrack_expect *exp, 78 + u32 pid, 79 + int report) {} 113 80 static inline void nf_ct_event_cache_flush(struct net *net) {} 114 81 115 82 static inline int nf_conntrack_ecache_init(struct net *net)
+2
include/net/netfilter/nf_conntrack_expect.h
··· 100 100 u_int8_t, const __be16 *, const __be16 *); 101 101 void nf_ct_expect_put(struct nf_conntrack_expect *exp); 102 102 int nf_ct_expect_related(struct nf_conntrack_expect *expect); 103 + int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 104 + u32 pid, int report); 103 105 104 106 #endif /*_NF_CONNTRACK_EXPECT_H*/ 105 107
+21 -4
net/netfilter/nf_conntrack_core.c
··· 181 181 NF_CT_ASSERT(atomic_read(&nfct->use) == 0); 182 182 NF_CT_ASSERT(!timer_pending(&ct->timeout)); 183 183 184 - nf_conntrack_event(IPCT_DESTROY, ct); 184 + if (!test_bit(IPS_DYING_BIT, &ct->status)) 185 + nf_conntrack_event(IPCT_DESTROY, ct); 185 186 set_bit(IPS_DYING_BIT, &ct->status); 186 187 187 188 /* To make sure we don't get any weird locking issues here: ··· 973 972 } 974 973 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); 975 974 975 + struct __nf_ct_flush_report { 976 + u32 pid; 977 + int report; 978 + }; 979 + 976 980 static int kill_all(struct nf_conn *i, void *data) 977 981 { 982 + struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 983 + 984 + /* get_next_corpse sets the dying bit for us */ 985 + nf_conntrack_event_report(IPCT_DESTROY, 986 + i, 987 + fr->pid, 988 + fr->report); 978 989 return 1; 979 990 } 980 991 ··· 1000 987 } 1001 988 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 1002 989 1003 - void nf_conntrack_flush(struct net *net) 990 + void nf_conntrack_flush(struct net *net, u32 pid, int report) 1004 991 { 1005 - nf_ct_iterate_cleanup(net, kill_all, NULL); 992 + struct __nf_ct_flush_report fr = { 993 + .pid = pid, 994 + .report = report, 995 + }; 996 + nf_ct_iterate_cleanup(net, kill_all, &fr); 1006 997 } 1007 998 EXPORT_SYMBOL_GPL(nf_conntrack_flush); 1008 999 ··· 1022 1005 nf_ct_event_cache_flush(net); 1023 1006 nf_conntrack_ecache_fini(net); 1024 1007 i_see_dead_people: 1025 - nf_conntrack_flush(net); 1008 + nf_conntrack_flush(net, 0, 0); 1026 1009 if (atomic_read(&net->ct.count) != 0) { 1027 1010 schedule(); 1028 1011 goto i_see_dead_people;
+11 -3
net/netfilter/nf_conntrack_ecache.c
··· 35 35 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) 36 36 { 37 37 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) 38 - && ecache->events) 39 - atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events, 40 - ecache->ct); 38 + && ecache->events) { 39 + struct nf_ct_event item = { 40 + .ct = ecache->ct, 41 + .pid = 0, 42 + .report = 0 43 + }; 44 + 45 + atomic_notifier_call_chain(&nf_conntrack_chain, 46 + ecache->events, 47 + &item); 48 + } 41 49 42 50 ecache->events = 0; 43 51 nf_ct_put(ecache->ct);
+36 -7
net/netfilter/nf_conntrack_expect.c
··· 362 362 return 1; 363 363 } 364 364 365 - int nf_ct_expect_related(struct nf_conntrack_expect *expect) 365 + static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) 366 366 { 367 367 const struct nf_conntrack_expect_policy *p; 368 368 struct nf_conntrack_expect *i; ··· 371 371 struct net *net = nf_ct_exp_net(expect); 372 372 struct hlist_node *n; 373 373 unsigned int h; 374 - int ret; 374 + int ret = 0; 375 375 376 - NF_CT_ASSERT(master_help); 377 - 378 - spin_lock_bh(&nf_conntrack_lock); 379 376 if (!master_help->helper) { 380 377 ret = -ESHUTDOWN; 381 378 goto out; ··· 406 409 printk(KERN_WARNING 407 410 "nf_conntrack: expectation table full\n"); 408 411 ret = -EMFILE; 409 - goto out; 410 412 } 413 + out: 414 + return ret; 415 + } 416 + 417 + int nf_ct_expect_related(struct nf_conntrack_expect *expect) 418 + { 419 + int ret; 420 + 421 + spin_lock_bh(&nf_conntrack_lock); 422 + ret = __nf_ct_expect_check(expect); 423 + if (ret < 0) 424 + goto out; 411 425 412 426 nf_ct_expect_insert(expect); 427 + atomic_inc(&expect->use); 428 + spin_unlock_bh(&nf_conntrack_lock); 413 429 nf_ct_expect_event(IPEXP_NEW, expect); 414 - ret = 0; 430 + nf_ct_expect_put(expect); 431 + return ret; 415 432 out: 416 433 spin_unlock_bh(&nf_conntrack_lock); 417 434 return ret; 418 435 } 419 436 EXPORT_SYMBOL_GPL(nf_ct_expect_related); 437 + 438 + int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 439 + u32 pid, int report) 440 + { 441 + int ret; 442 + 443 + spin_lock_bh(&nf_conntrack_lock); 444 + ret = __nf_ct_expect_check(expect); 445 + if (ret < 0) 446 + goto out; 447 + nf_ct_expect_insert(expect); 448 + out: 449 + spin_unlock_bh(&nf_conntrack_lock); 450 + if (ret == 0) 451 + nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); 452 + return ret; 453 + } 454 + EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); 420 455 421 456 #ifdef CONFIG_PROC_FS 422 457 struct ct_expect_iter_state {
+73 -15
net/netfilter/nf_conntrack_netlink.c
··· 410 410 struct nlmsghdr *nlh; 411 411 struct nfgenmsg *nfmsg; 412 412 struct nlattr *nest_parms; 413 - struct nf_conn *ct = (struct nf_conn *)ptr; 413 + struct nf_ct_event *item = (struct nf_ct_event *)ptr; 414 + struct nf_conn *ct = item->ct; 414 415 struct sk_buff *skb; 415 416 unsigned int type; 416 417 sk_buff_data_t b; ··· 444 443 b = skb->tail; 445 444 446 445 type |= NFNL_SUBSYS_CTNETLINK << 8; 447 - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); 446 + nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); 448 447 nfmsg = NLMSG_DATA(nlh); 449 448 450 449 nlh->nlmsg_flags = flags; ··· 512 511 rcu_read_unlock(); 513 512 514 513 nlh->nlmsg_len = skb->tail - b; 515 - nfnetlink_send(skb, 0, group, 0); 514 + nfnetlink_send(skb, item->pid, group, item->report); 516 515 return NOTIFY_DONE; 517 516 518 517 nla_put_failure: ··· 723 722 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); 724 723 else { 725 724 /* Flush the whole table */ 726 - nf_conntrack_flush(&init_net); 725 + nf_conntrack_flush(&init_net, 726 + NETLINK_CB(skb).pid, 727 + nlmsg_report(nlh)); 727 728 return 0; 728 729 } 729 730 ··· 745 742 return -ENOENT; 746 743 } 747 744 } 745 + 746 + nf_conntrack_event_report(IPCT_DESTROY, 747 + ct, 748 + NETLINK_CB(skb).pid, 749 + nlmsg_report(nlh)); 750 + 751 + /* death_by_timeout would report the event again */ 752 + set_bit(IPS_DYING_BIT, &ct->status); 748 753 749 754 nf_ct_kill(ct); 750 755 nf_ct_put(ct); ··· 1099 1088 return 0; 1100 1089 } 1101 1090 1091 + static inline void 1092 + ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) 1093 + { 1094 + unsigned int events = 0; 1095 + 1096 + if (test_bit(IPS_EXPECTED_BIT, &ct->status)) 1097 + events |= IPCT_RELATED; 1098 + else 1099 + events |= IPCT_NEW; 1100 + 1101 + nf_conntrack_event_report(IPCT_STATUS | 1102 + IPCT_HELPER | 1103 + IPCT_REFRESH | 1104 + IPCT_PROTOINFO | 1105 + IPCT_NATSEQADJ | 1106 + IPCT_MARK | 1107 + events, 1108 + ct, 1109 + pid, 1110 + 
report); 1111 + } 1112 + 1102 1113 static int 1103 1114 ctnetlink_create_conntrack(struct nlattr *cda[], 1104 1115 struct nf_conntrack_tuple *otuple, 1105 1116 struct nf_conntrack_tuple *rtuple, 1106 - struct nf_conn *master_ct) 1117 + struct nf_conn *master_ct, 1118 + u32 pid, 1119 + int report) 1107 1120 { 1108 1121 struct nf_conn *ct; 1109 1122 int err = -EINVAL; ··· 1233 1198 ct->master = master_ct; 1234 1199 } 1235 1200 1201 + nf_conntrack_get(&ct->ct_general); 1236 1202 add_timer(&ct->timeout); 1237 1203 nf_conntrack_hash_insert(ct); 1238 1204 rcu_read_unlock(); 1205 + ctnetlink_event_report(ct, pid, report); 1206 + nf_ct_put(ct); 1239 1207 1240 1208 return 0; 1241 1209 ··· 1303 1265 err = ctnetlink_create_conntrack(cda, 1304 1266 &otuple, 1305 1267 &rtuple, 1306 - master_ct); 1268 + master_ct, 1269 + NETLINK_CB(skb).pid, 1270 + nlmsg_report(nlh)); 1307 1271 if (err < 0 && master_ct) 1308 1272 nf_ct_put(master_ct); 1309 1273 ··· 1317 1277 * so there's no need to increase the refcount */ 1318 1278 err = -EEXIST; 1319 1279 if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { 1280 + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 1281 + 1320 1282 /* we only allow nat config for new conntracks */ 1321 1283 if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { 1322 1284 err = -EOPNOTSUPP; ··· 1329 1287 err = -EOPNOTSUPP; 1330 1288 goto out_unlock; 1331 1289 } 1332 - err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), 1333 - cda); 1290 + 1291 + err = ctnetlink_change_conntrack(ct, cda); 1292 + if (err == 0) { 1293 + nf_conntrack_get(&ct->ct_general); 1294 + spin_unlock_bh(&nf_conntrack_lock); 1295 + ctnetlink_event_report(ct, 1296 + NETLINK_CB(skb).pid, 1297 + nlmsg_report(nlh)); 1298 + nf_ct_put(ct); 1299 + } else 1300 + spin_unlock_bh(&nf_conntrack_lock); 1301 + 1302 + return err; 1334 1303 } 1335 1304 1336 1305 out_unlock: ··· 1476 1423 { 1477 1424 struct nlmsghdr *nlh; 1478 1425 struct nfgenmsg *nfmsg; 1479 - struct nf_conntrack_expect *exp = (struct nf_conntrack_expect 
*)ptr; 1426 + struct nf_exp_event *item = (struct nf_exp_event *)ptr; 1427 + struct nf_conntrack_expect *exp = item->exp; 1480 1428 struct sk_buff *skb; 1481 1429 unsigned int type; 1482 1430 sk_buff_data_t b; ··· 1499 1445 b = skb->tail; 1500 1446 1501 1447 type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; 1502 - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); 1448 + nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); 1503 1449 nfmsg = NLMSG_DATA(nlh); 1504 1450 1505 1451 nlh->nlmsg_flags = flags; ··· 1513 1459 rcu_read_unlock(); 1514 1460 1515 1461 nlh->nlmsg_len = skb->tail - b; 1516 - nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); 1462 + nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); 1517 1463 return NOTIFY_DONE; 1518 1464 1519 1465 nla_put_failure: ··· 1727 1673 } 1728 1674 1729 1675 static int 1730 - ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) 1676 + ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) 1731 1677 { 1732 1678 struct nf_conntrack_tuple tuple, mask, master_tuple; 1733 1679 struct nf_conntrack_tuple_hash *h = NULL; ··· 1774 1720 memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); 1775 1721 exp->mask.src.u.all = mask.src.u.all; 1776 1722 1777 - err = nf_ct_expect_related(exp); 1723 + err = nf_ct_expect_related_report(exp, pid, report); 1778 1724 nf_ct_expect_put(exp); 1779 1725 1780 1726 out: ··· 1807 1753 if (!exp) { 1808 1754 spin_unlock_bh(&nf_conntrack_lock); 1809 1755 err = -ENOENT; 1810 - if (nlh->nlmsg_flags & NLM_F_CREATE) 1811 - err = ctnetlink_create_expect(cda, u3); 1756 + if (nlh->nlmsg_flags & NLM_F_CREATE) { 1757 + err = ctnetlink_create_expect(cda, 1758 + u3, 1759 + NETLINK_CB(skb).pid, 1760 + nlmsg_report(nlh)); 1761 + } 1812 1762 return err; 1813 1763 } 1814 1764