Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_conntrack_tstamp: add flow-based timestamp extension

This patch adds flow-based timestamping for conntracks. Basically,
we use two 64-bit variables: one to store the creation timestamp
once the conntrack has been confirmed, and the other to store the
deletion time. This conntrack extension is disabled by default; to
enable it, you have to:

echo 1 > /proc/sys/net/netfilter/nf_conntrack_timestamp

This patch allows saving memory in user-space flow-based
loggers such as ulogd2. In short, ulogd2 does not need to
keep a hashtable with the conntracks in user-space to know
when they were created and destroyed; instead we use the
kernel timestamp. If we want to have a sane IPFIX implementation
in user-space, these nanosecond-resolution timestamps are also
useful. Other custom user-space applications can benefit from
this via libnetfilter_conntrack.

This patch modifies the /proc output to display the delta time
in seconds since the flow start. You can also obtain the
flow-start date by means of the conntrack-tools.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>

authored by

Pablo Neira Ayuso and committed by
Patrick McHardy
a992ca2a 93557f53

+312 -1
+4
include/net/netfilter/nf_conntrack_extend.h
··· 17 17 #ifdef CONFIG_NF_CONNTRACK_ZONES 18 18 NF_CT_EXT_ZONE, 19 19 #endif 20 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 21 + NF_CT_EXT_TSTAMP, 22 + #endif 20 23 NF_CT_EXT_NUM, 21 24 }; 22 25 ··· 28 25 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter 29 26 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache 30 27 #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone 28 + #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp 31 29 32 30 /* Extensions: optional stuff which isn't permanently in struct. */ 33 31 struct nf_ct_ext {
+53
include/net/netfilter/nf_conntrack_timestamp.h
··· 1 + #ifndef _NF_CONNTRACK_TSTAMP_H 2 + #define _NF_CONNTRACK_TSTAMP_H 3 + 4 + #include <net/net_namespace.h> 5 + #include <linux/netfilter/nf_conntrack_common.h> 6 + #include <linux/netfilter/nf_conntrack_tuple_common.h> 7 + #include <net/netfilter/nf_conntrack.h> 8 + #include <net/netfilter/nf_conntrack_extend.h> 9 + 10 + struct nf_conn_tstamp { 11 + u_int64_t start; 12 + u_int64_t stop; 13 + }; 14 + 15 + static inline 16 + struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct) 17 + { 18 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 19 + return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP); 20 + #else 21 + return NULL; 22 + #endif 23 + } 24 + 25 + static inline 26 + struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) 27 + { 28 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 29 + struct net *net = nf_ct_net(ct); 30 + 31 + if (!net->ct.sysctl_tstamp) 32 + return NULL; 33 + 34 + return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp); 35 + #else 36 + return NULL; 37 + #endif 38 + }; 39 + 40 + static inline bool nf_ct_tstamp_enabled(struct net *net) 41 + { 42 + return net->ct.sysctl_tstamp != 0; 43 + } 44 + 45 + static inline void nf_ct_set_tstamp(struct net *net, bool enable) 46 + { 47 + net->ct.sysctl_tstamp = enable; 48 + } 49 + 50 + extern int nf_conntrack_tstamp_init(struct net *net); 51 + extern void nf_conntrack_tstamp_fini(struct net *net); 52 + 53 + #endif /* _NF_CONNTRACK_TSTAMP_H */
+2
include/net/netns/conntrack.h
··· 21 21 int sysctl_events; 22 22 unsigned int sysctl_events_retry_timeout; 23 23 int sysctl_acct; 24 + int sysctl_tstamp; 24 25 int sysctl_checksum; 25 26 unsigned int sysctl_log_invalid; /* Log invalid packets */ 26 27 #ifdef CONFIG_SYSCTL 27 28 struct ctl_table_header *sysctl_header; 28 29 struct ctl_table_header *acct_sysctl_header; 30 + struct ctl_table_header *tstamp_sysctl_header; 29 31 struct ctl_table_header *event_sysctl_header; 30 32 #endif 31 33 char *slabname;
+11
net/netfilter/Kconfig
··· 85 85 86 86 If unsure, say `N'. 87 87 88 + config NF_CONNTRACK_TIMESTAMP 89 + bool 'Connection tracking timestamping' 90 + depends on NETFILTER_ADVANCED 91 + help 92 + This option enables support for connection tracking timestamping. 93 + This allows you to store the flow start-time and to obtain 94 + the flow-stop time (once it has been destroyed) via Connection 95 + tracking events. 96 + 97 + If unsure, say `N'. 98 + 88 99 config NF_CT_PROTO_DCCP 89 100 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' 90 101 depends on EXPERIMENTAL
+1
net/netfilter/Makefile
··· 1 1 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 2 2 3 3 nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o 4 + nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o 4 5 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 5 6 6 7 obj-$(CONFIG_NETFILTER) = netfilter.o
+26
net/netfilter/nf_conntrack_core.c
··· 43 43 #include <net/netfilter/nf_conntrack_acct.h> 44 44 #include <net/netfilter/nf_conntrack_ecache.h> 45 45 #include <net/netfilter/nf_conntrack_zones.h> 46 + #include <net/netfilter/nf_conntrack_timestamp.h> 46 47 #include <net/netfilter/nf_nat.h> 47 48 #include <net/netfilter/nf_nat_core.h> 48 49 ··· 283 282 static void death_by_timeout(unsigned long ul_conntrack) 284 283 { 285 284 struct nf_conn *ct = (void *)ul_conntrack; 285 + struct nf_conn_tstamp *tstamp; 286 + 287 + tstamp = nf_conn_tstamp_find(ct); 288 + if (tstamp && tstamp->stop == 0) 289 + tstamp->stop = ktime_to_ns(ktime_get_real()); 286 290 287 291 if (!test_bit(IPS_DYING_BIT, &ct->status) && 288 292 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { ··· 425 419 struct nf_conntrack_tuple_hash *h; 426 420 struct nf_conn *ct; 427 421 struct nf_conn_help *help; 422 + struct nf_conn_tstamp *tstamp; 428 423 struct hlist_nulls_node *n; 429 424 enum ip_conntrack_info ctinfo; 430 425 struct net *net; ··· 495 488 atomic_inc(&ct->ct_general.use); 496 489 ct->status |= IPS_CONFIRMED; 497 490 491 + /* set conntrack timestamp, if enabled. */ 492 + tstamp = nf_conn_tstamp_find(ct); 493 + if (tstamp) { 494 + if (skb->tstamp.tv64 == 0) 495 + __net_timestamp((struct sk_buff *)skb); 496 + 497 + tstamp->start = ktime_to_ns(skb->tstamp); 498 + } 498 499 /* Since the lookup is lockless, hash insertion must be done after 499 500 * starting the timer and setting the CONFIRMED bit. The RCU barriers 500 501 * guarantee that no other CPU can find the conntrack before the above ··· 761 746 } 762 747 763 748 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 749 + nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 764 750 765 751 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 766 752 nf_ct_ecache_ext_add(ct, ecache ? 
ecache->ctmask : 0, ··· 1202 1186 static int kill_report(struct nf_conn *i, void *data) 1203 1187 { 1204 1188 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 1189 + struct nf_conn_tstamp *tstamp; 1190 + 1191 + tstamp = nf_conn_tstamp_find(i); 1192 + if (tstamp && tstamp->stop == 0) 1193 + tstamp->stop = ktime_to_ns(ktime_get_real()); 1205 1194 1206 1195 /* If we fail to deliver the event, death_by_timeout() will retry */ 1207 1196 if (nf_conntrack_event_report(IPCT_DESTROY, i, ··· 1518 1497 ret = nf_conntrack_acct_init(net); 1519 1498 if (ret < 0) 1520 1499 goto err_acct; 1500 + ret = nf_conntrack_tstamp_init(net); 1501 + if (ret < 0) 1502 + goto err_tstamp; 1521 1503 ret = nf_conntrack_ecache_init(net); 1522 1504 if (ret < 0) 1523 1505 goto err_ecache; ··· 1528 1504 return 0; 1529 1505 1530 1506 err_ecache: 1507 + nf_conntrack_tstamp_fini(net); 1508 + err_tstamp: 1531 1509 nf_conntrack_acct_fini(net); 1532 1510 err_acct: 1533 1511 nf_conntrack_expect_fini(net);
+45 -1
net/netfilter/nf_conntrack_netlink.c
··· 42 42 #include <net/netfilter/nf_conntrack_tuple.h> 43 43 #include <net/netfilter/nf_conntrack_acct.h> 44 44 #include <net/netfilter/nf_conntrack_zones.h> 45 + #include <net/netfilter/nf_conntrack_timestamp.h> 45 46 #ifdef CONFIG_NF_NAT_NEEDED 46 47 #include <net/netfilter/nf_nat_core.h> 47 48 #include <net/netfilter/nf_nat_protocol.h> ··· 231 230 return -1; 232 231 } 233 232 233 + static int 234 + ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) 235 + { 236 + struct nlattr *nest_count; 237 + const struct nf_conn_tstamp *tstamp; 238 + 239 + tstamp = nf_conn_tstamp_find(ct); 240 + if (!tstamp) 241 + return 0; 242 + 243 + nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED); 244 + if (!nest_count) 245 + goto nla_put_failure; 246 + 247 + NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)); 248 + if (tstamp->stop != 0) { 249 + NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP, 250 + cpu_to_be64(tstamp->stop)); 251 + } 252 + nla_nest_end(skb, nest_count); 253 + 254 + return 0; 255 + 256 + nla_put_failure: 257 + return -1; 258 + } 259 + 234 260 #ifdef CONFIG_NF_CONNTRACK_MARK 235 261 static inline int 236 262 ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) ··· 432 404 ctnetlink_dump_timeout(skb, ct) < 0 || 433 405 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 434 406 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || 407 + ctnetlink_dump_timestamp(skb, ct) < 0 || 435 408 ctnetlink_dump_protoinfo(skb, ct) < 0 || 436 409 ctnetlink_dump_helpinfo(skb, ct) < 0 || 437 410 ctnetlink_dump_mark(skb, ct) < 0 || ··· 500 471 } 501 472 502 473 static inline size_t 474 + ctnetlink_timestamp_size(const struct nf_conn *ct) 475 + { 476 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 477 + if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) 478 + return 0; 479 + return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t)); 480 + #else 481 + return 0; 482 + #endif 483 + } 484 + 485 + static inline size_t 503 486 
ctnetlink_nlmsg_size(const struct nf_conn *ct) 504 487 { 505 488 return NLMSG_ALIGN(sizeof(struct nfgenmsg)) ··· 522 481 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ 523 482 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ 524 483 + ctnetlink_counters_size(ct) 484 + + ctnetlink_timestamp_size(ct) 525 485 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ 526 486 + nla_total_size(0) /* CTA_PROTOINFO */ 527 487 + nla_total_size(0) /* CTA_HELP */ ··· 613 571 614 572 if (events & (1 << IPCT_DESTROY)) { 615 573 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 616 - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 574 + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || 575 + ctnetlink_dump_timestamp(skb, ct) < 0) 617 576 goto nla_put_failure; 618 577 } else { 619 578 if (ctnetlink_dump_timeout(skb, ct) < 0) ··· 1403 1360 } 1404 1361 1405 1362 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1363 + nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 1406 1364 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); 1407 1365 /* we must add conntrack extensions before confirmation. */ 1408 1366 ct->status |= IPS_CONFIRMED;
+41
net/netfilter/nf_conntrack_standalone.c
··· 29 29 #include <net/netfilter/nf_conntrack_helper.h> 30 30 #include <net/netfilter/nf_conntrack_acct.h> 31 31 #include <net/netfilter/nf_conntrack_zones.h> 32 + #include <net/netfilter/nf_conntrack_timestamp.h> 32 33 #include <linux/rculist_nulls.h> 33 34 34 35 MODULE_LICENSE("GPL"); ··· 47 46 struct ct_iter_state { 48 47 struct seq_net_private p; 49 48 unsigned int bucket; 49 + u_int64_t time_now; 50 50 }; 51 51 52 52 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) ··· 98 96 static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 99 97 __acquires(RCU) 100 98 { 99 + struct ct_iter_state *st = seq->private; 100 + 101 + st->time_now = ktime_to_ns(ktime_get_real()); 101 102 rcu_read_lock(); 102 103 return ct_get_idx(seq, *pos); 103 104 } ··· 135 130 } 136 131 #else 137 132 static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) 133 + { 134 + return 0; 135 + } 136 + #endif 137 + 138 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 139 + static u_int64_t ct_delta_time(u_int64_t time_now, const struct nf_conn *ct) 140 + { 141 + struct nf_conn_tstamp *tstamp; 142 + 143 + tstamp = nf_conn_tstamp_find(ct); 144 + if (tstamp) { 145 + u_int64_t delta_time = time_now - tstamp->start; 146 + return delta_time > 0 ? 
div_s64(delta_time, NSEC_PER_SEC) : 0; 147 + } 148 + return -1; 149 + } 150 + 151 + static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) 152 + { 153 + struct ct_iter_state *st = s->private; 154 + u_int64_t delta_time; 155 + 156 + delta_time = ct_delta_time(st->time_now, ct); 157 + if (delta_time < 0) 158 + return 0; 159 + 160 + return seq_printf(s, "delta-time=%llu ", 161 + (unsigned long long)delta_time); 162 + } 163 + #else 164 + static inline int 165 + ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) 138 166 { 139 167 return 0; 140 168 } ··· 240 202 if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) 241 203 goto release; 242 204 #endif 205 + 206 + if (ct_show_delta_time(s, ct)) 207 + goto release; 243 208 244 209 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 245 210 goto release;
+120
net/netfilter/nf_conntrack_timestamp.c
··· 1 + /* 2 + * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org> 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation (or any later at your option). 7 + */ 8 + 9 + #include <linux/netfilter.h> 10 + #include <linux/slab.h> 11 + #include <linux/kernel.h> 12 + #include <linux/moduleparam.h> 13 + 14 + #include <net/netfilter/nf_conntrack.h> 15 + #include <net/netfilter/nf_conntrack_extend.h> 16 + #include <net/netfilter/nf_conntrack_timestamp.h> 17 + 18 + static int nf_ct_tstamp __read_mostly; 19 + 20 + module_param_named(tstamp, nf_ct_tstamp, bool, 0644); 21 + MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); 22 + 23 + #ifdef CONFIG_SYSCTL 24 + static struct ctl_table tstamp_sysctl_table[] = { 25 + { 26 + .procname = "nf_conntrack_timestamp", 27 + .data = &init_net.ct.sysctl_tstamp, 28 + .maxlen = sizeof(unsigned int), 29 + .mode = 0644, 30 + .proc_handler = proc_dointvec, 31 + }, 32 + {} 33 + }; 34 + #endif /* CONFIG_SYSCTL */ 35 + 36 + static struct nf_ct_ext_type tstamp_extend __read_mostly = { 37 + .len = sizeof(struct nf_conn_tstamp), 38 + .align = __alignof__(struct nf_conn_tstamp), 39 + .id = NF_CT_EXT_TSTAMP, 40 + }; 41 + 42 + #ifdef CONFIG_SYSCTL 43 + static int nf_conntrack_tstamp_init_sysctl(struct net *net) 44 + { 45 + struct ctl_table *table; 46 + 47 + table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table), 48 + GFP_KERNEL); 49 + if (!table) 50 + goto out; 51 + 52 + table[0].data = &net->ct.sysctl_tstamp; 53 + 54 + net->ct.tstamp_sysctl_header = register_net_sysctl_table(net, 55 + nf_net_netfilter_sysctl_path, table); 56 + if (!net->ct.tstamp_sysctl_header) { 57 + printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n"); 58 + goto out_register; 59 + } 60 + return 0; 61 + 62 + out_register: 63 + kfree(table); 64 + out: 65 + return -ENOMEM; 66 + } 67 + 68 + static void 
nf_conntrack_tstamp_fini_sysctl(struct net *net) 69 + { 70 + struct ctl_table *table; 71 + 72 + table = net->ct.tstamp_sysctl_header->ctl_table_arg; 73 + unregister_net_sysctl_table(net->ct.tstamp_sysctl_header); 74 + kfree(table); 75 + } 76 + #else 77 + static int nf_conntrack_tstamp_init_sysctl(struct net *net) 78 + { 79 + return 0; 80 + } 81 + 82 + static void nf_conntrack_tstamp_fini_sysctl(struct net *net) 83 + { 84 + } 85 + #endif 86 + 87 + int nf_conntrack_tstamp_init(struct net *net) 88 + { 89 + int ret; 90 + 91 + net->ct.sysctl_tstamp = nf_ct_tstamp; 92 + 93 + if (net_eq(net, &init_net)) { 94 + ret = nf_ct_extend_register(&tstamp_extend); 95 + if (ret < 0) { 96 + printk(KERN_ERR "nf_ct_tstamp: Unable to register " 97 + "extension\n"); 98 + goto out_extend_register; 99 + } 100 + } 101 + 102 + ret = nf_conntrack_tstamp_init_sysctl(net); 103 + if (ret < 0) 104 + goto out_sysctl; 105 + 106 + return 0; 107 + 108 + out_sysctl: 109 + if (net_eq(net, &init_net)) 110 + nf_ct_extend_unregister(&tstamp_extend); 111 + out_extend_register: 112 + return ret; 113 + } 114 + 115 + void nf_conntrack_tstamp_fini(struct net *net) 116 + { 117 + nf_conntrack_tstamp_fini_sysctl(net); 118 + if (net_eq(net, &init_net)) 119 + nf_ct_extend_unregister(&tstamp_extend); 120 + }