Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_conncount: Split insert and traversal

This patch is originally from Florian Westphal.

When we have a very coarse grouping, e.g. by large subnets, zone id,
etc., it is likely that we do not need to do tree rotation because
we will find a node where we can attach the new entry. Based on this
observation, we split tree traversal and insertion.

Later on, we can make traversal lockless (tree protected
by RCU), and add extra lock in the individual nodes to protect list
insertion/deletion, thereby allowing parallel insert/delete in different
tree nodes.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Yi-Hung Wei and committed by
Pablo Neira Ayuso
34848d5c 2ba39118

+67 -20
+67 -20
net/netfilter/nf_conncount.c
··· 262 262 } 263 263 264 264 static unsigned int 265 + insert_tree(struct rb_root *root, 266 + unsigned int hash, 267 + const u32 *key, 268 + u8 keylen, 269 + const struct nf_conntrack_tuple *tuple, 270 + const struct nf_conntrack_zone *zone) 271 + { 272 + struct rb_node **rbnode, *parent; 273 + struct nf_conncount_rb *rbconn; 274 + struct nf_conncount_tuple *conn; 275 + unsigned int count = 0; 276 + 277 + spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 278 + 279 + parent = NULL; 280 + rbnode = &(root->rb_node); 281 + while (*rbnode) { 282 + int diff; 283 + rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); 284 + 285 + parent = *rbnode; 286 + diff = key_diff(key, rbconn->key, keylen); 287 + if (diff < 0) { 288 + rbnode = &((*rbnode)->rb_left); 289 + } else if (diff > 0) { 290 + rbnode = &((*rbnode)->rb_right); 291 + } else { 292 + /* unlikely: other cpu added node already */ 293 + if (!nf_conncount_add(&rbconn->list, tuple, zone)) { 294 + count = 0; /* hotdrop */ 295 + goto out_unlock; 296 + } 297 + 298 + count = rbconn->list.count; 299 + goto out_unlock; 300 + } 301 + } 302 + 303 + /* expected case: match, insert new node */ 304 + rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); 305 + if (rbconn == NULL) 306 + goto out_unlock; 307 + 308 + conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); 309 + if (conn == NULL) { 310 + kmem_cache_free(conncount_rb_cachep, rbconn); 311 + goto out_unlock; 312 + } 313 + 314 + conn->tuple = *tuple; 315 + conn->zone = *zone; 316 + memcpy(rbconn->key, key, sizeof(u32) * keylen); 317 + 318 + nf_conncount_list_init(&rbconn->list); 319 + list_add(&conn->node, &rbconn->list.head); 320 + count = 1; 321 + 322 + rb_link_node(&rbconn->node, parent, rbnode); 323 + rb_insert_color(&rbconn->node, root); 324 + out_unlock: 325 + spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 326 + return count; 327 + } 328 + 329 + static unsigned int 265 330 count_tree(struct net *net, 266 331 struct 
nf_conncount_data *data, 267 332 const u32 *key, ··· 337 272 struct rb_root *root; 338 273 struct rb_node **rbnode, *parent; 339 274 struct nf_conncount_rb *rbconn; 340 - struct nf_conncount_tuple *conn; 341 275 unsigned int gc_count, hash; 342 276 bool no_gc = false; 343 277 unsigned int count = 0; ··· 403 339 count = 0; 404 340 if (!tuple) 405 341 goto out_unlock; 406 - /* no match, need to insert new node */ 407 - rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); 408 - if (rbconn == NULL) 409 - goto out_unlock; 410 342 411 - conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); 412 - if (conn == NULL) { 413 - kmem_cache_free(conncount_rb_cachep, rbconn); 414 - goto out_unlock; 415 - } 343 + spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 344 + return insert_tree(root, hash, key, keylen, tuple, zone); 416 345 417 - conn->tuple = *tuple; 418 - conn->zone = *zone; 419 - memcpy(rbconn->key, key, sizeof(u32) * keylen); 420 - 421 - nf_conncount_list_init(&rbconn->list); 422 - list_add(&conn->node, &rbconn->list.head); 423 - count = 1; 424 - 425 - rb_link_node(&rbconn->node, parent, rbnode); 426 - rb_insert_color(&rbconn->node, root); 427 346 out_unlock: 428 347 spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 429 348 return count;