Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_conncount: speculative garbage collection on empty lists

Instead of removing an empty list node that might be reintroduced soon
thereafter, tentatively place the empty list node on the list passed to
tree_nodes_free(), then re-check if the list is empty again before erasing
it from the tree.

[ Florian: rebase on top of pending nf_conncount fixes ]

Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

+15 -33
-1
include/net/netfilter/nf_conntrack_count.h
··· 9 9 spinlock_t list_lock; 10 10 struct list_head head; /* connections with the same filtering key */ 11 11 unsigned int count; /* length of list */ 12 - bool dead; 13 12 }; 14 13 15 14 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+15 -32
net/netfilter/nf_conncount.c
··· 81 81 return memcmp(a, b, klen * sizeof(u32)); 82 82 } 83 83 84 - static bool conn_free(struct nf_conncount_list *list, 84 + static void conn_free(struct nf_conncount_list *list, 85 85 struct nf_conncount_tuple *conn) 86 86 { 87 - bool free_entry = false; 88 - 89 87 lockdep_assert_held(&list->list_lock); 90 88 91 89 list->count--; 92 90 list_del(&conn->node); 93 - if (list->count == 0) { 94 - list->dead = true; 95 - free_entry = true; 96 - } 97 91 98 92 kmem_cache_free(conncount_conn_cachep, conn); 99 - return free_entry; 100 93 } 101 94 102 95 static const struct nf_conntrack_tuple_hash * 103 96 find_or_evict(struct net *net, struct nf_conncount_list *list, 104 - struct nf_conncount_tuple *conn, bool *free_entry) 97 + struct nf_conncount_tuple *conn) 105 98 { 106 99 const struct nf_conntrack_tuple_hash *found; 107 100 unsigned long a, b; ··· 114 121 */ 115 122 age = a - b; 116 123 if (conn->cpu == cpu || age >= 2) { 117 - *free_entry = conn_free(list, conn); 124 + conn_free(list, conn); 118 125 return ERR_PTR(-ENOENT); 119 126 } 120 127 ··· 130 137 struct nf_conncount_tuple *conn, *conn_n; 131 138 struct nf_conn *found_ct; 132 139 unsigned int collect = 0; 133 - bool free_entry = false; 134 140 135 141 /* check the saved connections */ 136 142 list_for_each_entry_safe(conn, conn_n, &list->head, node) { 137 143 if (collect > CONNCOUNT_GC_MAX_NODES) 138 144 break; 139 145 140 - found = find_or_evict(net, list, conn, &free_entry); 146 + found = find_or_evict(net, list, conn); 141 147 if (IS_ERR(found)) { 142 148 /* Not found, but might be about to be confirmed */ 143 149 if (PTR_ERR(found) == -EAGAIN) { ··· 213 221 spin_lock_init(&list->list_lock); 214 222 INIT_LIST_HEAD(&list->head); 215 223 list->count = 0; 216 - list->dead = false; 217 224 } 218 225 EXPORT_SYMBOL_GPL(nf_conncount_list_init); 219 226 ··· 224 233 struct nf_conncount_tuple *conn, *conn_n; 225 234 struct nf_conn *found_ct; 226 235 unsigned int collected = 0; 227 - bool free_entry = false; 228 236 
bool ret = false; 229 237 230 238 /* don't bother if other cpu is already doing GC */ ··· 231 241 return false; 232 242 233 243 list_for_each_entry_safe(conn, conn_n, &list->head, node) { 234 - found = find_or_evict(net, list, conn, &free_entry); 244 + found = find_or_evict(net, list, conn); 235 245 if (IS_ERR(found)) { 236 - if (PTR_ERR(found) == -ENOENT) { 237 - if (free_entry) { 238 - spin_unlock(&list->list_lock); 239 - return true; 240 - } 246 + if (PTR_ERR(found) == -ENOENT) 241 247 collected++; 242 - } 243 248 continue; 244 249 } 245 250 ··· 245 260 * closed already -> ditch it 246 261 */ 247 262 nf_ct_put(found_ct); 248 - if (conn_free(list, conn)) { 249 - spin_unlock(&list->list_lock); 250 - return true; 251 - } 263 + conn_free(list, conn); 252 264 collected++; 253 265 continue; 254 266 } ··· 255 273 break; 256 274 } 257 275 258 - if (!list->count) { 259 - list->dead = true; 276 + if (!list->count) 260 277 ret = true; 261 - } 262 278 spin_unlock(&list->list_lock); 263 279 264 280 return ret; ··· 271 291 kmem_cache_free(conncount_rb_cachep, rbconn); 272 292 } 273 293 294 + /* caller must hold tree nf_conncount_locks[] lock */ 274 295 static void tree_nodes_free(struct rb_root *root, 275 296 struct nf_conncount_rb *gc_nodes[], 276 297 unsigned int gc_count) ··· 281 300 while (gc_count) { 282 301 rbconn = gc_nodes[--gc_count]; 283 302 spin_lock(&rbconn->list.list_lock); 284 - rb_erase(&rbconn->node, root); 285 - call_rcu(&rbconn->rcu_head, __tree_nodes_free); 303 + if (!rbconn->list.count) { 304 + rb_erase(&rbconn->node, root); 305 + call_rcu(&rbconn->rcu_head, __tree_nodes_free); 306 + } 286 307 spin_unlock(&rbconn->list.list_lock); 287 308 } 288 309 } ··· 301 318 struct rb_root *root, 302 319 unsigned int hash, 303 320 const u32 *key, 304 - u8 keylen, 305 321 const struct nf_conntrack_tuple *tuple, 306 322 const struct nf_conntrack_zone *zone) 307 323 { ··· 309 327 struct nf_conncount_rb *rbconn; 310 328 struct nf_conncount_tuple *conn; 311 329 unsigned int 
count = 0, gc_count = 0; 330 + u8 keylen = data->keylen; 312 331 bool do_gc = true; 313 332 314 333 spin_lock_bh(&nf_conncount_locks[hash]); ··· 437 454 if (!tuple) 438 455 return 0; 439 456 440 - return insert_tree(net, data, root, hash, key, keylen, tuple, zone); 457 + return insert_tree(net, data, root, hash, key, tuple, zone); 441 458 } 442 459 443 460 static void tree_gc_worker(struct work_struct *work)