Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: x_table: speedup compat operations

One iptables invocation with 135000 rules takes 35 seconds of cpu time
on a recent server, using a 32bit distro and a 64bit kernel.

We eventually trigger NMI/RCU watchdog.

INFO: rcu_sched_state detected stall on CPU 3 (t=6000 jiffies)

COMPAT mode has quadratic behavior and consumes 16 bytes of memory per
rule.

Switch the xt_compat algos to use an array instead of list, and use a
binary search to locate an offset in the sorted array.

This halves memory need (8 bytes per rule), and removes quadratic
behavior [ O(N*N) -> O(N*log2(N)) ]

Time of iptables goes from 35 s to 150 ms.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Eric Dumazet and committed by
Pablo Neira Ayuso
255d0dc3 b017900a

+57 -35
+2 -1
include/linux/netfilter/x_tables.h
··· 611 611 extern void xt_compat_lock(u_int8_t af); 612 612 extern void xt_compat_unlock(u_int8_t af); 613 613 614 - extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); 614 + extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); 615 615 extern void xt_compat_flush_offsets(u_int8_t af); 616 + extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); 616 617 extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); 617 618 618 619 extern int xt_compat_match_offset(const struct xt_match *match);
+1
net/bridge/netfilter/ebtables.c
··· 1764 1764 1765 1765 newinfo->entries_size = size; 1766 1766 1767 + xt_compat_init_offsets(AF_INET, info->nentries); 1767 1768 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, 1768 1769 entries, newinfo); 1769 1770 }
+2
net/ipv4/netfilter/arp_tables.c
··· 883 883 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 884 884 newinfo->initial_entries = 0; 885 885 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 886 + xt_compat_init_offsets(NFPROTO_ARP, info->number); 886 887 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 887 888 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 888 889 if (ret != 0) ··· 1351 1350 duprintf("translate_compat_table: size %u\n", info->size); 1352 1351 j = 0; 1353 1352 xt_compat_lock(NFPROTO_ARP); 1353 + xt_compat_init_offsets(NFPROTO_ARP, number); 1354 1354 /* Walk through entries, checking offsets. */ 1355 1355 xt_entry_foreach(iter0, entry0, total_size) { 1356 1356 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+2
net/ipv4/netfilter/ip_tables.c
··· 1080 1080 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1081 1081 newinfo->initial_entries = 0; 1082 1082 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1083 + xt_compat_init_offsets(AF_INET, info->number); 1083 1084 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1084 1085 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1085 1086 if (ret != 0) ··· 1682 1681 duprintf("translate_compat_table: size %u\n", info->size); 1683 1682 j = 0; 1684 1683 xt_compat_lock(AF_INET); 1684 + xt_compat_init_offsets(AF_INET, number); 1685 1685 /* Walk through entries, checking offsets. */ 1686 1686 xt_entry_foreach(iter0, entry0, total_size) { 1687 1687 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+2
net/ipv6/netfilter/ip6_tables.c
··· 1093 1093 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1094 1094 newinfo->initial_entries = 0; 1095 1095 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1096 + xt_compat_init_offsets(AF_INET6, info->number); 1096 1097 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1097 1098 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1098 1099 if (ret != 0) ··· 1697 1696 duprintf("translate_compat_table: size %u\n", info->size); 1698 1697 j = 0; 1699 1698 xt_compat_lock(AF_INET6); 1699 + xt_compat_init_offsets(AF_INET6, number); 1700 1700 /* Walk through entries, checking offsets. */ 1701 1701 xt_entry_foreach(iter0, entry0, total_size) { 1702 1702 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+48 -34
net/netfilter/x_tables.c
··· 38 38 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 39 39 40 40 struct compat_delta { 41 - struct compat_delta *next; 42 - unsigned int offset; 43 - int delta; 41 + unsigned int offset; /* offset in kernel */ 42 + int delta; /* delta in 32bit user land */ 44 43 }; 45 44 46 45 struct xt_af { ··· 48 49 struct list_head target; 49 50 #ifdef CONFIG_COMPAT 50 51 struct mutex compat_mutex; 51 - struct compat_delta *compat_offsets; 52 + struct compat_delta *compat_tab; 53 + unsigned int number; /* number of slots in compat_tab[] */ 54 + unsigned int cur; /* number of used slots in compat_tab[] */ 52 55 #endif 53 56 }; 54 57 ··· 415 414 EXPORT_SYMBOL_GPL(xt_check_match); 416 415 417 416 #ifdef CONFIG_COMPAT 418 - int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) 417 + int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) 419 418 { 420 - struct compat_delta *tmp; 419 + struct xt_af *xp = &xt[af]; 421 420 422 - tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); 423 - if (!tmp) 424 - return -ENOMEM; 425 - 426 - tmp->offset = offset; 427 - tmp->delta = delta; 428 - 429 - if (xt[af].compat_offsets) { 430 - tmp->next = xt[af].compat_offsets->next; 431 - xt[af].compat_offsets->next = tmp; 432 - } else { 433 - xt[af].compat_offsets = tmp; 434 - tmp->next = NULL; 421 + if (!xp->compat_tab) { 422 + if (!xp->number) 423 + return -EINVAL; 424 + xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); 425 + if (!xp->compat_tab) 426 + return -ENOMEM; 427 + xp->cur = 0; 435 428 } 429 + 430 + if (xp->cur >= xp->number) 431 + return -EINVAL; 432 + 433 + if (xp->cur) 434 + delta += xp->compat_tab[xp->cur - 1].delta; 435 + xp->compat_tab[xp->cur].offset = offset; 436 + xp->compat_tab[xp->cur].delta = delta; 437 + xp->cur++; 436 438 return 0; 437 439 } 438 440 EXPORT_SYMBOL_GPL(xt_compat_add_offset); 439 441 440 442 void xt_compat_flush_offsets(u_int8_t af) 441 443 { 442 - struct compat_delta *tmp, *next; 443 
- 444 - if (xt[af].compat_offsets) { 445 - for (tmp = xt[af].compat_offsets; tmp; tmp = next) { 446 - next = tmp->next; 447 - kfree(tmp); 448 - } 449 - xt[af].compat_offsets = NULL; 444 + if (xt[af].compat_tab) { 445 + vfree(xt[af].compat_tab); 446 + xt[af].compat_tab = NULL; 447 + xt[af].number = 0; 450 448 } 451 449 } 452 450 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); 453 451 454 452 int xt_compat_calc_jump(u_int8_t af, unsigned int offset) 455 453 { 456 - struct compat_delta *tmp; 457 - int delta; 454 + struct compat_delta *tmp = xt[af].compat_tab; 455 + int mid, left = 0, right = xt[af].cur - 1; 458 456 459 - for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) 460 - if (tmp->offset < offset) 461 - delta += tmp->delta; 462 - return delta; 457 + while (left <= right) { 458 + mid = (left + right) >> 1; 459 + if (offset > tmp[mid].offset) 460 + left = mid + 1; 461 + else if (offset < tmp[mid].offset) 462 + right = mid - 1; 463 + else 464 + return mid ? tmp[mid - 1].delta : 0; 465 + } 466 + WARN_ON_ONCE(1); 467 + return 0; 463 468 } 464 469 EXPORT_SYMBOL_GPL(xt_compat_calc_jump); 470 + 471 + void xt_compat_init_offsets(u_int8_t af, unsigned int number) 472 + { 473 + xt[af].number = number; 474 + xt[af].cur = 0; 475 + } 476 + EXPORT_SYMBOL(xt_compat_init_offsets); 465 477 466 478 int xt_compat_match_offset(const struct xt_match *match) 467 479 { ··· 1351 1337 mutex_init(&xt[i].mutex); 1352 1338 #ifdef CONFIG_COMPAT 1353 1339 mutex_init(&xt[i].compat_mutex); 1354 - xt[i].compat_offsets = NULL; 1340 + xt[i].compat_tab = NULL; 1355 1341 #endif 1356 1342 INIT_LIST_HEAD(&xt[i].target); 1357 1343 INIT_LIST_HEAD(&xt[i].match);