Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rfs: better sizing of dev_flow_table

The aim of this patch is to provide the full range of rps_flow_cnt on 64-bit arches.

The theoretical limit on the number of flows is 2^32.

Fix some buggy RPS/RFS/XPS size macros as well: their _num argument was not parenthesized.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
CC: Xi Wang <xi.wang@gmail.com>
CC: Laurent Chavey <chavey@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
60b778ce 035c4c16

+31 -21
+4 -4
include/linux/netdevice.h
··· 597 597 struct rcu_head rcu; 598 598 u16 cpus[0]; 599 599 }; 600 - #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) 600 + #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) 601 601 602 602 /* 603 603 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the ··· 621 621 struct rps_dev_flow flows[0]; 622 622 }; 623 623 #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ 624 - (_num * sizeof(struct rps_dev_flow))) 624 + ((_num) * sizeof(struct rps_dev_flow))) 625 625 626 626 /* 627 627 * The rps_sock_flow_table contains mappings of flows to the last CPU ··· 632 632 u16 ents[0]; 633 633 }; 634 634 #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ 635 - (_num * sizeof(u16))) 635 + ((_num) * sizeof(u16))) 636 636 637 637 #define RPS_NO_CPU 0xffff 638 638 ··· 684 684 struct rcu_head rcu; 685 685 u16 queues[0]; 686 686 }; 687 - #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) 687 + #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) 688 688 #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ 689 689 / sizeof(u16)) 690 690
+27 -17
net/core/net-sysfs.c
··· 622 622 char *buf) 623 623 { 624 624 struct rps_dev_flow_table *flow_table; 625 - unsigned int val = 0; 625 + unsigned long val = 0; 626 626 627 627 rcu_read_lock(); 628 628 flow_table = rcu_dereference(queue->rps_flow_table); 629 629 if (flow_table) 630 - val = flow_table->mask + 1; 630 + val = (unsigned long)flow_table->mask + 1; 631 631 rcu_read_unlock(); 632 632 633 - return sprintf(buf, "%u\n", val); 633 + return sprintf(buf, "%lu\n", val); 634 634 } 635 635 636 636 static void rps_dev_flow_table_release_work(struct work_struct *work) ··· 654 654 struct rx_queue_attribute *attr, 655 655 const char *buf, size_t len) 656 656 { 657 - unsigned int count; 658 - char *endp; 657 + unsigned long mask, count; 659 658 struct rps_dev_flow_table *table, *old_table; 660 659 static DEFINE_SPINLOCK(rps_dev_flow_lock); 660 + int rc; 661 661 662 662 if (!capable(CAP_NET_ADMIN)) 663 663 return -EPERM; 664 664 665 - count = simple_strtoul(buf, &endp, 0); 666 - if (endp == buf) 667 - return -EINVAL; 665 + rc = kstrtoul(buf, 0, &count); 666 + if (rc < 0) 667 + return rc; 668 668 669 669 if (count) { 670 - int i; 671 - 672 - if (count > INT_MAX) 670 + mask = count - 1; 671 + /* mask = roundup_pow_of_two(count) - 1; 672 + * without overflows... 673 + */ 674 + while ((mask | (mask >> 1)) != mask) 675 + mask |= (mask >> 1); 676 + /* On 64 bit arches, must check mask fits in table->mask (u32), 677 + * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) 678 + * doesnt overflow. 
679 + */ 680 + #if BITS_PER_LONG > 32 681 + if (mask > (unsigned long)(u32)mask) 673 682 return -EINVAL; 674 - count = roundup_pow_of_two(count); 675 - if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table)) 683 + #else 684 + if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) 676 685 / sizeof(struct rps_dev_flow)) { 677 686 /* Enforce a limit to prevent overflow */ 678 687 return -EINVAL; 679 688 } 680 - table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); 689 + #endif 690 + table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); 681 691 if (!table) 682 692 return -ENOMEM; 683 693 684 - table->mask = count - 1; 685 - for (i = 0; i < count; i++) 686 - table->flows[i].cpu = RPS_NO_CPU; 694 + table->mask = mask; 695 + for (count = 0; count <= mask; count++) 696 + table->flows[count].cpu = RPS_NO_CPU; 687 697 } else 688 698 table = NULL; 689 699