x86, UV, BAU: Extend for more than 16 cpus per socket

Fix a hard-coded limit of 16 cpus per socket.

The UV Broadcast Assist Unit code initializes by scanning the
cpu topology of the system and assigning a master cpu for each
socket and UV hub. That scan assumed a limit of 16 cpus per
socket. With Westmere we are going over that limit. The UV hub
hardware will allow up to 32.

If the scan finds that the system has gone over that limit, it
returns an error; we print a warning and fall back to doing TLB
shootdowns without the BAU.
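
As a minimal standalone sketch of the failure mode being fixed (the
names and the check-before-write here are illustrative, not the
kernel's actual code): a fixed-size per-socket array indexed by a
running count would overflow silently once the cpu count passed the
array bound, so the scan now range-checks the count and reports an
error instead.

#include <stdio.h>

#define MAX_CPUS_PER_SOCKET 32	/* the old limit was 16, via cpu_number[16] */

struct socket_desc {
	short num_cpus;
	short cpu_number[MAX_CPUS_PER_SOCKET];
};

/* record one cpu on a socket; nonzero means the socket is over-full */
static int record_cpu(struct socket_desc *sdp, int cpu)
{
	if (sdp->num_cpus >= MAX_CPUS_PER_SOCKET) {
		fprintf(stderr, "%d cpus per socket invalid\n",
			sdp->num_cpus + 1);
		return 1;	/* caller would fall back to non-BAU shootdown */
	}
	sdp->cpu_number[sdp->num_cpus++] = cpu;
	return 0;
}

int main(void)
{
	struct socket_desc sd = { 0 };
	int cpu;

	for (cpu = 0; cpu < 40; cpu++)
		if (record_cpu(&sd, cpu))
			return 1;	/* trips when the 33rd cpu shows up */
	return 0;
}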

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: <stable@kernel.org> # .37.x
LKML-Reference: <E1PZol7-0000mM-77@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Cliff Wickman and committed by Ingo Molnar (cfa60917, d8850ba4)

+23 -8
arch/x86/include/asm/uv/uv_bau.h | +5 -4

@@ -26,20 +26,22 @@
  * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
  * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
  *
- * We will use 31 sets, one for sending BAU messages from each of the 32
+ * We will use one set for sending BAU messages from each of the
  * cpu's on the uvhub.
  *
  * TLB shootdown will use the first of the 8 descriptors of each set.
  * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
  */
 
+#define MAX_CPUS_PER_UVHUB		64
+#define MAX_CPUS_PER_SOCKET		32
+#define UV_ADP_SIZE			64 /* hardware-provided max. */
+#define UV_CPUS_PER_ACT_STATUS		32 /* hardware-provided max. */
 #define UV_ITEMS_PER_DESCRIPTOR	8
 /* the 'throttle' to prevent the hardware stay-busy bug */
 #define MAX_BAU_CONCURRENT		3
-#define UV_CPUS_PER_ACT_STATUS		32
 #define UV_ACT_STATUS_MASK		0x3
 #define UV_ACT_STATUS_SIZE		2
-#define UV_ADP_SIZE			32
 #define UV_DISTRIBUTION_SIZE		256
 #define UV_SW_ACK_NPENDING		8
 #define UV_NET_ENDPOINT_INTD		0x38
@@ -100,7 +102,6 @@
  * number of destination side software ack resources
  */
 #define DEST_NUM_RESOURCES		8
-#define MAX_CPUS_PER_NODE		32
 /*
  * completion statuses for sending a TLB flush message
  */
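
As a quick sanity check of the set layout described in the header
comment above, the arithmetic works out as follows (a standalone
sketch; BAU_DESC_SIZE is my label for the 64-byte descriptor size the
comment states):

#include <stdio.h>

#define UV_ADP_SIZE		64	/* descriptor sets: one per cpu on the uvhub */
#define UV_ITEMS_PER_DESCRIPTOR	8	/* descriptors per set */
#define BAU_DESC_SIZE		64	/* bytes per bau_desc, per the comment */

int main(void)
{
	/* set i sits at BASE + i * (8 * 64) = BASE + i * 512 */
	unsigned long stride = UV_ITEMS_PER_DESCRIPTOR * BAU_DESC_SIZE;
	unsigned long table = UV_ADP_SIZE * stride;

	printf("bytes per set:   %lu\n", stride);	/* 512 */
	printf("bytes per uvhub: %lu\n", table);	/* 64 * 512 = 32768 */
	return 0;
}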
arch/x86/platform/uv/tlb_uv.c | +18 -4

@@ -1341,7 +1341,7 @@
 
 /*
  * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
- * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
+ * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
  */
 bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
 			* UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
@@ -1490,7 +1490,7 @@
 /*
  * initialize the bau_control structure for each cpu
  */
-static void __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs)
 {
 	int i;
 	int cpu;
@@ -1507,7 +1507,7 @@
 	struct bau_control *smaster = NULL;
 	struct socket_desc {
 		short num_cpus;
-		short cpu_number[16];
+		short cpu_number[MAX_CPUS_PER_SOCKET];
 	};
 	struct uvhub_desc {
 		unsigned short socket_mask;
@@ -1540,6 +1540,10 @@
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
 		sdp->num_cpus++;
+		if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
+			printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
+			return 1;
+		}
 	}
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
@@ -1570,6 +1574,12 @@
 			bcp->uvhub_master = hmaster;
 			bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
 					blade_processor_id;
+			if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
+				printk(KERN_EMERG
+					"%d cpus per uvhub invalid\n",
+					bcp->uvhub_cpu);
+				return 1;
+			}
 		}
 nextsocket:
 		socket++;
@@ -1595,6 +1605,7 @@
 		bcp->congested_reps = congested_reps;
 		bcp->congested_period = congested_period;
 	}
+	return 0;
 }
 
 /*
@@ -1625,7 +1636,10 @@
 	spin_lock_init(&disable_lock);
 	congested_cycles = microsec_2_cycles(congested_response_us);
 
-	uv_init_per_cpu(nuvhubs);
+	if (uv_init_per_cpu(nuvhubs)) {
+		nobau = 1;
+		return 0;
+	}
 
 	uv_partition_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++)
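
The per-uvhub loop above tests membership with
*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)), a plain byte-array
bitmap. A minimal standalone sketch of that idiom (the helper names
are mine):

#include <stdio.h>
#include <string.h>

/* test bit n in a byte-array bitmap, as the uvhub_mask loop does */
static int bitmap_test(const unsigned char *mask, int n)
{
	return mask[n / 8] & (1 << (n % 8));
}

static void bitmap_set(unsigned char *mask, int n)
{
	mask[n / 8] |= 1 << (n % 8);
}

int main(void)
{
	unsigned char uvhub_mask[4];	/* room for 32 uvhubs */

	memset(uvhub_mask, 0, sizeof(uvhub_mask));
	bitmap_set(uvhub_mask, 13);
	printf("hub 13 present: %d\n", !!bitmap_test(uvhub_mask, 13));	/* 1 */
	printf("hub 14 present: %d\n", !!bitmap_test(uvhub_mask, 14));	/* 0 */
	return 0;
}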