x86, UV, BAU: Extend for more than 16 cpus per socket

Fix a hard-coded limit of 16 cpus per socket.

The UV Broadcast Assist Unit code initializes by scanning the
cpu topology of the system and assigning a master cpu for each
socket and UV hub. That scan assumed a limit of 16 cpus per
socket. With Westmere we are going over that limit. The UV hub
hardware will allow up to 32.
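
For orientation, a condensed excerpt of the per-socket bookkeeping
that scan maintains, as it looks with this patch applied (taken from
the uv_init_per_cpu() hunks in the diff below; the surrounding scan
loop is elided):

	struct socket_desc {
		short num_cpus;
		short cpu_number[MAX_CPUS_PER_SOCKET];	/* was a hard-coded [16] */
	};

	/* while scanning each cpu: record it under its socket */
	sdp = &bdp->socket[socket];
	sdp->cpu_number[sdp->num_cpus] = cpu;
	sdp->num_cpus++;
	if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
		printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
		return 1;	/* uv_init_per_cpu() now returns nonzero on error */
	}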

If the scan finds that the system has gone over that limit, it
returns an error; we print a warning and fall back to doing TLB
shootdowns without the BAU.
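
In code terms the fallback is just the caller checking the new
return value of uv_init_per_cpu(); a minimal excerpt from the last
tlb_uv.c hunk below:

	if (uv_init_per_cpu(nuvhubs)) {
		nobau = 1;	/* BAU disabled; TLB shootdowns take the non-BAU path */
		return 0;
	}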

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: <stable@kernel.org> # .37.x
LKML-Reference: <E1PZol7-0000mM-77@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by Cliff Wickman and committed by Ingo Molnar (cfa60917, d8850ba4)

 arch/x86/include/asm/uv/uv_bau.h |  9 +++++----
 arch/x86/platform/uv/tlb_uv.c    | 22 ++++++++++++++++++----
 2 files changed, 23 insertions(+), 8 deletions(-)

--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -26,20 +26,22 @@
  * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
  * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
  *
- * We will use 31 sets, one for sending BAU messages from each of the 32
+ * We will use one set for sending BAU messages from each of the
  * cpu's on the uvhub.
  *
  * TLB shootdown will use the first of the 8 descriptors of each set.
  * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
  */
 
+#define MAX_CPUS_PER_UVHUB		64
+#define MAX_CPUS_PER_SOCKET		32
+#define UV_ADP_SIZE			64 /* hardware-provided max. */
+#define UV_CPUS_PER_ACT_STATUS		32 /* hardware-provided max. */
 #define UV_ITEMS_PER_DESCRIPTOR		8
 /* the 'throttle' to prevent the hardware stay-busy bug */
 #define MAX_BAU_CONCURRENT		3
-#define UV_CPUS_PER_ACT_STATUS		32
 #define UV_ACT_STATUS_MASK		0x3
 #define UV_ACT_STATUS_SIZE		2
-#define UV_ADP_SIZE			32
 #define UV_DISTRIBUTION_SIZE		256
 #define UV_SW_ACK_NPENDING		8
 #define UV_NET_ENDPOINT_INTD		0x38
@@ -102,7 +100,6 @@
  * number of destination side software ack resources
  */
 #define DEST_NUM_RESOURCES		8
-#define MAX_CPUS_PER_NODE		32
 /*
  * completion statuses for sending a TLB flush message
  */

--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1341,7 +1341,7 @@
 
 	/*
 	 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
-	 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
+	 * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
	 */
 	bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
 				* UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
@@ -1490,7 +1490,7 @@
 /*
  * initialize the bau_control structure for each cpu
  */
-static void __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs)
 {
 	int i;
 	int cpu;
@@ -1507,7 +1507,7 @@
 	struct bau_control *smaster = NULL;
 	struct socket_desc {
 		short num_cpus;
-		short cpu_number[16];
+		short cpu_number[MAX_CPUS_PER_SOCKET];
 	};
 	struct uvhub_desc {
 		unsigned short socket_mask;
@@ -1540,6 +1540,10 @@
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
 		sdp->num_cpus++;
+		if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
+			printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
+			return 1;
+		}
 	}
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
@@ -1570,6 +1574,12 @@
 			bcp->uvhub_master = hmaster;
 			bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
 					blade_processor_id;
+			if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
+				printk(KERN_EMERG
+					"%d cpus per uvhub invalid\n",
+					bcp->uvhub_cpu);
+				return 1;
+			}
 		}
 nextsocket:
 		socket++;
@@ -1595,6 +1605,7 @@
 		bcp->congested_reps = congested_reps;
 		bcp->congested_period = congested_period;
 	}
+	return 0;
 }
 
 /*
@@ -1625,7 +1636,10 @@
 	spin_lock_init(&disable_lock);
 	congested_cycles = microsec_2_cycles(congested_response_us);
 
-	uv_init_per_cpu(nuvhubs);
+	if (uv_init_per_cpu(nuvhubs)) {
+		nobau = 1;
+		return 0;
+	}
 
 	uv_partition_base_pnode = 0x7fffffff;
 	for (uvhub = 0; uvhub < nuvhubs; uvhub++)
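
A sizing note on the first tlb_uv.c hunk, using only the numbers its
comment states (each bau_desc is 64 bytes, with 8 of them per cpu):
raising UV_ADP_SIZE from 32 to 64 doubles the kmalloc_node() request
for the activation descriptors,

	64 bytes * 8 * 32 = 16384 bytes  (before)
	64 bytes * 8 * 64 = 32768 bytes  (after)

i.e. an extra 16K per allocation to cover up to 64 cpus per uvhub.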