Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: uv2: Workaround for UV2 Hub bug (system global address format)

This is a workaround for a UV2 hub bug that affects the format of system
global addresses.

The GRU API for UV2 was inadvertently broken by a hardware change. The
format of the physical address used for TLB dropins and for addresses used
with instructions running in unmapped mode has changed. This change was
not documented and became apparent only when diagnostics failed while
running on system simulators.

For UV1, TLB and GRU instruction physical addresses are identical to
socket physical addresses (although high NASID bits must be OR'ed into the
address).

For UV2, socket physical addresses need to be converted. The NODE portion
of the physical address needs to be shifted so that the low bit is in bit
39 or bit 40, depending on an MMR value.

It is not yet clear if this bug will be fixed in a silicon respin. If it
is fixed, the hub revision will be incremented & the workaround disabled.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Authored by Jack Steiner and committed by Thomas Gleixner
6a469e46 9d037a77

+46 -16
+1
arch/x86/include/asm/uv/uv_bau.h
··· 55 55 #define UV_BAU_TUNABLES_DIR "sgi_uv" 56 56 #define UV_BAU_TUNABLES_FILE "bau_tunables" 57 57 #define WHITESPACE " \t\n" 58 + #define uv_mmask ((1UL << uv_hub_info->m_val) - 1) 58 59 #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) 59 60 #define cpubit_isset(cpu, bau_local_cpumask) \ 60 61 test_bit((cpu), (bau_local_cpumask).bits)
+34 -3
arch/x86/include/asm/uv/uv_hub.h
··· 46 46 * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant 47 47 * of the nasid for socket usage. 48 48 * 49 + * GPA - (global physical address) a socket physical address converted 50 + * so that it can be used by the GRU as a global address. Socket 51 + * physical addresses 1) need additional NASID (node) bits added 52 + * to the high end of the address, and 2) unaliased if the 53 + * partition does not have a physical address 0. In addition, on 54 + * UV2 rev 1, GPAs need the gnode left shifted to bits 39 or 40. 55 + * 49 56 * 50 57 * NumaLink Global Physical Address Format: 51 58 * +--------------------------------+---------------------+ ··· 148 141 unsigned int gnode_extra; 149 142 unsigned char hub_revision; 150 143 unsigned char apic_pnode_shift; 144 + unsigned char m_shift; 145 + unsigned char n_lshift; 151 146 unsigned long gnode_upper; 152 147 unsigned long lowmem_remap_top; 153 148 unsigned long lowmem_remap_base; ··· 184 175 static inline int is_uv2_hub(void) 185 176 { 186 177 return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE; 178 + } 179 + 180 + static inline int is_uv2_1_hub(void) 181 + { 182 + return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE; 183 + } 184 + 185 + static inline int is_uv2_2_hub(void) 186 + { 187 + return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE + 1; 187 188 } 188 189 189 190 union uvh_apicid { ··· 295 276 { 296 277 if (paddr < uv_hub_info->lowmem_remap_top) 297 278 paddr |= uv_hub_info->lowmem_remap_base; 298 - return paddr | uv_hub_info->gnode_upper; 279 + paddr |= uv_hub_info->gnode_upper; 280 + paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | 281 + ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); 282 + return paddr; 299 283 } 300 284 301 285 ··· 322 300 unsigned long remap_base = uv_hub_info->lowmem_remap_base; 323 301 unsigned long remap_top = uv_hub_info->lowmem_remap_top; 324 302 303 + gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | 
304 + ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); 305 + gpa = gpa & uv_hub_info->gpa_mask; 325 306 if (paddr >= remap_base && paddr < remap_base + remap_top) 326 307 paddr -= remap_base; 327 308 return paddr; 328 309 } 329 310 330 311 331 - /* gnode -> pnode */ 312 + /* gpa -> pnode */ 332 313 static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) 333 314 { 334 - return gpa >> uv_hub_info->m_val; 315 + return gpa >> uv_hub_info->n_lshift; 335 316 } 336 317 337 318 /* gpa -> pnode */ ··· 343 318 unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1; 344 319 345 320 return uv_gpa_to_gnode(gpa) & n_mask; 321 + } 322 + 323 + /* gpa -> node offset*/ 324 + static inline unsigned long uv_gpa_to_offset(unsigned long gpa) 325 + { 326 + return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift; 346 327 } 347 328 348 329 /* pnode, offset --> socket virtual */
+5 -2
arch/x86/kernel/apic/x2apic_uv_x.c
··· 832 832 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; 833 833 uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; 834 834 835 + uv_cpu_hub_info(cpu)->m_shift = 64 - m_val; 836 + uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ? 837 + (m_val == 40 ? 40 : 39) : m_val; 838 + 835 839 pnode = uv_apicid_to_pnode(apicid); 836 840 blade = boot_pnode_to_blade(pnode); 837 841 lcpu = uv_blade_info[blade].nr_possible_cpus; ··· 866 862 if (uv_node_to_blade[nid] >= 0) 867 863 continue; 868 864 paddr = node_start_pfn(nid) << PAGE_SHIFT; 869 - paddr = uv_soc_phys_ram_to_gpa(paddr); 870 - pnode = (paddr >> m_val) & pnode_mask; 865 + pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); 871 866 blade = boot_pnode_to_blade(pnode); 872 867 uv_node_to_blade[nid] = blade; 873 868 }
+6 -11
arch/x86/platform/uv/tlb_uv.c
··· 115 115 116 116 /* base pnode in this partition */ 117 117 static int uv_base_pnode __read_mostly; 118 - /* position of pnode (which is nasid>>1): */ 119 - static int uv_nshift __read_mostly; 120 - static unsigned long uv_mmask __read_mostly; 121 118 122 119 static DEFINE_PER_CPU(struct ptc_stats, ptcstats); 123 120 static DEFINE_PER_CPU(struct bau_control, bau_control); ··· 1432 1435 { 1433 1436 int i; 1434 1437 int cpu; 1435 - unsigned long pa; 1438 + unsigned long gpa; 1436 1439 unsigned long m; 1437 1440 unsigned long n; 1438 1441 size_t dsize; ··· 1448 1451 bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); 1449 1452 BUG_ON(!bau_desc); 1450 1453 1451 - pa = uv_gpa(bau_desc); /* need the real nasid*/ 1452 - n = pa >> uv_nshift; 1453 - m = pa & uv_mmask; 1454 + gpa = uv_gpa(bau_desc); 1455 + n = uv_gpa_to_gnode(gpa); 1456 + m = uv_gpa_to_offset(gpa); 1454 1457 1455 1458 /* the 14-bit pnode */ 1456 1459 write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); ··· 1522 1525 bcp->queue_last = pqp + (DEST_Q_SIZE - 1); 1523 1526 } 1524 1527 /* 1525 - * need the pnode of where the memory was really allocated 1528 + * need the gnode of where the memory was really allocated 1526 1529 */ 1527 - pn = uv_gpa(pqp) >> uv_nshift; 1530 + pn = uv_gpa_to_gnode(uv_gpa(pqp)); 1528 1531 first = uv_physnodeaddr(pqp); 1529 1532 pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; 1530 1533 last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); ··· 1834 1837 zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); 1835 1838 } 1836 1839 1837 - uv_nshift = uv_hub_info->m_val; 1838 - uv_mmask = (1UL << uv_hub_info->m_val) - 1; 1839 1840 nuvhubs = uv_num_possible_blades(); 1840 1841 spin_lock_init(&disable_lock); 1841 1842 congested_cycles = usec_2_cycles(congested_respns_us);