Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.12 551 lines 13 kB view raw
1/* 2 * Copyright 2016, Rashmica Gupta, IBM Corp. 3 * 4 * This traverses the kernel virtual memory and dumps the pages that are in 5 * the hash pagetable, along with their flags to 6 * /sys/kernel/debug/kernel_hash_pagetable. 7 * 8 * If radix is enabled then there is no hash page table and so no debugfs file 9 * is generated. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License 13 * as published by the Free Software Foundation; version 2 14 * of the License. 15 */ 16#include <linux/debugfs.h> 17#include <linux/fs.h> 18#include <linux/io.h> 19#include <linux/mm.h> 20#include <linux/sched.h> 21#include <linux/seq_file.h> 22#include <asm/fixmap.h> 23#include <asm/pgtable.h> 24#include <linux/const.h> 25#include <asm/page.h> 26#include <asm/pgalloc.h> 27#include <asm/plpar_wrappers.h> 28#include <linux/memblock.h> 29#include <asm/firmware.h> 30 31struct pg_state { 32 struct seq_file *seq; 33 const struct addr_marker *marker; 34 unsigned long start_address; 35 unsigned int level; 36 u64 current_flags; 37}; 38 39struct addr_marker { 40 unsigned long start_address; 41 const char *name; 42}; 43 44static struct addr_marker address_markers[] = { 45 { 0, "Start of kernel VM" }, 46 { 0, "vmalloc() Area" }, 47 { 0, "vmalloc() End" }, 48 { 0, "isa I/O start" }, 49 { 0, "isa I/O end" }, 50 { 0, "phb I/O start" }, 51 { 0, "phb I/O end" }, 52 { 0, "I/O remap start" }, 53 { 0, "I/O remap end" }, 54 { 0, "vmemmap start" }, 55 { -1, NULL }, 56}; 57 58struct flag_info { 59 u64 mask; 60 u64 val; 61 const char *set; 62 const char *clear; 63 bool is_val; 64 int shift; 65}; 66 67static const struct flag_info v_flag_array[] = { 68 { 69 .mask = SLB_VSID_B, 70 .val = SLB_VSID_B_256M, 71 .set = "ssize: 256M", 72 .clear = "ssize: 1T ", 73 }, { 74 .mask = HPTE_V_SECONDARY, 75 .val = HPTE_V_SECONDARY, 76 .set = "secondary", 77 .clear = "primary ", 78 }, { 79 .mask = HPTE_V_VALID, 80 .val = HPTE_V_VALID, 81 .set = "valid ", 82 .clear = "invalid", 83 }, { 84 .mask = HPTE_V_BOLTED, 85 .val = HPTE_V_BOLTED, 86 .set = "bolted", 87 .clear = "", 88 } 89}; 90 91static const struct flag_info r_flag_array[] = { 92 { 93 .mask = HPTE_R_PP0 | HPTE_R_PP, 94 .val = PP_RWXX, 95 .set = "prot:RW--", 96 }, { 97 .mask = HPTE_R_PP0 | HPTE_R_PP, 98 .val = PP_RWRX, 99 .set = "prot:RWR-", 100 }, { 101 .mask = HPTE_R_PP0 | HPTE_R_PP, 102 .val = PP_RWRW, 103 .set = "prot:RWRW", 104 }, { 105 .mask = HPTE_R_PP0 | HPTE_R_PP, 106 .val = PP_RXRX, 107 .set = "prot:R-R-", 108 }, { 109 .mask = HPTE_R_PP0 | HPTE_R_PP, 110 .val = PP_RXXX, 111 .set = "prot:R---", 112 }, { 113 .mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 114 .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 115 .set = "key", 116 .clear = "", 117 .is_val = true, 118 }, { 119 .mask = HPTE_R_R, 120 .val = HPTE_R_R, 121 .set = "ref", 122 .clear = " ", 123 }, { 124 .mask = HPTE_R_C, 125 .val = HPTE_R_C, 126 .set = "changed", 127 .clear = " ", 128 }, { 129 .mask = HPTE_R_N, 130 .val = HPTE_R_N, 131 .set = "no execute", 132 }, { 133 .mask = HPTE_R_WIMG, 134 .val = HPTE_R_W, 135 .set = "writethru", 136 }, { 137 .mask = HPTE_R_WIMG, 138 .val = HPTE_R_I, 139 .set = "no cache", 140 }, { 141 .mask = HPTE_R_WIMG, 142 .val = HPTE_R_G, 143 .set = "guarded", 144 } 145}; 146 147static int calculate_pagesize(struct pg_state *st, int ps, char s[]) 148{ 149 static const char units[] = "BKMGTPE"; 150 const char *unit = units; 151 152 while (ps > 9 && unit[1]) { 153 ps -= 10; 154 unit++; 155 } 156 seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, *unit); 157 return ps; 158} 159 160static void dump_flag_info(struct pg_state *st, const struct flag_info 161 *flag, u64 pte, int num) 162{ 163 unsigned int i; 164 165 for (i = 0; i < num; i++, flag++) { 166 const char *s = NULL; 167 u64 val; 168 169 /* flag not defined so don't check it */ 170 if (flag->mask == 0) 171 continue; 172 /* Some 'flags' are actually values */ 173 if (flag->is_val) { 174 val = pte & flag->val; 175 if (flag->shift) 176 val = val >> flag->shift; 177 seq_printf(st->seq, " %s:%llx", flag->set, val); 178 } else { 179 if ((pte & flag->mask) == flag->val) 180 s = flag->set; 181 else 182 s = flag->clear; 183 if (s) 184 seq_printf(st->seq, " %s", s); 185 } 186 } 187} 188 189static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r, 190 unsigned long rpn, int bps, int aps, unsigned long lp) 191{ 192 int aps_index; 193 194 while (ea >= st->marker[1].start_address) { 195 st->marker++; 196 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 197 } 198 seq_printf(st->seq, "0x%lx:\t", ea); 199 seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v)); 200 dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array)); 201 seq_printf(st->seq, " rpn: %lx\t", rpn); 202 dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array)); 203 204 calculate_pagesize(st, bps, "base"); 205 aps_index = calculate_pagesize(st, aps, "actual"); 206 if (aps_index != 2) 207 seq_printf(st->seq, "LP enc: %lx", lp); 208 seq_puts(st->seq, "\n"); 209} 210 211 212static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 213 *r) 214{ 215 struct hash_pte *hptep; 216 unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v; 217 int i, ssize = mmu_kernel_ssize; 218 unsigned long shift = mmu_psize_defs[psize].shift; 219 220 /* calculate hash */ 221 vsid = get_kernel_vsid(ea, ssize); 222 vpn = hpt_vpn(ea, vsid, ssize); 223 hash = hpt_hash(vpn, shift, ssize); 224 want_v = hpte_encode_avpn(vpn, psize, ssize); 225 226 /* to check in the secondary hash table, we invert the hash */ 227 if (!primary) 228 hash = ~hash; 229 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; 230 for (i = 0; i < HPTES_PER_GROUP; i++) { 231 hptep = htab_address + hpte_group; 232 hpte_v = be64_to_cpu(hptep->v); 233 234 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { 235 /* HPTE matches */ 236 *v = be64_to_cpu(hptep->v); 237 *r = be64_to_cpu(hptep->r); 238 return 0; 239 } 240 ++hpte_group; 241 } 242 return -1; 243} 244 245#ifdef CONFIG_PPC_PSERIES 246static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) 247{ 248 struct hash_pte ptes[4]; 249 unsigned long vsid, vpn, hash, hpte_group, want_v; 250 int i, j, ssize = mmu_kernel_ssize; 251 long lpar_rc = 0; 252 unsigned long shift = mmu_psize_defs[psize].shift; 253 254 /* calculate hash */ 255 vsid = get_kernel_vsid(ea, ssize); 256 vpn = hpt_vpn(ea, vsid, ssize); 257 hash = hpt_hash(vpn, shift, ssize); 258 want_v = hpte_encode_avpn(vpn, psize, ssize); 259 260 /* to check in the secondary hash table, we invert the hash */ 261 if (!primary) 262 hash = ~hash; 263 hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; 264 /* see if we can find an entry in the hpte with this hash */ 265 for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { 266 lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); 267 268 if (lpar_rc != H_SUCCESS) 269 continue; 270 for (j = 0; j < 4; j++) { 271 if (HPTE_V_COMPARE(ptes[j].v, want_v) && 272 (ptes[j].v & HPTE_V_VALID)) { 273 /* HPTE matches */ 274 *v = ptes[j].v; 275 *r = ptes[j].r; 276 return 0; 277 } 278 } 279 } 280 return -1; 281} 282#endif 283 284static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, 285 unsigned long *lp_bits) 286{ 287 struct mmu_psize_def entry; 288 unsigned long arpn, mask, lp; 289 int penc = -2, idx = 0, shift; 290 291 /*. 292 * The LP field has 8 bits. Depending on the actual page size, some of 293 * these bits are concatenated with the APRN to get the RPN. The rest 294 * of the bits in the LP field is the LP value and is an encoding for 295 * the base page size and the actual page size. 296 * 297 * - find the mmu entry for our base page size 298 * - go through all page encodings and use the associated mask to 299 * find an encoding that matches our encoding in the LP field. 300 */ 301 arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 302 lp = arpn & 0xff; 303 304 entry = mmu_psize_defs[bps]; 305 while (idx < MMU_PAGE_COUNT) { 306 penc = entry.penc[idx]; 307 if ((penc != -1) && (mmu_psize_defs[idx].shift)) { 308 shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT; 309 mask = (0x1 << (shift)) - 1; 310 if ((lp & mask) == penc) { 311 *aps = mmu_psize_to_shift(idx); 312 *lp_bits = lp & mask; 313 *rpn = arpn >> shift; 314 return; 315 } 316 } 317 idx++; 318 } 319} 320 321static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, 322 u64 *r) 323{ 324#ifdef CONFIG_PPC_PSERIES 325 if (firmware_has_feature(FW_FEATURE_LPAR)) 326 return pseries_find(ea, psize, primary, v, r); 327#endif 328 return native_find(ea, psize, primary, v, r); 329} 330 331static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) 332{ 333 unsigned long slot; 334 u64 v = 0, r = 0; 335 unsigned long rpn, lp_bits; 336 int base_psize = 0, actual_psize = 0; 337 338 if (ea <= PAGE_OFFSET) 339 return -1; 340 341 /* Look in primary table */ 342 slot = base_hpte_find(ea, psize, true, &v, &r); 343 344 /* Look in secondary table */ 345 if (slot == -1) 346 slot = base_hpte_find(ea, psize, true, &v, &r); 347 348 /* No entry found */ 349 if (slot == -1) 350 return -1; 351 352 /* 353 * We found an entry in the hash page table: 354 * - check that this has the same base page 355 * - find the actual page size 356 * - find the RPN 357 */ 358 base_psize = mmu_psize_to_shift(psize); 359 360 if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) { 361 decode_r(psize, r, &rpn, &actual_psize, &lp_bits); 362 } else { 363 /* 4K actual page size */ 364 actual_psize = 12; 365 rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 366 /* In this case there are no LP bits */ 367 lp_bits = -1; 368 } 369 /* 370 * We didn't find a matching encoding, so the PTE we found isn't for 371 * this address. 372 */ 373 if (actual_psize == -1) 374 return -1; 375 376 dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits); 377 return 0; 378} 379 380static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) 381{ 382 pte_t *pte = pte_offset_kernel(pmd, 0); 383 unsigned long addr, pteval, psize; 384 int i, status; 385 386 for (i = 0; i < PTRS_PER_PTE; i++, pte++) { 387 addr = start + i * PAGE_SIZE; 388 pteval = pte_val(*pte); 389 390 if (addr < VMALLOC_END) 391 psize = mmu_vmalloc_psize; 392 else 393 psize = mmu_io_psize; 394#ifdef CONFIG_PPC_64K_PAGES 395 /* check for secret 4K mappings */ 396 if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) || 397 ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) 398 psize = mmu_io_psize; 399#endif 400 /* check for hashpte */ 401 status = hpte_find(st, addr, psize); 402 403 if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE) 404 && (status != -1)) { 405 /* found a hpte that is not in the linux page tables */ 406 seq_printf(st->seq, "page probably bolted before linux" 407 " pagetables were set: addr:%lx, pteval:%lx\n", 408 addr, pteval); 409 } 410 } 411} 412 413static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) 414{ 415 pmd_t *pmd = pmd_offset(pud, 0); 416 unsigned long addr; 417 unsigned int i; 418 419 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { 420 addr = start + i * PMD_SIZE; 421 if (!pmd_none(*pmd)) 422 /* pmd exists */ 423 walk_pte(st, pmd, addr); 424 } 425} 426 427static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) 428{ 429 pud_t *pud = pud_offset(pgd, 0); 430 unsigned long addr; 431 unsigned int i; 432 433 for (i = 0; i < PTRS_PER_PUD; i++, pud++) { 434 addr = start + i * PUD_SIZE; 435 if (!pud_none(*pud)) 436 /* pud exists */ 437 walk_pmd(st, pud, addr); 438 } 439} 440 441static void walk_pagetables(struct pg_state *st) 442{ 443 pgd_t *pgd = pgd_offset_k(0UL); 444 unsigned int i; 445 unsigned long addr; 446 447 /* 448 * Traverse the linux pagetable structure and dump pages that are in 449 * the hash pagetable. 450 */ 451 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { 452 addr = KERN_VIRT_START + i * PGDIR_SIZE; 453 if (!pgd_none(*pgd)) 454 /* pgd exists */ 455 walk_pud(st, pgd, addr); 456 } 457} 458 459 460static void walk_linearmapping(struct pg_state *st) 461{ 462 unsigned long addr; 463 464 /* 465 * Traverse the linear mapping section of virtual memory and dump pages 466 * that are in the hash pagetable. 467 */ 468 unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift; 469 470 for (addr = PAGE_OFFSET; addr < PAGE_OFFSET + 471 memblock_end_of_DRAM(); addr += psize) 472 hpte_find(st, addr, mmu_linear_psize); 473} 474 475static void walk_vmemmap(struct pg_state *st) 476{ 477#ifdef CONFIG_SPARSEMEM_VMEMMAP 478 struct vmemmap_backing *ptr = vmemmap_list; 479 480 /* 481 * Traverse the vmemmaped memory and dump pages that are in the hash 482 * pagetable. 483 */ 484 while (ptr->list) { 485 hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); 486 ptr = ptr->list; 487 } 488 seq_puts(st->seq, "---[ vmemmap end ]---\n"); 489#endif 490} 491 492static void populate_markers(void) 493{ 494 address_markers[0].start_address = PAGE_OFFSET; 495 address_markers[1].start_address = VMALLOC_START; 496 address_markers[2].start_address = VMALLOC_END; 497 address_markers[3].start_address = ISA_IO_BASE; 498 address_markers[4].start_address = ISA_IO_END; 499 address_markers[5].start_address = PHB_IO_BASE; 500 address_markers[6].start_address = PHB_IO_END; 501 address_markers[7].start_address = IOREMAP_BASE; 502 address_markers[8].start_address = IOREMAP_END; 503#ifdef CONFIG_PPC_STD_MMU_64 504 address_markers[9].start_address = H_VMEMMAP_BASE; 505#else 506 address_markers[9].start_address = VMEMMAP_BASE; 507#endif 508} 509 510static int ptdump_show(struct seq_file *m, void *v) 511{ 512 struct pg_state st = { 513 .seq = m, 514 .start_address = PAGE_OFFSET, 515 .marker = address_markers, 516 }; 517 /* 518 * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and 519 * dump pages that are in the hash pagetable. 520 */ 521 walk_linearmapping(&st); 522 walk_pagetables(&st); 523 walk_vmemmap(&st); 524 return 0; 525} 526 527static int ptdump_open(struct inode *inode, struct file *file) 528{ 529 return single_open(file, ptdump_show, NULL); 530} 531 532static const struct file_operations ptdump_fops = { 533 .open = ptdump_open, 534 .read = seq_read, 535 .llseek = seq_lseek, 536 .release = single_release, 537}; 538 539static int ptdump_init(void) 540{ 541 struct dentry *debugfs_file; 542 543 if (!radix_enabled()) { 544 populate_markers(); 545 debugfs_file = debugfs_create_file("kernel_hash_pagetable", 546 0400, NULL, NULL, &ptdump_fops); 547 return debugfs_file ? 0 : -ENOMEM; 548 } 549 return 0; 550} 551device_initcall(ptdump_init);