at v4.13 521 lines 12 kB view raw
1/* 2 * Copyright 2016, Rashmica Gupta, IBM Corp. 3 * 4 * This traverses the kernel pagetables and dumps the 5 * information about the used sections of memory to 6 * /sys/kernel/debug/kernel_pagetables. 7 * 8 * Derived from the arm64 implementation: 9 * Copyright (c) 2014, The Linux Foundation, Laura Abbott. 10 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven. 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; version 2 15 * of the License. 16 */ 17#include <linux/debugfs.h> 18#include <linux/fs.h> 19#include <linux/hugetlb.h> 20#include <linux/io.h> 21#include <linux/mm.h> 22#include <linux/sched.h> 23#include <linux/seq_file.h> 24#include <asm/fixmap.h> 25#include <asm/pgtable.h> 26#include <linux/const.h> 27#include <asm/page.h> 28#include <asm/pgalloc.h> 29 30#ifdef CONFIG_PPC32 31#define KERN_VIRT_START 0 32#endif 33 34/* 35 * To visualise what is happening, 36 * 37 * - PTRS_PER_P** = how many entries there are in the corresponding P** 38 * - P**_SHIFT = how many bits of the address we use to index into the 39 * corresponding P** 40 * - P**_SIZE is how much memory we can access through the table - not the 41 * size of the table itself. 42 * P**={PGD, PUD, PMD, PTE} 43 * 44 * 45 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a 46 * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to 47 * a page. 48 * 49 * In the case where there are only 3 levels, the PUD is folded into the 50 * PGD: every PUD has only one entry which points to the PMD. 51 * 52 * The page dumper groups page table entries of the same type into a single 53 * description. It uses pg_state to track the range information while 54 * iterating over the PTE entries. When the continuity is broken it then 55 * dumps out a description of the range - ie PTEs that are virtually contiguous 56 * with the same PTE flags are chunked together. This is to make it clear how 57 * different areas of the kernel virtual memory are used. 58 * 59 */ 60struct pg_state { 61 struct seq_file *seq; 62 const struct addr_marker *marker; 63 unsigned long start_address; 64 unsigned long start_pa; 65 unsigned long last_pa; 66 unsigned int level; 67 u64 current_flags; 68}; 69 70struct addr_marker { 71 unsigned long start_address; 72 const char *name; 73}; 74 75static struct addr_marker address_markers[] = { 76 { 0, "Start of kernel VM" }, 77 { 0, "vmalloc() Area" }, 78 { 0, "vmalloc() End" }, 79#ifdef CONFIG_PPC64 80 { 0, "isa I/O start" }, 81 { 0, "isa I/O end" }, 82 { 0, "phb I/O start" }, 83 { 0, "phb I/O end" }, 84 { 0, "I/O remap start" }, 85 { 0, "I/O remap end" }, 86 { 0, "vmemmap start" }, 87#else 88 { 0, "Early I/O remap start" }, 89 { 0, "Early I/O remap end" }, 90#ifdef CONFIG_NOT_COHERENT_CACHE 91 { 0, "Consistent mem start" }, 92 { 0, "Consistent mem end" }, 93#endif 94#ifdef CONFIG_HIGHMEM 95 { 0, "Highmem PTEs start" }, 96 { 0, "Highmem PTEs end" }, 97#endif 98 { 0, "Fixmap start" }, 99 { 0, "Fixmap end" }, 100#endif 101 { -1, NULL }, 102}; 103 104struct flag_info { 105 u64 mask; 106 u64 val; 107 const char *set; 108 const char *clear; 109 bool is_val; 110 int shift; 111}; 112 113static const struct flag_info flag_array[] = { 114 { 115#ifdef CONFIG_PPC_STD_MMU_64 116 .mask = _PAGE_PRIVILEGED, 117 .val = 0, 118#else 119 .mask = _PAGE_USER, 120 .val = _PAGE_USER, 121#endif 122 .set = "user", 123 .clear = " ", 124 }, { 125#if _PAGE_RO == 0 126 .mask = _PAGE_RW, 127 .val = _PAGE_RW, 128#else 129 .mask = _PAGE_RO, 130 .val = 0, 131#endif 132 .set = "rw", 133 .clear = "ro", 134 }, { 135 .mask = _PAGE_EXEC, 136 .val = _PAGE_EXEC, 137 .set = " X ", 138 .clear = " ", 139 }, { 140 .mask = _PAGE_PTE, 141 .val = _PAGE_PTE, 142 .set = "pte", 143 .clear = " ", 144 }, { 145 .mask = _PAGE_PRESENT, 146 .val = _PAGE_PRESENT, 147 .set = "present", 148 .clear = " ", 149 }, { 150#ifdef CONFIG_PPC_STD_MMU_64 151 .mask = H_PAGE_HASHPTE, 152 .val = H_PAGE_HASHPTE, 153#else 154 .mask = _PAGE_HASHPTE, 155 .val = _PAGE_HASHPTE, 156#endif 157 .set = "hpte", 158 .clear = " ", 159 }, { 160#ifndef CONFIG_PPC_STD_MMU_64 161 .mask = _PAGE_GUARDED, 162 .val = _PAGE_GUARDED, 163 .set = "guarded", 164 .clear = " ", 165 }, { 166#endif 167 .mask = _PAGE_DIRTY, 168 .val = _PAGE_DIRTY, 169 .set = "dirty", 170 .clear = " ", 171 }, { 172 .mask = _PAGE_ACCESSED, 173 .val = _PAGE_ACCESSED, 174 .set = "accessed", 175 .clear = " ", 176 }, { 177#ifndef CONFIG_PPC_STD_MMU_64 178 .mask = _PAGE_WRITETHRU, 179 .val = _PAGE_WRITETHRU, 180 .set = "write through", 181 .clear = " ", 182 }, { 183#endif 184#ifndef CONFIG_PPC_BOOK3S_64 185 .mask = _PAGE_NO_CACHE, 186 .val = _PAGE_NO_CACHE, 187 .set = "no cache", 188 .clear = " ", 189 }, { 190#else 191 .mask = _PAGE_NON_IDEMPOTENT, 192 .val = _PAGE_NON_IDEMPOTENT, 193 .set = "non-idempotent", 194 .clear = " ", 195 }, { 196 .mask = _PAGE_TOLERANT, 197 .val = _PAGE_TOLERANT, 198 .set = "tolerant", 199 .clear = " ", 200 }, { 201#endif 202#ifdef CONFIG_PPC_BOOK3S_64 203 .mask = H_PAGE_BUSY, 204 .val = H_PAGE_BUSY, 205 .set = "busy", 206 }, { 207#ifdef CONFIG_PPC_64K_PAGES 208 .mask = H_PAGE_COMBO, 209 .val = H_PAGE_COMBO, 210 .set = "combo", 211 }, { 212 .mask = H_PAGE_4K_PFN, 213 .val = H_PAGE_4K_PFN, 214 .set = "4K_pfn", 215 }, { 216#endif 217 .mask = H_PAGE_F_GIX, 218 .val = H_PAGE_F_GIX, 219 .set = "f_gix", 220 .is_val = true, 221 .shift = H_PAGE_F_GIX_SHIFT, 222 }, { 223 .mask = H_PAGE_F_SECOND, 224 .val = H_PAGE_F_SECOND, 225 .set = "f_second", 226 }, { 227#endif 228 .mask = _PAGE_SPECIAL, 229 .val = _PAGE_SPECIAL, 230 .set = "special", 231 }, { 232 .mask = _PAGE_SHARED, 233 .val = _PAGE_SHARED, 234 .set = "shared", 235 } 236}; 237 238struct pgtable_level { 239 const struct flag_info *flag; 240 size_t num; 241 u64 mask; 242}; 243 244static struct pgtable_level pg_level[] = { 245 { 246 }, { /* pgd */ 247 .flag = flag_array, 248 .num = ARRAY_SIZE(flag_array), 249 }, { /* pud */ 250 .flag = flag_array, 251 .num = ARRAY_SIZE(flag_array), 252 }, { /* pmd */ 253 .flag = flag_array, 254 .num = ARRAY_SIZE(flag_array), 255 }, { /* pte */ 256 .flag = flag_array, 257 .num = ARRAY_SIZE(flag_array), 258 }, 259}; 260 261static void dump_flag_info(struct pg_state *st, const struct flag_info 262 *flag, u64 pte, int num) 263{ 264 unsigned int i; 265 266 for (i = 0; i < num; i++, flag++) { 267 const char *s = NULL; 268 u64 val; 269 270 /* flag not defined so don't check it */ 271 if (flag->mask == 0) 272 continue; 273 /* Some 'flags' are actually values */ 274 if (flag->is_val) { 275 val = pte & flag->val; 276 if (flag->shift) 277 val = val >> flag->shift; 278 seq_printf(st->seq, " %s:%llx", flag->set, val); 279 } else { 280 if ((pte & flag->mask) == flag->val) 281 s = flag->set; 282 else 283 s = flag->clear; 284 if (s) 285 seq_printf(st->seq, " %s", s); 286 } 287 st->current_flags &= ~flag->mask; 288 } 289 if (st->current_flags != 0) 290 seq_printf(st->seq, " unknown flags:%llx", st->current_flags); 291} 292 293static void dump_addr(struct pg_state *st, unsigned long addr) 294{ 295 static const char units[] = "KMGTPE"; 296 const char *unit = units; 297 unsigned long delta; 298 299#ifdef CONFIG_PPC64 300 seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1); 301 seq_printf(st->seq, "0x%016lx ", st->start_pa); 302#else 303 seq_printf(st->seq, "0x%08lx-0x%08lx ", st->start_address, addr - 1); 304 seq_printf(st->seq, "0x%08lx ", st->start_pa); 305#endif 306 307 delta = (addr - st->start_address) >> 10; 308 /* Work out what appropriate unit to use */ 309 while (!(delta & 1023) && unit[1]) { 310 delta >>= 10; 311 unit++; 312 } 313 seq_printf(st->seq, "%9lu%c", delta, *unit); 314 315} 316 317static void note_page(struct pg_state *st, unsigned long addr, 318 unsigned int level, u64 val) 319{ 320 u64 flag = val & pg_level[level].mask; 321 u64 pa = val & PTE_RPN_MASK; 322 323 /* At first no level is set */ 324 if (!st->level) { 325 st->level = level; 326 st->current_flags = flag; 327 st->start_address = addr; 328 st->start_pa = pa; 329 st->last_pa = pa; 330 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 331 /* 332 * Dump the section of virtual memory when: 333 * - the PTE flags from one entry to the next differs. 334 * - we change levels in the tree. 335 * - the address is in a different section of memory and is thus 336 * used for a different purpose, regardless of the flags. 337 * - the pa of this page is not adjacent to the last inspected page 338 */ 339 } else if (flag != st->current_flags || level != st->level || 340 addr >= st->marker[1].start_address || 341 pa != st->last_pa + PAGE_SIZE) { 342 343 /* Check the PTE flags */ 344 if (st->current_flags) { 345 dump_addr(st, addr); 346 347 /* Dump all the flags */ 348 if (pg_level[st->level].flag) 349 dump_flag_info(st, pg_level[st->level].flag, 350 st->current_flags, 351 pg_level[st->level].num); 352 353 seq_puts(st->seq, "\n"); 354 } 355 356 /* 357 * Address indicates we have passed the end of the 358 * current section of virtual memory 359 */ 360 while (addr >= st->marker[1].start_address) { 361 st->marker++; 362 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 363 } 364 st->start_address = addr; 365 st->start_pa = pa; 366 st->last_pa = pa; 367 st->current_flags = flag; 368 st->level = level; 369 } else { 370 st->last_pa = pa; 371 } 372} 373 374static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) 375{ 376 pte_t *pte = pte_offset_kernel(pmd, 0); 377 unsigned long addr; 378 unsigned int i; 379 380 for (i = 0; i < PTRS_PER_PTE; i++, pte++) { 381 addr = start + i * PAGE_SIZE; 382 note_page(st, addr, 4, pte_val(*pte)); 383 384 } 385} 386 387static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) 388{ 389 pmd_t *pmd = pmd_offset(pud, 0); 390 unsigned long addr; 391 unsigned int i; 392 393 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { 394 addr = start + i * PMD_SIZE; 395 if (!pmd_none(*pmd) && !pmd_huge(*pmd)) 396 /* pmd exists */ 397 walk_pte(st, pmd, addr); 398 else 399 note_page(st, addr, 3, pmd_val(*pmd)); 400 } 401} 402 403static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) 404{ 405 pud_t *pud = pud_offset(pgd, 0); 406 unsigned long addr; 407 unsigned int i; 408 409 for (i = 0; i < PTRS_PER_PUD; i++, pud++) { 410 addr = start + i * PUD_SIZE; 411 if (!pud_none(*pud) && !pud_huge(*pud)) 412 /* pud exists */ 413 walk_pmd(st, pud, addr); 414 else 415 note_page(st, addr, 2, pud_val(*pud)); 416 } 417} 418 419static void walk_pagetables(struct pg_state *st) 420{ 421 pgd_t *pgd = pgd_offset_k(0UL); 422 unsigned int i; 423 unsigned long addr; 424 425 /* 426 * Traverse the linux pagetable structure and dump pages that are in 427 * the hash pagetable. 428 */ 429 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { 430 addr = KERN_VIRT_START + i * PGDIR_SIZE; 431 if (!pgd_none(*pgd) && !pgd_huge(*pgd)) 432 /* pgd exists */ 433 walk_pud(st, pgd, addr); 434 else 435 note_page(st, addr, 1, pgd_val(*pgd)); 436 } 437} 438 439static void populate_markers(void) 440{ 441 int i = 0; 442 443 address_markers[i++].start_address = PAGE_OFFSET; 444 address_markers[i++].start_address = VMALLOC_START; 445 address_markers[i++].start_address = VMALLOC_END; 446#ifdef CONFIG_PPC64 447 address_markers[i++].start_address = ISA_IO_BASE; 448 address_markers[i++].start_address = ISA_IO_END; 449 address_markers[i++].start_address = PHB_IO_BASE; 450 address_markers[i++].start_address = PHB_IO_END; 451 address_markers[i++].start_address = IOREMAP_BASE; 452 address_markers[i++].start_address = IOREMAP_END; 453#ifdef CONFIG_PPC_STD_MMU_64 454 address_markers[i++].start_address = H_VMEMMAP_BASE; 455#else 456 address_markers[i++].start_address = VMEMMAP_BASE; 457#endif 458#else /* !CONFIG_PPC64 */ 459 address_markers[i++].start_address = ioremap_bot; 460 address_markers[i++].start_address = IOREMAP_TOP; 461#ifdef CONFIG_NOT_COHERENT_CACHE 462 address_markers[i++].start_address = IOREMAP_TOP; 463 address_markers[i++].start_address = IOREMAP_TOP + 464 CONFIG_CONSISTENT_SIZE; 465#endif 466#ifdef CONFIG_HIGHMEM 467 address_markers[i++].start_address = PKMAP_BASE; 468 address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP); 469#endif 470 address_markers[i++].start_address = FIXADDR_START; 471 address_markers[i++].start_address = FIXADDR_TOP; 472#endif /* CONFIG_PPC64 */ 473} 474 475static int ptdump_show(struct seq_file *m, void *v) 476{ 477 struct pg_state st = { 478 .seq = m, 479 .start_address = KERN_VIRT_START, 480 .marker = address_markers, 481 }; 482 /* Traverse kernel page tables */ 483 walk_pagetables(&st); 484 note_page(&st, 0, 0, 0); 485 return 0; 486} 487 488 489static int ptdump_open(struct inode *inode, struct file *file) 490{ 491 return single_open(file, ptdump_show, NULL); 492} 493 494static const struct file_operations ptdump_fops = { 495 .open = ptdump_open, 496 .read = seq_read, 497 .llseek = seq_lseek, 498 .release = single_release, 499}; 500 501static void build_pgtable_complete_mask(void) 502{ 503 unsigned int i, j; 504 505 for (i = 0; i < ARRAY_SIZE(pg_level); i++) 506 if (pg_level[i].flag) 507 for (j = 0; j < pg_level[i].num; j++) 508 pg_level[i].mask |= pg_level[i].flag[j].mask; 509} 510 511static int ptdump_init(void) 512{ 513 struct dentry *debugfs_file; 514 515 populate_markers(); 516 build_pgtable_complete_mask(); 517 debugfs_file = debugfs_create_file("kernel_page_tables", 0400, NULL, 518 NULL, &ptdump_fops); 519 return debugfs_file ? 0 : -ENOMEM; 520} 521device_initcall(ptdump_init);