Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/64s/hash: Convert SLB miss handlers to C

This patch moves SLB miss handlers completely to C, using the standard
exception handler macros to set up the stack and branch to C.

This can be done because the segment containing the kernel stack is
always bolted, so accessing it with relocation on will not cause an
SLB exception.

Arbitrary kernel memory must not be accessed when handling kernel
space SLB misses, so care should be taken there. However, user SLB
misses can access any kernel memory, which can be used to move some
fields out of the paca (in later patches).

User SLB misses could quite easily reconcile IRQs and set up a
first-class kernel environment and exit via ret_from_except; however,
that doesn't seem to be necessary at the moment, so we only do that if
a bad fault is encountered.

[ Credit to Aneesh for bug fixes, error checks, and improvements to
bad address handling, etc ]

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Disallow tracing for all of slb.c for now.]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

authored by

Nicholas Piggin and committed by
Michael Ellerman
48e7b769 4c2de74c

+217 -629
+2
arch/powerpc/include/asm/asm-prototypes.h
··· 77 77 void system_reset_exception(struct pt_regs *regs); 78 78 void machine_check_exception(struct pt_regs *regs); 79 79 void emulation_assist_interrupt(struct pt_regs *regs); 80 + long do_slb_fault(struct pt_regs *regs, unsigned long ea); 81 + void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err); 80 82 81 83 /* signals, syscalls and interrupts */ 82 84 long sys_swapcontext(struct ucontext __user *old_ctx,
-8
arch/powerpc/include/asm/exception-64s.h
··· 61 61 #define MAX_MCE_DEPTH 4 62 62 63 63 /* 64 - * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR 65 - * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole 66 - * in the save area so it's not necessary to overlap them. Could be used 67 - * for future savings though if another 4 byte register was to be saved. 68 - */ 69 - #define EX_LR EX_DAR 70 - 71 - /* 72 64 * EX_R3 is only used by the bad_stack handler. bad_stack reloads and 73 65 * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap 74 66 * with EX_DAR.
+42 -160
arch/powerpc/kernel/exceptions-64s.S
··· 596 596 597 597 598 598 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) 599 - SET_SCRATCH0(r13) 600 - EXCEPTION_PROLOG_0(PACA_EXSLB) 601 - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) 602 - mr r12,r3 /* save r3 */ 603 - mfspr r3,SPRN_DAR 604 - mfspr r11,SPRN_SRR1 605 - crset 4*cr6+eq 606 - BRANCH_TO_COMMON(r10, slb_miss_common) 599 + EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380); 607 600 EXC_REAL_END(data_access_slb, 0x380, 0x80) 608 601 609 602 EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) 610 - SET_SCRATCH0(r13) 611 - EXCEPTION_PROLOG_0(PACA_EXSLB) 612 - EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) 613 - mr r12,r3 /* save r3 */ 614 - mfspr r3,SPRN_DAR 615 - mfspr r11,SPRN_SRR1 616 - crset 4*cr6+eq 617 - BRANCH_TO_COMMON(r10, slb_miss_common) 603 + EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380); 618 604 EXC_VIRT_END(data_access_slb, 0x4380, 0x80) 605 + 619 606 TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) 607 + 608 + EXC_COMMON_BEGIN(data_access_slb_common) 609 + mfspr r10,SPRN_DAR 610 + std r10,PACA_EXSLB+EX_DAR(r13) 611 + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) 612 + ld r4,PACA_EXSLB+EX_DAR(r13) 613 + std r4,_DAR(r1) 614 + addi r3,r1,STACK_FRAME_OVERHEAD 615 + bl do_slb_fault 616 + cmpdi r3,0 617 + bne- 1f 618 + b fast_exception_return 619 + 1: /* Error case */ 620 + std r3,RESULT(r1) 621 + bl save_nvgprs 622 + RECONCILE_IRQ_STATE(r10, r11) 623 + ld r4,_DAR(r1) 624 + ld r5,RESULT(r1) 625 + addi r3,r1,STACK_FRAME_OVERHEAD 626 + bl do_bad_slb_fault 627 + b ret_from_except 620 628 621 629 622 630 EXC_REAL(instruction_access, 0x400, 0x80) ··· 648 640 649 641 650 642 EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) 651 - SET_SCRATCH0(r13) 652 - EXCEPTION_PROLOG_0(PACA_EXSLB) 653 - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) 654 - mr r12,r3 /* save r3 */ 655 - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ 656 - mfspr r11,SPRN_SRR1 657 - crclr 4*cr6+eq 658 - BRANCH_TO_COMMON(r10, 
slb_miss_common) 643 + EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480); 659 644 EXC_REAL_END(instruction_access_slb, 0x480, 0x80) 660 645 661 646 EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) 662 - SET_SCRATCH0(r13) 663 - EXCEPTION_PROLOG_0(PACA_EXSLB) 664 - EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) 665 - mr r12,r3 /* save r3 */ 666 - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ 667 - mfspr r11,SPRN_SRR1 668 - crclr 4*cr6+eq 669 - BRANCH_TO_COMMON(r10, slb_miss_common) 647 + EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480); 670 648 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) 649 + 671 650 TRAMP_KVM(PACA_EXSLB, 0x480) 672 651 673 - 674 - /* 675 - * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as 676 - * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled. 677 - */ 678 - EXC_COMMON_BEGIN(slb_miss_common) 679 - /* 680 - * r13 points to the PACA, r9 contains the saved CR, 681 - * r12 contains the saved r3, 682 - * r11 contain the saved SRR1, SRR0 is still ready for return 683 - * r3 has the faulting address 684 - * r9 - r13 are saved in paca->exslb. 685 - * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss 686 - * We assume we aren't going to take any exceptions during this 687 - * procedure. 688 - */ 689 - mflr r10 690 - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ 691 - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ 692 - 693 - andi. r9,r11,MSR_PR // Check for exception from userspace 694 - cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later 695 - 696 - /* 697 - * Test MSR_RI before calling slb_allocate_realmode, because the 698 - * MSR in r11 gets clobbered. However we still want to allocate 699 - * SLB in case MSR_RI=0, to minimise the risk of getting stuck in 700 - * recursive SLB faults. So use cr5 for this, which is preserved. 701 - */ 702 - andi. 
r11,r11,MSR_RI /* check for unrecoverable exception */ 703 - cmpdi cr5,r11,MSR_RI 704 - 705 - crset 4*cr0+eq 706 - #ifdef CONFIG_PPC_BOOK3S_64 707 - BEGIN_MMU_FTR_SECTION 708 - bl slb_allocate 709 - END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) 710 - #endif 711 - 712 - ld r10,PACA_EXSLB+EX_LR(r13) 713 - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ 714 - mtlr r10 715 - 716 - /* 717 - * Large address, check whether we have to allocate new contexts. 718 - */ 719 - beq- 8f 720 - 721 - bne- cr5,2f /* if unrecoverable exception, oops */ 722 - 723 - /* All done -- return from exception. */ 724 - 725 - bne cr4,1f /* returning to kernel */ 726 - 727 - mtcrf 0x80,r9 728 - mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ 729 - mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ 730 - mtcrf 0x02,r9 /* I/D indication is in cr6 */ 731 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 732 - 733 - RESTORE_CTR(r9, PACA_EXSLB) 734 - RESTORE_PPR_PACA(PACA_EXSLB, r9) 735 - mr r3,r12 736 - ld r9,PACA_EXSLB+EX_R9(r13) 737 - ld r10,PACA_EXSLB+EX_R10(r13) 738 - ld r11,PACA_EXSLB+EX_R11(r13) 739 - ld r12,PACA_EXSLB+EX_R12(r13) 740 - ld r13,PACA_EXSLB+EX_R13(r13) 741 - RFI_TO_USER 742 - b . /* prevent speculative execution */ 743 - 1: 744 - mtcrf 0x80,r9 745 - mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ 746 - mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ 747 - mtcrf 0x02,r9 /* I/D indication is in cr6 */ 748 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 749 - 750 - RESTORE_CTR(r9, PACA_EXSLB) 751 - RESTORE_PPR_PACA(PACA_EXSLB, r9) 752 - mr r3,r12 753 - ld r9,PACA_EXSLB+EX_R9(r13) 754 - ld r10,PACA_EXSLB+EX_R10(r13) 755 - ld r11,PACA_EXSLB+EX_R11(r13) 756 - ld r12,PACA_EXSLB+EX_R12(r13) 757 - ld r13,PACA_EXSLB+EX_R13(r13) 758 - RFI_TO_KERNEL 759 - b . 
/* prevent speculative execution */ 760 - 761 - 762 - 2: std r3,PACA_EXSLB+EX_DAR(r13) 763 - mr r3,r12 764 - mfspr r11,SPRN_SRR0 765 - mfspr r12,SPRN_SRR1 766 - LOAD_HANDLER(r10,unrecov_slb) 767 - mtspr SPRN_SRR0,r10 768 - ld r10,PACAKMSR(r13) 769 - mtspr SPRN_SRR1,r10 770 - RFI_TO_KERNEL 771 - b . 772 - 773 - 8: std r3,PACA_EXSLB+EX_DAR(r13) 774 - mr r3,r12 775 - mfspr r11,SPRN_SRR0 776 - mfspr r12,SPRN_SRR1 777 - LOAD_HANDLER(r10, large_addr_slb) 778 - mtspr SPRN_SRR0,r10 779 - ld r10,PACAKMSR(r13) 780 - mtspr SPRN_SRR1,r10 781 - RFI_TO_KERNEL 782 - b . 783 - 784 - EXC_COMMON_BEGIN(unrecov_slb) 785 - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) 786 - RECONCILE_IRQ_STATE(r10, r11) 652 + EXC_COMMON_BEGIN(instruction_access_slb_common) 653 + EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB) 654 + ld r4,_NIP(r1) 655 + addi r3,r1,STACK_FRAME_OVERHEAD 656 + bl do_slb_fault 657 + cmpdi r3,0 658 + bne- 1f 659 + b fast_exception_return 660 + 1: /* Error case */ 661 + std r3,RESULT(r1) 787 662 bl save_nvgprs 788 - 1: addi r3,r1,STACK_FRAME_OVERHEAD 789 - bl unrecoverable_exception 790 - b 1b 791 - 792 - EXC_COMMON_BEGIN(large_addr_slb) 793 - EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) 794 663 RECONCILE_IRQ_STATE(r10, r11) 795 - ld r3, PACA_EXSLB+EX_DAR(r13) 796 - std r3, _DAR(r1) 797 - beq cr6, 2f 798 - li r10, 0x481 /* fix trap number for I-SLB miss */ 799 - std r10, _TRAP(r1) 800 - 2: bl save_nvgprs 801 - addi r3, r1, STACK_FRAME_OVERHEAD 802 - bl slb_miss_large_addr 664 + ld r4,_NIP(r1) 665 + ld r5,RESULT(r1) 666 + addi r3,r1,STACK_FRAME_OVERHEAD 667 + bl do_bad_slb_fault 803 668 b ret_from_except 669 + 804 670 805 671 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) 806 672 .globl hardware_interrupt_hv;
+3 -1
arch/powerpc/mm/Makefile
··· 7 7 8 8 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 9 9 10 + CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) 11 + 10 12 obj-y := fault.o mem.o pgtable.o mmap.o \ 11 13 init_$(BITS).o pgtable_$(BITS).o \ 12 14 init-common.o mmu_context.o drmem.o ··· 17 15 obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o 18 16 hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o 19 17 obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o 20 - obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o 18 + obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o 21 19 obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o 22 20 obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o 23 21 obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
+170 -125
arch/powerpc/mm/slb.c
··· 14 14 * 2 of the License, or (at your option) any later version. 15 15 */ 16 16 17 + #include <asm/asm-prototypes.h> 17 18 #include <asm/pgtable.h> 18 19 #include <asm/mmu.h> 19 20 #include <asm/mmu_context.h> ··· 34 33 KSTACK_INDEX = 1, /* Kernel stack map */ 35 34 }; 36 35 37 - extern void slb_allocate(unsigned long ea); 36 + static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); 38 37 39 38 #define slb_esid_mask(ssize) \ 40 39 (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) ··· 45 44 return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; 46 45 } 47 46 47 + static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, 48 + unsigned long flags) 49 + { 50 + return (vsid << slb_vsid_shift(ssize)) | flags | 51 + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); 52 + } 53 + 48 54 static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, 49 55 unsigned long flags) 50 56 { 51 - return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags | 52 - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); 57 + return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); 53 58 } 54 59 55 60 static inline void slb_shadow_update(unsigned long ea, int ssize, ··· 360 353 is_kernel_addr(exec_base)) 361 354 return; 362 355 363 - slb_allocate(pc); 356 + slb_allocate_user(mm, pc); 364 357 365 358 if (!esids_match(pc, stack)) 366 - slb_allocate(stack); 359 + slb_allocate_user(mm, stack); 367 360 368 361 if (!esids_match(pc, exec_base) && 369 362 !esids_match(stack, exec_base)) 370 - slb_allocate(exec_base); 363 + slb_allocate_user(mm, exec_base); 371 364 } 372 - 373 - static inline void patch_slb_encoding(unsigned int *insn_addr, 374 - unsigned int immed) 375 - { 376 - 377 - /* 378 - * This function patches either an li or a cmpldi instruction with 379 - * a new immediate value. 
This relies on the fact that both li 380 - * (which is actually addi) and cmpldi both take a 16-bit immediate 381 - * value, and it is situated in the same location in the instruction, 382 - * ie. bits 16-31 (Big endian bit order) or the lower 16 bits. 383 - * The signedness of the immediate operand differs between the two 384 - * instructions however this code is only ever patching a small value, 385 - * much less than 1 << 15, so we can get away with it. 386 - * To patch the value we read the existing instruction, clear the 387 - * immediate value, and or in our new value, then write the instruction 388 - * back. 389 - */ 390 - unsigned int insn = (*insn_addr & 0xffff0000) | immed; 391 - patch_instruction(insn_addr, insn); 392 - } 393 - 394 - extern u32 slb_miss_kernel_load_linear[]; 395 - extern u32 slb_miss_kernel_load_io[]; 396 - extern u32 slb_compare_rr_to_size[]; 397 - extern u32 slb_miss_kernel_load_vmemmap[]; 398 365 399 366 void slb_set_size(u16 size) 400 367 { 401 - if (mmu_slb_size == size) 402 - return; 403 - 404 368 mmu_slb_size = size; 405 - patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size); 406 369 } 407 370 408 371 void slb_initialize(void) ··· 394 417 #endif 395 418 if (!slb_encoding_inited) { 396 419 slb_encoding_inited = 1; 397 - patch_slb_encoding(slb_miss_kernel_load_linear, 398 - SLB_VSID_KERNEL | linear_llp); 399 - patch_slb_encoding(slb_miss_kernel_load_io, 400 - SLB_VSID_KERNEL | io_llp); 401 - patch_slb_encoding(slb_compare_rr_to_size, 402 - mmu_slb_size); 403 - 404 420 pr_devel("SLB: linear LLP = %04lx\n", linear_llp); 405 421 pr_devel("SLB: io LLP = %04lx\n", io_llp); 406 - 407 422 #ifdef CONFIG_SPARSEMEM_VMEMMAP 408 - patch_slb_encoding(slb_miss_kernel_load_vmemmap, 409 - SLB_VSID_KERNEL | vmemmap_llp); 410 423 pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp); 411 424 #endif 412 425 } ··· 425 458 asm volatile("isync":::"memory"); 426 459 } 427 460 428 - static void insert_slb_entry(unsigned long vsid, unsigned long ea, 429 - 
int bpsize, int ssize) 461 + static void slb_cache_update(unsigned long esid_data) 430 462 { 431 - unsigned long flags, vsid_data, esid_data; 432 - enum slb_index index; 433 463 int slb_cache_index; 434 464 435 465 if (cpu_has_feature(CPU_FTR_ARCH_300)) 436 466 return; /* ISAv3.0B and later does not use slb_cache */ 437 467 438 468 /* 439 - * We are irq disabled, hence should be safe to access PACA. 440 - */ 441 - VM_WARN_ON(!irqs_disabled()); 442 - 443 - /* 444 - * We can't take a PMU exception in the following code, so hard 445 - * disable interrupts. 446 - */ 447 - hard_irq_disable(); 448 - 449 - index = get_paca()->stab_rr; 450 - 451 - /* 452 - * simple round-robin replacement of slb starting at SLB_NUM_BOLTED. 453 - */ 454 - if (index < (mmu_slb_size - 1)) 455 - index++; 456 - else 457 - index = SLB_NUM_BOLTED; 458 - 459 - get_paca()->stab_rr = index; 460 - 461 - flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; 462 - vsid_data = (vsid << slb_vsid_shift(ssize)) | flags | 463 - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); 464 - esid_data = mk_esid_data(ea, ssize, index); 465 - 466 - /* 467 - * No need for an isync before or after this slbmte. The exception 468 - * we enter with and the rfid we exit with are context synchronizing. 469 - * Also we only handle user segments here. 470 - */ 471 - asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data) 472 - : "memory"); 473 - 474 - /* 475 469 * Now update slb cache entries 476 470 */ 477 - slb_cache_index = get_paca()->slb_cache_ptr; 471 + slb_cache_index = local_paca->slb_cache_ptr; 478 472 if (slb_cache_index < SLB_CACHE_ENTRIES) { 479 473 /* 480 474 * We have space in slb cache for optimized switch_slb(). 
481 475 * Top 36 bits from esid_data as per ISA 482 476 */ 483 - get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28; 484 - get_paca()->slb_cache_ptr++; 477 + local_paca->slb_cache[slb_cache_index++] = esid_data >> 28; 478 + local_paca->slb_cache_ptr++; 485 479 } else { 486 480 /* 487 481 * Our cache is full and the current cache content strictly 488 482 * doesn't indicate the active SLB conents. Bump the ptr 489 483 * so that switch_slb() will ignore the cache. 490 484 */ 491 - get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; 485 + local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; 492 486 } 493 487 } 494 488 495 - static void handle_multi_context_slb_miss(int context_id, unsigned long ea) 489 + static enum slb_index alloc_slb_index(void) 496 490 { 497 - struct mm_struct *mm = current->mm; 498 - unsigned long vsid; 499 - int bpsize; 491 + enum slb_index index; 500 492 501 - /* 502 - * We are always above 1TB, hence use high user segment size. 503 - */ 504 - vsid = get_vsid(context_id, ea, mmu_highuser_ssize); 505 - bpsize = get_slice_psize(mm, ea); 506 - insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); 493 + /* round-robin replacement of slb starting at SLB_NUM_BOLTED. 
*/ 494 + index = get_paca()->stab_rr; 495 + if (index < (mmu_slb_size - 1)) 496 + index++; 497 + else 498 + index = SLB_NUM_BOLTED; 499 + get_paca()->stab_rr = index; 500 + 501 + return index; 507 502 } 508 503 509 - void slb_miss_large_addr(struct pt_regs *regs) 504 + static long slb_insert_entry(unsigned long ea, unsigned long context, 505 + unsigned long flags, int ssize, bool kernel) 510 506 { 511 - enum ctx_state prev_state = exception_enter(); 512 - unsigned long ea = regs->dar; 513 - int context; 507 + unsigned long vsid; 508 + unsigned long vsid_data, esid_data; 509 + enum slb_index index; 514 510 515 - if (REGION_ID(ea) != USER_REGION_ID) 516 - goto slb_bad_addr; 511 + vsid = get_vsid(context, ea, ssize); 512 + if (!vsid) 513 + return -EFAULT; 517 514 518 515 /* 519 - * Are we beyound what the page table layout supports ? 516 + * There must not be a kernel SLB fault in alloc_slb_index or before 517 + * slbmte here or the allocation bitmaps could get out of whack with 518 + * the SLB. 519 + * 520 + * User SLB faults or preloads take this path which might get inlined 521 + * into the caller, so add compiler barriers here to ensure unsafe 522 + * memory accesses do not come between. 520 523 */ 521 - if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) 522 - goto slb_bad_addr; 524 + barrier(); 523 525 524 - /* Lower address should have been handled by asm code */ 525 - if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) 526 - goto slb_bad_addr; 526 + index = alloc_slb_index(); 527 + 528 + vsid_data = __mk_vsid_data(vsid, ssize, flags); 529 + esid_data = mk_esid_data(ea, ssize, index); 530 + 531 + /* 532 + * No need for an isync before or after this slbmte. The exception 533 + * we enter with and the rfid we exit with are context synchronizing. 534 + * User preloads should add isync afterwards in case the kernel 535 + * accesses user memory before it returns to userspace with rfid. 
536 + */ 537 + asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); 538 + 539 + barrier(); 540 + 541 + if (!kernel) 542 + slb_cache_update(esid_data); 543 + 544 + return 0; 545 + } 546 + 547 + static long slb_allocate_kernel(unsigned long ea, unsigned long id) 548 + { 549 + unsigned long context; 550 + unsigned long flags; 551 + int ssize; 552 + 553 + if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) 554 + return -EFAULT; 555 + 556 + if (id == KERNEL_REGION_ID) { 557 + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; 558 + #ifdef CONFIG_SPARSEMEM_VMEMMAP 559 + } else if (id == VMEMMAP_REGION_ID) { 560 + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp; 561 + #endif 562 + } else if (id == VMALLOC_REGION_ID) { 563 + if (ea < H_VMALLOC_END) 564 + flags = get_paca()->vmalloc_sllp; 565 + else 566 + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; 567 + } else { 568 + return -EFAULT; 569 + } 570 + 571 + ssize = MMU_SEGSIZE_1T; 572 + if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) 573 + ssize = MMU_SEGSIZE_256M; 574 + 575 + context = id - KERNEL_REGION_CONTEXT_OFFSET; 576 + 577 + return slb_insert_entry(ea, context, flags, ssize, true); 578 + } 579 + 580 + static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) 581 + { 582 + unsigned long context; 583 + unsigned long flags; 584 + int bpsize; 585 + int ssize; 527 586 528 587 /* 529 588 * consider this as bad access if we take a SLB miss 530 589 * on an address above addr limit. 
531 590 */ 532 - if (ea >= current->mm->context.slb_addr_limit) 533 - goto slb_bad_addr; 591 + if (ea >= mm->context.slb_addr_limit) 592 + return -EFAULT; 534 593 535 - context = get_ea_context(&current->mm->context, ea); 594 + context = get_ea_context(&mm->context, ea); 536 595 if (!context) 537 - goto slb_bad_addr; 596 + return -EFAULT; 538 597 539 - handle_multi_context_slb_miss(context, ea); 540 - exception_exit(prev_state); 541 - return; 598 + if (unlikely(ea >= H_PGTABLE_RANGE)) { 599 + WARN_ON(1); 600 + return -EFAULT; 601 + } 542 602 543 - slb_bad_addr: 544 - if (user_mode(regs)) 545 - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); 546 - else 547 - bad_page_fault(regs, ea, SIGSEGV); 548 - exception_exit(prev_state); 603 + ssize = user_segment_size(ea); 604 + 605 + bpsize = get_slice_psize(mm, ea); 606 + flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; 607 + 608 + return slb_insert_entry(ea, context, flags, ssize, false); 609 + } 610 + 611 + long do_slb_fault(struct pt_regs *regs, unsigned long ea) 612 + { 613 + unsigned long id = REGION_ID(ea); 614 + 615 + /* IRQs are not reconciled here, so can't check irqs_disabled */ 616 + VM_WARN_ON(mfmsr() & MSR_EE); 617 + 618 + if (unlikely(!(regs->msr & MSR_RI))) 619 + return -EINVAL; 620 + 621 + /* 622 + * SLB kernel faults must be very careful not to touch anything 623 + * that is not bolted. E.g., PACA and global variables are okay, 624 + * mm->context stuff is not. 625 + * 626 + * SLB user faults can access all of kernel memory, but must be 627 + * careful not to touch things like IRQ state because it is not 628 + * "reconciled" here. The difficulty is that we must use 629 + * fast_exception_return to return from kernel SLB faults without 630 + * looking at possible non-bolted memory. We could test user vs 631 + * kernel faults in the interrupt handler asm and do a full fault, 632 + * reconcile, ret_from_except for user faults which would make them 633 + * first class kernel code. 
But for performance it's probably nicer 634 + * if they go via fast_exception_return too. 635 + */ 636 + if (id >= KERNEL_REGION_ID) { 637 + return slb_allocate_kernel(ea, id); 638 + } else { 639 + struct mm_struct *mm = current->mm; 640 + 641 + if (unlikely(!mm)) 642 + return -EFAULT; 643 + 644 + return slb_allocate_user(mm, ea); 645 + } 646 + } 647 + 648 + void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) 649 + { 650 + if (err == -EFAULT) { 651 + if (user_mode(regs)) 652 + _exception(SIGSEGV, regs, SEGV_BNDERR, ea); 653 + else 654 + bad_page_fault(regs, ea, SIGSEGV); 655 + } else if (err == -EINVAL) { 656 + unrecoverable_exception(regs); 657 + } else { 658 + BUG(); 659 + } 549 660 }
-335
arch/powerpc/mm/slb_low.S
··· 1 - /* 2 - * Low-level SLB routines 3 - * 4 - * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM 5 - * 6 - * Based on earlier C version: 7 - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com 8 - * Copyright (c) 2001 Dave Engebretsen 9 - * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM 10 - * 11 - * This program is free software; you can redistribute it and/or 12 - * modify it under the terms of the GNU General Public License 13 - * as published by the Free Software Foundation; either version 14 - * 2 of the License, or (at your option) any later version. 15 - */ 16 - 17 - #include <asm/processor.h> 18 - #include <asm/ppc_asm.h> 19 - #include <asm/asm-offsets.h> 20 - #include <asm/cputable.h> 21 - #include <asm/page.h> 22 - #include <asm/mmu.h> 23 - #include <asm/pgtable.h> 24 - #include <asm/firmware.h> 25 - #include <asm/feature-fixups.h> 26 - 27 - /* 28 - * This macro generates asm code to compute the VSID scramble 29 - * function. Used in slb_allocate() and do_stab_bolted. The function 30 - * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS 31 - * 32 - * rt = register containing the proto-VSID and into which the 33 - * VSID will be stored 34 - * rx = scratch register (clobbered) 35 - * rf = flags 36 - * 37 - * - rt and rx must be different registers 38 - * - The answer will end up in the low VSID_BITS bits of rt. The higher 39 - * bits may contain other garbage, so you may need to mask the 40 - * result. 
41 - */ 42 - #define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \ 43 - lis rx,VSID_MULTIPLIER_##size@h; \ 44 - ori rx,rx,VSID_MULTIPLIER_##size@l; \ 45 - mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ 46 - /* \ 47 - * powermac get slb fault before feature fixup, so make 65 bit part \ 48 - * the default part of feature fixup \ 49 - */ \ 50 - BEGIN_MMU_FTR_SECTION \ 51 - srdi rx,rt,VSID_BITS_65_##size; \ 52 - clrldi rt,rt,(64-VSID_BITS_65_##size); \ 53 - add rt,rt,rx; \ 54 - addi rx,rt,1; \ 55 - srdi rx,rx,VSID_BITS_65_##size; \ 56 - add rt,rt,rx; \ 57 - rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \ 58 - MMU_FTR_SECTION_ELSE \ 59 - srdi rx,rt,VSID_BITS_##size; \ 60 - clrldi rt,rt,(64-VSID_BITS_##size); \ 61 - add rt,rt,rx; /* add high and low bits */ \ 62 - addi rx,rt,1; \ 63 - srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ 64 - add rt,rt,rx; \ 65 - rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \ 66 - ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) 67 - 68 - 69 - /* void slb_allocate(unsigned long ea); 70 - * 71 - * Create an SLB entry for the given EA (user or kernel). 72 - * r3 = faulting address, r13 = PACA 73 - * r9, r10, r11 are clobbered by this function 74 - * r3 is preserved. 75 - * No other registers are examined or changed. 76 - */ 77 - _GLOBAL(slb_allocate) 78 - /* 79 - * Check if the address falls within the range of the first context, or 80 - * if we may need to handle multi context. For the first context we 81 - * allocate the slb entry via the fast path below. For large address we 82 - * branch out to C-code and see if additional contexts have been 83 - * allocated. 84 - * The test here is: 85 - * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT) 86 - */ 87 - rldicr. 
r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4) 88 - bne- 8f 89 - 90 - srdi r9,r3,60 /* get region */ 91 - srdi r10,r3,SID_SHIFT /* get esid */ 92 - cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ 93 - 94 - /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ 95 - blt cr7,0f /* user or kernel? */ 96 - 97 - /* Check if hitting the linear mapping or some other kernel space 98 - */ 99 - bne cr7,1f 100 - 101 - /* Linear mapping encoding bits, the "li" instruction below will 102 - * be patched by the kernel at boot 103 - */ 104 - .globl slb_miss_kernel_load_linear 105 - slb_miss_kernel_load_linear: 106 - li r11,0 107 - /* 108 - * context = (ea >> 60) - (0xc - 1) 109 - * r9 = region id. 110 - */ 111 - subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET 112 - 113 - BEGIN_FTR_SECTION 114 - b .Lslb_finish_load 115 - END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) 116 - b .Lslb_finish_load_1T 117 - 118 - 1: 119 - #ifdef CONFIG_SPARSEMEM_VMEMMAP 120 - cmpldi cr0,r9,0xf 121 - bne 1f 122 - /* Check virtual memmap region. To be patched at kernel boot */ 123 - .globl slb_miss_kernel_load_vmemmap 124 - slb_miss_kernel_load_vmemmap: 125 - li r11,0 126 - b 6f 127 - 1: 128 - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ 129 - 130 - /* 131 - * r10 contains the ESID, which is the original faulting EA shifted 132 - * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28) 133 - * which is 0xd00038000. That can't be used as an immediate, even if we 134 - * ignored the 0xd, so we have to load it into a register, and we only 135 - * have one register free. So we must load all of (H_VMALLOC_END >> 28) 136 - * into a register and compare ESID against that. 
137 - */ 138 - lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000 139 - ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800 140 - // Rotate left 4, then mask with 0xffffffff0 141 - rldic r11,r11,4,28 // r11 = 0xd00038000 142 - cmpld r10,r11 // if r10 >= r11 143 - bge 5f // goto io_mapping 144 - 145 - /* 146 - * vmalloc mapping gets the encoding from the PACA as the mapping 147 - * can be demoted from 64K -> 4K dynamically on some machines. 148 - */ 149 - lhz r11,PACAVMALLOCSLLP(r13) 150 - b 6f 151 - 5: 152 - /* IO mapping */ 153 - .globl slb_miss_kernel_load_io 154 - slb_miss_kernel_load_io: 155 - li r11,0 156 - 6: 157 - /* 158 - * context = (ea >> 60) - (0xc - 1) 159 - * r9 = region id. 160 - */ 161 - subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET 162 - 163 - BEGIN_FTR_SECTION 164 - b .Lslb_finish_load 165 - END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) 166 - b .Lslb_finish_load_1T 167 - 168 - 0: /* 169 - * For userspace addresses, make sure this is region 0. 170 - */ 171 - cmpdi r9, 0 172 - bne- 8f 173 - /* 174 - * user space make sure we are within the allowed limit 175 - */ 176 - ld r11,PACA_SLB_ADDR_LIMIT(r13) 177 - cmpld r3,r11 178 - bge- 8f 179 - 180 - /* when using slices, we extract the psize off the slice bitmaps 181 - * and then we need to get the sllp encoding off the mmu_psize_defs 182 - * array. 183 - * 184 - * XXX This is a bit inefficient especially for the normal case, 185 - * so we should try to implement a fast path for the standard page 186 - * size using the old sllp value so we avoid the array. 
We cannot 187 - * really do dynamic patching unfortunately as processes might flip 188 - * between 4k and 64k standard page size 189 - */ 190 - #ifdef CONFIG_PPC_MM_SLICES 191 - /* r10 have esid */ 192 - cmpldi r10,16 193 - /* below SLICE_LOW_TOP */ 194 - blt 5f 195 - /* 196 - * Handle hpsizes, 197 - * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index 198 - */ 199 - srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */ 200 - addi r9,r11,PACAHIGHSLICEPSIZE 201 - lbzx r9,r13,r9 /* r9 is hpsizes[r11] */ 202 - /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */ 203 - rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63 204 - b 6f 205 - 206 - 5: 207 - /* 208 - * Handle lpsizes 209 - * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index 210 - */ 211 - srdi r11,r10,1 /* index */ 212 - addi r9,r11,PACALOWSLICESPSIZE 213 - lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ 214 - rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ 215 - 6: 216 - sldi r11,r11,2 /* index * 4 */ 217 - /* Extract the psize and multiply to get an array offset */ 218 - srd r9,r9,r11 219 - andi. 
r9,r9,0xf 220 - mulli r9,r9,MMUPSIZEDEFSIZE 221 - 222 - /* Now get to the array and obtain the sllp 223 - */ 224 - ld r11,PACATOC(r13) 225 - ld r11,mmu_psize_defs@got(r11) 226 - add r11,r11,r9 227 - ld r11,MMUPSIZESLLP(r11) 228 - ori r11,r11,SLB_VSID_USER 229 - #else 230 - /* paca context sllp already contains the SLB_VSID_USER bits */ 231 - lhz r11,PACACONTEXTSLLP(r13) 232 - #endif /* CONFIG_PPC_MM_SLICES */ 233 - 234 - ld r9,PACACONTEXTID(r13) 235 - BEGIN_FTR_SECTION 236 - cmpldi r10,0x1000 237 - bge .Lslb_finish_load_1T 238 - END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) 239 - b .Lslb_finish_load 240 - 241 - 8: /* invalid EA - return an error indication */ 242 - crset 4*cr0+eq /* indicate failure */ 243 - blr 244 - 245 - /* 246 - * Finish loading of an SLB entry and return 247 - * 248 - * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET 249 - */ 250 - .Lslb_finish_load: 251 - rldimi r10,r9,ESID_BITS,0 252 - ASM_VSID_SCRAMBLE(r10,r9,r11,256M) 253 - /* r3 = EA, r11 = VSID data */ 254 - /* 255 - * Find a slot, round robin. Previously we tried to find a 256 - * free slot first but that took too long. Unfortunately we 257 - * dont have any LRU information to help us choose a slot. 258 - */ 259 - 260 - mr r9,r3 261 - 262 - /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */ 263 - 7: ld r10,PACASTABRR(r13) 264 - addi r10,r10,1 265 - /* This gets soft patched on boot. */ 266 - .globl slb_compare_rr_to_size 267 - slb_compare_rr_to_size: 268 - cmpldi r10,0 269 - 270 - blt+ 4f 271 - li r10,SLB_NUM_BOLTED 272 - 273 - 4: 274 - std r10,PACASTABRR(r13) 275 - 276 - 3: 277 - rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */ 278 - oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */ 279 - 280 - /* r9 = ESID data, r11 = VSID data */ 281 - 282 - /* 283 - * No need for an isync before or after this slbmte. The exception 284 - * we enter with and the rfid we exit with are context synchronizing. 
285 - */ 286 - slbmte r11,r10 287 - 288 - /* we're done for kernel addresses */ 289 - crclr 4*cr0+eq /* set result to "success" */ 290 - bgelr cr7 291 - 292 - /* Update the slb cache */ 293 - lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ 294 - cmpldi r9,SLB_CACHE_ENTRIES 295 - bge 1f 296 - 297 - /* still room in the slb cache */ 298 - sldi r11,r9,2 /* r11 = offset * sizeof(u32) */ 299 - srdi r10,r10,28 /* get the 36 bits of the ESID */ 300 - add r11,r11,r13 /* r11 = (u32 *)paca + offset */ 301 - stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ 302 - addi r9,r9,1 /* offset++ */ 303 - b 2f 304 - 1: /* offset >= SLB_CACHE_ENTRIES */ 305 - li r9,SLB_CACHE_ENTRIES+1 306 - 2: 307 - sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ 308 - crclr 4*cr0+eq /* set result to "success" */ 309 - blr 310 - 311 - /* 312 - * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. 313 - * 314 - * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 315 - */ 316 - .Lslb_finish_load_1T: 317 - srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ 318 - rldimi r10,r9,ESID_BITS_1T,0 319 - ASM_VSID_SCRAMBLE(r10,r9,r11,1T) 320 - 321 - li r10,MMU_SEGSIZE_1T 322 - rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ 323 - 324 - /* r3 = EA, r11 = VSID data */ 325 - clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */ 326 - b 7b 327 - 328 - 329 - _ASM_NOKPROBE_SYMBOL(slb_allocate) 330 - _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) 331 - _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) 332 - _ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) 333 - #ifdef CONFIG_SPARSEMEM_VMEMMAP 334 - _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap) 335 - #endif