Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: mm: Correct virt_to_phys patching for 64 bit physical addresses

The current phys_to_virt patching mechanism works only for 32 bit
physical addresses and this patch extends the idea for 64bit physical
addresses.

The 64bit v2p patching mechanism patches the higher 8 bits of physical
address with a constant using 'mov' instruction and lower 32bits are patched
using 'add'. While this is correct, on those platforms where the lowmem
addressable physical memory spans across the 4GB boundary, a carry bit can be
produced as a result of the addition of the lower 32 bits. This has to be taken
into account and added into the upper 32 bits. The patched __pv_offset and va
are added in the lower 32 bits, where __pv_offset can be in two's complement
form when PA_START < VA_START, and that can result in a false carry bit.

e.g
1) PA = 0x80000000; VA = 0xC0000000
__pv_offset = PA - VA = 0xC0000000 (2's complement)

2) PA = 0x2 80000000; VA = 0xC0000000
__pv_offset = PA - VA = 0x1 C0000000

So adding __pv_offset + VA should never result in a true overflow for (1).
So in order to differentiate a true carry, __pv_offset is extended
to 64 bits and the upper 32 bits will hold 0xffffffff when __pv_offset is
in two's complement form. For the same reason, an 'mvn #0' is patched in
instead of 'mov' in that case. Since the mov, add and sub instructions are
to be patched with different constants inside the same stub, the rotation
field of the opcode is used to differentiate between them.

So the above examples for v2p translation becomes for VA=0xC0000000,
1) PA[63:32] = 0xffffffff
PA[31:0] = VA + 0xC0000000 --> results in a carry
PA[63:32] = PA[63:32] + carry

PA[63:0] = 0x0 80000000

2) PA[63:32] = 0x1
PA[31:0] = VA + 0xC0000000 --> results in a carry
PA[63:32] = PA[63:32] + carry

PA[63:0] = 0x2 80000000

The above ideas were suggested by Nicolas Pitre <nico@linaro.org> as
part of the review of first and second versions of the subject patch.

There is no corresponding change on the phys_to_virt() side, because
computations on the upper 32-bits would be discarded anyway.

Cc: Russell King <linux@arm.linux.org.uk>

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Sricharan R <r.sricharan@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

authored by

Sricharan R and committed by
Santosh Shilimkar
f52bb722 c1a5f4f6

+82 -19
+34 -3
arch/arm/include/asm/memory.h
··· 172 172 * so that all we need to do is modify the 8-bit constant field. 173 173 */ 174 174 #define __PV_BITS_31_24 0x81000000 175 + #define __PV_BITS_7_0 0x81 175 176 176 177 extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x); 177 - extern unsigned long __pv_phys_offset; 178 + extern u64 __pv_phys_offset; 179 + extern u64 __pv_offset; 180 + extern void fixup_pv_table(const void *, unsigned long); 181 + extern const void *__pv_table_begin, *__pv_table_end; 182 + 178 183 #define PHYS_OFFSET __pv_phys_offset 179 184 180 185 #define __pv_stub(from,to,instr,type) \ ··· 191 186 : "=r" (to) \ 192 187 : "r" (from), "I" (type)) 193 188 189 + #define __pv_stub_mov_hi(t) \ 190 + __asm__ volatile("@ __pv_stub_mov\n" \ 191 + "1: mov %R0, %1\n" \ 192 + " .pushsection .pv_table,\"a\"\n" \ 193 + " .long 1b\n" \ 194 + " .popsection\n" \ 195 + : "=r" (t) \ 196 + : "I" (__PV_BITS_7_0)) 197 + 198 + #define __pv_add_carry_stub(x, y) \ 199 + __asm__ volatile("@ __pv_add_carry_stub\n" \ 200 + "1: adds %Q0, %1, %2\n" \ 201 + " adc %R0, %R0, #0\n" \ 202 + " .pushsection .pv_table,\"a\"\n" \ 203 + " .long 1b\n" \ 204 + " .popsection\n" \ 205 + : "+r" (y) \ 206 + : "r" (x), "I" (__PV_BITS_31_24) \ 207 + : "cc") 208 + 194 209 static inline phys_addr_t __virt_to_phys(unsigned long x) 195 210 { 196 - unsigned long t; 197 - __pv_stub(x, t, "add", __PV_BITS_31_24); 211 + phys_addr_t t; 212 + 213 + if (sizeof(phys_addr_t) == 4) { 214 + __pv_stub(x, t, "add", __PV_BITS_31_24); 215 + } else { 216 + __pv_stub_mov_hi(t); 217 + __pv_add_carry_stub(x, t); 218 + } 198 219 return t; 199 220 } 200 221
+1
arch/arm/kernel/armksyms.c
··· 155 155 156 156 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT 157 157 EXPORT_SYMBOL(__pv_phys_offset); 158 + EXPORT_SYMBOL(__pv_offset); 158 159 #endif
+47 -16
arch/arm/kernel/head.S
··· 536 536 ldmfd sp!, {r4 - r6, pc} 537 537 ENDPROC(fixup_smp) 538 538 539 + #ifdef __ARMEB_ 540 + #define LOW_OFFSET 0x4 541 + #define HIGH_OFFSET 0x0 542 + #else 543 + #define LOW_OFFSET 0x0 544 + #define HIGH_OFFSET 0x4 545 + #endif 546 + 539 547 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT 540 548 541 549 /* __fixup_pv_table - patch the stub instructions with the delta between ··· 554 546 __HEAD 555 547 __fixup_pv_table: 556 548 adr r0, 1f 557 - ldmia r0, {r3-r5, r7} 558 - sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET 549 + ldmia r0, {r3-r7} 550 + mvn ip, #0 551 + subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET 559 552 add r4, r4, r3 @ adjust table start address 560 553 add r5, r5, r3 @ adjust table end address 561 - add r7, r7, r3 @ adjust __pv_phys_offset address 562 - str r8, [r7] @ save computed PHYS_OFFSET to __pv_phys_offset 554 + add r6, r6, r3 @ adjust __pv_phys_offset address 555 + add r7, r7, r3 @ adjust __pv_offset address 556 + str r8, [r6, #LOW_OFFSET] @ save computed PHYS_OFFSET to __pv_phys_offset 557 + strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits 563 558 mov r6, r3, lsr #24 @ constant for add/sub instructions 564 559 teq r3, r6, lsl #24 @ must be 16MiB aligned 565 560 THUMB( it ne @ cross section branch ) 566 561 bne __error 567 - str r6, [r7, #4] @ save to __pv_offset 562 + str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits 568 563 b __fixup_a_pv_table 569 564 ENDPROC(__fixup_pv_table) 570 565 ··· 576 565 .long __pv_table_begin 577 566 .long __pv_table_end 578 567 2: .long __pv_phys_offset 568 + .long __pv_offset 579 569 580 570 .text 581 571 __fixup_a_pv_table: 572 + adr r0, 3f 573 + ldr r6, [r0] 574 + add r6, r6, r3 575 + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word 576 + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word 577 + mov r6, r6, lsr #24 578 + cmn r0, #1 582 579 #ifdef CONFIG_THUMB2_KERNEL 580 + moveq r0, #0x200000 @ set bit 21, mov to mvn instruction 583 581 lsls r6, #24 584 582 beq 2f 585 583 clz r7, r6 ··· 602 582 b 2f 603 
583 1: add r7, r3 604 584 ldrh ip, [r7, #2] 605 - and ip, 0x8f00 606 - orr ip, r6 @ mask in offset bits 31-24 585 + tst ip, #0x4000 586 + and ip, #0x8f00 587 + orrne ip, r6 @ mask in offset bits 31-24 588 + orreq ip, r0 @ mask in offset bits 7-0 607 589 strh ip, [r7, #2] 590 + ldrheq ip, [r7] 591 + biceq ip, #0x20 592 + orreq ip, ip, r0, lsr #16 593 + strheq ip, [r7] 608 594 2: cmp r4, r5 609 595 ldrcc r7, [r4], #4 @ use branch for delay slot 610 596 bcc 1b 611 597 bx lr 612 598 #else 599 + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction 613 600 b 2f 614 601 1: ldr ip, [r7, r3] 615 602 bic ip, ip, #0x000000ff 616 - orr ip, ip, r6 @ mask in offset bits 31-24 603 + tst ip, #0xf00 @ check the rotation field 604 + orrne ip, ip, r6 @ mask in offset bits 31-24 605 + biceq ip, ip, #0x400000 @ clear bit 22 606 + orreq ip, ip, r0 @ mask in offset bits 7-0 617 607 str ip, [r7, r3] 618 608 2: cmp r4, r5 619 609 ldrcc r7, [r4], #4 @ use branch for delay slot ··· 632 602 #endif 633 603 ENDPROC(__fixup_a_pv_table) 634 604 605 + 3: .long __pv_offset 606 + 635 607 ENTRY(fixup_pv_table) 636 608 stmfd sp!, {r4 - r7, lr} 637 - ldr r2, 2f @ get address of __pv_phys_offset 638 609 mov r3, #0 @ no offset 639 610 mov r4, r0 @ r0 = table start 640 611 add r5, r0, r1 @ r1 = table size 641 - ldr r6, [r2, #4] @ get __pv_offset 642 612 bl __fixup_a_pv_table 643 613 ldmfd sp!, {r4 - r7, pc} 644 614 ENDPROC(fixup_pv_table) 645 - 646 - .align 647 - 2: .long __pv_phys_offset 648 615 649 616 .data 650 617 .globl __pv_phys_offset 651 618 .type __pv_phys_offset, %object 652 619 __pv_phys_offset: 653 - .long 0 654 - .size __pv_phys_offset, . - __pv_phys_offset 620 + .quad 0 621 + .size __pv_phys_offset, . -__pv_phys_offset 622 + 623 + .globl __pv_offset 624 + .type __pv_offset, %object 655 625 __pv_offset: 656 - .long 0 626 + .quad 0 627 + .size __pv_offset, . -__pv_offset 657 628 #endif 658 629 659 630 #include "head-common.S"