i386: Make patching more robust, fix paravirt issue

Commit 19d36ccdc34f5ed444f8a6af0cbfdb6790eb1177 "x86: Fix alternatives
and kprobes to remap write-protected kernel text" uses code which is
being patched for patching.

In particular, paravirt_ops does patching in two stages: first it
calls paravirt_ops.patch, then it fills any remaining instructions
with nop_out(). nop_out calls text_poke() which calls
lookup_address() which calls pgd_val() (aka paravirt_ops.pgd_val):
that call site is one of the places we patch.

If we always do patching as one single call to text_poke(), we only
need to make sure we're not patching the memcpy in text_poke itself.
This means the prototype of paravirt_ops.patch needs to change, to
marshal the new code into a buffer rather than patching in place as it
does now. It also means all patching goes through text_poke(), which
is known to be safe (apply_alternatives is also changed to make a
single patch).

AK: fix compilation on x86-64 (bad rusty!)
AK: fix boot on x86-64 (sigh)
AK: merged with other patches

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by Andi Kleen and committed by Linus Torvalds ab144f5e d3f3c934

+90 -67
+21 -12
arch/i386/kernel/alternative.c
··· 11 #include <asm/mce.h> 12 #include <asm/nmi.h> 13 14 #ifdef CONFIG_HOTPLUG_CPU 15 static int smp_alt_once; 16 ··· 150 151 #endif /* CONFIG_X86_64 */ 152 153 - static void nop_out(void *insns, unsigned int len) 154 { 155 unsigned char **noptable = find_nop_table(); 156 ··· 159 unsigned int noplen = len; 160 if (noplen > ASM_NOP_MAX) 161 noplen = ASM_NOP_MAX; 162 - text_poke(insns, noptable[noplen], noplen); 163 insns += noplen; 164 len -= noplen; 165 } ··· 177 void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 178 { 179 struct alt_instr *a; 180 - u8 *instr; 181 - int diff; 182 183 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); 184 for (a = start; a < end; a++) { 185 BUG_ON(a->replacementlen > a->instrlen); 186 if (!boot_cpu_has(a->cpuid)) 187 continue; 188 - instr = a->instr; 189 #ifdef CONFIG_X86_64 190 /* vsyscall code is not mapped yet. resolve it manually. */ 191 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { ··· 194 __FUNCTION__, a->instr, instr); 195 } 196 #endif 197 - memcpy(instr, a->replacement, a->replacementlen); 198 - diff = a->instrlen - a->replacementlen; 199 - nop_out(instr + a->replacementlen, diff); 200 } 201 } 202 ··· 219 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) 220 { 221 u8 **ptr; 222 223 if (noreplace_smp) 224 return; 225 226 for (ptr = start; ptr < end; ptr++) { 227 if (*ptr < text) 228 continue; 229 if (*ptr > text_end) 230 continue; 231 - nop_out(*ptr, 1); 232 }; 233 } 234 ··· 357 struct paravirt_patch_site *end) 358 { 359 struct paravirt_patch_site *p; 360 361 if (noreplace_paravirt) 362 return; ··· 365 for (p = start; p < end; p++) { 366 unsigned int used; 367 368 - used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, 369 - p->len); 370 371 BUG_ON(used > p->len); 372 373 /* Pad the rest with nops */ 374 - nop_out(p->instr + used, p->len - used); 375 } 376 } 377 extern struct paravirt_patch_site __start_parainstructions[],
··· 11 #include <asm/mce.h> 12 #include <asm/nmi.h> 13 14 + #define MAX_PATCH_LEN (255-1) 15 + 16 #ifdef CONFIG_HOTPLUG_CPU 17 static int smp_alt_once; 18 ··· 148 149 #endif /* CONFIG_X86_64 */ 150 151 + /* Use this to add nops to a buffer, then text_poke the whole buffer. */ 152 + static void add_nops(void *insns, unsigned int len) 153 { 154 unsigned char **noptable = find_nop_table(); 155 ··· 156 unsigned int noplen = len; 157 if (noplen > ASM_NOP_MAX) 158 noplen = ASM_NOP_MAX; 159 + memcpy(insns, noptable[noplen], noplen); 160 insns += noplen; 161 len -= noplen; 162 } ··· 174 void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 175 { 176 struct alt_instr *a; 177 + char insnbuf[MAX_PATCH_LEN]; 178 179 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); 180 for (a = start; a < end; a++) { 181 + u8 *instr = a->instr; 182 BUG_ON(a->replacementlen > a->instrlen); 183 + BUG_ON(a->instrlen > sizeof(insnbuf)); 184 if (!boot_cpu_has(a->cpuid)) 185 continue; 186 #ifdef CONFIG_X86_64 187 /* vsyscall code is not mapped yet. resolve it manually. 
*/ 188 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { ··· 191 __FUNCTION__, a->instr, instr); 192 } 193 #endif 194 + memcpy(insnbuf, a->replacement, a->replacementlen); 195 + add_nops(insnbuf + a->replacementlen, 196 + a->instrlen - a->replacementlen); 197 + text_poke(instr, insnbuf, a->instrlen); 198 } 199 } 200 ··· 215 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) 216 { 217 u8 **ptr; 218 + char insn[1]; 219 220 if (noreplace_smp) 221 return; 222 223 + add_nops(insn, 1); 224 for (ptr = start; ptr < end; ptr++) { 225 if (*ptr < text) 226 continue; 227 if (*ptr > text_end) 228 continue; 229 + text_poke(*ptr, insn, 1); 230 }; 231 } 232 ··· 351 struct paravirt_patch_site *end) 352 { 353 struct paravirt_patch_site *p; 354 + char insnbuf[MAX_PATCH_LEN]; 355 356 if (noreplace_paravirt) 357 return; ··· 358 for (p = start; p < end; p++) { 359 unsigned int used; 360 361 + BUG_ON(p->len > MAX_PATCH_LEN); 362 + used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, 363 + (unsigned long)p->instr, p->len); 364 365 BUG_ON(used > p->len); 366 367 /* Pad the rest with nops */ 368 + add_nops(insnbuf + used, p->len - used); 369 + text_poke(p->instr, insnbuf, p->len); 370 } 371 } 372 extern struct paravirt_patch_site __start_parainstructions[],
+26 -26
arch/i386/kernel/paravirt.c
··· 69 70 DEF_NATIVE(ud2a, "ud2a"); 71 72 - static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) 73 { 74 const unsigned char *start, *end; 75 unsigned ret; ··· 91 #undef SITE 92 93 patch_site: 94 - ret = paravirt_patch_insns(insns, len, start, end); 95 break; 96 97 case PARAVIRT_PATCH(make_pgd): ··· 108 break; 109 110 default: 111 - ret = paravirt_patch_default(type, clobbers, insns, len); 112 break; 113 } 114 ··· 130 u32 delta; 131 } __attribute__((packed)); 132 133 - unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, 134 - void *site, u16 site_clobbers, 135 unsigned len) 136 { 137 - unsigned char *call = site; 138 - unsigned long delta = (unsigned long)target - (unsigned long)(call+5); 139 - struct branch b; 140 141 if (tgt_clobbers & ~site_clobbers) 142 return len; /* target would clobber too much for this site */ 143 if (len < 5) 144 return len; /* call too long for patch site */ 145 146 - b.opcode = 0xe8; /* call */ 147 - b.delta = delta; 148 - BUILD_BUG_ON(sizeof(b) != 5); 149 - text_poke(call, (unsigned char *)&b, 5); 150 151 return 5; 152 } 153 154 - unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) 155 { 156 - unsigned char *jmp = site; 157 - unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); 158 - struct branch b; 159 160 if (len < 5) 161 return len; /* call too long for patch site */ 162 163 - b.opcode = 0xe9; /* jmp */ 164 - b.delta = delta; 165 - text_poke(jmp, (unsigned char *)&b, 5); 166 167 return 5; 168 } 169 170 - unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) 171 { 172 void *opfunc = *((void **)&paravirt_ops + type); 173 unsigned ret; 174 175 if (opfunc == NULL) 176 /* If there's no function, patch it with a ud2a (BUG) */ 177 - ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a); 178 else if (opfunc == paravirt_nop) 179 /* If the operation is a nop, then nop the callsite */ 180 ret = paravirt_patch_nop(); 181 else if (type == 
PARAVIRT_PATCH(iret) || 182 type == PARAVIRT_PATCH(irq_enable_sysexit)) 183 /* If operation requires a jmp, then jmp */ 184 - ret = paravirt_patch_jmp(opfunc, site, len); 185 else 186 /* Otherwise call the function; assume target could 187 clobber any caller-save reg */ 188 - ret = paravirt_patch_call(opfunc, CLBR_ANY, 189 - site, clobbers, len); 190 191 return ret; 192 } 193 194 - unsigned paravirt_patch_insns(void *site, unsigned len, 195 const char *start, const char *end) 196 { 197 unsigned insn_len = end - start; ··· 198 if (insn_len > len || start == NULL) 199 insn_len = len; 200 else 201 - memcpy(site, start, insn_len); 202 203 return insn_len; 204 }
··· 69 70 DEF_NATIVE(ud2a, "ud2a"); 71 72 + static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 73 + unsigned long addr, unsigned len) 74 { 75 const unsigned char *start, *end; 76 unsigned ret; ··· 90 #undef SITE 91 92 patch_site: 93 + ret = paravirt_patch_insns(ibuf, len, start, end); 94 break; 95 96 case PARAVIRT_PATCH(make_pgd): ··· 107 break; 108 109 default: 110 + ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); 111 break; 112 } 113 ··· 129 u32 delta; 130 } __attribute__((packed)); 131 132 + unsigned paravirt_patch_call(void *insnbuf, 133 + const void *target, u16 tgt_clobbers, 134 + unsigned long addr, u16 site_clobbers, 135 unsigned len) 136 { 137 + struct branch *b = insnbuf; 138 + unsigned long delta = (unsigned long)target - (addr+5); 139 140 if (tgt_clobbers & ~site_clobbers) 141 return len; /* target would clobber too much for this site */ 142 if (len < 5) 143 return len; /* call too long for patch site */ 144 145 + b->opcode = 0xe8; /* call */ 146 + b->delta = delta; 147 + BUILD_BUG_ON(sizeof(*b) != 5); 148 149 return 5; 150 } 151 152 + unsigned paravirt_patch_jmp(const void *target, void *insnbuf, 153 + unsigned long addr, unsigned len) 154 { 155 + struct branch *b = insnbuf; 156 + unsigned long delta = (unsigned long)target - (addr+5); 157 158 if (len < 5) 159 return len; /* call too long for patch site */ 160 161 + b->opcode = 0xe9; /* jmp */ 162 + b->delta = delta; 163 164 return 5; 165 } 166 167 + unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 168 + unsigned long addr, unsigned len) 169 { 170 void *opfunc = *((void **)&paravirt_ops + type); 171 unsigned ret; 172 173 if (opfunc == NULL) 174 /* If there's no function, patch it with a ud2a (BUG) */ 175 + ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); 176 else if (opfunc == paravirt_nop) 177 /* If the operation is a nop, then nop the callsite */ 178 ret = paravirt_patch_nop(); 179 else if (type == PARAVIRT_PATCH(iret) || 180 type == 
PARAVIRT_PATCH(irq_enable_sysexit)) 181 /* If operation requires a jmp, then jmp */ 182 + ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); 183 else 184 /* Otherwise call the function; assume target could 185 clobber any caller-save reg */ 186 + ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, 187 + addr, clobbers, len); 188 189 return ret; 190 } 191 192 + unsigned paravirt_patch_insns(void *insnbuf, unsigned len, 193 const char *start, const char *end) 194 { 195 unsigned insn_len = end - start; ··· 198 if (insn_len > len || start == NULL) 199 insn_len = len; 200 else 201 + memcpy(insnbuf, start, insn_len); 202 203 return insn_len; 204 }
+21 -14
arch/i386/kernel/vmi.c
··· 87 #define IRQ_PATCH_INT_MASK 0 88 #define IRQ_PATCH_DISABLE 5 89 90 - static inline void patch_offset(unsigned char *eip, unsigned char *dest) 91 { 92 - *(unsigned long *)(eip+1) = dest-eip-5; 93 } 94 95 - static unsigned patch_internal(int call, unsigned len, void *insns) 96 { 97 u64 reloc; 98 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; ··· 102 switch(rel->type) { 103 case VMI_RELOCATION_CALL_REL: 104 BUG_ON(len < 5); 105 - *(char *)insns = MNEM_CALL; 106 - patch_offset(insns, rel->eip); 107 return 5; 108 109 case VMI_RELOCATION_JUMP_REL: 110 BUG_ON(len < 5); 111 - *(char *)insns = MNEM_JMP; 112 - patch_offset(insns, rel->eip); 113 return 5; 114 115 case VMI_RELOCATION_NOP: ··· 130 * Apply patch if appropriate, return length of new instruction 131 * sequence. The callee does nop padding for us. 132 */ 133 - static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) 134 { 135 switch (type) { 136 case PARAVIRT_PATCH(irq_disable): 137 - return patch_internal(VMI_CALL_DisableInterrupts, len, insns); 138 case PARAVIRT_PATCH(irq_enable): 139 - return patch_internal(VMI_CALL_EnableInterrupts, len, insns); 140 case PARAVIRT_PATCH(restore_fl): 141 - return patch_internal(VMI_CALL_SetInterruptMask, len, insns); 142 case PARAVIRT_PATCH(save_fl): 143 - return patch_internal(VMI_CALL_GetInterruptMask, len, insns); 144 case PARAVIRT_PATCH(iret): 145 - return patch_internal(VMI_CALL_IRET, len, insns); 146 case PARAVIRT_PATCH(irq_enable_sysexit): 147 - return patch_internal(VMI_CALL_SYSEXIT, len, insns); 148 default: 149 break; 150 }
··· 87 #define IRQ_PATCH_INT_MASK 0 88 #define IRQ_PATCH_DISABLE 5 89 90 + static inline void patch_offset(void *insnbuf, 91 + unsigned long eip, unsigned long dest) 92 { 93 + *(unsigned long *)(insnbuf+1) = dest-eip-5; 94 } 95 96 + static unsigned patch_internal(int call, unsigned len, void *insnbuf, 97 + unsigned long eip) 98 { 99 u64 reloc; 100 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; ··· 100 switch(rel->type) { 101 case VMI_RELOCATION_CALL_REL: 102 BUG_ON(len < 5); 103 + *(char *)insnbuf = MNEM_CALL; 104 + patch_offset(insnbuf, eip, (unsigned long)rel->eip); 105 return 5; 106 107 case VMI_RELOCATION_JUMP_REL: 108 BUG_ON(len < 5); 109 + *(char *)insnbuf = MNEM_JMP; 110 + patch_offset(insnbuf, eip, (unsigned long)rel->eip); 111 return 5; 112 113 case VMI_RELOCATION_NOP: ··· 128 * Apply patch if appropriate, return length of new instruction 129 * sequence. The callee does nop padding for us. 130 */ 131 + static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, 132 + unsigned long eip, unsigned len) 133 { 134 switch (type) { 135 case PARAVIRT_PATCH(irq_disable): 136 + return patch_internal(VMI_CALL_DisableInterrupts, len, 137 + insns, eip); 138 case PARAVIRT_PATCH(irq_enable): 139 + return patch_internal(VMI_CALL_EnableInterrupts, len, 140 + insns, eip); 141 case PARAVIRT_PATCH(restore_fl): 142 + return patch_internal(VMI_CALL_SetInterruptMask, len, 143 + insns, eip); 144 case PARAVIRT_PATCH(save_fl): 145 + return patch_internal(VMI_CALL_GetInterruptMask, len, 146 + insns, eip); 147 case PARAVIRT_PATCH(iret): 148 + return patch_internal(VMI_CALL_IRET, len, insns, eip); 149 case PARAVIRT_PATCH(irq_enable_sysexit): 150 + return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); 151 default: 152 break; 153 }
+7 -5
arch/i386/xen/enlighten.c
··· 842 } 843 } 844 845 - static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len) 846 { 847 char *start, *end, *reloc; 848 unsigned ret; ··· 870 if (start == NULL || (end-start) > len) 871 goto default_patch; 872 873 - ret = paravirt_patch_insns(insns, len, start, end); 874 875 /* Note: because reloc is assigned from something that 876 appears to be an array, gcc assumes it's non-null, ··· 878 end. */ 879 if (reloc > start && reloc < end) { 880 int reloc_off = reloc - start; 881 - long *relocp = (long *)(insns + reloc_off); 882 - long delta = start - (char *)insns; 883 884 *relocp += delta; 885 } ··· 887 888 default_patch: 889 default: 890 - ret = paravirt_patch_default(type, clobbers, insns, len); 891 break; 892 } 893
··· 842 } 843 } 844 845 + static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, 846 + unsigned long addr, unsigned len) 847 { 848 char *start, *end, *reloc; 849 unsigned ret; ··· 869 if (start == NULL || (end-start) > len) 870 goto default_patch; 871 872 + ret = paravirt_patch_insns(insnbuf, len, start, end); 873 874 /* Note: because reloc is assigned from something that 875 appears to be an array, gcc assumes it's non-null, ··· 877 end. */ 878 if (reloc > start && reloc < end) { 879 int reloc_off = reloc - start; 880 + long *relocp = (long *)(insnbuf + reloc_off); 881 + long delta = start - (char *)addr; 882 883 *relocp += delta; 884 } ··· 886 887 default_patch: 888 default: 889 + ret = paravirt_patch_default(type, clobbers, insnbuf, 890 + addr, len); 891 break; 892 } 893
+5 -4
drivers/lguest/lguest.c
··· 936 /* Now our patch routine is fairly simple (based on the native one in 937 * paravirt.c). If we have a replacement, we copy it in and return how much of 938 * the available space we used. */ 939 - static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) 940 { 941 unsigned int insn_len; 942 943 /* Don't do anything special if we don't have a replacement */ 944 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) 945 - return paravirt_patch_default(type, clobber, insns, len); 946 947 insn_len = lguest_insns[type].end - lguest_insns[type].start; 948 949 /* Similarly if we can't fit replacement (shouldn't happen, but let's 950 * be thorough). */ 951 if (len < insn_len) 952 - return paravirt_patch_default(type, clobber, insns, len); 953 954 /* Copy in our instructions. */ 955 - memcpy(insns, lguest_insns[type].start, insn_len); 956 return insn_len; 957 } 958
··· 936 /* Now our patch routine is fairly simple (based on the native one in 937 * paravirt.c). If we have a replacement, we copy it in and return how much of 938 * the available space we used. */ 939 + static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, 940 + unsigned long addr, unsigned len) 941 { 942 unsigned int insn_len; 943 944 /* Don't do anything special if we don't have a replacement */ 945 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) 946 + return paravirt_patch_default(type, clobber, ibuf, addr, len); 947 948 insn_len = lguest_insns[type].end - lguest_insns[type].start; 949 950 /* Similarly if we can't fit replacement (shouldn't happen, but let's 951 * be thorough). */ 952 if (len < insn_len) 953 + return paravirt_patch_default(type, clobber, ibuf, addr, len); 954 955 /* Copy in our instructions. */ 956 + memcpy(ibuf, lguest_insns[type].start, insn_len); 957 return insn_len; 958 } 959
+10 -6
include/asm-i386/paravirt.h
··· 47 * The patch function should return the number of bytes of code 48 * generated, as we nop pad the rest in generic code. 49 */ 50 - unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); 51 52 /* Basic arch-specific setup */ 53 void (*arch_setup)(void); ··· 254 255 unsigned paravirt_patch_nop(void); 256 unsigned paravirt_patch_ignore(unsigned len); 257 - unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, 258 - void *site, u16 site_clobbers, 259 unsigned len); 260 - unsigned paravirt_patch_jmp(void *target, void *site, unsigned len); 261 - unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len); 262 263 - unsigned paravirt_patch_insns(void *site, unsigned len, 264 const char *start, const char *end); 265 266 int paravirt_disable_iospace(void);
··· 47 * The patch function should return the number of bytes of code 48 * generated, as we nop pad the rest in generic code. 49 */ 50 + unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, 51 + unsigned long addr, unsigned len); 52 53 /* Basic arch-specific setup */ 54 void (*arch_setup)(void); ··· 253 254 unsigned paravirt_patch_nop(void); 255 unsigned paravirt_patch_ignore(unsigned len); 256 + unsigned paravirt_patch_call(void *insnbuf, 257 + const void *target, u16 tgt_clobbers, 258 + unsigned long addr, u16 site_clobbers, 259 unsigned len); 260 + unsigned paravirt_patch_jmp(const void *target, void *insnbuf, 261 + unsigned long addr, unsigned len); 262 + unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 263 + unsigned long addr, unsigned len); 264 265 + unsigned paravirt_patch_insns(void *insnbuf, unsigned len, 266 const char *start, const char *end); 267 268 int paravirt_disable_iospace(void);