i386: Make patching more robust, fix paravirt issue

Commit 19d36ccdc34f5ed444f8a6af0cbfdb6790eb1177 "x86: Fix alternatives
and kprobes to remap write-protected kernel text" does its patching
using code which is itself being patched.

In particular, paravirt_ops does patching in two stages: first it
calls paravirt_ops.patch, then it fills any remaining instructions
with nop_out(). nop_out calls text_poke() which calls
lookup_address() which calls pgd_val() (aka paravirt_ops.pgd_val):
that call site is one of the places we patch.

If we always do patching as one single call to text_poke(), we only
need to make sure we're not patching the memcpy in text_poke itself.
This means the prototype of paravirt_ops.patch needs to change, so
that it marshals the new code into a buffer rather than patching in
place as it does now. It also means all patching goes through
text_poke(), which is known to be safe (apply_alternatives is also
changed to make a single text_poke() call per patch site).

AK: fix compilation on x86-64 (bad rusty!)
AK: fix boot on x86-64 (sigh)
AK: merged with other patches

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by Andi Kleen and committed by Linus Torvalds ab144f5e d3f3c934

+90 -67
+21 -12
arch/i386/kernel/alternative.c
··· 11 11 #include <asm/mce.h> 12 12 #include <asm/nmi.h> 13 13 14 + #define MAX_PATCH_LEN (255-1) 15 + 14 16 #ifdef CONFIG_HOTPLUG_CPU 15 17 static int smp_alt_once; 16 18 ··· 150 148 151 149 #endif /* CONFIG_X86_64 */ 152 150 153 - static void nop_out(void *insns, unsigned int len) 151 + /* Use this to add nops to a buffer, then text_poke the whole buffer. */ 152 + static void add_nops(void *insns, unsigned int len) 154 153 { 155 154 unsigned char **noptable = find_nop_table(); 156 155 ··· 159 156 unsigned int noplen = len; 160 157 if (noplen > ASM_NOP_MAX) 161 158 noplen = ASM_NOP_MAX; 162 - text_poke(insns, noptable[noplen], noplen); 159 + memcpy(insns, noptable[noplen], noplen); 163 160 insns += noplen; 164 161 len -= noplen; 165 162 } ··· 177 174 void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 178 175 { 179 176 struct alt_instr *a; 180 - u8 *instr; 181 - int diff; 177 + char insnbuf[MAX_PATCH_LEN]; 182 178 183 179 DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); 184 180 for (a = start; a < end; a++) { 181 + u8 *instr = a->instr; 185 182 BUG_ON(a->replacementlen > a->instrlen); 183 + BUG_ON(a->instrlen > sizeof(insnbuf)); 186 184 if (!boot_cpu_has(a->cpuid)) 187 185 continue; 188 - instr = a->instr; 189 186 #ifdef CONFIG_X86_64 190 187 /* vsyscall code is not mapped yet. resolve it manually. 
*/ 191 188 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { ··· 194 191 __FUNCTION__, a->instr, instr); 195 192 } 196 193 #endif 197 - memcpy(instr, a->replacement, a->replacementlen); 198 - diff = a->instrlen - a->replacementlen; 199 - nop_out(instr + a->replacementlen, diff); 194 + memcpy(insnbuf, a->replacement, a->replacementlen); 195 + add_nops(insnbuf + a->replacementlen, 196 + a->instrlen - a->replacementlen); 197 + text_poke(instr, insnbuf, a->instrlen); 200 198 } 201 199 } 202 200 ··· 219 215 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) 220 216 { 221 217 u8 **ptr; 218 + char insn[1]; 222 219 223 220 if (noreplace_smp) 224 221 return; 225 222 223 + add_nops(insn, 1); 226 224 for (ptr = start; ptr < end; ptr++) { 227 225 if (*ptr < text) 228 226 continue; 229 227 if (*ptr > text_end) 230 228 continue; 231 - nop_out(*ptr, 1); 229 + text_poke(*ptr, insn, 1); 232 230 }; 233 231 } 234 232 ··· 357 351 struct paravirt_patch_site *end) 358 352 { 359 353 struct paravirt_patch_site *p; 354 + char insnbuf[MAX_PATCH_LEN]; 360 355 361 356 if (noreplace_paravirt) 362 357 return; ··· 365 358 for (p = start; p < end; p++) { 366 359 unsigned int used; 367 360 368 - used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, 369 - p->len); 361 + BUG_ON(p->len > MAX_PATCH_LEN); 362 + used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, 363 + (unsigned long)p->instr, p->len); 370 364 371 365 BUG_ON(used > p->len); 372 366 373 367 /* Pad the rest with nops */ 374 - nop_out(p->instr + used, p->len - used); 368 + add_nops(insnbuf + used, p->len - used); 369 + text_poke(p->instr, insnbuf, p->len); 375 370 } 376 371 } 377 372 extern struct paravirt_patch_site __start_parainstructions[],
+26 -26
arch/i386/kernel/paravirt.c
··· 69 69 70 70 DEF_NATIVE(ud2a, "ud2a"); 71 71 72 - static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) 72 + static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 73 + unsigned long addr, unsigned len) 73 74 { 74 75 const unsigned char *start, *end; 75 76 unsigned ret; ··· 91 90 #undef SITE 92 91 93 92 patch_site: 94 - ret = paravirt_patch_insns(insns, len, start, end); 93 + ret = paravirt_patch_insns(ibuf, len, start, end); 95 94 break; 96 95 97 96 case PARAVIRT_PATCH(make_pgd): ··· 108 107 break; 109 108 110 109 default: 111 - ret = paravirt_patch_default(type, clobbers, insns, len); 110 + ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); 112 111 break; 113 112 } 114 113 ··· 130 129 u32 delta; 131 130 } __attribute__((packed)); 132 131 133 - unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, 134 - void *site, u16 site_clobbers, 132 + unsigned paravirt_patch_call(void *insnbuf, 133 + const void *target, u16 tgt_clobbers, 134 + unsigned long addr, u16 site_clobbers, 135 135 unsigned len) 136 136 { 137 - unsigned char *call = site; 138 - unsigned long delta = (unsigned long)target - (unsigned long)(call+5); 139 - struct branch b; 137 + struct branch *b = insnbuf; 138 + unsigned long delta = (unsigned long)target - (addr+5); 140 139 141 140 if (tgt_clobbers & ~site_clobbers) 142 141 return len; /* target would clobber too much for this site */ 143 142 if (len < 5) 144 143 return len; /* call too long for patch site */ 145 144 146 - b.opcode = 0xe8; /* call */ 147 - b.delta = delta; 148 - BUILD_BUG_ON(sizeof(b) != 5); 149 - text_poke(call, (unsigned char *)&b, 5); 145 + b->opcode = 0xe8; /* call */ 146 + b->delta = delta; 147 + BUILD_BUG_ON(sizeof(*b) != 5); 150 148 151 149 return 5; 152 150 } 153 151 154 - unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) 152 + unsigned paravirt_patch_jmp(const void *target, void *insnbuf, 153 + unsigned long addr, unsigned len) 155 154 { 156 - unsigned 
char *jmp = site; 157 - unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); 158 - struct branch b; 155 + struct branch *b = insnbuf; 156 + unsigned long delta = (unsigned long)target - (addr+5); 159 157 160 158 if (len < 5) 161 159 return len; /* call too long for patch site */ 162 160 163 - b.opcode = 0xe9; /* jmp */ 164 - b.delta = delta; 165 - text_poke(jmp, (unsigned char *)&b, 5); 161 + b->opcode = 0xe9; /* jmp */ 162 + b->delta = delta; 166 163 167 164 return 5; 168 165 } 169 166 170 - unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) 167 + unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 168 + unsigned long addr, unsigned len) 171 169 { 172 170 void *opfunc = *((void **)&paravirt_ops + type); 173 171 unsigned ret; 174 172 175 173 if (opfunc == NULL) 176 174 /* If there's no function, patch it with a ud2a (BUG) */ 177 - ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a); 175 + ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); 178 176 else if (opfunc == paravirt_nop) 179 177 /* If the operation is a nop, then nop the callsite */ 180 178 ret = paravirt_patch_nop(); 181 179 else if (type == PARAVIRT_PATCH(iret) || 182 180 type == PARAVIRT_PATCH(irq_enable_sysexit)) 183 181 /* If operation requires a jmp, then jmp */ 184 - ret = paravirt_patch_jmp(opfunc, site, len); 182 + ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); 185 183 else 186 184 /* Otherwise call the function; assume target could 187 185 clobber any caller-save reg */ 188 - ret = paravirt_patch_call(opfunc, CLBR_ANY, 189 - site, clobbers, len); 186 + ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, 187 + addr, clobbers, len); 190 188 191 189 return ret; 192 190 } 193 191 194 - unsigned paravirt_patch_insns(void *site, unsigned len, 192 + unsigned paravirt_patch_insns(void *insnbuf, unsigned len, 195 193 const char *start, const char *end) 196 194 { 197 195 unsigned insn_len = end - start; ··· 198 198 
if (insn_len > len || start == NULL) 199 199 insn_len = len; 200 200 else 201 - memcpy(site, start, insn_len); 201 + memcpy(insnbuf, start, insn_len); 202 202 203 203 return insn_len; 204 204 }
+21 -14
arch/i386/kernel/vmi.c
··· 87 87 #define IRQ_PATCH_INT_MASK 0 88 88 #define IRQ_PATCH_DISABLE 5 89 89 90 - static inline void patch_offset(unsigned char *eip, unsigned char *dest) 90 + static inline void patch_offset(void *insnbuf, 91 + unsigned long eip, unsigned long dest) 91 92 { 92 - *(unsigned long *)(eip+1) = dest-eip-5; 93 + *(unsigned long *)(insnbuf+1) = dest-eip-5; 93 94 } 94 95 95 - static unsigned patch_internal(int call, unsigned len, void *insns) 96 + static unsigned patch_internal(int call, unsigned len, void *insnbuf, 97 + unsigned long eip) 96 98 { 97 99 u64 reloc; 98 100 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; ··· 102 100 switch(rel->type) { 103 101 case VMI_RELOCATION_CALL_REL: 104 102 BUG_ON(len < 5); 105 - *(char *)insns = MNEM_CALL; 106 - patch_offset(insns, rel->eip); 103 + *(char *)insnbuf = MNEM_CALL; 104 + patch_offset(insnbuf, eip, (unsigned long)rel->eip); 107 105 return 5; 108 106 109 107 case VMI_RELOCATION_JUMP_REL: 110 108 BUG_ON(len < 5); 111 - *(char *)insns = MNEM_JMP; 112 - patch_offset(insns, rel->eip); 109 + *(char *)insnbuf = MNEM_JMP; 110 + patch_offset(insnbuf, eip, (unsigned long)rel->eip); 113 111 return 5; 114 112 115 113 case VMI_RELOCATION_NOP: ··· 130 128 * Apply patch if appropriate, return length of new instruction 131 129 * sequence. The callee does nop padding for us. 
132 130 */ 133 - static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) 131 + static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, 132 + unsigned long eip, unsigned len) 134 133 { 135 134 switch (type) { 136 135 case PARAVIRT_PATCH(irq_disable): 137 - return patch_internal(VMI_CALL_DisableInterrupts, len, insns); 136 + return patch_internal(VMI_CALL_DisableInterrupts, len, 137 + insns, eip); 138 138 case PARAVIRT_PATCH(irq_enable): 139 - return patch_internal(VMI_CALL_EnableInterrupts, len, insns); 139 + return patch_internal(VMI_CALL_EnableInterrupts, len, 140 + insns, eip); 140 141 case PARAVIRT_PATCH(restore_fl): 141 - return patch_internal(VMI_CALL_SetInterruptMask, len, insns); 142 + return patch_internal(VMI_CALL_SetInterruptMask, len, 143 + insns, eip); 142 144 case PARAVIRT_PATCH(save_fl): 143 - return patch_internal(VMI_CALL_GetInterruptMask, len, insns); 145 + return patch_internal(VMI_CALL_GetInterruptMask, len, 146 + insns, eip); 144 147 case PARAVIRT_PATCH(iret): 145 - return patch_internal(VMI_CALL_IRET, len, insns); 148 + return patch_internal(VMI_CALL_IRET, len, insns, eip); 146 149 case PARAVIRT_PATCH(irq_enable_sysexit): 147 - return patch_internal(VMI_CALL_SYSEXIT, len, insns); 150 + return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); 148 151 default: 149 152 break; 150 153 }
+7 -5
arch/i386/xen/enlighten.c
··· 842 842 } 843 843 } 844 844 845 - static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len) 845 + static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, 846 + unsigned long addr, unsigned len) 846 847 { 847 848 char *start, *end, *reloc; 848 849 unsigned ret; ··· 870 869 if (start == NULL || (end-start) > len) 871 870 goto default_patch; 872 871 873 - ret = paravirt_patch_insns(insns, len, start, end); 872 + ret = paravirt_patch_insns(insnbuf, len, start, end); 874 873 875 874 /* Note: because reloc is assigned from something that 876 875 appears to be an array, gcc assumes it's non-null, ··· 878 877 end. */ 879 878 if (reloc > start && reloc < end) { 880 879 int reloc_off = reloc - start; 881 - long *relocp = (long *)(insns + reloc_off); 882 - long delta = start - (char *)insns; 880 + long *relocp = (long *)(insnbuf + reloc_off); 881 + long delta = start - (char *)addr; 883 882 884 883 *relocp += delta; 885 884 } ··· 887 886 888 887 default_patch: 889 888 default: 890 - ret = paravirt_patch_default(type, clobbers, insns, len); 889 + ret = paravirt_patch_default(type, clobbers, insnbuf, 890 + addr, len); 891 891 break; 892 892 } 893 893
+5 -4
drivers/lguest/lguest.c
··· 936 936 /* Now our patch routine is fairly simple (based on the native one in 937 937 * paravirt.c). If we have a replacement, we copy it in and return how much of 938 938 * the available space we used. */ 939 - static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) 939 + static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, 940 + unsigned long addr, unsigned len) 940 941 { 941 942 unsigned int insn_len; 942 943 943 944 /* Don't do anything special if we don't have a replacement */ 944 945 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) 945 - return paravirt_patch_default(type, clobber, insns, len); 946 + return paravirt_patch_default(type, clobber, ibuf, addr, len); 946 947 947 948 insn_len = lguest_insns[type].end - lguest_insns[type].start; 948 949 949 950 /* Similarly if we can't fit replacement (shouldn't happen, but let's 950 951 * be thorough). */ 951 952 if (len < insn_len) 952 - return paravirt_patch_default(type, clobber, insns, len); 953 + return paravirt_patch_default(type, clobber, ibuf, addr, len); 953 954 954 955 /* Copy in our instructions. */ 955 - memcpy(insns, lguest_insns[type].start, insn_len); 956 + memcpy(ibuf, lguest_insns[type].start, insn_len); 956 957 return insn_len; 957 958 } 958 959
+10 -6
include/asm-i386/paravirt.h
··· 47 47 * The patch function should return the number of bytes of code 48 48 * generated, as we nop pad the rest in generic code. 49 49 */ 50 - unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); 50 + unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, 51 + unsigned long addr, unsigned len); 51 52 52 53 /* Basic arch-specific setup */ 53 54 void (*arch_setup)(void); ··· 254 253 255 254 unsigned paravirt_patch_nop(void); 256 255 unsigned paravirt_patch_ignore(unsigned len); 257 - unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, 258 - void *site, u16 site_clobbers, 256 + unsigned paravirt_patch_call(void *insnbuf, 257 + const void *target, u16 tgt_clobbers, 258 + unsigned long addr, u16 site_clobbers, 259 259 unsigned len); 260 - unsigned paravirt_patch_jmp(void *target, void *site, unsigned len); 261 - unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len); 260 + unsigned paravirt_patch_jmp(const void *target, void *insnbuf, 261 + unsigned long addr, unsigned len); 262 + unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 263 + unsigned long addr, unsigned len); 262 264 263 - unsigned paravirt_patch_insns(void *site, unsigned len, 265 + unsigned paravirt_patch_insns(void *insnbuf, unsigned len, 264 266 const char *start, const char *end); 265 267 266 268 int paravirt_disable_iospace(void);