Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: simplify and consolidate TLB updates

The HVC update was mostly used to compress consecutive calls into one.
This is mostly relevant for userspace where it is already handled by the
syscall stub code.

Simplify the whole logic and consolidate it for both kernel and
userspace. This does remove the sequential syscall compression for the
kernel; however, that shouldn't be the main factor in most runs.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20240703134536.1161108-12-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

authored by

Benjamin Berg and committed by
Johannes Berg
573a446f ef714f15

+103 -321
+6 -6
arch/um/include/shared/os.h
··· 279 279 struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp); 280 280 void syscall_stub_dump_error(struct mm_id *mm_idp); 281 281 282 - void map(struct mm_id *mm_idp, unsigned long virt, 283 - unsigned long len, int prot, int phys_fd, 284 - unsigned long long offset); 285 - void unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); 286 - void protect(struct mm_id *mm_idp, unsigned long addr, 287 - unsigned long len, unsigned int prot); 282 + int map(struct mm_id *mm_idp, unsigned long virt, 283 + unsigned long len, int prot, int phys_fd, 284 + unsigned long long offset); 285 + int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); 286 + int protect(struct mm_id *mm_idp, unsigned long addr, 287 + unsigned long len, unsigned int prot); 288 288 289 289 /* skas/process.c */ 290 290 extern int is_skas_winch(int pid, int fd, void *data);
+85 -309
arch/um/kernel/tlb.c
··· 15 15 #include <skas.h> 16 16 #include <kern_util.h> 17 17 18 - struct host_vm_change { 19 - struct host_vm_op { 20 - enum { NONE, MMAP, MUNMAP, MPROTECT } type; 21 - union { 22 - struct { 23 - unsigned long addr; 24 - unsigned long len; 25 - unsigned int prot; 26 - int fd; 27 - __u64 offset; 28 - } mmap; 29 - struct { 30 - unsigned long addr; 31 - unsigned long len; 32 - } munmap; 33 - struct { 34 - unsigned long addr; 35 - unsigned long len; 36 - unsigned int prot; 37 - } mprotect; 38 - } u; 39 - } ops[1]; 40 - int userspace; 41 - int index; 42 - struct mm_struct *mm; 43 - void *data; 18 + struct vm_ops { 19 + struct mm_id *mm_idp; 20 + 21 + int (*mmap)(struct mm_id *mm_idp, 22 + unsigned long virt, unsigned long len, int prot, 23 + int phys_fd, unsigned long long offset); 24 + int (*unmap)(struct mm_id *mm_idp, 25 + unsigned long virt, unsigned long len); 26 + int (*mprotect)(struct mm_id *mm_idp, 27 + unsigned long virt, unsigned long len, 28 + unsigned int prot); 44 29 }; 45 30 46 - #define INIT_HVC(mm, userspace) \ 47 - ((struct host_vm_change) \ 48 - { .ops = { { .type = NONE } }, \ 49 - .mm = mm, \ 50 - .data = NULL, \ 51 - .userspace = userspace, \ 52 - .index = 0 }) 31 + static int kern_map(struct mm_id *mm_idp, 32 + unsigned long virt, unsigned long len, int prot, 33 + int phys_fd, unsigned long long offset) 34 + { 35 + /* TODO: Why is executable needed to be always set in the kernel? 
*/ 36 + return os_map_memory((void *)virt, phys_fd, offset, len, 37 + prot & UM_PROT_READ, prot & UM_PROT_WRITE, 38 + 1); 39 + } 40 + 41 + static int kern_unmap(struct mm_id *mm_idp, 42 + unsigned long virt, unsigned long len) 43 + { 44 + return os_unmap_memory((void *)virt, len); 45 + } 46 + 47 + static int kern_mprotect(struct mm_id *mm_idp, 48 + unsigned long virt, unsigned long len, 49 + unsigned int prot) 50 + { 51 + return os_protect_memory((void *)virt, len, 52 + prot & UM_PROT_READ, prot & UM_PROT_WRITE, 53 + 1); 54 + } 53 55 54 56 void report_enomem(void) 55 57 { ··· 60 58 "vm.max_map_count has been reached.\n"); 61 59 } 62 60 63 - static int do_ops(struct host_vm_change *hvc, int end, 64 - int finished) 65 - { 66 - struct host_vm_op *op; 67 - int i, ret = 0; 68 - 69 - for (i = 0; i < end && !ret; i++) { 70 - op = &hvc->ops[i]; 71 - switch (op->type) { 72 - case MMAP: 73 - if (hvc->userspace) 74 - map(&hvc->mm->context.id, op->u.mmap.addr, 75 - op->u.mmap.len, op->u.mmap.prot, 76 - op->u.mmap.fd, 77 - op->u.mmap.offset); 78 - else 79 - map_memory(op->u.mmap.addr, op->u.mmap.offset, 80 - op->u.mmap.len, 1, 1, 1); 81 - break; 82 - case MUNMAP: 83 - if (hvc->userspace) 84 - unmap(&hvc->mm->context.id, 85 - op->u.munmap.addr, 86 - op->u.munmap.len); 87 - else 88 - ret = os_unmap_memory( 89 - (void *) op->u.munmap.addr, 90 - op->u.munmap.len); 91 - 92 - break; 93 - case MPROTECT: 94 - if (hvc->userspace) 95 - protect(&hvc->mm->context.id, 96 - op->u.mprotect.addr, 97 - op->u.mprotect.len, 98 - op->u.mprotect.prot); 99 - else 100 - ret = os_protect_memory( 101 - (void *) op->u.mprotect.addr, 102 - op->u.mprotect.len, 103 - 1, 1, 1); 104 - break; 105 - default: 106 - printk(KERN_ERR "Unknown op type %d in do_ops\n", 107 - op->type); 108 - BUG(); 109 - break; 110 - } 111 - } 112 - 113 - if (hvc->userspace && finished) 114 - ret = syscall_stub_flush(&hvc->mm->context.id); 115 - 116 - if (ret == -ENOMEM) 117 - report_enomem(); 118 - 119 - return ret; 120 - } 121 - 
122 - static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, 123 - unsigned int prot, struct host_vm_change *hvc) 124 - { 125 - __u64 offset; 126 - struct host_vm_op *last; 127 - int fd = -1, ret = 0; 128 - 129 - if (hvc->userspace) 130 - fd = phys_mapping(phys, &offset); 131 - else 132 - offset = phys; 133 - if (hvc->index != 0) { 134 - last = &hvc->ops[hvc->index - 1]; 135 - if ((last->type == MMAP) && 136 - (last->u.mmap.addr + last->u.mmap.len == virt) && 137 - (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) && 138 - (last->u.mmap.offset + last->u.mmap.len == offset)) { 139 - last->u.mmap.len += len; 140 - return 0; 141 - } 142 - } 143 - 144 - if (hvc->index == ARRAY_SIZE(hvc->ops)) { 145 - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); 146 - hvc->index = 0; 147 - } 148 - 149 - hvc->ops[hvc->index++] = ((struct host_vm_op) 150 - { .type = MMAP, 151 - .u = { .mmap = { .addr = virt, 152 - .len = len, 153 - .prot = prot, 154 - .fd = fd, 155 - .offset = offset } 156 - } }); 157 - return ret; 158 - } 159 - 160 - static int add_munmap(unsigned long addr, unsigned long len, 161 - struct host_vm_change *hvc) 162 - { 163 - struct host_vm_op *last; 164 - int ret = 0; 165 - 166 - if (hvc->index != 0) { 167 - last = &hvc->ops[hvc->index - 1]; 168 - if ((last->type == MUNMAP) && 169 - (last->u.munmap.addr + last->u.mmap.len == addr)) { 170 - last->u.munmap.len += len; 171 - return 0; 172 - } 173 - } 174 - 175 - if (hvc->index == ARRAY_SIZE(hvc->ops)) { 176 - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); 177 - hvc->index = 0; 178 - } 179 - 180 - hvc->ops[hvc->index++] = ((struct host_vm_op) 181 - { .type = MUNMAP, 182 - .u = { .munmap = { .addr = addr, 183 - .len = len } } }); 184 - return ret; 185 - } 186 - 187 - static int add_mprotect(unsigned long addr, unsigned long len, 188 - unsigned int prot, struct host_vm_change *hvc) 189 - { 190 - struct host_vm_op *last; 191 - int ret = 0; 192 - 193 - if (hvc->index != 0) { 194 - last = 
&hvc->ops[hvc->index - 1]; 195 - if ((last->type == MPROTECT) && 196 - (last->u.mprotect.addr + last->u.mprotect.len == addr) && 197 - (last->u.mprotect.prot == prot)) { 198 - last->u.mprotect.len += len; 199 - return 0; 200 - } 201 - } 202 - 203 - if (hvc->index == ARRAY_SIZE(hvc->ops)) { 204 - ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); 205 - hvc->index = 0; 206 - } 207 - 208 - hvc->ops[hvc->index++] = ((struct host_vm_op) 209 - { .type = MPROTECT, 210 - .u = { .mprotect = { .addr = addr, 211 - .len = len, 212 - .prot = prot } } }); 213 - return ret; 214 - } 215 - 216 - #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) 217 - 218 61 static inline int update_pte_range(pmd_t *pmd, unsigned long addr, 219 62 unsigned long end, 220 - struct host_vm_change *hvc) 63 + struct vm_ops *ops) 221 64 { 222 65 pte_t *pte; 223 66 int r, w, x, prot, ret = 0; ··· 82 235 (x ? UM_PROT_EXEC : 0)); 83 236 if (pte_newpage(*pte)) { 84 237 if (pte_present(*pte)) { 85 - if (pte_newpage(*pte)) 86 - ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, 87 - PAGE_SIZE, prot, hvc); 238 + if (pte_newpage(*pte)) { 239 + __u64 offset; 240 + unsigned long phys = 241 + pte_val(*pte) & PAGE_MASK; 242 + int fd = phys_mapping(phys, &offset); 243 + 244 + ret = ops->mmap(ops->mm_idp, addr, 245 + PAGE_SIZE, prot, fd, 246 + offset); 247 + } 88 248 } else 89 - ret = add_munmap(addr, PAGE_SIZE, hvc); 249 + ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); 90 250 } else if (pte_newprot(*pte)) 91 - ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); 251 + ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot); 92 252 *pte = pte_mkuptodate(*pte); 93 253 } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); 94 254 return ret; ··· 103 249 104 250 static inline int update_pmd_range(pud_t *pud, unsigned long addr, 105 251 unsigned long end, 106 - struct host_vm_change *hvc) 252 + struct vm_ops *ops) 107 253 { 108 254 pmd_t *pmd; 109 255 unsigned long next; ··· 114 260 next = pmd_addr_end(addr, end); 115 261 if 
(!pmd_present(*pmd)) { 116 262 if (pmd_newpage(*pmd)) { 117 - ret = add_munmap(addr, next - addr, hvc); 263 + ret = ops->unmap(ops->mm_idp, addr, 264 + next - addr); 118 265 pmd_mkuptodate(*pmd); 119 266 } 120 267 } 121 - else ret = update_pte_range(pmd, addr, next, hvc); 268 + else ret = update_pte_range(pmd, addr, next, ops); 122 269 } while (pmd++, addr = next, ((addr < end) && !ret)); 123 270 return ret; 124 271 } 125 272 126 273 static inline int update_pud_range(p4d_t *p4d, unsigned long addr, 127 274 unsigned long end, 128 - struct host_vm_change *hvc) 275 + struct vm_ops *ops) 129 276 { 130 277 pud_t *pud; 131 278 unsigned long next; ··· 137 282 next = pud_addr_end(addr, end); 138 283 if (!pud_present(*pud)) { 139 284 if (pud_newpage(*pud)) { 140 - ret = add_munmap(addr, next - addr, hvc); 285 + ret = ops->unmap(ops->mm_idp, addr, 286 + next - addr); 141 287 pud_mkuptodate(*pud); 142 288 } 143 289 } 144 - else ret = update_pmd_range(pud, addr, next, hvc); 290 + else ret = update_pmd_range(pud, addr, next, ops); 145 291 } while (pud++, addr = next, ((addr < end) && !ret)); 146 292 return ret; 147 293 } 148 294 149 295 static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, 150 296 unsigned long end, 151 - struct host_vm_change *hvc) 297 + struct vm_ops *ops) 152 298 { 153 299 p4d_t *p4d; 154 300 unsigned long next; ··· 160 304 next = p4d_addr_end(addr, end); 161 305 if (!p4d_present(*p4d)) { 162 306 if (p4d_newpage(*p4d)) { 163 - ret = add_munmap(addr, next - addr, hvc); 307 + ret = ops->unmap(ops->mm_idp, addr, 308 + next - addr); 164 309 p4d_mkuptodate(*p4d); 165 310 } 166 311 } else 167 - ret = update_pud_range(p4d, addr, next, hvc); 312 + ret = update_pud_range(p4d, addr, next, ops); 168 313 } while (p4d++, addr = next, ((addr < end) && !ret)); 169 314 return ret; 170 315 } 171 316 172 - static void fix_range_common(struct mm_struct *mm, unsigned long start_addr, 317 + static int fix_range_common(struct mm_struct *mm, unsigned long start_addr, 
173 318 unsigned long end_addr) 174 319 { 175 320 pgd_t *pgd; 176 - struct host_vm_change hvc; 321 + struct vm_ops ops; 177 322 unsigned long addr = start_addr, next; 178 - int ret = 0, userspace = 1; 323 + int ret = 0; 179 324 180 - hvc = INIT_HVC(mm, userspace); 325 + ops.mm_idp = &mm->context.id; 326 + if (mm == &init_mm) { 327 + ops.mmap = kern_map; 328 + ops.unmap = kern_unmap; 329 + ops.mprotect = kern_mprotect; 330 + } else { 331 + ops.mmap = map; 332 + ops.unmap = unmap; 333 + ops.mprotect = protect; 334 + } 335 + 181 336 pgd = pgd_offset(mm, addr); 182 337 do { 183 338 next = pgd_addr_end(addr, end_addr); 184 339 if (!pgd_present(*pgd)) { 185 340 if (pgd_newpage(*pgd)) { 186 - ret = add_munmap(addr, next - addr, &hvc); 341 + ret = ops.unmap(ops.mm_idp, addr, 342 + next - addr); 187 343 pgd_mkuptodate(*pgd); 188 344 } 189 345 } else 190 - ret = update_p4d_range(pgd, addr, next, &hvc); 346 + ret = update_p4d_range(pgd, addr, next, &ops); 191 347 } while (pgd++, addr = next, ((addr < end_addr) && !ret)); 192 348 193 - if (!ret) 194 - ret = do_ops(&hvc, hvc.index, 1); 349 + if (ret == -ENOMEM) 350 + report_enomem(); 351 + 352 + return ret; 195 353 } 196 354 197 - static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) 355 + static void flush_tlb_kernel_range_common(unsigned long start, unsigned long end) 198 356 { 199 - struct mm_struct *mm; 200 - pgd_t *pgd; 201 - p4d_t *p4d; 202 - pud_t *pud; 203 - pmd_t *pmd; 204 - pte_t *pte; 205 - unsigned long addr, last; 206 - int updated = 0, err = 0, userspace = 0; 207 - struct host_vm_change hvc; 357 + int err; 208 358 209 - mm = &init_mm; 210 - hvc = INIT_HVC(mm, userspace); 211 - for (addr = start; addr < end;) { 212 - pgd = pgd_offset(mm, addr); 213 - if (!pgd_present(*pgd)) { 214 - last = ADD_ROUND(addr, PGDIR_SIZE); 215 - if (last > end) 216 - last = end; 217 - if (pgd_newpage(*pgd)) { 218 - updated = 1; 219 - err = add_munmap(addr, last - addr, &hvc); 220 - if (err < 0) 221 - 
panic("munmap failed, errno = %d\n", 222 - -err); 223 - } 224 - addr = last; 225 - continue; 226 - } 359 + err = fix_range_common(&init_mm, start, end); 227 360 228 - p4d = p4d_offset(pgd, addr); 229 - if (!p4d_present(*p4d)) { 230 - last = ADD_ROUND(addr, P4D_SIZE); 231 - if (last > end) 232 - last = end; 233 - if (p4d_newpage(*p4d)) { 234 - updated = 1; 235 - err = add_munmap(addr, last - addr, &hvc); 236 - if (err < 0) 237 - panic("munmap failed, errno = %d\n", 238 - -err); 239 - } 240 - addr = last; 241 - continue; 242 - } 243 - 244 - pud = pud_offset(p4d, addr); 245 - if (!pud_present(*pud)) { 246 - last = ADD_ROUND(addr, PUD_SIZE); 247 - if (last > end) 248 - last = end; 249 - if (pud_newpage(*pud)) { 250 - updated = 1; 251 - err = add_munmap(addr, last - addr, &hvc); 252 - if (err < 0) 253 - panic("munmap failed, errno = %d\n", 254 - -err); 255 - } 256 - addr = last; 257 - continue; 258 - } 259 - 260 - pmd = pmd_offset(pud, addr); 261 - if (!pmd_present(*pmd)) { 262 - last = ADD_ROUND(addr, PMD_SIZE); 263 - if (last > end) 264 - last = end; 265 - if (pmd_newpage(*pmd)) { 266 - updated = 1; 267 - err = add_munmap(addr, last - addr, &hvc); 268 - if (err < 0) 269 - panic("munmap failed, errno = %d\n", 270 - -err); 271 - } 272 - addr = last; 273 - continue; 274 - } 275 - 276 - pte = pte_offset_kernel(pmd, addr); 277 - if (!pte_present(*pte) || pte_newpage(*pte)) { 278 - updated = 1; 279 - err = add_munmap(addr, PAGE_SIZE, &hvc); 280 - if (err < 0) 281 - panic("munmap failed, errno = %d\n", 282 - -err); 283 - if (pte_present(*pte)) 284 - err = add_mmap(addr, pte_val(*pte) & PAGE_MASK, 285 - PAGE_SIZE, 0, &hvc); 286 - } 287 - else if (pte_newprot(*pte)) { 288 - updated = 1; 289 - err = add_mprotect(addr, PAGE_SIZE, 0, &hvc); 290 - } 291 - addr += PAGE_SIZE; 292 - } 293 - if (!err) 294 - err = do_ops(&hvc, hvc.index, 1); 295 - 296 - if (err < 0) 361 + if (err) 297 362 panic("flush_tlb_kernel failed, errno = %d\n", err); 298 - return updated; 299 363 } 300 364 301 
365 void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
+12 -6
arch/um/os-Linux/skas/mem.c
··· 175 175 return NULL; 176 176 } 177 177 178 - void map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, 178 + int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, 179 179 int phys_fd, unsigned long long offset) 180 180 { 181 181 struct stub_syscall *sc; ··· 185 185 if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd && 186 186 sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) { 187 187 sc->mem.length += len; 188 - return; 188 + return 0; 189 189 } 190 190 191 191 sc = syscall_stub_alloc(mm_idp); ··· 195 195 sc->mem.prot = prot; 196 196 sc->mem.fd = phys_fd; 197 197 sc->mem.offset = MMAP_OFFSET(offset); 198 + 199 + return 0; 198 200 } 199 201 200 - void unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len) 202 + int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len) 201 203 { 202 204 struct stub_syscall *sc; 203 205 ··· 207 205 sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MUNMAP, addr); 208 206 if (sc) { 209 207 sc->mem.length += len; 210 - return; 208 + return 0; 211 209 } 212 210 213 211 sc = syscall_stub_alloc(mm_idp); 214 212 sc->syscall = STUB_SYSCALL_MUNMAP; 215 213 sc->mem.addr = addr; 216 214 sc->mem.length = len; 215 + 216 + return 0; 217 217 } 218 218 219 - void protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, 219 + int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, 220 220 unsigned int prot) 221 221 { 222 222 struct stub_syscall *sc; ··· 227 223 sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr); 228 224 if (sc && sc->mem.prot == prot) { 229 225 sc->mem.length += len; 230 - return; 226 + return 0; 231 227 } 232 228 233 229 sc = syscall_stub_alloc(mm_idp); ··· 235 231 sc->mem.addr = addr; 236 232 sc->mem.length = len; 237 233 sc->mem.prot = prot; 234 + 235 + return 0; 238 236 }