Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: Abandon the _PAGE_NEWPROT bit

When a PTE is updated in the page table, the _PAGE_NEWPAGE bit will
always be set. And the corresponding page will always be mapped or
unmapped depending on whether the PTE is present or not. The check
on the _PAGE_NEWPROT bit is not really reachable. Abandoning it will
allow us to simplify the code and remove the unreachable code.

Reviewed-by: Benjamin Berg <benjamin.berg@intel.com>
Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20241011102354.1682626-2-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

authored by Tiwei Bie and committed by Johannes Berg
2717c6b6 90daca7c

+34 -103
+6 -31
arch/um/include/asm/pgtable.h
··· 12 12 13 13 #define _PAGE_PRESENT 0x001 14 14 #define _PAGE_NEWPAGE 0x002 15 - #define _PAGE_NEWPROT 0x004 16 15 #define _PAGE_RW 0x020 17 16 #define _PAGE_USER 0x040 18 17 #define _PAGE_ACCESSED 0x080 ··· 150 151 return pte_get_bits(pte, _PAGE_NEWPAGE); 151 152 } 152 153 153 - static inline int pte_newprot(pte_t pte) 154 - { 155 - return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); 156 - } 157 - 158 154 /* 159 155 * ================================= 160 156 * Flags setting section. 161 157 * ================================= 162 158 */ 163 - 164 - static inline pte_t pte_mknewprot(pte_t pte) 165 - { 166 - pte_set_bits(pte, _PAGE_NEWPROT); 167 - return(pte); 168 - } 169 159 170 160 static inline pte_t pte_mkclean(pte_t pte) 171 161 { ··· 170 182 171 183 static inline pte_t pte_wrprotect(pte_t pte) 172 184 { 173 - if (likely(pte_get_bits(pte, _PAGE_RW))) 174 - pte_clear_bits(pte, _PAGE_RW); 175 - else 176 - return pte; 177 - return(pte_mknewprot(pte)); 185 + pte_clear_bits(pte, _PAGE_RW); 186 + return pte; 178 187 } 179 188 180 189 static inline pte_t pte_mkread(pte_t pte) 181 190 { 182 - if (unlikely(pte_get_bits(pte, _PAGE_USER))) 183 - return pte; 184 191 pte_set_bits(pte, _PAGE_USER); 185 - return(pte_mknewprot(pte)); 192 + return pte; 186 193 } 187 194 188 195 static inline pte_t pte_mkdirty(pte_t pte) ··· 194 211 195 212 static inline pte_t pte_mkwrite_novma(pte_t pte) 196 213 { 197 - if (unlikely(pte_get_bits(pte, _PAGE_RW))) 198 - return pte; 199 214 pte_set_bits(pte, _PAGE_RW); 200 - return(pte_mknewprot(pte)); 215 + return pte; 201 216 } 202 217 203 218 static inline pte_t pte_mkuptodate(pte_t pte) 204 219 { 205 220 pte_clear_bits(pte, _PAGE_NEWPAGE); 206 - if(pte_present(pte)) 207 - pte_clear_bits(pte, _PAGE_NEWPROT); 208 - return(pte); 221 + return pte; 209 222 } 210 223 211 224 static inline pte_t pte_mknewpage(pte_t pte) ··· 215 236 pte_copy(*pteptr, pteval); 216 237 217 238 /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so 218 - * fix_range knows to unmap it. _PAGE_NEWPROT is specific to 219 - * mapped pages. 239 + * update_pte_range knows to unmap it. 220 240 */ 221 241 222 242 *pteptr = pte_mknewpage(*pteptr); 223 - if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); 224 243 } 225 244 226 245 #define PFN_PTE_SHIFT PAGE_SHIFT ··· 275 298 ({ pte_t pte; \ 276 299 \ 277 300 pte_set_val(pte, page_to_phys(page), (pgprot)); \ 278 - if (pte_present(pte)) \ 279 - pte_mknewprot(pte_mknewpage(pte)); \ 280 301 pte;}) 281 302 282 303 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+2 -2
arch/um/include/asm/tlbflush.h
··· 9 9 #include <linux/mm.h> 10 10 11 11 /* 12 - * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls 13 - * from the process handling the MM (which can be the kernel itself). 12 + * In UML, we need to sync the TLB over by using mmap/munmap syscalls from 13 + * the process handling the MM (which can be the kernel itself). 14 14 * 15 15 * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes 16 16 * we catch all PTE transitions where memory that was unusable becomes usable.
-2
arch/um/include/shared/os.h
··· 279 279 unsigned long len, int prot, int phys_fd, 280 280 unsigned long long offset); 281 281 int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); 282 - int protect(struct mm_id *mm_idp, unsigned long addr, 283 - unsigned long len, unsigned int prot); 284 282 285 283 /* skas/process.c */ 286 284 extern int is_skas_winch(int pid, int fd, void *data);
-1
arch/um/include/shared/skas/stub-data.h
··· 30 30 STUB_SYSCALL_UNSET = 0, 31 31 STUB_SYSCALL_MMAP, 32 32 STUB_SYSCALL_MUNMAP, 33 - STUB_SYSCALL_MPROTECT, 34 33 }; 35 34 36 35 struct stub_syscall {
-10
arch/um/kernel/skas/stub.c
··· 35 35 return -1; 36 36 } 37 37 break; 38 - case STUB_SYSCALL_MPROTECT: 39 - res = stub_syscall3(__NR_mprotect, 40 - sc->mem.addr, sc->mem.length, 41 - sc->mem.prot); 42 - if (res) { 43 - d->err = res; 44 - d->syscall_data_len = i; 45 - return -1; 46 - } 47 - break; 48 38 default: 49 39 d->err = -95; /* EOPNOTSUPP */ 50 40 d->syscall_data_len = i;
+26 -36
arch/um/kernel/tlb.c
··· 23 23 int phys_fd, unsigned long long offset); 24 24 int (*unmap)(struct mm_id *mm_idp, 25 25 unsigned long virt, unsigned long len); 26 - int (*mprotect)(struct mm_id *mm_idp, 27 - unsigned long virt, unsigned long len, 28 - unsigned int prot); 29 26 }; 30 27 31 28 static int kern_map(struct mm_id *mm_idp, ··· 41 44 return os_unmap_memory((void *)virt, len); 42 45 } 43 46 44 - static int kern_mprotect(struct mm_id *mm_idp, 45 - unsigned long virt, unsigned long len, 46 - unsigned int prot) 47 - { 48 - return os_protect_memory((void *)virt, len, 49 - prot & UM_PROT_READ, prot & UM_PROT_WRITE, 50 - 1); 51 - } 52 - 53 47 void report_enomem(void) 54 48 { 55 49 printk(KERN_ERR "UML ran out of memory on the host side! " ··· 53 65 struct vm_ops *ops) 54 66 { 55 67 pte_t *pte; 56 - int r, w, x, prot, ret = 0; 68 + int ret = 0; 57 69 58 70 pte = pte_offset_kernel(pmd, addr); 59 71 do { 60 - r = pte_read(*pte); 61 - w = pte_write(*pte); 62 - x = pte_exec(*pte); 63 - if (!pte_young(*pte)) { 64 - r = 0; 65 - w = 0; 66 - } else if (!pte_dirty(*pte)) 67 - w = 0; 72 + if (!pte_newpage(*pte)) 73 + continue; 68 74 69 - prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | 70 - (x ? UM_PROT_EXEC : 0)); 71 - if (pte_newpage(*pte)) { 72 - if (pte_present(*pte)) { 73 - __u64 offset; 74 - unsigned long phys = pte_val(*pte) & PAGE_MASK; 75 - int fd = phys_mapping(phys, &offset); 75 + if (pte_present(*pte)) { 76 + __u64 offset; 77 + unsigned long phys = pte_val(*pte) & PAGE_MASK; 78 + int fd = phys_mapping(phys, &offset); 79 + int r, w, x, prot; 76 80 77 - ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, 78 - prot, fd, offset); 79 - } else 80 - ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); 81 - } else if (pte_newprot(*pte)) 82 - ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot); 81 + r = pte_read(*pte); 82 + w = pte_write(*pte); 83 + x = pte_exec(*pte); 84 + if (!pte_young(*pte)) { 85 + r = 0; 86 + w = 0; 87 + } else if (!pte_dirty(*pte)) 88 + w = 0; 89 + 90 + prot = (r ? UM_PROT_READ : 0) | 91 + (w ? UM_PROT_WRITE : 0) | 92 + (x ? UM_PROT_EXEC : 0); 93 + 94 + ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, 95 + prot, fd, offset); 96 + } else 97 + ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); 98 + 83 99 *pte = pte_mkuptodate(*pte); 84 100 } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); 85 101 return ret; ··· 172 180 if (mm == &init_mm) { 173 181 ops.mmap = kern_map; 174 182 ops.unmap = kern_unmap; 175 - ops.mprotect = kern_mprotect; 176 183 } else { 177 184 ops.mmap = map; 178 185 ops.unmap = unmap; 179 - ops.mprotect = protect; 180 186 } 181 187 182 188 pgd = pgd_offset(mm, addr);
-21
arch/um/os-Linux/skas/mem.c
··· 217 217 218 218 return 0; 219 219 } 220 - 221 - int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, 222 - unsigned int prot) 223 - { 224 - struct stub_syscall *sc; 225 - 226 - /* Compress with previous syscall if that is possible */ 227 - sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr); 228 - if (sc && sc->mem.prot == prot) { 229 - sc->mem.length += len; 230 - return 0; 231 - } 232 - 233 - sc = syscall_stub_alloc(mm_idp); 234 - sc->syscall = STUB_SYSCALL_MPROTECT; 235 - sc->mem.addr = addr; 236 - sc->mem.length = len; 237 - sc->mem.prot = prot; 238 - 239 - return 0; 240 - }