Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/vdso: Add support for time namespaces

Add the necessary glue to support time namespaces in the powerpc vDSO.

Most of the implementation is copied from arm64.

__arch_get_timens_vdso_data() calculates the position of the timens
vdso data from the position of the vdso data, knowing that it is the
next page in vvar. This avoids having to redo the mflr/bcl/mflr/mtlr
dance to locate the page relative to the running code position.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com> # vDSO parts
Acked-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1a15495f80ec19a87b16cf874dbf7c3fa5ec40fe.1617209142.git.christophe.leroy@csgroup.eu

authored by

Christophe Leroy and committed by
Michael Ellerman
74205b3f 1c4bce67

+114 -21
+2 -1
arch/powerpc/Kconfig
··· 173 173 select GENERIC_CPU_AUTOPROBE 174 174 select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC 175 175 select GENERIC_EARLY_IOREMAP 176 + select GENERIC_GETTIMEOFDAY 176 177 select GENERIC_IRQ_SHOW 177 178 select GENERIC_IRQ_SHOW_LEVEL 178 179 select GENERIC_PCI_IOMAP if PCI ··· 181 180 select GENERIC_STRNCPY_FROM_USER 182 181 select GENERIC_STRNLEN_USER 183 182 select GENERIC_TIME_VSYSCALL 184 - select GENERIC_GETTIMEOFDAY 183 + select GENERIC_VDSO_TIME_NS 185 184 select HAVE_ARCH_AUDITSYSCALL 186 185 select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU 187 186 select HAVE_ARCH_JUMP_LABEL
+10
arch/powerpc/include/asm/vdso/gettimeofday.h
··· 2 2 #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H 3 3 #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H 4 4 5 + #include <asm/page.h> 6 + 5 7 #ifdef __ASSEMBLY__ 6 8 7 9 #include <asm/ppc_asm.h> ··· 155 153 } 156 154 157 155 const struct vdso_data *__arch_get_vdso_data(void); 156 + 157 + #ifdef CONFIG_TIME_NS 158 + static __always_inline 159 + const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) 160 + { 161 + return (void *)vd + PAGE_SIZE; 162 + } 163 + #endif 158 164 159 165 static inline bool vdso_clocksource_ok(const struct vdso_data *vd) 160 166 {
-2
arch/powerpc/include/asm/vdso_datapage.h
··· 107 107 bcl 20, 31, .+4 108 108 999: 109 109 mflr \ptr 110 - #if CONFIG_PPC_PAGE_SHIFT > 14 111 110 addis \ptr, \ptr, (_vdso_datapage - 999b)@ha 112 - #endif 113 111 addi \ptr, \ptr, (_vdso_datapage - 999b)@l 114 112 .endm 115 113
+100 -16
arch/powerpc/kernel/vdso.c
··· 18 18 #include <linux/security.h> 19 19 #include <linux/memblock.h> 20 20 #include <linux/syscalls.h> 21 + #include <linux/time_namespace.h> 21 22 #include <vdso/datapage.h> 22 23 23 24 #include <asm/syscall.h> ··· 51 50 } vdso_data_store __page_aligned_data; 52 51 struct vdso_arch_data *vdso_data = &vdso_data_store.data; 53 52 53 + enum vvar_pages { 54 + VVAR_DATA_PAGE_OFFSET, 55 + VVAR_TIMENS_PAGE_OFFSET, 56 + VVAR_NR_PAGES, 57 + }; 58 + 54 59 static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, 55 60 unsigned long text_size) 56 61 { ··· 80 73 return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); 81 74 } 82 75 76 + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, 77 + struct vm_area_struct *vma, struct vm_fault *vmf); 78 + 83 79 static struct vm_special_mapping vvar_spec __ro_after_init = { 84 80 .name = "[vvar]", 81 + .fault = vvar_fault, 85 82 }; 86 83 87 84 static struct vm_special_mapping vdso32_spec __ro_after_init = { ··· 98 87 .mremap = vdso64_mremap, 99 88 }; 100 89 90 + #ifdef CONFIG_TIME_NS 91 + struct vdso_data *arch_get_vdso_data(void *vvar_page) 92 + { 93 + return ((struct vdso_arch_data *)vvar_page)->data; 94 + } 95 + 96 + /* 97 + * The vvar mapping contains data for a specific time namespace, so when a task 98 + * changes namespace we must unmap its vvar data for the old namespace. 99 + * Subsequent faults will map in data for the new namespace. 100 + * 101 + * For more details see timens_setup_vdso_data(). 
102 + */ 103 + int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) 104 + { 105 + struct mm_struct *mm = task->mm; 106 + struct vm_area_struct *vma; 107 + 108 + mmap_read_lock(mm); 109 + 110 + for (vma = mm->mmap; vma; vma = vma->vm_next) { 111 + unsigned long size = vma->vm_end - vma->vm_start; 112 + 113 + if (vma_is_special_mapping(vma, &vvar_spec)) 114 + zap_page_range(vma, vma->vm_start, size); 115 + } 116 + 117 + mmap_read_unlock(mm); 118 + return 0; 119 + } 120 + 121 + static struct page *find_timens_vvar_page(struct vm_area_struct *vma) 122 + { 123 + if (likely(vma->vm_mm == current->mm)) 124 + return current->nsproxy->time_ns->vvar_page; 125 + 126 + /* 127 + * VM_PFNMAP | VM_IO protect .fault() handler from being called 128 + * through interfaces like /proc/$pid/mem or 129 + * process_vm_{readv,writev}() as long as there's no .access() 130 + * in special_mapping_vmops. 131 + * For more details check_vma_flags() and __access_remote_vm() 132 + */ 133 + WARN(1, "vvar_page accessed remotely"); 134 + 135 + return NULL; 136 + } 137 + #else 138 + static struct page *find_timens_vvar_page(struct vm_area_struct *vma) 139 + { 140 + return NULL; 141 + } 142 + #endif 143 + 144 + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, 145 + struct vm_area_struct *vma, struct vm_fault *vmf) 146 + { 147 + struct page *timens_page = find_timens_vvar_page(vma); 148 + unsigned long pfn; 149 + 150 + switch (vmf->pgoff) { 151 + case VVAR_DATA_PAGE_OFFSET: 152 + if (timens_page) 153 + pfn = page_to_pfn(timens_page); 154 + else 155 + pfn = virt_to_pfn(vdso_data); 156 + break; 157 + #ifdef CONFIG_TIME_NS 158 + case VVAR_TIMENS_PAGE_OFFSET: 159 + /* 160 + * If a task belongs to a time namespace then a namespace 161 + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and 162 + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET 163 + * offset. 164 + * See also the comment near timens_setup_vdso_data(). 
165 + */ 166 + if (!timens_page) 167 + return VM_FAULT_SIGBUS; 168 + pfn = virt_to_pfn(vdso_data); 169 + break; 170 + #endif /* CONFIG_TIME_NS */ 171 + default: 172 + return VM_FAULT_SIGBUS; 173 + } 174 + 175 + return vmf_insert_pfn(vma, vmf->address, pfn); 176 + } 177 + 101 178 /* 102 179 * This is called from binfmt_elf, we create the special vma for the 103 180 * vDSO and insert it into the mm struct tree ··· 194 95 { 195 96 unsigned long vdso_size, vdso_base, mappings_size; 196 97 struct vm_special_mapping *vdso_spec; 197 - unsigned long vvar_size = PAGE_SIZE; 98 + unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; 198 99 struct mm_struct *mm = current->mm; 199 100 struct vm_area_struct *vma; 200 101 ··· 365 266 return pagelist; 366 267 } 367 268 368 - static struct page ** __init vvar_setup_pages(void) 369 - { 370 - struct page **pagelist; 371 - 372 - /* .pages is NULL-terminated */ 373 - pagelist = kcalloc(2, sizeof(struct page *), GFP_KERNEL); 374 - if (!pagelist) 375 - panic("%s: Cannot allocate page list for VVAR", __func__); 376 - 377 - pagelist[0] = virt_to_page(vdso_data); 378 - return pagelist; 379 - } 380 - 381 269 static int __init vdso_init(void) 382 270 { 383 271 #ifdef CONFIG_PPC64 ··· 402 316 403 317 if (IS_ENABLED(CONFIG_PPC64)) 404 318 vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); 405 - 406 - vvar_spec.pages = vvar_setup_pages(); 407 319 408 320 smp_wmb(); 409 321
+1 -1
arch/powerpc/kernel/vdso32/vdso32.lds.S
··· 17 17 18 18 SECTIONS 19 19 { 20 - PROVIDE(_vdso_datapage = . - PAGE_SIZE); 20 + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); 21 21 . = SIZEOF_HEADERS; 22 22 23 23 .hash : { *(.hash) } :text
+1 -1
arch/powerpc/kernel/vdso64/vdso64.lds.S
··· 17 17 18 18 SECTIONS 19 19 { 20 - PROVIDE(_vdso_datapage = . - PAGE_SIZE); 20 + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); 21 21 . = SIZEOF_HEADERS; 22 22 23 23 .hash : { *(.hash) } :text