Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: Add initial SMP support

Add initial symmetric multi-processing (SMP) support to UML. With
this support enabled, users can tell UML to start multiple virtual
processors, each represented as a separate host thread.

In UML, kthreads and normal threads (when running in kernel mode)
can be scheduled and executed simultaneously on different virtual
processors. However, the userspace code of normal threads still
runs within their respective single-threaded stubs.

That is, SMP support is currently available both within the kernel
and across different processes, but userspace execution remains
limited to a single thread within each process.

Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20251027001815.1666872-6-tiwei.bie@linux.dev
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

authored by

Tiwei Bie and committed by
Johannes Berg
1e4ee513 9c82de55

+766 -48
+1 -1
Documentation/features/core/generic-idle-thread/arch-support.txt
··· 24 24 | s390: | ok | 25 25 | sh: | ok | 26 26 | sparc: | ok | 27 - | um: | TODO | 27 + | um: | ok | 28 28 | x86: | ok | 29 29 | xtensa: | ok | 30 30 -----------------------
+43 -3
arch/um/Kconfig
··· 28 28 select OF_EARLY_FLATTREE if OF 29 29 select GENERIC_IRQ_SHOW 30 30 select GENERIC_CPU_DEVICES 31 + select GENERIC_SMP_IDLE_THREAD 31 32 select HAVE_GCC_PLUGINS 32 33 select ARCH_SUPPORTS_LTO_CLANG 33 34 select ARCH_SUPPORTS_LTO_CLANG_THIN ··· 82 81 int 83 82 default 100 84 83 85 - config NR_CPUS 84 + config UML_SUBARCH_SUPPORTS_SMP 85 + bool 86 + 87 + config SMP 88 + bool "Symmetric multi-processing support" 89 + default n 90 + depends on UML_SUBARCH_SUPPORTS_SMP 91 + help 92 + This option enables UML SMP support. 93 + 94 + With this enabled, users can tell UML to start multiple virtual 95 + processors. Each virtual processor is represented as a separate 96 + host thread. 97 + 98 + In UML, kthreads and normal threads (when running in kernel mode) 99 + can be scheduled and executed simultaneously on different virtual 100 + processors. However, the userspace code of normal threads still 101 + runs within their respective single-threaded stubs. 102 + 103 + That is, SMP support is available both within the kernel and 104 + across different processes, but remains limited within threads 105 + of the same process in userspace. 106 + 107 + config NR_CPUS_RANGE_BEGIN 86 108 int 87 - range 1 1 88 - default 1 109 + default 1 if !SMP 110 + default 2 111 + 112 + config NR_CPUS_RANGE_END 113 + int 114 + default 1 if !SMP 115 + default 64 116 + 117 + config NR_CPUS_DEFAULT 118 + int 119 + default 1 if !SMP 120 + default 2 121 + 122 + config NR_CPUS 123 + int "Maximum number of CPUs" if SMP 124 + range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END 125 + default NR_CPUS_DEFAULT 89 126 90 127 source "arch/$(HEADER_ARCH)/um/Kconfig" 91 128 ··· 293 254 294 255 config ARCH_SUSPEND_POSSIBLE 295 256 def_bool y 257 + depends on !SMP 296 258 297 259 menu "Power management options" 298 260
+3 -2
arch/um/include/asm/current.h
··· 7 7 8 8 #ifndef __ASSEMBLER__ 9 9 10 + #include <shared/smp.h> 11 + 10 12 struct task_struct; 11 13 extern struct task_struct *cpu_tasks[NR_CPUS]; 12 14 13 15 static __always_inline struct task_struct *get_current(void) 14 16 { 15 - return cpu_tasks[0]; 17 + return cpu_tasks[uml_curr_cpu()]; 16 18 } 17 - 18 19 19 20 #define current get_current() 20 21
+23 -1
arch/um/include/asm/hardirq.h
··· 2 2 #ifndef __ASM_UM_HARDIRQ_H 3 3 #define __ASM_UM_HARDIRQ_H 4 4 5 - #include <asm-generic/hardirq.h> 5 + #include <linux/cache.h> 6 + #include <linux/threads.h> 6 7 7 8 #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 9 + 10 + typedef struct { 11 + unsigned int __softirq_pending; 12 + #if IS_ENABLED(CONFIG_SMP) 13 + unsigned int irq_resched_count; 14 + unsigned int irq_call_count; 15 + #endif 16 + } ____cacheline_aligned irq_cpustat_t; 17 + 18 + DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 19 + 20 + #define __ARCH_IRQ_STAT 21 + 22 + #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) 23 + 24 + #include <linux/irq.h> 25 + 26 + static inline void ack_bad_irq(unsigned int irq) 27 + { 28 + pr_crit("unexpected IRQ trap at vector %02x\n", irq); 29 + } 8 30 9 31 #endif /* __ASM_UM_HARDIRQ_H */
+10
arch/um/include/asm/mmu.h
··· 7 7 #define __ARCH_UM_MMU_H 8 8 9 9 #include "linux/types.h" 10 + #include <linux/mutex.h> 11 + #include <linux/spinlock.h> 10 12 #include <mm_id.h> 11 13 12 14 typedef struct mm_context { 13 15 struct mm_id id; 16 + struct mutex turnstile; 14 17 15 18 struct list_head list; 16 19 17 20 /* Address range in need of a TLB sync */ 21 + spinlock_t sync_tlb_lock; 18 22 unsigned long sync_tlb_range_from; 19 23 unsigned long sync_tlb_range_to; 20 24 } mm_context_t; 25 + 26 + #define INIT_MM_CONTEXT(mm) \ 27 + .context = { \ 28 + .turnstile = __MUTEX_INITIALIZER(mm.context.turnstile), \ 29 + .sync_tlb_lock = __SPIN_LOCK_INITIALIZER(mm.context.sync_tlb_lock), \ 30 + } 21 31 22 32 #endif
+2
arch/um/include/asm/pgtable.h
··· 225 225 static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start, 226 226 unsigned long end) 227 227 { 228 + guard(spinlock_irqsave)(&mm->context.sync_tlb_lock); 229 + 228 230 if (!mm->context.sync_tlb_range_to) { 229 231 mm->context.sync_tlb_range_from = start; 230 232 mm->context.sync_tlb_range_to = end;
+14 -1
arch/um/include/asm/smp.h
··· 2 2 #ifndef __UM_SMP_H 3 3 #define __UM_SMP_H 4 4 5 - #define hard_smp_processor_id() 0 5 + #if IS_ENABLED(CONFIG_SMP) 6 + 7 + #include <linux/cpumask.h> 8 + #include <shared/smp.h> 9 + 10 + #define raw_smp_processor_id() uml_curr_cpu() 11 + 12 + void arch_smp_send_reschedule(int cpu); 13 + 14 + void arch_send_call_function_single_ipi(int cpu); 15 + 16 + void arch_send_call_function_ipi_mask(const struct cpumask *mask); 17 + 18 + #endif /* CONFIG_SMP */ 6 19 7 20 #endif
+17
arch/um/include/linux/smp-internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __UM_SMP_INTERNAL_H 3 + #define __UM_SMP_INTERNAL_H 4 + 5 + #if IS_ENABLED(CONFIG_SMP) 6 + 7 + void prefill_possible_map(void); 8 + 9 + #else /* !CONFIG_SMP */ 10 + 11 + static inline void prefill_possible_map(void) { } 12 + 13 + #endif /* CONFIG_SMP */ 14 + 15 + extern char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE); 16 + 17 + #endif /* __UM_SMP_INTERNAL_H */
+17
arch/um/include/shared/os.h
··· 216 216 217 217 void os_set_pdeathsig(void); 218 218 219 + int os_futex_wait(void *uaddr, unsigned int val); 220 + int os_futex_wake(void *uaddr); 221 + 219 222 /* execvp.c */ 220 223 extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); 221 224 /* helper.c */ ··· 270 267 __attribute__ ((format (printf, 1, 2))); 271 268 272 269 /* time.c */ 270 + void os_idle_prepare(void); 273 271 extern void os_idle_sleep(void); 274 272 extern int os_timer_create(void); 275 273 extern int os_timer_set_interval(int cpu, unsigned long long nsecs); ··· 342 338 343 339 /* time-travel */ 344 340 extern void deliver_time_travel_irqs(void); 341 + 342 + /* smp.c */ 343 + #if IS_ENABLED(CONFIG_SMP) 344 + void os_init_smp(void); 345 + int os_start_cpu_thread(int cpu); 346 + void os_start_secondary(void *arg, jmp_buf *switch_buf); 347 + int os_send_ipi(int cpu, int vector); 348 + void os_local_ipi_enable(void); 349 + void os_local_ipi_disable(void); 350 + #else /* !CONFIG_SMP */ 351 + static inline void os_local_ipi_enable(void) { } 352 + static inline void os_local_ipi_disable(void) { } 353 + #endif /* CONFIG_SMP */ 345 354 346 355 #endif
+5
arch/um/include/shared/skas/mm_id.h
··· 6 6 #ifndef __MM_ID_H 7 7 #define __MM_ID_H 8 8 9 + #include <linux/compiler_types.h> 10 + 9 11 #define STUB_MAX_FDS 4 10 12 11 13 struct mm_id { ··· 20 18 int syscall_fd_num; 21 19 int syscall_fd_map[STUB_MAX_FDS]; 22 20 }; 21 + 22 + void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile); 23 + void exit_turnstile(struct mm_id *mm_id) __releases(turnstile); 23 24 24 25 void notify_mm_kill(int pid); 25 26
+2
arch/um/include/shared/skas/skas.h
··· 15 15 extern unsigned long current_stub_stack(void); 16 16 extern struct mm_id *current_mm_id(void); 17 17 extern void current_mm_sync(void); 18 + void initial_jmpbuf_lock(void); 19 + void initial_jmpbuf_unlock(void); 18 20 19 21 #endif
+20
arch/um/include/shared/smp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __UM_SHARED_SMP_H 3 + #define __UM_SHARED_SMP_H 4 + 5 + #if IS_ENABLED(CONFIG_SMP) 6 + 7 + extern int uml_ncpus; 8 + 9 + int uml_curr_cpu(void); 10 + void uml_start_secondary(void *opaque); 11 + void uml_ipi_handler(int vector); 12 + 13 + #else /* !CONFIG_SMP */ 14 + 15 + #define uml_ncpus 1 16 + #define uml_curr_cpu() 0 17 + 18 + #endif /* CONFIG_SMP */ 19 + 20 + #endif /* __UM_SHARED_SMP_H */
+1
arch/um/kernel/Makefile
··· 25 25 obj-$(CONFIG_OF) += dtb.o 26 26 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 27 27 obj-$(CONFIG_STACKTRACE) += stacktrace.o 28 + obj-$(CONFIG_SMP) += smp.o 28 29 29 30 USER_OBJS := config.o 30 31
+25
arch/um/kernel/irq.c
··· 22 22 #include <irq_kern.h> 23 23 #include <linux/time-internal.h> 24 24 25 + DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 26 + 27 + #define irq_stats(x) (&per_cpu(irq_stat, x)) 25 28 26 29 /* When epoll triggers we do not know why it did so 27 30 * we can also have different IRQs for read and write. ··· 703 700 struct uml_pt_regs *regs, void *mc) 704 701 { 705 702 do_IRQ(SIGCHLD_IRQ, regs); 703 + } 704 + 705 + /* 706 + * /proc/interrupts printing for arch specific interrupts 707 + */ 708 + int arch_show_interrupts(struct seq_file *p, int prec) 709 + { 710 + #if IS_ENABLED(CONFIG_SMP) 711 + int cpu; 712 + 713 + seq_printf(p, "%*s: ", prec, "RES"); 714 + for_each_online_cpu(cpu) 715 + seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count); 716 + seq_puts(p, " Rescheduling interrupts\n"); 717 + 718 + seq_printf(p, "%*s: ", prec, "CAL"); 719 + for_each_online_cpu(cpu) 720 + seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count); 721 + seq_puts(p, " Function call interrupts\n"); 722 + #endif 723 + 724 + return 0; 706 725 }
+5
arch/um/kernel/process.c
··· 218 218 um_idle_sleep(); 219 219 } 220 220 221 + void arch_cpu_idle_prepare(void) 222 + { 223 + os_idle_prepare(); 224 + } 225 + 221 226 int __uml_cant_sleep(void) { 222 227 return in_atomic() || irqs_disabled() || in_interrupt(); 223 228 /* Is in_interrupt() really needed? */
+25 -8
arch/um/kernel/skas/mmu.c
··· 23 23 static spinlock_t mm_list_lock; 24 24 static struct list_head mm_list; 25 25 26 + void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile) 27 + { 28 + struct mm_context *ctx = container_of(mm_id, struct mm_context, id); 29 + 30 + mutex_lock(&ctx->turnstile); 31 + } 32 + 33 + void exit_turnstile(struct mm_id *mm_id) __releases(turnstile) 34 + { 35 + struct mm_context *ctx = container_of(mm_id, struct mm_context, id); 36 + 37 + mutex_unlock(&ctx->turnstile); 38 + } 39 + 26 40 int init_new_context(struct task_struct *task, struct mm_struct *mm) 27 41 { 28 42 struct mm_id *new_id = &mm->context.id; 29 43 unsigned long stack = 0; 30 44 int ret = -ENOMEM; 31 45 46 + mutex_init(&mm->context.turnstile); 47 + spin_lock_init(&mm->context.sync_tlb_lock); 48 + 32 49 stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES)); 33 50 if (stack == 0) 34 51 goto out; 35 52 36 53 new_id->stack = stack; 54 + new_id->syscall_data_len = 0; 55 + new_id->syscall_fd_num = 0; 37 56 38 57 scoped_guard(spinlock_irqsave, &mm_list_lock) { 39 58 /* Insert into list, used for lookups when the child dies */ ··· 92 73 return; 93 74 } 94 75 76 + scoped_guard(spinlock_irqsave, &mm_list_lock) 77 + list_del(&mm->context.list); 78 + 95 79 if (mmu->id.pid > 0) { 96 80 os_kill_ptraced_process(mmu->id.pid, 1); 97 81 mmu->id.pid = -1; ··· 104 82 os_close_file(mmu->id.sock); 105 83 106 84 free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); 107 - 108 - guard(spinlock_irqsave)(&mm_list_lock); 109 - 110 - list_del(&mm->context.list); 111 85 } 112 86 113 87 static irqreturn_t mm_sigchld_irq(int irq, void* dev) ··· 128 110 /* Marks the MM as dead */ 129 111 mm_context->id.pid = -1; 130 112 131 - /* 132 - * NOTE: If SMP is implemented, a futex_wake 133 - * needs to be added here. 
134 - */ 135 113 stub_data = (void *)mm_context->id.stack; 136 114 stub_data->futex = FUTEX_IN_KERN; 115 + #if IS_ENABLED(CONFIG_SMP) 116 + os_futex_wake(&stub_data->futex); 117 + #endif 137 118 138 119 /* 139 120 * NOTE: Currently executing syscalls by
+16 -3
arch/um/kernel/skas/process.c
··· 7 7 #include <linux/sched/mm.h> 8 8 #include <linux/sched/task_stack.h> 9 9 #include <linux/sched/task.h> 10 + #include <linux/smp-internal.h> 10 11 11 12 #include <asm/tlbflush.h> 12 13 ··· 27 26 return 0; 28 27 } 29 28 30 - static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE); 29 + char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE); 31 30 32 31 int __init start_uml(void) 33 32 { 34 - stack_protections((unsigned long) &cpu0_irqstack); 35 - set_sigstack(cpu0_irqstack, THREAD_SIZE); 33 + stack_protections((unsigned long) &cpu_irqstacks[0]); 34 + set_sigstack(cpu_irqstacks[0], THREAD_SIZE); 36 35 37 36 init_new_thread_signals(); 38 37 ··· 64 63 return; 65 64 66 65 um_tlb_sync(current->mm); 66 + } 67 + 68 + static DEFINE_SPINLOCK(initial_jmpbuf_spinlock); 69 + 70 + void initial_jmpbuf_lock(void) 71 + { 72 + spin_lock_irq(&initial_jmpbuf_spinlock); 73 + } 74 + 75 + void initial_jmpbuf_unlock(void) 76 + { 77 + spin_unlock_irq(&initial_jmpbuf_spinlock); 67 78 }
+242
arch/um/kernel/smp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2025 Ant Group 4 + * Author: Tiwei Bie <tiwei.btw@antgroup.com> 5 + * 6 + * Based on the previous implementation in TT mode 7 + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 8 + */ 9 + 10 + #include <linux/sched.h> 11 + #include <linux/sched/task.h> 12 + #include <linux/sched/task_stack.h> 13 + #include <linux/module.h> 14 + #include <linux/processor.h> 15 + #include <linux/threads.h> 16 + #include <linux/cpu.h> 17 + #include <linux/hardirq.h> 18 + #include <linux/smp.h> 19 + #include <linux/smp-internal.h> 20 + #include <init.h> 21 + #include <kern.h> 22 + #include <os.h> 23 + #include <smp.h> 24 + 25 + enum { 26 + UML_IPI_RES = 0, 27 + UML_IPI_CALL_SINGLE, 28 + UML_IPI_CALL, 29 + UML_IPI_STOP, 30 + }; 31 + 32 + void arch_smp_send_reschedule(int cpu) 33 + { 34 + os_send_ipi(cpu, UML_IPI_RES); 35 + } 36 + 37 + void arch_send_call_function_single_ipi(int cpu) 38 + { 39 + os_send_ipi(cpu, UML_IPI_CALL_SINGLE); 40 + } 41 + 42 + void arch_send_call_function_ipi_mask(const struct cpumask *mask) 43 + { 44 + int cpu; 45 + 46 + for_each_cpu(cpu, mask) 47 + os_send_ipi(cpu, UML_IPI_CALL); 48 + } 49 + 50 + void smp_send_stop(void) 51 + { 52 + int cpu, me = smp_processor_id(); 53 + 54 + for_each_online_cpu(cpu) { 55 + if (cpu == me) 56 + continue; 57 + os_send_ipi(cpu, UML_IPI_STOP); 58 + } 59 + } 60 + 61 + static void ipi_handler(int vector, struct uml_pt_regs *regs) 62 + { 63 + struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); 64 + int cpu = raw_smp_processor_id(); 65 + 66 + irq_enter(); 67 + 68 + if (current->mm) 69 + os_alarm_process(current->mm->context.id.pid); 70 + 71 + switch (vector) { 72 + case UML_IPI_RES: 73 + inc_irq_stat(irq_resched_count); 74 + scheduler_ipi(); 75 + break; 76 + 77 + case UML_IPI_CALL_SINGLE: 78 + inc_irq_stat(irq_call_count); 79 + generic_smp_call_function_single_interrupt(); 80 + break; 81 + 82 + case UML_IPI_CALL: 83 + 
inc_irq_stat(irq_call_count); 84 + generic_smp_call_function_interrupt(); 85 + break; 86 + 87 + case UML_IPI_STOP: 88 + set_cpu_online(cpu, false); 89 + while (1) 90 + pause(); 91 + break; 92 + 93 + default: 94 + pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector); 95 + break; 96 + } 97 + 98 + irq_exit(); 99 + set_irq_regs(old_regs); 100 + } 101 + 102 + void uml_ipi_handler(int vector) 103 + { 104 + struct uml_pt_regs r = { .is_user = 0 }; 105 + 106 + preempt_disable(); 107 + ipi_handler(vector, &r); 108 + preempt_enable(); 109 + } 110 + 111 + /* AP states used only during CPU startup */ 112 + enum { 113 + UML_CPU_PAUSED = 0, 114 + UML_CPU_RUNNING, 115 + }; 116 + 117 + static int cpu_states[NR_CPUS]; 118 + 119 + static int start_secondary(void *unused) 120 + { 121 + int err, cpu = raw_smp_processor_id(); 122 + 123 + notify_cpu_starting(cpu); 124 + set_cpu_online(cpu, true); 125 + 126 + err = um_setup_timer(); 127 + if (err) 128 + panic("CPU#%d failed to setup timer, err = %d", cpu, err); 129 + 130 + local_irq_enable(); 131 + 132 + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); 133 + 134 + return 0; 135 + } 136 + 137 + void uml_start_secondary(void *opaque) 138 + { 139 + int cpu = raw_smp_processor_id(); 140 + struct mm_struct *mm = &init_mm; 141 + struct task_struct *idle; 142 + 143 + stack_protections((unsigned long) &cpu_irqstacks[cpu]); 144 + set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE); 145 + 146 + set_cpu_present(cpu, true); 147 + os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED); 148 + 149 + smp_rmb(); /* paired with smp_wmb() in __cpu_up() */ 150 + 151 + idle = cpu_tasks[cpu]; 152 + idle->thread_info.cpu = cpu; 153 + 154 + mmgrab(mm); 155 + idle->active_mm = mm; 156 + 157 + idle->thread.request.thread.proc = start_secondary; 158 + idle->thread.request.thread.arg = NULL; 159 + 160 + new_thread(task_stack_page(idle), &idle->thread.switch_buf, 161 + new_thread_handler); 162 + os_start_secondary(opaque, &idle->thread.switch_buf); 163 + } 164 + 165 + void 
__init smp_prepare_cpus(unsigned int max_cpus) 166 + { 167 + int err, cpu, me = smp_processor_id(); 168 + unsigned long deadline; 169 + 170 + os_init_smp(); 171 + 172 + for_each_possible_cpu(cpu) { 173 + if (cpu == me) 174 + continue; 175 + 176 + pr_debug("Booting processor %d...\n", cpu); 177 + err = os_start_cpu_thread(cpu); 178 + if (err) { 179 + pr_crit("CPU#%d failed to start cpu thread, err = %d", 180 + cpu, err); 181 + continue; 182 + } 183 + 184 + deadline = jiffies + msecs_to_jiffies(1000); 185 + spin_until_cond(cpu_present(cpu) || 186 + time_is_before_jiffies(deadline)); 187 + 188 + if (!cpu_present(cpu)) 189 + pr_crit("CPU#%d failed to boot\n", cpu); 190 + } 191 + } 192 + 193 + int __cpu_up(unsigned int cpu, struct task_struct *tidle) 194 + { 195 + cpu_tasks[cpu] = tidle; 196 + smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */ 197 + cpu_states[cpu] = UML_CPU_RUNNING; 198 + os_futex_wake(&cpu_states[cpu]); 199 + spin_until_cond(cpu_online(cpu)); 200 + 201 + return 0; 202 + } 203 + 204 + void __init smp_cpus_done(unsigned int max_cpus) 205 + { 206 + } 207 + 208 + /* Set in uml_ncpus_setup */ 209 + int uml_ncpus = 1; 210 + 211 + void __init prefill_possible_map(void) 212 + { 213 + int cpu; 214 + 215 + for (cpu = 0; cpu < uml_ncpus; cpu++) 216 + set_cpu_possible(cpu, true); 217 + for (; cpu < NR_CPUS; cpu++) 218 + set_cpu_possible(cpu, false); 219 + } 220 + 221 + static int __init uml_ncpus_setup(char *line, int *add) 222 + { 223 + *add = 0; 224 + 225 + if (kstrtoint(line, 10, &uml_ncpus)) { 226 + os_warn("%s: Couldn't parse '%s'\n", __func__, line); 227 + return -1; 228 + } 229 + 230 + uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS); 231 + 232 + return 0; 233 + } 234 + 235 + __uml_setup("ncpus=", uml_ncpus_setup, 236 + "ncpus=<# of desired CPUs>\n" 237 + " This tells UML how many virtual processors to start. 
The maximum\n" 238 + " number of supported virtual processors can be obtained by querying\n" 239 + " the CONFIG_NR_CPUS option using --showconfig.\n\n" 240 + ); 241 + 242 + EXPORT_SYMBOL(uml_curr_cpu);
+4 -1
arch/um/kernel/tlb.c
··· 162 162 { 163 163 pgd_t *pgd; 164 164 struct vm_ops ops; 165 - unsigned long addr = mm->context.sync_tlb_range_from, next; 165 + unsigned long addr, next; 166 166 int ret = 0; 167 + 168 + guard(spinlock_irqsave)(&mm->context.sync_tlb_lock); 167 169 168 170 if (mm->context.sync_tlb_range_to == 0) 169 171 return 0; ··· 179 177 ops.unmap = unmap; 180 178 } 181 179 180 + addr = mm->context.sync_tlb_range_from; 182 181 pgd = pgd_offset(mm, addr); 183 182 do { 184 183 next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
+1 -1
arch/um/kernel/trap.c
··· 316 316 if (!is_user && regs) 317 317 current->thread.segv_regs = container_of(regs, struct pt_regs, regs); 318 318 319 - if (!is_user && init_mm.context.sync_tlb_range_to) { 319 + if (!is_user && address >= start_vm && address < end_vm) { 320 320 /* 321 321 * Kernel has pending updates from set_ptes that were not 322 322 * flushed yet. Syncing them should fix the pagefault (if not
+23 -2
arch/um/kernel/um_arch.c
··· 19 19 #include <linux/kmsg_dump.h> 20 20 #include <linux/suspend.h> 21 21 #include <linux/random.h> 22 + #include <linux/smp-internal.h> 22 23 23 24 #include <asm/processor.h> 24 25 #include <asm/cpufeature.h> ··· 72 71 { 73 72 int i = 0; 74 73 74 + #if IS_ENABLED(CONFIG_SMP) 75 + i = (uintptr_t) v - 1; 76 + if (!cpu_online(i)) 77 + return 0; 78 + #endif 79 + 75 80 seq_printf(m, "processor\t: %d\n", i); 76 81 seq_printf(m, "vendor_id\t: User Mode Linux\n"); 77 82 seq_printf(m, "model name\t: UML\n"); ··· 94 87 loops_per_jiffy/(500000/HZ), 95 88 (loops_per_jiffy/(5000/HZ)) % 100); 96 89 97 - 98 90 return 0; 99 91 } 100 92 101 93 static void *c_start(struct seq_file *m, loff_t *pos) 102 94 { 103 - return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL; 95 + if (*pos < nr_cpu_ids) 96 + return (void *)(uintptr_t)(*pos + 1); 97 + return NULL; 104 98 } 105 99 106 100 static void *c_next(struct seq_file *m, void *v, loff_t *pos) ··· 417 409 strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 418 410 *cmdline_p = command_line; 419 411 setup_hostinfo(host_info, sizeof host_info); 412 + prefill_possible_map(); 420 413 421 414 if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) { 422 415 add_bootloader_randomness(rng_seed, sizeof(rng_seed)); ··· 451 442 void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 452 443 { 453 444 } 445 + 446 + #if IS_ENABLED(CONFIG_SMP) 447 + void alternatives_smp_module_add(struct module *mod, char *name, 448 + void *locks, void *locks_end, 449 + void *text, void *text_end) 450 + { 451 + } 452 + 453 + void alternatives_smp_module_del(struct module *mod) 454 + { 455 + } 456 + #endif 454 457 455 458 void *text_poke(void *addr, const void *opcode, size_t len) 456 459 {
+3 -1
arch/um/os-Linux/Makefile
··· 16 16 17 17 obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o 18 18 19 + obj-$(CONFIG_SMP) += smp.o 20 + 19 21 USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ 20 22 main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ 21 - tty.o umid.o util.o 23 + tty.o umid.o util.o smp.o 22 24 23 25 include $(srctree)/arch/um/scripts/Makefile.rules
+8
arch/um/os-Linux/internal.h
··· 4 4 5 5 #include <mm_id.h> 6 6 #include <stub-data.h> 7 + #include <signal.h> 7 8 8 9 /* 9 10 * elf_aux.c ··· 19 18 /* 20 19 * signal.c 21 20 */ 21 + extern __thread int signals_enabled; 22 22 int timer_alarm_pending(void); 23 23 24 24 /* ··· 27 25 */ 28 26 void wait_stub_done(int pid); 29 27 void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); 28 + 29 + /* 30 + * smp.c 31 + */ 32 + #define IPI_SIGNAL SIGRTMIN 33 + 30 34 #endif /* __UM_OS_LINUX_INTERNAL_H */
+20
arch/um/os-Linux/process.c
··· 10 10 #include <errno.h> 11 11 #include <signal.h> 12 12 #include <fcntl.h> 13 + #include <limits.h> 14 + #include <linux/futex.h> 13 15 #include <sys/mman.h> 14 16 #include <sys/ptrace.h> 15 17 #include <sys/prctl.h> ··· 190 188 void os_set_pdeathsig(void) 191 189 { 192 190 prctl(PR_SET_PDEATHSIG, SIGKILL); 191 + } 192 + 193 + int os_futex_wait(void *uaddr, unsigned int val) 194 + { 195 + int r; 196 + 197 + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAIT, val, 198 + NULL, NULL, 0)); 199 + return r < 0 ? -errno : r; 200 + } 201 + 202 + int os_futex_wake(void *uaddr) 203 + { 204 + int r; 205 + 206 + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, INT_MAX, 207 + NULL, NULL, 0)); 208 + return r < 0 ? -errno : r; 193 209 }
+26 -5
arch/um/os-Linux/signal.c
··· 69 69 #define SIGCHLD_BIT 2 70 70 #define SIGCHLD_MASK (1 << SIGCHLD_BIT) 71 71 72 - static __thread int signals_enabled; 72 + __thread int signals_enabled; 73 73 #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) 74 74 static int signals_blocked, signals_blocked_pending; 75 75 #endif ··· 259 259 return 0; 260 260 } 261 261 262 + static inline void __block_signals(void) 263 + { 264 + if (!signals_enabled) 265 + return; 266 + 267 + os_local_ipi_disable(); 268 + barrier(); 269 + signals_enabled = 0; 270 + } 271 + 272 + static inline void __unblock_signals(void) 273 + { 274 + if (signals_enabled) 275 + return; 276 + 277 + signals_enabled = 1; 278 + barrier(); 279 + os_local_ipi_enable(); 280 + } 281 + 262 282 void block_signals(void) 263 283 { 264 - signals_enabled = 0; 284 + __block_signals(); 265 285 /* 266 286 * This must return with signals disabled, so this barrier 267 287 * ensures that writes are flushed out before the return. ··· 298 278 if (signals_enabled == 1) 299 279 return; 300 280 301 - signals_enabled = 1; 281 + __unblock_signals(); 282 + 302 283 #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) 303 284 deliver_time_travel_irqs(); 304 285 #endif ··· 333 312 * tracing that happens inside the handlers we call for the 334 313 * pending signals will mess up the tracing state. 335 314 */ 336 - signals_enabled = 0; 315 + __block_signals(); 337 316 um_trace_signals_off(); 338 317 339 318 /* ··· 365 344 366 345 /* Re-enable signals and trace that we're doing so. */ 367 346 um_trace_signals_on(); 368 - signals_enabled = 1; 347 + __unblock_signals(); 369 348 } 370 349 } 371 350
+25 -14
arch/um/os-Linux/skas/process.c
··· 546 546 void userspace(struct uml_pt_regs *regs) 547 547 { 548 548 int err, status, op; 549 - siginfo_t si_ptrace; 549 + siginfo_t si_local; 550 550 siginfo_t *si; 551 551 int sig; 552 552 ··· 555 555 556 556 while (1) { 557 557 struct mm_id *mm_id = current_mm_id(); 558 + 559 + /* 560 + * At any given time, only one CPU thread can enter the 561 + * turnstile to operate on the same stub process, including 562 + * executing stub system calls (mmap and munmap). 563 + */ 564 + enter_turnstile(mm_id); 558 565 559 566 /* 560 567 * When we are in time-travel mode, userspace can theoretically ··· 630 623 } 631 624 632 625 if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) 633 - panic("%s - Invalid siginfo offset from child", 634 - __func__); 635 - si = (void *)&proc_data->sigstack[proc_data->si_offset]; 626 + panic("%s - Invalid siginfo offset from child", __func__); 627 + 628 + si = &si_local; 629 + memcpy(si, &proc_data->sigstack[proc_data->si_offset], sizeof(*si)); 636 630 637 631 regs->is_user = 1; 638 632 ··· 729 721 case SIGFPE: 730 722 case SIGWINCH: 731 723 ptrace(PTRACE_GETSIGINFO, pid, 0, 732 - (struct siginfo *)&si_ptrace); 733 - si = &si_ptrace; 724 + (struct siginfo *)&si_local); 725 + si = &si_local; 734 726 break; 735 727 default: 736 728 si = NULL; ··· 740 732 sig = 0; 741 733 } 742 734 } 735 + 736 + exit_turnstile(mm_id); 743 737 744 738 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ 745 739 ··· 812 802 813 803 static jmp_buf initial_jmpbuf; 814 804 815 - /* XXX Make these percpu */ 816 - static void (*cb_proc)(void *arg); 817 - static void *cb_arg; 818 - static jmp_buf *cb_back; 805 + static __thread void (*cb_proc)(void *arg); 806 + static __thread void *cb_arg; 807 + static __thread jmp_buf *cb_back; 819 808 820 809 int start_idle_thread(void *stack, jmp_buf *switch_buf) 821 810 { ··· 868 859 cb_arg = arg; 869 860 cb_back = &here; 870 861 871 - block_signals_trace(); 862 + initial_jmpbuf_lock(); 872 863 if 
(UML_SETJMP(&here) == 0) 873 864 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); 874 - unblock_signals_trace(); 865 + initial_jmpbuf_unlock(); 875 866 876 867 cb_proc = NULL; 877 868 cb_arg = NULL; ··· 880 871 881 872 void halt_skas(void) 882 873 { 883 - block_signals_trace(); 874 + initial_jmpbuf_lock(); 884 875 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); 876 + /* unreachable */ 885 877 } 886 878 887 879 static bool noreboot; ··· 902 892 903 893 void reboot_skas(void) 904 894 { 905 - block_signals_trace(); 895 + initial_jmpbuf_lock(); 906 896 UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); 897 + /* unreachable */ 907 898 }
+148
arch/um/os-Linux/smp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2025 Ant Group 4 + * Author: Tiwei Bie <tiwei.btw@antgroup.com> 5 + */ 6 + 7 + #include <errno.h> 8 + #include <pthread.h> 9 + #include <signal.h> 10 + #include <kern_util.h> 11 + #include <um_malloc.h> 12 + #include <init.h> 13 + #include <os.h> 14 + #include <smp.h> 15 + #include "internal.h" 16 + 17 + struct cpu_thread_data { 18 + int cpu; 19 + sigset_t sigset; 20 + }; 21 + 22 + static __thread int __curr_cpu; 23 + 24 + int uml_curr_cpu(void) 25 + { 26 + return __curr_cpu; 27 + } 28 + 29 + static pthread_t cpu_threads[CONFIG_NR_CPUS]; 30 + 31 + static void *cpu_thread(void *arg) 32 + { 33 + struct cpu_thread_data *data = arg; 34 + 35 + __curr_cpu = data->cpu; 36 + 37 + uml_start_secondary(data); 38 + 39 + return NULL; 40 + } 41 + 42 + int os_start_cpu_thread(int cpu) 43 + { 44 + struct cpu_thread_data *data; 45 + sigset_t sigset, oset; 46 + int err; 47 + 48 + data = uml_kmalloc(sizeof(*data), UM_GFP_ATOMIC); 49 + if (!data) 50 + return -ENOMEM; 51 + 52 + sigfillset(&sigset); 53 + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { 54 + err = errno; 55 + goto err; 56 + } 57 + 58 + data->cpu = cpu; 59 + data->sigset = oset; 60 + 61 + err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, data); 62 + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) 63 + panic("Failed to restore the signal mask, errno = %d", errno); 64 + if (err != 0) 65 + goto err; 66 + 67 + return 0; 68 + 69 + err: 70 + kfree(data); 71 + return -err; 72 + } 73 + 74 + void os_start_secondary(void *arg, jmp_buf *switch_buf) 75 + { 76 + struct cpu_thread_data *data = arg; 77 + 78 + sigaddset(&data->sigset, IPI_SIGNAL); 79 + sigaddset(&data->sigset, SIGIO); 80 + 81 + if (sigprocmask(SIG_SETMASK, &data->sigset, NULL) < 0) 82 + panic("Failed to restore the signal mask, errno = %d", errno); 83 + 84 + kfree(data); 85 + longjmp(*switch_buf, 1); 86 + 87 + /* unreachable */ 88 + printk(UM_KERN_ERR "impossible long jump!"); 89 + 
fatal_sigsegv(); 90 + } 91 + 92 + int os_send_ipi(int cpu, int vector) 93 + { 94 + union sigval value = { .sival_int = vector }; 95 + 96 + return pthread_sigqueue(cpu_threads[cpu], IPI_SIGNAL, value); 97 + } 98 + 99 + static void __local_ipi_set(int enable) 100 + { 101 + sigset_t sigset; 102 + 103 + sigemptyset(&sigset); 104 + sigaddset(&sigset, IPI_SIGNAL); 105 + 106 + if (sigprocmask(enable ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) 107 + panic("%s: sigprocmask failed, errno = %d", __func__, errno); 108 + } 109 + 110 + void os_local_ipi_enable(void) 111 + { 112 + __local_ipi_set(1); 113 + } 114 + 115 + void os_local_ipi_disable(void) 116 + { 117 + __local_ipi_set(0); 118 + } 119 + 120 + static void ipi_sig_handler(int sig, siginfo_t *si, void *uc) 121 + { 122 + int save_errno = errno; 123 + 124 + signals_enabled = 0; 125 + um_trace_signals_off(); 126 + 127 + uml_ipi_handler(si->si_value.sival_int); 128 + 129 + um_trace_signals_on(); 130 + signals_enabled = 1; 131 + 132 + errno = save_errno; 133 + } 134 + 135 + void __init os_init_smp(void) 136 + { 137 + struct sigaction action = { 138 + .sa_sigaction = ipi_sig_handler, 139 + .sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART, 140 + }; 141 + 142 + sigfillset(&action.sa_mask); 143 + 144 + if (sigaction(IPI_SIGNAL, &action, NULL) < 0) 145 + panic("%s: sigaction failed, errno = %d", __func__, errno); 146 + 147 + cpu_threads[0] = pthread_self(); 148 + }
+4
arch/um/os-Linux/start_up.c
··· 22 22 #include <asm/unistd.h> 23 23 #include <init.h> 24 24 #include <os.h> 25 + #include <smp.h> 25 26 #include <kern_util.h> 26 27 #include <mem_user.h> 27 28 #include <ptrace_user.h> ··· 481 480 if (seccomp_config == 2) 482 481 fatal("SECCOMP userspace requested but not functional!\n"); 483 482 } 483 + 484 + if (uml_ncpus > 1) 485 + fatal("SMP is not supported with PTRACE userspace.\n"); 484 486 485 487 using_seccomp = 0; 486 488 check_ptrace();
+33 -5
arch/um/os-Linux/time.c
··· 11 11 #include <errno.h> 12 12 #include <signal.h> 13 13 #include <time.h> 14 + #include <sys/signalfd.h> 14 15 #include <sys/time.h> 15 16 #include <kern_util.h> 16 17 #include <os.h> 18 + #include <smp.h> 17 19 #include <string.h> 18 20 #include "internal.h" 19 21 ··· 43 41 */ 44 42 int os_timer_create(void) 45 43 { 46 - timer_t *t = &event_high_res_timer[0]; 44 + int cpu = uml_curr_cpu(); 45 + timer_t *t = &event_high_res_timer[cpu]; 47 46 struct sigevent sev = { 48 47 .sigev_notify = SIGEV_THREAD_ID, 49 48 .sigev_signo = SIGALRM, ··· 108 105 return timespec_to_ns(&ts); 109 106 } 110 107 108 + static __thread int wake_signals; 109 + 110 + void os_idle_prepare(void) 111 + { 112 + sigset_t set; 113 + 114 + sigemptyset(&set); 115 + sigaddset(&set, SIGALRM); 116 + sigaddset(&set, IPI_SIGNAL); 117 + 118 + /* 119 + * We need to use signalfd rather than sigsuspend in idle sleep 120 + * because the IPI signal is a real-time signal that carries data, 121 + * and unlike handling SIGALRM, we cannot simply flag it in 122 + * signals_pending. 123 + */ 124 + wake_signals = signalfd(-1, &set, SFD_CLOEXEC); 125 + if (wake_signals < 0) 126 + panic("Failed to create signal FD, errno = %d", errno); 127 + } 128 + 111 129 /** 112 130 * os_idle_sleep() - sleep until interrupted 113 131 */ 114 132 void os_idle_sleep(void) 115 133 { 116 - sigset_t set, old; 134 + sigset_t set; 117 135 118 - /* Block SIGALRM while performing the need_resched check. */ 136 + /* 137 + * Block SIGALRM while performing the need_resched check. 138 + * Note that, because IRQs are disabled, the IPI signal is 139 + * already blocked. 140 + */ 119 141 sigemptyset(&set); 120 142 sigaddset(&set, SIGALRM); 121 - sigprocmask(SIG_BLOCK, &set, &old); 143 + sigprocmask(SIG_BLOCK, &set, NULL); 122 144 123 145 /* 124 146 * Because disabling IRQs does not block SIGALRM, it is also 125 147 * necessary to check for any pending timer alarms. 
126 148 */ 127 149 if (!uml_need_resched() && !timer_alarm_pending()) 128 - sigsuspend(&old); 150 + os_poll(1, &wake_signals); 129 151 130 152 /* Restore the signal mask. */ 131 153 sigprocmask(SIG_UNBLOCK, &set, NULL);