Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: Switch clocksource to hrtimers

UML is using an obsolete itimer call for
all timers and "polls" for kernel space timer firing
in its userspace portion resulting in a long list
of bugs and incorrect behaviour(s). It also uses
ITIMER_VIRTUAL for its timer, which results in the
timer being dependent on UML actually running and on
the CPU load.

This patch fixes this by moving to posix high resolution
timers firing off CLOCK_MONOTONIC and relaying the timer
correctly to the UML userspace.

Fixes:
- crashes when the host suspends/resumes
- broken userspace timers - effective ~40Hz instead
of what they should be. Note - this modifies skas behavior
by no longer setting an itimer per clone(). Timer events
are relayed instead.
- kernel network packet scheduling disciplines
- tcp behaviour especially under load
- various timer related corner cases

Finally, overall responsiveness of userspace is better.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Anton Ivanov <aivanov@brocade.com>
[rw: massaged commit message]
Signed-off-by: Richard Weinberger <richard@nod.at>

authored by

Anton Ivanov and committed by
Richard Weinberger
2eb5f31b e17c6d77

+280 -249
+1 -1
arch/um/Makefile
··· 131 131 # The wrappers will select whether using "malloc" or the kernel allocator. 132 132 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc 133 133 134 - LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) 134 + LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt 135 135 136 136 # Used by link-vmlinux.sh which has special support for um link 137 137 export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE)
+13 -5
arch/um/include/shared/os.h
··· 1 1 /* 2 + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) 3 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 4 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 5 * Licensed under the GPL 4 6 */ ··· 185 183 /* process.c */ 186 184 extern unsigned long os_process_pc(int pid); 187 185 extern int os_process_parent(int pid); 186 + extern void os_alarm_process(int pid); 188 187 extern void os_stop_process(int pid); 189 188 extern void os_kill_process(int pid, int reap_child); 190 189 extern void os_kill_ptraced_process(int pid, int reap_child); ··· 220 217 extern char *get_umid(void); 221 218 222 219 /* signal.c */ 223 - extern void timer_init(void); 220 + extern void timer_set_signal_handler(void); 224 221 extern void set_sigstack(void *sig_stack, int size); 225 222 extern void remove_sigstack(void); 226 223 extern void set_handler(int sig); ··· 230 227 extern int get_signals(void); 231 228 extern int set_signals(int enable); 232 229 extern int os_is_signal_stack(void); 230 + extern void deliver_alarm(void); 233 231 234 232 /* util.c */ 235 233 extern void stack_protections(unsigned long address); ··· 242 238 extern void os_fix_helper_signals(void); 243 239 244 240 /* time.c */ 245 - extern void idle_sleep(unsigned long long nsecs); 246 - extern int set_interval(void); 247 - extern int timer_one_shot(int ticks); 248 - extern long long disable_timer(void); 241 + extern void os_idle_sleep(unsigned long long nsecs); 242 + extern int os_timer_create(void* timer); 243 + extern int os_timer_set_interval(void* timer, void* its); 244 + extern int os_timer_one_shot(int ticks); 245 + extern long long os_timer_disable(void); 246 + extern long os_timer_remain(void* timer); 249 247 extern void uml_idle_timer(void); 248 + extern long long os_persistent_clock_emulation(void); 250 249 extern long long os_nsecs(void); 250 + extern long long os_vnsecs(void); 251 251 252 252 /* skas/mem.c */ 253 253 extern long 
run_syscall_stub(struct mm_id * mm_idp,
+4 -3
arch/um/include/shared/skas/stub-data.h
··· 1 1 /* 2 + 3 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 4 * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) 3 5 * Licensed under the GPL 4 6 */ ··· 8 6 #ifndef __STUB_DATA_H 9 7 #define __STUB_DATA_H 10 8 11 - #include <sys/time.h> 9 + #include <time.h> 12 10 13 11 struct stub_data { 14 - long offset; 12 + unsigned long offset; 15 13 int fd; 16 - struct itimerval timer; 17 14 long err; 18 15 }; 19 16
+13
arch/um/include/shared/timer-internal.h
··· 1 + /* 2 + * Copyright (C) 2012 - 2014 Cisco Systems 3 + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 4 + * Licensed under the GPL 5 + */ 6 + 7 + #ifndef __TIMER_INTERNAL_H__ 8 + #define __TIMER_INTERNAL_H__ 9 + 10 + #define TIMER_MULTIPLIER 256 11 + #define TIMER_MIN_DELTA 500 12 + 13 + #endif
+4 -4
arch/um/kernel/process.c
··· 1 1 /* 2 + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) 3 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 4 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 5 * Copyright 2003 PathScale, Inc. 4 6 * Licensed under the GPL ··· 29 27 #include <kern_util.h> 30 28 #include <os.h> 31 29 #include <skas.h> 30 + #include <timer-internal.h> 32 31 33 32 /* 34 33 * This is a per-cpu array. A processor only modifies its entry and it only ··· 206 203 207 204 void arch_cpu_idle(void) 208 205 { 209 - unsigned long long nsecs; 210 - 211 206 cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); 212 - nsecs = disable_timer(); 213 - idle_sleep(nsecs); 207 + os_idle_sleep(UM_NSEC_PER_SEC); 214 208 local_irq_enable(); 215 209 } 216 210
+1 -5
arch/um/kernel/skas/clone.c
··· 1 1 /* 2 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 3 * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 4 * Licensed under the GPL 4 5 */ ··· 33 32 goto out; 34 33 35 34 err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); 36 - if (err) 37 - goto out; 38 - 39 - err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, 40 - (long) &data->timer, 0); 41 35 if (err) 42 36 goto out; 43 37
+3
arch/um/kernel/skas/mmu.c
··· 1 1 /* 2 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 3 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 4 * Licensed under the GPL 4 5 */ ··· 62 61 if (current->mm != NULL && current->mm != &init_mm) 63 62 from_mm = &current->mm->context; 64 63 64 + block_signals(); 65 65 if (from_mm) 66 66 to_mm->id.u.pid = copy_context_skas0(stack, 67 67 from_mm->id.u.pid); 68 68 else to_mm->id.u.pid = start_userspace(stack); 69 + unblock_signals(); 69 70 70 71 if (to_mm->id.u.pid < 0) { 71 72 ret = to_mm->id.u.pid;
+48 -25
arch/um/kernel/time.c
··· 1 1 /* 2 + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) 3 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 4 + * Copyright (C) 2012-2014 Cisco Systems 2 5 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 6 * Licensed under the GPL 4 7 */ ··· 10 7 #include <linux/init.h> 11 8 #include <linux/interrupt.h> 12 9 #include <linux/jiffies.h> 10 + #include <linux/mm.h> 11 + #include <linux/sched.h> 12 + #include <linux/spinlock.h> 13 13 #include <linux/threads.h> 14 14 #include <asm/irq.h> 15 15 #include <asm/param.h> 16 16 #include <kern_util.h> 17 17 #include <os.h> 18 + #include <timer-internal.h> 18 19 19 20 void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) 20 21 { ··· 31 24 32 25 static int itimer_shutdown(struct clock_event_device *evt) 33 26 { 34 - disable_timer(); 27 + os_timer_disable(); 35 28 return 0; 36 29 } 37 30 38 31 static int itimer_set_periodic(struct clock_event_device *evt) 39 32 { 40 - set_interval(); 33 + os_timer_set_interval(NULL, NULL); 41 34 return 0; 42 35 } 43 36 44 37 static int itimer_next_event(unsigned long delta, 45 38 struct clock_event_device *evt) 46 39 { 47 - return timer_one_shot(delta + 1); 40 + return os_timer_one_shot(delta); 48 41 } 49 42 50 - static struct clock_event_device itimer_clockevent = { 51 - .name = "itimer", 43 + static int itimer_one_shot(struct clock_event_device *evt) 44 + { 45 + os_timer_one_shot(1); 46 + return 0; 47 + } 48 + 49 + static struct clock_event_device timer_clockevent = { 50 + .name = "posix-timer", 52 51 .rating = 250, 53 52 .cpumask = cpu_all_mask, 54 53 .features = CLOCK_EVT_FEAT_PERIODIC | 55 54 CLOCK_EVT_FEAT_ONESHOT, 56 55 .set_state_shutdown = itimer_shutdown, 57 56 .set_state_periodic = itimer_set_periodic, 58 - .set_state_oneshot = itimer_shutdown, 57 + .set_state_oneshot = itimer_one_shot, 59 58 .set_next_event = itimer_next_event, 60 - .shift = 32, 59 + .shift = 0, 60 + .max_delta_ns = 0xffffffff, 
61 + .min_delta_ns = TIMER_MIN_DELTA, //microsecond resolution should be enough for anyone, same as 640K RAM 61 62 .irq = 0, 63 + .mult = 1, 62 64 }; 63 65 64 66 static irqreturn_t um_timer(int irq, void *dev) 65 67 { 66 - (*itimer_clockevent.event_handler)(&itimer_clockevent); 68 + if (get_current()->mm != NULL) 69 + { 70 + /* userspace - relay signal, results in correct userspace timers */ 71 + os_alarm_process(get_current()->mm->context.id.u.pid); 72 + } 73 + 74 + (*timer_clockevent.event_handler)(&timer_clockevent); 67 75 68 76 return IRQ_HANDLED; 69 77 } 70 78 71 - static cycle_t itimer_read(struct clocksource *cs) 79 + static cycle_t timer_read(struct clocksource *cs) 72 80 { 73 - return os_nsecs() / 1000; 81 + return os_nsecs() / TIMER_MULTIPLIER; 74 82 } 75 83 76 - static struct clocksource itimer_clocksource = { 77 - .name = "itimer", 84 + static struct clocksource timer_clocksource = { 85 + .name = "timer", 78 86 .rating = 300, 79 - .read = itimer_read, 87 + .read = timer_read, 80 88 .mask = CLOCKSOURCE_MASK(64), 81 89 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 82 90 }; 83 91 84 - static void __init setup_itimer(void) 92 + static void __init timer_setup(void) 85 93 { 86 94 int err; 87 95 88 - err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL); 96 + err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL); 89 97 if (err != 0) 90 98 printk(KERN_ERR "register_timer : request_irq failed - " 91 99 "errno = %d\n", -err); 92 100 93 - itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32); 94 - itimer_clockevent.max_delta_ns = 95 - clockevent_delta2ns(60 * HZ, &itimer_clockevent); 96 - itimer_clockevent.min_delta_ns = 97 - clockevent_delta2ns(1, &itimer_clockevent); 98 - err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC); 101 + err = os_timer_create(NULL); 102 + if (err != 0) { 103 + printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); 104 + return; 105 + } 106 + 107 + err = clocksource_register_hz(&timer_clocksource, 
NSEC_PER_SEC/TIMER_MULTIPLIER); 99 108 if (err) { 100 109 printk(KERN_ERR "clocksource_register_hz returned %d\n", err); 101 110 return; 102 111 } 103 - clockevents_register_device(&itimer_clockevent); 112 + clockevents_register_device(&timer_clockevent); 104 113 } 105 114 106 115 void read_persistent_clock(struct timespec *ts) 107 116 { 108 - long long nsecs = os_nsecs(); 117 + long long nsecs = os_persistent_clock_emulation(); 109 118 110 119 set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, 111 120 nsecs % NSEC_PER_SEC); ··· 129 106 130 107 void __init time_init(void) 131 108 { 132 - timer_init(); 133 - late_time_init = setup_itimer; 109 + timer_set_signal_handler(); 110 + late_time_init = timer_setup; 134 111 }
-1
arch/um/os-Linux/internal.h
··· 1 - void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc);
+4 -3
arch/um/os-Linux/main.c
··· 1 1 /* 2 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 3 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 4 * Licensed under the GPL 4 5 */ ··· 164 163 165 164 /* 166 165 * This signal stuff used to be in the reboot case. However, 167 - * sometimes a SIGVTALRM can come in when we're halting (reproducably 166 + * sometimes a timer signal can come in when we're halting (reproducably 168 167 * when writing out gcov information, presumably because that takes 169 168 * some time) and cause a segfault. 170 169 */ 171 170 172 - /* stop timers and set SIGVTALRM to be ignored */ 173 - disable_timer(); 171 + /* stop timers and set timer signal to be ignored */ 172 + os_timer_disable(); 174 173 175 174 /* disable SIGIO for the fds and set SIGIO to be ignored */ 176 175 err = deactivate_all_fds();
+6
arch/um/os-Linux/process.c
··· 1 1 /* 2 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 3 * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 4 * Licensed under the GPL 4 5 */ ··· 88 87 printk(UM_KERN_ERR "Failed to scan '%s'\n", data); 89 88 90 89 return parent; 90 + } 91 + 92 + void os_alarm_process(int pid) 93 + { 94 + kill(pid, SIGALRM); 91 95 } 92 96 93 97 void os_stop_process(int pid)
+22 -19
arch/um/os-Linux/signal.c
··· 1 1 /* 2 + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) 3 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 4 * Copyright (C) 2004 PathScale, Inc 3 5 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 4 6 * Licensed under the GPL ··· 15 13 #include <kern_util.h> 16 14 #include <os.h> 17 15 #include <sysdep/mcontext.h> 18 - #include "internal.h" 19 16 20 17 void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { 21 18 [SIGTRAP] = relay_signal, ··· 24 23 [SIGBUS] = bus_handler, 25 24 [SIGSEGV] = segv_handler, 26 25 [SIGIO] = sigio_handler, 27 - [SIGVTALRM] = timer_handler }; 26 + [SIGALRM] = timer_handler 27 + }; 28 28 29 29 static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) 30 30 { ··· 40 38 } 41 39 42 40 /* enable signals if sig isn't IRQ signal */ 43 - if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) 41 + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) 44 42 unblock_signals(); 45 43 46 44 (*sig_info[sig])(sig, si, &r); ··· 57 55 #define SIGIO_BIT 0 58 56 #define SIGIO_MASK (1 << SIGIO_BIT) 59 57 60 - #define SIGVTALRM_BIT 1 61 - #define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) 58 + #define SIGALRM_BIT 1 59 + #define SIGALRM_MASK (1 << SIGALRM_BIT) 62 60 63 61 static int signals_enabled; 64 62 static unsigned int signals_pending; ··· 80 78 set_signals(enabled); 81 79 } 82 80 83 - static void real_alarm_handler(mcontext_t *mc) 81 + static void timer_real_alarm_handler(mcontext_t *mc) 84 82 { 85 83 struct uml_pt_regs regs; 86 84 87 85 if (mc != NULL) 88 86 get_regs_from_mc(&regs, mc); 89 - regs.is_user = 0; 90 - unblock_signals(); 91 - timer_handler(SIGVTALRM, NULL, &regs); 87 + timer_handler(SIGALRM, NULL, &regs); 92 88 } 93 89 94 - void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) 90 + void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) 95 91 { 96 92 int enabled; 97 93 98 94 enabled = 
signals_enabled; 99 95 if (!signals_enabled) { 100 - signals_pending |= SIGVTALRM_MASK; 96 + signals_pending |= SIGALRM_MASK; 101 97 return; 102 98 } 103 99 104 100 block_signals(); 105 101 106 - real_alarm_handler(mc); 102 + timer_real_alarm_handler(mc); 107 103 set_signals(enabled); 108 104 } 109 105 110 - void timer_init(void) 106 + void deliver_alarm(void) { 107 + timer_alarm_handler(SIGALRM, NULL, NULL); 108 + } 109 + 110 + void timer_set_signal_handler(void) 111 111 { 112 - set_handler(SIGVTALRM); 112 + set_handler(SIGALRM); 113 113 } 114 114 115 115 void set_sigstack(void *sig_stack, int size) ··· 135 131 136 132 [SIGIO] = sig_handler, 137 133 [SIGWINCH] = sig_handler, 138 - [SIGVTALRM] = alarm_handler 134 + [SIGALRM] = timer_alarm_handler 139 135 }; 140 - 141 136 142 137 static void hard_handler(int sig, siginfo_t *si, void *p) 143 138 { ··· 191 188 192 189 /* block irq ones */ 193 190 sigemptyset(&action.sa_mask); 194 - sigaddset(&action.sa_mask, SIGVTALRM); 195 191 sigaddset(&action.sa_mask, SIGIO); 196 192 sigaddset(&action.sa_mask, SIGWINCH); 193 + sigaddset(&action.sa_mask, SIGALRM); 197 194 198 195 if (sig == SIGSEGV) 199 196 flags |= SA_NODEFER; ··· 286 283 if (save_pending & SIGIO_MASK) 287 284 sig_handler_common(SIGIO, NULL, NULL); 288 285 289 - if (save_pending & SIGVTALRM_MASK) 290 - real_alarm_handler(NULL); 286 + if (save_pending & SIGALRM_MASK) 287 + timer_real_alarm_handler(NULL); 291 288 } 292 289 } 293 290
+10 -35
arch/um/os-Linux/skas/process.c
··· 1 1 /* 2 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 2 3 * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 3 4 * Licensed under the GPL 4 5 */ ··· 46 45 * Signals that are OK to receive in the stub - we'll just continue it. 47 46 * SIGWINCH will happen when UML is inside a detached screen. 48 47 */ 49 - #define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) 48 + #define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH)) 50 49 51 50 /* Signals that the stub will finish with - anything else is an error */ 52 51 #define STUB_DONE_MASK (1 << SIGTRAP) ··· 184 183 static int userspace_tramp(void *stack) 185 184 { 186 185 void *addr; 187 - int err, fd; 186 + int fd; 188 187 unsigned long long offset; 189 188 190 189 ptrace(PTRACE_TRACEME, 0, 0, 0); 191 190 192 191 signal(SIGTERM, SIG_DFL); 193 192 signal(SIGWINCH, SIG_IGN); 194 - err = set_interval(); 195 - if (err) { 196 - printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " 197 - "errno = %d\n", err); 198 - exit(1); 199 - } 200 193 201 194 /* 202 195 * This has a pte, but it can't be mapped in with the usual ··· 281 286 "errno = %d\n", errno); 282 287 goto out_kill; 283 288 } 284 - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); 289 + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); 285 290 286 291 if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { 287 292 err = -EINVAL; ··· 314 319 315 320 void userspace(struct uml_pt_regs *regs) 316 321 { 317 - struct itimerval timer; 318 - unsigned long long nsecs, now; 319 322 int err, status, op, pid = userspace_pid[0]; 320 323 /* To prevent races if using_sysemu changes under us.*/ 321 324 int local_using_sysemu; ··· 322 329 /* Handle any immediate reschedules or signals */ 323 330 interrupt_end(); 324 331 325 - if (getitimer(ITIMER_VIRTUAL, &timer)) 326 - printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); 327 - nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + 328 - 
timer.it_value.tv_usec * UM_NSEC_PER_USEC; 329 - nsecs += os_nsecs(); 330 - 331 332 while (1) { 333 + 332 334 /* 333 335 * This can legitimately fail if the process loads a 334 336 * bogus value into a segment register. It will ··· 393 405 case SIGTRAP: 394 406 relay_signal(SIGTRAP, (struct siginfo *)&si, regs); 395 407 break; 396 - case SIGVTALRM: 397 - now = os_nsecs(); 398 - if (now < nsecs) 399 - break; 400 - block_signals(); 401 - (*sig_info[sig])(sig, (struct siginfo *)&si, regs); 402 - unblock_signals(); 403 - nsecs = timer.it_value.tv_sec * 404 - UM_NSEC_PER_SEC + 405 - timer.it_value.tv_usec * 406 - UM_NSEC_PER_USEC; 407 - nsecs += os_nsecs(); 408 + case SIGALRM: 408 409 break; 409 410 case SIGIO: 410 411 case SIGILL: ··· 441 464 442 465 int copy_context_skas0(unsigned long new_stack, int pid) 443 466 { 444 - struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; 445 467 int err; 446 468 unsigned long current_stack = current_stub_stack(); 447 469 struct stub_data *data = (struct stub_data *) current_stack; ··· 452 476 * prepare offset and fd of child's stack as argument for parent's 453 477 * and child's mmap2 calls 454 478 */ 455 - *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), 456 - .fd = new_fd, 457 - .timer = ((struct itimerval) 458 - { .it_value = tv, 459 - .it_interval = tv }) }); 479 + *data = ((struct stub_data) { 480 + .offset = MMAP_OFFSET(new_offset), 481 + .fd = new_fd 482 + }); 460 483 461 484 err = ptrace_setregs(pid, thread_regs); 462 485 if (err < 0) {
+151 -148
arch/um/os-Linux/time.c
··· 1 1 /* 2 + * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) 3 + * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 4 + * Copyright (C) 2012-2014 Cisco Systems 2 5 * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) 3 6 * Licensed under the GPL 4 7 */ ··· 13 10 #include <sys/time.h> 14 11 #include <kern_util.h> 15 12 #include <os.h> 16 - #include "internal.h" 13 + #include <string.h> 14 + #include <timer-internal.h> 17 15 18 - int set_interval(void) 19 - { 20 - int usec = UM_USEC_PER_SEC / UM_HZ; 21 - struct itimerval interval = ((struct itimerval) { { 0, usec }, 22 - { 0, usec } }); 16 + static timer_t event_high_res_timer = 0; 23 17 24 - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) 25 - return -errno; 26 - 27 - return 0; 28 - } 29 - 30 - int timer_one_shot(int ticks) 31 - { 32 - unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; 33 - unsigned long sec = usec / UM_USEC_PER_SEC; 34 - struct itimerval interval; 35 - 36 - usec %= UM_USEC_PER_SEC; 37 - interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); 38 - 39 - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) 40 - return -errno; 41 - 42 - return 0; 43 - } 44 - 45 - /** 46 - * timeval_to_ns - Convert timeval to nanoseconds 47 - * @ts: pointer to the timeval variable to be converted 48 - * 49 - * Returns the scalar nanosecond representation of the timeval 50 - * parameter. 51 - * 52 - * Ripped from linux/time.h because it's a kernel header, and thus 53 - * unusable from here. 
54 - */ 55 18 static inline long long timeval_to_ns(const struct timeval *tv) 56 19 { 57 20 return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + 58 21 tv->tv_usec * UM_NSEC_PER_USEC; 59 22 } 60 23 61 - long long disable_timer(void) 24 + static inline long long timespec_to_ns(const struct timespec *ts) 62 25 { 63 - struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); 64 - long long remain, max = UM_NSEC_PER_SEC / UM_HZ; 26 + return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + 27 + ts->tv_nsec; 28 + } 65 29 66 - if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) 67 - printk(UM_KERN_ERR "disable_timer - setitimer failed, " 68 - "errno = %d\n", errno); 30 + long long os_persistent_clock_emulation (void) { 31 + struct timespec realtime_tp; 69 32 70 - remain = timeval_to_ns(&time.it_value); 71 - if (remain > max) 72 - remain = max; 33 + clock_gettime(CLOCK_REALTIME, &realtime_tp); 34 + return timespec_to_ns(&realtime_tp); 35 + } 73 36 74 - return remain; 37 + /** 38 + * os_timer_create() - create an new posix (interval) timer 39 + */ 40 + int os_timer_create(void* timer) { 41 + 42 + timer_t* t = timer; 43 + 44 + if(t == NULL) { 45 + t = &event_high_res_timer; 46 + } 47 + 48 + if (timer_create( 49 + CLOCK_MONOTONIC, 50 + NULL, 51 + t) == -1) { 52 + return -1; 53 + } 54 + return 0; 55 + } 56 + 57 + int os_timer_set_interval(void* timer, void* i) 58 + { 59 + struct itimerspec its; 60 + unsigned long long nsec; 61 + timer_t* t = timer; 62 + struct itimerspec* its_in = i; 63 + 64 + if(t == NULL) { 65 + t = &event_high_res_timer; 66 + } 67 + 68 + nsec = UM_NSEC_PER_SEC / UM_HZ; 69 + 70 + if(its_in != NULL) { 71 + its.it_value.tv_sec = its_in->it_value.tv_sec; 72 + its.it_value.tv_nsec = its_in->it_value.tv_nsec; 73 + } else { 74 + its.it_value.tv_sec = 0; 75 + its.it_value.tv_nsec = nsec; 76 + } 77 + 78 + its.it_interval.tv_sec = 0; 79 + its.it_interval.tv_nsec = nsec; 80 + 81 + if(timer_settime(*t, 0, &its, NULL) == -1) { 82 + return -errno; 83 + } 84 + 85 + 
return 0; 86 + } 87 + 88 + /** 89 + * os_timer_remain() - returns the remaining nano seconds of the given interval 90 + * timer 91 + * Because this is the remaining time of an interval timer, which correspondends 92 + * to HZ, this value can never be bigger than one second. Just 93 + * the nanosecond part of the timer is returned. 94 + * The returned time is relative to the start time of the interval timer. 95 + * Return an negative value in an error case. 96 + */ 97 + long os_timer_remain(void* timer) 98 + { 99 + struct itimerspec its; 100 + timer_t* t = timer; 101 + 102 + if(t == NULL) { 103 + t = &event_high_res_timer; 104 + } 105 + 106 + if(timer_gettime(t, &its) == -1) { 107 + return -errno; 108 + } 109 + 110 + return its.it_value.tv_nsec; 111 + } 112 + 113 + int os_timer_one_shot(int ticks) 114 + { 115 + struct itimerspec its; 116 + unsigned long long nsec; 117 + unsigned long sec; 118 + 119 + nsec = (ticks + 1); 120 + sec = nsec / UM_NSEC_PER_SEC; 121 + nsec = nsec % UM_NSEC_PER_SEC; 122 + 123 + its.it_value.tv_sec = nsec / UM_NSEC_PER_SEC; 124 + its.it_value.tv_nsec = nsec; 125 + 126 + its.it_interval.tv_sec = 0; 127 + its.it_interval.tv_nsec = 0; // we cheat here 128 + 129 + timer_settime(event_high_res_timer, 0, &its, NULL); 130 + return 0; 131 + } 132 + 133 + /** 134 + * os_timer_disable() - disable the posix (interval) timer 135 + * Returns the remaining interval timer time in nanoseconds 136 + */ 137 + long long os_timer_disable(void) 138 + { 139 + struct itimerspec its; 140 + 141 + memset(&its, 0, sizeof(struct itimerspec)); 142 + timer_settime(event_high_res_timer, 0, &its, &its); 143 + 144 + return its.it_value.tv_sec * UM_NSEC_PER_SEC + its.it_value.tv_nsec; 145 + } 146 + 147 + long long os_vnsecs(void) 148 + { 149 + struct timespec ts; 150 + 151 + clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&ts); 152 + return timespec_to_ns(&ts); 75 153 } 76 154 77 155 long long os_nsecs(void) 78 156 { 79 - struct timeval tv; 157 + struct timespec ts; 80 158 81 - 
gettimeofday(&tv, NULL); 82 - return timeval_to_ns(&tv); 159 + clock_gettime(CLOCK_MONOTONIC,&ts); 160 + return timespec_to_ns(&ts); 83 161 } 84 162 85 - #ifdef UML_CONFIG_NO_HZ_COMMON 86 - static int after_sleep_interval(struct timespec *ts) 87 - { 88 - return 0; 89 - } 90 - 91 - static void deliver_alarm(void) 92 - { 93 - alarm_handler(SIGVTALRM, NULL, NULL); 94 - } 95 - 96 - static unsigned long long sleep_time(unsigned long long nsecs) 97 - { 98 - return nsecs; 99 - } 100 - 101 - #else 102 - unsigned long long last_tick; 103 - unsigned long long skew; 104 - 105 - static void deliver_alarm(void) 106 - { 107 - unsigned long long this_tick = os_nsecs(); 108 - int one_tick = UM_NSEC_PER_SEC / UM_HZ; 109 - 110 - /* Protection against the host's time going backwards */ 111 - if ((last_tick != 0) && (this_tick < last_tick)) 112 - this_tick = last_tick; 113 - 114 - if (last_tick == 0) 115 - last_tick = this_tick - one_tick; 116 - 117 - skew += this_tick - last_tick; 118 - 119 - while (skew >= one_tick) { 120 - alarm_handler(SIGVTALRM, NULL, NULL); 121 - skew -= one_tick; 122 - } 123 - 124 - last_tick = this_tick; 125 - } 126 - 127 - static unsigned long long sleep_time(unsigned long long nsecs) 128 - { 129 - return nsecs > skew ? nsecs - skew : 0; 130 - } 131 - 132 - static inline long long timespec_to_us(const struct timespec *ts) 133 - { 134 - return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + 135 - ts->tv_nsec / UM_NSEC_PER_USEC; 136 - } 137 - 138 - static int after_sleep_interval(struct timespec *ts) 139 - { 140 - int usec = UM_USEC_PER_SEC / UM_HZ; 141 - long long start_usecs = timespec_to_us(ts); 142 - struct timeval tv; 143 - struct itimerval interval; 144 - 145 - /* 146 - * It seems that rounding can increase the value returned from 147 - * setitimer to larger than the one passed in. Over time, 148 - * this will cause the remaining time to be greater than the 149 - * tick interval. If this happens, then just reduce the first 150 - * tick to the interval value. 
151 - */ 152 - if (start_usecs > usec) 153 - start_usecs = usec; 154 - 155 - start_usecs -= skew / UM_NSEC_PER_USEC; 156 - if (start_usecs < 0) 157 - start_usecs = 0; 158 - 159 - tv = ((struct timeval) { .tv_sec = start_usecs / UM_USEC_PER_SEC, 160 - .tv_usec = start_usecs % UM_USEC_PER_SEC }); 161 - interval = ((struct itimerval) { { 0, usec }, tv }); 162 - 163 - if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) 164 - return -errno; 165 - 166 - return 0; 167 - } 168 - #endif 169 - 170 - void idle_sleep(unsigned long long nsecs) 163 + /** 164 + * os_idle_sleep() - sleep for a given time of nsecs 165 + * @nsecs: nanoseconds to sleep 166 + */ 167 + void os_idle_sleep(unsigned long long nsecs) 171 168 { 172 169 struct timespec ts; 173 170 171 + if (nsecs <= 0) { 172 + return; 173 + } 174 + 175 + ts = ((struct timespec) { 176 + .tv_sec = nsecs / UM_NSEC_PER_SEC, 177 + .tv_nsec = nsecs % UM_NSEC_PER_SEC 178 + }); 179 + 174 180 /* 175 - * nsecs can come in as zero, in which case, this starts a 176 - * busy loop. To prevent this, reset nsecs to the tick 177 - * interval if it is zero. 181 + * Relay the signal if clock_nanosleep is interrupted. 178 182 */ 179 - if (nsecs == 0) 180 - nsecs = UM_NSEC_PER_SEC / UM_HZ; 181 - 182 - nsecs = sleep_time(nsecs); 183 - ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, 184 - .tv_nsec = nsecs % UM_NSEC_PER_SEC }); 185 - 186 - if (nanosleep(&ts, &ts) == 0) 183 + if (clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL)) { 187 184 deliver_alarm(); 188 - after_sleep_interval(&ts); 185 + } 189 186 }