Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched

* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
latencytop: Change Kconfig dependency.
futex: Add bitset conditional wait/wakeup functionality
futex: Remove warn on in return fixup path
x86: replace LOCK_PREFIX in futex.h
tick-sched: add more debug information
timekeeping: update xtime_cache when time(zone) changes
hrtimer: fix hrtimer_init_sleeper() users

+68 -17
+3
arch/x86/Kconfig
··· 44 config STACKTRACE_SUPPORT 45 def_bool y 46 47 config SEMAPHORE_SLEEPERS 48 def_bool y 49
··· 44 config STACKTRACE_SUPPORT 45 def_bool y 46 47 + config HAVE_LATENCYTOP_SUPPORT 48 + def_bool y 49 + 50 config SEMAPHORE_SLEEPERS 51 def_bool y 52
+3 -3
include/asm-x86/futex.h
··· 30 "1: movl %2, %0\n \ 31 movl %0, %3\n" \ 32 insn "\n" \ 33 - "2: " LOCK_PREFIX "cmpxchgl %3, %2\n \ 34 jnz 1b\n \ 35 3: .section .fixup,\"ax\"\n \ 36 4: mov %5, %1\n \ ··· 72 __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); 73 break; 74 case FUTEX_OP_ADD: 75 - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, 76 uaddr, oparg); 77 break; 78 case FUTEX_OP_OR: ··· 111 return -EFAULT; 112 113 __asm__ __volatile__( 114 - "1: " LOCK_PREFIX "cmpxchgl %3, %1 \n" 115 116 "2: .section .fixup, \"ax\" \n" 117 "3: mov %2, %0 \n" 118 " jmp 2b \n"
··· 30 "1: movl %2, %0\n \ 31 movl %0, %3\n" \ 32 insn "\n" \ 33 + "2: lock; cmpxchgl %3, %2\n \ 34 jnz 1b\n \ 35 3: .section .fixup,\"ax\"\n \ 36 4: mov %5, %1\n \ ··· 72 __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); 73 break; 74 case FUTEX_OP_ADD: 75 + __futex_atomic_op1("lock; xaddl %0, %2", ret, oldval, 76 uaddr, oparg); 77 break; 78 case FUTEX_OP_OR: ··· 111 return -EFAULT; 112 113 __asm__ __volatile__( 114 115 + "1: lock; cmpxchgl %3, %1 \n" 116 "2: .section .fixup, \"ax\" \n" 117 "3: mov %2, %0 \n" 118 " jmp 2b \n"
+10
include/linux/futex.h
··· 21 #define FUTEX_LOCK_PI 6 22 #define FUTEX_UNLOCK_PI 7 23 #define FUTEX_TRYLOCK_PI 8 24 25 #define FUTEX_PRIVATE_FLAG 128 26 #define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG ··· 35 #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) 36 #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) 37 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) 38 39 /* 40 * Support for robust futexes: the kernel cleans up held futexes at ··· 114 * (Not worth introducing an rlimit for it) 115 */ 116 #define ROBUST_LIST_LIMIT 2048 117 118 #ifdef __KERNEL__ 119 long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
··· 21 #define FUTEX_LOCK_PI 6 22 #define FUTEX_UNLOCK_PI 7 23 #define FUTEX_TRYLOCK_PI 8 24 + #define FUTEX_WAIT_BITSET 9 25 + #define FUTEX_WAKE_BITSET 10 26 27 #define FUTEX_PRIVATE_FLAG 128 28 #define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG ··· 33 #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) 34 #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) 35 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) 36 + #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG) 37 + #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG) 38 39 /* 40 * Support for robust futexes: the kernel cleans up held futexes at ··· 110 * (Not worth introducing an rlimit for it) 111 */ 112 #define ROBUST_LIST_LIMIT 2048 113 + 114 + /* 115 + * bitset with all bits set for the FUTEX_xxx_BITSET OPs to request a 116 + * match of any bit. 117 + */ 118 + #define FUTEX_BITSET_MATCH_ANY 0xffffffff 119 120 #ifdef __KERNEL__ 121 long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
+1
include/linux/thread_info.h
··· 23 u32 *uaddr; 24 u32 val; 25 u32 flags; 26 u64 time; 27 } futex; 28 };
··· 23 u32 *uaddr; 24 u32 val; 25 u32 flags; 26 + u32 bitset; 27 u64 time; 28 } futex; 29 };
+4
include/linux/tick.h
··· 39 * @idle_calls: Total number of idle calls 40 * @idle_sleeps: Number of idle calls, where the sched tick was stopped 41 * @idle_entrytime: Time when the idle call was entered 42 * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped 43 * @sleep_length: Duration of the current idle sleep 44 */ ··· 55 unsigned long idle_sleeps; 56 int idle_active; 57 ktime_t idle_entrytime; 58 ktime_t idle_sleeptime; 59 ktime_t idle_lastupdate; 60 ktime_t sleep_length;
··· 39 * @idle_calls: Total number of idle calls 40 * @idle_sleeps: Number of idle calls, where the sched tick was stopped 41 * @idle_entrytime: Time when the idle call was entered 42 + * @idle_waketime: Time when the idle was interrupted 43 + * @idle_exittime: Time when the idle state was left 44 * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped 45 * @sleep_length: Duration of the current idle sleep 46 */ ··· 53 unsigned long idle_sleeps; 54 int idle_active; 55 ktime_t idle_entrytime; 56 + ktime_t idle_waketime; 57 + ktime_t idle_exittime; 58 ktime_t idle_sleeptime; 59 ktime_t idle_lastupdate; 60 ktime_t sleep_length;
+1
include/linux/time.h
··· 122 extern struct timespec timespec_trunc(struct timespec t, unsigned gran); 123 extern int timekeeping_is_continuous(void); 124 extern void update_wall_time(void); 125 126 /** 127 * timespec_to_ns - Convert timespec to nanoseconds
··· 122 extern struct timespec timespec_trunc(struct timespec t, unsigned gran); 123 extern int timekeeping_is_continuous(void); 124 extern void update_wall_time(void); 125 + extern void update_xtime_cache(u64 nsec); 126 127 /** 128 * timespec_to_ns - Convert timespec to nanoseconds
+32 -10
kernel/futex.c
··· 109 /* Optional priority inheritance state: */ 110 struct futex_pi_state *pi_state; 111 struct task_struct *task; 112 }; 113 114 /* ··· 725 * to this virtual address: 726 */ 727 static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, 728 - int nr_wake) 729 { 730 struct futex_hash_bucket *hb; 731 struct futex_q *this, *next; 732 struct plist_head *head; 733 union futex_key key; 734 int ret; 735 736 futex_lock_mm(fshared); 737 ··· 752 ret = -EINVAL; 753 break; 754 } 755 wake_futex(this); 756 if (++ret >= nr_wake) 757 break; ··· 1167 static long futex_wait_restart(struct restart_block *restart); 1168 1169 static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, 1170 - u32 val, ktime_t *abs_time) 1171 { 1172 struct task_struct *curr = current; 1173 DECLARE_WAITQUEUE(wait, curr); ··· 1178 struct hrtimer_sleeper t; 1179 int rem = 0; 1180 1181 q.pi_state = NULL; 1182 retry: 1183 futex_lock_mm(fshared); 1184 ··· 1267 t.timer.expires = *abs_time; 1268 1269 hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); 1270 1271 /* 1272 * the timer could have already expired, in which ··· 1310 restart->futex.uaddr = (u32 *)uaddr; 1311 restart->futex.val = val; 1312 restart->futex.time = abs_time->tv64; 1313 restart->futex.flags = 0; 1314 1315 if (fshared) ··· 1337 restart->fn = do_no_restart_syscall; 1338 if (restart->futex.flags & FLAGS_SHARED) 1339 fshared = &current->mm->mmap_sem; 1340 - return (long)futex_wait(uaddr, fshared, restart->futex.val, &t); 1341 } 1342 1343 ··· 1553 1554 owner = rt_mutex_owner(&q.pi_state->pi_mutex); 1555 res = fixup_pi_state_owner(uaddr, &q, owner); 1556 - 1557 - WARN_ON(rt_mutex_owner(&q.pi_state->pi_mutex) != 1558 - owner); 1559 1560 /* propagate -EFAULT, if the fixup failed */ 1561 if (res) ··· 1959 * PI futexes happens in exit_pi_state(): 1960 */ 1961 if (!pi && (uval & FUTEX_WAITERS)) 1962 - futex_wake(uaddr, &curr->mm->mmap_sem, 1); 1963 } 1964 return 0; 1965 } ··· 2060 2061 switch (cmd) { 2062 case 
FUTEX_WAIT: 2063 - ret = futex_wait(uaddr, fshared, val, timeout); 2064 break; 2065 case FUTEX_WAKE: 2066 - ret = futex_wake(uaddr, fshared, val); 2067 break; 2068 case FUTEX_FD: 2069 /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ ··· 2107 u32 val2 = 0; 2108 int cmd = op & FUTEX_CMD_MASK; 2109 2110 - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) { 2111 if (copy_from_user(&ts, utime, sizeof(ts)) != 0) 2112 return -EFAULT; 2113 if (!timespec_valid(&ts))
··· 109 /* Optional priority inheritance state: */ 110 struct futex_pi_state *pi_state; 111 struct task_struct *task; 112 + 113 + /* Bitset for the optional bitmasked wakeup */ 114 + u32 bitset; 115 }; 116 117 /* ··· 722 * to this virtual address: 723 */ 724 static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, 725 + int nr_wake, u32 bitset) 726 { 727 struct futex_hash_bucket *hb; 728 struct futex_q *this, *next; 729 struct plist_head *head; 730 union futex_key key; 731 int ret; 732 + 733 + if (!bitset) 734 + return -EINVAL; 735 736 futex_lock_mm(fshared); 737 ··· 746 ret = -EINVAL; 747 break; 748 } 749 + 750 + /* Check if one of the bits is set in both bitsets */ 751 + if (!(this->bitset & bitset)) 752 + continue; 753 + 754 wake_futex(this); 755 if (++ret >= nr_wake) 756 break; ··· 1156 static long futex_wait_restart(struct restart_block *restart); 1157 1158 static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, 1159 + u32 val, ktime_t *abs_time, u32 bitset) 1160 { 1161 struct task_struct *curr = current; 1162 DECLARE_WAITQUEUE(wait, curr); ··· 1167 struct hrtimer_sleeper t; 1168 int rem = 0; 1169 1170 + if (!bitset) 1171 + return -EINVAL; 1172 + 1173 q.pi_state = NULL; 1174 + q.bitset = bitset; 1175 retry: 1176 futex_lock_mm(fshared); 1177 ··· 1252 t.timer.expires = *abs_time; 1253 1254 hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); 1255 + if (!hrtimer_active(&t.timer)) 1256 + t.task = NULL; 1257 1258 /* 1259 * the timer could have already expired, in which ··· 1293 restart->futex.uaddr = (u32 *)uaddr; 1294 restart->futex.val = val; 1295 restart->futex.time = abs_time->tv64; 1296 + restart->futex.bitset = bitset; 1297 restart->futex.flags = 0; 1298 1299 if (fshared) ··· 1319 restart->fn = do_no_restart_syscall; 1320 if (restart->futex.flags & FLAGS_SHARED) 1321 fshared = &current->mm->mmap_sem; 1322 + return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, 1323 + restart->futex.bitset); 1324 } 1325 1326 ··· 
1534 1535 owner = rt_mutex_owner(&q.pi_state->pi_mutex); 1536 res = fixup_pi_state_owner(uaddr, &q, owner); 1537 1538 /* propagate -EFAULT, if the fixup failed */ 1539 if (res) ··· 1943 * PI futexes happens in exit_pi_state(): 1944 */ 1945 if (!pi && (uval & FUTEX_WAITERS)) 1946 + futex_wake(uaddr, &curr->mm->mmap_sem, 1, 1947 + FUTEX_BITSET_MATCH_ANY); 1948 } 1949 return 0; 1950 } ··· 2043 2044 switch (cmd) { 2045 case FUTEX_WAIT: 2046 + val3 = FUTEX_BITSET_MATCH_ANY; 2047 + case FUTEX_WAIT_BITSET: 2048 + ret = futex_wait(uaddr, fshared, val, timeout, val3); 2049 break; 2050 case FUTEX_WAKE: 2051 + val3 = FUTEX_BITSET_MATCH_ANY; 2052 + case FUTEX_WAKE_BITSET: 2053 + ret = futex_wake(uaddr, fshared, val, val3); 2054 break; 2055 case FUTEX_FD: 2056 /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ ··· 2086 u32 val2 = 0; 2087 int cmd = op & FUTEX_CMD_MASK; 2088 2089 + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 2090 + cmd == FUTEX_WAIT_BITSET)) { 2091 if (copy_from_user(&ts, utime, sizeof(ts)) != 0) 2092 return -EFAULT; 2093 if (!timespec_valid(&ts))
+2 -1
kernel/futex_compat.c
··· 167 int val2 = 0; 168 int cmd = op & FUTEX_CMD_MASK; 169 170 - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) { 171 if (get_compat_timespec(&ts, utime)) 172 return -EFAULT; 173 if (!timespec_valid(&ts))
··· 167 int val2 = 0; 168 int cmd = op & FUTEX_CMD_MASK; 169 170 + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 171 + cmd == FUTEX_WAIT_BITSET)) { 172 if (get_compat_timespec(&ts, utime)) 173 return -EFAULT; 174 if (!timespec_valid(&ts))
+2
kernel/hrtimer.c
··· 1315 1316 } while (t->task && !signal_pending(current)); 1317 1318 return t->task == NULL; 1319 } 1320
··· 1315 1316 } while (t->task && !signal_pending(current)); 1317 1318 + __set_current_state(TASK_RUNNING); 1319 + 1320 return t->task == NULL; 1321 } 1322
+1
kernel/time.c
··· 129 write_seqlock_irq(&xtime_lock); 130 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 131 xtime.tv_sec += sys_tz.tz_minuteswest * 60; 132 write_sequnlock_irq(&xtime_lock); 133 clock_was_set(); 134 }
··· 129 write_seqlock_irq(&xtime_lock); 130 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 131 xtime.tv_sec += sys_tz.tz_minuteswest * 60; 132 + update_xtime_cache(0); 133 write_sequnlock_irq(&xtime_lock); 134 clock_was_set(); 135 }
+2
kernel/time/tick-sched.c
··· 137 138 cpu_clear(cpu, nohz_cpu_mask); 139 now = ktime_get(); 140 141 local_irq_save(flags); 142 tick_do_update_jiffies64(now); ··· 401 * Cancel the scheduled timer and restore the tick 402 */ 403 ts->tick_stopped = 0; 404 hrtimer_cancel(&ts->sched_timer); 405 ts->sched_timer.expires = ts->idle_tick; 406
··· 137 138 cpu_clear(cpu, nohz_cpu_mask); 139 now = ktime_get(); 140 + ts->idle_waketime = now; 141 142 local_irq_save(flags); 143 tick_do_update_jiffies64(now); ··· 400 * Cancel the scheduled timer and restore the tick 401 */ 402 ts->tick_stopped = 0; 403 + ts->idle_exittime = now; 404 hrtimer_cancel(&ts->sched_timer); 405 ts->sched_timer.expires = ts->idle_tick; 406
+4 -2
kernel/time/timekeeping.c
··· 47 static unsigned long total_sleep_time; /* seconds */ 48 49 static struct timespec xtime_cache __attribute__ ((aligned (16))); 50 - static inline void update_xtime_cache(u64 nsec) 51 { 52 xtime_cache = xtime; 53 timespec_add_ns(&xtime_cache, nsec); ··· 145 146 set_normalized_timespec(&xtime, sec, nsec); 147 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); 148 149 clock->error = 0; 150 ntp_clear(); ··· 253 xtime.tv_nsec = 0; 254 set_normalized_timespec(&wall_to_monotonic, 255 -xtime.tv_sec, -xtime.tv_nsec); 256 total_sleep_time = 0; 257 - 258 write_sequnlock_irqrestore(&xtime_lock, flags); 259 } 260 ··· 291 } 292 /* Make sure that we have the correct xtime reference */ 293 timespec_add_ns(&xtime, timekeeping_suspend_nsecs); 294 /* re-base the last cycle value */ 295 clock->cycle_last = clocksource_read(clock); 296 clock->error = 0;
··· 47 static unsigned long total_sleep_time; /* seconds */ 48 49 static struct timespec xtime_cache __attribute__ ((aligned (16))); 50 + void update_xtime_cache(u64 nsec) 51 { 52 xtime_cache = xtime; 53 timespec_add_ns(&xtime_cache, nsec); ··· 145 146 set_normalized_timespec(&xtime, sec, nsec); 147 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); 148 + update_xtime_cache(0); 149 150 clock->error = 0; 151 ntp_clear(); ··· 252 xtime.tv_nsec = 0; 253 set_normalized_timespec(&wall_to_monotonic, 254 -xtime.tv_sec, -xtime.tv_nsec); 255 + update_xtime_cache(0); 256 total_sleep_time = 0; 257 write_sequnlock_irqrestore(&xtime_lock, flags); 258 } 259 ··· 290 } 291 /* Make sure that we have the correct xtime reference */ 292 timespec_add_ns(&xtime, timekeeping_suspend_nsecs); 293 + update_xtime_cache(0); 294 /* re-base the last cycle value */ 295 clock->cycle_last = clocksource_read(clock); 296 clock->error = 0;
+2
kernel/time/timer_list.c
··· 166 P(idle_calls); 167 P(idle_sleeps); 168 P_ns(idle_entrytime); 169 P_ns(idle_sleeptime); 170 P(last_jiffies); 171 P(next_jiffies);
··· 166 P(idle_calls); 167 P(idle_sleeps); 168 P_ns(idle_entrytime); 169 + P_ns(idle_waketime); 170 + P_ns(idle_exittime); 171 P_ns(idle_sleeptime); 172 P(last_jiffies); 173 P(next_jiffies);
+1 -1
lib/Kconfig.debug
··· 581 select STACKTRACE 582 select SCHEDSTATS 583 select SCHED_DEBUG 584 - depends on X86 || X86_64 585 help 586 Enable this option if you want to use the LatencyTOP tool 587 to find out which userspace is blocking on what kernel operations.
··· 581 select STACKTRACE 582 select SCHEDSTATS 583 select SCHED_DEBUG 584 + depends on HAVE_LATENCYTOP_SUPPORT 585 help 586 Enable this option if you want to use the LatencyTOP tool 587 to find out which userspace is blocking on what kernel operations.