Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched: Change task_struct::state

Change the type and name of task_struct::state. Drop the volatile and
shrink it to an 'unsigned int'. Rename it in order to find all uses
such that we can use READ_ONCE/WRITE_ONCE as appropriate.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20210611082838.550736351@infradead.org

+123 -111
+1 -1
arch/ia64/kernel/mca.c
··· 1788 1788 ti->task = p; 1789 1789 ti->cpu = cpu; 1790 1790 p->stack = ti; 1791 - p->state = TASK_UNINTERRUPTIBLE; 1791 + p->__state = TASK_UNINTERRUPTIBLE; 1792 1792 cpumask_set_cpu(cpu, &p->cpus_mask); 1793 1793 INIT_LIST_HEAD(&p->tasks); 1794 1794 p->parent = p->real_parent = p->group_leader = p;
+4 -4
arch/ia64/kernel/ptrace.c
··· 641 641 read_lock(&tasklist_lock); 642 642 if (child->sighand) { 643 643 spin_lock_irq(&child->sighand->siglock); 644 - if (child->state == TASK_STOPPED && 644 + if (READ_ONCE(child->__state) == TASK_STOPPED && 645 645 !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { 646 646 set_notify_resume(child); 647 647 648 - child->state = TASK_TRACED; 648 + WRITE_ONCE(child->__state, TASK_TRACED); 649 649 stopped = 1; 650 650 } 651 651 spin_unlock_irq(&child->sighand->siglock); ··· 665 665 read_lock(&tasklist_lock); 666 666 if (child->sighand) { 667 667 spin_lock_irq(&child->sighand->siglock); 668 - if (child->state == TASK_TRACED && 668 + if (READ_ONCE(child->__state) == TASK_TRACED && 669 669 (child->signal->flags & SIGNAL_STOP_STOPPED)) { 670 - child->state = TASK_STOPPED; 670 + WRITE_ONCE(child->__state, TASK_STOPPED); 671 671 } 672 672 spin_unlock_irq(&child->sighand->siglock); 673 673 }
+7 -6
arch/powerpc/xmon/xmon.c
··· 3162 3162 3163 3163 static void show_task(struct task_struct *tsk) 3164 3164 { 3165 + unsigned int p_state = READ_ONCE(tsk->__state); 3165 3166 char state; 3166 3167 3167 3168 /* ··· 3170 3169 * appropriate for calling from xmon. This could be moved 3171 3170 * to a common, generic, routine used by both. 3172 3171 */ 3173 - state = (tsk->state == 0) ? 'R' : 3174 - (tsk->state < 0) ? 'U' : 3175 - (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' : 3176 - (tsk->state & TASK_STOPPED) ? 'T' : 3177 - (tsk->state & TASK_TRACED) ? 'C' : 3172 + state = (p_state == 0) ? 'R' : 3173 + (p_state < 0) ? 'U' : 3174 + (p_state & TASK_UNINTERRUPTIBLE) ? 'D' : 3175 + (p_state & TASK_STOPPED) ? 'T' : 3176 + (p_state & TASK_TRACED) ? 'C' : 3178 3177 (tsk->exit_state & EXIT_ZOMBIE) ? 'Z' : 3179 3178 (tsk->exit_state & EXIT_DEAD) ? 'E' : 3180 - (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; 3179 + (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?'; 3181 3180 3182 3181 printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk, 3183 3182 tsk->thread.ksp, tsk->thread.regs,
+1 -1
block/blk-mq.c
··· 3886 3886 int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) 3887 3887 { 3888 3888 struct blk_mq_hw_ctx *hctx; 3889 - long state; 3889 + unsigned int state; 3890 3890 3891 3891 if (!blk_qc_t_valid(cookie) || 3892 3892 !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+3 -3
drivers/md/dm.c
··· 2328 2328 return sum != 0; 2329 2329 } 2330 2330 2331 - static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state) 2331 + static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state) 2332 2332 { 2333 2333 int r = 0; 2334 2334 DEFINE_WAIT(wait); ··· 2351 2351 return r; 2352 2352 } 2353 2353 2354 - static int dm_wait_for_completion(struct mapped_device *md, long task_state) 2354 + static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state) 2355 2355 { 2356 2356 int r = 0; 2357 2357 ··· 2478 2478 * are being added to md->deferred list. 2479 2479 */ 2480 2480 static int __dm_suspend(struct mapped_device *md, struct dm_table *map, 2481 - unsigned suspend_flags, long task_state, 2481 + unsigned suspend_flags, unsigned int task_state, 2482 2482 int dmf_suspended_flag) 2483 2483 { 2484 2484 bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+5 -3
fs/binfmt_elf.c
··· 1537 1537 { 1538 1538 const struct cred *cred; 1539 1539 unsigned int i, len; 1540 - 1540 + unsigned int state; 1541 + 1541 1542 /* first copy the parameters from user space */ 1542 1543 memset(psinfo, 0, sizeof(struct elf_prpsinfo)); 1543 1544 ··· 1560 1559 psinfo->pr_pgrp = task_pgrp_vnr(p); 1561 1560 psinfo->pr_sid = task_session_vnr(p); 1562 1561 1563 - i = p->state ? ffz(~p->state) + 1 : 0; 1562 + state = READ_ONCE(p->__state); 1563 + i = state ? ffz(~state) + 1 : 0; 1564 1564 psinfo->pr_state = i; 1565 1565 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; 1566 1566 psinfo->pr_zomb = psinfo->pr_sname == 'Z'; ··· 1573 1571 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid)); 1574 1572 rcu_read_unlock(); 1575 1573 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); 1576 - 1574 + 1577 1575 return 0; 1578 1576 } 1579 1577
+3 -1
fs/binfmt_elf_fdpic.c
··· 1331 1331 { 1332 1332 const struct cred *cred; 1333 1333 unsigned int i, len; 1334 + unsigned int state; 1334 1335 1335 1336 /* first copy the parameters from user space */ 1336 1337 memset(psinfo, 0, sizeof(struct elf_prpsinfo)); ··· 1354 1353 psinfo->pr_pgrp = task_pgrp_vnr(p); 1355 1354 psinfo->pr_sid = task_session_vnr(p); 1356 1355 1357 - i = p->state ? ffz(~p->state) + 1 : 0; 1356 + state = READ_ONCE(p->__state); 1357 + i = state ? ffz(~state) + 1 : 0; 1358 1358 psinfo->pr_state = i; 1359 1359 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; 1360 1360 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
+2 -2
fs/userfaultfd.c
··· 337 337 return ret; 338 338 } 339 339 340 - static inline long userfaultfd_get_blocking_state(unsigned int flags) 340 + static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags) 341 341 { 342 342 if (flags & FAULT_FLAG_INTERRUPTIBLE) 343 343 return TASK_INTERRUPTIBLE; ··· 370 370 struct userfaultfd_wait_queue uwq; 371 371 vm_fault_t ret = VM_FAULT_SIGBUS; 372 372 bool must_wait; 373 - long blocking_state; 373 + unsigned int blocking_state; 374 374 375 375 /* 376 376 * We don't do userfault handling for the final child pid update.
+15 -16
include/linux/sched.h
··· 113 113 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ 114 114 TASK_PARKED) 115 115 116 - #define task_is_running(task) (READ_ONCE((task)->state) == TASK_RUNNING) 116 + #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) 117 117 118 - #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) 118 + #define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) 119 119 120 - #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) 120 + #define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) 121 121 122 - #define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) 122 + #define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) 123 123 124 124 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP 125 125 ··· 134 134 do { \ 135 135 WARN_ON_ONCE(is_special_task_state(state_value));\ 136 136 current->task_state_change = _THIS_IP_; \ 137 - current->state = (state_value); \ 137 + WRITE_ONCE(current->__state, (state_value)); \ 138 138 } while (0) 139 139 140 140 #define set_current_state(state_value) \ 141 141 do { \ 142 142 WARN_ON_ONCE(is_special_task_state(state_value));\ 143 143 current->task_state_change = _THIS_IP_; \ 144 - smp_store_mb(current->state, (state_value)); \ 144 + smp_store_mb(current->__state, (state_value)); \ 145 145 } while (0) 146 146 147 147 #define set_special_state(state_value) \ ··· 150 150 WARN_ON_ONCE(!is_special_task_state(state_value)); \ 151 151 raw_spin_lock_irqsave(&current->pi_lock, flags); \ 152 152 current->task_state_change = _THIS_IP_; \ 153 - current->state = (state_value); \ 153 + WRITE_ONCE(current->__state, (state_value)); \ 154 154 raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ 155 155 } while (0) 156 156 #else ··· 192 192 * Also see the comments of try_to_wake_up(). 
193 193 */ 194 194 #define __set_current_state(state_value) \ 195 - current->state = (state_value) 195 + WRITE_ONCE(current->__state, (state_value)) 196 196 197 197 #define set_current_state(state_value) \ 198 - smp_store_mb(current->state, (state_value)) 198 + smp_store_mb(current->__state, (state_value)) 199 199 200 200 /* 201 201 * set_special_state() should be used for those states when the blocking task ··· 207 207 do { \ 208 208 unsigned long flags; /* may shadow */ \ 209 209 raw_spin_lock_irqsave(&current->pi_lock, flags); \ 210 - current->state = (state_value); \ 210 + WRITE_ONCE(current->__state, (state_value)); \ 211 211 raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ 212 212 } while (0) 213 213 214 214 #endif 215 215 216 - #define get_current_state() READ_ONCE(current->state) 216 + #define get_current_state() READ_ONCE(current->__state) 217 217 218 218 /* Task command name length: */ 219 219 #define TASK_COMM_LEN 16 ··· 666 666 */ 667 667 struct thread_info thread_info; 668 668 #endif 669 - /* -1 unrunnable, 0 runnable, >0 stopped: */ 670 - volatile long state; 669 + unsigned int __state; 671 670 672 671 /* 673 672 * This begins the randomizable portion of task_struct. 
Only ··· 1531 1532 1532 1533 static inline unsigned int task_state_index(struct task_struct *tsk) 1533 1534 { 1534 - unsigned int tsk_state = READ_ONCE(tsk->state); 1535 + unsigned int tsk_state = READ_ONCE(tsk->__state); 1535 1536 unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; 1536 1537 1537 1538 BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); ··· 1839 1840 */ 1840 1841 preempt_fold_need_resched(); 1841 1842 } 1842 - extern unsigned long wait_task_inactive(struct task_struct *, long match_state); 1843 + extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); 1843 1844 #else 1844 1845 static inline void scheduler_ipi(void) { } 1845 - static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) 1846 + static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) 1846 1847 { 1847 1848 return 1; 1848 1849 }
+1 -1
include/linux/sched/debug.h
··· 14 14 /* 15 15 * Only dump TASK_* tasks. (0 for all tasks) 16 16 */ 17 - extern void show_state_filter(unsigned long state_filter); 17 + extern void show_state_filter(unsigned int state_filter); 18 18 19 19 static inline void show_state(void) 20 20 {
+1 -1
include/linux/sched/signal.h
··· 382 382 return task_sigpending(p) && __fatal_signal_pending(p); 383 383 } 384 384 385 - static inline int signal_pending_state(long state, struct task_struct *p) 385 + static inline int signal_pending_state(unsigned int state, struct task_struct *p) 386 386 { 387 387 if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) 388 388 return 0;
+1 -1
init/init_task.c
··· 71 71 .thread_info = INIT_THREAD_INFO(init_task), 72 72 .stack_refcount = REFCOUNT_INIT(1), 73 73 #endif 74 - .state = 0, 74 + .__state = 0, 75 75 .stack = init_stack, 76 76 .usage = REFCOUNT_INIT(2), 77 77 .flags = PF_KTHREAD,
+1 -1
kernel/cgroup/cgroup-v1.c
··· 713 713 714 714 css_task_iter_start(&cgrp->self, 0, &it); 715 715 while ((tsk = css_task_iter_next(&it))) { 716 - switch (tsk->state) { 716 + switch (READ_ONCE(tsk->__state)) { 717 717 case TASK_RUNNING: 718 718 stats->nr_running++; 719 719 break;
+10 -8
kernel/debug/kdb/kdb_support.c
··· 609 609 */ 610 610 char kdb_task_state_char (const struct task_struct *p) 611 611 { 612 - int cpu; 613 - char state; 612 + unsigned int p_state; 614 613 unsigned long tmp; 614 + char state; 615 + int cpu; 615 616 616 617 if (!p || 617 618 copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) 618 619 return 'E'; 619 620 620 621 cpu = kdb_process_cpu(p); 621 - state = (p->state == 0) ? 'R' : 622 - (p->state < 0) ? 'U' : 623 - (p->state & TASK_UNINTERRUPTIBLE) ? 'D' : 624 - (p->state & TASK_STOPPED) ? 'T' : 625 - (p->state & TASK_TRACED) ? 'C' : 622 + p_state = READ_ONCE(p->__state); 623 + state = (p_state == 0) ? 'R' : 624 + (p_state < 0) ? 'U' : 625 + (p_state & TASK_UNINTERRUPTIBLE) ? 'D' : 626 + (p_state & TASK_STOPPED) ? 'T' : 627 + (p_state & TASK_TRACED) ? 'C' : 626 628 (p->exit_state & EXIT_ZOMBIE) ? 'Z' : 627 629 (p->exit_state & EXIT_DEAD) ? 'E' : 628 - (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; 630 + (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?'; 629 631 if (is_idle_task(p)) { 630 632 /* Idle task. Is it really idle, apart from the kdb 631 633 * interrupt? */
+2 -2
kernel/fork.c
··· 425 425 426 426 static void release_task_stack(struct task_struct *tsk) 427 427 { 428 - if (WARN_ON(tsk->state != TASK_DEAD)) 428 + if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD)) 429 429 return; /* Better to leak the stack than to free prematurely */ 430 430 431 431 account_kernel_stack(tsk, -1); ··· 2392 2392 atomic_dec(&p->cred->user->processes); 2393 2393 exit_creds(p); 2394 2394 bad_fork_free: 2395 - p->state = TASK_DEAD; 2395 + WRITE_ONCE(p->__state, TASK_DEAD); 2396 2396 put_task_stack(p); 2397 2397 delayed_free_task(p); 2398 2398 fork_out:
+1 -1
kernel/hung_task.c
··· 196 196 last_break = jiffies; 197 197 } 198 198 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ 199 - if (t->state == TASK_UNINTERRUPTIBLE) 199 + if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE) 200 200 check_hung_task(t, timeout); 201 201 } 202 202 unlock:
+2 -2
kernel/kthread.c
··· 457 457 } 458 458 EXPORT_SYMBOL(kthread_create_on_node); 459 459 460 - static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state) 460 + static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state) 461 461 { 462 462 unsigned long flags; 463 463 ··· 473 473 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 474 474 } 475 475 476 - static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state) 476 + static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state) 477 477 { 478 478 __kthread_bind_mask(p, cpumask_of(cpu), state); 479 479 }
+3 -3
kernel/locking/mutex.c
··· 923 923 * Lock a mutex (possibly interruptible), slowpath: 924 924 */ 925 925 static __always_inline int __sched 926 - __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, 926 + __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclass, 927 927 struct lockdep_map *nest_lock, unsigned long ip, 928 928 struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) 929 929 { ··· 1098 1098 } 1099 1099 1100 1100 static int __sched 1101 - __mutex_lock(struct mutex *lock, long state, unsigned int subclass, 1101 + __mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, 1102 1102 struct lockdep_map *nest_lock, unsigned long ip) 1103 1103 { 1104 1104 return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false); 1105 1105 } 1106 1106 1107 1107 static int __sched 1108 - __ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass, 1108 + __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, 1109 1109 struct lockdep_map *nest_lock, unsigned long ip, 1110 1110 struct ww_acquire_ctx *ww_ctx) 1111 1111 {
+2 -2
kernel/locking/rtmutex.c
··· 1135 1135 * 1136 1136 * Must be called with lock->wait_lock held and interrupts disabled 1137 1137 */ 1138 - static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, 1138 + static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state, 1139 1139 struct hrtimer_sleeper *timeout, 1140 1140 struct rt_mutex_waiter *waiter) 1141 1141 { ··· 1190 1190 /* 1191 1191 * Slow path lock function: 1192 1192 */ 1193 - static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, 1193 + static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state, 1194 1194 struct hrtimer_sleeper *timeout, 1195 1195 enum rtmutex_chainwalk chwalk) 1196 1196 {
+1 -1
kernel/locking/rwsem.c
··· 889 889 * Wait for the read lock to be granted 890 890 */ 891 891 static struct rw_semaphore __sched * 892 - rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, int state) 892 + rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state) 893 893 { 894 894 long adjustment = -RWSEM_READER_BIAS; 895 895 long rcnt = (count >> RWSEM_READER_SHIFT);
+6 -6
kernel/ptrace.c
··· 197 197 spin_lock_irq(&task->sighand->siglock); 198 198 if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && 199 199 !__fatal_signal_pending(task)) { 200 - task->state = __TASK_TRACED; 200 + WRITE_ONCE(task->__state, __TASK_TRACED); 201 201 ret = true; 202 202 } 203 203 spin_unlock_irq(&task->sighand->siglock); ··· 207 207 208 208 static void ptrace_unfreeze_traced(struct task_struct *task) 209 209 { 210 - if (task->state != __TASK_TRACED) 210 + if (READ_ONCE(task->__state) != __TASK_TRACED) 211 211 return; 212 212 213 213 WARN_ON(!task->ptrace || task->parent != current); ··· 217 217 * Recheck state under the lock to close this race. 218 218 */ 219 219 spin_lock_irq(&task->sighand->siglock); 220 - if (task->state == __TASK_TRACED) { 220 + if (READ_ONCE(task->__state) == __TASK_TRACED) { 221 221 if (__fatal_signal_pending(task)) 222 222 wake_up_state(task, __TASK_TRACED); 223 223 else 224 - task->state = TASK_TRACED; 224 + WRITE_ONCE(task->__state, TASK_TRACED); 225 225 } 226 226 spin_unlock_irq(&task->sighand->siglock); 227 227 } ··· 256 256 */ 257 257 read_lock(&tasklist_lock); 258 258 if (child->ptrace && child->parent == current) { 259 - WARN_ON(child->state == __TASK_TRACED); 259 + WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); 260 260 /* 261 261 * child->sighand can't be NULL, release_task() 262 262 * does ptrace_unlink() before __exit_signal(). ··· 273 273 * ptrace_stop() changes ->state back to TASK_RUNNING, 274 274 * so we should not worry about leaking __TASK_TRACED. 275 275 */ 276 - WARN_ON(child->state == __TASK_TRACED); 276 + WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); 277 277 ret = -ESRCH; 278 278 } 279 279 }
+2 -2
kernel/rcu/rcutorture.c
··· 1831 1831 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, 1832 1832 &flags, &gp_seq); 1833 1833 wtp = READ_ONCE(writer_task); 1834 - pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#lx cpu %d\n", 1834 + pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#x cpu %d\n", 1835 1835 rcu_torture_writer_state_getname(), 1836 1836 rcu_torture_writer_state, gp_seq, flags, 1837 - wtp == NULL ? ~0UL : wtp->state, 1837 + wtp == NULL ? ~0U : wtp->__state, 1838 1838 wtp == NULL ? -1 : (int)task_cpu(wtp)); 1839 1839 if (!splatted && wtp) { 1840 1840 sched_show_task(wtp);
+6 -6
kernel/rcu/tree_stall.h
··· 460 460 461 461 if (rcu_is_gp_kthread_starving(&j)) { 462 462 cpu = gpk ? task_cpu(gpk) : -1; 463 - pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", 463 + pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n", 464 464 rcu_state.name, j, 465 465 (long)rcu_seq_current(&rcu_state.gp_seq), 466 466 data_race(rcu_state.gp_flags), 467 467 gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, 468 - gpk ? gpk->state : ~0, cpu); 468 + gpk ? gpk->__state : ~0, cpu); 469 469 if (gpk) { 470 470 pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); 471 471 pr_err("RCU grace-period kthread stack dump:\n"); ··· 503 503 time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) && 504 504 gpk && !READ_ONCE(gpk->on_rq)) { 505 505 cpu = task_cpu(gpk); 506 - pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n", 506 + pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x\n", 507 507 rcu_state.name, (jiffies - jiffies_fqs), 508 508 (long)rcu_seq_current(&rcu_state.gp_seq), 509 509 data_race(rcu_state.gp_flags), 510 510 gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS, 511 - gpk->state); 511 + gpk->__state); 512 512 pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n", 513 513 cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu)); 514 514 } ··· 735 735 ja = j - data_race(rcu_state.gp_activity); 736 736 jr = j - data_race(rcu_state.gp_req_activity); 737 737 jw = j - data_race(rcu_state.gp_wake_time); 738 - pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", 738 + pr_info("%s: wait state: %s(%d) ->state: %#x delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", 739 739 rcu_state.name, gp_state_getname(rcu_state.gp_state), 740 - rcu_state.gp_state, t ? t->state : 0x1ffffL, 740 + rcu_state.gp_state, t ? t->__state : 0x1ffff, 741 741 ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq), 742 742 (long)data_race(rcu_state.gp_seq), 743 743 (long)data_race(rcu_get_root()->gp_seq_needed),
+28 -25
kernel/sched/core.c
··· 2638 2638 return -EINVAL; 2639 2639 } 2640 2640 2641 - if (task_running(rq, p) || p->state == TASK_WAKING) { 2641 + if (task_running(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) { 2642 2642 /* 2643 2643 * MIGRATE_ENABLE gets here because 'p == current', but for 2644 2644 * anything else we cannot do is_migration_disabled(), punt ··· 2781 2781 void set_task_cpu(struct task_struct *p, unsigned int new_cpu) 2782 2782 { 2783 2783 #ifdef CONFIG_SCHED_DEBUG 2784 + unsigned int state = READ_ONCE(p->__state); 2785 + 2784 2786 /* 2785 2787 * We should never call set_task_cpu() on a blocked task, 2786 2788 * ttwu() will sort out the placement. 2787 2789 */ 2788 - WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && 2789 - !p->on_rq); 2790 + WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq); 2790 2791 2791 2792 /* 2792 2793 * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING, 2793 2794 * because schedstat_wait_{start,end} rebase migrating task's wait_start 2794 2795 * time relying on p->on_rq. 2795 2796 */ 2796 - WARN_ON_ONCE(p->state == TASK_RUNNING && 2797 + WARN_ON_ONCE(state == TASK_RUNNING && 2797 2798 p->sched_class == &fair_sched_class && 2798 2799 (p->on_rq && !task_on_rq_migrating(p))); 2799 2800 ··· 2966 2965 * smp_call_function() if an IPI is sent by the same process we are 2967 2966 * waiting to become inactive. 2968 2967 */ 2969 - unsigned long wait_task_inactive(struct task_struct *p, long match_state) 2968 + unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) 2970 2969 { 2971 2970 int running, queued; 2972 2971 struct rq_flags rf; ··· 2994 2993 * is actually now running somewhere else! 
2995 2994 */ 2996 2995 while (task_running(rq, p)) { 2997 - if (match_state && unlikely(p->state != match_state)) 2996 + if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) 2998 2997 return 0; 2999 2998 cpu_relax(); 3000 2999 } ··· 3009 3008 running = task_running(rq, p); 3010 3009 queued = task_on_rq_queued(p); 3011 3010 ncsw = 0; 3012 - if (!match_state || p->state == match_state) 3011 + if (!match_state || READ_ONCE(p->__state) == match_state) 3013 3012 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ 3014 3013 task_rq_unlock(rq, p, &rf); 3015 3014 ··· 3318 3317 struct rq_flags *rf) 3319 3318 { 3320 3319 check_preempt_curr(rq, p, wake_flags); 3321 - p->state = TASK_RUNNING; 3320 + WRITE_ONCE(p->__state, TASK_RUNNING); 3322 3321 trace_sched_wakeup(p); 3323 3322 3324 3323 #ifdef CONFIG_SMP ··· 3710 3709 * - we're serialized against set_special_state() by virtue of 3711 3710 * it disabling IRQs (this allows not taking ->pi_lock). 3712 3711 */ 3713 - if (!(p->state & state)) 3712 + if (!(READ_ONCE(p->__state) & state)) 3714 3713 goto out; 3715 3714 3716 3715 success = 1; 3717 3716 trace_sched_waking(p); 3718 - p->state = TASK_RUNNING; 3717 + WRITE_ONCE(p->__state, TASK_RUNNING); 3719 3718 trace_sched_wakeup(p); 3720 3719 goto out; 3721 3720 } ··· 3728 3727 */ 3729 3728 raw_spin_lock_irqsave(&p->pi_lock, flags); 3730 3729 smp_mb__after_spinlock(); 3731 - if (!(p->state & state)) 3730 + if (!(READ_ONCE(p->__state) & state)) 3732 3731 goto unlock; 3733 3732 3734 3733 trace_sched_waking(p); ··· 3794 3793 * TASK_WAKING such that we can unlock p->pi_lock before doing the 3795 3794 * enqueue, such as ttwu_queue_wakelist(). 
3796 3795 */ 3797 - p->state = TASK_WAKING; 3796 + WRITE_ONCE(p->__state, TASK_WAKING); 3798 3797 3799 3798 /* 3800 3799 * If the owning (remote) CPU is still in the middle of schedule() with ··· 3887 3886 ret = func(p, arg); 3888 3887 rq_unlock(rq, &rf); 3889 3888 } else { 3890 - switch (p->state) { 3889 + switch (READ_ONCE(p->__state)) { 3891 3890 case TASK_RUNNING: 3892 3891 case TASK_WAKING: 3893 3892 break; ··· 4087 4086 * nobody will actually run it, and a signal or other external 4088 4087 * event cannot wake it up and insert it on the runqueue either. 4089 4088 */ 4090 - p->state = TASK_NEW; 4089 + p->__state = TASK_NEW; 4091 4090 4092 4091 /* 4093 4092 * Make sure we do not leak PI boosting priority to the child. ··· 4193 4192 struct rq *rq; 4194 4193 4195 4194 raw_spin_lock_irqsave(&p->pi_lock, rf.flags); 4196 - p->state = TASK_RUNNING; 4195 + WRITE_ONCE(p->__state, TASK_RUNNING); 4197 4196 #ifdef CONFIG_SMP 4198 4197 /* 4199 4198 * Fork balancing, do it here and not earlier because: ··· 4555 4554 * running on another CPU and we could rave with its RUNNING -> DEAD 4556 4555 * transition, resulting in a double drop. 4557 4556 */ 4558 - prev_state = prev->state; 4557 + prev_state = READ_ONCE(prev->__state); 4559 4558 vtime_task_switch(prev); 4560 4559 perf_event_task_sched_in(prev, current); 4561 4560 finish_task(prev); ··· 5249 5248 #endif 5250 5249 5251 5250 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP 5252 - if (!preempt && prev->state && prev->non_block_count) { 5251 + if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { 5253 5252 printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", 5254 5253 prev->comm, prev->pid, prev->non_block_count); 5255 5254 dump_stack(); ··· 5875 5874 * - we form a control dependency vs deactivate_task() below. 5876 5875 * - ptrace_{,un}freeze_traced() can change ->state underneath us. 
5877 5876 */ 5878 - prev_state = prev->state; 5877 + prev_state = READ_ONCE(prev->__state); 5879 5878 if (!preempt && prev_state) { 5880 5879 if (signal_pending_state(prev_state, prev)) { 5881 - prev->state = TASK_RUNNING; 5880 + WRITE_ONCE(prev->__state, TASK_RUNNING); 5882 5881 } else { 5883 5882 prev->sched_contributes_to_load = 5884 5883 (prev_state & TASK_UNINTERRUPTIBLE) && ··· 6050 6049 * current task can be in any other state. Note, idle is always in the 6051 6050 * TASK_RUNNING state. 6052 6051 */ 6053 - WARN_ON_ONCE(current->state); 6052 + WARN_ON_ONCE(current->__state); 6054 6053 do { 6055 6054 __schedule(false); 6056 6055 } while (need_resched()); ··· 8177 8176 static inline bool 8178 8177 state_filter_match(unsigned long state_filter, struct task_struct *p) 8179 8178 { 8179 + unsigned int state = READ_ONCE(p->__state); 8180 + 8180 8181 /* no filter, everything matches */ 8181 8182 if (!state_filter) 8182 8183 return true; 8183 8184 8184 8185 /* filter, but doesn't match */ 8185 - if (!(p->state & state_filter)) 8186 + if (!(state & state_filter)) 8186 8187 return false; 8187 8188 8188 8189 /* 8189 8190 * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows 8190 8191 * TASK_KILLABLE). 8191 8192 */ 8192 - if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) 8193 + if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE) 8193 8194 return false; 8194 8195 8195 8196 return true; 8196 8197 } 8197 8198 8198 8199 8199 - void show_state_filter(unsigned long state_filter) 8200 + void show_state_filter(unsigned int state_filter) 8200 8201 { 8201 8202 struct task_struct *g, *p; 8202 8203 ··· 8255 8252 raw_spin_lock_irqsave(&idle->pi_lock, flags); 8256 8253 raw_spin_rq_lock(rq); 8257 8254 8258 - idle->state = TASK_RUNNING; 8255 + idle->__state = TASK_RUNNING; 8259 8256 idle->se.exec_start = sched_clock(); 8260 8257 /* 8261 8258 * PF_KTHREAD should already be set at this point; regardless, make it ··· 9570 9567 * has happened. 
This would lead to problems with PELT, due to 9571 9568 * move wanting to detach+attach while we're not attached yet. 9572 9569 */ 9573 - if (task->state == TASK_NEW) 9570 + if (READ_ONCE(task->__state) == TASK_NEW) 9574 9571 ret = -EINVAL; 9575 9572 raw_spin_unlock_irq(&task->pi_lock); 9576 9573
+5 -5
kernel/sched/deadline.c
··· 348 348 if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) { 349 349 if (dl_task(p)) 350 350 sub_running_bw(dl_se, dl_rq); 351 - if (!dl_task(p) || p->state == TASK_DEAD) { 351 + if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) { 352 352 struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); 353 353 354 - if (p->state == TASK_DEAD) 354 + if (READ_ONCE(p->__state) == TASK_DEAD) 355 355 sub_rq_bw(&p->dl, &rq->dl); 356 356 raw_spin_lock(&dl_b->lock); 357 357 __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); ··· 1355 1355 sched_clock_tick(); 1356 1356 update_rq_clock(rq); 1357 1357 1358 - if (!dl_task(p) || p->state == TASK_DEAD) { 1358 + if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) { 1359 1359 struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); 1360 1360 1361 - if (p->state == TASK_DEAD && dl_se->dl_non_contending) { 1361 + if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) { 1362 1362 sub_running_bw(&p->dl, dl_rq_of_se(&p->dl)); 1363 1363 sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl)); 1364 1364 dl_se->dl_non_contending = 0; ··· 1722 1722 { 1723 1723 struct rq *rq; 1724 1724 1725 - if (p->state != TASK_WAKING) 1725 + if (READ_ONCE(p->__state) != TASK_WAKING) 1726 1726 return; 1727 1727 1728 1728 rq = task_rq(p);
+7 -4
kernel/sched/fair.c
··· 993 993 994 994 if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) { 995 995 struct task_struct *tsk = task_of(se); 996 + unsigned int state; 996 997 997 - if (tsk->state & TASK_INTERRUPTIBLE) 998 + /* XXX racy against TTWU */ 999 + state = READ_ONCE(tsk->__state); 1000 + if (state & TASK_INTERRUPTIBLE) 998 1001 __schedstat_set(se->statistics.sleep_start, 999 1002 rq_clock(rq_of(cfs_rq))); 1000 - if (tsk->state & TASK_UNINTERRUPTIBLE) 1003 + if (state & TASK_UNINTERRUPTIBLE) 1001 1004 __schedstat_set(se->statistics.block_start, 1002 1005 rq_clock(rq_of(cfs_rq))); 1003 1006 } ··· 6891 6888 * min_vruntime -- the latter is done by enqueue_entity() when placing 6892 6889 * the task on the new runqueue. 6893 6890 */ 6894 - if (p->state == TASK_WAKING) { 6891 + if (READ_ONCE(p->__state) == TASK_WAKING) { 6895 6892 struct sched_entity *se = &p->se; 6896 6893 struct cfs_rq *cfs_rq = cfs_rq_of(se); 6897 6894 u64 min_vruntime; ··· 11056 11053 * waiting for actually being woken up by sched_ttwu_pending(). 11057 11054 */ 11058 11055 if (!se->sum_exec_runtime || 11059 - (p->state == TASK_WAKING && p->sched_remote_wakeup)) 11056 + (READ_ONCE(p->__state) == TASK_WAKING && p->sched_remote_wakeup)) 11060 11057 return true; 11061 11058 11062 11059 return false;
+2 -2
lib/syscall.c
··· 68 68 */ 69 69 int task_current_syscall(struct task_struct *target, struct syscall_info *info) 70 70 { 71 - long state; 72 71 unsigned long ncsw; 72 + unsigned int state; 73 73 74 74 if (target == current) 75 75 return collect_syscall(target, info); 76 76 77 - state = target->state; 77 + state = READ_ONCE(target->__state); 78 78 if (unlikely(!state)) 79 79 return -EAGAIN; 80 80
+1 -1
net/core/dev.c
··· 4363 4363 * makes sure to proceed with napi polling 4364 4364 * if the thread is explicitly woken from here. 4365 4365 */ 4366 - if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE) 4366 + if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE) 4367 4367 set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); 4368 4368 wake_up_process(thread); 4369 4369 return;