Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
"The scheduler pull request comes with the following updates:

- Prevent a divide by zero issue by validating the input value of
  sysctl_sched_time_avg

- Make task state printing consistent all over the place and have
  explicit state characters for IDLE and PARKED so they won't be
  displayed as 'D' state, which confuses tools"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/sysctl: Check user input value of sysctl_sched_time_avg
sched/debug: Add explicit TASK_PARKED printing
sched/debug: Ignore TASK_IDLE for SysRq-W
sched/debug: Add explicit TASK_IDLE printing
sched/tracing: Use common task-state helpers
sched/tracing: Fix trace_sched_switch task-state printing
sched/debug: Remove unused variable
sched/debug: Convert TASK_state to hex
sched/debug: Implement consistent task-state printing

Changed files: 8 files changed, 102 insertions(+), 74 deletions(-)

fs/proc/array.c: +15 -20

···
  * simple bit tests.
  */
 static const char * const task_state_array[] = {
-        "R (running)",          /*   0 */
-        "S (sleeping)",         /*   1 */
-        "D (disk sleep)",       /*   2 */
-        "T (stopped)",          /*   4 */
-        "t (tracing stop)",     /*   8 */
-        "X (dead)",             /*  16 */
-        "Z (zombie)",           /*  32 */
+
+        /* states in TASK_REPORT: */
+        "R (running)",          /* 0x00 */
+        "S (sleeping)",         /* 0x01 */
+        "D (disk sleep)",       /* 0x02 */
+        "T (stopped)",          /* 0x04 */
+        "t (tracing stop)",     /* 0x08 */
+        "X (dead)",             /* 0x10 */
+        "Z (zombie)",           /* 0x20 */
+        "P (parked)",           /* 0x40 */
+
+        /* states beyond TASK_REPORT: */
+        "I (idle)",             /* 0x80 */
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-        unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
-
-        /*
-         * Parked tasks do not run; they sit in __kthread_parkme().
-         * Without this check, we would report them as running, which is
-         * clearly wrong, so we report them as sleeping instead.
-         */
-        if (tsk->state == TASK_PARKED)
-                state = TASK_INTERRUPTIBLE;
-
-        BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
-
-        return task_state_array[fls(state)];
+        BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
+        return task_state_array[__get_task_state(tsk)];
 }
 
 static inline int get_task_umask(struct task_struct *tsk)

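For reference, the task_state_array[] strings above are exactly what surfaces in the "State:" line of /proc/<pid>/status. A minimal userspace sketch (not part of this series; it assumes nothing beyond a mounted /proc) that prints that line for the calling process:

/*
 * Print the "State:" line of /proc/self/status, i.e. the place where the
 * task_state_array[] strings from the hunk above end up.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/self/status", "r");

        if (!f)
                return 1;

        while (fgets(line, sizeof(line), f)) {
                if (!strncmp(line, "State:", 6)) {
                        fputs(line, stdout);    /* e.g. "State:  R (running)" */
                        break;
                }
        }
        fclose(f);
        return 0;
}

A process inspecting itself will normally see "R (running)"; the new "P (parked)" and "I (idle)" strings typically only show up for kernel threads.
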
include/linux/sched.h: +40 -24

···
  */
 
 /* Used in tsk->state: */
-#define TASK_RUNNING                    0
-#define TASK_INTERRUPTIBLE              1
-#define TASK_UNINTERRUPTIBLE            2
-#define __TASK_STOPPED                  4
-#define __TASK_TRACED                   8
+#define TASK_RUNNING                    0x0000
+#define TASK_INTERRUPTIBLE              0x0001
+#define TASK_UNINTERRUPTIBLE            0x0002
+#define __TASK_STOPPED                  0x0004
+#define __TASK_TRACED                   0x0008
 /* Used in tsk->exit_state: */
-#define EXIT_DEAD                       16
-#define EXIT_ZOMBIE                     32
+#define EXIT_DEAD                       0x0010
+#define EXIT_ZOMBIE                     0x0020
 #define EXIT_TRACE                      (EXIT_ZOMBIE | EXIT_DEAD)
 /* Used in tsk->state again: */
-#define TASK_DEAD                       64
-#define TASK_WAKEKILL                   128
-#define TASK_WAKING                     256
-#define TASK_PARKED                     512
-#define TASK_NOLOAD                     1024
-#define TASK_NEW                        2048
-#define TASK_STATE_MAX                  4096
-
-#define TASK_STATE_TO_CHAR_STR          "RSDTtXZxKWPNn"
+#define TASK_PARKED                     0x0040
+#define TASK_DEAD                       0x0080
+#define TASK_WAKEKILL                   0x0100
+#define TASK_WAKING                     0x0200
+#define TASK_NOLOAD                     0x0400
+#define TASK_NEW                        0x0800
+#define TASK_STATE_MAX                  0x1000
 
 /* Convenience macros for the sake of set_current_state: */
 #define TASK_KILLABLE                   (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
···
 /* get_task_state(): */
 #define TASK_REPORT                     (TASK_RUNNING | TASK_INTERRUPTIBLE | \
                                          TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-                                         __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
+                                         __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
+                                         TASK_PARKED)
 
 #define task_is_traced(task)            ((task->state & __TASK_TRACED) != 0)
···
         return task_pgrp_nr_ns(tsk, &init_pid_ns);
 }
 
-static inline char task_state_to_char(struct task_struct *task)
+#define TASK_REPORT_IDLE        (TASK_REPORT + 1)
+#define TASK_REPORT_MAX         (TASK_REPORT_IDLE << 1)
+
+static inline unsigned int __get_task_state(struct task_struct *tsk)
 {
-        const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-        unsigned long state = task->state;
+        unsigned int tsk_state = READ_ONCE(tsk->state);
+        unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
 
-        state = state ? __ffs(state) + 1 : 0;
+        BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
 
-        /* Make sure the string lines up properly with the number of task states: */
-        BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1);
+        if (tsk_state == TASK_IDLE)
+                state = TASK_REPORT_IDLE;
 
-        return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
+        return fls(state);
+}
+
+static inline char __task_state_to_char(unsigned int state)
+{
+        static const char state_char[] = "RSDTtXZPI";
+
+        BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
+
+        return state_char[state];
+}
+
+static inline char task_state_to_char(struct task_struct *tsk)
+{
+        return __task_state_to_char(__get_task_state(tsk));
 }
 
 /**

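To make the fls()-based mapping above concrete, here is a standalone userspace mock (not kernel code) of __get_task_state()/__task_state_to_char(). The constants are copied from this hunk, fls() is approximated with a GCC builtin, and the exit_state handling is left out:

/* Mock of the new state -> index -> character mapping; userspace only. */
#include <stdio.h>

#define TASK_RUNNING            0x0000
#define TASK_INTERRUPTIBLE      0x0001
#define TASK_UNINTERRUPTIBLE    0x0002
#define TASK_PARKED             0x0040
#define TASK_NOLOAD             0x0400
#define TASK_IDLE               (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

#define TASK_REPORT             0x007f  /* all reportable bits up to TASK_PARKED */
#define TASK_REPORT_IDLE        (TASK_REPORT + 1)

static int fls_mock(unsigned int x)
{
        return x ? 32 - __builtin_clz(x) : 0;   /* stand-in for the kernel's fls() */
}

static unsigned int get_task_state_mock(unsigned int tsk_state)
{
        unsigned int state = tsk_state & TASK_REPORT;

        if (tsk_state == TASK_IDLE)             /* give D|NOLOAD its own 'I' slot */
                state = TASK_REPORT_IDLE;

        return fls_mock(state);
}

int main(void)
{
        static const char state_char[] = "RSDTtXZPI";
        unsigned int samples[] = {
                TASK_RUNNING, TASK_INTERRUPTIBLE, TASK_UNINTERRUPTIBLE,
                TASK_PARKED, TASK_IDLE,
        };

        for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                unsigned int idx = get_task_state_mock(samples[i]);

                /* prints R, S, D, P, I for the five samples */
                printf("state 0x%04x -> index %u -> '%c'\n",
                       samples[i], idx, state_char[idx]);
        }
        return 0;
}

The BUILD_BUG_ON()s in the real helpers pin exactly this invariant: one character per reportable state, in bit order.
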
include/trace/events/sched.h: +12 -7

···
         * Preemption ignores task state, therefore preempted tasks are always
         * RUNNING (we will not have dequeued if state != RUNNING).
         */
-        return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
+        if (preempt)
+                return TASK_STATE_MAX;
+
+        return __get_task_state(p);
 }
 #endif /* CREATE_TRACE_POINTS */
···
 
         TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
                 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-                __entry->prev_state & (TASK_STATE_MAX-1) ?
-                  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
-                                { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
-                                { 16, "Z" }, { 32, "X" }, { 64, "x" },
-                                { 128, "K" }, { 256, "W" }, { 512, "P" },
-                                { 1024, "N" }) : "R",
+
+                (__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
+                  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
+                                { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
+                                { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
+                                { 0x40, "P" }, { 0x80, "I" }) :
+                  "R",
+
                 __entry->prev_state & TASK_STATE_MAX ? "+" : "",
                 __entry->next_comm, __entry->next_pid, __entry->next_prio)
 );

kernel/sched/core.c: +23 -1

···
         put_task_stack(p);
 }
 
+static inline bool
+state_filter_match(unsigned long state_filter, struct task_struct *p)
+{
+        /* no filter, everything matches */
+        if (!state_filter)
+                return true;
+
+        /* filter, but doesn't match */
+        if (!(p->state & state_filter))
+                return false;
+
+        /*
+         * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
+         * TASK_KILLABLE).
+         */
+        if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+                return false;
+
+        return true;
+}
+
+
 void show_state_filter(unsigned long state_filter)
 {
         struct task_struct *g, *p;
···
                  */
                 touch_nmi_watchdog();
                 touch_all_softlockup_watchdogs();
-                if (!state_filter || (p->state & state_filter))
+                if (state_filter_match(state_filter, p))
                         sched_show_task(p);
         }

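In practice this changes SysRq-W, which boils down to show_state_filter(TASK_UNINTERRUPTIBLE): TASK_IDLE kthreads (uninterruptible, but deliberately not counted as load) are no longer dumped, while plain D-state and TASK_KILLABLE tasks still are. A userspace sketch of the same predicate, with the state values taken from the sched.h hunk above:

/* Userspace sketch of state_filter_match(); p->state is passed as a plain value. */
#include <stdbool.h>
#include <stdio.h>

#define TASK_UNINTERRUPTIBLE    0x0002
#define TASK_WAKEKILL           0x0100
#define TASK_NOLOAD             0x0400
#define TASK_KILLABLE           (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_IDLE               (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

static bool state_filter_match(unsigned long state_filter, unsigned long state)
{
        if (!state_filter)
                return true;            /* no filter: everything matches */
        if (!(state & state_filter))
                return false;           /* filter set, state doesn't match */
        if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE)
                return false;           /* hide TASK_IDLE from the D-state dump */
        return true;
}

int main(void)
{
        printf("D-state task:  %d\n", state_filter_match(TASK_UNINTERRUPTIBLE, TASK_UNINTERRUPTIBLE)); /* 1 */
        printf("killable task: %d\n", state_filter_match(TASK_UNINTERRUPTIBLE, TASK_KILLABLE));        /* 1 */
        printf("idle kthread:  %d\n", state_filter_match(TASK_UNINTERRUPTIBLE, TASK_IDLE));            /* 0 */
        return 0;
}
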
kernel/sched/debug.c: -2

···
 }
 #endif
 
-static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {

kernel/sysctl.c: +2 -1

···
                 .data           = &sysctl_sched_time_avg,
                 .maxlen         = sizeof(unsigned int),
                 .mode           = 0644,
-                .proc_handler   = proc_dointvec,
+                .proc_handler   = proc_dointvec_minmax,
+                .extra1         = &one,
         },
 #ifdef CONFIG_SCHEDSTATS
         {

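proc_dointvec_minmax() with .extra1 pointing at the existing constant one in kernel/sysctl.c makes any write below 1 fail with -EINVAL, so sysctl_sched_time_avg can no longer reach zero and, per the pull request summary, later act as a divisor in the scheduler's averaging code. A rough userspace sketch of the resulting clamping behaviour; set_sched_time_avg() is a made-up stand-in for the sysctl handler, not a kernel function:

/* Hypothetical stand-in for a write to the sched_time_avg sysctl. */
#include <errno.h>
#include <stdio.h>

static const int one = 1;                               /* mirrors .extra1 = &one */
static unsigned int sysctl_sched_time_avg = 1000;       /* default: 1000 ms */

static int set_sched_time_avg(int val)
{
        if (val < one)                                  /* below the minimum: reject */
                return -EINVAL;
        sysctl_sched_time_avg = val;
        return 0;
}

int main(void)
{
        printf("write 0:   %d\n", set_sched_time_avg(0));       /* -EINVAL, previously accepted */
        printf("write 500: %d\n", set_sched_time_avg(500));     /* 0 */
        printf("value now: %u ms\n", sysctl_sched_time_avg);    /* never zero */
        return 0;
}
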
kernel/trace/trace_output.c: +6 -15

···
         return !trace_seq_has_overflowed(s);
 }
 
-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-
-static int task_state_char(unsigned long state)
-{
-        int bit = state ? __ffs(state) + 1 : 0;
-
-        return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
 /**
  * ftrace_find_event - find a registered event
  * @type: the type of event to look for
···
 
         trace_assign_type(field, iter->ent);
 
-        T = task_state_char(field->next_state);
-        S = task_state_char(field->prev_state);
+        T = __task_state_to_char(field->next_state);
+        S = __task_state_to_char(field->prev_state);
         trace_find_cmdline(field->next_pid, comm);
         trace_seq_printf(&iter->seq,
                          " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
···
         trace_assign_type(field, iter->ent);
 
         if (!S)
-                S = task_state_char(field->prev_state);
-        T = task_state_char(field->next_state);
+                S = __task_state_to_char(field->prev_state);
+        T = __task_state_to_char(field->next_state);
         trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
                          field->prev_pid,
                          field->prev_prio,
···
         trace_assign_type(field, iter->ent);
 
         if (!S)
-                S = task_state_char(field->prev_state);
-        T = task_state_char(field->next_state);
+                S = __task_state_to_char(field->prev_state);
+        T = __task_state_to_char(field->next_state);
 
         SEQ_PUT_HEX_FIELD(s, field->prev_pid);
         SEQ_PUT_HEX_FIELD(s, field->prev_prio);

kernel/trace/trace_sched_wakeup.c: +4 -4

···
         entry   = ring_buffer_event_data(event);
         entry->prev_pid                 = prev->pid;
         entry->prev_prio                = prev->prio;
-        entry->prev_state               = prev->state;
+        entry->prev_state               = __get_task_state(prev);
         entry->next_pid                 = next->pid;
         entry->next_prio                = next->prio;
-        entry->next_state               = next->state;
+        entry->next_state               = __get_task_state(next);
         entry->next_cpu = task_cpu(next);
 
         if (!call_filter_check_discard(call, entry, buffer, event))
···
         entry   = ring_buffer_event_data(event);
         entry->prev_pid                 = curr->pid;
         entry->prev_prio                = curr->prio;
-        entry->prev_state               = curr->state;
+        entry->prev_state               = __get_task_state(curr);
         entry->next_pid                 = wakee->pid;
         entry->next_prio                = wakee->prio;
-        entry->next_state               = wakee->state;
+        entry->next_state               = __get_task_state(wakee);
         entry->next_cpu                 = task_cpu(wakee);
 
         if (!call_filter_check_discard(call, entry, buffer, event))