include/linux/sched.h at 4ff9083b8a9a80bdf4ebbbec22cda4cbfb60f7aa

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / sched.h
at 4ff9083b8a9a80bdf4ebbbec22cda4cbfb60f7aa 1654 lines 46 kB view raw
wrap content
   1#ifndef _LINUX_SCHED_H
   2#define _LINUX_SCHED_H
   3
   4/*
   5 * Define 'struct task_struct' and provide the main scheduler
   6 * APIs (schedule(), wakeup variants, etc.)
   7 */
   8
   9#include <uapi/linux/sched.h>
  10
  11#include <asm/current.h>
  12
  13#include <linux/pid.h>
  14#include <linux/sem.h>
  15#include <linux/shm.h>
  16#include <linux/kcov.h>
  17#include <linux/mutex.h>
  18#include <linux/plist.h>
  19#include <linux/hrtimer.h>
  20#include <linux/seccomp.h>
  21#include <linux/nodemask.h>
  22#include <linux/rcupdate.h>
  23#include <linux/resource.h>
  24#include <linux/latencytop.h>
  25#include <linux/sched/prio.h>
  26#include <linux/signal_types.h>
  27#include <linux/mm_types_task.h>
  28#include <linux/task_io_accounting.h>
  29
  30/* task_struct member predeclarations (sorted alphabetically): */
  31struct audit_context;
  32struct backing_dev_info;
  33struct bio_list;
  34struct blk_plug;
  35struct cfs_rq;
  36struct fs_struct;
  37struct futex_pi_state;
  38struct io_context;
  39struct mempolicy;
  40struct nameidata;
  41struct nsproxy;
  42struct perf_event_context;
  43struct pid_namespace;
  44struct pipe_inode_info;
  45struct rcu_node;
  46struct reclaim_state;
  47struct robust_list_head;
  48struct sched_attr;
  49struct sched_param;
  50struct seq_file;
  51struct sighand_struct;
  52struct signal_struct;
  53struct task_delay_info;
  54struct task_group;
  55
  56/*
  57 * Task state bitmask. NOTE! These bits are also
  58 * encoded in fs/proc/array.c: get_task_state().
  59 *
  60 * We have two separate sets of flags: task->state
  61 * is about runnability, while task->exit_state are
  62 * about the task exiting. Confusing, but this way
  63 * modifying one set can't modify the other one by
  64 * mistake.
  65 */
  66
  67/* Used in tsk->state (TASK_RUNNING is 0; every other value below is a distinct single bit): */
  68#define TASK_RUNNING			0
  69#define TASK_INTERRUPTIBLE		1
  70#define TASK_UNINTERRUPTIBLE		2
  71#define __TASK_STOPPED			4
  72#define __TASK_TRACED			8
  73/* Used in tsk->exit_state: */
  74#define EXIT_DEAD			16
  75#define EXIT_ZOMBIE			32
  /* Both exit bits set at once (EXIT_ZOMBIE | EXIT_DEAD == 48): */
  76#define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
  77/* Used in tsk->state again: */
  78#define TASK_DEAD			64
  79#define TASK_WAKEKILL			128
  80#define TASK_WAKING			256
  81#define TASK_PARKED			512
  82#define TASK_NOLOAD			1024
  83#define TASK_NEW			2048
  /* First power of two above the highest bit defined above (2 * TASK_NEW): */
  84#define TASK_STATE_MAX			4096
  85
  /*
   * 13 characters: one for TASK_RUNNING (0) plus one for each of the twelve
   * bits from TASK_INTERRUPTIBLE (1) through TASK_NEW (2048).
   * NOTE(review): presumably indexed in bit order -- confirm against the users
   * of this string before adding or reordering states.
   */
  86#define TASK_STATE_TO_CHAR_STR		"RSDTtXZxKWPNn"
  87
  88/* Convenience macros for the sake of set_current_state: */
  89#define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
  90#define TASK_STOPPED			(TASK_WAKEKILL | __TASK_STOPPED)
  91#define TASK_TRACED			(TASK_WAKEKILL | __TASK_TRACED)
  92
  /* Uninterruptible sleep that also carries the TASK_NOLOAD bit: */
  93#define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
  94
  95/* Convenience macros for the sake of wake_up(): */
  96#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
  97#define TASK_ALL			(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
  98
  99/* get_task_state(); TASK_RUNNING is 0 and so contributes no bits to the mask: */
 100#define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
 101					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
 102					 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
 103
 104#define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
 105
 106#define task_is_stopped(task)		((task->state & __TASK_STOPPED) != 0)
 107
 108#define task_is_stopped_or_traced(task)	((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 109
 110#define task_contributes_to_load(task)	((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
 111					 (task->flags & PF_FROZEN) == 0 && \
 112					 (task->state & TASK_NOLOAD) == 0)
 113
 114#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 115
 /*
  * Debug variants: before storing the new state, both macros record
  * _THIS_IP_ in current->task_state_change. As in the non-debug variants
  * below, only set_current_state() uses smp_store_mb() for the store;
  * __set_current_state() uses a plain assignment.
  */
 116#define __set_current_state(state_value)			\
 117	do {							\
 118		current->task_state_change = _THIS_IP_;		\
 119		current->state = (state_value);			\
 120	} while (0)
 121#define set_current_state(state_value)				\
 122	do {							\
 123		current->task_state_change = _THIS_IP_;		\
 124		smp_store_mb(current->state, (state_value));	\
 125	} while (0)
 126
 127#else /* !CONFIG_DEBUG_ATOMIC_SLEEP */
 128/*
 129 * set_current_state() includes a barrier so that the write of current->state
 130 * is correctly serialised wrt the caller's subsequent test of whether to
 131 * actually sleep:
 132 *
 133 *   for (;;) {
 134 *	set_current_state(TASK_UNINTERRUPTIBLE);
 135 *	if (!need_sleep)
 136 *		break;
 137 *
 138 *	schedule();
 139 *   }
 140 *   __set_current_state(TASK_RUNNING);
 141 *
 142 * If the caller does not need such serialisation (because, for instance, the
 143 * condition test and condition change and wakeup are under the same lock) then
 144 * use __set_current_state().
 145 *
 146 * The above is typically ordered against the wakeup, which does:
 147 *
 148 *	need_sleep = false;
 149 *	wake_up_state(p, TASK_UNINTERRUPTIBLE);
 150 *
 151 * Where wake_up_state() (and all other wakeup primitives) imply enough
 152 * barriers to order the store of the variable against wakeup.
 153 *
 154 * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
 155 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
 156 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
 157 *
 158 * This is obviously fine, since they both store the exact same value.
 159 *
 160 * Also see the comments of try_to_wake_up().
 161 */
 162#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
 163#define set_current_state(state_value)	 smp_store_mb(current->state, (state_value))
 164#endif /* CONFIG_DEBUG_ATOMIC_SLEEP */
 165
 166/* Task command name length (size of the comm[] array in struct task_struct): */
 167#define TASK_COMM_LEN			16
 168
 169extern cpumask_var_t			cpu_isolated_map;
 170
 171extern void scheduler_tick(void);
 172
 /*
  * Largest value the 'long timeout' parameters below can carry.
  * NOTE(review): presumably the "no timeout" sentinel -- confirm in the
  * schedule_timeout() implementation.
  */
 173#define	MAX_SCHEDULE_TIMEOUT		LONG_MAX
 174
 /*
  * Sleep/schedule entry points. Each schedule_timeout*() variant takes a
  * long timeout and returns a long.
  * NOTE(review): units and the meaning of the return value are not visible in
  * this header -- see the definitions before relying on them.
  */
 175extern long schedule_timeout(long timeout);
 176extern long schedule_timeout_interruptible(long timeout);
 177extern long schedule_timeout_killable(long timeout);
 178extern long schedule_timeout_uninterruptible(long timeout);
 179extern long schedule_timeout_idle(long timeout);
 180asmlinkage void schedule(void);
 181extern void schedule_preempt_disabled(void);
 182
 /*
  * io_schedule_prepare() returns an int token whose value must not be ignored
  * (__must_check); io_schedule_finish() takes an int token.
  * NOTE(review): presumably the token returned by the matching prepare call --
  * confirm against the callers.
  */
 183extern int __must_check io_schedule_prepare(void);
 184extern void io_schedule_finish(int token);
 185extern long io_schedule_timeout(long timeout);
 186extern void io_schedule(void);
 187
 188/**
 189 * struct prev_cputime - snapshot of system and user cputime
 190 * @utime: time spent in user mode
 191 * @stime: time spent in system mode
 192 * @lock: protects the above two fields
 193 *
 194 * Stores previous user/system time values such that we can guarantee
 195 * monotonicity.
 *
 * All three members are compiled out when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 * is defined, leaving the structure with no members.
 196 */
 197struct prev_cputime {
 198#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 199	u64				utime;
 200	u64				stime;
 201	raw_spinlock_t			lock;
 202#endif
 203};
 204
 205/**
 206 * struct task_cputime - collected CPU time counts
 207 * @utime:		time spent in user mode, in nanoseconds
 208 * @stime:		time spent in kernel mode, in nanoseconds
 209 * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
 210 *
 211 * This structure groups together three kinds of CPU time that are tracked for
 212 * threads and thread groups.  Most things considering CPU time want to group
 213 * these counts together and treat all three of them in parallel.
 214 */
 215struct task_cputime {
 216	u64				utime;
 217	u64				stime;
 218	unsigned long long		sum_exec_runtime;
 219};
 220
 221/*
  * Alternate field names when used on cache expirations:
  *
  *   virt_exp  -> utime
  *   prof_exp  -> stime
  *   sched_exp -> sum_exec_runtime
  *
  * These are plain object-like macros, so they rewrite every occurrence of
  * these identifiers in any code that includes this header, not only member
  * accesses on struct task_cputime.
  */
 222#define virt_exp			utime
 223#define prof_exp			stime
 224#define sched_exp			sum_exec_runtime
 225
  /* Accounting state held in the 'state' member of struct vtime. */
 226enum vtime_state {
 227	/* Task is sleeping or running in a CPU with VTIME inactive: */
 228	VTIME_INACTIVE = 0,
 229	/* Task runs in userspace in a CPU with VTIME active: */
 230	VTIME_USER,
 231	/* Task runs in kernelspace in a CPU with VTIME active: */
 232	VTIME_SYS,
 233};
 234
  /*
   * Embedded in struct task_struct as 'vtime' when
   * CONFIG_VIRT_CPU_ACCOUNTING_GEN is set.
   *
   * NOTE(review): the semantics of the individual fields are not visible in
   * this header; the per-field notes below are assumptions to confirm
   * against the vtime accounting code.
   */
 235struct vtime {
 236	seqcount_t		seqcount;	/* presumably guards the fields below -- TODO confirm */
 237	unsigned long long	starttime;	/* presumably start of the current accounting period -- TODO confirm */
 238	enum vtime_state	state;		/* VTIME_INACTIVE, VTIME_USER or VTIME_SYS */
 239	u64			utime;		/* presumably accumulated user time -- TODO confirm */
 240	u64			stime;		/* presumably accumulated system time -- TODO confirm */
 241	u64			gtime;		/* presumably accumulated guest time -- TODO confirm */
 242};
 243
  /*
   * Per-task scheduling counters and timestamps, embedded in struct
   * task_struct as 'sched_info'. All members are compiled out unless
   * CONFIG_SCHED_INFO is set.
   */
 244struct sched_info {
 245#ifdef CONFIG_SCHED_INFO
 246	/* Cumulative counters: */
 247
 248	/* # of times we have run on this CPU: */
 249	unsigned long			pcount;
 250
 251	/* Time spent waiting on a runqueue: */
 252	unsigned long long		run_delay;
 253
 254	/* Timestamps: */
 255
 256	/* When did we last run on a CPU? */
 257	unsigned long long		last_arrival;
 258
 259	/* When were we last queued to run? */
 260	unsigned long long		last_queued;
 261
 262#endif /* CONFIG_SCHED_INFO */
 263};
 264
 265/*
 266 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 267 * has a few: load, load_avg, util_avg, freq, and capacity.
 268 *
 269 * We define a basic fixed point arithmetic range, and then formalize
 270 * all these metrics based on that basic range.
 *
 * With a shift of 10 the scale is 1L << 10 == 1024, i.e. a ratio of 1.0
 * is represented as 1024.
 271 */
 272# define SCHED_FIXEDPOINT_SHIFT		10
 273# define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT)
 274
  /*
   * Load weight of a scheduling entity; embedded in struct sched_entity as
   * 'load'.
   * NOTE(review): inv_weight is presumably a precomputed inverse of 'weight'
   * used to avoid divisions -- confirm in kernel/sched/fair.c.
   */
 275struct load_weight {
 276	unsigned long			weight;
 277	u32				inv_weight;
 278};
 279
 280/*
 281 * The load_avg/util_avg accumulates an infinite geometric series
 282 * (see __update_load_avg() in kernel/sched/fair.c).
 283 *
 284 * [load_avg definition]
 285 *
 286 *   load_avg = runnable% * scale_load_down(load)
 287 *
 288 * where runnable% is the time ratio that a sched_entity is runnable.
 289 * For cfs_rq, it is the aggregated load_avg of all runnable and
 290 * blocked sched_entities.
 291 *
 292 * load_avg may also take frequency scaling into account:
 293 *
 294 *   load_avg = runnable% * scale_load_down(load) * freq%
 295 *
 296 * where freq% is the CPU frequency normalized to the highest frequency.
 297 *
 298 * [util_avg definition]
 299 *
 300 *   util_avg = running% * SCHED_CAPACITY_SCALE
 301 *
 302 * where running% is the time ratio that a sched_entity is running on
 303 * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
 304 * and blocked sched_entities.
 305 *
 306 * util_avg may also factor frequency scaling and CPU capacity scaling:
 307 *
 308 *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
 309 *
 310 * where freq% is the same as above, and capacity% is the CPU capacity
 311 * normalized to the greatest capacity (due to uarch differences, etc).
 312 *
 313 * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
 314 * themselves are in the range of [0, 1]. To do fixed point arithmetics,
 315 * we therefore scale them to as large a range as necessary. This is for
 316 * example reflected by util_avg's SCHED_CAPACITY_SCALE.
 317 *
 318 * [Overflow issue]
 319 *
 320 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
 321 * with the highest load (=88761), always runnable on a single cfs_rq,
 322 * and should not overflow as the number already hits PID_MAX_LIMIT.
 323 *
 324 * For all other cases (including 32-bit kernels), struct load_weight's
 325 * weight will overflow first before we do, because:
 326 *
 327 *    Max(load_avg) <= Max(load.weight)
 328 *
 329 * Then it is the load_weight's responsibility to consider overflow
 330 * issues.
 331 */
 332struct sched_avg {
	/* NOTE(review): presumably when the sums/averages below were last updated -- confirm in kernel/sched/fair.c. */
 333	u64				last_update_time;
	/* 64-bit accumulated load sum; see "[Overflow issue]" in the comment preceding this struct. */
 334	u64				load_sum;
 335	u32				util_sum;
 336	u32				period_contrib;
	/* runnable% * scale_load_down(load), optionally also scaled by freq% (see "[load_avg definition]"). */
 337	unsigned long			load_avg;
	/* running% * SCHED_CAPACITY_SCALE, optionally also scaled by freq% and capacity% (see "[util_avg definition]"). */
 338	unsigned long			util_avg;
 339};
 340
  /*
   * Per-entity scheduler statistics, embedded in struct sched_entity as
   * 'statistics'. Every member is compiled out unless CONFIG_SCHEDSTATS is
   * set, in which case the structure has no members.
   *
   * The fields are grouped as: wait/iowait, sleep, block/exec/slice maxima,
   * migration counters and wakeup counters.
   */
 341struct sched_statistics {
 342#ifdef CONFIG_SCHEDSTATS
 343	u64				wait_start;
 344	u64				wait_max;
 345	u64				wait_count;
 346	u64				wait_sum;
 347	u64				iowait_count;
 348	u64				iowait_sum;
 349
 350	u64				sleep_start;
 351	u64				sleep_max;
	/* Note: the only signed (s64) member of this structure. */
 352	s64				sum_sleep_runtime;
 353
 354	u64				block_start;
 355	u64				block_max;
 356	u64				exec_max;
 357	u64				slice_max;
 358
 359	u64				nr_migrations_cold;
 360	u64				nr_failed_migrations_affine;
 361	u64				nr_failed_migrations_running;
 362	u64				nr_failed_migrations_hot;
 363	u64				nr_forced_migrations;
 364
 365	u64				nr_wakeups;
 366	u64				nr_wakeups_sync;
 367	u64				nr_wakeups_migrate;
 368	u64				nr_wakeups_local;
 369	u64				nr_wakeups_remote;
 370	u64				nr_wakeups_affine;
 371	u64				nr_wakeups_affine_attempts;
 372	u64				nr_wakeups_passive;
 373	u64				nr_wakeups_idle;
 374#endif /* CONFIG_SCHEDSTATS */
 375};
 376
  /*
   * Scheduling entity; embedded in struct task_struct as 'se'. With
   * CONFIG_FAIR_GROUP_SCHED it also carries parent/cfs_rq/my_q links, and
   * with CONFIG_SMP a cacheline-aligned struct sched_avg.
   */
 377struct sched_entity {
 378	/* For load-balancing: */
 379	struct load_weight		load;
 380	struct rb_node			run_node;
 381	struct list_head		group_node;
 382	unsigned int			on_rq;
 383
 384	u64				exec_start;
 385	u64				sum_exec_runtime;
 386	u64				vruntime;
 387	u64				prev_sum_exec_runtime;
 388
 389	u64				nr_migrations;
 390
	/* Has no members unless CONFIG_SCHEDSTATS is set (see struct sched_statistics): */
 391	struct sched_statistics		statistics;
 392
 393#ifdef CONFIG_FAIR_GROUP_SCHED
 394	int				depth;
 395	struct sched_entity		*parent;
 396	/* rq on which this entity is (to be) queued: */
 397	struct cfs_rq			*cfs_rq;
 398	/* rq "owned" by this entity/group: */
 399	struct cfs_rq			*my_q;
 400#endif
 401
 402#ifdef CONFIG_SMP
 403	/*
 404	 * Per entity load average tracking.
 405	 *
 406	 * Put into separate cache line so it does not
 407	 * collide with read-mostly values above.
 408	 */
 409	struct sched_avg		avg ____cacheline_aligned_in_smp;
 410#endif
 411};
 412
  /*
   * Real-time scheduling entity; embedded in struct task_struct as 'rt'.
   * The parent/rt_rq/my_q links exist only with CONFIG_RT_GROUP_SCHED.
   *
   * NOTE(review): the structure is tagged __randomize_layout, so presumably
   * its member order may be changed at build time -- do not rely on the
   * declaration order below; confirm against the compiler plugin docs.
   */
 413struct sched_rt_entity {
 414	struct list_head		run_list;
 415	unsigned long			timeout;
 416	unsigned long			watchdog_stamp;
 417	unsigned int			time_slice;
 418	unsigned short			on_rq;
 419	unsigned short			on_list;
 420
 421	struct sched_rt_entity		*back;
 422#ifdef CONFIG_RT_GROUP_SCHED
 423	struct sched_rt_entity		*parent;
 424	/* rq on which this entity is (to be) queued: */
 425	struct rt_rq			*rt_rq;
 426	/* rq "owned" by this entity/group: */
 427	struct rt_rq			*my_q;
 428#endif
 429} __randomize_layout;
 430
  /*
   * Scheduling entity for -deadline tasks; embedded in struct task_struct as
   * 'dl'. Holds the parameters set via sched_setattr(), the runtime/deadline
   * values that evolve during execution, a set of boolean flags and two
   * hrtimers (bandwidth enforcement and inactive timer).
   */
 431struct sched_dl_entity {
 432	struct rb_node			rb_node;
 433
 434	/*
 435	 * Original scheduling parameters. Copied here from sched_attr
 436	 * during sched_setattr(), they will remain the same until
 437	 * the next sched_setattr().
 438	 */
 439	u64				dl_runtime;	/* Maximum runtime for each instance	*/
 440	u64				dl_deadline;	/* Relative deadline of each instance	*/
 441	u64				dl_period;	/* Separation of two instances (period) */
 442	u64				dl_bw;		/* dl_runtime / dl_period		*/
 443	u64				dl_density;	/* dl_runtime / dl_deadline		*/
 444
 445	/*
 446	 * Actual scheduling parameters. Initialized with the values above,
 447	 * they are continuously updated during task execution. Note that
 448	 * the remaining runtime could be < 0 in case we are in overrun.
 449	 */
 450	s64				runtime;	/* Remaining runtime for this instance	*/
 451	u64				deadline;	/* Absolute deadline for this instance	*/
 452	unsigned int			flags;		/* Specifying the scheduler behaviour	*/
 453
 454	/*
 455	 * Some bool flags:
 456	 *
 457	 * @dl_throttled tells if we exhausted the runtime. If so, the
 458	 * task has to wait for a replenishment to be performed at the
 459	 * next firing of dl_timer.
 460	 *
 461	 * @dl_boosted tells if we are boosted due to DI. If so we are
 462	 * outside bandwidth enforcement mechanism (but only until we
 463	 * exit the critical section);
 464	 *
 465	 * @dl_yielded tells if task gave up the CPU before consuming
 466	 * all its available runtime during the last job.
 467	 *
 468	 * @dl_non_contending tells if the task is inactive while still
 469	 * contributing to the active utilization. In other words, it
 470	 * indicates if the inactive timer has been armed and its handler
 471	 * has not been executed yet. This flag is useful to avoid race
 472	 * conditions between the inactive timer handler and the wakeup
 473	 * code.
 474	 */
 475	int				dl_throttled;
 476	int				dl_boosted;
 477	int				dl_yielded;
 478	int				dl_non_contending;
 479
 480	/*
 481	 * Bandwidth enforcement timer. Each -deadline task has its
 482	 * own bandwidth to be enforced, thus we need one timer per task.
 483	 */
 484	struct hrtimer			dl_timer;
 485
 486	/*
 487	 * Inactive timer, responsible for decreasing the active utilization
 488	 * at the "0-lag time". When a -deadline task blocks, it contributes
 489	 * to GRUB's active utilization until the "0-lag time", hence a
 490	 * timer is needed to decrease the active utilization at the correct
 491	 * time.
 492	 */
 493	struct hrtimer inactive_timer;
 494};
 495
  /*
   * The four u8 members of 'b' overlay the single u32 's', so all of them
   * can be read or written at once through 's'. Used as
   * task_struct.rcu_read_unlock_special under CONFIG_PREEMPT_RCU.
   */
 496union rcu_special {
 497	struct {
 498		u8			blocked;
 499		u8			need_qs;
 500		u8			exp_need_qs;
 501
 502		/* Otherwise the compiler can store garbage here: */
 503		u8			pad;
 504	} b; /* Bits. */
 505	u32 s; /* Set of bits. */
 506};
 507
  /*
   * Index of a per-task perf event context. The valid indices are
   * perf_hw_context (0) and perf_sw_context (1); perf_nr_task_contexts (2)
   * is their count and sizes task_struct.perf_event_ctxp[].
   * perf_invalid_context (-1) is not a valid array index.
   */
 508enum perf_event_task_context {
 509	perf_invalid_context = -1,
 510	perf_hw_context = 0,
 511	perf_sw_context,
 512	perf_nr_task_contexts,
 513};
 514
  /*
   * Singly linked list node holding only a pointer to the next node.
   * Embedded in struct task_struct as 'wake_q'.
   */
 515struct wake_q_node {
 516	struct wake_q_node *next;
 517};
 518
 519struct task_struct {
 520#ifdef CONFIG_THREAD_INFO_IN_TASK
 521	/*
 522	 * For reasons of header soup (see current_thread_info()), this
 523	 * must be the first element of task_struct.
 524	 */
 525	struct thread_info		thread_info;
 526#endif
 527	/* -1 unrunnable, 0 runnable, >0 stopped: */
 528	volatile long			state;
 529
 530	/*
 531	 * This begins the randomizable portion of task_struct. Only
 532	 * scheduling-critical items should be added above here.
 533	 */
 534	randomized_struct_fields_start
 535
 536	void				*stack;
 537	atomic_t			usage;
 538	/* Per task flags (PF_*), defined further below: */
 539	unsigned int			flags;
 540	unsigned int			ptrace;
 541
 542#ifdef CONFIG_SMP
 543	struct llist_node		wake_entry;
 544	int				on_cpu;
 545#ifdef CONFIG_THREAD_INFO_IN_TASK
 546	/* Current CPU: */
 547	unsigned int			cpu;
 548#endif
 549	unsigned int			wakee_flips;
 550	unsigned long			wakee_flip_decay_ts;
 551	struct task_struct		*last_wakee;
 552
 553	int				wake_cpu;
 554#endif
 555	int				on_rq;
 556
 557	int				prio;
 558	int				static_prio;
 559	int				normal_prio;
 560	unsigned int			rt_priority;
 561
 562	const struct sched_class	*sched_class;
 563	struct sched_entity		se;
 564	struct sched_rt_entity		rt;
 565#ifdef CONFIG_CGROUP_SCHED
 566	struct task_group		*sched_task_group;
 567#endif
 568	struct sched_dl_entity		dl;
 569
 570#ifdef CONFIG_PREEMPT_NOTIFIERS
 571	/* List of struct preempt_notifier: */
 572	struct hlist_head		preempt_notifiers;
 573#endif
 574
 575#ifdef CONFIG_BLK_DEV_IO_TRACE
 576	unsigned int			btrace_seq;
 577#endif
 578
 579	unsigned int			policy;
 580	int				nr_cpus_allowed;
 581	cpumask_t			cpus_allowed;
 582
 583#ifdef CONFIG_PREEMPT_RCU
 584	int				rcu_read_lock_nesting;
 585	union rcu_special		rcu_read_unlock_special;
 586	struct list_head		rcu_node_entry;
 587	struct rcu_node			*rcu_blocked_node;
 588#endif /* #ifdef CONFIG_PREEMPT_RCU */
 589
 590#ifdef CONFIG_TASKS_RCU
 591	unsigned long			rcu_tasks_nvcsw;
 592	u8				rcu_tasks_holdout;
 593	u8				rcu_tasks_idx;
 594	int				rcu_tasks_idle_cpu;
 595	struct list_head		rcu_tasks_holdout_list;
 596#endif /* #ifdef CONFIG_TASKS_RCU */
 597
 598	struct sched_info		sched_info;
 599
 600	struct list_head		tasks;
 601#ifdef CONFIG_SMP
 602	struct plist_node		pushable_tasks;
 603	struct rb_node			pushable_dl_tasks;
 604#endif
 605
 606	struct mm_struct		*mm;
 607	struct mm_struct		*active_mm;
 608
 609	/* Per-thread vma caching: */
 610	struct vmacache			vmacache;
 611
 612#ifdef SPLIT_RSS_COUNTING
 613	struct task_rss_stat		rss_stat;
 614#endif
 615	int				exit_state;
 616	int				exit_code;
 617	int				exit_signal;
 618	/* The signal sent when the parent dies: */
 619	int				pdeath_signal;
 620	/* JOBCTL_*, siglock protected: */
 621	unsigned long			jobctl;
 622
 623	/* Used for emulating ABI behavior of previous Linux versions: */
 624	unsigned int			personality;
 625
 626	/* Scheduler bits, serialized by scheduler locks: */
 627	unsigned			sched_reset_on_fork:1;
 628	unsigned			sched_contributes_to_load:1;
 629	unsigned			sched_migrated:1;
 630	unsigned			sched_remote_wakeup:1;
 631	/* Force alignment to the next boundary: */
 632	unsigned			:0;
 633
 634	/* Unserialized, strictly 'current' */
 635
 636	/* Bit to tell LSMs we're in execve(): */
 637	unsigned			in_execve:1;
 638	unsigned			in_iowait:1;
 639#ifndef TIF_RESTORE_SIGMASK
 640	unsigned			restore_sigmask:1;
 641#endif
 642#ifdef CONFIG_MEMCG
 643	unsigned			memcg_may_oom:1;
 644#ifndef CONFIG_SLOB
 645	unsigned			memcg_kmem_skip_account:1;
 646#endif
 647#endif
 648#ifdef CONFIG_COMPAT_BRK
 649	unsigned			brk_randomized:1;
 650#endif
 651#ifdef CONFIG_CGROUPS
 652	/* disallow userland-initiated cgroup migration */
 653	unsigned			no_cgroup_migration:1;
 654#endif
 655
 656	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 657
 658	struct restart_block		restart_block;
 659
 660	pid_t				pid;
 661	pid_t				tgid;
 662
 663#ifdef CONFIG_CC_STACKPROTECTOR
 664	/* Canary value for the -fstack-protector GCC feature: */
 665	unsigned long			stack_canary;
 666#endif
 667	/*
 668	 * Pointers to the (original) parent process, youngest child, younger sibling,
 669	 * older sibling, respectively.  (p->father can be replaced with
 670	 * p->real_parent->pid)
 671	 */
 672
 673	/* Real parent process: */
 674	struct task_struct __rcu	*real_parent;
 675
 676	/* Recipient of SIGCHLD, wait4() reports: */
 677	struct task_struct __rcu	*parent;
 678
 679	/*
 680	 * Children/sibling form the list of natural children:
 681	 */
 682	struct list_head		children;
 683	struct list_head		sibling;
 684	struct task_struct		*group_leader;
 685
 686	/*
 687	 * 'ptraced' is the list of tasks this task is using ptrace() on.
 688	 *
 689	 * This includes both natural children and PTRACE_ATTACH targets.
 690	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
 691	 */
 692	struct list_head		ptraced;
 693	struct list_head		ptrace_entry;
 694
 695	/* PID/PID hash table linkage. */
 696	struct pid_link			pids[PIDTYPE_MAX];
 697	struct list_head		thread_group;
 698	struct list_head		thread_node;
 699
 700	struct completion		*vfork_done;
 701
 702	/* CLONE_CHILD_SETTID: */
 703	int __user			*set_child_tid;
 704
 705	/* CLONE_CHILD_CLEARTID: */
 706	int __user			*clear_child_tid;
 707
 708	u64				utime;
 709	u64				stime;
 710#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 711	u64				utimescaled;
 712	u64				stimescaled;
 713#endif
 714	u64				gtime;
 715	struct prev_cputime		prev_cputime;
 716#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 717	struct vtime			vtime;
 718#endif
 719
 720#ifdef CONFIG_NO_HZ_FULL
 721	atomic_t			tick_dep_mask;
 722#endif
 723	/* Context switch counts: */
 724	unsigned long			nvcsw;
 725	unsigned long			nivcsw;
 726
 727	/* Monotonic time in nsecs: */
 728	u64				start_time;
 729
 730	/* Boot based time in nsecs: */
 731	u64				real_start_time;
 732
 733	/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
 734	unsigned long			min_flt;
 735	unsigned long			maj_flt;
 736
 737#ifdef CONFIG_POSIX_TIMERS
 738	struct task_cputime		cputime_expires;
 739	struct list_head		cpu_timers[3];
 740#endif
 741
 742	/* Process credentials: */
 743
 744	/* Tracer's credentials at attach: */
 745	const struct cred __rcu		*ptracer_cred;
 746
 747	/* Objective and real subjective task credentials (COW): */
 748	const struct cred __rcu		*real_cred;
 749
 750	/* Effective (overridable) subjective task credentials (COW): */
 751	const struct cred __rcu		*cred;
 752
 753	/*
 754	 * executable name, excluding path.
 755	 *
 756	 * - normally initialized setup_new_exec()
 757	 * - access it with [gs]et_task_comm()
 758	 * - lock it with task_lock()
 759	 */
 760	char				comm[TASK_COMM_LEN];
 761
 762	struct nameidata		*nameidata;
 763
 764#ifdef CONFIG_SYSVIPC
 765	struct sysv_sem			sysvsem;
 766	struct sysv_shm			sysvshm;
 767#endif
 768#ifdef CONFIG_DETECT_HUNG_TASK
 769	unsigned long			last_switch_count;
 770#endif
 771	/* Filesystem information: */
 772	struct fs_struct		*fs;
 773
 774	/* Open file information: */
 775	struct files_struct		*files;
 776
 777	/* Namespaces: */
 778	struct nsproxy			*nsproxy;
 779
 780	/* Signal handlers: */
 781	struct signal_struct		*signal;
 782	struct sighand_struct		*sighand;
 783	sigset_t			blocked;
 784	sigset_t			real_blocked;
 785	/* Restored if set_restore_sigmask() was used: */
 786	sigset_t			saved_sigmask;
 787	struct sigpending		pending;
 788	unsigned long			sas_ss_sp;
 789	size_t				sas_ss_size;
 790	unsigned int			sas_ss_flags;
 791
 792	struct callback_head		*task_works;
 793
 794	struct audit_context		*audit_context;
 795#ifdef CONFIG_AUDITSYSCALL
 796	kuid_t				loginuid;
 797	unsigned int			sessionid;
 798#endif
 799	struct seccomp			seccomp;
 800
 801	/* Thread group tracking: */
 802	u32				parent_exec_id;
 803	u32				self_exec_id;
 804
 805	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
 806	spinlock_t			alloc_lock;
 807
 808	/* Protection of the PI data structures: */
 809	raw_spinlock_t			pi_lock;
 810
 811	struct wake_q_node		wake_q;
 812
 813#ifdef CONFIG_RT_MUTEXES
 814	/* PI waiters blocked on a rt_mutex held by this task: */
 815	struct rb_root			pi_waiters;
 816	struct rb_node			*pi_waiters_leftmost;
 817	/* Updated under owner's pi_lock and rq lock */
 818	struct task_struct		*pi_top_task;
 819	/* Deadlock detection and priority inheritance handling: */
 820	struct rt_mutex_waiter		*pi_blocked_on;
 821#endif
 822
 823#ifdef CONFIG_DEBUG_MUTEXES
 824	/* Mutex deadlock detection: */
 825	struct mutex_waiter		*blocked_on;
 826#endif
 827
 828#ifdef CONFIG_TRACE_IRQFLAGS
 829	unsigned int			irq_events;
 830	unsigned long			hardirq_enable_ip;
 831	unsigned long			hardirq_disable_ip;
 832	unsigned int			hardirq_enable_event;
 833	unsigned int			hardirq_disable_event;
 834	int				hardirqs_enabled;
 835	int				hardirq_context;
 836	unsigned long			softirq_disable_ip;
 837	unsigned long			softirq_enable_ip;
 838	unsigned int			softirq_disable_event;
 839	unsigned int			softirq_enable_event;
 840	int				softirqs_enabled;
 841	int				softirq_context;
 842#endif
 843
 844#ifdef CONFIG_LOCKDEP
 845# define MAX_LOCK_DEPTH			48UL
 846	u64				curr_chain_key;
 847	int				lockdep_depth;
 848	unsigned int			lockdep_recursion;
 849	struct held_lock		held_locks[MAX_LOCK_DEPTH];
 850#endif
 851
 852#ifdef CONFIG_LOCKDEP_CROSSRELEASE
 853#define MAX_XHLOCKS_NR 64UL
 854	struct hist_lock *xhlocks; /* Crossrelease history locks */
 855	unsigned int xhlock_idx;
 856	/* For restoring at history boundaries */
 857	unsigned int xhlock_idx_hist[XHLOCK_CTX_NR];
 858	unsigned int hist_id;
 859	/* For overwrite check at each context exit */
 860	unsigned int hist_id_save[XHLOCK_CTX_NR];
 861#endif
 862
 863#ifdef CONFIG_UBSAN
 864	unsigned int			in_ubsan;
 865#endif
 866
 867	/* Journalling filesystem info: */
 868	void				*journal_info;
 869
 870	/* Stacked block device info: */
 871	struct bio_list			*bio_list;
 872
 873#ifdef CONFIG_BLOCK
 874	/* Stack plugging: */
 875	struct blk_plug			*plug;
 876#endif
 877
 878	/* VM state: */
 879	struct reclaim_state		*reclaim_state;
 880
 881	struct backing_dev_info		*backing_dev_info;
 882
 883	struct io_context		*io_context;
 884
 885	/* Ptrace state: */
 886	unsigned long			ptrace_message;
 887	siginfo_t			*last_siginfo;
 888
 889	struct task_io_accounting	ioac;
 890#ifdef CONFIG_TASK_XACCT
 891	/* Accumulated RSS usage: */
 892	u64				acct_rss_mem1;
 893	/* Accumulated virtual memory usage: */
 894	u64				acct_vm_mem1;
 895	/* stime + utime since last update: */
 896	u64				acct_timexpd;
 897#endif
 898#ifdef CONFIG_CPUSETS
 899	/* Protected by ->alloc_lock: */
 900	nodemask_t			mems_allowed;
 901	/* Sequence number to catch updates: */
 902	seqcount_t			mems_allowed_seq;
 903	int				cpuset_mem_spread_rotor;
 904	int				cpuset_slab_spread_rotor;
 905#endif
 906#ifdef CONFIG_CGROUPS
 907	/* Control Group info protected by css_set_lock: */
 908	struct css_set __rcu		*cgroups;
 909	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
 910	struct list_head		cg_list;
 911#endif
 912#ifdef CONFIG_INTEL_RDT
 913	u32				closid;
 914	u32				rmid;
 915#endif
 916#ifdef CONFIG_FUTEX
 917	struct robust_list_head __user	*robust_list;
 918#ifdef CONFIG_COMPAT
 919	struct compat_robust_list_head __user *compat_robust_list;
 920#endif
 921	struct list_head		pi_state_list;
 922	struct futex_pi_state		*pi_state_cache;
 923#endif
 924#ifdef CONFIG_PERF_EVENTS
 925	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
 926	struct mutex			perf_event_mutex;
 927	struct list_head		perf_event_list;
 928#endif
 929#ifdef CONFIG_DEBUG_PREEMPT
 930	unsigned long			preempt_disable_ip;
 931#endif
 932#ifdef CONFIG_NUMA
 933	/* Protected by alloc_lock: */
 934	struct mempolicy		*mempolicy;
 935	short				il_prev;
 936	short				pref_node_fork;
 937#endif
 938#ifdef CONFIG_NUMA_BALANCING
 939	int				numa_scan_seq;
 940	unsigned int			numa_scan_period;
 941	unsigned int			numa_scan_period_max;
 942	int				numa_preferred_nid;
 943	unsigned long			numa_migrate_retry;
 944	/* Migration stamp: */
 945	u64				node_stamp;
 946	u64				last_task_numa_placement;
 947	u64				last_sum_exec_runtime;
 948	struct callback_head		numa_work;
 949
 950	struct list_head		numa_entry;
 951	struct numa_group		*numa_group;
 952
 953	/*
 954	 * numa_faults is an array split into four regions:
 955	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
 956	 * in this precise order.
 957	 *
 958	 * faults_memory: Exponential decaying average of faults on a per-node
 959	 * basis. Scheduling placement decisions are made based on these
 960	 * counts. The values remain static for the duration of a PTE scan.
 961	 * faults_cpu: Track the nodes the process was running on when a NUMA
 962	 * hinting fault was incurred.
 963	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
 964	 * during the current scan window. When the scan completes, the counts
 965	 * in faults_memory and faults_cpu decay and these values are copied.
 966	 */
 967	unsigned long			*numa_faults;
 968	unsigned long			total_numa_faults;
 969
 970	/*
 971	 * numa_faults_locality tracks if faults recorded during the last
 972	 * scan window were remote/local or failed to migrate. The task scan
 973	 * period is adapted based on the locality of the faults with different
 974	 * weights depending on whether they were shared or private faults
 975	 */
 976	unsigned long			numa_faults_locality[3];
 977
 978	unsigned long			numa_pages_migrated;
 979#endif /* CONFIG_NUMA_BALANCING */
 980
 981	struct tlbflush_unmap_batch	tlb_ubc;
 982
 983	struct rcu_head			rcu;
 984
 985	/* Cache last used pipe for splice(): */
 986	struct pipe_inode_info		*splice_pipe;
 987
 988	struct page_frag		task_frag;
 989
 990#ifdef CONFIG_TASK_DELAY_ACCT
 991	struct task_delay_info		*delays;
 992#endif
 993
 994#ifdef CONFIG_FAULT_INJECTION
 995	int				make_it_fail;
 996	unsigned int			fail_nth;
 997#endif
 998	/*
 999	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
1000	 * balance_dirty_pages() for a dirty throttling pause:
1001	 */
1002	int				nr_dirtied;
1003	int				nr_dirtied_pause;
1004	/* Start of a write-and-pause period: */
1005	unsigned long			dirty_paused_when;
1006
1007#ifdef CONFIG_LATENCYTOP
1008	int				latency_record_count;
1009	struct latency_record		latency_record[LT_SAVECOUNT];
1010#endif
1011	/*
1012	 * Time slack values; these are used to round up poll() and
1013	 * select() etc timeout values. These are in nanoseconds.
1014	 */
1015	u64				timer_slack_ns;
1016	u64				default_timer_slack_ns;
1017
1018#ifdef CONFIG_KASAN
1019	unsigned int			kasan_depth;
1020#endif
1021
1022#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1023	/* Index of current stored address in ret_stack: */
1024	int				curr_ret_stack;
1025
1026	/* Stack of return addresses for return function tracing: */
1027	struct ftrace_ret_stack		*ret_stack;
1028
1029	/* Timestamp for last schedule: */
1030	unsigned long long		ftrace_timestamp;
1031
1032	/*
1033	 * Number of functions that haven't been traced
1034	 * because of depth overrun:
1035	 */
1036	atomic_t			trace_overrun;
1037
1038	/* Pause tracing: */
1039	atomic_t			tracing_graph_pause;
1040#endif
1041
1042#ifdef CONFIG_TRACING
1043	/* State flags for use by tracers: */
1044	unsigned long			trace;
1045
1046	/* Bitmask and counter of trace recursion: */
1047	unsigned long			trace_recursion;
1048#endif /* CONFIG_TRACING */
1049
1050#ifdef CONFIG_KCOV
1051	/* Coverage collection mode enabled for this task (0 if disabled): */
1052	enum kcov_mode			kcov_mode;
1053
1054	/* Size of the kcov_area: */
1055	unsigned int			kcov_size;
1056
1057	/* Buffer for coverage collection: */
1058	void				*kcov_area;
1059
1060	/* KCOV descriptor wired with this task or NULL: */
1061	struct kcov			*kcov;
1062#endif
1063
1064#ifdef CONFIG_MEMCG
1065	struct mem_cgroup		*memcg_in_oom;
1066	gfp_t				memcg_oom_gfp_mask;
1067	int				memcg_oom_order;
1068
1069	/* Number of pages to reclaim on returning to userland: */
1070	unsigned int			memcg_nr_pages_over_high;
1071#endif
1072
1073#ifdef CONFIG_UPROBES
1074	struct uprobe_task		*utask;
1075#endif
1076#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
1077	unsigned int			sequential_io;
1078	unsigned int			sequential_io_avg;
1079#endif
1080#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1081	unsigned long			task_state_change;
1082#endif
1083	int				pagefault_disabled;
1084#ifdef CONFIG_MMU
1085	struct task_struct		*oom_reaper_list;
1086#endif
1087#ifdef CONFIG_VMAP_STACK
1088	struct vm_struct		*stack_vm_area;
1089#endif
1090#ifdef CONFIG_THREAD_INFO_IN_TASK
1091	/* A live task holds one reference: */
1092	atomic_t			stack_refcount;
1093#endif
1094#ifdef CONFIG_LIVEPATCH
1095	int patch_state;
1096#endif
1097#ifdef CONFIG_SECURITY
1098	/* Used by LSM modules for access restriction: */
1099	void				*security;
1100#endif
1101
1102	/*
1103	 * New fields for task_struct should be added above here, so that
1104	 * they are included in the randomized portion of task_struct.
1105	 */
1106	randomized_struct_fields_end
1107
1108	/* CPU-specific state of this task: */
1109	struct thread_struct		thread;
1110
1111	/*
1112	 * WARNING: on x86, 'thread_struct' contains a variable-sized
1113	 * structure.  It *MUST* be at the end of 'task_struct'.
1114	 *
1115	 * Do not put anything below here!
1116	 */
1117};
1118
1119static inline struct pid *task_pid(struct task_struct *task)
1120{
1121	return task->pids[PIDTYPE_PID].pid;
1122}
1123
1124static inline struct pid *task_tgid(struct task_struct *task)
1125{
1126	return task->group_leader->pids[PIDTYPE_PID].pid;
1127}
1128
1129/*
1130 * Without tasklist or RCU lock it is not safe to dereference
1131 * the result of task_pgrp/task_session even if task == current,
1132 * we can race with another thread doing sys_setsid/sys_setpgid.
1133 */
1134static inline struct pid *task_pgrp(struct task_struct *task)
1135{
1136	return task->group_leader->pids[PIDTYPE_PGID].pid;
1137}
1138
1139static inline struct pid *task_session(struct task_struct *task)
1140{
1141	return task->group_leader->pids[PIDTYPE_SID].pid;
1142}
1143
1144/*
1145 * the helpers to get the task's different pids as they are seen
1146 * from various namespaces
1147 *
1148 * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
1149 * task_xid_vnr()    : virtual id, i.e. the id seen from the pid namespace of
1150 *                     current.
1151 * task_xid_nr_ns()  : id seen from the ns specified;
1152 *
1153 * see also pid_nr() etc in include/linux/pid.h
1154 */
1155pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
1156
1157static inline pid_t task_pid_nr(struct task_struct *tsk)
1158{
1159	return tsk->pid;
1160}
1161
1162static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
1163{
1164	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
1165}
1166
1167static inline pid_t task_pid_vnr(struct task_struct *tsk)
1168{
1169	return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
1170}
1171
1172
1173static inline pid_t task_tgid_nr(struct task_struct *tsk)
1174{
1175	return tsk->tgid;
1176}
1177
1178/**
1179 * pid_alive - check that a task structure is not stale
1180 * @p: Task structure to be checked.
1181 *
1182 * Test if a process is not yet dead (at most zombie state)
1183 * If pid_alive fails, then pointers within the task structure
1184 * can be stale and must not be dereferenced.
1185 *
1186 * Return: 1 if the process is alive. 0 otherwise.
1187 */
1188static inline int pid_alive(const struct task_struct *p)
1189{
1190	return p->pids[PIDTYPE_PID].pid != NULL;
1191}
1192
1193static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
1194{
1195	return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
1196}
1197
1198static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
1199{
1200	return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
1201}
1202
1203
1204static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
1205{
1206	return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
1207}
1208
1209static inline pid_t task_session_vnr(struct task_struct *tsk)
1210{
1211	return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
1212}
1213
1214static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
1215{
1216	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
1217}
1218
1219static inline pid_t task_tgid_vnr(struct task_struct *tsk)
1220{
1221	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
1222}
1223
1224static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
1225{
1226	pid_t pid = 0;
1227
1228	rcu_read_lock();
1229	if (pid_alive(tsk))
1230		pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
1231	rcu_read_unlock();
1232
1233	return pid;
1234}
1235
1236static inline pid_t task_ppid_nr(const struct task_struct *tsk)
1237{
1238	return task_ppid_nr_ns(tsk, &init_pid_ns);
1239}
1240
1241/* Obsolete, do not use: */
1242static inline pid_t task_pgrp_nr(struct task_struct *tsk)
1243{
1244	return task_pgrp_nr_ns(tsk, &init_pid_ns);
1245}
1246
1247static inline char task_state_to_char(struct task_struct *task)
1248{
1249	const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
1250	unsigned long state = task->state;
1251
1252	state = state ? __ffs(state) + 1 : 0;
1253
1254	/* Make sure the string lines up properly with the number of task states: */
1255	BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1);
1256
1257	return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
1258}
1259
1260/**
1261 * is_global_init - check if a task structure is init. Since init
1262 * is free to have sub-threads we need to check tgid.
1263 * @tsk: Task structure to be checked.
1264 *
1265 * Check if a task structure is the first user space task the kernel created.
1266 *
1267 * Return: 1 if the task structure is init. 0 otherwise.
1268 */
1269static inline int is_global_init(struct task_struct *tsk)
1270{
1271	return task_tgid_nr(tsk) == 1;
1272}
1273
1274extern struct pid *cad_pid;
1275
/*
 * Per-process flags (bits of task_struct::flags):
 */
#define PF_IDLE			0x00000002	/* This is an IDLE thread */
#define PF_EXITING		0x00000004	/* Task is getting shut down */
#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
#define PF_VCPU			0x00000010	/* This is a virtual CPU */
#define PF_WQ_WORKER		0x00000020	/* This is a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but has not exec'ed */
#define PF_MCE_PROCESS		0x00000080	/* Process policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* Has used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* Has dumped core */
#define PF_SIGNALED		0x00000400	/* Was killed by a signal */
#define PF_MEMALLOC		0x00000800	/* Is allocating memory */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
#define PF_USED_ASYNC		0x00004000	/* Used async_schedule*(), used by module init */
#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
#define PF_FROZEN		0x00010000	/* Frozen for system suspend */
#define PF_KSWAPD		0x00020000	/* This is kswapd */
#define PF_MEMALLOC_NOFS	0x00040000	/* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO	0x00080000	/* All allocation requests will inherit GFP_NOIO */
#define PF_LESS_THROTTLE	0x00100000	/* Throttle this task less: it cleans memory */
#define PF_KTHREAD		0x00200000	/* This is a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
#define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
#define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK		0x80000000	/* This thread called freeze_processes() and should not be frozen */
1307
/*
 * As a rule only the _current_ task may read/write tsk->flags. Other
 * tasks may still look at tsk->flags read-only, for example through
 * tsk_used_math() (as happens during threaded core dumping).
 *
 * Two exceptions exist, ptrace and fork: the ptracer may write to the
 * child->flags of its traced child, and a forking parent may write to
 * the child->flags of the new child. Both are safe because the child is
 * guaranteed not to be running, and hence not to be changing
 * child->flags, while the parent does so.
 */
#define clear_stopped_child_used_math(child)				\
	do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child)				\
	do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math()	clear_stopped_child_used_math(current)
#define set_used_math()		set_stopped_child_used_math(current)

/* Clear PF_USED_MATH in @child, then set it again iff @condition is true. */
#define conditional_stopped_child_used_math(condition, child)		\
	do {								\
		(child)->flags &= ~PF_USED_MATH;			\
		(child)->flags |= (condition) ? PF_USED_MATH : 0;	\
	} while (0)

#define conditional_used_math(condition)				\
	conditional_stopped_child_used_math(condition, current)

/* Make @child's PF_USED_MATH bit equal to that of current. */
#define copy_to_stopped_child_used_math(child)				\
	do {								\
		(child)->flags &= ~PF_USED_MATH;			\
		(child)->flags |= current->flags & PF_USED_MATH;	\
	} while (0)

/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p)	((p)->flags & PF_USED_MATH)
#define used_math()		tsk_used_math(current)
1335
1336static inline bool is_percpu_thread(void)
1337{
1338#ifdef CONFIG_SMP
1339	return (current->flags & PF_NO_SETAFFINITY) &&
1340		(current->nr_cpus_allowed  == 1);
1341#else
1342	return true;
1343#endif
1344}
1345
1346/* Per-process atomic flags. */
1347#define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
1348#define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
1349#define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
1350
1351
1352#define TASK_PFA_TEST(name, func)					\
1353	static inline bool task_##func(struct task_struct *p)		\
1354	{ return test_bit(PFA_##name, &p->atomic_flags); }
1355
1356#define TASK_PFA_SET(name, func)					\
1357	static inline void task_set_##func(struct task_struct *p)	\
1358	{ set_bit(PFA_##name, &p->atomic_flags); }
1359
1360#define TASK_PFA_CLEAR(name, func)					\
1361	static inline void task_clear_##func(struct task_struct *p)	\
1362	{ clear_bit(PFA_##name, &p->atomic_flags); }
1363
1364TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
1365TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
1366
1367TASK_PFA_TEST(SPREAD_PAGE, spread_page)
1368TASK_PFA_SET(SPREAD_PAGE, spread_page)
1369TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
1370
1371TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
1372TASK_PFA_SET(SPREAD_SLAB, spread_slab)
1373TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
1374
1375static inline void
1376current_restore_flags(unsigned long orig_flags, unsigned long flags)
1377{
1378	current->flags &= ~flags;
1379	current->flags |= orig_flags & flags;
1380}
1381
1382extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
1383extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
1384#ifdef CONFIG_SMP
1385extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
1386extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
1387#else
1388static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
1389{
1390}
1391static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
1392{
1393	if (!cpumask_test_cpu(0, new_mask))
1394		return -EINVAL;
1395	return 0;
1396}
1397#endif
1398
/* Fall back to cpu_relax() unless cpu_relax_yield() is already defined. */
#ifndef cpu_relax_yield
# define cpu_relax_yield()	cpu_relax()
#endif
1402
1403extern int yield_to(struct task_struct *p, bool preempt);
1404extern void set_user_nice(struct task_struct *p, long nice);
1405extern int task_prio(const struct task_struct *p);
1406
1407/**
1408 * task_nice - return the nice value of a given task.
1409 * @p: the task in question.
1410 *
1411 * Return: The nice value [ -20 ... 0 ... 19 ].
1412 */
1413static inline int task_nice(const struct task_struct *p)
1414{
1415	return PRIO_TO_NICE((p)->static_prio);
1416}
1417
/* Implemented outside this header: */
int can_nice(const struct task_struct *p, const int nice);
int task_curr(const struct task_struct *p);
int idle_cpu(int cpu);
int sched_setscheduler(struct task_struct *p, int policy,
		       const struct sched_param *param);
int sched_setscheduler_nocheck(struct task_struct *p, int policy,
			       const struct sched_param *param);
int sched_setattr(struct task_struct *p, const struct sched_attr *attr);
struct task_struct *idle_task(int cpu);
1425
1426/**
1427 * is_idle_task - is the specified task an idle task?
1428 * @p: the task in question.
1429 *
1430 * Return: 1 if @p is an idle task. 0 otherwise.
1431 */
1432static inline bool is_idle_task(const struct task_struct *p)
1433{
1434	return !!(p->flags & PF_IDLE);
1435}
1436
/* Implemented outside this header: */
struct task_struct *curr_task(int cpu);
void ia64_set_curr_task(int cpu, struct task_struct *p);

void yield(void);
1441
1442union thread_union {
1443#ifndef CONFIG_THREAD_INFO_IN_TASK
1444	struct thread_info thread_info;
1445#endif
1446	unsigned long stack[THREAD_SIZE/sizeof(long)];
1447};
1448
/*
 * Locate the thread_info of @task: embedded in task_struct when
 * CONFIG_THREAD_INFO_IN_TASK is set, otherwise found at task->stack
 * (unless __HAVE_THREAD_FUNCTIONS is defined, in which case nothing
 * is provided here).
 */
#ifdef CONFIG_THREAD_INFO_IN_TASK
static inline struct thread_info *task_thread_info(struct task_struct *task)
{
	struct thread_info *ti = &task->thread_info;

	return ti;
}
#else
# ifndef __HAVE_THREAD_FUNCTIONS
#  define task_thread_info(task)	((struct thread_info *)(task)->stack)
# endif
#endif
1457
1458/*
1459 * find a task by one of its numerical ids
1460 *
1461 * find_task_by_pid_ns():
1462 *      finds a task by its pid in the specified namespace
1463 * find_task_by_vpid():
1464 *      finds a task by its virtual pid
1465 *
1466 * see also find_vpid() etc in include/linux/pid.h
1467 */
1468
1469extern struct task_struct *find_task_by_vpid(pid_t nr);
1470extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);
1471
/* Wakeup entry points, implemented outside this header: */
int wake_up_state(struct task_struct *tsk, unsigned int state);
int wake_up_process(struct task_struct *tsk);
void wake_up_new_task(struct task_struct *tsk);
1475
/* kick_process() is an empty stub on !SMP builds. */
#ifndef CONFIG_SMP
static inline void kick_process(struct task_struct *tsk) { }
#else
void kick_process(struct task_struct *tsk);
#endif
1481
1482extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
1483
1484static inline void set_task_comm(struct task_struct *tsk, const char *from)
1485{
1486	__set_task_comm(tsk, from, false);
1487}
1488
1489extern char *get_task_comm(char *to, struct task_struct *tsk);
1490
/*
 * On !SMP builds scheduler_ipi() is an empty stub and
 * wait_task_inactive() unconditionally returns 1.
 */
#ifndef CONFIG_SMP
static inline void scheduler_ipi(void) { }
static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state)
{
	return 1;
}
#else /* CONFIG_SMP */
void scheduler_ipi(void);
unsigned long wait_task_inactive(struct task_struct *p, long match_state);
#endif /* !CONFIG_SMP */
1501
/*
 * Helpers to manipulate thread flags in another task's thread_info.
 * The available TIF_xxxx flags are listed in asm/thread_info.h.
 */

/* Set thread flag @flag in @tsk's thread_info. */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	set_ti_thread_flag(ti, flag);
}
1510
/* Clear thread flag @flag in @tsk's thread_info. */
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	clear_ti_thread_flag(ti, flag);
}
1515
/*
 * Apply test_and_set_ti_thread_flag() to @tsk's thread_info for @flag
 * and pass its result through.
 */
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	return test_and_set_ti_thread_flag(ti, flag);
}
1520
/*
 * Apply test_and_clear_ti_thread_flag() to @tsk's thread_info for @flag
 * and pass its result through.
 */
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	return test_and_clear_ti_thread_flag(ti, flag);
}
1525
/*
 * Apply test_ti_thread_flag() to @tsk's thread_info for @flag and pass
 * its result through.
 */
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	return test_ti_thread_flag(ti, flag);
}
1530
1531static inline void set_tsk_need_resched(struct task_struct *tsk)
1532{
1533	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1534}
1535
1536static inline void clear_tsk_need_resched(struct task_struct *tsk)
1537{
1538	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1539}
1540
1541static inline int test_tsk_need_resched(struct task_struct *tsk)
1542{
1543	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
1544}
1545
1546/*
1547 * cond_resched() and cond_resched_lock(): latency reduction via
1548 * explicit rescheduling in places that are safe. The return
1549 * value indicates whether a reschedule was done in fact.
1550 * cond_resched_lock() will drop the spinlock before scheduling,
1551 * cond_resched_softirq() will enable bhs before scheduling.
1552 */
1553#ifndef CONFIG_PREEMPT
1554extern int _cond_resched(void);
1555#else
1556static inline int _cond_resched(void) { return 0; }
1557#endif
1558
1559#define cond_resched() ({			\
1560	___might_sleep(__FILE__, __LINE__, 0);	\
1561	_cond_resched();			\
1562})
1563
1564extern int __cond_resched_lock(spinlock_t *lock);
1565
1566#define cond_resched_lock(lock) ({				\
1567	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
1568	__cond_resched_lock(lock);				\
1569})
1570
1571extern int __cond_resched_softirq(void);
1572
1573#define cond_resched_softirq() ({					\
1574	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
1575	__cond_resched_softirq();					\
1576})
1577
/*
 * Leave the RCU read-side critical section, call cond_resched(), then
 * re-enter it. The body is compiled out when CONFIG_PREEMPT_RCU is set
 * and CONFIG_DEBUG_ATOMIC_SLEEP is not.
 */
static inline void cond_resched_rcu(void)
{
#if !defined(CONFIG_PREEMPT_RCU) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
#endif
}
1586
1587/*
1588 * Does a critical section need to be broken due to another
1589 * task waiting?: (technically does not depend on CONFIG_PREEMPT,
1590 * but a general need for low latency)
1591 */
1592static inline int spin_needbreak(spinlock_t *lock)
1593{
1594#ifdef CONFIG_PREEMPT
1595	return spin_is_contended(lock);
1596#else
1597	return 0;
1598#endif
1599}
1600
1601static __always_inline bool need_resched(void)
1602{
1603	return unlikely(tif_need_resched());
1604}
1605
/*
 * Accessors for the CPU number recorded for a task
 * (p->thread_info->cpu, or p->cpu with CONFIG_THREAD_INFO_IN_TASK).
 * On UP they are no-ops: task_cpu() is always 0 and set_task_cpu()
 * does nothing.
 */
#ifndef CONFIG_SMP

static inline unsigned int task_cpu(const struct task_struct *p)
{
	return 0;
}

static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}

#else /* CONFIG_SMP */

static inline unsigned int task_cpu(const struct task_struct *p)
{
#ifndef CONFIG_THREAD_INFO_IN_TASK
	return task_thread_info(p)->cpu;
#else
	return p->cpu;
#endif
}

void set_task_cpu(struct task_struct *p, unsigned int cpu);

#endif /* !CONFIG_SMP */
1634
/*
 * To reduce various lock holder preemption latencies, provide an
 * interface for asking whether a vCPU is currently running or not.
 *
 * With it, optimistic spin loops can be terminated and the spinner can
 * block instead, analogous to the native optimistic spin heuristic of
 * testing whether the lock owner task is running or not.
 *
 * Unless vcpu_is_preempted() is already defined, it evaluates to false.
 */
#ifndef vcpu_is_preempted
# define vcpu_is_preempted(cpu)		false
#endif
1646
1647extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
1648extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
1649
/* Unless TASK_SIZE_OF() is already defined, it ignores @tsk and yields TASK_SIZE. */
#ifndef TASK_SIZE_OF
# define TASK_SIZE_OF(tsk)	TASK_SIZE
#endif
1653
1654#endif