Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LINUX_PSI_TYPES_H
3#define _LINUX_PSI_TYPES_H
4
5#include <linux/kthread.h>
6#include <linux/seqlock.h>
7#include <linux/types.h>
8#include <linux/kref.h>
9#include <linux/wait.h>
10
11#ifdef CONFIG_PSI
12
/* Tracked task states */
enum psi_task_count {
	NR_IOWAIT,
	NR_MEMSTALL,
	NR_RUNNING,
	/*
	 * This can't have values other than 0 or 1 and could be
	 * implemented as a bit flag. But for now we still have room
	 * in the first cacheline of psi_group_cpu, and this way we
	 * don't have to special case any state tracking for it.
	 */
	NR_ONCPU,
	/*
	 * For IO and CPU stalls the presence of running/oncpu tasks
	 * in the domain means a partial rather than a full stall.
	 * For memory it's not so simple because of page reclaimers:
	 * they are running/oncpu while representing a stall. To tell
	 * whether a domain has productivity left or not, we need to
	 * distinguish between regular running (i.e. productive)
	 * threads and memstall ones.
	 */
	NR_MEMSTALL_RUNNING,
	/* Number of tracked states; sizes psi_group_cpu.tasks[] */
	NR_PSI_TASK_COUNTS = 5,
};
37
/* Task state bitmasks: one bit per enum psi_task_count entry */
#define TSK_IOWAIT		(1 << NR_IOWAIT)
#define TSK_MEMSTALL		(1 << NR_MEMSTALL)
#define TSK_RUNNING		(1 << NR_RUNNING)
#define TSK_ONCPU		(1 << NR_ONCPU)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)
44
/* Resources that workloads could be stalled on */
enum psi_res {
	PSI_IO,
	PSI_MEM,
	PSI_CPU,
	/* Number of resources listed above */
	NR_PSI_RESOURCES = 3,
};
52
/*
 * Pressure states for each resource:
 *
 * SOME: Stalled tasks & working tasks
 * FULL: Stalled tasks & no working tasks
 */
enum psi_states {
	PSI_IO_SOME,
	PSI_IO_FULL,
	PSI_MEM_SOME,
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE,
	/*
	 * Number of states including PSI_NONIDLE. Arrays that leave out
	 * the per-CPU-only PSI_NONIDLE are sized NR_PSI_STATES - 1.
	 */
	NR_PSI_STATES = 7,
};
70
/*
 * Aggregators that sample the per-CPU time buckets. Each one gets its
 * own row in psi_group_cpu.times_prev[] and psi_group.total[].
 */
enum psi_aggregators {
	PSI_AVGS = 0,
	PSI_POLL,
	NR_PSI_AGGREGATORS,
};
76
77struct psi_group_cpu {
78 /* 1st cacheline updated by the scheduler */
79
80 /* Aggregator needs to know of concurrent changes */
81 seqcount_t seq ____cacheline_aligned_in_smp;
82
83 /* States of the tasks belonging to this group */
84 unsigned int tasks[NR_PSI_TASK_COUNTS];
85
86 /* Aggregate pressure state derived from the tasks */
87 u32 state_mask;
88
89 /* Period time sampling buckets for each state of interest (ns) */
90 u32 times[NR_PSI_STATES];
91
92 /* Time of last task change in this group (rq_clock) */
93 u64 state_start;
94
95 /* 2nd cacheline updated by the aggregator */
96
97 /* Delta detection against the sampling buckets */
98 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
99 ____cacheline_aligned_in_smp;
100};
101
102/* PSI growth tracking window */
103struct psi_window {
104 /* Window size in ns */
105 u64 size;
106
107 /* Start time of the current window in ns */
108 u64 start_time;
109
110 /* Value at the start of the window */
111 u64 start_value;
112
113 /* Value growth in the previous window */
114 u64 prev_growth;
115};
116
117struct psi_trigger {
118 /* PSI state being monitored by the trigger */
119 enum psi_states state;
120
121 /* User-spacified threshold in ns */
122 u64 threshold;
123
124 /* List node inside triggers list */
125 struct list_head node;
126
127 /* Backpointer needed during trigger destruction */
128 struct psi_group *group;
129
130 /* Wait queue for polling */
131 wait_queue_head_t event_wait;
132
133 /* Pending event flag */
134 int event;
135
136 /* Tracking window */
137 struct psi_window win;
138
139 /*
140 * Time last event was generated. Used for rate-limiting
141 * events to one per window
142 */
143 u64 last_event_time;
144};
145
146struct psi_group {
147 /* Protects data used by the aggregator */
148 struct mutex avgs_lock;
149
150 /* Per-cpu task state & time tracking */
151 struct psi_group_cpu __percpu *pcpu;
152
153 /* Running pressure averages */
154 u64 avg_total[NR_PSI_STATES - 1];
155 u64 avg_last_update;
156 u64 avg_next_update;
157
158 /* Aggregator work control */
159 struct delayed_work avgs_work;
160
161 /* Total stall times and sampled pressure averages */
162 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
163 unsigned long avg[NR_PSI_STATES - 1][3];
164
165 /* Monitor work control */
166 struct task_struct __rcu *poll_task;
167 struct timer_list poll_timer;
168 wait_queue_head_t poll_wait;
169 atomic_t poll_wakeup;
170
171 /* Protects data used by the monitor */
172 struct mutex trigger_lock;
173
174 /* Configured polling triggers */
175 struct list_head triggers;
176 u32 nr_triggers[NR_PSI_STATES - 1];
177 u32 poll_states;
178 u64 poll_min_period;
179
180 /* Total stall times at the start of monitor activation */
181 u64 polling_total[NR_PSI_STATES - 1];
182 u64 polling_next_update;
183 u64 polling_until;
184};
185
186#else /* CONFIG_PSI */
187
/* CONFIG_PSI disabled: empty placeholder so struct psi_group stays a complete type */
struct psi_group { };
189
190#endif /* CONFIG_PSI */
191
192#endif /* _LINUX_PSI_TYPES_H */