linux_4_9: remove

Upstream support will be dropped on 01 Jan 2023 [1]. Normally we'd keep
the kernel around until then, but since 4.9 is an LTS kernel, it seems
better to remove it before 22.11 to make sure there are no unpleasant
surprises.

Closes #199933

[1] https://endoflife.date/linux

+10 -805
+6  nixos/doc/manual/from_md/release-notes/rl-2211.section.xml
@@ -863,6 +863,12 @@
      </listitem>
      <listitem>
       <para>
+       Linux 4.9 has been removed because it will reach its end of
+       life within the lifespan of 22.11.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
        (Neo)Vim can not be configured with
        <literal>configure.pathogen</literal> anymore to reduce
        maintainance burden. Use <literal>configure.packages</literal>
+2  nixos/doc/manual/release-notes/rl-2211.section.md
@@ -265,6 +265,8 @@
 
 - The top-level `termonad-with-packages` alias for `termonad` has been removed.
 
+- Linux 4.9 has been removed because it will reach its end of life within the lifespan of 22.11.
+
 - (Neo)Vim can not be configured with `configure.pathogen` anymore to reduce maintainance burden.
   Use `configure.packages` instead.
 - Neovim can not be configured with plug anymore (still works for vim).
-784  pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch
@@ -1,784 +0,0 @@
-commit 280858b0bb3384b9ec06b455e196b453888bd6b8
-Author: Tejun Heo <tj@kernel.org>
-Date:   Fri Mar 11 07:31:23 2016 -0500
-
-    sched: Misc preps for cgroup unified hierarchy interface
-
-    Make the following changes in preparation for the cpu controller
-    interface implementation for the unified hierarchy.  This patch
-    doesn't cause any functional differences.
-
-    * s/cpu_stats_show()/cpu_cfs_stats_show()/
-
-    * s/cpu_files/cpu_legacy_files/
-
-    * Separate out cpuacct_stats_read() from cpuacct_stats_show().  While
-      at it, make the @val array u64 for consistency.
-
-    Signed-off-by: Tejun Heo <tj@kernel.org>
-    Cc: Ingo Molnar <mingo@redhat.com>
-    Cc: Peter Zijlstra <peterz@infradead.org>
-    Cc: Li Zefan <lizefan@huawei.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 154fd689fe02..57472485b79c 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -8705,7 +8705,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
- 	return ret;
- }
-
--static int cpu_stats_show(struct seq_file *sf, void *v)
-+static int cpu_cfs_stats_show(struct seq_file *sf, void *v)
- {
- 	struct task_group *tg = css_tg(seq_css(sf));
- 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
-@@ -8745,7 +8745,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
- }
- #endif /* CONFIG_RT_GROUP_SCHED */
-
--static struct cftype cpu_files[] = {
-+static struct cftype cpu_legacy_files[] = {
- #ifdef CONFIG_FAIR_GROUP_SCHED
- 	{
- 		.name = "shares",
-@@ -8766,7 +8766,7 @@ static struct cftype cpu_files[] = {
- 	},
- 	{
- 		.name = "stat",
--		.seq_show = cpu_stats_show,
-+		.seq_show = cpu_cfs_stats_show,
- 	},
- #endif
- #ifdef CONFIG_RT_GROUP_SCHED
-@@ -8791,7 +8791,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
- 	.fork = cpu_cgroup_fork,
- 	.can_attach = cpu_cgroup_can_attach,
- 	.attach = cpu_cgroup_attach,
--	.legacy_cftypes = cpu_files,
-+	.legacy_cftypes = cpu_legacy_files,
- 	.early_init = true,
- };
-
-diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
-index bc0b309c3f19..d1e5dd0b3a64 100644
---- a/kernel/sched/cpuacct.c
-+++ b/kernel/sched/cpuacct.c
-@@ -276,26 +276,33 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
- 	return 0;
- }
-
--static int cpuacct_stats_show(struct seq_file *sf, void *v)
-+static void cpuacct_stats_read(struct cpuacct *ca,
-+			       u64 (*val)[CPUACCT_STAT_NSTATS])
- {
--	struct cpuacct *ca = css_ca(seq_css(sf));
--	s64 val[CPUACCT_STAT_NSTATS];
- 	int cpu;
--	int stat;
-
--	memset(val, 0, sizeof(val));
-+	memset(val, 0, sizeof(*val));
-+
- 	for_each_possible_cpu(cpu) {
- 		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
-
--		val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER];
--		val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE];
--		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
--		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
--		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
-+		(*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER];
-+		(*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE];
-+		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
-+		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
-+		(*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
- 	}
-+}
-+
-+static int cpuacct_stats_show(struct seq_file *sf, void *v)
-+{
-+	u64 val[CPUACCT_STAT_NSTATS];
-+	int stat;
-+
-+	cpuacct_stats_read(css_ca(seq_css(sf)), &val);
-
- 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
--		seq_printf(sf, "%s %lld\n",
-+		seq_printf(sf, "%s %llu\n",
- 			   cpuacct_stat_desc[stat],
- 			   cputime64_to_clock_t(val[stat]));
- 	}
-
-commit 015cbdcb90034fd566d00de9d3d405613da3cd26
-Author: Tejun Heo <tj@kernel.org>
-Date:   Fri Mar 11 07:31:23 2016 -0500
-
-    sched: Implement interface for cgroup unified hierarchy
-
-    While the cpu controller doesn't have any functional problems, there
-    are a couple interface issues which can be addressed in the v2
-    interface.
-
-    * cpuacct being a separate controller.  This separation is artificial
-      and rather pointless as demonstrated by most use cases co-mounting
-      the two controllers.  It also forces certain information to be
-      accounted twice.
-
-    * Use of different time units.  Writable control knobs use
-      microseconds, some stat fields use nanoseconds while other cpuacct
-      stat fields use centiseconds.
-
-    * Control knobs which can't be used in the root cgroup still show up
-      in the root.
-
-    * Control knob names and semantics aren't consistent with other
-      controllers.
-
-    This patchset implements cpu controller's interface on the unified
-    hierarchy which adheres to the controller file conventions described
-    in Documentation/cgroups/unified-hierarchy.txt.  Overall, the
-    following changes are made.
-
-    * cpuacct is implictly enabled and disabled by cpu and its information
-      is reported through "cpu.stat" which now uses microseconds for all
-      time durations.  All time duration fields now have "_usec" appended
-      to them for clarity.  While this doesn't solve the double accounting
-      immediately, once majority of users switch to v2, cpu can directly
-      account and report the relevant stats and cpuacct can be disabled on
-      the unified hierarchy.
-
-      Note that cpuacct.usage_percpu is currently not included in
-      "cpu.stat".  If this information is actually called for, it can be
-      added later.
-
-    * "cpu.shares" is replaced with "cpu.weight" and operates on the
-      standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000).
-      The weight is scaled to scheduler weight so that 100 maps to 1024
-      and the ratio relationship is preserved - if weight is W and its
-      scaled value is S, W / 100 == S / 1024.  While the mapped range is a
-      bit smaller than the orignal scheduler weight range, the dead zones
-      on both sides are relatively small and covers wider range than the
-      nice value mappings.  This file doesn't make sense in the root
-      cgroup and isn't create on root.
-
-    * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max"
-      which contains both quota and period.
-
-    * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by
-      "cpu.rt.max" which contains both runtime and period.
-
-    v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for
-        CFS bandwidth stats and also using raw division for u64.  Use
-        CONFIG_CFS_BANDWITH and do_div() instead.
-
-        The semantics of "cpu.rt.max" is not fully decided yet.  Dropped
-        for now.
-
-    Signed-off-by: Tejun Heo <tj@kernel.org>
-    Cc: Ingo Molnar <mingo@redhat.com>
-    Cc: Peter Zijlstra <peterz@infradead.org>
-    Cc: Li Zefan <lizefan@huawei.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 57472485b79c..c0ae869f51c4 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -8784,6 +8784,139 @@ static struct cftype cpu_legacy_files[] = {
- 	{ } /* terminate */
- };
-
-+static int cpu_stats_show(struct seq_file *sf, void *v)
-+{
-+	cpuacct_cpu_stats_show(sf);
-+
-+#ifdef CONFIG_CFS_BANDWIDTH
-+	{
-+		struct task_group *tg = css_tg(seq_css(sf));
-+		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
-+		u64 throttled_usec;
-+
-+		throttled_usec = cfs_b->throttled_time;
-+		do_div(throttled_usec, NSEC_PER_USEC);
-+
-+		seq_printf(sf, "nr_periods %d\n"
-+			   "nr_throttled %d\n"
-+			   "throttled_usec %llu\n",
-+			   cfs_b->nr_periods, cfs_b->nr_throttled,
-+			   throttled_usec);
-+	}
-+#endif
-+	return 0;
-+}
-+
-+#ifdef CONFIG_FAIR_GROUP_SCHED
-+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
-+			       struct cftype *cft)
-+{
-+	struct task_group *tg = css_tg(css);
-+	u64 weight = scale_load_down(tg->shares);
-+
-+	return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
-+}
-+
-+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
-+				struct cftype *cftype, u64 weight)
-+{
-+	/*
-+	 * cgroup weight knobs should use the common MIN, DFL and MAX
-+	 * values which are 1, 100 and 10000 respectively.  While it loses
-+	 * a bit of range on both ends, it maps pretty well onto the shares
-+	 * value used by scheduler and the round-trip conversions preserve
-+	 * the original value over the entire range.
-+	 */
-+	if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
-+		return -ERANGE;
-+
-+	weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
-+
-+	return sched_group_set_shares(css_tg(css), scale_load(weight));
-+}
-+#endif
-+
-+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
-+						  long period, long quota)
-+{
-+	if (quota < 0)
-+		seq_puts(sf, "max");
-+	else
-+		seq_printf(sf, "%ld", quota);
-+
-+	seq_printf(sf, " %ld\n", period);
-+}
-+
-+/* caller should put the current value in *@periodp before calling */
-+static int __maybe_unused cpu_period_quota_parse(char *buf,
-+						 u64 *periodp, u64 *quotap)
-+{
-+	char tok[21];	/* U64_MAX */
-+
-+	if (!sscanf(buf, "%s %llu", tok, periodp))
-+		return -EINVAL;
-+
-+	*periodp *= NSEC_PER_USEC;
-+
-+	if (sscanf(tok, "%llu", quotap))
-+		*quotap *= NSEC_PER_USEC;
-+	else if (!strcmp(tok, "max"))
-+		*quotap = RUNTIME_INF;
-+	else
-+		return -EINVAL;
-+
-+	return 0;
-+}
-+
-+#ifdef CONFIG_CFS_BANDWIDTH
-+static int cpu_max_show(struct seq_file *sf, void *v)
-+{
-+	struct task_group *tg = css_tg(seq_css(sf));
-+
-+	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
-+	return 0;
-+}
-+
-+static ssize_t cpu_max_write(struct kernfs_open_file *of,
-+			     char *buf, size_t nbytes, loff_t off)
-+{
-+	struct task_group *tg = css_tg(of_css(of));
-+	u64 period = tg_get_cfs_period(tg);
-+	u64 quota;
-+	int ret;
-+
-+	ret = cpu_period_quota_parse(buf, &period, &quota);
-+	if (!ret)
-+		ret = tg_set_cfs_bandwidth(tg, period, quota);
-+	return ret ?: nbytes;
-+}
-+#endif
-+
-+static struct cftype cpu_files[] = {
-+	{
-+		.name = "stat",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.seq_show = cpu_stats_show,
-+	},
-+#ifdef CONFIG_FAIR_GROUP_SCHED
-+	{
-+		.name = "weight",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.read_u64 = cpu_weight_read_u64,
-+		.write_u64 = cpu_weight_write_u64,
-+	},
-+#endif
-+#ifdef CONFIG_CFS_BANDWIDTH
-+	{
-+		.name = "max",
-+		.flags = CFTYPE_NOT_ON_ROOT,
-+		.seq_show = cpu_max_show,
-+		.write = cpu_max_write,
-+	},
-+#endif
-+	{ } /* terminate */
-+};
-+
- struct cgroup_subsys cpu_cgrp_subsys = {
- 	.css_alloc = cpu_cgroup_css_alloc,
- 	.css_released = cpu_cgroup_css_released,
-@@ -8792,7 +8925,15 @@ struct cgroup_subsys cpu_cgrp_subsys = {
- 	.can_attach = cpu_cgroup_can_attach,
- 	.attach = cpu_cgroup_attach,
- 	.legacy_cftypes = cpu_legacy_files,
-+	.dfl_cftypes = cpu_files,
- 	.early_init = true,
-+#ifdef CONFIG_CGROUP_CPUACCT
-+	/*
-+	 * cpuacct is enabled together with cpu on the unified hierarchy
-+	 * and its stats are reported through "cpu.stat".
-+	 */
-+	.depends_on = 1 << cpuacct_cgrp_id,
-+#endif
- };
-
- #endif	/* CONFIG_CGROUP_SCHED */
-diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
-index d1e5dd0b3a64..57f390514c39 100644
---- a/kernel/sched/cpuacct.c
-+++ b/kernel/sched/cpuacct.c
-@@ -347,6 +347,31 @@ static struct cftype files[] = {
- 	{ } /* terminate */
- };
-
-+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */
-+void cpuacct_cpu_stats_show(struct seq_file *sf)
-+{
-+	struct cgroup_subsys_state *css;
-+	u64 usage, val[CPUACCT_STAT_NSTATS];
-+
-+	css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys);
-+
-+	usage = cpuusage_read(css, seq_cft(sf));
-+	cpuacct_stats_read(css_ca(css), &val);
-+
-+	val[CPUACCT_STAT_USER] *= TICK_NSEC;
-+	val[CPUACCT_STAT_SYSTEM] *= TICK_NSEC;
-+	do_div(usage, NSEC_PER_USEC);
-+	do_div(val[CPUACCT_STAT_USER], NSEC_PER_USEC);
-+	do_div(val[CPUACCT_STAT_SYSTEM], NSEC_PER_USEC);
-+
-+	seq_printf(sf, "usage_usec %llu\n"
-+		   "user_usec %llu\n"
-+		   "system_usec %llu\n",
-+		   usage, val[CPUACCT_STAT_USER], val[CPUACCT_STAT_SYSTEM]);
-+
-+	css_put(css);
-+}
-+
- /*
- * charge this task's execution time to its accounting group.
- *
-diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
-index ba72807c73d4..ddf7af466d35 100644
---- a/kernel/sched/cpuacct.h
-+++ b/kernel/sched/cpuacct.h
-@@ -2,6 +2,7 @@
-
- extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
- extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
-+extern void cpuacct_cpu_stats_show(struct seq_file *sf);
-
- #else
-
-@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
- {
- }
-
-+static inline void cpuacct_cpu_stats_show(struct seq_file *sf)
-+{
-+}
-+
- #endif
-
-commit 5019fe3d7ec456b58d451ef06fe1f81d7d9f28a9
-Author: Tejun Heo <tj@kernel.org>
-Date:   Fri Aug 5 12:41:01 2016 -0400
-
-    cgroup: add documentation regarding CPU controller cgroup v2 support
-
-    Signed-off-by: Tejun Heo <tj@kernel.org>
-
-diff --git a/Documentation/cgroup-v2-cpu.txt b/Documentation/cgroup-v2-cpu.txt
-new file mode 100644
-index 000000000000..1ed7032d4472
---- /dev/null
-+++ b/Documentation/cgroup-v2-cpu.txt
-@@ -0,0 +1,368 @@
-+
-+
-+CPU Controller on Control Group v2
-+
-+August, 2016		Tejun Heo <tj@kernel.org>
-+
-+
-+While most controllers have support for cgroup v2 now, the CPU
-+controller support is not upstream yet due to objections from the
-+scheduler maintainers on the basic designs of cgroup v2.  This
-+document explains the current situation as well as an interim
-+solution, and details the disagreements and arguments.  The latest
-+version of this document can be found at the following URL.
-+
-+ https://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu
-+
-+This document was posted to the linux-kernel and cgroup mailing lists.
-+Unfortunately, no consensus was reached as of Oct, 2016.  The thread
-+can be found at the following URL.
-+
-+ http://lkml.kernel.org/r/20160805170752.GK2542@mtj.duckdns.org
-+
-+
-+CONTENTS
-+
-+1. Current Situation and Interim Solution
-+2. Disagreements and Arguments
-+  2-1. Contentious Restrictions
-+    2-1-1. Process Granularity
-+    2-1-2. No Internal Process Constraint
-+  2-2. Impact on CPU Controller
-+    2-2-1. Impact of Process Granularity
-+    2-2-2. Impact of No Internal Process Constraint
-+  2-3. Arguments for cgroup v2
-+3. Way Forward
-+4. References
-+
-+
-+1. Current Situation and Interim Solution
-+
-+All objections from the scheduler maintainers apply to cgroup v2 core
-+design, and there are no known objections to the specifics of the CPU
-+controller cgroup v2 interface.  The only blocked part is changes to
-+expose the CPU controller interface on cgroup v2, which comprises the
-+following two patches:
-+
-+ [1] sched: Misc preps for cgroup unified hierarchy interface
-+ [2] sched: Implement interface for cgroup unified hierarchy
-+
-+The necessary changes are superficial and implement the interface
-+files on cgroup v2.  The combined diffstat is as follows.
-+
-+ kernel/sched/core.c    | 149 +++++++++++++++++++++++++++++++++++++++++++++++--
-+ kernel/sched/cpuacct.c |  57 ++++++++++++------
-+ kernel/sched/cpuacct.h |   5 +
-+ 3 files changed, 189 insertions(+), 22 deletions(-)
-+
-+The patches are easy to apply and forward-port.  The following git
-+branch will always carry the two patches on top of the latest release
-+of the upstream kernel.
-+
-+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu
-+
-+There also are versioned branches going back to v4.4.
-+
-+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu-$KERNEL_VER
-+
-+While it's difficult to tell whether the CPU controller support will
-+be merged, there are crucial resource control features in cgroup v2
-+that are only possible due to the design choices that are being
-+objected to, and every effort will be made to ease enabling the CPU
-+controller cgroup v2 support out-of-tree for parties which choose to.
-+
-+
-+2. Disagreements and Arguments
-+
-+There have been several lengthy discussion threads [3][4] on LKML
-+around the structural constraints of cgroup v2.  The two that affect
-+the CPU controller are process granularity and no internal process
-+constraint.  Both arise primarily from the need for common resource
-+domain definition across different resources.
-+
-+The common resource domain is a powerful concept in cgroup v2 that
-+allows controllers to make basic assumptions about the structural
-+organization of processes and controllers inside the cgroup hierarchy,
-+and thus solve problems spanning multiple types of resources.  The
-+prime example for this is page cache writeback: dirty page cache is
-+regulated through throttling buffered writers based on memory
-+availability, and initiating batched write outs to the disk based on
-+IO capacity.  Tracking and controlling writeback inside a cgroup thus
-+requires the direct cooperation of the memory and the IO controller.
-+
-+This easily extends to other areas, such as CPU cycles consumed while
-+performing memory reclaim or IO encryption.
-+
-+
-+2-1. Contentious Restrictions
-+
-+For controllers of different resources to work together, they must
-+agree on a common organization.  This uniform model across controllers
-+imposes two contentious restrictions on the CPU controller: process
-+granularity and the no-internal-process constraint.
-+
-+
-+  2-1-1. Process Granularity
-+
-+  For memory, because an address space is shared between all threads
-+  of a process, the terminal consumer is a process, not a thread.
-+  Separating the threads of a single process into different memory
-+  control domains doesn't make semantical sense.  cgroup v2 ensures
-+  that all controller can agree on the same organization by requiring
-+  that threads of the same process belong to the same cgroup.
-+
-+  There are other reasons to enforce process granularity.  One
-+  important one is isolating system-level management operations from
-+  in-process application operations.  The cgroup interface, being a
-+  virtual filesystem, is very unfit for multiple independent
-+  operations taking place at the same time as most operations have to
-+  be multi-step and there is no way to synchronize multiple accessors.
-+  See also [5] Documentation/cgroup-v2.txt, "R-2. Thread Granularity"
-+
-+
-+  2-1-2. No Internal Process Constraint
-+
-+  cgroup v2 does not allow processes to belong to any cgroup which has
-+  child cgroups when resource controllers are enabled on it (the
-+  notable exception being the root cgroup itself).  This is because,
-+  for some resources, a resource domain (cgroup) is not directly
-+  comparable to the terminal consumer (process/task) of said resource,
-+  and so putting the two into a sibling relationship isn't meaningful.
-+
-+  - Differing Control Parameters and Capabilities
-+
-+    A cgroup controller has different resource control parameters and
-+    capabilities from a terminal consumer, be that a task or process.
-+    There are a couple cases where a cgroup control knob can be mapped
-+    to a per-task or per-process API but they are exceptions and the
-+    mappings aren't obvious even in those cases.
-+
-+    For example, task priorities (also known as nice values) set
-+    through setpriority(2) are mapped to the CPU controller
-+    "cpu.shares" values.  However, how exactly the two ranges map and
-+    even the fact that they map to each other at all are not obvious.
-+
-+    The situation gets further muddled when considering other resource
-+    types and control knobs.  IO priorities set through ioprio_set(2)
-+    cannot be mapped to IO controller weights and most cgroup resource
-+    control knobs including the bandwidth control knobs of the CPU
-+    controller don't have counterparts in the terminal consumers.
-+
-+  - Anonymous Resource Consumption
-+
-+    For CPU, every time slice consumed from inside a cgroup, which
-+    comprises most but not all of consumed CPU time for the cgroup,
-+    can be clearly attributed to a specific task or process.  Because
-+    these two types of entities are directly comparable as consumers
-+    of CPU time, it's theoretically possible to mix tasks and cgroups
-+    on the same tree levels and let them directly compete for the time
-+    quota available to their common ancestor.
-+
-+    However, the same can't be said for resource types like memory or
-+    IO: the memory consumed by the page cache, for example, can be
-+    tracked on a per-cgroup level, but due to mismatches in lifetimes
-+    of involved objects (page cache can persist long after processes
-+    are gone), shared usages and the implementation overhead of
-+    tracking persistent state, it can no longer be attributed to
-+    individual processes after instantiation.  Consequently, any IO
-+    incurred by page cache writeback can be attributed to a cgroup,
-+    but not to the individual consumers inside the cgroup.
-+
-+    For memory and IO, this makes a resource domain (cgroup) an object
-+    of a fundamentally different type than a terminal consumer
-+    (process).  A process can't be a first class object in the resource
-+    distribution graph as its total resource consumption can't be
-+    described without the containing resource domain.
-+
-+  Disallowing processes in internal cgroups avoids competition between
-+  cgroups and processes which cannot be meaningfully defined for these
-+  resources.  All resource control takes place among cgroups and a
-+  terminal consumer interacts with the containing cgroup the same way
-+  it would with the system without cgroup.
-+
-+  Root cgroup is exempt from this constraint, which is in line with
-+  how root cgroup is handled in general - it's excluded from cgroup
-+  resource accounting and control.
-+
-+
-+Enforcing process granularity and no internal process constraint
-+allows all controllers to be on the same footing in terms of resource
-+distribution hierarchy.
-+
-+
-+2-2. Impact on CPU Controller
-+
-+As indicated earlier, the CPU controller's resource distribution graph
-+is the simplest.  Every schedulable resource consumption can be
-+attributed to a specific task.  In addition, for weight based control,
-+the per-task priority set through setpriority(2) can be translated to
-+and from a per-cgroup weight.  As such, the CPU controller can treat a
-+task and a cgroup symmetrically, allowing support for any tree layout
-+of cgroups and tasks.  Both process granularity and the no internal
-+process constraint restrict how the CPU controller can be used.
-+
-+
-+  2-2-1. Impact of Process Granularity
-+
-+  Process granularity prevents tasks belonging to the same process to
-+  be assigned to different cgroups.  It was pointed out [6] that this
-+  excludes the valid use case of hierarchical CPU distribution within
-+  processes.
-+
-+  To address this issue, the rgroup (resource group) [7][8][9]
-+  interface, an extension of the existing setpriority(2) API, was
-+  proposed, which is in line with other programmable priority
-+  mechanisms and eliminates the risk of in-application configuration
-+  and system configuration stepping on each other's toes.
-+  Unfortunately, the proposal quickly turned into discussions around
-+  cgroup v2 design decisions [4] and no consensus could be reached.
-+
-+
-+  2-2-2. Impact of No Internal Process Constraint
-+
-+  The no internal process constraint disallows tasks from competing
-+  directly against cgroups.  Here is an excerpt from Peter Zijlstra
-+  pointing out the issue [10] - R, L and A are cgroups; t1, t2, t3 and
-+  t4 are tasks:
-+
-+
-+          R
-+        / | \
-+       t1 t2 A
-+            / \
-+          t3   t4
-+
-+
-+  Is fundamentally different from:
-+
-+
-+         R
-+        / \
-+       L   A
-+      / \ / \
-+    t1 t2 t3 t4
-+
-+
-+  Because if in the first hierarchy you add a task (t5) to R, all of
-+  its A will run at 1/4th of total bandwidth where before it had
-+  1/3rd, whereas with the second example, if you add our t5 to L, A
-+  doesn't get any less bandwidth.
-+
-+
-+  It is true that the trees are semantically different from each other
-+  and the symmetric handling of tasks and cgroups is aesthetically
-+  pleasing.  However, it isn't clear what the practical usefulness of
-+  a layout with direct competition between tasks and cgroups would be,
-+  considering that number and behavior of tasks are controlled by each
-+  application, and cgroups primarily deal with system level resource
-+  distribution; changes in the number of active threads would directly
-+  impact resource distribution.  Real world use cases of such layouts
-+  could not be established during the discussions.
-+
-+
-+2-3. Arguments for cgroup v2
-+
-+There are strong demands for comprehensive hierarchical resource
-+control across all major resources, and establishing a common resource
-+hierarchy is an essential step.  As with most engineering decisions,
-+common resource hierarchy definition comes with its trade-offs.  With
-+cgroup v2, the trade-offs are in the form of structural constraints
-+which, among others, restrict the CPU controller's space of possible
-+configurations.
-+
-+However, even with the restrictions, cgroup v2, in combination with
-+rgroup, covers most of identified real world use cases while enabling
-+new important use cases of resource control across multiple resource
-+types that were fundamentally broken previously.
-+
-+Furthermore, for resource control, treating resource domains as
-+objects of a different type from terminal consumers has important
-+advantages - it can account for resource consumptions which are not
-+tied to any specific terminal consumer, be that a task or process, and
-+allows decoupling resource distribution controls from in-application
-+APIs.  Even the CPU controller may benefit from it as the kernel can
-+consume significant amount of CPU cycles in interrupt context or tasks
-+shared across multiple resource domains (e.g. softirq).
-+
-+Finally, it's important to note that enabling cgroup v2 support for
-+the CPU controller doesn't block use cases which require the features
-+which are not available on cgroup v2.  Unlikely, but should anybody
-+actually rely on the CPU controller's symmetric handling of tasks and
-+cgroups, backward compatibility is and will be maintained by being
-+able to disconnect the controller from the cgroup v2 hierarchy and use
-+it standalone.  This also holds for cpuset which is often used in
-+highly customized configurations which might be a poor fit for common
-+resource domains.
-+
-+The required changes are minimal, the benefits for the target use
-+cases are critical and obvious, and use cases which have to use v1 can
-+continue to do so.
-+
-+
-+3. Way Forward
-+
-+cgroup v2 primarily aims to solve the problem of comprehensive
-+hierarchical resource control across all major computing resources,
-+which is one of the core problems of modern server infrastructure
-+engineering.  The trade-offs that cgroup v2 took are results of
-+pursuing that goal and gaining a better understanding of the nature of
-+resource control in the process.
-+
-+I believe that real world usages will prove cgroup v2's model right,
-+considering the crucial pieces of comprehensive resource control that
-+cannot be implemented without common resource domains.  This is not to
-+say that cgroup v2 is fixed in stone and can't be updated; if there is
-+an approach which better serves both comprehensive resource control
-+and the CPU controller's flexibility, we will surely move towards
-+that.  It goes without saying that discussions around such approach
-+should consider practical aspects of resource control as a whole
-+rather than absolutely focusing on a particular controller.
-+
-+Until such consensus can be reached, the CPU controller cgroup v2
-+support will be maintained out of the mainline kernel in an easily
-+accessible form.  If there is anything cgroup developers can do to
-+ease the pain, please feel free to contact us on the cgroup mailing
-+list at cgroups@vger.kernel.org.
-+
-+
-+4. References
-+
-+[1] http://lkml.kernel.org/r/20160105164834.GE5995@mtj.duckdns.org
-+    [PATCH 1/2] sched: Misc preps for cgroup unified hierarchy interface
-+    Tejun Heo <tj@kernel.org>
-+
-+[2] http://lkml.kernel.org/r/20160105164852.GF5995@mtj.duckdns.org
-+    [PATCH 2/2] sched: Implement interface for cgroup unified hierarchy
-+    Tejun Heo <tj@kernel.org>
-+
-+[3] http://lkml.kernel.org/r/1438641689-14655-4-git-send-email-tj@kernel.org
-+    [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
-+    Tejun Heo <tj@kernel.org>
-+
-+[4] http://lkml.kernel.org/r/20160407064549.GH3430@twins.programming.kicks-ass.net
-+    Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
-+    Peter Zijlstra <peterz@infradead.org>
-+
-+[5] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/cgroup-v2.txt
-+    Control Group v2
-+    Tejun Heo <tj@kernel.org>
-+
-+[6] http://lkml.kernel.org/r/CAPM31RJNy3jgG=DYe6GO=wyL4BPPxwUm1f2S6YXacQmo7viFZA@mail.gmail.com
-+    Re: [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy
-+    Paul Turner <pjt@google.com>
-+
-+[7] http://lkml.kernel.org/r/20160105154503.GC5995@mtj.duckdns.org
-+    [RFD] cgroup: thread granularity support for cpu controller
-+    Tejun Heo <tj@kernel.org>
-+
-+[8] http://lkml.kernel.org/r/1457710888-31182-1-git-send-email-tj@kernel.org
-+    [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP
-+    Tejun Heo
<tj@kernel.org> 777 - + 778 - +[9] http://lkml.kernel.org/r/20160311160522.GA24046@htj.duckdns.org 779 - + Example program for PRIO_RGRP 780 - + Tejun Heo <tj@kernel.org> 781 - + 782 - +[10] http://lkml.kernel.org/r/20160407082810.GN3430@twins.programming.kicks-ass.net 783 - + Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource 784 - + Peter Zijlstra <peterz@infradead.org>
···
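The deleted rationale above notes that backward compatibility is preserved by "being able to disconnect the controller from the cgroup v2 hierarchy and use it standalone". For context, a minimal sketch of what that looks like at the mount level (paths are conventional, not mandated; requires root):

```shell
# Unified (v2) hierarchy for most controllers; the exact mount point
# is a convention, not a requirement.
mount -t cgroup2 none /sys/fs/cgroup/unified

# The cpu/cpuacct controllers mounted standalone as a v1 hierarchy,
# disconnected from the unified tree, as the rationale describes.
mount -t cgroup -o cpu,cpuacct cpu /sys/fs/cgroup/cpu
```

A controller can only be attached to one hierarchy at a time, so mounting cpu as v1 implicitly removes it from the v2 tree.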
-12
pkgs/os-specific/linux/kernel/linux-4.9.nix
··· 1 - { buildPackages, fetchurl, perl, buildLinux, nixosTests, stdenv, ... } @ args: 2 - 3 - buildLinux (args // rec { 4 - version = "4.9.332"; 5 - extraMeta.branch = "4.9"; 6 - extraMeta.broken = stdenv.isAarch64; 7 - 8 - src = fetchurl { 9 - url = "mirror://kernel/linux/kernel/v4.x/linux-${version}.tar.xz"; 10 - sha256 = "1kiqa9kw4932n5qglkyymsrak849wbbszw9rnq1aygmdinjz4c8i"; 11 - }; 12 - } // (args.argsOverride or {}))
···
+2 -9
pkgs/top-level/linux-kernels.nix
··· 95 96 linux_4_4 = throw "linux 4.4 was removed because it reached its end of life upstream"; 97 98 - linux_4_9 = callPackage ../os-specific/linux/kernel/linux-4.9.nix { 99 - kernelPatches = 100 - [ kernelPatches.bridge_stp_helper 101 - kernelPatches.request_key_helper_updated 102 - kernelPatches.cpu-cgroup-v2."4.9" 103 - kernelPatches.modinst_arg_list_too_long 104 - ]; 105 - }; 106 107 linux_4_14 = callPackage ../os-specific/linux/kernel/linux-4.14.nix { 108 kernelPatches = ··· 519 vanillaPackages = { 520 # recurse to build modules for the kernels 521 linux_4_4 = throw "linux 4.4 was removed because it reached its end of life upstream"; # Added 2022-02-11 522 - linux_4_9 = recurseIntoAttrs (packagesFor kernels.linux_4_9); 523 linux_4_14 = recurseIntoAttrs (packagesFor kernels.linux_4_14); 524 linux_4_19 = recurseIntoAttrs (packagesFor kernels.linux_4_19); 525 linux_5_4 = recurseIntoAttrs (packagesFor kernels.linux_5_4);
··· 95 96 linux_4_4 = throw "linux 4.4 was removed because it reached its end of life upstream"; 97 98 + linux_4_9 = throw "linux 4.9 was removed because it will reach its end of life within 22.11"; 99 100 linux_4_14 = callPackage ../os-specific/linux/kernel/linux-4.14.nix { 101 kernelPatches = ··· 512 vanillaPackages = { 513 # recurse to build modules for the kernels 514 linux_4_4 = throw "linux 4.4 was removed because it reached its end of life upstream"; # Added 2022-02-11 515 + linux_4_9 = throw "linux 4.9 was removed because it will reach its end of life within 22.11"; # Added 2022-11-08 516 linux_4_14 = recurseIntoAttrs (packagesFor kernels.linux_4_14); 517 linux_4_19 = recurseIntoAttrs (packagesFor kernels.linux_4_19); 518 linux_5_4 = recurseIntoAttrs (packagesFor kernels.linux_5_4);
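With this change, any configuration still pinning the removed attribute fails at evaluation time with the throw message above. A minimal migration sketch for a NixOS configuration (4.14 is chosen only because it is the next LTS branch still present in `linux-kernels.nix`; any maintained branch works):

```nix
{ pkgs, ... }:
{
  # boot.kernelPackages = pkgs.linuxPackages_4_9;  # now throws
  boot.kernelPackages = pkgs.linuxPackages_4_14;   # next LTS still in nixpkgs
}
```

Users who must stay on 4.9 can pin an older nixpkgs revision, but will receive no further updates through nixpkgs once upstream support ends.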