Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cgroup: remove per-cpu per-subsystem locks

The rstat update side used to insert the cgroup whose stats are updated
into the update tree, and the read side flushed the update tree to get
the latest up-to-date stats. The per-cpu per-subsystem locks were used to
synchronize the update and flush sides. However, now the update side does
not access the update tree but uses per-cpu lockless lists, so there is
no need for locks to synchronize the update and flush sides. Let's remove them.

Suggested-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Tested-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

authored by

Shakeel Butt and committed by
Tejun Heo
6af89c6c 36df6e3d

+4 -150
-7
include/linux/cgroup-defs.h
··· 375 375 * Child cgroups with stat updates on this cpu since the last read 376 376 * are linked on the parent's ->updated_children through 377 377 * ->updated_next. updated_children is terminated by its container css. 378 - * 379 - * In addition to being more compact, singly-linked list pointing to 380 - * the css makes it unnecessary for each per-cpu struct to point back 381 - * to the associated css. 382 - * 383 - * Protected by per-cpu css->ss->rstat_ss_cpu_lock. 384 378 */ 385 379 struct cgroup_subsys_state *updated_children; 386 380 struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ ··· 818 824 unsigned int depends_on; 819 825 820 826 spinlock_t rstat_ss_lock; 821 - raw_spinlock_t __percpu *rstat_ss_cpu_lock; 822 827 struct llist_head __percpu *lhead; /* lockless update list head */ 823 828 }; 824 829
-47
include/trace/events/cgroup.h
··· 257 257 TP_ARGS(cgrp, cpu, contended) 258 258 ); 259 259 260 - /* 261 - * Related to per CPU locks: 262 - * global rstat_base_cpu_lock for base stats 263 - * cgroup_subsys::rstat_ss_cpu_lock for subsystem stats 264 - */ 265 - DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended, 266 - 267 - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), 268 - 269 - TP_ARGS(cgrp, cpu, contended) 270 - ); 271 - 272 - DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended_fastpath, 273 - 274 - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), 275 - 276 - TP_ARGS(cgrp, cpu, contended) 277 - ); 278 - 279 - DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked, 280 - 281 - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), 282 - 283 - TP_ARGS(cgrp, cpu, contended) 284 - ); 285 - 286 - DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked_fastpath, 287 - 288 - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), 289 - 290 - TP_ARGS(cgrp, cpu, contended) 291 - ); 292 - 293 - DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock, 294 - 295 - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), 296 - 297 - TP_ARGS(cgrp, cpu, contended) 298 - ); 299 - 300 - DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath, 301 - 302 - TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), 303 - 304 - TP_ARGS(cgrp, cpu, contended) 305 - ); 306 - 307 260 #endif /* _TRACE_CGROUP_H */ 308 261 309 262 /* This part must be outside protection */
+4 -96
kernel/cgroup/rstat.c
··· 10 10 #include <trace/events/cgroup.h> 11 11 12 12 static DEFINE_SPINLOCK(rstat_base_lock); 13 - static DEFINE_PER_CPU(raw_spinlock_t, rstat_base_cpu_lock); 14 13 static DEFINE_PER_CPU(struct llist_head, rstat_backlog_list); 15 14 16 15 static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu); ··· 50 51 if (ss) 51 52 return per_cpu_ptr(ss->lhead, cpu); 52 53 return per_cpu_ptr(&rstat_backlog_list, cpu); 53 - } 54 - 55 - static raw_spinlock_t *ss_rstat_cpu_lock(struct cgroup_subsys *ss, int cpu) 56 - { 57 - if (ss) 58 - return per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu); 59 - 60 - return per_cpu_ptr(&rstat_base_cpu_lock, cpu); 61 - } 62 - 63 - /* 64 - * Helper functions for rstat per CPU locks. 65 - * 66 - * This makes it easier to diagnose locking issues and contention in 67 - * production environments. The parameter @fast_path determine the 68 - * tracepoints being added, allowing us to diagnose "flush" related 69 - * operations without handling high-frequency fast-path "update" events. 70 - */ 71 - static __always_inline 72 - unsigned long _css_rstat_cpu_lock(struct cgroup_subsys_state *css, int cpu, 73 - const bool fast_path) 74 - { 75 - struct cgroup *cgrp = css->cgroup; 76 - raw_spinlock_t *cpu_lock; 77 - unsigned long flags; 78 - bool contended; 79 - 80 - /* 81 - * The _irqsave() is needed because the locks used for flushing are 82 - * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring this lock 83 - * with the _irq() suffix only disables interrupts on a non-PREEMPT_RT 84 - * kernel. The raw_spinlock_t below disables interrupts on both 85 - * configurations. The _irqsave() ensures that interrupts are always 86 - * disabled and later restored. 
87 - */ 88 - cpu_lock = ss_rstat_cpu_lock(css->ss, cpu); 89 - contended = !raw_spin_trylock_irqsave(cpu_lock, flags); 90 - if (contended) { 91 - if (fast_path) 92 - trace_cgroup_rstat_cpu_lock_contended_fastpath(cgrp, cpu, contended); 93 - else 94 - trace_cgroup_rstat_cpu_lock_contended(cgrp, cpu, contended); 95 - 96 - raw_spin_lock_irqsave(cpu_lock, flags); 97 - } 98 - 99 - if (fast_path) 100 - trace_cgroup_rstat_cpu_locked_fastpath(cgrp, cpu, contended); 101 - else 102 - trace_cgroup_rstat_cpu_locked(cgrp, cpu, contended); 103 - 104 - return flags; 105 - } 106 - 107 - static __always_inline 108 - void _css_rstat_cpu_unlock(struct cgroup_subsys_state *css, int cpu, 109 - unsigned long flags, const bool fast_path) 110 - { 111 - struct cgroup *cgrp = css->cgroup; 112 - raw_spinlock_t *cpu_lock; 113 - 114 - if (fast_path) 115 - trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false); 116 - else 117 - trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false); 118 - 119 - cpu_lock = ss_rstat_cpu_lock(css->ss, cpu); 120 - raw_spin_unlock_irqrestore(cpu_lock, flags); 121 54 } 122 55 123 56 /** ··· 254 323 { 255 324 struct css_rstat_cpu *rstatc = css_rstat_cpu(root, cpu); 256 325 struct cgroup_subsys_state *head = NULL, *parent, *child; 257 - unsigned long flags; 258 - 259 - flags = _css_rstat_cpu_lock(root, cpu, false); 260 326 261 327 css_process_update_tree(root->ss, cpu); 262 328 263 329 /* Return NULL if this subtree is not on-list */ 264 330 if (!rstatc->updated_next) 265 - goto unlock_ret; 331 + return NULL; 266 332 267 333 /* 268 334 * Unlink @root from its parent. 
As the updated_children list is ··· 291 363 rstatc->updated_children = root; 292 364 if (child != root) 293 365 head = css_rstat_push_children(head, child, cpu); 294 - unlock_ret: 295 - _css_rstat_cpu_unlock(root, cpu, flags, false); 366 + 296 367 return head; 297 368 } 298 369 ··· 487 560 { 488 561 int cpu; 489 562 490 - #ifdef CONFIG_SMP 491 - /* 492 - * On uniprocessor machines, arch_spinlock_t is defined as an empty 493 - * struct. Avoid allocating a size of zero by having this block 494 - * excluded in this case. It's acceptable to leave the subsystem locks 495 - * unitialized since the associated lock functions are no-ops in the 496 - * non-smp case. 497 - */ 498 - if (ss) { 499 - ss->rstat_ss_cpu_lock = alloc_percpu(raw_spinlock_t); 500 - if (!ss->rstat_ss_cpu_lock) 501 - return -ENOMEM; 502 - } 503 - #endif 504 - 505 563 if (ss) { 506 564 ss->lhead = alloc_percpu(struct llist_head); 507 - if (!ss->lhead) { 508 - free_percpu(ss->rstat_ss_cpu_lock); 565 + if (!ss->lhead) 509 566 return -ENOMEM; 510 - } 511 567 } 512 568 513 569 spin_lock_init(ss_rstat_lock(ss)); 514 - for_each_possible_cpu(cpu) { 515 - raw_spin_lock_init(ss_rstat_cpu_lock(ss, cpu)); 570 + for_each_possible_cpu(cpu) 516 571 init_llist_head(ss_lhead_cpu(ss, cpu)); 517 - } 518 572 519 573 return 0; 520 574 }