Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cgroup/rstat: Optimize cgroup_rstat_updated_list()

The current design of cgroup_rstat_cpu_pop_updated() is to traverse
the updated tree so that leaf nodes are popped out before their
parents. This can require traversing multiple nodes before a leaf
node is found and popped out. In other words, a given node in the
tree can be visited multiple times before the whole operation is done.
As a result, it is not very efficient and the code can be hard to read.

With the introduction of cgroup_rstat_updated_list() to build a list
of cgroups to be flushed before any flushing operation is done, we
can optimize the way the updated tree nodes are popped by pushing the
parents to the tail end of the list before their children. In this
way, most updated tree nodes will be visited only once, with the
exception of the subtree root, as we still need to go back to its
parent and pop it out of its updated_children list. This also makes
the code easier to read.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

authored by

Waiman Long and committed by
Tejun Heo
d499fd41 7b91eb60

+95 -66
+95 -66
kernel/cgroup/rstat.c
··· 74 74 } 75 75 76 76 /** 77 - * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree 78 - * @pos: current position 79 - * @root: root of the tree to traversal 77 + * cgroup_rstat_push_children - push children cgroups into the given list 78 + * @head: current head of the list (= subtree root) 79 + * @child: first child of the root 80 80 * @cpu: target cpu 81 + * Return: A new singly linked list of cgroups to be flush 81 82 * 82 - * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts 83 - * the traversal and %NULL return indicates the end. During traversal, 84 - * each returned cgroup is unlinked from the tree. Must be called with the 85 - * matching cgroup_rstat_cpu_lock held. 86 - * 87 - * The only ordering guarantee is that, for a parent and a child pair 88 - * covered by a given traversal, if a child is visited, its parent is 89 - * guaranteed to be visited afterwards. 83 + * Iteratively traverse down the cgroup_rstat_cpu updated tree level by 84 + * level and push all the parents first before their next level children 85 + * into a singly linked list built from the tail backward like "pushing" 86 + * cgroups into a stack. The root is pushed by the caller. 90 87 */ 91 - static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, 92 - struct cgroup *root, int cpu) 88 + static struct cgroup *cgroup_rstat_push_children(struct cgroup *head, 89 + struct cgroup *child, int cpu) 93 90 { 94 - struct cgroup_rstat_cpu *rstatc; 95 - struct cgroup *parent; 91 + struct cgroup *chead = child; /* Head of child cgroup level */ 92 + struct cgroup *ghead = NULL; /* Head of grandchild cgroup level */ 93 + struct cgroup *parent, *grandchild; 94 + struct cgroup_rstat_cpu *crstatc; 96 95 97 - if (pos == root) 98 - return NULL; 96 + child->rstat_flush_next = NULL; 99 97 100 - /* 101 - * We're gonna walk down to the first leaf and visit/remove it. We 102 - * can pick whatever unvisited node as the starting point. 
103 - */ 104 - if (!pos) { 105 - pos = root; 106 - /* return NULL if this subtree is not on-list */ 107 - if (!cgroup_rstat_cpu(pos, cpu)->updated_next) 108 - return NULL; 109 - } else { 110 - pos = cgroup_parent(pos); 111 - } 98 + next_level: 99 + while (chead) { 100 + child = chead; 101 + chead = child->rstat_flush_next; 102 + parent = cgroup_parent(child); 112 103 113 - /* walk down to the first leaf */ 114 - while (true) { 115 - rstatc = cgroup_rstat_cpu(pos, cpu); 116 - if (rstatc->updated_children == pos) 117 - break; 118 - pos = rstatc->updated_children; 119 - } 120 - 121 - /* 122 - * Unlink @pos from the tree. As the updated_children list is 123 - * singly linked, we have to walk it to find the removal point. 124 - * However, due to the way we traverse, @pos will be the first 125 - * child in most cases. The only exception is @root. 126 - */ 127 - parent = cgroup_parent(pos); 128 - if (parent) { 129 - struct cgroup_rstat_cpu *prstatc; 130 - struct cgroup **nextp; 131 - 132 - prstatc = cgroup_rstat_cpu(parent, cpu); 133 - nextp = &prstatc->updated_children; 134 - while (*nextp != pos) { 135 - struct cgroup_rstat_cpu *nrstatc; 136 - 137 - nrstatc = cgroup_rstat_cpu(*nextp, cpu); 138 - WARN_ON_ONCE(*nextp == parent); 139 - nextp = &nrstatc->updated_next; 104 + /* updated_next is parent cgroup terminated */ 105 + while (child != parent) { 106 + child->rstat_flush_next = head; 107 + head = child; 108 + crstatc = cgroup_rstat_cpu(child, cpu); 109 + grandchild = crstatc->updated_children; 110 + if (grandchild != child) { 111 + /* Push the grand child to the next level */ 112 + crstatc->updated_children = child; 113 + grandchild->rstat_flush_next = ghead; 114 + ghead = grandchild; 115 + } 116 + child = crstatc->updated_next; 117 + crstatc->updated_next = NULL; 140 118 } 141 - *nextp = rstatc->updated_next; 142 119 } 143 120 144 - rstatc->updated_next = NULL; 145 - return pos; 121 + if (ghead) { 122 + chead = ghead; 123 + ghead = NULL; 124 + goto next_level; 125 + } 
126 + return head; 146 127 } 147 128 148 - /* Return a list of updated cgroups to be flushed */ 129 + /** 130 + * cgroup_rstat_updated_list - return a list of updated cgroups to be flushed 131 + * @root: root of the cgroup subtree to traverse 132 + * @cpu: target cpu 133 + * Return: A singly linked list of cgroups to be flushed 134 + * 135 + * Walks the updated rstat_cpu tree on @cpu from @root. During traversal, 136 + * each returned cgroup is unlinked from the updated tree. 137 + * 138 + * The only ordering guarantee is that, for a parent and a child pair 139 + * covered by a given traversal, the child is before its parent in 140 + * the list. 141 + * 142 + * Note that updated_children is self terminated and points to a list of 143 + * child cgroups if not empty. Whereas updated_next is like a sibling link 144 + * within the children list and terminated by the parent cgroup. An exception 145 + * here is the cgroup root whose updated_next can be self terminated. 146 + */ 149 147 static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu) 150 148 { 151 149 raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); 152 - struct cgroup *head, *tail, *next; 150 + struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(root, cpu); 151 + struct cgroup *head = NULL, *parent, *child; 153 152 unsigned long flags; 154 153 155 154 /* ··· 160 161 * that interrupts are always disabled and later restored. 161 162 */ 162 163 raw_spin_lock_irqsave(cpu_lock, flags); 163 - head = tail = cgroup_rstat_cpu_pop_updated(NULL, root, cpu); 164 - while (tail) { 165 - next = cgroup_rstat_cpu_pop_updated(tail, root, cpu); 166 - tail->rstat_flush_next = next; 167 - tail = next; 164 + 165 + /* Return NULL if this subtree is not on-list */ 166 + if (!rstatc->updated_next) 167 + goto unlock_ret; 168 + 169 + /* 170 + * Unlink @root from its parent. As the updated_children list is 171 + * singly linked, we have to walk it to find the removal point. 
172 + */ 173 + parent = cgroup_parent(root); 174 + if (parent) { 175 + struct cgroup_rstat_cpu *prstatc; 176 + struct cgroup **nextp; 177 + 178 + prstatc = cgroup_rstat_cpu(parent, cpu); 179 + nextp = &prstatc->updated_children; 180 + while (*nextp != root) { 181 + struct cgroup_rstat_cpu *nrstatc; 182 + 183 + nrstatc = cgroup_rstat_cpu(*nextp, cpu); 184 + WARN_ON_ONCE(*nextp == parent); 185 + nextp = &nrstatc->updated_next; 186 + } 187 + *nextp = rstatc->updated_next; 168 188 } 189 + 190 + rstatc->updated_next = NULL; 191 + 192 + /* Push @root to the list first before pushing the children */ 193 + head = root; 194 + root->rstat_flush_next = NULL; 195 + child = rstatc->updated_children; 196 + rstatc->updated_children = root; 197 + if (child != root) 198 + head = cgroup_rstat_push_children(head, child, cpu); 199 + unlock_ret: 169 200 raw_spin_unlock_irqrestore(cpu_lock, flags); 170 201 return head; 171 202 }