Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

proc: revert /proc/<pid>/maps [stack:TID] annotation

Commit b76437579d13 ("procfs: mark thread stack correctly in
proc/<pid>/maps") added [stack:TID] annotation to /proc/<pid>/maps.

Finding the task of a stack VMA requires walking the entire thread list,
turning this into quadratic behavior: a thousand threads means a
thousand stacks, so the rendering of /proc/<pid>/maps needs to look at a
million combinations.

The cost is not in proportion to the usefulness as described in the
patch.

Drop the [stack:TID] annotation to make /proc/<pid>/maps (and
/proc/<pid>/numa_maps) usable again for higher thread counts.

The [stack] annotation inside /proc/<pid>/task/<tid>/maps is retained, as
identifying the stack VMA there is an O(1) operation.

Siddhesh said:
"The end users needed a way to identify thread stacks programmatically and
there wasn't a way to do that. I'm afraid I no longer remember (or have
access to the resources that would aid my memory since I changed
employers) the details of their requirement. However, I did do this on my
own time because I thought it was an interesting project for me and nobody
really gave any feedback then as to its utility, so as far as I am
concerned you could roll back the main thread maps information since the
information is available in the thread-specific files"

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
Cc: Shaohua Li <shli@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Johannes Weiner and committed by Linus Torvalds
65376df5 5c2ff95e

+47 -105
+3 -6
Documentation/filesystems/proc.txt
··· 356 356 a7cb1000-a7cb2000 ---p 00000000 00:00 0 357 357 a7cb2000-a7eb2000 rw-p 00000000 00:00 0 358 358 a7eb2000-a7eb3000 ---p 00000000 00:00 0 359 - a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack:1001] 359 + a7eb3000-a7ed5000 rw-p 00000000 00:00 0 360 360 a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 361 361 a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 362 362 a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 ··· 388 388 389 389 [heap] = the heap of the program 390 390 [stack] = the stack of the main process 391 - [stack:1001] = the stack of the thread with tid 1001 392 391 [vdso] = the "virtual dynamic shared object", 393 392 the kernel system call handler 394 393 ··· 395 396 396 397 The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint 397 398 of the individual tasks of a process. In this file you will see a mapping marked 398 - as [stack] if that task sees it as a stack. This is a key difference from the 399 - content of /proc/PID/maps, where you will see all mappings that are being used 400 - as stack by all of those tasks. Hence, for the example above, the task-level 401 - map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: 399 + as [stack] if that task sees it as a stack. Hence, for the example above, the 400 + task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: 402 401 403 402 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test 404 403 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
+23 -43
fs/proc/task_mmu.c
··· 259 259 sizeof(struct proc_maps_private)); 260 260 } 261 261 262 - static pid_t pid_of_stack(struct proc_maps_private *priv, 263 - struct vm_area_struct *vma, bool is_pid) 262 + /* 263 + * Indicate if the VMA is a stack for the given task; for 264 + * /proc/PID/maps that is the stack of the main task. 265 + */ 266 + static int is_stack(struct proc_maps_private *priv, 267 + struct vm_area_struct *vma, int is_pid) 264 268 { 265 - struct inode *inode = priv->inode; 266 - struct task_struct *task; 267 - pid_t ret = 0; 269 + int stack = 0; 268 270 269 - rcu_read_lock(); 270 - task = pid_task(proc_pid(inode), PIDTYPE_PID); 271 - if (task) { 272 - task = task_of_stack(task, vma, is_pid); 271 + if (is_pid) { 272 + stack = vma->vm_start <= vma->vm_mm->start_stack && 273 + vma->vm_end >= vma->vm_mm->start_stack; 274 + } else { 275 + struct inode *inode = priv->inode; 276 + struct task_struct *task; 277 + 278 + rcu_read_lock(); 279 + task = pid_task(proc_pid(inode), PIDTYPE_PID); 273 280 if (task) 274 - ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); 281 + stack = vma_is_stack_for_task(vma, task); 282 + rcu_read_unlock(); 275 283 } 276 - rcu_read_unlock(); 277 - 278 - return ret; 284 + return stack; 279 285 } 280 286 281 287 static void ··· 341 335 342 336 name = arch_vma_name(vma); 343 337 if (!name) { 344 - pid_t tid; 345 - 346 338 if (!mm) { 347 339 name = "[vdso]"; 348 340 goto done; ··· 352 348 goto done; 353 349 } 354 350 355 - tid = pid_of_stack(priv, vma, is_pid); 356 - if (tid != 0) { 357 - /* 358 - * Thread stack in /proc/PID/task/TID/maps or 359 - * the main process stack. 
360 - */ 361 - if (!is_pid || (vma->vm_start <= mm->start_stack && 362 - vma->vm_end >= mm->start_stack)) { 363 - name = "[stack]"; 364 - } else { 365 - /* Thread stack in /proc/PID/maps */ 366 - seq_pad(m, ' '); 367 - seq_printf(m, "[stack:%d]", tid); 368 - } 369 - } 351 + if (is_stack(priv, vma, is_pid)) 352 + name = "[stack]"; 370 353 } 371 354 372 355 done: ··· 1609 1618 seq_file_path(m, file, "\n\t= "); 1610 1619 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1611 1620 seq_puts(m, " heap"); 1612 - } else { 1613 - pid_t tid = pid_of_stack(proc_priv, vma, is_pid); 1614 - if (tid != 0) { 1615 - /* 1616 - * Thread stack in /proc/PID/task/TID/maps or 1617 - * the main process stack. 1618 - */ 1619 - if (!is_pid || (vma->vm_start <= mm->start_stack && 1620 - vma->vm_end >= mm->start_stack)) 1621 - seq_puts(m, " stack"); 1622 - else 1623 - seq_printf(m, " stack:%d", tid); 1624 - } 1621 + } else if (is_stack(proc_priv, vma, is_pid)) { 1622 + seq_puts(m, " stack"); 1625 1623 } 1626 1624 1627 1625 if (is_vm_hugetlb_page(vma))
+19 -28
fs/proc/task_nommu.c
··· 123 123 return size; 124 124 } 125 125 126 - static pid_t pid_of_stack(struct proc_maps_private *priv, 127 - struct vm_area_struct *vma, bool is_pid) 126 + static int is_stack(struct proc_maps_private *priv, 127 + struct vm_area_struct *vma, int is_pid) 128 128 { 129 - struct inode *inode = priv->inode; 130 - struct task_struct *task; 131 - pid_t ret = 0; 129 + struct mm_struct *mm = vma->vm_mm; 130 + int stack = 0; 132 131 133 - rcu_read_lock(); 134 - task = pid_task(proc_pid(inode), PIDTYPE_PID); 135 - if (task) { 136 - task = task_of_stack(task, vma, is_pid); 132 + if (is_pid) { 133 + stack = vma->vm_start <= mm->start_stack && 134 + vma->vm_end >= mm->start_stack; 135 + } else { 136 + struct inode *inode = priv->inode; 137 + struct task_struct *task; 138 + 139 + rcu_read_lock(); 140 + task = pid_task(proc_pid(inode), PIDTYPE_PID); 137 141 if (task) 138 - ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); 142 + stack = vma_is_stack_for_task(vma, task); 143 + rcu_read_unlock(); 139 144 } 140 - rcu_read_unlock(); 141 - 142 - return ret; 145 + return stack; 143 146 } 144 147 145 148 /* ··· 184 181 if (file) { 185 182 seq_pad(m, ' '); 186 183 seq_file_path(m, file, ""); 187 - } else if (mm) { 188 - pid_t tid = pid_of_stack(priv, vma, is_pid); 189 - 190 - if (tid != 0) { 191 - seq_pad(m, ' '); 192 - /* 193 - * Thread stack in /proc/PID/task/TID/maps or 194 - * the main process stack. 195 - */ 196 - if (!is_pid || (vma->vm_start <= mm->start_stack && 197 - vma->vm_end >= mm->start_stack)) 198 - seq_printf(m, "[stack]"); 199 - else 200 - seq_printf(m, "[stack:%d]", tid); 201 - } 184 + } else if (mm && is_stack(priv, vma, is_pid)) { 185 + seq_pad(m, ' '); 186 + seq_printf(m, "[stack]"); 202 187 } 203 188 204 189 seq_putc(m, '\n');
+1 -2
include/linux/mm.h
··· 1341 1341 !vma_growsup(vma->vm_next, addr); 1342 1342 } 1343 1343 1344 - extern struct task_struct *task_of_stack(struct task_struct *task, 1345 - struct vm_area_struct *vma, bool in_group); 1344 + int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t); 1346 1345 1347 1346 extern unsigned long move_page_tables(struct vm_area_struct *vma, 1348 1347 unsigned long old_addr, struct vm_area_struct *new_vma,
+1 -26
mm/util.c
··· 230 230 } 231 231 232 232 /* Check if the vma is being used as a stack by this task */ 233 - static int vm_is_stack_for_task(struct task_struct *t, 234 - struct vm_area_struct *vma) 233 + int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t) 235 234 { 236 235 return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); 237 - } 238 - 239 - /* 240 - * Check if the vma is being used as a stack. 241 - * If is_group is non-zero, check in the entire thread group or else 242 - * just check in the current task. Returns the task_struct of the task 243 - * that the vma is stack for. Must be called under rcu_read_lock(). 244 - */ 245 - struct task_struct *task_of_stack(struct task_struct *task, 246 - struct vm_area_struct *vma, bool in_group) 247 - { 248 - if (vm_is_stack_for_task(task, vma)) 249 - return task; 250 - 251 - if (in_group) { 252 - struct task_struct *t; 253 - 254 - for_each_thread(task, t) { 255 - if (vm_is_stack_for_task(t, vma)) 256 - return t; 257 - } 258 - } 259 - 260 - return NULL; 261 236 } 262 237 263 238 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)