Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cgroup: relocate cgroup_attach_lock within cgroup_procs_write_start

Later patches will introduce a new parameter `task` to
cgroup_attach_lock, thus adjusting the position of cgroup_attach_lock
within cgroup_procs_write_start.

Between obtaining the threadgroup leader via PID and acquiring the
cgroup attach lock, the threadgroup leader may change, which could lead
to incorrect cgroup migration. Therefore, after acquiring the cgroup
attach lock, we check whether the threadgroup leader has changed, and if
so, retry the operation.

tj: Minor comment adjustments.

Signed-off-by: Yi Tao <escape@linux.alibaba.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

Authored by Yi Tao, committed by Tejun Heo
Commit: 477abc2e (parent: a1ffc8ad)

Diffstat: +35 -23
kernel/cgroup/cgroup.c
@@ kernel/cgroup/cgroup.c: cgroup_procs_write_start @@
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return ERR_PTR(-EINVAL);
 
-	/*
-	 * If we migrate a single thread, we don't care about threadgroup
-	 * stability. If the thread is `current`, it won't exit(2) under our
-	 * hands or change PID through exec(2). We exclude
-	 * cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write
-	 * callers by cgroup_mutex.
-	 * Therefore, we can skip the global lock.
-	 */
-	lockdep_assert_held(&cgroup_mutex);
-
-	if (pid || threadgroup)
-		*lock_mode = CGRP_ATTACH_LOCK_GLOBAL;
-	else
-		*lock_mode = CGRP_ATTACH_LOCK_NONE;
-
-	cgroup_attach_lock(*lock_mode);
-
+retry_find_task:
 	rcu_read_lock();
 	if (pid) {
 		tsk = find_task_by_vpid(pid);
 		if (!tsk) {
 			tsk = ERR_PTR(-ESRCH);
-			goto out_unlock_threadgroup;
+			goto out_unlock_rcu;
 		}
 	} else {
 		tsk = current;
@@
 	 */
 	if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
 		tsk = ERR_PTR(-EINVAL);
-		goto out_unlock_threadgroup;
+		goto out_unlock_rcu;
 	}
 
 	get_task_struct(tsk);
-	goto out_unlock_rcu;
+	rcu_read_unlock();
 
-out_unlock_threadgroup:
-	cgroup_attach_unlock(*lock_mode);
-	*lock_mode = CGRP_ATTACH_LOCK_NONE;
+	/*
+	 * If we migrate a single thread, we don't care about threadgroup
+	 * stability. If the thread is `current`, it won't exit(2) under our
+	 * hands or change PID through exec(2). We exclude
+	 * cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write callers
+	 * by cgroup_mutex. Therefore, we can skip the global lock.
+	 */
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (pid || threadgroup)
+		*lock_mode = CGRP_ATTACH_LOCK_GLOBAL;
+	else
+		*lock_mode = CGRP_ATTACH_LOCK_NONE;
+
+	cgroup_attach_lock(*lock_mode);
+
+	if (threadgroup) {
+		if (!thread_group_leader(tsk)) {
+			/*
+			 * A race with de_thread from another thread's exec()
+			 * may strip us of our leadership. If this happens,
+			 * throw this task away and try again.
+			 */
+			cgroup_attach_unlock(*lock_mode);
+			put_task_struct(tsk);
+			goto retry_find_task;
+		}
+	}
+
+	return tsk;
+
 out_unlock_rcu:
 	rcu_read_unlock();
 	return tsk;