···33#define _LINUX_BPFILTER_H4455#include <uapi/linux/bpfilter.h>66-#include <linux/umh.h>66+#include <linux/usermode_driver.h>7788struct sock;99int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,1010 unsigned int optlen);1111int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,1212 int __user *optlen);1313+void bpfilter_umh_cleanup(struct umd_info *info);1414+1315struct bpfilter_umh_ops {1414- struct umh_info info;1616+ struct umd_info info;1517 /* since ip_getsockopt() can run in parallel, serialize access to umh */1618 struct mutex lock;1719 int (*sockopt)(struct sock *sk, int optname,1820 char __user *optval,1921 unsigned int optlen, bool is_set);2022 int (*start)(void);2121- bool stop;2223};2324extern struct bpfilter_umh_ops bpfilter_ops;2425#endif
-9
include/linux/sched.h
···15101510#define PF_KTHREAD 0x00200000 /* I am a kernel thread */15111511#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */15121512#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */15131513-#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */15141513#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */15151514#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */15161515#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */···20172018}2018201920192020#endif20202020-20212021-void __exit_umh(struct task_struct *tsk);20222022-20232023-static inline void exit_umh(struct task_struct *tsk)20242024-{20252025- if (unlikely(tsk->flags & PF_UMH))20262026- __exit_umh(tsk);20272027-}2028202120292022#ifdef CONFIG_DEBUG_RSEQ20302023
···804804 exit_task_namespaces(tsk);805805 exit_task_work(tsk);806806 exit_thread(tsk);807807- exit_umh(tsk);808807809808 /*810809 * Flush inherited counters to the parent - before the parent···17091710 return -EFAULT;17101711}17111712#endif17131713+17141714+/**17151715+ * thread_group_exited - check that a thread group has exited17161716+ * @pid: tgid of thread group to be checked.17171717+ *17181718+ * Test if the thread group represented by tgid has exited (all17191719+ * threads are zombies, dead or completely gone).17201720+ *17211721+ * Return: true if the thread group has exited. false otherwise.17221722+ */17231723+bool thread_group_exited(struct pid *pid)17241724+{17251725+ struct task_struct *task;17261726+ bool exited;17271727+17281728+ rcu_read_lock();17291729+ task = pid_task(pid, PIDTYPE_PID);17301730+ exited = !task ||17311731+ (READ_ONCE(task->exit_state) && thread_group_empty(task));17321732+ rcu_read_unlock();17331733+17341734+ return exited;17351735+}17361736+EXPORT_SYMBOL(thread_group_exited);1712173717131738__weak void abort(void)17141739{
+1-5
kernel/fork.c
···17871787 */17881788static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)17891789{17901790- struct task_struct *task;17911790 struct pid *pid = file->private_data;17921791 __poll_t poll_flags = 0;1793179217941793 poll_wait(file, &pid->wait_pidfd, pts);1795179417961796- rcu_read_lock();17971797- task = pid_task(pid, PIDTYPE_PID);17981795 /*17991796 * Inform pollers only when the whole thread group exits.18001797 * If the thread group leader exits before all other threads in the18011798 * group, then poll(2) should block, similar to the wait(2) family.18021799 */18031803- if (!task || (task->exit_state && thread_group_empty(task)))18001800+ if (thread_group_exited(pid))18041801 poll_flags = EPOLLIN | EPOLLRDNORM;18051805- rcu_read_unlock();1806180218071803 return poll_flags;18081804}
+3-168
kernel/umh.c
···2626#include <linux/ptrace.h>2727#include <linux/async.h>2828#include <linux/uaccess.h>2929-#include <linux/shmem_fs.h>3030-#include <linux/pipe_fs_i.h>31293230#include <trace/events/module.h>3331···3638static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;3739static DEFINE_SPINLOCK(umh_sysctl_lock);3840static DECLARE_RWSEM(umhelper_sem);3939-static LIST_HEAD(umh_list);4040-static DEFINE_MUTEX(umh_list_lock);41414242static void call_usermodehelper_freeinfo(struct subprocess_info *info)4343{···9810299103 commit_creds(new);100104101101- sub_info->pid = task_pid_nr(current);102102- if (sub_info->file) {103103- retval = do_execve_file(sub_info->file,104104- sub_info->argv, sub_info->envp);105105- if (!retval)106106- current->flags |= PF_UMH;107107- } else108108- retval = do_execve(getname_kernel(sub_info->path),109109- (const char __user *const __user *)sub_info->argv,110110- (const char __user *const __user *)sub_info->envp);105105+ retval = do_execve(getname_kernel(sub_info->path),106106+ (const char __user *const __user *)sub_info->argv,107107+ (const char __user *const __user *)sub_info->envp);111108out:112109 sub_info->retval = retval;113110 /*···394405}395406EXPORT_SYMBOL(call_usermodehelper_setup);396407397397-struct subprocess_info *call_usermodehelper_setup_file(struct file *file,398398- int (*init)(struct subprocess_info *info, struct cred *new),399399- void (*cleanup)(struct subprocess_info *info), void *data)400400-{401401- struct subprocess_info *sub_info;402402- struct umh_info *info = data;403403- const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper";404404-405405- sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);406406- if (!sub_info)407407- return NULL;408408-409409- sub_info->argv = argv_split(GFP_KERNEL, cmdline, NULL);410410- if (!sub_info->argv) {411411- kfree(sub_info);412412- return NULL;413413- }414414-415415- INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);416416- sub_info->path = "none";417417- sub_info->file = file;418418- sub_info->init = init;419419- sub_info->cleanup = cleanup;420420- sub_info->data = data;421421- return sub_info;422422-}423423-424424-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)425425-{426426- struct umh_info *umh_info = info->data;427427- struct file *from_umh[2];428428- struct file *to_umh[2];429429- int err;430430-431431- /* create pipe to send data to umh */432432- err = create_pipe_files(to_umh, 0);433433- if (err)434434- return err;435435- err = replace_fd(0, to_umh[0], 0);436436- fput(to_umh[0]);437437- if (err < 0) {438438- fput(to_umh[1]);439439- return err;440440- }441441-442442- /* create pipe to receive data from umh */443443- err = create_pipe_files(from_umh, 0);444444- if (err) {445445- fput(to_umh[1]);446446- replace_fd(0, NULL, 0);447447- return err;448448- }449449- err = replace_fd(1, from_umh[1], 0);450450- fput(from_umh[1]);451451- if (err < 0) {452452- fput(to_umh[1]);453453- replace_fd(0, NULL, 0);454454- fput(from_umh[0]);455455- return err;456456- }457457-458458- umh_info->pipe_to_umh = to_umh[1];459459- umh_info->pipe_from_umh = from_umh[0];460460- return 0;461461-}462462-463463-static void umh_clean_and_save_pid(struct subprocess_info *info)464464-{465465- struct umh_info *umh_info = info->data;466466-467467- /* cleanup if umh_pipe_setup() was successful but exec failed */468468- if (info->pid && info->retval) {469469- fput(umh_info->pipe_to_umh);470470- fput(umh_info->pipe_from_umh);471471- }472472-473473- argv_free(info->argv);474474- umh_info->pid = info->pid;475475-}476476-477477-/**478478- * fork_usermode_blob - fork a blob of bytes as a usermode process479479- * @data: a blob of bytes that can be do_execv-ed as a file480480- * @len: length of the blob481481- * @info: information about usermode process (shouldn't be NULL)482482- *483483- * If info->cmdline is set it will be used as command line for the484484- * user process, else "usermodehelper" is used.485485- *486486- * Returns either negative error or zero which indicates success487487- * in executing a blob of bytes as a usermode process. In such488488- * case 'struct umh_info *info' is populated with two pipes489489- * and a pid of the process. The caller is responsible for health490490- * check of the user process, killing it via pid, and closing the491491- * pipes when user process is no longer needed.492492- */493493-int fork_usermode_blob(void *data, size_t len, struct umh_info *info)494494-{495495- struct subprocess_info *sub_info;496496- struct file *file;497497- ssize_t written;498498- loff_t pos = 0;499499- int err;500500-501501- file = shmem_kernel_file_setup("", len, 0);502502- if (IS_ERR(file))503503- return PTR_ERR(file);504504-505505- written = kernel_write(file, data, len, &pos);506506- if (written != len) {507507- err = written;508508- if (err >= 0)509509- err = -ENOMEM;510510- goto out;511511- }512512-513513- err = -ENOMEM;514514- sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,515515- umh_clean_and_save_pid, info);516516- if (!sub_info)517517- goto out;518518-519519- err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);520520- if (!err) {521521- mutex_lock(&umh_list_lock);522522- list_add(&info->list, &umh_list);523523- mutex_unlock(&umh_list_lock);524524- }525525-out:526526- fput(file);527527- return err;528528-}529529-EXPORT_SYMBOL_GPL(fork_usermode_blob);530530-531408/**532409 * call_usermodehelper_exec - start a usermode application533410 * @sub_info: information about the subprocessa···553698 }554699555700 return 0;556556-}557557-558558-void __exit_umh(struct task_struct *tsk)559559-{560560- struct umh_info *info;561561- pid_t pid = tsk->pid;562562-563563- mutex_lock(&umh_list_lock);564564- list_for_each_entry(info, &umh_list, list) {565565- if (info->pid == pid) {566566- list_del(&info->list);567567- mutex_unlock(&umh_list_lock);568568- goto out;569569- }570570- }571571- mutex_unlock(&umh_list_lock);572572- return;573573-out:574574- if (info->cleanup)575575- info->cleanup(info);576701}577702578703struct ctl_table usermodehelper_table[] = {
+182
kernel/usermode_driver.c
···11+// SPDX-License-Identifier: GPL-2.0-only22+/*33+ * umd - User mode driver support44+ */55+#include <linux/shmem_fs.h>66+#include <linux/pipe_fs_i.h>77+#include <linux/mount.h>88+#include <linux/fs_struct.h>99+#include <linux/task_work.h>1010+#include <linux/usermode_driver.h>1111+1212+static struct vfsmount *blob_to_mnt(const void *data, size_t len, const char *name)1313+{1414+ struct file_system_type *type;1515+ struct vfsmount *mnt;1616+ struct file *file;1717+ ssize_t written;1818+ loff_t pos = 0;1919+2020+ type = get_fs_type("tmpfs");2121+ if (!type)2222+ return ERR_PTR(-ENODEV);2323+2424+ mnt = kern_mount(type);2525+ put_filesystem(type);2626+ if (IS_ERR(mnt))2727+ return mnt;2828+2929+ file = file_open_root(mnt->mnt_root, mnt, name, O_CREAT | O_WRONLY, 0700);3030+ if (IS_ERR(file)) {3131+ mntput(mnt);3232+ return ERR_CAST(file);3333+ }3434+3535+ written = kernel_write(file, data, len, &pos);3636+ if (written != len) {3737+ int err = written;3838+ if (err >= 0)3939+ err = -ENOMEM;4040+ filp_close(file, NULL);4141+ mntput(mnt);4242+ return ERR_PTR(err);4343+ }4444+4545+ fput(file);4646+4747+ /* Flush delayed fput so exec can open the file read-only */4848+ flush_delayed_fput();4949+ task_work_run();5050+ return mnt;5151+}5252+5353+/**5454+ * umd_load_blob - Remember a blob of bytes for fork_usermode_driver5555+ * @info: information about usermode driver5656+ * @data: a blob of bytes that can be executed as a file5757+ * @len: The lentgh of the blob5858+ *5959+ */6060+int umd_load_blob(struct umd_info *info, const void *data, size_t len)6161+{6262+ struct vfsmount *mnt;6363+6464+ if (WARN_ON_ONCE(info->wd.dentry || info->wd.mnt))6565+ return -EBUSY;6666+6767+ mnt = blob_to_mnt(data, len, info->driver_name);6868+ if (IS_ERR(mnt))6969+ return PTR_ERR(mnt);7070+7171+ info->wd.mnt = mnt;7272+ info->wd.dentry = mnt->mnt_root;7373+ return 0;7474+}7575+EXPORT_SYMBOL_GPL(umd_load_blob);7676+7777+/**7878+ * umd_unload_blob - Disassociate @info from a previously loaded blob7979+ * @info: information about usermode driver8080+ *8181+ */8282+int umd_unload_blob(struct umd_info *info)8383+{8484+ if (WARN_ON_ONCE(!info->wd.mnt ||8585+ !info->wd.dentry ||8686+ info->wd.mnt->mnt_root != info->wd.dentry))8787+ return -EINVAL;8888+8989+ kern_unmount(info->wd.mnt);9090+ info->wd.mnt = NULL;9191+ info->wd.dentry = NULL;9292+ return 0;9393+}9494+EXPORT_SYMBOL_GPL(umd_unload_blob);9595+9696+static int umd_setup(struct subprocess_info *info, struct cred *new)9797+{9898+ struct umd_info *umd_info = info->data;9999+ struct file *from_umh[2];100100+ struct file *to_umh[2];101101+ int err;102102+103103+ /* create pipe to send data to umh */104104+ err = create_pipe_files(to_umh, 0);105105+ if (err)106106+ return err;107107+ err = replace_fd(0, to_umh[0], 0);108108+ fput(to_umh[0]);109109+ if (err < 0) {110110+ fput(to_umh[1]);111111+ return err;112112+ }113113+114114+ /* create pipe to receive data from umh */115115+ err = create_pipe_files(from_umh, 0);116116+ if (err) {117117+ fput(to_umh[1]);118118+ replace_fd(0, NULL, 0);119119+ return err;120120+ }121121+ err = replace_fd(1, from_umh[1], 0);122122+ fput(from_umh[1]);123123+ if (err < 0) {124124+ fput(to_umh[1]);125125+ replace_fd(0, NULL, 0);126126+ fput(from_umh[0]);127127+ return err;128128+ }129129+130130+ set_fs_pwd(current->fs, &umd_info->wd);131131+ umd_info->pipe_to_umh = to_umh[1];132132+ umd_info->pipe_from_umh = from_umh[0];133133+ umd_info->tgid = get_pid(task_tgid(current));134134+ return 0;135135+}136136+137137+static void umd_cleanup(struct subprocess_info *info)138138+{139139+ struct umd_info *umd_info = info->data;140140+141141+ /* cleanup if umh_setup() was successful but exec failed */142142+ if (info->retval) {143143+ fput(umd_info->pipe_to_umh);144144+ fput(umd_info->pipe_from_umh);145145+ put_pid(umd_info->tgid);146146+ umd_info->tgid = NULL;147147+ }148148+}149149+150150+/**151151+ * fork_usermode_driver - fork a usermode driver152152+ * @info: information about usermode driver (shouldn't be NULL)153153+ *154154+ * Returns either negative error or zero which indicates success in155155+ * executing a usermode driver. In such case 'struct umd_info *info'156156+ * is populated with two pipes and a tgid of the process. The caller is157157+ * responsible for health check of the user process, killing it via158158+ * tgid, and closing the pipes when user process is no longer needed.159159+ */160160+int fork_usermode_driver(struct umd_info *info)161161+{162162+ struct subprocess_info *sub_info;163163+ const char *argv[] = { info->driver_name, NULL };164164+ int err;165165+166166+ if (WARN_ON_ONCE(info->tgid))167167+ return -EBUSY;168168+169169+ err = -ENOMEM;170170+ sub_info = call_usermodehelper_setup(info->driver_name,171171+ (char **)argv, NULL, GFP_KERNEL,172172+ umd_setup, umd_cleanup, info);173173+ if (!sub_info)174174+ goto out;175175+176176+ err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);177177+out:178178+ return err;179179+}180180+EXPORT_SYMBOL_GPL(fork_usermode_driver);181181+182182+
+19-19
net/bpfilter/bpfilter_kern.c
···15151616static void shutdown_umh(void)1717{1818- struct task_struct *tsk;1818+ struct umd_info *info = &bpfilter_ops.info;1919+ struct pid *tgid = info->tgid;19202020- if (bpfilter_ops.stop)2121- return;2222-2323- tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID);2424- if (tsk) {2525- send_sig(SIGKILL, tsk, 1);2626- put_task_struct(tsk);2121+ if (tgid) {2222+ kill_pid(tgid, SIGKILL, 1);2323+ wait_event(tgid->wait_pidfd, thread_group_exited(tgid));2424+ bpfilter_umh_cleanup(info);2725 }2826}2927···4648 req.cmd = optname;4749 req.addr = (long __force __user)optval;4850 req.len = optlen;4949- if (!bpfilter_ops.info.pid)5151+ if (!bpfilter_ops.info.tgid)5052 goto out;5153 n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),5254 &pos);···7577 int err;76787779 /* fork usermode process */7878- err = fork_usermode_blob(&bpfilter_umh_start,7979- &bpfilter_umh_end - &bpfilter_umh_start,8080- &bpfilter_ops.info);8080+ err = fork_usermode_driver(&bpfilter_ops.info);8181 if (err)8282 return err;8383- bpfilter_ops.stop = false;8484- pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid);8383+ pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid));85848685 /* health check that usermode process started correctly */8786 if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {···9398{9499 int err;95100101101+ err = umd_load_blob(&bpfilter_ops.info,102102+ &bpfilter_umh_start,103103+ &bpfilter_umh_end - &bpfilter_umh_start);104104+ if (err)105105+ return err;106106+96107 mutex_lock(&bpfilter_ops.lock);9797- if (!bpfilter_ops.stop) {9898- err = -EFAULT;9999- goto out;100100- }101108 err = start_umh();102109 if (!err && IS_ENABLED(CONFIG_INET)) {103110 bpfilter_ops.sockopt = &__bpfilter_process_sockopt;104111 bpfilter_ops.start = &start_umh;105112 }106106-out:107113 mutex_unlock(&bpfilter_ops.lock);114114+ if (err)115115+ umd_unload_blob(&bpfilter_ops.info);108116 return err;109117}110118···120122 bpfilter_ops.sockopt = NULL;121123 }122124 mutex_unlock(&bpfilter_ops.lock);125125+126126+ umd_unload_blob(&bpfilter_ops.info);123127}124128module_init(load_umh);125129module_exit(fini_umh);