vhost: fix attach to cgroups regression

Since 2.6.36-rc1, vhost-net setup fails for non-root users
if they are in any cgroups.

The reason is that when qemu uses vhost, vhost wants to attach
its worker thread to all cgroups that qemu is in. But we got the API
backwards, so the unprivileged process (qemu) tried to control
the privileged one (vhost), which fails.

Fix this by switching to the new cgroup_attach_task_all()
and running it from the vhost worker thread.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

+60 -25
drivers/vhost/vhost.c
···
 	return 0;
 }
 
-/* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
-		     unsigned long mask, struct vhost_dev *dev)
+static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
 {
-	struct vhost_work *work = &poll->work;
-
-	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
-	init_poll_funcptr(&poll->table, vhost_poll_func);
-	poll->mask = mask;
-	poll->dev = dev;
-
 	INIT_LIST_HEAD(&work->node);
 	work->fn = fn;
 	init_waitqueue_head(&work->done);
 	work->flushing = 0;
 	work->queue_seq = work->done_seq = 0;
+}
+
+/* Init poll structure */
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+		     unsigned long mask, struct vhost_dev *dev)
+{
+	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
+	init_poll_funcptr(&poll->table, vhost_poll_func);
+	poll->mask = mask;
+	poll->dev = dev;
+
+	vhost_work_init(&poll->work, fn);
 }
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
···
 	remove_wait_queue(poll->wqh, &poll->wait);
 }
 
-/* Flush any work that has been scheduled. When calling this, don't hold any
- * locks that are also used by the callback. */
-void vhost_poll_flush(struct vhost_poll *poll)
+static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
 {
-	struct vhost_work *work = &poll->work;
 	unsigned seq;
 	int left;
 	int flushing;
 
-	spin_lock_irq(&poll->dev->work_lock);
+	spin_lock_irq(&dev->work_lock);
 	seq = work->queue_seq;
 	work->flushing++;
-	spin_unlock_irq(&poll->dev->work_lock);
+	spin_unlock_irq(&dev->work_lock);
 	wait_event(work->done, ({
-		   spin_lock_irq(&poll->dev->work_lock);
+		   spin_lock_irq(&dev->work_lock);
 		   left = seq - work->done_seq <= 0;
-		   spin_unlock_irq(&poll->dev->work_lock);
+		   spin_unlock_irq(&dev->work_lock);
 		   left;
 	}));
-	spin_lock_irq(&poll->dev->work_lock);
+	spin_lock_irq(&dev->work_lock);
 	flushing = --work->flushing;
-	spin_unlock_irq(&poll->dev->work_lock);
+	spin_unlock_irq(&dev->work_lock);
 	BUG_ON(flushing < 0);
 }
 
-void vhost_poll_queue(struct vhost_poll *poll)
+/* Flush any work that has been scheduled. When calling this, don't hold any
+ * locks that are also used by the callback. */
+void vhost_poll_flush(struct vhost_poll *poll)
 {
-	struct vhost_dev *dev = poll->dev;
-	struct vhost_work *work = &poll->work;
+	vhost_work_flush(poll->dev, &poll->work);
+}
+
+static inline void vhost_work_queue(struct vhost_dev *dev,
+				    struct vhost_work *work)
+{
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->work_lock, flags);
···
 		wake_up_process(dev->worker);
 	}
 	spin_unlock_irqrestore(&dev->work_lock, flags);
+}
+
+void vhost_poll_queue(struct vhost_poll *poll)
+{
+	vhost_work_queue(poll->dev, &poll->work);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
···
 	return dev->mm == current->mm ? 0 : -EPERM;
 }
 
+struct vhost_attach_cgroups_struct {
+	struct vhost_work work;
+	struct task_struct *owner;
+	int ret;
+};
+
+static void vhost_attach_cgroups_work(struct vhost_work *work)
+{
+	struct vhost_attach_cgroups_struct *s;
+	s = container_of(work, struct vhost_attach_cgroups_struct, work);
+	s->ret = cgroup_attach_task_all(s->owner, current);
+}
+
+static int vhost_attach_cgroups(struct vhost_dev *dev)
+{
+	struct vhost_attach_cgroups_struct attach;
+	attach.owner = current;
+	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
+	vhost_work_queue(dev, &attach.work);
+	vhost_work_flush(dev, &attach.work);
+	return attach.ret;
+}
+
 /* Caller should have device mutex */
 static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
···
 	}
 
 	dev->worker = worker;
-	err = cgroup_attach_task_current_cg(worker);
+	wake_up_process(worker);	/* avoid contributing to loadavg */
+
+	err = vhost_attach_cgroups(dev);
 	if (err)
 		goto err_cgroup;
-	wake_up_process(worker);	/* avoid contributing to loadavg */
 
 	return 0;
 err_cgroup:
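
For context, the regression described in the commit message surfaces at the
VHOST_SET_OWNER ioctl, since that is what reaches vhost_dev_set_owner() and
therefore the cgroup attach. Below is a minimal reproducer sketch, not part
of the patch, assuming the non-root user has been granted access to
/dev/vhost-net and has been placed in some cgroup; on affected kernels the
ioctl fails, with this fix it succeeds.

/*
 * Illustrative reproducer only (not part of the patch): run as a
 * non-root user that has access to /dev/vhost-net and sits in a
 * cgroup of any controller.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

int main(void)
{
	int fd = open("/dev/vhost-net", O_RDWR);

	if (fd < 0) {
		perror("open /dev/vhost-net");
		return 1;
	}
	/* Triggers vhost_dev_set_owner(), which spawns the worker
	 * thread and attaches it to the caller's cgroups. */
	if (ioctl(fd, VHOST_SET_OWNER) < 0) {
		perror("VHOST_SET_OWNER");
		close(fd);
		return 1;
	}
	printf("VHOST_SET_OWNER succeeded\n");
	close(fd);
	return 0;
}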