Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'fuse-update-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

- Add support for idmapped fuse mounts (Alexander Mikhalitsyn)

- Add optimization when checking for writeback (yangyun)

- Add tracepoints (Josef Bacik)

- Clean up writeback code (Joanne Koong)

- Clean up request queuing (Miklos Szeredi)

- Misc fixes

* tag 'fuse-update-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (32 commits)
fuse: use exclusive lock when FUSE_I_CACHE_IO_MODE is set
fuse: clear FR_PENDING if abort is detected when sending request
fs/fuse: convert to use invalid_mnt_idmap
fs/mnt_idmapping: introduce an invalid_mnt_idmap
fs/fuse: introduce and use fuse_simple_idmap_request() helper
fs/fuse: fix null-ptr-deref when checking SB_I_NOIDMAP flag
fuse: allow O_PATH fd for FUSE_DEV_IOC_BACKING_OPEN
virtio_fs: allow idmapped mounts
fuse: allow idmapped mounts
fuse: warn if fuse_access is called when idmapped mounts are allowed
fuse: handle idmappings properly in ->write_iter()
fuse: support idmapped ->rename op
fuse: support idmapped ->set_acl
fuse: drop idmap argument from __fuse_get_acl
fuse: support idmapped ->setattr op
fuse: support idmapped ->permission inode op
fuse: support idmapped getattr inode op
fuse: support idmap for mkdir/mknod/symlink/create/tmpfile
fuse: support idmapped FUSE_EXT_GROUPS
fuse: add an idmap argument to fuse_simple_request
...

+552 -297
+3
fs/fuse/Makefile
··· 3 3 # Makefile for the FUSE filesystem. 4 4 # 5 5 6 + # Needed for trace events 7 + ccflags-y = -I$(src) 8 + 6 9 obj-$(CONFIG_FUSE_FS) += fuse.o 7 10 obj-$(CONFIG_CUSE) += cuse.o 8 11 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
+4 -6
fs/fuse/acl.c
··· 12 12 #include <linux/posix_acl_xattr.h> 13 13 14 14 static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc, 15 - struct mnt_idmap *idmap, 16 15 struct inode *inode, int type, bool rcu) 17 16 { 18 17 int size; ··· 73 74 if (fuse_no_acl(fc, inode)) 74 75 return ERR_PTR(-EOPNOTSUPP); 75 76 76 - return __fuse_get_acl(fc, idmap, inode, type, false); 77 + return __fuse_get_acl(fc, inode, type, false); 77 78 } 78 79 79 80 struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu) ··· 89 90 */ 90 91 if (!fc->posix_acl) 91 92 return NULL; 92 - 93 - return __fuse_get_acl(fc, &nop_mnt_idmap, inode, type, rcu); 93 + return __fuse_get_acl(fc, inode, type, rcu); 94 94 } 95 95 96 96 int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, ··· 144 146 * be stripped. 145 147 */ 146 148 if (fc->posix_acl && 147 - !in_group_or_capable(&nop_mnt_idmap, inode, 148 - i_gid_into_vfsgid(&nop_mnt_idmap, inode))) 149 + !in_group_or_capable(idmap, inode, 150 + i_gid_into_vfsgid(idmap, inode))) 149 151 extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; 150 152 151 153 ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);
+131 -83
fs/fuse/dev.c
··· 22 22 #include <linux/splice.h> 23 23 #include <linux/sched.h> 24 24 25 + #define CREATE_TRACE_POINTS 26 + #include "fuse_trace.h" 27 + 25 28 MODULE_ALIAS_MISCDEV(FUSE_MINOR); 26 29 MODULE_ALIAS("devname:fuse"); 27 30 ··· 108 105 109 106 static void fuse_put_request(struct fuse_req *req); 110 107 111 - static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background) 108 + static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap, 109 + struct fuse_mount *fm, 110 + bool for_background) 112 111 { 113 112 struct fuse_conn *fc = fm->fc; 114 113 struct fuse_req *req; 114 + bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP); 115 + kuid_t fsuid; 116 + kgid_t fsgid; 115 117 int err; 118 + 116 119 atomic_inc(&fc->num_waiting); 117 120 118 121 if (fuse_block_alloc(fc, for_background)) { ··· 146 137 goto out; 147 138 } 148 139 149 - req->in.h.uid = from_kuid(fc->user_ns, current_fsuid()); 150 - req->in.h.gid = from_kgid(fc->user_ns, current_fsgid()); 151 140 req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); 152 141 153 142 __set_bit(FR_WAITING, &req->flags); 154 143 if (for_background) 155 144 __set_bit(FR_BACKGROUND, &req->flags); 156 145 157 - if (unlikely(req->in.h.uid == ((uid_t)-1) || 158 - req->in.h.gid == ((gid_t)-1))) { 146 + /* 147 + * Keep the old behavior when idmappings support was not 148 + * declared by a FUSE server. 149 + * 150 + * For those FUSE servers who support idmapped mounts, 151 + * we send UID/GID only along with "inode creation" 152 + * fuse requests, otherwise idmap == &invalid_mnt_idmap and 153 + * req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID. 154 + */ 155 + fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns); 156 + fsgid = no_idmap ? 
current_fsgid() : mapped_fsgid(idmap, fc->user_ns); 157 + req->in.h.uid = from_kuid(fc->user_ns, fsuid); 158 + req->in.h.gid = from_kgid(fc->user_ns, fsgid); 159 + 160 + if (no_idmap && unlikely(req->in.h.uid == ((uid_t)-1) || 161 + req->in.h.gid == ((gid_t)-1))) { 159 162 fuse_put_request(req); 160 163 return ERR_PTR(-EOVERFLOW); 161 164 } 165 + 162 166 return req; 163 167 164 168 out: ··· 216 194 } 217 195 EXPORT_SYMBOL_GPL(fuse_len_args); 218 196 219 - u64 fuse_get_unique(struct fuse_iqueue *fiq) 197 + static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq) 220 198 { 221 199 fiq->reqctr += FUSE_REQ_ID_STEP; 222 200 return fiq->reqctr; 201 + } 202 + 203 + u64 fuse_get_unique(struct fuse_iqueue *fiq) 204 + { 205 + u64 ret; 206 + 207 + spin_lock(&fiq->lock); 208 + ret = fuse_get_unique_locked(fiq); 209 + spin_unlock(&fiq->lock); 210 + 211 + return ret; 223 212 } 224 213 EXPORT_SYMBOL_GPL(fuse_get_unique); 225 214 ··· 250 217 spin_unlock(&fiq->lock); 251 218 } 252 219 220 + static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget) 221 + { 222 + spin_lock(&fiq->lock); 223 + if (fiq->connected) { 224 + fiq->forget_list_tail->next = forget; 225 + fiq->forget_list_tail = forget; 226 + fuse_dev_wake_and_unlock(fiq); 227 + } else { 228 + kfree(forget); 229 + spin_unlock(&fiq->lock); 230 + } 231 + } 232 + 233 + static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) 234 + { 235 + spin_lock(&fiq->lock); 236 + if (list_empty(&req->intr_entry)) { 237 + list_add_tail(&req->intr_entry, &fiq->interrupts); 238 + /* 239 + * Pairs with smp_mb() implied by test_and_set_bit() 240 + * from fuse_request_end(). 
241 + */ 242 + smp_mb(); 243 + if (test_bit(FR_FINISHED, &req->flags)) { 244 + list_del_init(&req->intr_entry); 245 + spin_unlock(&fiq->lock); 246 + } else { 247 + fuse_dev_wake_and_unlock(fiq); 248 + } 249 + } else { 250 + spin_unlock(&fiq->lock); 251 + } 252 + } 253 + 254 + static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) 255 + { 256 + spin_lock(&fiq->lock); 257 + if (fiq->connected) { 258 + if (req->in.h.opcode != FUSE_NOTIFY_REPLY) 259 + req->in.h.unique = fuse_get_unique_locked(fiq); 260 + list_add_tail(&req->list, &fiq->pending); 261 + fuse_dev_wake_and_unlock(fiq); 262 + } else { 263 + spin_unlock(&fiq->lock); 264 + req->out.h.error = -ENOTCONN; 265 + clear_bit(FR_PENDING, &req->flags); 266 + fuse_request_end(req); 267 + } 268 + } 269 + 253 270 const struct fuse_iqueue_ops fuse_dev_fiq_ops = { 254 - .wake_forget_and_unlock = fuse_dev_wake_and_unlock, 255 - .wake_interrupt_and_unlock = fuse_dev_wake_and_unlock, 256 - .wake_pending_and_unlock = fuse_dev_wake_and_unlock, 271 + .send_forget = fuse_dev_queue_forget, 272 + .send_interrupt = fuse_dev_queue_interrupt, 273 + .send_req = fuse_dev_queue_req, 257 274 }; 258 275 EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); 259 276 260 - static void queue_request_and_unlock(struct fuse_iqueue *fiq, 261 - struct fuse_req *req) 262 - __releases(fiq->lock) 277 + static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) 263 278 { 264 279 req->in.h.len = sizeof(struct fuse_in_header) + 265 280 fuse_len_args(req->args->in_numargs, 266 281 (struct fuse_arg *) req->args->in_args); 267 - list_add_tail(&req->list, &fiq->pending); 268 - fiq->ops->wake_pending_and_unlock(fiq); 282 + trace_fuse_request_send(req); 283 + fiq->ops->send_req(fiq, req); 269 284 } 270 285 271 286 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, ··· 324 243 forget->forget_one.nodeid = nodeid; 325 244 forget->forget_one.nlookup = nlookup; 326 245 327 - spin_lock(&fiq->lock); 328 - if 
(fiq->connected) { 329 - fiq->forget_list_tail->next = forget; 330 - fiq->forget_list_tail = forget; 331 - fiq->ops->wake_forget_and_unlock(fiq); 332 - } else { 333 - kfree(forget); 334 - spin_unlock(&fiq->lock); 335 - } 246 + fiq->ops->send_forget(fiq, forget); 336 247 } 337 248 338 249 static void flush_bg_queue(struct fuse_conn *fc) ··· 338 265 req = list_first_entry(&fc->bg_queue, struct fuse_req, list); 339 266 list_del(&req->list); 340 267 fc->active_background++; 341 - spin_lock(&fiq->lock); 342 - req->in.h.unique = fuse_get_unique(fiq); 343 - queue_request_and_unlock(fiq, req); 268 + fuse_send_one(fiq, req); 344 269 } 345 270 } 346 271 ··· 359 288 if (test_and_set_bit(FR_FINISHED, &req->flags)) 360 289 goto put_request; 361 290 291 + trace_fuse_request_end(req); 362 292 /* 363 293 * test_and_set_bit() implies smp_mb() between bit 364 294 * changing and below FR_INTERRUPTED check. Pairs with ··· 409 337 { 410 338 struct fuse_iqueue *fiq = &req->fm->fc->iq; 411 339 412 - spin_lock(&fiq->lock); 413 340 /* Check for we've sent request to interrupt this req */ 414 - if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { 415 - spin_unlock(&fiq->lock); 341 + if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) 416 342 return -EINVAL; 417 - } 418 343 419 - if (list_empty(&req->intr_entry)) { 420 - list_add_tail(&req->intr_entry, &fiq->interrupts); 421 - /* 422 - * Pairs with smp_mb() implied by test_and_set_bit() 423 - * from fuse_request_end(). 
424 - */ 425 - smp_mb(); 426 - if (test_bit(FR_FINISHED, &req->flags)) { 427 - list_del_init(&req->intr_entry); 428 - spin_unlock(&fiq->lock); 429 - return 0; 430 - } 431 - fiq->ops->wake_interrupt_and_unlock(fiq); 432 - } else { 433 - spin_unlock(&fiq->lock); 434 - } 344 + fiq->ops->send_interrupt(fiq, req); 345 + 435 346 return 0; 436 347 } 437 348 ··· 469 414 struct fuse_iqueue *fiq = &req->fm->fc->iq; 470 415 471 416 BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); 472 - spin_lock(&fiq->lock); 473 - if (!fiq->connected) { 474 - spin_unlock(&fiq->lock); 475 - req->out.h.error = -ENOTCONN; 476 - } else { 477 - req->in.h.unique = fuse_get_unique(fiq); 478 - /* acquire extra reference, since request is still needed 479 - after fuse_request_end() */ 480 - __fuse_get_request(req); 481 - queue_request_and_unlock(fiq, req); 482 417 483 - request_wait_answer(req); 484 - /* Pairs with smp_wmb() in fuse_request_end() */ 485 - smp_rmb(); 486 - } 418 + /* acquire extra reference, since request is still needed after 419 + fuse_request_end() */ 420 + __fuse_get_request(req); 421 + fuse_send_one(fiq, req); 422 + 423 + request_wait_answer(req); 424 + /* Pairs with smp_wmb() in fuse_request_end() */ 425 + smp_rmb(); 487 426 } 488 427 489 428 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) ··· 517 468 { 518 469 struct fuse_conn *fc = req->fm->fc; 519 470 520 - req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); 521 - req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); 471 + if (!req->fm->sb || req->fm->sb->s_iflags & SB_I_NOIDMAP) { 472 + req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); 473 + req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); 474 + } else { 475 + req->in.h.uid = FUSE_INVALID_UIDGID; 476 + req->in.h.gid = FUSE_INVALID_UIDGID; 477 + } 478 + 522 479 req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); 523 480 } 524 481 ··· 539 484 __set_bit(FR_ASYNC, &req->flags); 540 485 } 541 486 
542 - ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args) 487 + ssize_t __fuse_simple_request(struct mnt_idmap *idmap, 488 + struct fuse_mount *fm, 489 + struct fuse_args *args) 543 490 { 544 491 struct fuse_conn *fc = fm->fc; 545 492 struct fuse_req *req; ··· 558 501 __set_bit(FR_FORCE, &req->flags); 559 502 } else { 560 503 WARN_ON(args->nocreds); 561 - req = fuse_get_req(fm, false); 504 + req = fuse_get_req(idmap, fm, false); 562 505 if (IS_ERR(req)) 563 506 return PTR_ERR(req); 564 507 } ··· 619 562 __set_bit(FR_BACKGROUND, &req->flags); 620 563 } else { 621 564 WARN_ON(args->nocreds); 622 - req = fuse_get_req(fm, true); 565 + req = fuse_get_req(&invalid_mnt_idmap, fm, true); 623 566 if (IS_ERR(req)) 624 567 return PTR_ERR(req); 625 568 } ··· 640 583 { 641 584 struct fuse_req *req; 642 585 struct fuse_iqueue *fiq = &fm->fc->iq; 643 - int err = 0; 644 586 645 - req = fuse_get_req(fm, false); 587 + req = fuse_get_req(&invalid_mnt_idmap, fm, false); 646 588 if (IS_ERR(req)) 647 589 return PTR_ERR(req); 648 590 ··· 650 594 651 595 fuse_args_to_req(req, args); 652 596 653 - spin_lock(&fiq->lock); 654 - if (fiq->connected) { 655 - queue_request_and_unlock(fiq, req); 656 - } else { 657 - err = -ENODEV; 658 - spin_unlock(&fiq->lock); 659 - fuse_put_request(req); 660 - } 597 + fuse_send_one(fiq, req); 661 598 662 - return err; 599 + return 0; 663 600 } 664 601 665 602 /* ··· 1124 1075 return err ? 
err : reqsize; 1125 1076 } 1126 1077 1127 - struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, 1128 - unsigned int max, 1129 - unsigned int *countp) 1078 + static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, 1079 + unsigned int max, 1080 + unsigned int *countp) 1130 1081 { 1131 1082 struct fuse_forget_link *head = fiq->forget_list_head.next; 1132 1083 struct fuse_forget_link **newhead = &head; ··· 1145 1096 1146 1097 return head; 1147 1098 } 1148 - EXPORT_SYMBOL(fuse_dequeue_forget); 1149 1099 1150 1100 static int fuse_read_single_forget(struct fuse_iqueue *fiq, 1151 1101 struct fuse_copy_state *cs, ··· 1159 1111 struct fuse_in_header ih = { 1160 1112 .opcode = FUSE_FORGET, 1161 1113 .nodeid = forget->forget_one.nodeid, 1162 - .unique = fuse_get_unique(fiq), 1114 + .unique = fuse_get_unique_locked(fiq), 1163 1115 .len = sizeof(ih) + sizeof(arg), 1164 1116 }; 1165 1117 ··· 1190 1142 struct fuse_batch_forget_in arg = { .count = 0 }; 1191 1143 struct fuse_in_header ih = { 1192 1144 .opcode = FUSE_BATCH_FORGET, 1193 - .unique = fuse_get_unique(fiq), 1145 + .unique = fuse_get_unique_locked(fiq), 1194 1146 .len = sizeof(ih) + sizeof(arg), 1195 1147 }; 1196 1148 ··· 1878 1830 } 1879 1831 /* iq and pq requests are both oldest to newest */ 1880 1832 list_splice(&to_queue, &fiq->pending); 1881 - fiq->ops->wake_pending_and_unlock(fiq); 1833 + fuse_dev_wake_and_unlock(fiq); 1882 1834 } 1883 1835 1884 1836 static int fuse_notify_resend(struct fuse_conn *fc)
+92 -60
fs/fuse/dir.c
··· 545 545 /* 546 546 * This adds just a single supplementary group that matches the parent's group. 547 547 */ 548 - static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext) 548 + static int get_create_supp_group(struct mnt_idmap *idmap, 549 + struct inode *dir, 550 + struct fuse_in_arg *ext) 549 551 { 550 552 struct fuse_conn *fc = get_fuse_conn(dir); 551 553 struct fuse_ext_header *xh; 552 554 struct fuse_supp_groups *sg; 553 555 kgid_t kgid = dir->i_gid; 556 + vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid); 554 557 gid_t parent_gid = from_kgid(fc->user_ns, kgid); 558 + 555 559 u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0])); 556 560 557 - if (parent_gid == (gid_t) -1 || gid_eq(kgid, current_fsgid()) || 558 - !in_group_p(kgid)) 561 + if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) || 562 + !vfsgid_in_group_p(vfsgid)) 559 563 return 0; 560 564 561 565 xh = extend_arg(ext, sg_len); ··· 576 572 return 0; 577 573 } 578 574 579 - static int get_create_ext(struct fuse_args *args, 575 + static int get_create_ext(struct mnt_idmap *idmap, 576 + struct fuse_args *args, 580 577 struct inode *dir, struct dentry *dentry, 581 578 umode_t mode) 582 579 { ··· 588 583 if (fc->init_security) 589 584 err = get_security_context(dentry, mode, &ext); 590 585 if (!err && fc->create_supp_group) 591 - err = get_create_supp_group(dir, &ext); 586 + err = get_create_supp_group(idmap, dir, &ext); 592 587 593 588 if (!err && ext.size) { 594 589 WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); ··· 614 609 * If the filesystem doesn't support this, then fall back to separate 615 610 * 'mknod' + 'open' requests. 
616 611 */ 617 - static int fuse_create_open(struct inode *dir, struct dentry *entry, 618 - struct file *file, unsigned int flags, 619 - umode_t mode, u32 opcode) 612 + static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, 613 + struct dentry *entry, struct file *file, 614 + unsigned int flags, umode_t mode, u32 opcode) 620 615 { 621 616 int err; 622 617 struct inode *inode; ··· 673 668 args.out_args[1].size = sizeof(*outopenp); 674 669 args.out_args[1].value = outopenp; 675 670 676 - err = get_create_ext(&args, dir, entry, mode); 671 + err = get_create_ext(idmap, &args, dir, entry, mode); 677 672 if (err) 678 673 goto out_free_ff; 679 674 680 - err = fuse_simple_request(fm, &args); 675 + err = fuse_simple_idmap_request(idmap, fm, &args); 681 676 free_ext_value(&args); 682 677 if (err) 683 678 goto out_free_ff; ··· 734 729 umode_t mode) 735 730 { 736 731 int err; 732 + struct mnt_idmap *idmap = file_mnt_idmap(file); 737 733 struct fuse_conn *fc = get_fuse_conn(dir); 738 734 struct dentry *res = NULL; 739 735 ··· 759 753 if (fc->no_create) 760 754 goto mknod; 761 755 762 - err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE); 756 + err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE); 763 757 if (err == -ENOSYS) { 764 758 fc->no_create = 1; 765 759 goto mknod; ··· 770 764 return err; 771 765 772 766 mknod: 773 - err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); 767 + err = fuse_mknod(idmap, dir, entry, mode, 0); 774 768 if (err) 775 769 goto out_dput; 776 770 no_open: ··· 780 774 /* 781 775 * Code shared between mknod, mkdir, symlink and link 782 776 */ 783 - static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, 784 - struct inode *dir, struct dentry *entry, 785 - umode_t mode) 777 + static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm, 778 + struct fuse_args *args, struct inode *dir, 779 + struct dentry *entry, umode_t mode) 786 780 { 787 781 struct fuse_entry_out 
outarg; 788 782 struct inode *inode; ··· 804 798 args->out_args[0].value = &outarg; 805 799 806 800 if (args->opcode != FUSE_LINK) { 807 - err = get_create_ext(args, dir, entry, mode); 801 + err = get_create_ext(idmap, args, dir, entry, mode); 808 802 if (err) 809 803 goto out_put_forget_req; 810 804 } 811 805 812 - err = fuse_simple_request(fm, args); 806 + err = fuse_simple_idmap_request(idmap, fm, args); 813 807 free_ext_value(args); 814 808 if (err) 815 809 goto out_put_forget_req; ··· 870 864 args.in_args[0].value = &inarg; 871 865 args.in_args[1].size = entry->d_name.len + 1; 872 866 args.in_args[1].value = entry->d_name.name; 873 - return create_new_entry(fm, &args, dir, entry, mode); 867 + return create_new_entry(idmap, fm, &args, dir, entry, mode); 874 868 } 875 869 876 870 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir, 877 871 struct dentry *entry, umode_t mode, bool excl) 878 872 { 879 - return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); 873 + return fuse_mknod(idmap, dir, entry, mode, 0); 880 874 } 881 875 882 876 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir, ··· 888 882 if (fc->no_tmpfile) 889 883 return -EOPNOTSUPP; 890 884 891 - err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE); 885 + err = fuse_create_open(idmap, dir, file->f_path.dentry, file, 886 + file->f_flags, mode, FUSE_TMPFILE); 892 887 if (err == -ENOSYS) { 893 888 fc->no_tmpfile = 1; 894 889 err = -EOPNOTSUPP; ··· 916 909 args.in_args[0].value = &inarg; 917 910 args.in_args[1].size = entry->d_name.len + 1; 918 911 args.in_args[1].value = entry->d_name.name; 919 - return create_new_entry(fm, &args, dir, entry, S_IFDIR); 912 + return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR); 920 913 } 921 914 922 915 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, ··· 932 925 args.in_args[0].value = entry->d_name.name; 933 926 args.in_args[1].size = len; 934 927 args.in_args[1].value = 
link; 935 - return create_new_entry(fm, &args, dir, entry, S_IFLNK); 928 + return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK); 936 929 } 937 930 938 931 void fuse_flush_time_update(struct inode *inode) ··· 1026 1019 return err; 1027 1020 } 1028 1021 1029 - static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, 1022 + static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent, 1030 1023 struct inode *newdir, struct dentry *newent, 1031 1024 unsigned int flags, int opcode, size_t argsize) 1032 1025 { ··· 1047 1040 args.in_args[1].value = oldent->d_name.name; 1048 1041 args.in_args[2].size = newent->d_name.len + 1; 1049 1042 args.in_args[2].value = newent->d_name.name; 1050 - err = fuse_simple_request(fm, &args); 1043 + err = fuse_simple_idmap_request(idmap, fm, &args); 1051 1044 if (!err) { 1052 1045 /* ctime changes */ 1053 1046 fuse_update_ctime(d_inode(oldent)); ··· 1093 1086 if (fc->no_rename2 || fc->minor < 23) 1094 1087 return -EINVAL; 1095 1088 1096 - err = fuse_rename_common(olddir, oldent, newdir, newent, flags, 1089 + err = fuse_rename_common((flags & RENAME_WHITEOUT) ? 
idmap : &invalid_mnt_idmap, 1090 + olddir, oldent, newdir, newent, flags, 1097 1091 FUSE_RENAME2, 1098 1092 sizeof(struct fuse_rename2_in)); 1099 1093 if (err == -ENOSYS) { ··· 1102 1094 err = -EINVAL; 1103 1095 } 1104 1096 } else { 1105 - err = fuse_rename_common(olddir, oldent, newdir, newent, 0, 1097 + err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0, 1106 1098 FUSE_RENAME, 1107 1099 sizeof(struct fuse_rename_in)); 1108 1100 } ··· 1127 1119 args.in_args[0].value = &inarg; 1128 1120 args.in_args[1].size = newent->d_name.len + 1; 1129 1121 args.in_args[1].value = newent->d_name.name; 1130 - err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); 1122 + err = create_new_entry(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode); 1131 1123 if (!err) 1132 1124 fuse_update_ctime_in_cache(inode); 1133 1125 else if (err == -EINTR) ··· 1136 1128 return err; 1137 1129 } 1138 1130 1139 - static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, 1140 - struct kstat *stat) 1131 + static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, 1132 + struct fuse_attr *attr, struct kstat *stat) 1141 1133 { 1142 1134 unsigned int blkbits; 1143 1135 struct fuse_conn *fc = get_fuse_conn(inode); 1136 + vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns, 1137 + make_kuid(fc->user_ns, attr->uid)); 1138 + vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, 1139 + make_kgid(fc->user_ns, attr->gid)); 1144 1140 1145 1141 stat->dev = inode->i_sb->s_dev; 1146 1142 stat->ino = attr->ino; 1147 1143 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 1148 1144 stat->nlink = attr->nlink; 1149 - stat->uid = make_kuid(fc->user_ns, attr->uid); 1150 - stat->gid = make_kgid(fc->user_ns, attr->gid); 1145 + stat->uid = vfsuid_into_kuid(vfsuid); 1146 + stat->gid = vfsgid_into_kgid(vfsgid); 1151 1147 stat->rdev = inode->i_rdev; 1152 1148 stat->atime.tv_sec = attr->atime; 1153 1149 stat->atime.tv_nsec = attr->atimensec; ··· 
1190 1178 attr->blksize = sx->blksize; 1191 1179 } 1192 1180 1193 - static int fuse_do_statx(struct inode *inode, struct file *file, 1194 - struct kstat *stat) 1181 + static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode, 1182 + struct file *file, struct kstat *stat) 1195 1183 { 1196 1184 int err; 1197 1185 struct fuse_attr attr; ··· 1244 1232 stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); 1245 1233 stat->btime.tv_sec = sx->btime.tv_sec; 1246 1234 stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); 1247 - fuse_fillattr(inode, &attr, stat); 1235 + fuse_fillattr(idmap, inode, &attr, stat); 1248 1236 stat->result_mask |= STATX_TYPE; 1249 1237 } 1250 1238 1251 1239 return 0; 1252 1240 } 1253 1241 1254 - static int fuse_do_getattr(struct inode *inode, struct kstat *stat, 1255 - struct file *file) 1242 + static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode, 1243 + struct kstat *stat, struct file *file) 1256 1244 { 1257 1245 int err; 1258 1246 struct fuse_getattr_in inarg; ··· 1291 1279 ATTR_TIMEOUT(&outarg), 1292 1280 attr_version); 1293 1281 if (stat) 1294 - fuse_fillattr(inode, &outarg.attr, stat); 1282 + fuse_fillattr(idmap, inode, &outarg.attr, stat); 1295 1283 } 1296 1284 } 1297 1285 return err; 1298 1286 } 1299 1287 1300 - static int fuse_update_get_attr(struct inode *inode, struct file *file, 1301 - struct kstat *stat, u32 request_mask, 1302 - unsigned int flags) 1288 + static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode, 1289 + struct file *file, struct kstat *stat, 1290 + u32 request_mask, unsigned int flags) 1303 1291 { 1304 1292 struct fuse_inode *fi = get_fuse_inode(inode); 1305 1293 struct fuse_conn *fc = get_fuse_conn(inode); ··· 1330 1318 forget_all_cached_acls(inode); 1331 1319 /* Try statx if BTIME is requested */ 1332 1320 if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) { 1333 - err = fuse_do_statx(inode, file, stat); 1321 + err = 
fuse_do_statx(idmap, inode, file, stat); 1334 1322 if (err == -ENOSYS) { 1335 1323 fc->no_statx = 1; 1336 1324 err = 0; 1337 1325 goto retry; 1338 1326 } 1339 1327 } else { 1340 - err = fuse_do_getattr(inode, stat, file); 1328 + err = fuse_do_getattr(idmap, inode, stat, file); 1341 1329 } 1342 1330 } else if (stat) { 1343 - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 1331 + generic_fillattr(idmap, request_mask, inode, stat); 1344 1332 stat->mode = fi->orig_i_mode; 1345 1333 stat->ino = fi->orig_ino; 1346 1334 if (test_bit(FUSE_I_BTIME, &fi->state)) { ··· 1354 1342 1355 1343 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) 1356 1344 { 1357 - return fuse_update_get_attr(inode, file, NULL, mask, 0); 1345 + return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0); 1358 1346 } 1359 1347 1360 1348 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, ··· 1474 1462 1475 1463 BUG_ON(mask & MAY_NOT_BLOCK); 1476 1464 1465 + /* 1466 + * We should not send FUSE_ACCESS to the userspace 1467 + * when idmapped mounts are enabled as for this case 1468 + * we have fc->default_permissions = 1 and access 1469 + * permission checks are done on the kernel side. 1470 + */ 1471 + WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP)); 1472 + 1477 1473 if (fm->fc->no_access) 1478 1474 return 0; 1479 1475 ··· 1506 1486 return -ECHILD; 1507 1487 1508 1488 forget_all_cached_acls(inode); 1509 - return fuse_do_getattr(inode, NULL, NULL); 1489 + return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL); 1510 1490 } 1511 1491 1512 1492 /* ··· 1554 1534 } 1555 1535 1556 1536 if (fc->default_permissions) { 1557 - err = generic_permission(&nop_mnt_idmap, inode, mask); 1537 + err = generic_permission(idmap, inode, mask); 1558 1538 1559 1539 /* If permission is denied, try to refresh file 1560 1540 attributes. 
This is also needed, because the root ··· 1562 1542 if (err == -EACCES && !refreshed) { 1563 1543 err = fuse_perm_getattr(inode, mask); 1564 1544 if (!err) 1565 - err = generic_permission(&nop_mnt_idmap, 1545 + err = generic_permission(idmap, 1566 1546 inode, mask); 1567 1547 } 1568 1548 ··· 1758 1738 return true; 1759 1739 } 1760 1740 1761 - static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, 1762 - struct fuse_setattr_in *arg, bool trust_local_cmtime) 1741 + static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc, 1742 + struct iattr *iattr, struct fuse_setattr_in *arg, 1743 + bool trust_local_cmtime) 1763 1744 { 1764 1745 unsigned ivalid = iattr->ia_valid; 1765 1746 1766 1747 if (ivalid & ATTR_MODE) 1767 1748 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1768 - if (ivalid & ATTR_UID) 1769 - arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); 1770 - if (ivalid & ATTR_GID) 1771 - arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid); 1749 + 1750 + if (ivalid & ATTR_UID) { 1751 + kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid); 1752 + 1753 + arg->valid |= FATTR_UID; 1754 + arg->uid = from_kuid(fc->user_ns, fsuid); 1755 + } 1756 + 1757 + if (ivalid & ATTR_GID) { 1758 + kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid); 1759 + 1760 + arg->valid |= FATTR_GID; 1761 + arg->gid = from_kgid(fc->user_ns, fsgid); 1762 + } 1763 + 1772 1764 if (ivalid & ATTR_SIZE) 1773 1765 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1774 1766 if (ivalid & ATTR_ATIME) { ··· 1900 1868 * vmtruncate() doesn't allow for this case, so do the rlimit checking 1901 1869 * and the actual truncation by hand. 
1902 1870 */ 1903 - int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, 1904 - struct file *file) 1871 + int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 1872 + struct iattr *attr, struct file *file) 1905 1873 { 1906 1874 struct inode *inode = d_inode(dentry); 1907 1875 struct fuse_mount *fm = get_fuse_mount(inode); ··· 1921 1889 if (!fc->default_permissions) 1922 1890 attr->ia_valid |= ATTR_FORCE; 1923 1891 1924 - err = setattr_prepare(&nop_mnt_idmap, dentry, attr); 1892 + err = setattr_prepare(idmap, dentry, attr); 1925 1893 if (err) 1926 1894 return err; 1927 1895 ··· 1980 1948 1981 1949 memset(&inarg, 0, sizeof(inarg)); 1982 1950 memset(&outarg, 0, sizeof(outarg)); 1983 - iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime); 1951 + iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime); 1984 1952 if (file) { 1985 1953 struct fuse_file *ff = file->private_data; 1986 1954 inarg.valid |= FATTR_FH; ··· 2097 2065 * ia_mode calculation may have used stale i_mode. 2098 2066 * Refresh and recalculate. 2099 2067 */ 2100 - ret = fuse_do_getattr(inode, NULL, file); 2068 + ret = fuse_do_getattr(idmap, inode, NULL, file); 2101 2069 if (ret) 2102 2070 return ret; 2103 2071 ··· 2115 2083 if (!attr->ia_valid) 2116 2084 return 0; 2117 2085 2118 - ret = fuse_do_setattr(entry, attr, file); 2086 + ret = fuse_do_setattr(idmap, entry, attr, file); 2119 2087 if (!ret) { 2120 2088 /* 2121 2089 * If filesystem supports acls it may have updated acl xattrs in ··· 2154 2122 return -EACCES; 2155 2123 } 2156 2124 2157 - return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); 2125 + return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags); 2158 2126 } 2159 2127 2160 2128 static const struct inode_operations fuse_dir_inode_operations = {
+92 -92
fs/fuse/file.c
··· 448 448 449 449 /* 450 450 * Check if any page in a range is under writeback 451 - * 452 - * This is currently done by walking the list of writepage requests 453 - * for the inode, which can be pretty inefficient. 454 451 */ 455 452 static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, 456 453 pgoff_t idx_to) 457 454 { 458 455 struct fuse_inode *fi = get_fuse_inode(inode); 459 456 bool found; 457 + 458 + if (RB_EMPTY_ROOT(&fi->writepages)) 459 + return false; 460 460 461 461 spin_lock(&fi->lock); 462 462 found = fuse_find_writeback(fi, idx_from, idx_to); ··· 1345 1345 1346 1346 /* shared locks are not allowed with parallel page cache IO */ 1347 1347 if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state)) 1348 - return false; 1348 + return true; 1349 1349 1350 1350 /* Parallel dio beyond EOF is not supported, at least for now. */ 1351 1351 if (fuse_io_past_eof(iocb, from)) ··· 1398 1398 static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) 1399 1399 { 1400 1400 struct file *file = iocb->ki_filp; 1401 + struct mnt_idmap *idmap = file_mnt_idmap(file); 1401 1402 struct address_space *mapping = file->f_mapping; 1402 1403 ssize_t written = 0; 1403 1404 struct inode *inode = mapping->host; ··· 1413 1412 return err; 1414 1413 1415 1414 if (fc->handle_killpriv_v2 && 1416 - setattr_should_drop_suidgid(&nop_mnt_idmap, 1415 + setattr_should_drop_suidgid(idmap, 1417 1416 file_inode(file))) { 1418 1417 goto writethrough; 1419 1418 } ··· 1763 1762 for (i = 0; i < ap->num_pages; i++) 1764 1763 __free_page(ap->pages[i]); 1765 1764 1766 - if (wpa->ia.ff) 1767 - fuse_file_put(wpa->ia.ff, false); 1765 + fuse_file_put(wpa->ia.ff, false); 1768 1766 1769 1767 kfree(ap->pages); 1770 1768 kfree(wpa); 1771 1769 } 1772 1770 1773 - static void fuse_writepage_finish(struct fuse_mount *fm, 1774 - struct fuse_writepage_args *wpa) 1771 + static void fuse_writepage_finish_stat(struct inode *inode, struct page *page) 1772 + { 1773 + struct backing_dev_info 
*bdi = inode_to_bdi(inode); 1774 + 1775 + dec_wb_stat(&bdi->wb, WB_WRITEBACK); 1776 + dec_node_page_state(page, NR_WRITEBACK_TEMP); 1777 + wb_writeout_inc(&bdi->wb); 1778 + } 1779 + 1780 + static void fuse_writepage_finish(struct fuse_writepage_args *wpa) 1775 1781 { 1776 1782 struct fuse_args_pages *ap = &wpa->ia.ap; 1777 1783 struct inode *inode = wpa->inode; 1778 1784 struct fuse_inode *fi = get_fuse_inode(inode); 1779 - struct backing_dev_info *bdi = inode_to_bdi(inode); 1780 1785 int i; 1781 1786 1782 - for (i = 0; i < ap->num_pages; i++) { 1783 - dec_wb_stat(&bdi->wb, WB_WRITEBACK); 1784 - dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP); 1785 - wb_writeout_inc(&bdi->wb); 1786 - } 1787 + for (i = 0; i < ap->num_pages; i++) 1788 + fuse_writepage_finish_stat(inode, ap->pages[i]); 1789 + 1787 1790 wake_up(&fi->page_waitq); 1788 1791 } 1789 1792 ··· 1834 1829 out_free: 1835 1830 fi->writectr--; 1836 1831 rb_erase(&wpa->writepages_entry, &fi->writepages); 1837 - fuse_writepage_finish(fm, wpa); 1832 + fuse_writepage_finish(wpa); 1838 1833 spin_unlock(&fi->lock); 1839 1834 1840 1835 /* After rb_erase() aux request list is private */ 1841 1836 for (aux = wpa->next; aux; aux = next) { 1842 - struct backing_dev_info *bdi = inode_to_bdi(aux->inode); 1843 - 1844 1837 next = aux->next; 1845 1838 aux->next = NULL; 1846 - 1847 - dec_wb_stat(&bdi->wb, WB_WRITEBACK); 1848 - dec_node_page_state(aux->ia.ap.pages[0], NR_WRITEBACK_TEMP); 1849 - wb_writeout_inc(&bdi->wb); 1839 + fuse_writepage_finish_stat(aux->inode, aux->ia.ap.pages[0]); 1850 1840 fuse_writepage_free(aux); 1851 1841 } 1852 1842 ··· 1936 1936 1937 1937 wpa->next = next->next; 1938 1938 next->next = NULL; 1939 - next->ia.ff = fuse_file_get(wpa->ia.ff); 1940 1939 tree_insert(&fi->writepages, next); 1941 1940 1942 1941 /* ··· 1964 1965 fuse_send_writepage(fm, next, inarg->offset + inarg->size); 1965 1966 } 1966 1967 fi->writectr--; 1967 - fuse_writepage_finish(fm, wpa); 1968 + fuse_writepage_finish(wpa); 1968 
1969 spin_unlock(&fi->lock); 1969 1970 fuse_writepage_free(wpa); 1970 1971 } ··· 2048 2049 rcu_read_unlock(); 2049 2050 } 2050 2051 2052 + static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio, 2053 + struct folio *tmp_folio, uint32_t page_index) 2054 + { 2055 + struct inode *inode = folio->mapping->host; 2056 + struct fuse_args_pages *ap = &wpa->ia.ap; 2057 + 2058 + folio_copy(tmp_folio, folio); 2059 + 2060 + ap->pages[page_index] = &tmp_folio->page; 2061 + ap->descs[page_index].offset = 0; 2062 + ap->descs[page_index].length = PAGE_SIZE; 2063 + 2064 + inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); 2065 + inc_node_page_state(&tmp_folio->page, NR_WRITEBACK_TEMP); 2066 + } 2067 + 2068 + static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio, 2069 + struct fuse_file *ff) 2070 + { 2071 + struct inode *inode = folio->mapping->host; 2072 + struct fuse_conn *fc = get_fuse_conn(inode); 2073 + struct fuse_writepage_args *wpa; 2074 + struct fuse_args_pages *ap; 2075 + 2076 + wpa = fuse_writepage_args_alloc(); 2077 + if (!wpa) 2078 + return NULL; 2079 + 2080 + fuse_writepage_add_to_bucket(fc, wpa); 2081 + fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio), 0); 2082 + wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; 2083 + wpa->inode = inode; 2084 + wpa->ia.ff = ff; 2085 + 2086 + ap = &wpa->ia.ap; 2087 + ap->args.in_pages = true; 2088 + ap->args.end = fuse_writepage_end; 2089 + 2090 + return wpa; 2091 + } 2092 + 2051 2093 static int fuse_writepage_locked(struct folio *folio) 2052 2094 { 2053 2095 struct address_space *mapping = folio->mapping; 2054 2096 struct inode *inode = mapping->host; 2055 - struct fuse_conn *fc = get_fuse_conn(inode); 2056 2097 struct fuse_inode *fi = get_fuse_inode(inode); 2057 2098 struct fuse_writepage_args *wpa; 2058 2099 struct fuse_args_pages *ap; 2059 2100 struct folio *tmp_folio; 2101 + struct fuse_file *ff; 2060 2102 int error = -ENOMEM; 2061 - 2062 - 
folio_start_writeback(folio); 2063 - 2064 - wpa = fuse_writepage_args_alloc(); 2065 - if (!wpa) 2066 - goto err; 2067 - ap = &wpa->ia.ap; 2068 2103 2069 2104 tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0); 2070 2105 if (!tmp_folio) 2071 - goto err_free; 2106 + goto err; 2072 2107 2073 2108 error = -EIO; 2074 - wpa->ia.ff = fuse_write_file_get(fi); 2075 - if (!wpa->ia.ff) 2109 + ff = fuse_write_file_get(fi); 2110 + if (!ff) 2076 2111 goto err_nofile; 2077 2112 2078 - fuse_writepage_add_to_bucket(fc, wpa); 2079 - fuse_write_args_fill(&wpa->ia, wpa->ia.ff, folio_pos(folio), 0); 2113 + wpa = fuse_writepage_args_setup(folio, ff); 2114 + error = -ENOMEM; 2115 + if (!wpa) 2116 + goto err_writepage_args; 2080 2117 2081 - folio_copy(tmp_folio, folio); 2082 - wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; 2083 - wpa->next = NULL; 2084 - ap->args.in_pages = true; 2118 + ap = &wpa->ia.ap; 2085 2119 ap->num_pages = 1; 2086 - ap->pages[0] = &tmp_folio->page; 2087 - ap->descs[0].offset = 0; 2088 - ap->descs[0].length = PAGE_SIZE; 2089 - ap->args.end = fuse_writepage_end; 2090 - wpa->inode = inode; 2091 2120 2092 - inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); 2093 - node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP); 2121 + folio_start_writeback(folio); 2122 + fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0); 2094 2123 2095 2124 spin_lock(&fi->lock); 2096 2125 tree_insert(&fi->writepages, wpa); ··· 2130 2103 2131 2104 return 0; 2132 2105 2106 + err_writepage_args: 2107 + fuse_file_put(ff, false); 2133 2108 err_nofile: 2134 2109 folio_put(tmp_folio); 2135 - err_free: 2136 - kfree(wpa); 2137 2110 err: 2138 2111 mapping_set_error(folio->mapping, error); 2139 - folio_end_writeback(folio); 2140 2112 return error; 2141 2113 } 2142 2114 ··· 2181 2155 int num_pages = wpa->ia.ap.num_pages; 2182 2156 int i; 2183 2157 2184 - wpa->ia.ff = fuse_file_get(data->ff); 2185 2158 spin_lock(&fi->lock); 2186 2159 list_add_tail(&wpa->queue_entry, &fi->queued_writes); 2187 2160 
fuse_flush_writepages(inode); ··· 2235 2210 spin_unlock(&fi->lock); 2236 2211 2237 2212 if (tmp) { 2238 - struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode); 2239 - 2240 - dec_wb_stat(&bdi->wb, WB_WRITEBACK); 2241 - dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP); 2242 - wb_writeout_inc(&bdi->wb); 2213 + fuse_writepage_finish_stat(new_wpa->inode, new_ap->pages[0]); 2243 2214 fuse_writepage_free(new_wpa); 2244 2215 } 2245 2216 ··· 2285 2264 struct inode *inode = data->inode; 2286 2265 struct fuse_inode *fi = get_fuse_inode(inode); 2287 2266 struct fuse_conn *fc = get_fuse_conn(inode); 2288 - struct page *tmp_page; 2267 + struct folio *tmp_folio; 2289 2268 int err; 2290 - 2291 - if (!data->ff) { 2292 - err = -EIO; 2293 - data->ff = fuse_write_file_get(fi); 2294 - if (!data->ff) 2295 - goto out_unlock; 2296 - } 2297 2269 2298 2270 if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) { 2299 2271 fuse_writepages_send(data); ··· 2294 2280 } 2295 2281 2296 2282 err = -ENOMEM; 2297 - tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 2298 - if (!tmp_page) 2283 + tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0); 2284 + if (!tmp_folio) 2299 2285 goto out_unlock; 2300 2286 2301 2287 /* ··· 2313 2299 */ 2314 2300 if (data->wpa == NULL) { 2315 2301 err = -ENOMEM; 2316 - wpa = fuse_writepage_args_alloc(); 2302 + wpa = fuse_writepage_args_setup(folio, data->ff); 2317 2303 if (!wpa) { 2318 - __free_page(tmp_page); 2304 + folio_put(tmp_folio); 2319 2305 goto out_unlock; 2320 2306 } 2321 - fuse_writepage_add_to_bucket(fc, wpa); 2322 - 2307 + fuse_file_get(wpa->ia.ff); 2323 2308 data->max_pages = 1; 2324 - 2325 2309 ap = &wpa->ia.ap; 2326 - fuse_write_args_fill(&wpa->ia, data->ff, folio_pos(folio), 0); 2327 - wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; 2328 - wpa->next = NULL; 2329 - ap->args.in_pages = true; 2330 - ap->args.end = fuse_writepage_end; 2331 - ap->num_pages = 0; 2332 - wpa->inode = inode; 2333 2310 } 2334 2311 
folio_start_writeback(folio); 2335 2312 2336 - copy_highpage(tmp_page, &folio->page); 2337 - ap->pages[ap->num_pages] = tmp_page; 2338 - ap->descs[ap->num_pages].offset = 0; 2339 - ap->descs[ap->num_pages].length = PAGE_SIZE; 2313 + fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_pages); 2340 2314 data->orig_pages[ap->num_pages] = &folio->page; 2341 - 2342 - inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); 2343 - inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); 2344 2315 2345 2316 err = 0; 2346 2317 if (data->wpa) { ··· 2351 2352 struct writeback_control *wbc) 2352 2353 { 2353 2354 struct inode *inode = mapping->host; 2355 + struct fuse_inode *fi = get_fuse_inode(inode); 2354 2356 struct fuse_conn *fc = get_fuse_conn(inode); 2355 2357 struct fuse_fill_wb_data data; 2356 2358 int err; 2357 2359 2358 - err = -EIO; 2359 2360 if (fuse_is_bad(inode)) 2360 - goto out; 2361 + return -EIO; 2361 2362 2362 2363 if (wbc->sync_mode == WB_SYNC_NONE && 2363 2364 fc->num_background >= fc->congestion_threshold) ··· 2365 2366 2366 2367 data.inode = inode; 2367 2368 data.wpa = NULL; 2368 - data.ff = NULL; 2369 + data.ff = fuse_write_file_get(fi); 2370 + if (!data.ff) 2371 + return -EIO; 2369 2372 2370 2373 err = -ENOMEM; 2371 2374 data.orig_pages = kcalloc(fc->max_pages, ··· 2381 2380 WARN_ON(!data.wpa->ia.ap.num_pages); 2382 2381 fuse_writepages_send(&data); 2383 2382 } 2384 - if (data.ff) 2385 - fuse_file_put(data.ff, false); 2386 2383 2387 2384 kfree(data.orig_pages); 2388 2385 out: 2386 + fuse_file_put(data.ff, false); 2389 2387 return err; 2390 2388 } 2391 2389 ··· 2973 2973 attr.ia_file = file; 2974 2974 attr.ia_valid |= ATTR_FILE; 2975 2975 2976 - fuse_do_setattr(file_dentry(file), &attr, file); 2976 + fuse_do_setattr(file_mnt_idmap(file), file_dentry(file), &attr, file); 2977 2977 } 2978 2978 2979 2979 static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
+25 -17
fs/fuse/fuse_i.h
··· 449 449 */ 450 450 struct fuse_iqueue_ops { 451 451 /** 452 - * Signal that a forget has been queued 452 + * Send one forget 453 453 */ 454 - void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) 455 - __releases(fiq->lock); 454 + void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link); 456 455 457 456 /** 458 - * Signal that an INTERRUPT request has been queued 457 + * Send interrupt for request 459 458 */ 460 - void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) 461 - __releases(fiq->lock); 459 + void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req); 462 460 463 461 /** 464 - * Signal that a request has been queued 462 + * Send one request 465 463 */ 466 - void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) 467 - __releases(fiq->lock); 464 + void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req); 468 465 469 466 /** 470 467 * Clean up when fuse_iqueue is destroyed ··· 866 869 /** Negotiated minor version */ 867 870 unsigned minor; 868 871 869 - /** Entry on the fuse_mount_list */ 872 + /** Entry on the fuse_conn_list */ 870 873 struct list_head entry; 871 874 872 875 /** Device ID from the root super block */ ··· 1050 1053 1051 1054 struct fuse_forget_link *fuse_alloc_forget(void); 1052 1055 1053 - struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, 1054 - unsigned int max, 1055 - unsigned int *countp); 1056 - 1057 1056 /* 1058 1057 * Initialize READ or READDIR request 1059 1058 */ ··· 1147 1154 /** 1148 1155 * Simple request sending that does request allocation and freeing 1149 1156 */ 1150 - ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args); 1157 + ssize_t __fuse_simple_request(struct mnt_idmap *idmap, 1158 + struct fuse_mount *fm, 1159 + struct fuse_args *args); 1160 + 1161 + static inline ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args) 1162 + { 1163 + return __fuse_simple_request(&invalid_mnt_idmap, fm, args); 1164 + } 1165 + 
1166 + static inline ssize_t fuse_simple_idmap_request(struct mnt_idmap *idmap, 1167 + struct fuse_mount *fm, 1168 + struct fuse_args *args) 1169 + { 1170 + return __fuse_simple_request(idmap, fm, args); 1171 + } 1172 + 1151 1173 int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, 1152 1174 gfp_t gfp_flags); 1153 1175 ··· 1338 1330 int fuse_flush_times(struct inode *inode, struct fuse_file *ff); 1339 1331 int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); 1340 1332 1341 - int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, 1342 - struct file *file); 1333 + int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 1334 + struct iattr *attr, struct file *file); 1343 1335 1344 1336 void fuse_set_initialized(struct fuse_conn *fc); 1345 1337
+132
fs/fuse/fuse_trace.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #undef TRACE_SYSTEM 3 + #define TRACE_SYSTEM fuse 4 + 5 + #if !defined(_TRACE_FUSE_H) || defined(TRACE_HEADER_MULTI_READ) 6 + #define _TRACE_FUSE_H 7 + 8 + #include <linux/tracepoint.h> 9 + 10 + #define OPCODES \ 11 + EM( FUSE_LOOKUP, "FUSE_LOOKUP") \ 12 + EM( FUSE_FORGET, "FUSE_FORGET") \ 13 + EM( FUSE_GETATTR, "FUSE_GETATTR") \ 14 + EM( FUSE_SETATTR, "FUSE_SETATTR") \ 15 + EM( FUSE_READLINK, "FUSE_READLINK") \ 16 + EM( FUSE_SYMLINK, "FUSE_SYMLINK") \ 17 + EM( FUSE_MKNOD, "FUSE_MKNOD") \ 18 + EM( FUSE_MKDIR, "FUSE_MKDIR") \ 19 + EM( FUSE_UNLINK, "FUSE_UNLINK") \ 20 + EM( FUSE_RMDIR, "FUSE_RMDIR") \ 21 + EM( FUSE_RENAME, "FUSE_RENAME") \ 22 + EM( FUSE_LINK, "FUSE_LINK") \ 23 + EM( FUSE_OPEN, "FUSE_OPEN") \ 24 + EM( FUSE_READ, "FUSE_READ") \ 25 + EM( FUSE_WRITE, "FUSE_WRITE") \ 26 + EM( FUSE_STATFS, "FUSE_STATFS") \ 27 + EM( FUSE_RELEASE, "FUSE_RELEASE") \ 28 + EM( FUSE_FSYNC, "FUSE_FSYNC") \ 29 + EM( FUSE_SETXATTR, "FUSE_SETXATTR") \ 30 + EM( FUSE_GETXATTR, "FUSE_GETXATTR") \ 31 + EM( FUSE_LISTXATTR, "FUSE_LISTXATTR") \ 32 + EM( FUSE_REMOVEXATTR, "FUSE_REMOVEXATTR") \ 33 + EM( FUSE_FLUSH, "FUSE_FLUSH") \ 34 + EM( FUSE_INIT, "FUSE_INIT") \ 35 + EM( FUSE_OPENDIR, "FUSE_OPENDIR") \ 36 + EM( FUSE_READDIR, "FUSE_READDIR") \ 37 + EM( FUSE_RELEASEDIR, "FUSE_RELEASEDIR") \ 38 + EM( FUSE_FSYNCDIR, "FUSE_FSYNCDIR") \ 39 + EM( FUSE_GETLK, "FUSE_GETLK") \ 40 + EM( FUSE_SETLK, "FUSE_SETLK") \ 41 + EM( FUSE_SETLKW, "FUSE_SETLKW") \ 42 + EM( FUSE_ACCESS, "FUSE_ACCESS") \ 43 + EM( FUSE_CREATE, "FUSE_CREATE") \ 44 + EM( FUSE_INTERRUPT, "FUSE_INTERRUPT") \ 45 + EM( FUSE_BMAP, "FUSE_BMAP") \ 46 + EM( FUSE_DESTROY, "FUSE_DESTROY") \ 47 + EM( FUSE_IOCTL, "FUSE_IOCTL") \ 48 + EM( FUSE_POLL, "FUSE_POLL") \ 49 + EM( FUSE_NOTIFY_REPLY, "FUSE_NOTIFY_REPLY") \ 50 + EM( FUSE_BATCH_FORGET, "FUSE_BATCH_FORGET") \ 51 + EM( FUSE_FALLOCATE, "FUSE_FALLOCATE") \ 52 + EM( FUSE_READDIRPLUS, "FUSE_READDIRPLUS") \ 53 + EM( FUSE_RENAME2, "FUSE_RENAME2") 
\ 54 + EM( FUSE_LSEEK, "FUSE_LSEEK") \ 55 + EM( FUSE_COPY_FILE_RANGE, "FUSE_COPY_FILE_RANGE") \ 56 + EM( FUSE_SETUPMAPPING, "FUSE_SETUPMAPPING") \ 57 + EM( FUSE_REMOVEMAPPING, "FUSE_REMOVEMAPPING") \ 58 + EM( FUSE_SYNCFS, "FUSE_SYNCFS") \ 59 + EM( FUSE_TMPFILE, "FUSE_TMPFILE") \ 60 + EM( FUSE_STATX, "FUSE_STATX") \ 61 + EMe(CUSE_INIT, "CUSE_INIT") 62 + 63 + /* 64 + * This will turn the above table into TRACE_DEFINE_ENUM() for each of the 65 + * entries. 66 + */ 67 + #undef EM 68 + #undef EMe 69 + #define EM(a, b) TRACE_DEFINE_ENUM(a); 70 + #define EMe(a, b) TRACE_DEFINE_ENUM(a); 71 + 72 + OPCODES 73 + 74 + /* Now we redefine it with the table that __print_symbolic needs. */ 75 + #undef EM 76 + #undef EMe 77 + #define EM(a, b) {a, b}, 78 + #define EMe(a, b) {a, b} 79 + 80 + TRACE_EVENT(fuse_request_send, 81 + TP_PROTO(const struct fuse_req *req), 82 + 83 + TP_ARGS(req), 84 + 85 + TP_STRUCT__entry( 86 + __field(dev_t, connection) 87 + __field(uint64_t, unique) 88 + __field(enum fuse_opcode, opcode) 89 + __field(uint32_t, len) 90 + ), 91 + 92 + TP_fast_assign( 93 + __entry->connection = req->fm->fc->dev; 94 + __entry->unique = req->in.h.unique; 95 + __entry->opcode = req->in.h.opcode; 96 + __entry->len = req->in.h.len; 97 + ), 98 + 99 + TP_printk("connection %u req %llu opcode %u (%s) len %u ", 100 + __entry->connection, __entry->unique, __entry->opcode, 101 + __print_symbolic(__entry->opcode, OPCODES), __entry->len) 102 + ); 103 + 104 + TRACE_EVENT(fuse_request_end, 105 + TP_PROTO(const struct fuse_req *req), 106 + 107 + TP_ARGS(req), 108 + 109 + TP_STRUCT__entry( 110 + __field(dev_t, connection) 111 + __field(uint64_t, unique) 112 + __field(uint32_t, len) 113 + __field(int32_t, error) 114 + ), 115 + 116 + TP_fast_assign( 117 + __entry->connection = req->fm->fc->dev; 118 + __entry->unique = req->in.h.unique; 119 + __entry->len = req->out.h.len; 120 + __entry->error = req->out.h.error; 121 + ), 122 + 123 + TP_printk("connection %u req %llu len %u error %d",
__entry->connection, 124 + __entry->unique, __entry->len, __entry->error) 125 + ); 126 + 127 + #endif /* _TRACE_FUSE_H */ 128 + 129 + #undef TRACE_INCLUDE_PATH 130 + #define TRACE_INCLUDE_PATH . 131 + #define TRACE_INCLUDE_FILE fuse_trace 132 + #include <trace/define_trace.h>
+10 -3
fs/fuse/inode.c
··· 1348 1348 } 1349 1349 if (flags & FUSE_NO_EXPORT_SUPPORT) 1350 1350 fm->sb->s_export_op = &fuse_export_fid_operations; 1351 + if (flags & FUSE_ALLOW_IDMAP) { 1352 + if (fc->default_permissions) 1353 + fm->sb->s_iflags &= ~SB_I_NOIDMAP; 1354 + else 1355 + ok = false; 1356 + } 1351 1357 } else { 1352 1358 ra_pages = fc->max_read / PAGE_SIZE; 1353 1359 fc->no_lock = 1; ··· 1401 1395 FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | 1402 1396 FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | 1403 1397 FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | 1404 - FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND; 1398 + FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP; 1405 1399 #ifdef CONFIG_FUSE_DAX 1406 1400 if (fm->fc->dax) 1407 1401 flags |= FUSE_MAP_ALIGNMENT; ··· 1578 1572 sb->s_time_gran = 1; 1579 1573 sb->s_export_op = &fuse_export_operations; 1580 1574 sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1575 + sb->s_iflags |= SB_I_NOIDMAP; 1581 1576 if (sb->s_user_ns != &init_user_ns) 1582 1577 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1583 1578 sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); ··· 1991 1984 static struct file_system_type fuse_fs_type = { 1992 1985 .owner = THIS_MODULE, 1993 1986 .name = "fuse", 1994 - .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, 1987 + .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP, 1995 1988 .init_fs_context = fuse_init_fs_context, 1996 1989 .parameters = fuse_fs_parameters, 1997 1990 .kill_sb = fuse_kill_sb_anon, ··· 2012 2005 .init_fs_context = fuse_init_fs_context, 2013 2006 .parameters = fuse_fs_parameters, 2014 2007 .kill_sb = fuse_kill_sb_blk, 2015 - .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 2008 + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP, 2016 2009 }; 2017 2010 MODULE_ALIAS_FS("fuseblk"); 2018 2011
+2 -5
fs/fuse/passthrough.c
··· 228 228 if (map->flags || map->padding) 229 229 goto out; 230 230 231 - file = fget(map->fd); 231 + file = fget_raw(map->fd); 232 232 res = -EBADF; 233 233 if (!file) 234 234 goto out; 235 235 236 - res = -EOPNOTSUPP; 237 - if (!file->f_op->read_iter || !file->f_op->write_iter) 238 - goto out_fput; 239 - 240 236 backing_sb = file_inode(file)->i_sb; 237 + pr_info("%s: %x:%pD %i\n", __func__, backing_sb->s_dev, file, backing_sb->s_stack_depth); 241 238 res = -ELOOP; 242 239 if (backing_sb->s_stack_depth >= fc->max_stack_depth) 243 240 goto out_fput;
+14 -28
fs/fuse/virtio_fs.c
··· 1091 1091 #endif 1092 1092 }; 1093 1093 1094 - static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) 1095 - __releases(fiq->lock) 1094 + static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link) 1096 1095 { 1097 - struct fuse_forget_link *link; 1098 1096 struct virtio_fs_forget *forget; 1099 1097 struct virtio_fs_forget_req *req; 1100 - struct virtio_fs *fs; 1101 - struct virtio_fs_vq *fsvq; 1102 - u64 unique; 1103 - 1104 - link = fuse_dequeue_forget(fiq, 1, NULL); 1105 - unique = fuse_get_unique(fiq); 1106 - 1107 - fs = fiq->priv; 1108 - fsvq = &fs->vqs[VQ_HIPRIO]; 1109 - spin_unlock(&fiq->lock); 1098 + struct virtio_fs *fs = fiq->priv; 1099 + struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO]; 1100 + u64 unique = fuse_get_unique(fiq); 1110 1101 1111 1102 /* Allocate a buffer for the request */ 1112 1103 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); ··· 1117 1126 kfree(link); 1118 1127 } 1119 1128 1120 - static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) 1121 - __releases(fiq->lock) 1129 + static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) 1122 1130 { 1123 1131 /* 1124 1132 * TODO interrupts. ··· 1126 1136 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 1127 1137 * with shared lock between host and guest. 
1128 1138 */ 1129 - spin_unlock(&fiq->lock); 1130 1139 } 1131 1140 1132 1141 /* Count number of scatter-gather elements required */ ··· 1330 1341 return ret; 1331 1342 } 1332 1343 1333 - static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) 1334 - __releases(fiq->lock) 1344 + static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req) 1335 1345 { 1336 1346 unsigned int queue_id; 1337 1347 struct virtio_fs *fs; 1338 - struct fuse_req *req; 1339 1348 struct virtio_fs_vq *fsvq; 1340 1349 int ret; 1341 1350 1342 - WARN_ON(list_empty(&fiq->pending)); 1343 - req = list_last_entry(&fiq->pending, struct fuse_req, list); 1351 + if (req->in.h.opcode != FUSE_NOTIFY_REPLY) 1352 + req->in.h.unique = fuse_get_unique(fiq); 1353 + 1344 1354 clear_bit(FR_PENDING, &req->flags); 1345 - list_del_init(&req->list); 1346 - WARN_ON(!list_empty(&fiq->pending)); 1347 - spin_unlock(&fiq->lock); 1348 1355 1349 1356 fs = fiq->priv; 1350 1357 queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()]; ··· 1378 1393 } 1379 1394 1380 1395 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1381 - .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, 1382 - .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, 1383 - .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, 1384 - .release = virtio_fs_fiq_release, 1396 + .send_forget = virtio_fs_send_forget, 1397 + .send_interrupt = virtio_fs_send_interrupt, 1398 + .send_req = virtio_fs_send_req, 1399 + .release = virtio_fs_fiq_release, 1385 1400 }; 1386 1401 1387 1402 static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) ··· 1613 1628 .name = "virtiofs", 1614 1629 .init_fs_context = virtio_fs_init_fs_context, 1615 1630 .kill_sb = virtio_kill_sb, 1631 + .fs_flags = FS_ALLOW_IDMAP, 1616 1632 }; 1617 1633 1618 1634 static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
+20 -2
fs/mnt_idmapping.c
··· 32 32 }; 33 33 EXPORT_SYMBOL_GPL(nop_mnt_idmap); 34 34 35 + /* 36 + * Carries the invalid idmapping of a full 0-4294967295 {g,u}id range. 37 + * This means that all {g,u}ids are mapped to INVALID_VFS{G,U}ID. 38 + */ 39 + struct mnt_idmap invalid_mnt_idmap = { 40 + .count = REFCOUNT_INIT(1), 41 + }; 42 + EXPORT_SYMBOL_GPL(invalid_mnt_idmap); 43 + 35 44 /** 36 45 * initial_idmapping - check whether this is the initial mapping 37 46 * @ns: idmapping to check ··· 84 75 85 76 if (idmap == &nop_mnt_idmap) 86 77 return VFSUIDT_INIT(kuid); 78 + if (idmap == &invalid_mnt_idmap) 79 + return INVALID_VFSUID; 87 80 if (initial_idmapping(fs_userns)) 88 81 uid = __kuid_val(kuid); 89 82 else ··· 123 112 124 113 if (idmap == &nop_mnt_idmap) 125 114 return VFSGIDT_INIT(kgid); 115 + if (idmap == &invalid_mnt_idmap) 116 + return INVALID_VFSGID; 126 117 if (initial_idmapping(fs_userns)) 127 118 gid = __kgid_val(kgid); 128 119 else ··· 153 140 154 141 if (idmap == &nop_mnt_idmap) 155 142 return AS_KUIDT(vfsuid); 143 + if (idmap == &invalid_mnt_idmap) 144 + return INVALID_UID; 156 145 uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid)); 157 146 if (uid == (uid_t)-1) 158 147 return INVALID_UID; ··· 182 167 183 168 if (idmap == &nop_mnt_idmap) 184 169 return AS_KGIDT(vfsgid); 170 + if (idmap == &invalid_mnt_idmap) 171 + return INVALID_GID; 185 172 gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid)); 186 173 if (gid == (gid_t)-1) 187 174 return INVALID_GID; ··· 313 296 */ 314 297 struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) 315 298 { 316 - if (idmap != &nop_mnt_idmap) 299 + if (idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap) 317 300 refcount_inc(&idmap->count); 318 301 319 302 return idmap; ··· 329 312 */ 330 313 void mnt_idmap_put(struct mnt_idmap *idmap) 331 314 { 332 - if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) 315 + if (idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap && 316 + refcount_dec_and_test(&idmap->count)) 333 317 
free_mnt_idmap(idmap); 334 318 } 335 319 EXPORT_SYMBOL_GPL(mnt_idmap_put);
+4
fs/namespace.c
··· 4471 4471 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) 4472 4472 return -EINVAL; 4473 4473 4474 + /* The filesystem has turned off idmapped mounts. */ 4475 + if (m->mnt_sb->s_iflags & SB_I_NOIDMAP) 4476 + return -EINVAL; 4477 + 4474 4478 /* We're not controlling the superblock. */ 4475 4479 if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) 4476 4480 return -EPERM;
+1
include/linux/fs.h
··· 1229 1229 #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ 1230 1230 #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ 1231 1231 #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ 1232 + #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ 1232 1233 1233 1234 /* Possible states of 'frozen' field */ 1234 1235 enum {
+1
include/linux/mnt_idmapping.h
··· 9 9 struct user_namespace; 10 10 11 11 extern struct mnt_idmap nop_mnt_idmap; 12 + extern struct mnt_idmap invalid_mnt_idmap; 12 13 extern struct user_namespace init_user_ns; 13 14 14 15 typedef struct {
+21 -1
include/uapi/linux/fuse.h
··· 217 217 * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag 218 218 * - add FUSE_NO_EXPORT_SUPPORT init flag 219 219 * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag 220 + * 221 + * 7.41 222 + * - add FUSE_ALLOW_IDMAP 220 223 */ 221 224 222 225 #ifndef _LINUX_FUSE_H ··· 255 252 #define FUSE_KERNEL_VERSION 7 256 253 257 254 /** Minor version number of this interface */ 258 - #define FUSE_KERNEL_MINOR_VERSION 40 255 + #define FUSE_KERNEL_MINOR_VERSION 41 259 256 260 257 /** The node ID of the root inode */ 261 258 #define FUSE_ROOT_ID 1 ··· 424 421 * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support 425 422 * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit 426 423 * of the request ID indicates resend requests 424 + * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts 427 425 */ 428 426 #define FUSE_ASYNC_READ (1 << 0) 429 427 #define FUSE_POSIX_LOCKS (1 << 1) ··· 470 466 471 467 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ 472 468 #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP 469 + #define FUSE_ALLOW_IDMAP (1ULL << 40) 473 470 474 471 /** 475 472 * CUSE INIT request/reply flags ··· 988 983 * request accordingly. 989 984 */ 990 985 #define FUSE_UNIQUE_RESEND (1ULL << 63) 986 + 987 + /** 988 + * This value will be set by the kernel to 989 + * (struct fuse_in_header).{uid,gid} fields in 990 + * case when: 991 + * - fuse daemon enabled FUSE_ALLOW_IDMAP 992 + * - idmapping information is not available and uid/gid 993 + * can not be mapped in accordance with an idmapping. 994 + * 995 + * Note: idmapping information is always available 996 + * for inode creation operations like: 997 + * FUSE_MKNOD, FUSE_SYMLINK, FUSE_MKDIR, FUSE_TMPFILE, 998 + * FUSE_CREATE and FUSE_RENAME2 (with RENAME_WHITEOUT). 999 + */ 1000 + #define FUSE_INVALID_UIDGID ((uint32_t)(-1)) 991 1001 992 1002 struct fuse_in_header { 993 1003 uint32_t len;