Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs: use backing_file container for internal files with "fake" f_path

Overlayfs uses open_with_fake_path() to allocate internal kernel files,
with a "fake" path - whose f_path is not on the same fs as f_inode.

Allocate a container struct backing_file for those internal files, that
is used to hold the "fake" ovl path along with the real path.

backing_file_real_path() can be used to access the stored real path.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Message-Id: <20230615112229.2143178-5-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>

authored by

Amir Goldstein and committed by
Christian Brauner
62d53c4a 8a05a8c3

+114 -23
+48 -2
fs/file_table.c
··· 44 44 45 45 static struct percpu_counter nr_files __cacheline_aligned_in_smp; 46 46 47 + /* Container for backing file with optional real path */ 48 + struct backing_file { 49 + struct file file; 50 + struct path real_path; 51 + }; 52 + 53 + static inline struct backing_file *backing_file(struct file *f) 54 + { 55 + return container_of(f, struct backing_file, file); 56 + } 57 + 58 + struct path *backing_file_real_path(struct file *f) 59 + { 60 + return &backing_file(f)->real_path; 61 + } 62 + EXPORT_SYMBOL_GPL(backing_file_real_path); 63 + 47 64 static void file_free_rcu(struct rcu_head *head) 48 65 { 49 66 struct file *f = container_of(head, struct file, f_rcuhead); 50 67 51 68 put_cred(f->f_cred); 52 - kmem_cache_free(filp_cachep, f); 69 + if (unlikely(f->f_mode & FMODE_BACKING)) 70 + kfree(backing_file(f)); 71 + else 72 + kmem_cache_free(filp_cachep, f); 53 73 } 54 74 55 75 static inline void file_free(struct file *f) 56 76 { 57 77 security_file_free(f); 58 - if (!(f->f_mode & FMODE_NOACCOUNT)) 78 + if (unlikely(f->f_mode & FMODE_BACKING)) 79 + path_put(backing_file_real_path(f)); 80 + if (likely(!(f->f_mode & FMODE_NOACCOUNT))) 59 81 percpu_counter_dec(&nr_files); 60 82 call_rcu(&f->f_rcuhead, file_free_rcu); 61 83 } ··· 246 224 f->f_mode |= FMODE_NOACCOUNT; 247 225 248 226 return f; 227 + } 228 + 229 + /* 230 + * Variant of alloc_empty_file() that allocates a backing_file container 231 + * and doesn't check and modify nr_files. 232 + * 233 + * This is only for kernel internal use, and the allocated file must not be 234 + * installed into file tables or such. 
235 + */ 236 + struct file *alloc_empty_backing_file(int flags, const struct cred *cred) 237 + { 238 + struct backing_file *ff; 239 + int error; 240 + 241 + ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL); 242 + if (unlikely(!ff)) 243 + return ERR_PTR(-ENOMEM); 244 + 245 + error = init_file(&ff->file, flags, cred); 246 + if (unlikely(error)) 247 + return ERR_PTR(error); 248 + 249 + ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT; 250 + return &ff->file; 249 251 } 250 252 251 253 /**
+3 -2
fs/internal.h
··· 97 97 /* 98 98 * file_table.c 99 99 */ 100 - extern struct file *alloc_empty_file(int, const struct cred *); 101 - extern struct file *alloc_empty_file_noaccount(int, const struct cred *); 100 + struct file *alloc_empty_file(int flags, const struct cred *cred); 101 + struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred); 102 + struct file *alloc_empty_backing_file(int flags, const struct cred *cred); 102 103 103 104 static inline void put_file_access(struct file *file) 104 105 {
+33 -12
fs/open.c
··· 1149 1149 } 1150 1150 EXPORT_SYMBOL_GPL(kernel_file_open); 1151 1151 1152 - struct file *open_with_fake_path(const struct path *path, int flags, 1153 - struct inode *inode, const struct cred *cred) 1152 + /** 1153 + * backing_file_open - open a backing file for kernel internal use 1154 + * @path: path of the file to open 1155 + * @flags: open flags 1156 + * @real_path: path of the backing file 1157 + * @cred: credentials for open 1158 + * 1159 + * Open a backing file for a stackable filesystem (e.g., overlayfs). 1160 + * @path may be on the stackable filesystem and backing inode on the 1161 + * underlying filesystem. In this case, we want to be able to return 1162 + * the @real_path of the backing inode. This is done by embedding the 1163 + * returned file into a container structure that also stores the path of 1164 + * the backing inode on the underlying filesystem, which can be 1165 + * retrieved using backing_file_real_path(). 1166 + */ 1167 + struct file *backing_file_open(const struct path *path, int flags, 1168 + const struct path *real_path, 1169 + const struct cred *cred) 1154 1170 { 1155 - struct file *f = alloc_empty_file_noaccount(flags, cred); 1156 - if (!IS_ERR(f)) { 1157 - int error; 1171 + struct file *f; 1172 + int error; 1158 1173 1159 - f->f_path = *path; 1160 - error = do_dentry_open(f, inode, NULL); 1161 - if (error) { 1162 - fput(f); 1163 - f = ERR_PTR(error); 1164 - } 1174 + f = alloc_empty_backing_file(flags, cred); 1175 + if (IS_ERR(f)) 1176 + return f; 1177 + 1178 + f->f_path = *path; 1179 + path_get(real_path); 1180 + *backing_file_real_path(f) = *real_path; 1181 + error = do_dentry_open(f, d_inode(real_path->dentry), NULL); 1182 + if (error) { 1183 + fput(f); 1184 + f = ERR_PTR(error); 1165 1185 } 1186 + 1166 1187 return f; 1167 1188 } 1168 - EXPORT_SYMBOL(open_with_fake_path); 1189 + EXPORT_SYMBOL_GPL(backing_file_open); 1169 1190 1170 1191 #define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) 1171 1192 #define O_PATH_FLAGS 
(O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
+2 -2
fs/overlayfs/file.c
··· 61 61 if (!inode_owner_or_capable(real_idmap, realinode)) 62 62 flags &= ~O_NOATIME; 63 63 64 - realfile = open_with_fake_path(&file->f_path, flags, realinode, 65 - current_cred()); 64 + realfile = backing_file_open(&file->f_path, flags, realpath, 65 + current_cred()); 66 66 } 67 67 revert_creds(old_cred); 68 68
+28 -5
include/linux/fs.h
··· 171 171 /* File supports non-exclusive O_DIRECT writes from multiple threads */ 172 172 #define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000) 173 173 174 + /* File is embedded in backing_file object */ 175 + #define FMODE_BACKING ((__force fmode_t)0x2000000) 176 + 174 177 /* File was opened by fanotify and shouldn't generate fanotify events */ 175 178 #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) 176 179 ··· 2355 2352 return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root}, 2356 2353 name, flags, mode); 2357 2354 } 2358 - extern struct file * dentry_open(const struct path *, int, const struct cred *); 2359 - extern struct file *dentry_create(const struct path *path, int flags, 2360 - umode_t mode, const struct cred *cred); 2361 - extern struct file * open_with_fake_path(const struct path *, int, 2362 - struct inode*, const struct cred *); 2355 + struct file *dentry_open(const struct path *path, int flags, 2356 + const struct cred *creds); 2357 + struct file *dentry_create(const struct path *path, int flags, umode_t mode, 2358 + const struct cred *cred); 2359 + struct file *backing_file_open(const struct path *path, int flags, 2360 + const struct path *real_path, 2361 + const struct cred *cred); 2362 + struct path *backing_file_real_path(struct file *f); 2363 + 2364 + /* 2365 + * file_real_path - get the path corresponding to f_inode 2366 + * 2367 + * When opening a backing file for a stackable filesystem (e.g., 2368 + * overlayfs) f_path may be on the stackable filesystem and f_inode on 2369 + * the underlying filesystem. When the path associated with f_inode is 2370 + * needed, this helper should be used instead of accessing f_path 2371 + * directly. 
2372 + */ 2373 + static inline const struct path *file_real_path(struct file *f) 2374 + { 2375 + if (unlikely(f->f_mode & FMODE_BACKING)) 2376 + return backing_file_real_path(f); 2377 + return &f->f_path; 2378 + } 2379 + 2363 2380 static inline struct file *file_clone_open(struct file *file) 2364 2381 { 2365 2382 return dentry_open(&file->f_path, file->f_flags, file->f_cred);