Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/fs/readdir.c
4 *
5 * Copyright (C) 1995 Linus Torvalds
6 */
7
8#include <linux/stddef.h>
9#include <linux/kernel.h>
10#include <linux/export.h>
11#include <linux/time.h>
12#include <linux/mm.h>
13#include <linux/errno.h>
14#include <linux/stat.h>
15#include <linux/file.h>
16#include <linux/fs.h>
17#include <linux/fsnotify.h>
18#include <linux/dirent.h>
19#include <linux/security.h>
20#include <linux/syscalls.h>
21#include <linux/unistd.h>
22#include <linux/compat.h>
23#include <linux/uaccess.h>
24
25/*
26 * Some filesystems were never converted to '->iterate_shared()'
27 * and their directory iterators want the inode lock held for
28 * writing. This wrapper allows for converting from the shared
29 * semantics to the exclusive inode use.
30 */
31int wrap_directory_iterator(struct file *file,
32 struct dir_context *ctx,
33 int (*iter)(struct file *, struct dir_context *))
34{
35 struct inode *inode = file_inode(file);
36 int ret;
37
38 /*
39 * We'd love to have an 'inode_upgrade_trylock()' operation,
40 * see the comment in mmap_upgrade_trylock() in mm/memory.c.
41 *
42 * But considering this is for "filesystems that never got
43 * converted", it really doesn't matter.
44 *
45 * Also note that since we have to return with the lock held
46 * for reading, we can't use the "killable()" locking here,
47 * since we do need to get the lock even if we're dying.
48 *
49 * We could do the write part killably and then get the read
50 * lock unconditionally if it mattered, but see above on why
51 * this does the very simplistic conversion.
52 */
53 up_read(&inode->i_rwsem);
54 down_write(&inode->i_rwsem);
55
56 /*
57 * Since we dropped the inode lock, we should do the
58 * DEADDIR test again. See 'iterate_dir()' below.
59 *
60 * Note that we don't need to re-do the f_pos games,
61 * since the file must be locked wrt f_pos anyway.
62 */
63 ret = -ENOENT;
64 if (!IS_DEADDIR(inode))
65 ret = iter(file, ctx);
66
67 downgrade_write(&inode->i_rwsem);
68 return ret;
69}
70EXPORT_SYMBOL(wrap_directory_iterator);
71
/*
 * unsafe_copy_dirent_name - store a NUL-terminated entry name in user space.
 *
 * @_dst:  user-space destination for the name
 * @_src:  kernel-space name bytes
 * @_len:  number of name bytes to copy (terminator not included)
 * @label: where to jump if a user access faults
 *
 * Note the "unsafe_put_user()" semantics: nothing is returned, errors
 * 'goto label' instead.  The terminating zero byte is stored at
 * _dst[_len] first, then the _len name bytes are copied.  Each macro
 * argument is evaluated exactly once.  The callers in this file use it
 * between user_write_access_begin() and user_write_access_end().
 */
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {		\
	char __user *__dn_to = (_dst);					\
	const char *__dn_from = (_src);					\
	size_t __dn_len = (_len);					\
	unsafe_put_user(0, __dn_to + __dn_len, label);			\
	unsafe_copy_to_user(__dn_to, __dn_from, __dn_len, label);	\
} while (0)
83
84
85int iterate_dir(struct file *file, struct dir_context *ctx)
86{
87 struct inode *inode = file_inode(file);
88 int res = -ENOTDIR;
89
90 if (!file->f_op->iterate_shared)
91 goto out;
92
93 res = security_file_permission(file, MAY_READ);
94 if (res)
95 goto out;
96
97 res = fsnotify_file_perm(file, MAY_READ);
98 if (res)
99 goto out;
100
101 res = down_read_killable(&inode->i_rwsem);
102 if (res)
103 goto out;
104
105 res = -ENOENT;
106 if (!IS_DEADDIR(inode)) {
107 ctx->pos = file->f_pos;
108 res = file->f_op->iterate_shared(file, ctx);
109 file->f_pos = ctx->pos;
110 fsnotify_access(file);
111 file_accessed(file);
112 }
113 inode_unlock_shared(inode);
114out:
115 return res;
116}
117EXPORT_SYMBOL(iterate_dir);
118
/*
 * verify_dirent_name - sanity-check a directory entry name.
 *
 * @name: entry name as handed over by the filesystem
 * @len:  length of @name in bytes
 *
 * Returns 0 if the name is acceptable, -EIO otherwise.
 *
 * POSIX says that a dirent name cannot contain NUL or a '/'.
 *
 * It is not 100% clear what should really be done when one does.
 * The filesystem is clearly corrupted, but returning a hard error
 * means that none of the other names are seen either, so that is
 * not a perfect alternative.
 *
 * The choice of error code is also open: several filesystems seem
 * to have settled on EUCLEAN as the code for EFSCORRUPTED, and that
 * may be the one to use.  Or just EIO, which is perhaps more obvious
 * to users.
 *
 * In order to see the other file names in the directory, the caller
 * might want to make this a "soft" error: skip the entry, and return
 * the error at the end instead.
 *
 * This should likely do a "memchr(name, 0, len)" check too, since
 * that would be filesystem corruption as well.  However, that case
 * cannot actually confuse user space, which has to do a strlen() on
 * the name anyway to find the filename length, and the "soft error"
 * worry above means it is probably better left alone until that
 * question is settled.
 *
 * Note the PATH_MAX check - it is arbitrary, but it is the real
 * kernel limit on a possible path component, not NAME_MAX, which is
 * the technical standard limit.
 */
static int verify_dirent_name(const char *name, int len)
{
	/* Reject empty, negative and over-long lengths before touching name. */
	if (len < 1 || len > PATH_MAX - 1)
		return -EIO;

	return memchr(name, '/', len) ? -EIO : 0;
}
156
157/*
158 * Traditional linux readdir() handling..
159 *
160 * "count=1" is a special case, meaning that the buffer is one
161 * dirent-structure in size and that the code can't handle more
162 * anyway. Thus the special "fillonedir()" function for that
163 * case (the low-level handlers don't need to care about this).
164 */
165
166#ifdef __ARCH_WANT_OLD_READDIR
167
/*
 * User-space record written by fillonedir() for the old readdir()
 * system call: a fixed header followed by the NUL-terminated name.
 */
struct old_linux_dirent {
	unsigned long	d_ino;		/* inode number of the entry */
	unsigned long	d_offset;	/* 'offset' value passed to the actor */
	unsigned short	d_namlen;	/* name length, terminator excluded */
	char		d_name[];	/* name bytes plus a trailing NUL */
};
174
175struct readdir_callback {
176 struct dir_context ctx;
177 struct old_linux_dirent __user * dirent;
178 int result;
179};
180
181static bool fillonedir(struct dir_context *ctx, const char *name, int namlen,
182 loff_t offset, u64 ino, unsigned int d_type)
183{
184 struct readdir_callback *buf =
185 container_of(ctx, struct readdir_callback, ctx);
186 struct old_linux_dirent __user * dirent;
187 unsigned long d_ino;
188
189 if (buf->result)
190 return false;
191 buf->result = verify_dirent_name(name, namlen);
192 if (buf->result)
193 return false;
194 d_ino = ino;
195 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
196 buf->result = -EOVERFLOW;
197 return false;
198 }
199 buf->result++;
200 dirent = buf->dirent;
201 if (!user_write_access_begin(dirent,
202 (unsigned long)(dirent->d_name + namlen + 1) -
203 (unsigned long)dirent))
204 goto efault;
205 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
206 unsafe_put_user(offset, &dirent->d_offset, efault_end);
207 unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
208 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
209 user_write_access_end();
210 return true;
211efault_end:
212 user_write_access_end();
213efault:
214 buf->result = -EFAULT;
215 return false;
216}
217
218SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
219 struct old_linux_dirent __user *, dirent, unsigned int, count)
220{
221 int error;
222 CLASS(fd_pos, f)(fd);
223 struct readdir_callback buf = {
224 .ctx.actor = fillonedir,
225 .ctx.count = 1, /* Hint to fs: just one entry. */
226 .dirent = dirent
227 };
228
229 if (fd_empty(f))
230 return -EBADF;
231
232 error = iterate_dir(fd_file(f), &buf.ctx);
233 if (buf.result)
234 error = buf.result;
235
236 return error;
237}
238
239#endif /* __ARCH_WANT_OLD_READDIR */
240
/*
 * New, all-improved, singing, dancing, iBCS2-compliant getdents()
 * interface.
 *
 * struct linux_dirent is the record header that filldir() writes to
 * user space.  There is no d_type member: filldir() stores the entry
 * type in the last byte of each record instead.
 */
struct linux_dirent {
	unsigned long	d_ino;		/* inode number of the entry */
	/*
	 * Written when the following record is emitted, or by getdents()
	 * itself for the last record of a call.
	 */
	unsigned long	d_off;
	unsigned short	d_reclen;	/* total length of this record */
	char		d_name[];	/* name bytes plus a trailing NUL */
};
251
252struct getdents_callback {
253 struct dir_context ctx;
254 struct linux_dirent __user * current_dir;
255 int prev_reclen;
256 int error;
257};
258
259static bool filldir(struct dir_context *ctx, const char *name, int namlen,
260 loff_t offset, u64 ino, unsigned int d_type)
261{
262 struct linux_dirent __user *dirent, *prev;
263 struct getdents_callback *buf =
264 container_of(ctx, struct getdents_callback, ctx);
265 unsigned long d_ino;
266 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
267 sizeof(long));
268 int prev_reclen;
269 unsigned int flags = d_type;
270
271 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
272 d_type &= S_DT_MASK;
273
274 buf->error = verify_dirent_name(name, namlen);
275 if (unlikely(buf->error))
276 return false;
277 buf->error = -EINVAL; /* only used if we fail.. */
278 if (reclen > ctx->count)
279 return false;
280 d_ino = ino;
281 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
282 buf->error = -EOVERFLOW;
283 return false;
284 }
285 prev_reclen = buf->prev_reclen;
286 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
287 return false;
288 dirent = buf->current_dir;
289 prev = (void __user *) dirent - prev_reclen;
290 if (!user_write_access_begin(prev, reclen + prev_reclen))
291 goto efault;
292
293 /* This might be 'dirent->d_off', but if so it will get overwritten */
294 unsafe_put_user(offset, &prev->d_off, efault_end);
295 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
296 unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
297 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
298 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
299 user_write_access_end();
300
301 buf->current_dir = (void __user *)dirent + reclen;
302 buf->prev_reclen = reclen;
303 ctx->count -= reclen;
304 return true;
305efault_end:
306 user_write_access_end();
307efault:
308 buf->error = -EFAULT;
309 return false;
310}
311
312SYSCALL_DEFINE3(getdents, unsigned int, fd,
313 struct linux_dirent __user *, dirent, unsigned int, count)
314{
315 CLASS(fd_pos, f)(fd);
316 struct getdents_callback buf = {
317 .ctx.actor = filldir,
318 .ctx.count = count,
319 .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
320 .current_dir = dirent
321 };
322 int error;
323
324 if (fd_empty(f))
325 return -EBADF;
326
327 error = iterate_dir(fd_file(f), &buf.ctx);
328 if (error >= 0)
329 error = buf.error;
330 if (buf.prev_reclen) {
331 struct linux_dirent __user * lastdirent;
332 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
333
334 if (put_user(buf.ctx.pos, &lastdirent->d_off))
335 error = -EFAULT;
336 else
337 error = count - buf.ctx.count;
338 }
339 return error;
340}
341
342struct getdents_callback64 {
343 struct dir_context ctx;
344 struct linux_dirent64 __user * current_dir;
345 int prev_reclen;
346 int error;
347};
348
349static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
350 loff_t offset, u64 ino, unsigned int d_type)
351{
352 struct linux_dirent64 __user *dirent, *prev;
353 struct getdents_callback64 *buf =
354 container_of(ctx, struct getdents_callback64, ctx);
355 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
356 sizeof(u64));
357 int prev_reclen;
358 unsigned int flags = d_type;
359
360 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
361 d_type &= S_DT_MASK;
362
363 buf->error = verify_dirent_name(name, namlen);
364 if (unlikely(buf->error))
365 return false;
366 buf->error = -EINVAL; /* only used if we fail.. */
367 if (reclen > ctx->count)
368 return false;
369 prev_reclen = buf->prev_reclen;
370 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
371 return false;
372 dirent = buf->current_dir;
373 prev = (void __user *)dirent - prev_reclen;
374 if (!user_write_access_begin(prev, reclen + prev_reclen))
375 goto efault;
376
377 /* This might be 'dirent->d_off', but if so it will get overwritten */
378 unsafe_put_user(offset, &prev->d_off, efault_end);
379 unsafe_put_user(ino, &dirent->d_ino, efault_end);
380 unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
381 unsafe_put_user(d_type, &dirent->d_type, efault_end);
382 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
383 user_write_access_end();
384
385 buf->prev_reclen = reclen;
386 buf->current_dir = (void __user *)dirent + reclen;
387 ctx->count -= reclen;
388 return true;
389
390efault_end:
391 user_write_access_end();
392efault:
393 buf->error = -EFAULT;
394 return false;
395}
396
397SYSCALL_DEFINE3(getdents64, unsigned int, fd,
398 struct linux_dirent64 __user *, dirent, unsigned int, count)
399{
400 CLASS(fd_pos, f)(fd);
401 struct getdents_callback64 buf = {
402 .ctx.actor = filldir64,
403 .ctx.count = count,
404 .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
405 .current_dir = dirent
406 };
407 int error;
408
409 if (fd_empty(f))
410 return -EBADF;
411
412 error = iterate_dir(fd_file(f), &buf.ctx);
413 if (error >= 0)
414 error = buf.error;
415 if (buf.prev_reclen) {
416 struct linux_dirent64 __user * lastdirent;
417 typeof(lastdirent->d_off) d_off = buf.ctx.pos;
418
419 lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
420 if (put_user(d_off, &lastdirent->d_off))
421 error = -EFAULT;
422 else
423 error = count - buf.ctx.count;
424 }
425 return error;
426}
427
428#ifdef CONFIG_COMPAT
429struct compat_old_linux_dirent {
430 compat_ulong_t d_ino;
431 compat_ulong_t d_offset;
432 unsigned short d_namlen;
433 char d_name[];
434};
435
436struct compat_readdir_callback {
437 struct dir_context ctx;
438 struct compat_old_linux_dirent __user *dirent;
439 int result;
440};
441
442static bool compat_fillonedir(struct dir_context *ctx, const char *name,
443 int namlen, loff_t offset, u64 ino,
444 unsigned int d_type)
445{
446 struct compat_readdir_callback *buf =
447 container_of(ctx, struct compat_readdir_callback, ctx);
448 struct compat_old_linux_dirent __user *dirent;
449 compat_ulong_t d_ino;
450
451 if (buf->result)
452 return false;
453 buf->result = verify_dirent_name(name, namlen);
454 if (buf->result)
455 return false;
456 d_ino = ino;
457 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
458 buf->result = -EOVERFLOW;
459 return false;
460 }
461 buf->result++;
462 dirent = buf->dirent;
463 if (!user_write_access_begin(dirent,
464 (unsigned long)(dirent->d_name + namlen + 1) -
465 (unsigned long)dirent))
466 goto efault;
467 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
468 unsafe_put_user(offset, &dirent->d_offset, efault_end);
469 unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
470 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
471 user_write_access_end();
472 return true;
473efault_end:
474 user_write_access_end();
475efault:
476 buf->result = -EFAULT;
477 return false;
478}
479
480COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
481 struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
482{
483 int error;
484 CLASS(fd_pos, f)(fd);
485 struct compat_readdir_callback buf = {
486 .ctx.actor = compat_fillonedir,
487 .ctx.count = 1, /* Hint to fs: just one entry. */
488 .dirent = dirent
489 };
490
491 if (fd_empty(f))
492 return -EBADF;
493
494 error = iterate_dir(fd_file(f), &buf.ctx);
495 if (buf.result)
496 error = buf.result;
497
498 return error;
499}
500
501struct compat_linux_dirent {
502 compat_ulong_t d_ino;
503 compat_ulong_t d_off;
504 unsigned short d_reclen;
505 char d_name[];
506};
507
508struct compat_getdents_callback {
509 struct dir_context ctx;
510 struct compat_linux_dirent __user *current_dir;
511 int prev_reclen;
512 int error;
513};
514
515static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen,
516 loff_t offset, u64 ino, unsigned int d_type)
517{
518 struct compat_linux_dirent __user *dirent, *prev;
519 struct compat_getdents_callback *buf =
520 container_of(ctx, struct compat_getdents_callback, ctx);
521 compat_ulong_t d_ino;
522 int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
523 namlen + 2, sizeof(compat_long_t));
524 int prev_reclen;
525 unsigned int flags = d_type;
526
527 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK);
528 d_type &= S_DT_MASK;
529
530 buf->error = verify_dirent_name(name, namlen);
531 if (unlikely(buf->error))
532 return false;
533 buf->error = -EINVAL; /* only used if we fail.. */
534 if (reclen > ctx->count)
535 return false;
536 d_ino = ino;
537 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
538 buf->error = -EOVERFLOW;
539 return false;
540 }
541 prev_reclen = buf->prev_reclen;
542 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current))
543 return false;
544 dirent = buf->current_dir;
545 prev = (void __user *) dirent - prev_reclen;
546 if (!user_write_access_begin(prev, reclen + prev_reclen))
547 goto efault;
548
549 unsafe_put_user(offset, &prev->d_off, efault_end);
550 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
551 unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
552 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
553 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
554 user_write_access_end();
555
556 buf->prev_reclen = reclen;
557 buf->current_dir = (void __user *)dirent + reclen;
558 ctx->count -= reclen;
559 return true;
560efault_end:
561 user_write_access_end();
562efault:
563 buf->error = -EFAULT;
564 return false;
565}
566
567COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
568 struct compat_linux_dirent __user *, dirent, unsigned int, count)
569{
570 CLASS(fd_pos, f)(fd);
571 struct compat_getdents_callback buf = {
572 .ctx.actor = compat_filldir,
573 .ctx.count = count,
574 .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR,
575 .current_dir = dirent,
576 };
577 int error;
578
579 if (fd_empty(f))
580 return -EBADF;
581
582 error = iterate_dir(fd_file(f), &buf.ctx);
583 if (error >= 0)
584 error = buf.error;
585 if (buf.prev_reclen) {
586 struct compat_linux_dirent __user * lastdirent;
587 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
588
589 if (put_user(buf.ctx.pos, &lastdirent->d_off))
590 error = -EFAULT;
591 else
592 error = count - buf.ctx.count;
593 }
594 return error;
595}
596#endif