at v6.19-rc8 596 lines 16 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * linux/fs/readdir.c 4 * 5 * Copyright (C) 1995 Linus Torvalds 6 */ 7 8#include <linux/stddef.h> 9#include <linux/kernel.h> 10#include <linux/export.h> 11#include <linux/time.h> 12#include <linux/mm.h> 13#include <linux/errno.h> 14#include <linux/stat.h> 15#include <linux/file.h> 16#include <linux/fs.h> 17#include <linux/fsnotify.h> 18#include <linux/dirent.h> 19#include <linux/security.h> 20#include <linux/syscalls.h> 21#include <linux/unistd.h> 22#include <linux/compat.h> 23#include <linux/uaccess.h> 24 25/* 26 * Some filesystems were never converted to '->iterate_shared()' 27 * and their directory iterators want the inode lock held for 28 * writing. This wrapper allows for converting from the shared 29 * semantics to the exclusive inode use. 30 */ 31int wrap_directory_iterator(struct file *file, 32 struct dir_context *ctx, 33 int (*iter)(struct file *, struct dir_context *)) 34{ 35 struct inode *inode = file_inode(file); 36 int ret; 37 38 /* 39 * We'd love to have an 'inode_upgrade_trylock()' operation, 40 * see the comment in mmap_upgrade_trylock() in mm/memory.c. 41 * 42 * But considering this is for "filesystems that never got 43 * converted", it really doesn't matter. 44 * 45 * Also note that since we have to return with the lock held 46 * for reading, we can't use the "killable()" locking here, 47 * since we do need to get the lock even if we're dying. 48 * 49 * We could do the write part killably and then get the read 50 * lock unconditionally if it mattered, but see above on why 51 * this does the very simplistic conversion. 52 */ 53 up_read(&inode->i_rwsem); 54 down_write(&inode->i_rwsem); 55 56 /* 57 * Since we dropped the inode lock, we should do the 58 * DEADDIR test again. See 'iterate_dir()' below. 59 * 60 * Note that we don't need to re-do the f_pos games, 61 * since the file must be locked wrt f_pos anyway. 62 */ 63 ret = -ENOENT; 64 if (!IS_DEADDIR(inode)) 65 ret = iter(file, ctx); 66 67 downgrade_write(&inode->i_rwsem); 68 return ret; 69} 70EXPORT_SYMBOL(wrap_directory_iterator); 71 72/* 73 * Note the "unsafe_put_user()" semantics: we goto a 74 * label for errors. 75 */ 76#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ 77 char __user *dst = (_dst); \ 78 const char *src = (_src); \ 79 size_t len = (_len); \ 80 unsafe_put_user(0, dst+len, label); \ 81 unsafe_copy_to_user(dst, src, len, label); \ 82} while (0) 83 84 85int iterate_dir(struct file *file, struct dir_context *ctx) 86{ 87 struct inode *inode = file_inode(file); 88 int res = -ENOTDIR; 89 90 if (!file->f_op->iterate_shared) 91 goto out; 92 93 res = security_file_permission(file, MAY_READ); 94 if (res) 95 goto out; 96 97 res = fsnotify_file_perm(file, MAY_READ); 98 if (res) 99 goto out; 100 101 res = down_read_killable(&inode->i_rwsem); 102 if (res) 103 goto out; 104 105 res = -ENOENT; 106 if (!IS_DEADDIR(inode)) { 107 ctx->pos = file->f_pos; 108 res = file->f_op->iterate_shared(file, ctx); 109 file->f_pos = ctx->pos; 110 fsnotify_access(file); 111 file_accessed(file); 112 } 113 inode_unlock_shared(inode); 114out: 115 return res; 116} 117EXPORT_SYMBOL(iterate_dir); 118 119/* 120 * POSIX says that a dirent name cannot contain NULL or a '/'. 121 * 122 * It's not 100% clear what we should really do in this case. 123 * The filesystem is clearly corrupted, but returning a hard 124 * error means that you now don't see any of the other names 125 * either, so that isn't a perfect alternative. 126 * 127 * And if you return an error, what error do you use? Several 128 * filesystems seem to have decided on EUCLEAN being the error 129 * code for EFSCORRUPTED, and that may be the error to use. Or 130 * just EIO, which is perhaps more obvious to users. 131 * 132 * In order to see the other file names in the directory, the 133 * caller might want to make this a "soft" error: skip the 134 * entry, and return the error at the end instead. 135 * 136 * Note that this should likely do a "memchr(name, 0, len)" 137 * check too, since that would be filesystem corruption as 138 * well. However, that case can't actually confuse user space, 139 * which has to do a strlen() on the name anyway to find the 140 * filename length, and the above "soft error" worry means 141 * that it's probably better left alone until we have that 142 * issue clarified. 143 * 144 * Note the PATH_MAX check - it's arbitrary but the real 145 * kernel limit on a possible path component, not NAME_MAX, 146 * which is the technical standard limit. 147 */ 148static int verify_dirent_name(const char *name, int len) 149{ 150 if (len <= 0 || len >= PATH_MAX) 151 return -EIO; 152 if (memchr(name, '/', len)) 153 return -EIO; 154 return 0; 155} 156 157/* 158 * Traditional linux readdir() handling.. 159 * 160 * "count=1" is a special case, meaning that the buffer is one 161 * dirent-structure in size and that the code can't handle more 162 * anyway. Thus the special "fillonedir()" function for that 163 * case (the low-level handlers don't need to care about this). 164 */ 165 166#ifdef __ARCH_WANT_OLD_READDIR 167 168struct old_linux_dirent { 169 unsigned long d_ino; 170 unsigned long d_offset; 171 unsigned short d_namlen; 172 char d_name[]; 173}; 174 175struct readdir_callback { 176 struct dir_context ctx; 177 struct old_linux_dirent __user * dirent; 178 int result; 179}; 180 181static bool fillonedir(struct dir_context *ctx, const char *name, int namlen, 182 loff_t offset, u64 ino, unsigned int d_type) 183{ 184 struct readdir_callback *buf = 185 container_of(ctx, struct readdir_callback, ctx); 186 struct old_linux_dirent __user * dirent; 187 unsigned long d_ino; 188 189 if (buf->result) 190 return false; 191 buf->result = verify_dirent_name(name, namlen); 192 if (buf->result) 193 return false; 194 d_ino = ino; 195 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 196 buf->result = -EOVERFLOW; 197 return false; 198 } 199 buf->result++; 200 dirent = buf->dirent; 201 if (!user_write_access_begin(dirent, 202 (unsigned long)(dirent->d_name + namlen + 1) - 203 (unsigned long)dirent)) 204 goto efault; 205 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 206 unsafe_put_user(offset, &dirent->d_offset, efault_end); 207 unsafe_put_user(namlen, &dirent->d_namlen, efault_end); 208 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 209 user_write_access_end(); 210 return true; 211efault_end: 212 user_write_access_end(); 213efault: 214 buf->result = -EFAULT; 215 return false; 216} 217 218SYSCALL_DEFINE3(old_readdir, unsigned int, fd, 219 struct old_linux_dirent __user *, dirent, unsigned int, count) 220{ 221 int error; 222 CLASS(fd_pos, f)(fd); 223 struct readdir_callback buf = { 224 .ctx.actor = fillonedir, 225 .ctx.count = 1, /* Hint to fs: just one entry. */ 226 .dirent = dirent 227 }; 228 229 if (fd_empty(f)) 230 return -EBADF; 231 232 error = iterate_dir(fd_file(f), &buf.ctx); 233 if (buf.result) 234 error = buf.result; 235 236 return error; 237} 238 239#endif /* __ARCH_WANT_OLD_READDIR */ 240 241/* 242 * New, all-improved, singing, dancing, iBCS2-compliant getdents() 243 * interface. 244 */ 245struct linux_dirent { 246 unsigned long d_ino; 247 unsigned long d_off; 248 unsigned short d_reclen; 249 char d_name[]; 250}; 251 252struct getdents_callback { 253 struct dir_context ctx; 254 struct linux_dirent __user * current_dir; 255 int prev_reclen; 256 int error; 257}; 258 259static bool filldir(struct dir_context *ctx, const char *name, int namlen, 260 loff_t offset, u64 ino, unsigned int d_type) 261{ 262 struct linux_dirent __user *dirent, *prev; 263 struct getdents_callback *buf = 264 container_of(ctx, struct getdents_callback, ctx); 265 unsigned long d_ino; 266 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, 267 sizeof(long)); 268 int prev_reclen; 269 unsigned int flags = d_type; 270 271 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); 272 d_type &= S_DT_MASK; 273 274 buf->error = verify_dirent_name(name, namlen); 275 if (unlikely(buf->error)) 276 return false; 277 buf->error = -EINVAL; /* only used if we fail.. */ 278 if (reclen > ctx->count) 279 return false; 280 d_ino = ino; 281 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 282 buf->error = -EOVERFLOW; 283 return false; 284 } 285 prev_reclen = buf->prev_reclen; 286 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) 287 return false; 288 dirent = buf->current_dir; 289 prev = (void __user *) dirent - prev_reclen; 290 if (!user_write_access_begin(prev, reclen + prev_reclen)) 291 goto efault; 292 293 /* This might be 'dirent->d_off', but if so it will get overwritten */ 294 unsafe_put_user(offset, &prev->d_off, efault_end); 295 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 296 unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 297 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); 298 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 299 user_write_access_end(); 300 301 buf->current_dir = (void __user *)dirent + reclen; 302 buf->prev_reclen = reclen; 303 ctx->count -= reclen; 304 return true; 305efault_end: 306 user_write_access_end(); 307efault: 308 buf->error = -EFAULT; 309 return false; 310} 311 312SYSCALL_DEFINE3(getdents, unsigned int, fd, 313 struct linux_dirent __user *, dirent, unsigned int, count) 314{ 315 CLASS(fd_pos, f)(fd); 316 struct getdents_callback buf = { 317 .ctx.actor = filldir, 318 .ctx.count = count, 319 .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR, 320 .current_dir = dirent 321 }; 322 int error; 323 324 if (fd_empty(f)) 325 return -EBADF; 326 327 error = iterate_dir(fd_file(f), &buf.ctx); 328 if (error >= 0) 329 error = buf.error; 330 if (buf.prev_reclen) { 331 struct linux_dirent __user * lastdirent; 332 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; 333 334 if (put_user(buf.ctx.pos, &lastdirent->d_off)) 335 error = -EFAULT; 336 else 337 error = count - buf.ctx.count; 338 } 339 return error; 340} 341 342struct getdents_callback64 { 343 struct dir_context ctx; 344 struct linux_dirent64 __user * current_dir; 345 int prev_reclen; 346 int error; 347}; 348 349static bool filldir64(struct dir_context *ctx, const char *name, int namlen, 350 loff_t offset, u64 ino, unsigned int d_type) 351{ 352 struct linux_dirent64 __user *dirent, *prev; 353 struct getdents_callback64 *buf = 354 container_of(ctx, struct getdents_callback64, ctx); 355 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, 356 sizeof(u64)); 357 int prev_reclen; 358 unsigned int flags = d_type; 359 360 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); 361 d_type &= S_DT_MASK; 362 363 buf->error = verify_dirent_name(name, namlen); 364 if (unlikely(buf->error)) 365 return false; 366 buf->error = -EINVAL; /* only used if we fail.. */ 367 if (reclen > ctx->count) 368 return false; 369 prev_reclen = buf->prev_reclen; 370 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) 371 return false; 372 dirent = buf->current_dir; 373 prev = (void __user *)dirent - prev_reclen; 374 if (!user_write_access_begin(prev, reclen + prev_reclen)) 375 goto efault; 376 377 /* This might be 'dirent->d_off', but if so it will get overwritten */ 378 unsafe_put_user(offset, &prev->d_off, efault_end); 379 unsafe_put_user(ino, &dirent->d_ino, efault_end); 380 unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 381 unsafe_put_user(d_type, &dirent->d_type, efault_end); 382 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 383 user_write_access_end(); 384 385 buf->prev_reclen = reclen; 386 buf->current_dir = (void __user *)dirent + reclen; 387 ctx->count -= reclen; 388 return true; 389 390efault_end: 391 user_write_access_end(); 392efault: 393 buf->error = -EFAULT; 394 return false; 395} 396 397SYSCALL_DEFINE3(getdents64, unsigned int, fd, 398 struct linux_dirent64 __user *, dirent, unsigned int, count) 399{ 400 CLASS(fd_pos, f)(fd); 401 struct getdents_callback64 buf = { 402 .ctx.actor = filldir64, 403 .ctx.count = count, 404 .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR, 405 .current_dir = dirent 406 }; 407 int error; 408 409 if (fd_empty(f)) 410 return -EBADF; 411 412 error = iterate_dir(fd_file(f), &buf.ctx); 413 if (error >= 0) 414 error = buf.error; 415 if (buf.prev_reclen) { 416 struct linux_dirent64 __user * lastdirent; 417 typeof(lastdirent->d_off) d_off = buf.ctx.pos; 418 419 lastdirent = (void __user *) buf.current_dir - buf.prev_reclen; 420 if (put_user(d_off, &lastdirent->d_off)) 421 error = -EFAULT; 422 else 423 error = count - buf.ctx.count; 424 } 425 return error; 426} 427 428#ifdef CONFIG_COMPAT 429struct compat_old_linux_dirent { 430 compat_ulong_t d_ino; 431 compat_ulong_t d_offset; 432 unsigned short d_namlen; 433 char d_name[]; 434}; 435 436struct compat_readdir_callback { 437 struct dir_context ctx; 438 struct compat_old_linux_dirent __user *dirent; 439 int result; 440}; 441 442static bool compat_fillonedir(struct dir_context *ctx, const char *name, 443 int namlen, loff_t offset, u64 ino, 444 unsigned int d_type) 445{ 446 struct compat_readdir_callback *buf = 447 container_of(ctx, struct compat_readdir_callback, ctx); 448 struct compat_old_linux_dirent __user *dirent; 449 compat_ulong_t d_ino; 450 451 if (buf->result) 452 return false; 453 buf->result = verify_dirent_name(name, namlen); 454 if (buf->result) 455 return false; 456 d_ino = ino; 457 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 458 buf->result = -EOVERFLOW; 459 return false; 460 } 461 buf->result++; 462 dirent = buf->dirent; 463 if (!user_write_access_begin(dirent, 464 (unsigned long)(dirent->d_name + namlen + 1) - 465 (unsigned long)dirent)) 466 goto efault; 467 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 468 unsafe_put_user(offset, &dirent->d_offset, efault_end); 469 unsafe_put_user(namlen, &dirent->d_namlen, efault_end); 470 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 471 user_write_access_end(); 472 return true; 473efault_end: 474 user_write_access_end(); 475efault: 476 buf->result = -EFAULT; 477 return false; 478} 479 480COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, 481 struct compat_old_linux_dirent __user *, dirent, unsigned int, count) 482{ 483 int error; 484 CLASS(fd_pos, f)(fd); 485 struct compat_readdir_callback buf = { 486 .ctx.actor = compat_fillonedir, 487 .ctx.count = 1, /* Hint to fs: just one entry. */ 488 .dirent = dirent 489 }; 490 491 if (fd_empty(f)) 492 return -EBADF; 493 494 error = iterate_dir(fd_file(f), &buf.ctx); 495 if (buf.result) 496 error = buf.result; 497 498 return error; 499} 500 501struct compat_linux_dirent { 502 compat_ulong_t d_ino; 503 compat_ulong_t d_off; 504 unsigned short d_reclen; 505 char d_name[]; 506}; 507 508struct compat_getdents_callback { 509 struct dir_context ctx; 510 struct compat_linux_dirent __user *current_dir; 511 int prev_reclen; 512 int error; 513}; 514 515static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen, 516 loff_t offset, u64 ino, unsigned int d_type) 517{ 518 struct compat_linux_dirent __user *dirent, *prev; 519 struct compat_getdents_callback *buf = 520 container_of(ctx, struct compat_getdents_callback, ctx); 521 compat_ulong_t d_ino; 522 int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + 523 namlen + 2, sizeof(compat_long_t)); 524 int prev_reclen; 525 unsigned int flags = d_type; 526 527 BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); 528 d_type &= S_DT_MASK; 529 530 buf->error = verify_dirent_name(name, namlen); 531 if (unlikely(buf->error)) 532 return false; 533 buf->error = -EINVAL; /* only used if we fail.. */ 534 if (reclen > ctx->count) 535 return false; 536 d_ino = ino; 537 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 538 buf->error = -EOVERFLOW; 539 return false; 540 } 541 prev_reclen = buf->prev_reclen; 542 if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) 543 return false; 544 dirent = buf->current_dir; 545 prev = (void __user *) dirent - prev_reclen; 546 if (!user_write_access_begin(prev, reclen + prev_reclen)) 547 goto efault; 548 549 unsafe_put_user(offset, &prev->d_off, efault_end); 550 unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 551 unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 552 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); 553 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 554 user_write_access_end(); 555 556 buf->prev_reclen = reclen; 557 buf->current_dir = (void __user *)dirent + reclen; 558 ctx->count -= reclen; 559 return true; 560efault_end: 561 user_write_access_end(); 562efault: 563 buf->error = -EFAULT; 564 return false; 565} 566 567COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, 568 struct compat_linux_dirent __user *, dirent, unsigned int, count) 569{ 570 CLASS(fd_pos, f)(fd); 571 struct compat_getdents_callback buf = { 572 .ctx.actor = compat_filldir, 573 .ctx.count = count, 574 .ctx.dt_flags_mask = FILLDIR_FLAG_NOINTR, 575 .current_dir = dirent, 576 }; 577 int error; 578 579 if (fd_empty(f)) 580 return -EBADF; 581 582 error = iterate_dir(fd_file(f), &buf.ctx); 583 if (error >= 0) 584 error = buf.error; 585 if (buf.prev_reclen) { 586 struct compat_linux_dirent __user * lastdirent; 587 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; 588 589 if (put_user(buf.ctx.pos, &lastdirent->d_off)) 590 error = -EFAULT; 591 else 592 error = count - buf.ctx.count; 593 } 594 return error; 595} 596#endif