at v3.13 563 lines 15 kB view raw
1/* 2 * linux/fs/ext3/dir.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/dir.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * ext3 directory handling functions 16 * 17 * Big-endian to little-endian byte-swapping/bitmaps by 18 * David S. Miller (davem@caip.rutgers.edu), 1995 19 * 20 * Hash Tree Directory indexing (c) 2001 Daniel Phillips 21 * 22 */ 23 24#include <linux/compat.h> 25#include "ext3.h" 26 27static unsigned char ext3_filetype_table[] = { 28 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 29}; 30 31static int ext3_dx_readdir(struct file *, struct dir_context *); 32 33static unsigned char get_dtype(struct super_block *sb, int filetype) 34{ 35 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || 36 (filetype >= EXT3_FT_MAX)) 37 return DT_UNKNOWN; 38 39 return (ext3_filetype_table[filetype]); 40} 41 42/** 43 * Check if the given dir-inode refers to an htree-indexed directory 44 * (or a directory which could potentially get converted to use htree 45 * indexing). 46 * 47 * Return 1 if it is a dx dir, 0 if not 48 */ 49static int is_dx_dir(struct inode *inode) 50{ 51 struct super_block *sb = inode->i_sb; 52 53 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, 54 EXT3_FEATURE_COMPAT_DIR_INDEX) && 55 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || 56 ((inode->i_size >> sb->s_blocksize_bits) == 1))) 57 return 1; 58 59 return 0; 60} 61 62int ext3_check_dir_entry (const char * function, struct inode * dir, 63 struct ext3_dir_entry_2 * de, 64 struct buffer_head * bh, 65 unsigned long offset) 66{ 67 const char * error_msg = NULL; 68 const int rlen = ext3_rec_len_from_disk(de->rec_len); 69 70 if (unlikely(rlen < EXT3_DIR_REC_LEN(1))) 71 error_msg = "rec_len is smaller than minimal"; 72 else if (unlikely(rlen % 4 != 0)) 73 error_msg = "rec_len % 4 != 0"; 74 else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len))) 75 error_msg = "rec_len is too small for name_len"; 76 else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))) 77 error_msg = "directory entry across blocks"; 78 else if (unlikely(le32_to_cpu(de->inode) > 79 le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))) 80 error_msg = "inode out of bounds"; 81 82 if (unlikely(error_msg != NULL)) 83 ext3_error (dir->i_sb, function, 84 "bad entry in directory #%lu: %s - " 85 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 86 dir->i_ino, error_msg, offset, 87 (unsigned long) le32_to_cpu(de->inode), 88 rlen, de->name_len); 89 90 return error_msg == NULL ? 1 : 0; 91} 92 93static int ext3_readdir(struct file *file, struct dir_context *ctx) 94{ 95 unsigned long offset; 96 int i; 97 struct ext3_dir_entry_2 *de; 98 int err; 99 struct inode *inode = file_inode(file); 100 struct super_block *sb = inode->i_sb; 101 int dir_has_error = 0; 102 103 if (is_dx_dir(inode)) { 104 err = ext3_dx_readdir(file, ctx); 105 if (err != ERR_BAD_DX_DIR) 106 return err; 107 /* 108 * We don't set the inode dirty flag since it's not 109 * critical that it get flushed back to the disk. 110 */ 111 EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; 112 } 113 offset = ctx->pos & (sb->s_blocksize - 1); 114 115 while (ctx->pos < inode->i_size) { 116 unsigned long blk = ctx->pos >> EXT3_BLOCK_SIZE_BITS(sb); 117 struct buffer_head map_bh; 118 struct buffer_head *bh = NULL; 119 120 map_bh.b_state = 0; 121 err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0); 122 if (err > 0) { 123 pgoff_t index = map_bh.b_blocknr >> 124 (PAGE_CACHE_SHIFT - inode->i_blkbits); 125 if (!ra_has_index(&file->f_ra, index)) 126 page_cache_sync_readahead( 127 sb->s_bdev->bd_inode->i_mapping, 128 &file->f_ra, file, 129 index, 1); 130 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; 131 bh = ext3_bread(NULL, inode, blk, 0, &err); 132 } 133 134 /* 135 * We ignore I/O errors on directories so users have a chance 136 * of recovering data when there's a bad sector 137 */ 138 if (!bh) { 139 if (!dir_has_error) { 140 ext3_error(sb, __func__, "directory #%lu " 141 "contains a hole at offset %lld", 142 inode->i_ino, ctx->pos); 143 dir_has_error = 1; 144 } 145 /* corrupt size? Maybe no more blocks to read */ 146 if (ctx->pos > inode->i_blocks << 9) 147 break; 148 ctx->pos += sb->s_blocksize - offset; 149 continue; 150 } 151 152 /* If the dir block has changed since the last call to 153 * readdir(2), then we might be pointing to an invalid 154 * dirent right now. Scan from the start of the block 155 * to make sure. */ 156 if (offset && file->f_version != inode->i_version) { 157 for (i = 0; i < sb->s_blocksize && i < offset; ) { 158 de = (struct ext3_dir_entry_2 *) 159 (bh->b_data + i); 160 /* It's too expensive to do a full 161 * dirent test each time round this 162 * loop, but we do have to test at 163 * least that it is non-zero. A 164 * failure will be detected in the 165 * dirent test below. */ 166 if (ext3_rec_len_from_disk(de->rec_len) < 167 EXT3_DIR_REC_LEN(1)) 168 break; 169 i += ext3_rec_len_from_disk(de->rec_len); 170 } 171 offset = i; 172 ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) 173 | offset; 174 file->f_version = inode->i_version; 175 } 176 177 while (ctx->pos < inode->i_size 178 && offset < sb->s_blocksize) { 179 de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); 180 if (!ext3_check_dir_entry ("ext3_readdir", inode, de, 181 bh, offset)) { 182 /* On error, skip the to the 183 next block. */ 184 ctx->pos = (ctx->pos | 185 (sb->s_blocksize - 1)) + 1; 186 break; 187 } 188 offset += ext3_rec_len_from_disk(de->rec_len); 189 if (le32_to_cpu(de->inode)) { 190 if (!dir_emit(ctx, de->name, de->name_len, 191 le32_to_cpu(de->inode), 192 get_dtype(sb, de->file_type))) { 193 brelse(bh); 194 return 0; 195 } 196 } 197 ctx->pos += ext3_rec_len_from_disk(de->rec_len); 198 } 199 offset = 0; 200 brelse (bh); 201 if (ctx->pos < inode->i_size) 202 if (!dir_relax(inode)) 203 return 0; 204 } 205 return 0; 206} 207 208static inline int is_32bit_api(void) 209{ 210#ifdef CONFIG_COMPAT 211 return is_compat_task(); 212#else 213 return (BITS_PER_LONG == 32); 214#endif 215} 216 217/* 218 * These functions convert from the major/minor hash to an f_pos 219 * value for dx directories 220 * 221 * Upper layer (for example NFS) should specify FMODE_32BITHASH or 222 * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted 223 * directly on both 32-bit and 64-bit nodes, under such case, neither 224 * FMODE_32BITHASH nor FMODE_64BITHASH is specified. 225 */ 226static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) 227{ 228 if ((filp->f_mode & FMODE_32BITHASH) || 229 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 230 return major >> 1; 231 else 232 return ((__u64)(major >> 1) << 32) | (__u64)minor; 233} 234 235static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) 236{ 237 if ((filp->f_mode & FMODE_32BITHASH) || 238 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 239 return (pos << 1) & 0xffffffff; 240 else 241 return ((pos >> 32) << 1) & 0xffffffff; 242} 243 244static inline __u32 pos2min_hash(struct file *filp, loff_t pos) 245{ 246 if ((filp->f_mode & FMODE_32BITHASH) || 247 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 248 return 0; 249 else 250 return pos & 0xffffffff; 251} 252 253/* 254 * Return 32- or 64-bit end-of-file for dx directories 255 */ 256static inline loff_t ext3_get_htree_eof(struct file *filp) 257{ 258 if ((filp->f_mode & FMODE_32BITHASH) || 259 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 260 return EXT3_HTREE_EOF_32BIT; 261 else 262 return EXT3_HTREE_EOF_64BIT; 263} 264 265 266/* 267 * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both 268 * non-htree and htree directories, where the "offset" is in terms 269 * of the filename hash value instead of the byte offset. 270 * 271 * Because we may return a 64-bit hash that is well beyond s_maxbytes, 272 * we need to pass the max hash as the maximum allowable offset in 273 * the htree directory case. 274 * 275 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) 276 * will be invalid once the directory was converted into a dx directory 277 */ 278loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) 279{ 280 struct inode *inode = file->f_mapping->host; 281 int dx_dir = is_dx_dir(inode); 282 loff_t htree_max = ext3_get_htree_eof(file); 283 284 if (likely(dx_dir)) 285 return generic_file_llseek_size(file, offset, whence, 286 htree_max, htree_max); 287 else 288 return generic_file_llseek(file, offset, whence); 289} 290 291/* 292 * This structure holds the nodes of the red-black tree used to store 293 * the directory entry in hash order. 294 */ 295struct fname { 296 __u32 hash; 297 __u32 minor_hash; 298 struct rb_node rb_hash; 299 struct fname *next; 300 __u32 inode; 301 __u8 name_len; 302 __u8 file_type; 303 char name[0]; 304}; 305 306/* 307 * This functoin implements a non-recursive way of freeing all of the 308 * nodes in the red-black tree. 309 */ 310static void free_rb_tree_fname(struct rb_root *root) 311{ 312 struct rb_node *n = root->rb_node; 313 struct rb_node *parent; 314 struct fname *fname; 315 316 while (n) { 317 /* Do the node's children first */ 318 if (n->rb_left) { 319 n = n->rb_left; 320 continue; 321 } 322 if (n->rb_right) { 323 n = n->rb_right; 324 continue; 325 } 326 /* 327 * The node has no children; free it, and then zero 328 * out parent's link to it. Finally go to the 329 * beginning of the loop and try to free the parent 330 * node. 331 */ 332 parent = rb_parent(n); 333 fname = rb_entry(n, struct fname, rb_hash); 334 while (fname) { 335 struct fname * old = fname; 336 fname = fname->next; 337 kfree (old); 338 } 339 if (!parent) 340 *root = RB_ROOT; 341 else if (parent->rb_left == n) 342 parent->rb_left = NULL; 343 else if (parent->rb_right == n) 344 parent->rb_right = NULL; 345 n = parent; 346 } 347} 348 349 350static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, 351 loff_t pos) 352{ 353 struct dir_private_info *p; 354 355 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 356 if (!p) 357 return NULL; 358 p->curr_hash = pos2maj_hash(filp, pos); 359 p->curr_minor_hash = pos2min_hash(filp, pos); 360 return p; 361} 362 363void ext3_htree_free_dir_info(struct dir_private_info *p) 364{ 365 free_rb_tree_fname(&p->root); 366 kfree(p); 367} 368 369/* 370 * Given a directory entry, enter it into the fname rb tree. 371 */ 372int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, 373 __u32 minor_hash, 374 struct ext3_dir_entry_2 *dirent) 375{ 376 struct rb_node **p, *parent = NULL; 377 struct fname * fname, *new_fn; 378 struct dir_private_info *info; 379 int len; 380 381 info = (struct dir_private_info *) dir_file->private_data; 382 p = &info->root.rb_node; 383 384 /* Create and allocate the fname structure */ 385 len = sizeof(struct fname) + dirent->name_len + 1; 386 new_fn = kzalloc(len, GFP_KERNEL); 387 if (!new_fn) 388 return -ENOMEM; 389 new_fn->hash = hash; 390 new_fn->minor_hash = minor_hash; 391 new_fn->inode = le32_to_cpu(dirent->inode); 392 new_fn->name_len = dirent->name_len; 393 new_fn->file_type = dirent->file_type; 394 memcpy(new_fn->name, dirent->name, dirent->name_len); 395 new_fn->name[dirent->name_len] = 0; 396 397 while (*p) { 398 parent = *p; 399 fname = rb_entry(parent, struct fname, rb_hash); 400 401 /* 402 * If the hash and minor hash match up, then we put 403 * them on a linked list. This rarely happens... 404 */ 405 if ((new_fn->hash == fname->hash) && 406 (new_fn->minor_hash == fname->minor_hash)) { 407 new_fn->next = fname->next; 408 fname->next = new_fn; 409 return 0; 410 } 411 412 if (new_fn->hash < fname->hash) 413 p = &(*p)->rb_left; 414 else if (new_fn->hash > fname->hash) 415 p = &(*p)->rb_right; 416 else if (new_fn->minor_hash < fname->minor_hash) 417 p = &(*p)->rb_left; 418 else /* if (new_fn->minor_hash > fname->minor_hash) */ 419 p = &(*p)->rb_right; 420 } 421 422 rb_link_node(&new_fn->rb_hash, parent, p); 423 rb_insert_color(&new_fn->rb_hash, &info->root); 424 return 0; 425} 426 427 428 429/* 430 * This is a helper function for ext3_dx_readdir. It calls filldir 431 * for all entres on the fname linked list. (Normally there is only 432 * one entry on the linked list, unless there are 62 bit hash collisions.) 433 */ 434static bool call_filldir(struct file *file, struct dir_context *ctx, 435 struct fname *fname) 436{ 437 struct dir_private_info *info = file->private_data; 438 struct inode *inode = file_inode(file); 439 struct super_block *sb = inode->i_sb; 440 441 if (!fname) { 442 printk("call_filldir: called with null fname?!?\n"); 443 return true; 444 } 445 ctx->pos = hash2pos(file, fname->hash, fname->minor_hash); 446 while (fname) { 447 if (!dir_emit(ctx, fname->name, fname->name_len, 448 fname->inode, 449 get_dtype(sb, fname->file_type))) { 450 info->extra_fname = fname; 451 return false; 452 } 453 fname = fname->next; 454 } 455 return true; 456} 457 458static int ext3_dx_readdir(struct file *file, struct dir_context *ctx) 459{ 460 struct dir_private_info *info = file->private_data; 461 struct inode *inode = file_inode(file); 462 struct fname *fname; 463 int ret; 464 465 if (!info) { 466 info = ext3_htree_create_dir_info(file, ctx->pos); 467 if (!info) 468 return -ENOMEM; 469 file->private_data = info; 470 } 471 472 if (ctx->pos == ext3_get_htree_eof(file)) 473 return 0; /* EOF */ 474 475 /* Some one has messed with f_pos; reset the world */ 476 if (info->last_pos != ctx->pos) { 477 free_rb_tree_fname(&info->root); 478 info->curr_node = NULL; 479 info->extra_fname = NULL; 480 info->curr_hash = pos2maj_hash(file, ctx->pos); 481 info->curr_minor_hash = pos2min_hash(file, ctx->pos); 482 } 483 484 /* 485 * If there are any leftover names on the hash collision 486 * chain, return them first. 487 */ 488 if (info->extra_fname) { 489 if (!call_filldir(file, ctx, info->extra_fname)) 490 goto finished; 491 info->extra_fname = NULL; 492 goto next_node; 493 } else if (!info->curr_node) 494 info->curr_node = rb_first(&info->root); 495 496 while (1) { 497 /* 498 * Fill the rbtree if we have no more entries, 499 * or the inode has changed since we last read in the 500 * cached entries. 501 */ 502 if ((!info->curr_node) || 503 (file->f_version != inode->i_version)) { 504 info->curr_node = NULL; 505 free_rb_tree_fname(&info->root); 506 file->f_version = inode->i_version; 507 ret = ext3_htree_fill_tree(file, info->curr_hash, 508 info->curr_minor_hash, 509 &info->next_hash); 510 if (ret < 0) 511 return ret; 512 if (ret == 0) { 513 ctx->pos = ext3_get_htree_eof(file); 514 break; 515 } 516 info->curr_node = rb_first(&info->root); 517 } 518 519 fname = rb_entry(info->curr_node, struct fname, rb_hash); 520 info->curr_hash = fname->hash; 521 info->curr_minor_hash = fname->minor_hash; 522 if (!call_filldir(file, ctx, fname)) 523 break; 524 next_node: 525 info->curr_node = rb_next(info->curr_node); 526 if (info->curr_node) { 527 fname = rb_entry(info->curr_node, struct fname, 528 rb_hash); 529 info->curr_hash = fname->hash; 530 info->curr_minor_hash = fname->minor_hash; 531 } else { 532 if (info->next_hash == ~0) { 533 ctx->pos = ext3_get_htree_eof(file); 534 break; 535 } 536 info->curr_hash = info->next_hash; 537 info->curr_minor_hash = 0; 538 } 539 } 540finished: 541 info->last_pos = ctx->pos; 542 return 0; 543} 544 545static int ext3_release_dir (struct inode * inode, struct file * filp) 546{ 547 if (filp->private_data) 548 ext3_htree_free_dir_info(filp->private_data); 549 550 return 0; 551} 552 553const struct file_operations ext3_dir_operations = { 554 .llseek = ext3_dir_llseek, 555 .read = generic_read_dir, 556 .iterate = ext3_readdir, 557 .unlocked_ioctl = ext3_ioctl, 558#ifdef CONFIG_COMPAT 559 .compat_ioctl = ext3_compat_ioctl, 560#endif 561 .fsync = ext3_sync_file, 562 .release = ext3_release_dir, 563};