Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: add physical block and status member into extent status tree

This commit extends the extent_status structure so that it records both
the physical block and the extent status. A single new member, es_pblk,
is used to record both of them: because a physical block number only
needs 48 bits, the extent status can be stashed into the same field,
which saves some memory. Four statuses are defined for now: written,
unwritten, delayed and hole.

Because a new member is added to the extent status tree, all of its
interfaces need to be adjusted.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>

authored by

Zheng Liu and committed by
Theodore Ts'o
fdc0212e 06b0c886

+142 -26
+55 -12
fs/ext4/extents_status.c
··· 179 179 while (node) { 180 180 struct extent_status *es; 181 181 es = rb_entry(node, struct extent_status, rb_node); 182 - printk(KERN_DEBUG " [%u/%u)", es->es_lblk, es->es_len); 182 + printk(KERN_DEBUG " [%u/%u) %llu %llx", 183 + es->es_lblk, es->es_len, 184 + ext4_es_pblock(es), ext4_es_status(es)); 183 185 node = rb_next(node); 184 186 } 185 187 printk(KERN_DEBUG "\n"); ··· 236 234 * @es: delayed extent that we found 237 235 * 238 236 * Returns the first block of the next extent after es, otherwise 239 - * EXT_MAX_BLOCKS if no delay extent is found. 237 + * EXT_MAX_BLOCKS if no extent is found. 240 238 * Delayed extent is returned via @es. 241 239 */ 242 240 ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) ··· 251 249 read_lock(&EXT4_I(inode)->i_es_lock); 252 250 tree = &EXT4_I(inode)->i_es_tree; 253 251 254 - /* find delay extent in cache firstly */ 252 + /* find extent in cache firstly */ 253 + es->es_len = es->es_pblk = 0; 255 254 if (tree->cache_es) { 256 255 es1 = tree->cache_es; 257 256 if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) { 258 - es_debug("%u cached by [%u/%u)\n", 259 - es->es_lblk, es1->es_lblk, es1->es_len); 257 + es_debug("%u cached by [%u/%u) %llu %llx\n", 258 + es->es_lblk, es1->es_lblk, es1->es_len, 259 + ext4_es_pblock(es1), ext4_es_status(es1)); 260 260 goto out; 261 261 } 262 262 } 263 263 264 - es->es_len = 0; 265 264 es1 = __es_tree_search(&tree->root, es->es_lblk); 266 265 267 266 out: ··· 270 267 tree->cache_es = es1; 271 268 es->es_lblk = es1->es_lblk; 272 269 es->es_len = es1->es_len; 270 + es->es_pblk = es1->es_pblk; 273 271 node = rb_next(&es1->rb_node); 274 272 if (node) { 275 273 es1 = rb_entry(node, struct extent_status, rb_node); ··· 285 281 } 286 282 287 283 static struct extent_status * 288 - ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len) 284 + ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk) 289 285 { 290 286 struct extent_status *es; 291 287 es = 
kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); ··· 293 289 return NULL; 294 290 es->es_lblk = lblk; 295 291 es->es_len = len; 292 + es->es_pblk = pblk; 296 293 return es; 297 294 } 298 295 ··· 306 301 * Check whether or not two extents can be merged 307 302 * Condition: 308 303 * - logical block number is contiguous 304 + * - physical block number is contiguous 305 + * - status is equal 309 306 */ 310 307 static int ext4_es_can_be_merged(struct extent_status *es1, 311 308 struct extent_status *es2) 312 309 { 313 310 if (es1->es_lblk + es1->es_len != es2->es_lblk) 311 + return 0; 312 + 313 + if (ext4_es_status(es1) != ext4_es_status(es2)) 314 + return 0; 315 + 316 + if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && 317 + (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2))) 314 318 return 0; 315 319 316 320 return 1; ··· 385 371 */ 386 372 es->es_lblk = newes->es_lblk; 387 373 es->es_len += newes->es_len; 374 + if (ext4_es_is_written(es) || 375 + ext4_es_is_unwritten(es)) 376 + ext4_es_store_pblock(es, 377 + newes->es_pblk); 388 378 es = ext4_es_try_to_merge_left(tree, es); 389 379 goto out; 390 380 } ··· 406 388 } 407 389 } 408 390 409 - es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len); 391 + es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len, 392 + newes->es_pblk); 410 393 if (!es) 411 394 return -ENOMEM; 412 395 rb_link_node(&es->rb_node, parent, p); ··· 427 408 * Return 0 on success, error code on failure. 
428 409 */ 429 410 int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 430 - ext4_lblk_t len) 411 + ext4_lblk_t len, ext4_fsblk_t pblk, 412 + unsigned long long status) 431 413 { 432 414 struct ext4_es_tree *tree; 433 415 struct extent_status newes; 434 416 ext4_lblk_t end = lblk + len - 1; 435 417 int err = 0; 436 418 437 - trace_ext4_es_insert_extent(inode, lblk, len); 438 - es_debug("add [%u/%u) to extent status tree of inode %lu\n", 439 - lblk, len, inode->i_ino); 419 + es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n", 420 + lblk, len, pblk, status, inode->i_ino); 440 421 441 422 BUG_ON(end < lblk); 442 423 443 424 newes.es_lblk = lblk; 444 425 newes.es_len = len; 426 + ext4_es_store_pblock(&newes, pblk); 427 + ext4_es_store_status(&newes, status); 428 + trace_ext4_es_insert_extent(inode, &newes); 445 429 446 430 write_lock(&EXT4_I(inode)->i_es_lock); 447 431 tree = &EXT4_I(inode)->i_es_tree; ··· 468 446 struct extent_status *es; 469 447 struct extent_status orig_es; 470 448 ext4_lblk_t len1, len2; 449 + ext4_fsblk_t block; 471 450 int err = 0; 472 451 473 452 es = __es_tree_search(&tree->root, lblk); ··· 482 459 483 460 orig_es.es_lblk = es->es_lblk; 484 461 orig_es.es_len = es->es_len; 462 + orig_es.es_pblk = es->es_pblk; 463 + 485 464 len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0; 486 465 len2 = ext4_es_end(es) > end ? 
ext4_es_end(es) - end : 0; 487 466 if (len1 > 0) ··· 494 469 495 470 newes.es_lblk = end + 1; 496 471 newes.es_len = len2; 472 + if (ext4_es_is_written(&orig_es) || 473 + ext4_es_is_unwritten(&orig_es)) { 474 + block = ext4_es_pblock(&orig_es) + 475 + orig_es.es_len - len2; 476 + ext4_es_store_pblock(&newes, block); 477 + } 478 + ext4_es_store_status(&newes, ext4_es_status(&orig_es)); 497 479 err = __es_insert_extent(tree, &newes); 498 480 if (err) { 499 481 es->es_lblk = orig_es.es_lblk; ··· 510 478 } else { 511 479 es->es_lblk = end + 1; 512 480 es->es_len = len2; 481 + if (ext4_es_is_written(es) || 482 + ext4_es_is_unwritten(es)) { 483 + block = orig_es.es_pblk + orig_es.es_len - len2; 484 + ext4_es_store_pblock(es, block); 485 + } 513 486 } 514 487 goto out; 515 488 } ··· 539 502 } 540 503 541 504 if (es && es->es_lblk < end + 1) { 505 + ext4_lblk_t orig_len = es->es_len; 506 + 542 507 len1 = ext4_es_end(es) - end; 543 508 es->es_lblk = end + 1; 544 509 es->es_len = len1; 510 + if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) { 511 + block = es->es_pblk + orig_len - len1; 512 + ext4_es_store_pblock(es, block); 513 + } 545 514 } 546 515 547 516 out:
+63 -1
fs/ext4/extents_status.h
··· 20 20 #define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 21 21 #endif 22 22 23 + #define EXTENT_STATUS_WRITTEN 0x80000000 /* written extent */ 24 + #define EXTENT_STATUS_UNWRITTEN 0x40000000 /* unwritten extent */ 25 + #define EXTENT_STATUS_DELAYED 0x20000000 /* delayed extent */ 26 + #define EXTENT_STATUS_HOLE 0x10000000 /* hole */ 27 + 28 + #define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ 29 + EXTENT_STATUS_UNWRITTEN | \ 30 + EXTENT_STATUS_DELAYED | \ 31 + EXTENT_STATUS_HOLE) 32 + 23 33 struct extent_status { 24 34 struct rb_node rb_node; 25 35 ext4_lblk_t es_lblk; /* first logical block extent covers */ 26 36 ext4_lblk_t es_len; /* length of extent in block */ 37 + ext4_fsblk_t es_pblk; /* first physical block */ 27 38 }; 28 39 29 40 struct ext4_es_tree { ··· 47 36 extern void ext4_es_init_tree(struct ext4_es_tree *tree); 48 37 49 38 extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, 50 - ext4_lblk_t len); 39 + ext4_lblk_t len, ext4_fsblk_t pblk, 40 + unsigned long long status); 51 41 extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 52 42 ext4_lblk_t len); 53 43 extern ext4_lblk_t ext4_es_find_extent(struct inode *inode, 54 44 struct extent_status *es); 45 + 46 + static inline int ext4_es_is_written(struct extent_status *es) 47 + { 48 + return (es->es_pblk & EXTENT_STATUS_WRITTEN); 49 + } 50 + 51 + static inline int ext4_es_is_unwritten(struct extent_status *es) 52 + { 53 + return (es->es_pblk & EXTENT_STATUS_UNWRITTEN); 54 + } 55 + 56 + static inline int ext4_es_is_delayed(struct extent_status *es) 57 + { 58 + return (es->es_pblk & EXTENT_STATUS_DELAYED); 59 + } 60 + 61 + static inline int ext4_es_is_hole(struct extent_status *es) 62 + { 63 + return (es->es_pblk & EXTENT_STATUS_HOLE); 64 + } 65 + 66 + static inline ext4_fsblk_t ext4_es_status(struct extent_status *es) 67 + { 68 + return (es->es_pblk & EXTENT_STATUS_FLAGS); 69 + } 70 + 71 + static inline ext4_fsblk_t ext4_es_pblock(struct extent_status 
*es) 72 + { 73 + return (es->es_pblk & ~EXTENT_STATUS_FLAGS); 74 + } 75 + 76 + static inline void ext4_es_store_pblock(struct extent_status *es, 77 + ext4_fsblk_t pb) 78 + { 79 + ext4_fsblk_t block; 80 + 81 + block = (pb & ~EXTENT_STATUS_FLAGS) | 82 + (es->es_pblk & EXTENT_STATUS_FLAGS); 83 + es->es_pblk = block; 84 + } 85 + 86 + static inline void ext4_es_store_status(struct extent_status *es, 87 + unsigned long long status) 88 + { 89 + ext4_fsblk_t block; 90 + 91 + block = (status & EXTENT_STATUS_FLAGS) | 92 + (es->es_pblk & ~EXTENT_STATUS_FLAGS); 93 + es->es_pblk = block; 94 + } 55 95 56 96 #endif /* _EXT4_EXTENTS_STATUS_H */
+2 -1
fs/ext4/inode.c
··· 1784 1784 goto out_unlock; 1785 1785 } 1786 1786 1787 - retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len); 1787 + retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 1788 + ~0, EXTENT_STATUS_DELAYED); 1788 1789 if (retval) 1789 1790 goto out_unlock; 1790 1791
+22 -12
include/trace/events/ext4.h
··· 2093 2093 ); 2094 2094 2095 2095 TRACE_EVENT(ext4_es_insert_extent, 2096 - TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), 2096 + TP_PROTO(struct inode *inode, struct extent_status *es), 2097 2097 2098 - TP_ARGS(inode, lblk, len), 2098 + TP_ARGS(inode, es), 2099 2099 2100 2100 TP_STRUCT__entry( 2101 - __field( dev_t, dev ) 2102 - __field( ino_t, ino ) 2103 - __field( loff_t, lblk ) 2104 - __field( loff_t, len ) 2101 + __field( dev_t, dev ) 2102 + __field( ino_t, ino ) 2103 + __field( ext4_lblk_t, lblk ) 2104 + __field( ext4_lblk_t, len ) 2105 + __field( ext4_fsblk_t, pblk ) 2106 + __field( unsigned long long, status ) 2105 2107 ), 2106 2108 2107 2109 TP_fast_assign( 2108 2110 __entry->dev = inode->i_sb->s_dev; 2109 2111 __entry->ino = inode->i_ino; 2110 - __entry->lblk = lblk; 2111 - __entry->len = len; 2112 + __entry->lblk = es->es_lblk; 2113 + __entry->len = es->es_len; 2114 + __entry->pblk = ext4_es_pblock(es); 2115 + __entry->status = ext4_es_status(es); 2112 2116 ), 2113 2117 2114 - TP_printk("dev %d,%d ino %lu es [%lld/%lld)", 2118 + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx", 2115 2119 MAJOR(__entry->dev), MINOR(__entry->dev), 2116 2120 (unsigned long) __entry->ino, 2117 - __entry->lblk, __entry->len) 2121 + __entry->lblk, __entry->len, 2122 + __entry->pblk, __entry->status) 2118 2123 ); 2119 2124 2120 2125 TRACE_EVENT(ext4_es_remove_extent, ··· 2180 2175 __field( ino_t, ino ) 2181 2176 __field( ext4_lblk_t, lblk ) 2182 2177 __field( ext4_lblk_t, len ) 2178 + __field( ext4_fsblk_t, pblk ) 2179 + __field( unsigned long long, status ) 2183 2180 __field( ext4_lblk_t, ret ) 2184 2181 ), 2185 2182 ··· 2190 2183 __entry->ino = inode->i_ino; 2191 2184 __entry->lblk = es->es_lblk; 2192 2185 __entry->len = es->es_len; 2186 + __entry->pblk = ext4_es_pblock(es); 2187 + __entry->status = ext4_es_status(es); 2193 2188 __entry->ret = ret; 2194 2189 ), 2195 2190 2196 - TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u", 2191 + 
TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx ret %u", 2197 2192 MAJOR(__entry->dev), MINOR(__entry->dev), 2198 2193 (unsigned long) __entry->ino, 2199 - __entry->lblk, __entry->len, __entry->ret) 2194 + __entry->lblk, __entry->len, 2195 + __entry->pblk, __entry->status, __entry->ret) 2200 2196 ); 2201 2197 2202 2198 #endif /* _TRACE_EXT4_H */