Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: support GETFSMAP ioctls

Support the GETFSMAP ioctls so that we can use the xfs free space
management tools to probe ext4 as well. Note that this is a partial
implementation -- we only report fixed-location metadata and free space;
everything else is reported as "unknown".

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

authored by

Darrick J. Wong and committed by
Theodore Ts'o
0c9ec4be d0649f04

+1027 -5
+5 -5
fs/ext4/Makefile
··· 4 4 5 5 obj-$(CONFIG_EXT4_FS) += ext4.o 6 6 7 - ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ 8 - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 9 - ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ 10 - mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ 11 - xattr_trusted.o inline.o readpage.o sysfs.o 7 + ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ 8 + extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \ 9 + indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \ 10 + mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \ 11 + super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o 12 12 13 13 ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 14 14 ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
+722
fs/ext4/fsmap.c
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "ext4.h" 21 + #include <linux/fsmap.h> 22 + #include "fsmap.h" 23 + #include "mballoc.h" 24 + #include <linux/sort.h> 25 + #include <linux/list_sort.h> 26 + #include <trace/events/ext4.h> 27 + 28 + /* Convert an ext4_fsmap to an fsmap. */ 29 + void ext4_fsmap_from_internal(struct super_block *sb, struct fsmap *dest, 30 + struct ext4_fsmap *src) 31 + { 32 + dest->fmr_device = src->fmr_device; 33 + dest->fmr_flags = src->fmr_flags; 34 + dest->fmr_physical = src->fmr_physical << sb->s_blocksize_bits; 35 + dest->fmr_owner = src->fmr_owner; 36 + dest->fmr_offset = 0; 37 + dest->fmr_length = src->fmr_length << sb->s_blocksize_bits; 38 + dest->fmr_reserved[0] = 0; 39 + dest->fmr_reserved[1] = 0; 40 + dest->fmr_reserved[2] = 0; 41 + } 42 + 43 + /* Convert an fsmap to an ext4_fsmap. 
*/ 44 + void ext4_fsmap_to_internal(struct super_block *sb, struct ext4_fsmap *dest, 45 + struct fsmap *src) 46 + { 47 + dest->fmr_device = src->fmr_device; 48 + dest->fmr_flags = src->fmr_flags; 49 + dest->fmr_physical = src->fmr_physical >> sb->s_blocksize_bits; 50 + dest->fmr_owner = src->fmr_owner; 51 + dest->fmr_length = src->fmr_length >> sb->s_blocksize_bits; 52 + } 53 + 54 + /* getfsmap query state */ 55 + struct ext4_getfsmap_info { 56 + struct ext4_fsmap_head *gfi_head; 57 + ext4_fsmap_format_t gfi_formatter; /* formatting fn */ 58 + void *gfi_format_arg;/* format buffer */ 59 + ext4_fsblk_t gfi_next_fsblk; /* next fsblock we expect */ 60 + u32 gfi_dev; /* device id */ 61 + ext4_group_t gfi_agno; /* bg number, if applicable */ 62 + struct ext4_fsmap gfi_low; /* low rmap key */ 63 + struct ext4_fsmap gfi_high; /* high rmap key */ 64 + struct ext4_fsmap gfi_lastfree; /* free ext at end of last bg */ 65 + struct list_head gfi_meta_list; /* fixed metadata list */ 66 + bool gfi_last; /* last extent? */ 67 + }; 68 + 69 + /* Associate a device with a getfsmap handler. */ 70 + struct ext4_getfsmap_dev { 71 + int (*gfd_fn)(struct super_block *sb, 72 + struct ext4_fsmap *keys, 73 + struct ext4_getfsmap_info *info); 74 + u32 gfd_dev; 75 + }; 76 + 77 + /* Compare two getfsmap device handlers. */ 78 + static int ext4_getfsmap_dev_compare(const void *p1, const void *p2) 79 + { 80 + const struct ext4_getfsmap_dev *d1 = p1; 81 + const struct ext4_getfsmap_dev *d2 = p2; 82 + 83 + return d1->gfd_dev - d2->gfd_dev; 84 + } 85 + 86 + /* Compare a record against our starting point */ 87 + static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info, 88 + struct ext4_fsmap *rec) 89 + { 90 + return rec->fmr_physical < info->gfi_low.fmr_physical; 91 + } 92 + 93 + /* 94 + * Format a reverse mapping for getfsmap, having translated rm_startblock 95 + * into the appropriate daddr units. 
96 + */ 97 + static int ext4_getfsmap_helper(struct super_block *sb, 98 + struct ext4_getfsmap_info *info, 99 + struct ext4_fsmap *rec) 100 + { 101 + struct ext4_fsmap fmr; 102 + struct ext4_sb_info *sbi = EXT4_SB(sb); 103 + ext4_fsblk_t rec_fsblk = rec->fmr_physical; 104 + ext4_group_t agno; 105 + ext4_grpblk_t cno; 106 + int error; 107 + 108 + if (fatal_signal_pending(current)) 109 + return -EINTR; 110 + 111 + /* 112 + * Filter out records that start before our startpoint, if the 113 + * caller requested that. 114 + */ 115 + if (ext4_getfsmap_rec_before_low_key(info, rec)) { 116 + rec_fsblk += rec->fmr_length; 117 + if (info->gfi_next_fsblk < rec_fsblk) 118 + info->gfi_next_fsblk = rec_fsblk; 119 + return EXT4_QUERY_RANGE_CONTINUE; 120 + } 121 + 122 + /* Are we just counting mappings? */ 123 + if (info->gfi_head->fmh_count == 0) { 124 + if (rec_fsblk > info->gfi_next_fsblk) 125 + info->gfi_head->fmh_entries++; 126 + 127 + if (info->gfi_last) 128 + return EXT4_QUERY_RANGE_CONTINUE; 129 + 130 + info->gfi_head->fmh_entries++; 131 + 132 + rec_fsblk += rec->fmr_length; 133 + if (info->gfi_next_fsblk < rec_fsblk) 134 + info->gfi_next_fsblk = rec_fsblk; 135 + return EXT4_QUERY_RANGE_CONTINUE; 136 + } 137 + 138 + /* 139 + * If the record starts past the last physical block we saw, 140 + * then we've found a gap. Report the gap as being owned by 141 + * whatever the caller specified is the missing owner. 
142 + */ 143 + if (rec_fsblk > info->gfi_next_fsblk) { 144 + if (info->gfi_head->fmh_entries >= info->gfi_head->fmh_count) 145 + return EXT4_QUERY_RANGE_ABORT; 146 + 147 + ext4_get_group_no_and_offset(sb, info->gfi_next_fsblk, 148 + &agno, &cno); 149 + trace_ext4_fsmap_mapping(sb, info->gfi_dev, agno, 150 + EXT4_C2B(sbi, cno), 151 + rec_fsblk - info->gfi_next_fsblk, 152 + EXT4_FMR_OWN_UNKNOWN); 153 + 154 + fmr.fmr_device = info->gfi_dev; 155 + fmr.fmr_physical = info->gfi_next_fsblk; 156 + fmr.fmr_owner = EXT4_FMR_OWN_UNKNOWN; 157 + fmr.fmr_length = rec_fsblk - info->gfi_next_fsblk; 158 + fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; 159 + error = info->gfi_formatter(&fmr, info->gfi_format_arg); 160 + if (error) 161 + return error; 162 + info->gfi_head->fmh_entries++; 163 + } 164 + 165 + if (info->gfi_last) 166 + goto out; 167 + 168 + /* Fill out the extent we found */ 169 + if (info->gfi_head->fmh_entries >= info->gfi_head->fmh_count) 170 + return EXT4_QUERY_RANGE_ABORT; 171 + 172 + ext4_get_group_no_and_offset(sb, rec_fsblk, &agno, &cno); 173 + trace_ext4_fsmap_mapping(sb, info->gfi_dev, agno, EXT4_C2B(sbi, cno), 174 + rec->fmr_length, rec->fmr_owner); 175 + 176 + fmr.fmr_device = info->gfi_dev; 177 + fmr.fmr_physical = rec_fsblk; 178 + fmr.fmr_owner = rec->fmr_owner; 179 + fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; 180 + fmr.fmr_length = rec->fmr_length; 181 + error = info->gfi_formatter(&fmr, info->gfi_format_arg); 182 + if (error) 183 + return error; 184 + info->gfi_head->fmh_entries++; 185 + 186 + out: 187 + rec_fsblk += rec->fmr_length; 188 + if (info->gfi_next_fsblk < rec_fsblk) 189 + info->gfi_next_fsblk = rec_fsblk; 190 + return EXT4_QUERY_RANGE_CONTINUE; 191 + } 192 + 193 + static inline ext4_fsblk_t ext4_fsmap_next_pblk(struct ext4_fsmap *fmr) 194 + { 195 + return fmr->fmr_physical + fmr->fmr_length; 196 + } 197 + 198 + /* Transform a blockgroup's free record into a fsmap */ 199 + static int ext4_getfsmap_datadev_helper(struct super_block *sb, 200 + ext4_group_t 
agno, ext4_grpblk_t start, 201 + ext4_grpblk_t len, void *priv) 202 + { 203 + struct ext4_fsmap irec; 204 + struct ext4_getfsmap_info *info = priv; 205 + struct ext4_fsmap *p; 206 + struct ext4_fsmap *tmp; 207 + struct ext4_sb_info *sbi = EXT4_SB(sb); 208 + ext4_fsblk_t fsb; 209 + ext4_fsblk_t fslen; 210 + int error; 211 + 212 + fsb = (EXT4_C2B(sbi, start) + ext4_group_first_block_no(sb, agno)); 213 + fslen = EXT4_C2B(sbi, len); 214 + 215 + /* If the retained free extent record is set... */ 216 + if (info->gfi_lastfree.fmr_owner) { 217 + /* ...and abuts this one, lengthen it and return. */ 218 + if (ext4_fsmap_next_pblk(&info->gfi_lastfree) == fsb) { 219 + info->gfi_lastfree.fmr_length += fslen; 220 + return 0; 221 + } 222 + 223 + /* 224 + * There's a gap between the two free extents; emit the 225 + * retained extent prior to merging the meta_list. 226 + */ 227 + error = ext4_getfsmap_helper(sb, info, &info->gfi_lastfree); 228 + if (error) 229 + return error; 230 + info->gfi_lastfree.fmr_owner = 0; 231 + } 232 + 233 + /* Merge in any relevant extents from the meta_list */ 234 + list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) { 235 + if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) { 236 + list_del(&p->fmr_list); 237 + kfree(p); 238 + } else if (p->fmr_physical < fsb) { 239 + error = ext4_getfsmap_helper(sb, info, p); 240 + if (error) 241 + return error; 242 + 243 + list_del(&p->fmr_list); 244 + kfree(p); 245 + } 246 + } 247 + 248 + irec.fmr_device = 0; 249 + irec.fmr_physical = fsb; 250 + irec.fmr_length = fslen; 251 + irec.fmr_owner = EXT4_FMR_OWN_FREE; 252 + irec.fmr_flags = 0; 253 + 254 + /* If this is a free extent at the end of a bg, buffer it. 
*/ 255 + if (ext4_fsmap_next_pblk(&irec) == 256 + ext4_group_first_block_no(sb, agno + 1)) { 257 + info->gfi_lastfree = irec; 258 + return 0; 259 + } 260 + 261 + /* Otherwise, emit it */ 262 + return ext4_getfsmap_helper(sb, info, &irec); 263 + } 264 + 265 + /* Execute a getfsmap query against the log device. */ 266 + static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys, 267 + struct ext4_getfsmap_info *info) 268 + { 269 + journal_t *journal = EXT4_SB(sb)->s_journal; 270 + struct ext4_fsmap irec; 271 + 272 + /* Set up search keys */ 273 + info->gfi_low = keys[0]; 274 + info->gfi_low.fmr_length = 0; 275 + 276 + memset(&info->gfi_high, 0xFF, sizeof(info->gfi_high)); 277 + 278 + trace_ext4_fsmap_low_key(sb, info->gfi_dev, 0, 279 + info->gfi_low.fmr_physical, 280 + info->gfi_low.fmr_length, 281 + info->gfi_low.fmr_owner); 282 + 283 + trace_ext4_fsmap_high_key(sb, info->gfi_dev, 0, 284 + info->gfi_high.fmr_physical, 285 + info->gfi_high.fmr_length, 286 + info->gfi_high.fmr_owner); 287 + 288 + if (keys[0].fmr_physical > 0) 289 + return 0; 290 + 291 + /* Fabricate an rmap entry for the external log device. */ 292 + irec.fmr_physical = journal->j_blk_offset; 293 + irec.fmr_length = journal->j_maxlen; 294 + irec.fmr_owner = EXT4_FMR_OWN_LOG; 295 + irec.fmr_flags = 0; 296 + 297 + return ext4_getfsmap_helper(sb, info, &irec); 298 + } 299 + 300 + /* Helper to fill out an ext4_fsmap. 
*/ 301 + static inline int ext4_getfsmap_fill(struct list_head *meta_list, 302 + ext4_fsblk_t fsb, ext4_fsblk_t len, 303 + uint64_t owner) 304 + { 305 + struct ext4_fsmap *fsm; 306 + 307 + fsm = kmalloc(sizeof(*fsm), GFP_NOFS); 308 + if (!fsm) 309 + return -ENOMEM; 310 + fsm->fmr_device = 0; 311 + fsm->fmr_flags = 0; 312 + fsm->fmr_physical = fsb; 313 + fsm->fmr_owner = owner; 314 + fsm->fmr_length = len; 315 + list_add_tail(&fsm->fmr_list, meta_list); 316 + 317 + return 0; 318 + } 319 + 320 + /* 321 + * This function returns the number of file system metadata blocks at 322 + * the beginning of a block group, including the reserved gdt blocks. 323 + */ 324 + static unsigned int ext4_getfsmap_find_sb(struct super_block *sb, 325 + ext4_group_t agno, 326 + struct list_head *meta_list) 327 + { 328 + struct ext4_sb_info *sbi = EXT4_SB(sb); 329 + ext4_fsblk_t fsb = ext4_group_first_block_no(sb, agno); 330 + ext4_fsblk_t len; 331 + unsigned long first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 332 + unsigned long metagroup = agno / EXT4_DESC_PER_BLOCK(sb); 333 + int error; 334 + 335 + /* Record the superblock. */ 336 + if (ext4_bg_has_super(sb, agno)) { 337 + error = ext4_getfsmap_fill(meta_list, fsb, 1, EXT4_FMR_OWN_FS); 338 + if (error) 339 + return error; 340 + fsb++; 341 + } 342 + 343 + /* Record the group descriptors. */ 344 + len = ext4_bg_num_gdb(sb, agno); 345 + if (!len) 346 + return 0; 347 + error = ext4_getfsmap_fill(meta_list, fsb, len, 348 + EXT4_FMR_OWN_GDT); 349 + if (error) 350 + return error; 351 + fsb += len; 352 + 353 + /* Reserved GDT blocks */ 354 + if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) { 355 + len = le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); 356 + error = ext4_getfsmap_fill(meta_list, fsb, len, 357 + EXT4_FMR_OWN_RESV_GDT); 358 + if (error) 359 + return error; 360 + } 361 + 362 + return 0; 363 + } 364 + 365 + /* Compare two fsmap items. 
*/ 366 + static int ext4_getfsmap_compare(void *priv, 367 + struct list_head *a, 368 + struct list_head *b) 369 + { 370 + struct ext4_fsmap *fa; 371 + struct ext4_fsmap *fb; 372 + 373 + fa = container_of(a, struct ext4_fsmap, fmr_list); 374 + fb = container_of(b, struct ext4_fsmap, fmr_list); 375 + if (fa->fmr_physical < fb->fmr_physical) 376 + return -1; 377 + else if (fa->fmr_physical > fb->fmr_physical) 378 + return 1; 379 + return 0; 380 + } 381 + 382 + /* Merge adjacent extents of fixed metadata. */ 383 + static void ext4_getfsmap_merge_fixed_metadata(struct list_head *meta_list) 384 + { 385 + struct ext4_fsmap *p; 386 + struct ext4_fsmap *prev = NULL; 387 + struct ext4_fsmap *tmp; 388 + 389 + list_for_each_entry_safe(p, tmp, meta_list, fmr_list) { 390 + if (!prev) { 391 + prev = p; 392 + continue; 393 + } 394 + 395 + if (prev->fmr_owner == p->fmr_owner && 396 + prev->fmr_physical + prev->fmr_length == p->fmr_physical) { 397 + prev->fmr_length += p->fmr_length; 398 + list_del(&p->fmr_list); 399 + kfree(p); 400 + } else 401 + prev = p; 402 + } 403 + } 404 + 405 + /* Free a list of fixed metadata. */ 406 + static void ext4_getfsmap_free_fixed_metadata(struct list_head *meta_list) 407 + { 408 + struct ext4_fsmap *p; 409 + struct ext4_fsmap *tmp; 410 + 411 + list_for_each_entry_safe(p, tmp, meta_list, fmr_list) { 412 + list_del(&p->fmr_list); 413 + kfree(p); 414 + } 415 + } 416 + 417 + /* Find all the fixed metadata in the filesystem. */ 418 + int ext4_getfsmap_find_fixed_metadata(struct super_block *sb, 419 + struct list_head *meta_list) 420 + { 421 + struct ext4_group_desc *gdp; 422 + ext4_group_t agno; 423 + int error; 424 + 425 + INIT_LIST_HEAD(meta_list); 426 + 427 + /* Collect everything. 
*/ 428 + for (agno = 0; agno < EXT4_SB(sb)->s_groups_count; agno++) { 429 + gdp = ext4_get_group_desc(sb, agno, NULL); 430 + if (!gdp) { 431 + error = -EFSCORRUPTED; 432 + goto err; 433 + } 434 + 435 + /* Superblock & GDT */ 436 + error = ext4_getfsmap_find_sb(sb, agno, meta_list); 437 + if (error) 438 + goto err; 439 + 440 + /* Block bitmap */ 441 + error = ext4_getfsmap_fill(meta_list, 442 + ext4_block_bitmap(sb, gdp), 1, 443 + EXT4_FMR_OWN_BLKBM); 444 + if (error) 445 + goto err; 446 + 447 + /* Inode bitmap */ 448 + error = ext4_getfsmap_fill(meta_list, 449 + ext4_inode_bitmap(sb, gdp), 1, 450 + EXT4_FMR_OWN_INOBM); 451 + if (error) 452 + goto err; 453 + 454 + /* Inodes */ 455 + error = ext4_getfsmap_fill(meta_list, 456 + ext4_inode_table(sb, gdp), 457 + EXT4_SB(sb)->s_itb_per_group, 458 + EXT4_FMR_OWN_INODES); 459 + if (error) 460 + goto err; 461 + } 462 + 463 + /* Sort the list */ 464 + list_sort(NULL, meta_list, ext4_getfsmap_compare); 465 + 466 + /* Merge adjacent extents */ 467 + ext4_getfsmap_merge_fixed_metadata(meta_list); 468 + 469 + return 0; 470 + err: 471 + ext4_getfsmap_free_fixed_metadata(meta_list); 472 + return error; 473 + } 474 + 475 + /* Execute a getfsmap query against the buddy bitmaps */ 476 + static int ext4_getfsmap_datadev(struct super_block *sb, 477 + struct ext4_fsmap *keys, 478 + struct ext4_getfsmap_info *info) 479 + { 480 + struct ext4_sb_info *sbi = EXT4_SB(sb); 481 + ext4_fsblk_t start_fsb; 482 + ext4_fsblk_t end_fsb; 483 + ext4_fsblk_t eofs; 484 + ext4_group_t start_ag; 485 + ext4_group_t end_ag; 486 + ext4_grpblk_t first_cluster; 487 + ext4_grpblk_t last_cluster; 488 + int error = 0; 489 + 490 + eofs = ext4_blocks_count(sbi->s_es); 491 + if (keys[0].fmr_physical >= eofs) 492 + return 0; 493 + if (keys[1].fmr_physical >= eofs) 494 + keys[1].fmr_physical = eofs - 1; 495 + start_fsb = keys[0].fmr_physical; 496 + end_fsb = keys[1].fmr_physical; 497 + 498 + /* Determine first and last group to examine based on start and end */ 499 + 
ext4_get_group_no_and_offset(sb, start_fsb, &start_ag, &first_cluster); 500 + ext4_get_group_no_and_offset(sb, end_fsb, &end_ag, &last_cluster); 501 + 502 + /* 503 + * Convert the fsmap low/high keys to bg based keys. Initialize 504 + * low to the fsmap low key and max out the high key to the end 505 + * of the bg. 506 + */ 507 + info->gfi_low = keys[0]; 508 + info->gfi_low.fmr_physical = EXT4_C2B(sbi, first_cluster); 509 + info->gfi_low.fmr_length = 0; 510 + 511 + memset(&info->gfi_high, 0xFF, sizeof(info->gfi_high)); 512 + 513 + /* Assemble a list of all the fixed-location metadata. */ 514 + error = ext4_getfsmap_find_fixed_metadata(sb, &info->gfi_meta_list); 515 + if (error) 516 + goto err; 517 + 518 + /* Query each bg */ 519 + for (info->gfi_agno = start_ag; 520 + info->gfi_agno <= end_ag; 521 + info->gfi_agno++) { 522 + /* 523 + * Set the bg high key from the fsmap high key if this 524 + * is the last bg that we're querying. 525 + */ 526 + if (info->gfi_agno == end_ag) { 527 + info->gfi_high = keys[1]; 528 + info->gfi_high.fmr_physical = EXT4_C2B(sbi, 529 + last_cluster); 530 + info->gfi_high.fmr_length = 0; 531 + } 532 + 533 + trace_ext4_fsmap_low_key(sb, info->gfi_dev, info->gfi_agno, 534 + info->gfi_low.fmr_physical, 535 + info->gfi_low.fmr_length, 536 + info->gfi_low.fmr_owner); 537 + 538 + trace_ext4_fsmap_high_key(sb, info->gfi_dev, info->gfi_agno, 539 + info->gfi_high.fmr_physical, 540 + info->gfi_high.fmr_length, 541 + info->gfi_high.fmr_owner); 542 + 543 + error = ext4_mballoc_query_range(sb, info->gfi_agno, 544 + EXT4_B2C(sbi, info->gfi_low.fmr_physical), 545 + EXT4_B2C(sbi, info->gfi_high.fmr_physical), 546 + ext4_getfsmap_datadev_helper, info); 547 + if (error) 548 + goto err; 549 + 550 + /* 551 + * Set the bg low key to the start of the bg prior to 552 + * moving on to the next bg. 553 + */ 554 + if (info->gfi_agno == start_ag) 555 + memset(&info->gfi_low, 0, sizeof(info->gfi_low)); 556 + } 557 + 558 + /* Do we have a retained free extent? 
*/ 559 + if (info->gfi_lastfree.fmr_owner) { 560 + error = ext4_getfsmap_helper(sb, info, &info->gfi_lastfree); 561 + if (error) 562 + goto err; 563 + } 564 + 565 + /* Report any gaps at the end of the bg */ 566 + info->gfi_last = true; 567 + error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster, 0, info); 568 + if (error) 569 + goto err; 570 + 571 + err: 572 + ext4_getfsmap_free_fixed_metadata(&info->gfi_meta_list); 573 + return error; 574 + } 575 + 576 + /* Do we recognize the device? */ 577 + static bool ext4_getfsmap_is_valid_device(struct super_block *sb, 578 + struct ext4_fsmap *fm) 579 + { 580 + if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || 581 + fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev)) 582 + return true; 583 + if (EXT4_SB(sb)->journal_bdev && 584 + fm->fmr_device == new_encode_dev(EXT4_SB(sb)->journal_bdev->bd_dev)) 585 + return true; 586 + return false; 587 + } 588 + 589 + /* Ensure that the low key is less than the high key. */ 590 + static bool ext4_getfsmap_check_keys(struct ext4_fsmap *low_key, 591 + struct ext4_fsmap *high_key) 592 + { 593 + if (low_key->fmr_device > high_key->fmr_device) 594 + return false; 595 + if (low_key->fmr_device < high_key->fmr_device) 596 + return true; 597 + 598 + if (low_key->fmr_physical > high_key->fmr_physical) 599 + return false; 600 + if (low_key->fmr_physical < high_key->fmr_physical) 601 + return true; 602 + 603 + if (low_key->fmr_owner > high_key->fmr_owner) 604 + return false; 605 + if (low_key->fmr_owner < high_key->fmr_owner) 606 + return true; 607 + 608 + return false; 609 + } 610 + 611 + #define EXT4_GETFSMAP_DEVS 2 612 + /* 613 + * Get filesystem's extents as described in head, and format for 614 + * output. Calls formatter to fill the user's buffer until all 615 + * extents are mapped, until the passed-in head->fmh_count slots have 616 + * been filled, or until the formatter short-circuits the loop, if it 617 + * is tracking filled-in extents on its own. 
618 + * 619 + * Key to Confusion 620 + * ---------------- 621 + * There are multiple levels of keys and counters at work here: 622 + * _fsmap_head.fmh_keys -- low and high fsmap keys passed in; 623 + * these reflect fs-wide block addrs. 624 + * dkeys -- fmh_keys used to query each device; 625 + * these are fmh_keys but w/ the low key 626 + * bumped up by fmr_length. 627 + * _getfsmap_info.gfi_next_fsblk-- next fs block we expect to see; this 628 + * is how we detect gaps in the fsmap 629 + * records and report them. 630 + * _getfsmap_info.gfi_low/high -- per-bg low/high keys computed from 631 + * dkeys; used to query the free space. 632 + */ 633 + int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head, 634 + ext4_fsmap_format_t formatter, void *arg) 635 + { 636 + struct ext4_fsmap dkeys[2]; /* per-dev keys */ 637 + struct ext4_getfsmap_dev handlers[EXT4_GETFSMAP_DEVS]; 638 + struct ext4_getfsmap_info info = {0}; 639 + int i; 640 + int error = 0; 641 + 642 + if (head->fmh_iflags & ~FMH_IF_VALID) 643 + return -EINVAL; 644 + if (!ext4_getfsmap_is_valid_device(sb, &head->fmh_keys[0]) || 645 + !ext4_getfsmap_is_valid_device(sb, &head->fmh_keys[1])) 646 + return -EINVAL; 647 + 648 + head->fmh_entries = 0; 649 + 650 + /* Set up our device handlers. */ 651 + memset(handlers, 0, sizeof(handlers)); 652 + handlers[0].gfd_dev = new_encode_dev(sb->s_bdev->bd_dev); 653 + handlers[0].gfd_fn = ext4_getfsmap_datadev; 654 + if (EXT4_SB(sb)->journal_bdev) { 655 + handlers[1].gfd_dev = new_encode_dev( 656 + EXT4_SB(sb)->journal_bdev->bd_dev); 657 + handlers[1].gfd_fn = ext4_getfsmap_logdev; 658 + } 659 + 660 + sort(handlers, EXT4_GETFSMAP_DEVS, sizeof(struct ext4_getfsmap_dev), 661 + ext4_getfsmap_dev_compare, NULL); 662 + 663 + /* 664 + * To continue where we left off, we allow userspace to use the 665 + * last mapping from a previous call as the low key of the next. 666 + * This is identified by a non-zero length in the low key. 
We 667 + * have to increment the low key in this scenario to ensure we 668 + * don't return the same mapping again, and instead return the 669 + * very next mapping. 670 + * 671 + * Bump the physical offset as there can be no other mapping for 672 + * the same physical block range. 673 + */ 674 + dkeys[0] = head->fmh_keys[0]; 675 + dkeys[0].fmr_physical += dkeys[0].fmr_length; 676 + dkeys[0].fmr_owner = 0; 677 + dkeys[0].fmr_length = 0; 678 + memset(&dkeys[1], 0xFF, sizeof(struct ext4_fsmap)); 679 + 680 + if (!ext4_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) 681 + return -EINVAL; 682 + 683 + info.gfi_next_fsblk = head->fmh_keys[0].fmr_physical + 684 + head->fmh_keys[0].fmr_length; 685 + info.gfi_formatter = formatter; 686 + info.gfi_format_arg = arg; 687 + info.gfi_head = head; 688 + 689 + /* For each device we support... */ 690 + for (i = 0; i < EXT4_GETFSMAP_DEVS; i++) { 691 + /* Is this device within the range the user asked for? */ 692 + if (!handlers[i].gfd_fn) 693 + continue; 694 + if (head->fmh_keys[0].fmr_device > handlers[i].gfd_dev) 695 + continue; 696 + if (head->fmh_keys[1].fmr_device < handlers[i].gfd_dev) 697 + break; 698 + 699 + /* 700 + * If this device number matches the high key, we have 701 + * to pass the high key to the handler to limit the 702 + * query results. If the device number exceeds the 703 + * low key, zero out the low key so that we get 704 + * everything from the beginning. 705 + */ 706 + if (handlers[i].gfd_dev == head->fmh_keys[1].fmr_device) 707 + dkeys[1] = head->fmh_keys[1]; 708 + if (handlers[i].gfd_dev > head->fmh_keys[0].fmr_device) 709 + memset(&dkeys[0], 0, sizeof(struct ext4_fsmap)); 710 + 711 + info.gfi_dev = handlers[i].gfd_dev; 712 + info.gfi_last = false; 713 + info.gfi_agno = -1; 714 + error = handlers[i].gfd_fn(sb, dkeys, &info); 715 + if (error) 716 + break; 717 + info.gfi_next_fsblk = 0; 718 + } 719 + 720 + head->fmh_oflags = FMH_OF_DEV_T; 721 + return error; 722 + }
+69
fs/ext4/fsmap.h
··· 1 + /* 2 + * Copyright (C) 2017 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __EXT4_FSMAP_H__ 21 + #define __EXT4_FSMAP_H__ 22 + 23 + struct fsmap; 24 + 25 + /* internal fsmap representation */ 26 + struct ext4_fsmap { 27 + struct list_head fmr_list; 28 + dev_t fmr_device; /* device id */ 29 + uint32_t fmr_flags; /* mapping flags */ 30 + uint64_t fmr_physical; /* device offset of segment */ 31 + uint64_t fmr_owner; /* owner id */ 32 + uint64_t fmr_length; /* length of segment, blocks */ 33 + }; 34 + 35 + struct ext4_fsmap_head { 36 + uint32_t fmh_iflags; /* control flags */ 37 + uint32_t fmh_oflags; /* output flags */ 38 + unsigned int fmh_count; /* # of entries in array incl. input */ 39 + unsigned int fmh_entries; /* # of entries filled in (output). 
*/ 40 + 41 + struct ext4_fsmap fmh_keys[2]; /* low and high keys */ 42 + }; 43 + 44 + void ext4_fsmap_from_internal(struct super_block *sb, struct fsmap *dest, 45 + struct ext4_fsmap *src); 46 + void ext4_fsmap_to_internal(struct super_block *sb, struct ext4_fsmap *dest, 47 + struct fsmap *src); 48 + 49 + /* fsmap to userspace formatter - copy to user & advance pointer */ 50 + typedef int (*ext4_fsmap_format_t)(struct ext4_fsmap *, void *); 51 + 52 + int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head, 53 + ext4_fsmap_format_t formatter, void *arg); 54 + 55 + #define EXT4_QUERY_RANGE_ABORT 1 56 + #define EXT4_QUERY_RANGE_CONTINUE 0 57 + 58 + /* fmr_owner special values for FS_IOC_GETFSMAP; some share w/ XFS */ 59 + #define EXT4_FMR_OWN_FREE FMR_OWN_FREE /* free space */ 60 + #define EXT4_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */ 61 + #define EXT4_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */ 62 + #define EXT4_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */ 63 + #define EXT4_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */ 64 + #define EXT4_FMR_OWN_GDT FMR_OWNER('f', 1) /* group descriptors */ 65 + #define EXT4_FMR_OWN_RESV_GDT FMR_OWNER('f', 2) /* reserved gdt blocks */ 66 + #define EXT4_FMR_OWN_BLKBM FMR_OWNER('f', 3) /* block bitmap */ 67 + #define EXT4_FMR_OWN_INOBM FMR_OWNER('f', 4) /* inode bitmap */ 68 + 69 + #endif /* __EXT4_FSMAP_H__ */
+90
fs/ext4/ioctl.c
··· 19 19 #include <linux/delay.h> 20 20 #include "ext4_jbd2.h" 21 21 #include "ext4.h" 22 + #include <linux/fsmap.h> 23 + #include "fsmap.h" 24 + #include <trace/events/ext4.h> 22 25 23 26 /** 24 27 * Swap memory between @a and @b for @len bytes. ··· 492 489 return 0; 493 490 } 494 491 492 + struct getfsmap_info { 493 + struct super_block *gi_sb; 494 + struct fsmap_head __user *gi_data; 495 + unsigned int gi_idx; 496 + __u32 gi_last_flags; 497 + }; 498 + 499 + static int ext4_getfsmap_format(struct ext4_fsmap *xfm, void *priv) 500 + { 501 + struct getfsmap_info *info = priv; 502 + struct fsmap fm; 503 + 504 + trace_ext4_getfsmap_mapping(info->gi_sb, xfm); 505 + 506 + info->gi_last_flags = xfm->fmr_flags; 507 + ext4_fsmap_from_internal(info->gi_sb, &fm, xfm); 508 + if (copy_to_user(&info->gi_data->fmh_recs[info->gi_idx++], &fm, 509 + sizeof(struct fsmap))) 510 + return -EFAULT; 511 + 512 + return 0; 513 + } 514 + 515 + static int ext4_ioc_getfsmap(struct super_block *sb, 516 + struct fsmap_head __user *arg) 517 + { 518 + struct getfsmap_info info = {0}; 519 + struct ext4_fsmap_head xhead = {0}; 520 + struct fsmap_head head; 521 + bool aborted = false; 522 + int error; 523 + 524 + if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) 525 + return -EFAULT; 526 + if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || 527 + memchr_inv(head.fmh_keys[0].fmr_reserved, 0, 528 + sizeof(head.fmh_keys[0].fmr_reserved)) || 529 + memchr_inv(head.fmh_keys[1].fmr_reserved, 0, 530 + sizeof(head.fmh_keys[1].fmr_reserved))) 531 + return -EINVAL; 532 + /* 533 + * ext4 doesn't report file extents at all, so the only valid 534 + * file offsets are the magic ones (all zeroes or all ones). 
535 + */ 536 + if (head.fmh_keys[0].fmr_offset || 537 + (head.fmh_keys[1].fmr_offset != 0 && 538 + head.fmh_keys[1].fmr_offset != -1ULL)) 539 + return -EINVAL; 540 + 541 + xhead.fmh_iflags = head.fmh_iflags; 542 + xhead.fmh_count = head.fmh_count; 543 + ext4_fsmap_to_internal(sb, &xhead.fmh_keys[0], &head.fmh_keys[0]); 544 + ext4_fsmap_to_internal(sb, &xhead.fmh_keys[1], &head.fmh_keys[1]); 545 + 546 + trace_ext4_getfsmap_low_key(sb, &xhead.fmh_keys[0]); 547 + trace_ext4_getfsmap_high_key(sb, &xhead.fmh_keys[1]); 548 + 549 + info.gi_sb = sb; 550 + info.gi_data = arg; 551 + error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info); 552 + if (error == EXT4_QUERY_RANGE_ABORT) { 553 + error = 0; 554 + aborted = true; 555 + } else if (error) 556 + return error; 557 + 558 + /* If we didn't abort, set the "last" flag in the last fmx */ 559 + if (!aborted && info.gi_idx) { 560 + info.gi_last_flags |= FMR_OF_LAST; 561 + if (copy_to_user(&info.gi_data->fmh_recs[info.gi_idx - 1].fmr_flags, 562 + &info.gi_last_flags, 563 + sizeof(info.gi_last_flags))) 564 + return -EFAULT; 565 + } 566 + 567 + /* copy back header */ 568 + head.fmh_entries = xhead.fmh_entries; 569 + head.fmh_oflags = xhead.fmh_oflags; 570 + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) 571 + return -EFAULT; 572 + 573 + return 0; 574 + } 575 + 495 576 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 496 577 { 497 578 struct inode *inode = file_inode(filp); ··· 586 499 ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); 587 500 588 501 switch (cmd) { 502 + case FS_IOC_GETFSMAP: 503 + return ext4_ioc_getfsmap(sb, (void __user *)arg); 589 504 case EXT4_IOC_GETFLAGS: 590 505 ext4_get_inode_flags(ei); 591 506 flags = ei->i_flags & EXT4_FL_USER_VISIBLE; ··· 1098 1009 case EXT4_IOC_GET_ENCRYPTION_PWSALT: 1099 1010 case EXT4_IOC_GET_ENCRYPTION_POLICY: 1100 1011 case EXT4_IOC_SHUTDOWN: 1012 + case FS_IOC_GETFSMAP: 1101 1013 break; 1102 1014 default: 1103 1015 return -ENOIOCTLCMD;
+49
fs/ext4/mballoc.c
··· 5277 5277 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; 5278 5278 return ret; 5279 5279 } 5280 + 5281 + /* Iterate all the free extents in the group. */ 5282 + int 5283 + ext4_mballoc_query_range( 5284 + struct super_block *sb, 5285 + ext4_group_t group, 5286 + ext4_grpblk_t start, 5287 + ext4_grpblk_t end, 5288 + ext4_mballoc_query_range_fn formatter, 5289 + void *priv) 5290 + { 5291 + void *bitmap; 5292 + ext4_grpblk_t next; 5293 + struct ext4_buddy e4b; 5294 + int error; 5295 + 5296 + error = ext4_mb_load_buddy(sb, group, &e4b); 5297 + if (error) 5298 + return error; 5299 + bitmap = e4b.bd_bitmap; 5300 + 5301 + ext4_lock_group(sb, group); 5302 + 5303 + start = (e4b.bd_info->bb_first_free > start) ? 5304 + e4b.bd_info->bb_first_free : start; 5305 + if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) 5306 + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; 5307 + 5308 + while (start <= end) { 5309 + start = mb_find_next_zero_bit(bitmap, end + 1, start); 5310 + if (start > end) 5311 + break; 5312 + next = mb_find_next_bit(bitmap, end + 1, start); 5313 + 5314 + ext4_unlock_group(sb, group); 5315 + error = formatter(sb, group, start, next - start, priv); 5316 + if (error) 5317 + goto out_unload; 5318 + ext4_lock_group(sb, group); 5319 + 5320 + start = next + 1; 5321 + } 5322 + 5323 + ext4_unlock_group(sb, group); 5324 + out_unload: 5325 + ext4_mb_unload_buddy(&e4b); 5326 + 5327 + return error; 5328 + }
+17
fs/ext4/mballoc.h
··· 199 199 return ext4_group_first_block_no(sb, fex->fe_group) + 200 200 (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); 201 201 } 202 + 203 + typedef int (*ext4_mballoc_query_range_fn)( 204 + struct super_block *sb, 205 + ext4_group_t agno, 206 + ext4_grpblk_t start, 207 + ext4_grpblk_t len, 208 + void *priv); 209 + 210 + int 211 + ext4_mballoc_query_range( 212 + struct super_block *sb, 213 + ext4_group_t agno, 214 + ext4_grpblk_t start, 215 + ext4_grpblk_t end, 216 + ext4_mballoc_query_range_fn formatter, 217 + void *priv); 218 + 202 219 #endif
+1
fs/ext4/super.c
··· 49 49 #include "xattr.h" 50 50 #include "acl.h" 51 51 #include "mballoc.h" 52 + #include "fsmap.h" 52 53 53 54 #define CREATE_TRACE_POINTS 54 55 #include <trace/events/ext4.h>
+74
include/trace/events/ext4.h
··· 15 15 struct mpage_da_data; 16 16 struct ext4_map_blocks; 17 17 struct extent_status; 18 + struct ext4_fsmap; 18 19 19 20 #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) 20 21 ··· 2529 2528 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, 2530 2529 __entry->scan_time, __entry->nr_skipped, __entry->retried) 2531 2530 ); 2531 + 2532 + /* fsmap traces */ 2533 + DECLARE_EVENT_CLASS(ext4_fsmap_class, 2534 + TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, 2535 + u64 owner), 2536 + TP_ARGS(sb, keydev, agno, bno, len, owner), 2537 + TP_STRUCT__entry( 2538 + __field(dev_t, dev) 2539 + __field(dev_t, keydev) 2540 + __field(u32, agno) 2541 + __field(u64, bno) 2542 + __field(u64, len) 2543 + __field(u64, owner) 2544 + ), 2545 + TP_fast_assign( 2546 + __entry->dev = sb->s_bdev->bd_dev; 2547 + __entry->keydev = new_decode_dev(keydev); 2548 + __entry->agno = agno; 2549 + __entry->bno = bno; 2550 + __entry->len = len; 2551 + __entry->owner = owner; 2552 + ), 2553 + TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld\n", 2554 + MAJOR(__entry->dev), MINOR(__entry->dev), 2555 + MAJOR(__entry->keydev), MINOR(__entry->keydev), 2556 + __entry->agno, 2557 + __entry->bno, 2558 + __entry->len, 2559 + __entry->owner) 2560 + ) 2561 + #define DEFINE_FSMAP_EVENT(name) \ 2562 + DEFINE_EVENT(ext4_fsmap_class, name, \ 2563 + TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, \ 2564 + u64 owner), \ 2565 + TP_ARGS(sb, keydev, agno, bno, len, owner)) 2566 + DEFINE_FSMAP_EVENT(ext4_fsmap_low_key); 2567 + DEFINE_FSMAP_EVENT(ext4_fsmap_high_key); 2568 + DEFINE_FSMAP_EVENT(ext4_fsmap_mapping); 2569 + 2570 + DECLARE_EVENT_CLASS(ext4_getfsmap_class, 2571 + TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), 2572 + TP_ARGS(sb, fsmap), 2573 + TP_STRUCT__entry( 2574 + __field(dev_t, dev) 2575 + __field(dev_t, keydev) 2576 + __field(u64, block) 2577 + __field(u64, len) 2578 + __field(u64, 
owner) 2579 + __field(u64, flags) 2580 + ), 2581 + TP_fast_assign( 2582 + __entry->dev = sb->s_bdev->bd_dev; 2583 + __entry->keydev = new_decode_dev(fsmap->fmr_device); 2584 + __entry->block = fsmap->fmr_physical; 2585 + __entry->len = fsmap->fmr_length; 2586 + __entry->owner = fsmap->fmr_owner; 2587 + __entry->flags = fsmap->fmr_flags; 2588 + ), 2589 + TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld flags 0x%llx\n", 2590 + MAJOR(__entry->dev), MINOR(__entry->dev), 2591 + MAJOR(__entry->keydev), MINOR(__entry->keydev), 2592 + __entry->block, 2593 + __entry->len, 2594 + __entry->owner, 2595 + __entry->flags) 2596 + ) 2597 + #define DEFINE_GETFSMAP_EVENT(name) \ 2598 + DEFINE_EVENT(ext4_getfsmap_class, name, \ 2599 + TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), \ 2600 + TP_ARGS(sb, fsmap)) 2601 + DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key); 2602 + DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key); 2603 + DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping); 2532 2604 2533 2605 #endif /* _TRACE_EXT4_H */ 2534 2606