Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ext4: Add EXT4_IOC_MIGRATE ioctl

This patch adds an ioctl for migrating an ext3 indirect-block-mapped inode
to an ext4 extent-mapped inode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

authored by

Aneesh Kumar K.V and committed by
Theodore Ts'o
c14c6fd5 25ec56b5

+572 -3
+1 -1
fs/ext4/Makefile
··· 6 6 7 7 ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 8 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 9 - ext4_jbd2.o 9 + ext4_jbd2.o migrate.o 10 10 11 11 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 12 12 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
+2 -2
fs/ext4/extents.c
··· 61 61 * idx_pblock: 62 62 * combine low and high parts of a leaf physical block number into ext4_fsblk_t 63 63 */ 64 - static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) 64 + ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) 65 65 { 66 66 ext4_fsblk_t block; 67 67 ··· 75 75 * stores a large physical block number into an extent struct, 76 76 * breaking it into parts 77 77 */ 78 - static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) 78 + void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) 79 79 { 80 80 ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); 81 81 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
+3
fs/ext4/ioctl.c
··· 254 254 return err; 255 255 } 256 256 257 + case EXT4_IOC_MIGRATE: 258 + return ext4_ext_migrate(inode, filp, cmd, arg); 259 + 257 260 default: 258 261 return -ENOTTY; 259 262 }
+560
fs/ext4/migrate.c
··· 1 + /* 2 + * Copyright IBM Corporation, 2007 3 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms of version 2.1 of the GNU Lesser General Public License 7 + * as published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it would be useful, but 10 + * WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 + * 13 + */ 14 + 15 + #include <linux/module.h> 16 + #include <linux/ext4_jbd2.h> 17 + #include <linux/ext4_fs_extents.h> 18 + 19 + /* 20 + * The contiguous blocks details which can be 21 + * represented by a single extent 22 + */ 23 + struct list_blocks_struct { 24 + ext4_lblk_t first_block, last_block; 25 + ext4_fsblk_t first_pblock, last_pblock; 26 + }; 27 + 28 + static int finish_range(handle_t *handle, struct inode *inode, 29 + struct list_blocks_struct *lb) 30 + 31 + { 32 + int retval = 0, needed; 33 + struct ext4_extent newext; 34 + struct ext4_ext_path *path; 35 + if (lb->first_pblock == 0) 36 + return 0; 37 + 38 + /* Add the extent to temp inode*/ 39 + newext.ee_block = cpu_to_le32(lb->first_block); 40 + newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 41 + ext4_ext_store_pblock(&newext, lb->first_pblock); 42 + path = ext4_ext_find_extent(inode, lb->first_block, NULL); 43 + 44 + if (IS_ERR(path)) { 45 + retval = PTR_ERR(path); 46 + goto err_out; 47 + } 48 + 49 + /* 50 + * Calculate the credit needed to inserting this extent 51 + * Since we are doing this in loop we may accumalate extra 52 + * credit. But below we try to not accumalate too much 53 + * of them by restarting the journal. 
54 + */ 55 + needed = ext4_ext_calc_credits_for_insert(inode, path); 56 + 57 + /* 58 + * Make sure the credit we accumalated is not really high 59 + */ 60 + if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) { 61 + retval = ext4_journal_restart(handle, needed); 62 + if (retval) 63 + goto err_out; 64 + } 65 + if (needed) { 66 + retval = ext4_journal_extend(handle, needed); 67 + if (retval != 0) { 68 + /* 69 + * IF not able to extend the journal restart the journal 70 + */ 71 + retval = ext4_journal_restart(handle, needed); 72 + if (retval) 73 + goto err_out; 74 + } 75 + } 76 + retval = ext4_ext_insert_extent(handle, inode, path, &newext); 77 + err_out: 78 + lb->first_pblock = 0; 79 + return retval; 80 + } 81 + 82 + static int update_extent_range(handle_t *handle, struct inode *inode, 83 + ext4_fsblk_t pblock, ext4_lblk_t blk_num, 84 + struct list_blocks_struct *lb) 85 + { 86 + int retval; 87 + /* 88 + * See if we can add on to the existing range (if it exists) 89 + */ 90 + if (lb->first_pblock && 91 + (lb->last_pblock+1 == pblock) && 92 + (lb->last_block+1 == blk_num)) { 93 + lb->last_pblock = pblock; 94 + lb->last_block = blk_num; 95 + return 0; 96 + } 97 + /* 98 + * Start a new range. 
99 + */ 100 + retval = finish_range(handle, inode, lb); 101 + lb->first_pblock = lb->last_pblock = pblock; 102 + lb->first_block = lb->last_block = blk_num; 103 + 104 + return retval; 105 + } 106 + 107 + static int update_ind_extent_range(handle_t *handle, struct inode *inode, 108 + ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 109 + struct list_blocks_struct *lb) 110 + { 111 + struct buffer_head *bh; 112 + __le32 *i_data; 113 + int i, retval = 0; 114 + ext4_lblk_t blk_count = *blk_nump; 115 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 116 + 117 + if (!pblock) { 118 + /* Only update the file block number */ 119 + *blk_nump += max_entries; 120 + return 0; 121 + } 122 + 123 + bh = sb_bread(inode->i_sb, pblock); 124 + if (!bh) 125 + return -EIO; 126 + 127 + i_data = (__le32 *)bh->b_data; 128 + for (i = 0; i < max_entries; i++, blk_count++) { 129 + if (i_data[i]) { 130 + retval = update_extent_range(handle, inode, 131 + le32_to_cpu(i_data[i]), 132 + blk_count, lb); 133 + if (retval) 134 + break; 135 + } 136 + } 137 + 138 + /* Update the file block number */ 139 + *blk_nump = blk_count; 140 + put_bh(bh); 141 + return retval; 142 + 143 + } 144 + 145 + static int update_dind_extent_range(handle_t *handle, struct inode *inode, 146 + ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 147 + struct list_blocks_struct *lb) 148 + { 149 + struct buffer_head *bh; 150 + __le32 *i_data; 151 + int i, retval = 0; 152 + ext4_lblk_t blk_count = *blk_nump; 153 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 154 + 155 + if (!pblock) { 156 + /* Only update the file block number */ 157 + *blk_nump += max_entries * max_entries; 158 + return 0; 159 + } 160 + bh = sb_bread(inode->i_sb, pblock); 161 + if (!bh) 162 + return -EIO; 163 + 164 + i_data = (__le32 *)bh->b_data; 165 + for (i = 0; i < max_entries; i++) { 166 + if (i_data[i]) { 167 + retval = update_ind_extent_range(handle, inode, 168 + le32_to_cpu(i_data[i]), 169 + &blk_count, lb); 170 + if (retval) 171 + break; 172 
+ } else { 173 + /* Only update the file block number */ 174 + blk_count += max_entries; 175 + } 176 + } 177 + 178 + /* Update the file block number */ 179 + *blk_nump = blk_count; 180 + put_bh(bh); 181 + return retval; 182 + 183 + } 184 + 185 + static int update_tind_extent_range(handle_t *handle, struct inode *inode, 186 + ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 187 + struct list_blocks_struct *lb) 188 + { 189 + struct buffer_head *bh; 190 + __le32 *i_data; 191 + int i, retval = 0; 192 + ext4_lblk_t blk_count = *blk_nump; 193 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 194 + 195 + if (!pblock) { 196 + /* Only update the file block number */ 197 + *blk_nump += max_entries * max_entries * max_entries; 198 + return 0; 199 + } 200 + bh = sb_bread(inode->i_sb, pblock); 201 + if (!bh) 202 + return -EIO; 203 + 204 + i_data = (__le32 *)bh->b_data; 205 + for (i = 0; i < max_entries; i++) { 206 + if (i_data[i]) { 207 + retval = update_dind_extent_range(handle, inode, 208 + le32_to_cpu(i_data[i]), 209 + &blk_count, lb); 210 + if (retval) 211 + break; 212 + } else 213 + /* Only update the file block number */ 214 + blk_count += max_entries * max_entries; 215 + } 216 + /* Update the file block number */ 217 + *blk_nump = blk_count; 218 + put_bh(bh); 219 + return retval; 220 + 221 + } 222 + 223 + static int free_dind_blocks(handle_t *handle, 224 + struct inode *inode, __le32 i_data) 225 + { 226 + int i; 227 + __le32 *tmp_idata; 228 + struct buffer_head *bh; 229 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 230 + 231 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 232 + if (!bh) 233 + return -EIO; 234 + 235 + tmp_idata = (__le32 *)bh->b_data; 236 + for (i = 0; i < max_entries; i++) { 237 + if (tmp_idata[i]) 238 + ext4_free_blocks(handle, inode, 239 + le32_to_cpu(tmp_idata[i]), 1); 240 + } 241 + put_bh(bh); 242 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1); 243 + return 0; 244 + } 245 + 246 + static int free_tind_blocks(handle_t 
*handle, 247 + struct inode *inode, __le32 i_data) 248 + { 249 + int i, retval = 0; 250 + __le32 *tmp_idata; 251 + struct buffer_head *bh; 252 + unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 253 + 254 + bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 255 + if (!bh) 256 + return -EIO; 257 + 258 + tmp_idata = (__le32 *)bh->b_data; 259 + for (i = 0; i < max_entries; i++) { 260 + if (tmp_idata[i]) { 261 + retval = free_dind_blocks(handle, 262 + inode, tmp_idata[i]); 263 + if (retval) { 264 + put_bh(bh); 265 + return retval; 266 + } 267 + } 268 + } 269 + put_bh(bh); 270 + ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1); 271 + return 0; 272 + } 273 + 274 + static int free_ind_block(handle_t *handle, struct inode *inode) 275 + { 276 + int retval; 277 + struct ext4_inode_info *ei = EXT4_I(inode); 278 + 279 + if (ei->i_data[EXT4_IND_BLOCK]) 280 + ext4_free_blocks(handle, inode, 281 + le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1); 282 + 283 + if (ei->i_data[EXT4_DIND_BLOCK]) { 284 + retval = free_dind_blocks(handle, inode, 285 + ei->i_data[EXT4_DIND_BLOCK]); 286 + if (retval) 287 + return retval; 288 + } 289 + 290 + if (ei->i_data[EXT4_TIND_BLOCK]) { 291 + retval = free_tind_blocks(handle, inode, 292 + ei->i_data[EXT4_TIND_BLOCK]); 293 + if (retval) 294 + return retval; 295 + } 296 + return 0; 297 + } 298 + 299 + static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 300 + struct inode *tmp_inode, int retval) 301 + { 302 + struct ext4_inode_info *ei = EXT4_I(inode); 303 + struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 304 + 305 + retval = free_ind_block(handle, inode); 306 + if (retval) 307 + goto err_out; 308 + 309 + /* 310 + * One credit accounted for writing the 311 + * i_data field of the original inode 312 + */ 313 + retval = ext4_journal_extend(handle, 1); 314 + if (retval != 0) { 315 + retval = ext4_journal_restart(handle, 1); 316 + if (retval) 317 + goto err_out; 318 + } 319 + 320 + /* 321 + * We have the extent map build 
with the tmp inode. 322 + * Now copy the i_data across 323 + */ 324 + ei->i_flags |= EXT4_EXTENTS_FL; 325 + memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 326 + 327 + /* 328 + * Update i_blocks with the new blocks that got 329 + * allocated while adding extents for extent index 330 + * blocks. 331 + * 332 + * While converting to extents we need not 333 + * update the orignal inode i_blocks for extent blocks 334 + * via quota APIs. The quota update happened via tmp_inode already. 335 + */ 336 + spin_lock(&inode->i_lock); 337 + inode->i_blocks += tmp_inode->i_blocks; 338 + spin_unlock(&inode->i_lock); 339 + 340 + ext4_mark_inode_dirty(handle, inode); 341 + err_out: 342 + return retval; 343 + } 344 + 345 + static int free_ext_idx(handle_t *handle, struct inode *inode, 346 + struct ext4_extent_idx *ix) 347 + { 348 + int i, retval = 0; 349 + ext4_fsblk_t block; 350 + struct buffer_head *bh; 351 + struct ext4_extent_header *eh; 352 + 353 + block = idx_pblock(ix); 354 + bh = sb_bread(inode->i_sb, block); 355 + if (!bh) 356 + return -EIO; 357 + 358 + eh = (struct ext4_extent_header *)bh->b_data; 359 + if (eh->eh_depth != 0) { 360 + ix = EXT_FIRST_INDEX(eh); 361 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 362 + retval = free_ext_idx(handle, inode, ix); 363 + if (retval) 364 + break; 365 + } 366 + } 367 + put_bh(bh); 368 + ext4_free_blocks(handle, inode, block, 1); 369 + return retval; 370 + } 371 + 372 + /* 373 + * Free the extent meta data blocks only 374 + */ 375 + static int free_ext_block(handle_t *handle, struct inode *inode) 376 + { 377 + int i, retval = 0; 378 + struct ext4_inode_info *ei = EXT4_I(inode); 379 + struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data; 380 + struct ext4_extent_idx *ix; 381 + if (eh->eh_depth == 0) 382 + /* 383 + * No extra blocks allocated for extent meta data 384 + */ 385 + return 0; 386 + ix = EXT_FIRST_INDEX(eh); 387 + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 388 + retval = 
free_ext_idx(handle, inode, ix); 389 + if (retval) 390 + return retval; 391 + } 392 + return retval; 393 + 394 + } 395 + 396 + int ext4_ext_migrate(struct inode *inode, struct file *filp, 397 + unsigned int cmd, unsigned long arg) 398 + { 399 + handle_t *handle; 400 + int retval = 0, i; 401 + __le32 *i_data; 402 + ext4_lblk_t blk_count = 0; 403 + struct ext4_inode_info *ei; 404 + struct inode *tmp_inode = NULL; 405 + struct list_blocks_struct lb; 406 + unsigned long max_entries; 407 + 408 + if (!test_opt(inode->i_sb, EXTENTS)) 409 + /* 410 + * if mounted with noextents we don't allow the migrate 411 + */ 412 + return -EINVAL; 413 + 414 + if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 415 + return -EINVAL; 416 + 417 + down_write(&EXT4_I(inode)->i_data_sem); 418 + handle = ext4_journal_start(inode, 419 + EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 420 + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 421 + 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) 422 + + 1); 423 + if (IS_ERR(handle)) { 424 + retval = PTR_ERR(handle); 425 + goto err_out; 426 + } 427 + tmp_inode = ext4_new_inode(handle, 428 + inode->i_sb->s_root->d_inode, 429 + S_IFREG); 430 + if (IS_ERR(tmp_inode)) { 431 + retval = -ENOMEM; 432 + ext4_journal_stop(handle); 433 + tmp_inode = NULL; 434 + goto err_out; 435 + } 436 + i_size_write(tmp_inode, i_size_read(inode)); 437 + /* 438 + * We don't want the inode to be reclaimed 439 + * if we got interrupted in between. We have 440 + * this tmp inode carrying reference to the 441 + * data blocks of the original file. 
We set 442 + * the i_nlink to zero at the last stage after 443 + * switching the original file to extent format 444 + */ 445 + tmp_inode->i_nlink = 1; 446 + 447 + ext4_ext_tree_init(handle, tmp_inode); 448 + ext4_orphan_add(handle, tmp_inode); 449 + ext4_journal_stop(handle); 450 + 451 + ei = EXT4_I(inode); 452 + i_data = ei->i_data; 453 + memset(&lb, 0, sizeof(lb)); 454 + 455 + /* 32 bit block address 4 bytes */ 456 + max_entries = inode->i_sb->s_blocksize >> 2; 457 + 458 + /* 459 + * start with one credit accounted for 460 + * superblock modification. 461 + * 462 + * For the tmp_inode we already have commited the 463 + * trascation that created the inode. Later as and 464 + * when we add extents we extent the journal 465 + */ 466 + handle = ext4_journal_start(inode, 1); 467 + for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 468 + if (i_data[i]) { 469 + retval = update_extent_range(handle, tmp_inode, 470 + le32_to_cpu(i_data[i]), 471 + blk_count, &lb); 472 + if (retval) 473 + goto err_out; 474 + } 475 + } 476 + if (i_data[EXT4_IND_BLOCK]) { 477 + retval = update_ind_extent_range(handle, tmp_inode, 478 + le32_to_cpu(i_data[EXT4_IND_BLOCK]), 479 + &blk_count, &lb); 480 + if (retval) 481 + goto err_out; 482 + } else 483 + blk_count += max_entries; 484 + if (i_data[EXT4_DIND_BLOCK]) { 485 + retval = update_dind_extent_range(handle, tmp_inode, 486 + le32_to_cpu(i_data[EXT4_DIND_BLOCK]), 487 + &blk_count, &lb); 488 + if (retval) 489 + goto err_out; 490 + } else 491 + blk_count += max_entries * max_entries; 492 + if (i_data[EXT4_TIND_BLOCK]) { 493 + retval = update_tind_extent_range(handle, tmp_inode, 494 + le32_to_cpu(i_data[EXT4_TIND_BLOCK]), 495 + &blk_count, &lb); 496 + if (retval) 497 + goto err_out; 498 + } 499 + /* 500 + * Build the last extent 501 + */ 502 + retval = finish_range(handle, tmp_inode, &lb); 503 + err_out: 504 + /* 505 + * We are either freeing extent information or indirect 506 + * blocks. 
During this we touch superblock, group descriptor 507 + * and block bitmap. Later we mark the tmp_inode dirty 508 + * via ext4_ext_tree_init. So allocate a credit of 4 509 + * We may update quota (user and group). 510 + * 511 + * FIXME!! we may be touching bitmaps in different block groups. 512 + */ 513 + if (ext4_journal_extend(handle, 514 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) 515 + ext4_journal_restart(handle, 516 + 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); 517 + if (retval) 518 + /* 519 + * Failure case delete the extent information with the 520 + * tmp_inode 521 + */ 522 + free_ext_block(handle, tmp_inode); 523 + else 524 + retval = ext4_ext_swap_inode_data(handle, inode, 525 + tmp_inode, retval); 526 + 527 + /* 528 + * Mark the tmp_inode as of size zero 529 + */ 530 + i_size_write(tmp_inode, 0); 531 + 532 + /* 533 + * set the i_blocks count to zero 534 + * so that the ext4_delete_inode does the 535 + * right job 536 + * 537 + * We don't need to take the i_lock because 538 + * the inode is not visible to user space. 539 + */ 540 + tmp_inode->i_blocks = 0; 541 + 542 + /* Reset the extent details */ 543 + ext4_ext_tree_init(handle, tmp_inode); 544 + 545 + /* 546 + * Set the i_nlink to zero so that 547 + * generic_drop_inode really deletes the 548 + * inode 549 + */ 550 + tmp_inode->i_nlink = 0; 551 + 552 + ext4_journal_stop(handle); 553 + 554 + up_write(&EXT4_I(inode)->i_data_sem); 555 + 556 + if (tmp_inode) 557 + iput(tmp_inode); 558 + 559 + return retval; 560 + }
+4
include/linux/ext4_fs.h
··· 243 243 #endif 244 244 #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) 245 245 #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) 246 + #define EXT4_IOC_MIGRATE _IO('f', 7) 246 247 247 248 /* 248 249 * ioctl commands in 32 bit emulation ··· 984 983 unsigned long); 985 984 extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); 986 985 986 + /* migrate.c */ 987 + extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, 988 + unsigned long); 987 989 /* namei.c */ 988 990 extern int ext4_orphan_add(handle_t *, struct inode *); 989 991 extern int ext4_orphan_del(handle_t *, struct inode *);
+2
include/linux/ext4_fs_extents.h
··· 212 212 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); 213 213 } 214 214 215 + extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 216 + extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 215 217 extern int ext4_extent_tree_init(handle_t *, struct inode *); 216 218 extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); 217 219 extern int ext4_ext_try_to_merge(struct inode *inode,