Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

btrfs: send: fix duplicated rmdir operations when using extrefs

Commit 29d6d30f5c8a ("Btrfs: send, don't send rmdir for same target
multiple times") fixed an issue where a send stream contained a rmdir
operation for the same directory multiple times. After that fix we keep
track of the last directory for which we sent a rmdir operation and
compare with it before sending a rmdir for the parent inode of a deleted
hardlink we are processing. But there is still a corner case where, in
between rmdir operations for the same inode, we find deleted hardlinks
for other parent inodes, so tracking just the last inode for which we sent
a rmdir operation is not enough.

Hardlinks of a file in the same directory are stored in the same INODE_REF
item, but if the number of hardlinks is too large to fit in a leaf, we
use INODE_EXTREF items to store them. The key of an INODE_EXTREF item is
(inode_id, INODE_EXTREF, hash[name, parent ino]), so between two hardlinks
for the same parent directory we can find hardlinks for other parent
directories. For example, for the reproducer below we get the following
(from a btrfs inspect-internal dump-tree output):

item 0 key (259 INODE_EXTREF 2309449) itemoff 16257 itemsize 26
index 6925 parent 257 namelen 8 name: foo.6923
item 1 key (259 INODE_EXTREF 2311350) itemoff 16231 itemsize 26
index 6588 parent 258 namelen 8 name: foo.6587
item 2 key (259 INODE_EXTREF 2457395) itemoff 16205 itemsize 26
index 6611 parent 257 namelen 8 name: foo.6609
(...)

So tracking the last directory's inode number does not work in this case,
since we process a link for parent inode 257, then one for 258 and then
another for 257 again, and the second time we process a deleted link for
257 we wrongly conclude that we have not yet sent a rmdir operation for it.

Fix this by using a rbtree to keep track of all the directories for which
we have already sent rmdir operations, and add those directories to the
'check_dirs' ref list in process_recorded_refs() only if the directory is
not yet in the rbtree, otherwise skip it since it means we have already
sent a rmdir operation for that directory.

The following test script reproduces the problem:

$ cat test.sh
#!/bin/bash

DEV=/dev/sdi
MNT=/mnt/sdi

mkfs.btrfs -f $DEV
mount $DEV $MNT

mkdir $MNT/a $MNT/b

echo 123 > $MNT/a/foo
for ((i = 1; i <= 1000; i++)); do
ln $MNT/a/foo $MNT/a/foo.$i
ln $MNT/a/foo $MNT/b/foo.$i
done

btrfs subvolume snapshot -r $MNT $MNT/snap1
btrfs send $MNT/snap1 -f /tmp/base.send

rm -r $MNT/a $MNT/b

btrfs subvolume snapshot -r $MNT $MNT/snap2
btrfs send -p $MNT/snap1 $MNT/snap2 -f /tmp/incremental.send

umount $MNT
mkfs.btrfs -f $DEV
mount $DEV $MNT

btrfs receive $MNT -f /tmp/base.send
btrfs receive $MNT -f /tmp/incremental.send

rm -f /tmp/base.send /tmp/incremental.send

umount $MNT

When running it, it fails like this:

$ ./test.sh
(...)
At subvol snap1
At snapshot snap2
ERROR: rmdir o257-9-0 failed: No such file or directory

CC: <stable@vger.kernel.org>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Ting-Chang Hou <tchou@synology.com>
[ Updated changelog ]
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>

authored by

Ting-Chang Hou and committed by
David Sterba
1fabe43b 17679ac6

+48 -8
+48 -8
fs/btrfs/send.c
··· 4102 4102 return ret; 4103 4103 } 4104 4104 4105 + static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node) 4106 + { 4107 + const struct recorded_ref *data = k; 4108 + const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node); 4109 + 4110 + if (data->dir > ref->dir) 4111 + return 1; 4112 + if (data->dir < ref->dir) 4113 + return -1; 4114 + if (data->dir_gen > ref->dir_gen) 4115 + return 1; 4116 + if (data->dir_gen < ref->dir_gen) 4117 + return -1; 4118 + return 0; 4119 + } 4120 + 4121 + static bool rbtree_check_dir_ref_less(struct rb_node *node, const struct rb_node *parent) 4122 + { 4123 + const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node); 4124 + 4125 + return rbtree_check_dir_ref_comp(entry, parent) < 0; 4126 + } 4127 + 4128 + static int record_check_dir_ref_in_tree(struct rb_root *root, 4129 + struct recorded_ref *ref, struct list_head *list) 4130 + { 4131 + struct recorded_ref *tmp_ref; 4132 + int ret; 4133 + 4134 + if (rb_find(ref, root, rbtree_check_dir_ref_comp)) 4135 + return 0; 4136 + 4137 + ret = dup_ref(ref, list); 4138 + if (ret < 0) 4139 + return ret; 4140 + 4141 + tmp_ref = list_last_entry(list, struct recorded_ref, list); 4142 + rb_add(&tmp_ref->node, root, rbtree_check_dir_ref_less); 4143 + tmp_ref->root = root; 4144 + return 0; 4145 + } 4146 + 4105 4147 static int rename_current_inode(struct send_ctx *sctx, 4106 4148 struct fs_path *current_path, 4107 4149 struct fs_path *new_path) ··· 4171 4129 struct recorded_ref *cur; 4172 4130 struct recorded_ref *cur2; 4173 4131 LIST_HEAD(check_dirs); 4132 + struct rb_root rbtree_check_dirs = RB_ROOT; 4174 4133 struct fs_path *valid_path = NULL; 4175 4134 u64 ow_inode = 0; 4176 4135 u64 ow_gen; 4177 4136 u64 ow_mode; 4178 - u64 last_dir_ino_rm = 0; 4179 4137 bool did_overwrite = false; 4180 4138 bool is_orphan = false; 4181 4139 bool can_rename = true; ··· 4479 4437 goto out; 4480 4438 } 4481 4439 } 4482 - ret = dup_ref(cur, &check_dirs); 
4440 + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); 4483 4441 if (ret < 0) 4484 4442 goto out; 4485 4443 } ··· 4507 4465 } 4508 4466 4509 4467 list_for_each_entry(cur, &sctx->deleted_refs, list) { 4510 - ret = dup_ref(cur, &check_dirs); 4468 + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); 4511 4469 if (ret < 0) 4512 4470 goto out; 4513 4471 } ··· 4517 4475 * We have a moved dir. Add the old parent to check_dirs 4518 4476 */ 4519 4477 cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list); 4520 - ret = dup_ref(cur, &check_dirs); 4478 + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); 4521 4479 if (ret < 0) 4522 4480 goto out; 4523 4481 } else if (!S_ISDIR(sctx->cur_inode_mode)) { ··· 4551 4509 if (is_current_inode_path(sctx, cur->full_path)) 4552 4510 fs_path_reset(&sctx->cur_inode_path); 4553 4511 } 4554 - ret = dup_ref(cur, &check_dirs); 4512 + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); 4555 4513 if (ret < 0) 4556 4514 goto out; 4557 4515 } ··· 4594 4552 ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen); 4595 4553 if (ret < 0) 4596 4554 goto out; 4597 - } else if (ret == inode_state_did_delete && 4598 - cur->dir != last_dir_ino_rm) { 4555 + } else if (ret == inode_state_did_delete) { 4599 4556 ret = can_rmdir(sctx, cur->dir, cur->dir_gen); 4600 4557 if (ret < 0) 4601 4558 goto out; ··· 4606 4565 ret = send_rmdir(sctx, valid_path); 4607 4566 if (ret < 0) 4608 4567 goto out; 4609 - last_dir_ino_rm = cur->dir; 4610 4568 } 4611 4569 } 4612 4570 }