Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Btrfs: send, fix more issues related to directory renames

This is a continuation of the previous changes titled:

Btrfs: fix incremental send's decision to delay a dir move/rename
Btrfs: part 2, fix incremental send's decision to delay a dir move/rename

There are a few more cases, previously overlooked, where a directory rename/move
must be delayed. If our immediate ancestor has a lower inode number than ours
and it doesn't have a delayed rename/move operation associated with it, that
doesn't mean there isn't some non-direct ancestor of our current inode that
needs to be renamed/moved before our current inode (i.e. one with a higher inode
number than ours).

So we can't stop the search just because our immediate ancestor has a lower inode
number than ours. Instead, we need to navigate the directory hierarchy upwards
until we hit the root or:

1) find an ancestor with a higher inode number that was renamed/moved in the send
root too (or already has a pending rename/move registered);
2) find an ancestor that is a new directory (higher inode number than ours and
exists only in the send root).

Reproducer for case 1)

$ mkfs.btrfs -f /dev/sdd
$ mount /dev/sdd /mnt

$ mkdir -p /mnt/a/b
$ mkdir -p /mnt/a/c/d
$ mkdir /mnt/a/b/e
$ mkdir /mnt/a/c/d/f
$ mv /mnt/a/b /mnt/a/c/d/2b
$ mkdir /mnt/a/x
$ mkdir /mnt/a/y

$ btrfs subvolume snapshot -r /mnt /mnt/snap1
$ btrfs send /mnt/snap1 -f /tmp/base.send

$ mv /mnt/a/x /mnt/a/y
$ mv /mnt/a/c/d/2b/e /mnt/a/c/d/2b/2e
$ mv /mnt/a/c/d /mnt/a/h/2d
$ mv /mnt/a/c /mnt/a/h/2d/2b/2c

$ btrfs subvolume snapshot -r /mnt /mnt/snap2
$ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

Simple reproducer for case 2)

$ mkfs.btrfs -f /dev/sdd
$ mount /dev/sdd /mnt

$ mkdir -p /mnt/a/b
$ mkdir /mnt/a/c
$ mv /mnt/a/b /mnt/a/c/b2
$ mkdir /mnt/a/e

$ btrfs subvolume snapshot -r /mnt /mnt/snap1
$ btrfs send /mnt/snap1 -f /tmp/base.send

$ mv /mnt/a/c/b2 /mnt/a/e/b3
$ mkdir /mnt/a/e/b3/f
$ mkdir /mnt/a/h
$ mv /mnt/a/c /mnt/a/e/b3/f/c2
$ mv /mnt/a/e /mnt/a/h/e2

$ btrfs subvolume snapshot -r /mnt /mnt/snap2
$ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

Another simple reproducer for case 2)

$ mkfs.btrfs -f /dev/sdd
$ mount /dev/sdd /mnt

$ mkdir -p /mnt/a/b
$ mkdir /mnt/a/c
$ mkdir /mnt/a/b/d
$ mkdir /mnt/a/c/e

$ btrfs subvolume snapshot -r /mnt /mnt/snap1
$ btrfs send /mnt/snap1 -f /tmp/base.send

$ mkdir /mnt/a/b/d/f
$ mkdir /mnt/a/b/g
$ mv /mnt/a/c/e /mnt/a/b/g/e2
$ mv /mnt/a/c /mnt/a/b/d/f/c2
$ mv /mnt/a/b/d/f /mnt/a/b/g/e2/f2

$ btrfs subvolume snapshot -r /mnt /mnt/snap2
$ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

More complex reproducer for case 2)

$ mkfs.btrfs -f /dev/sdd
$ mount /dev/sdd /mnt

$ mkdir -p /mnt/a/b
$ mkdir -p /mnt/a/c/d
$ mkdir /mnt/a/b/e
$ mkdir /mnt/a/c/d/f
$ mv /mnt/a/b /mnt/a/c/d/2b
$ mkdir /mnt/a/x
$ mkdir /mnt/a/y

$ btrfs subvolume snapshot -r /mnt /mnt/snap1
$ btrfs send /mnt/snap1 -f /tmp/base.send

$ mv /mnt/a/x /mnt/a/y
$ mv /mnt/a/c/d/2b/e /mnt/a/c/d/2b/2e
$ mv /mnt/a/c/d /mnt/a/h/2d
$ mv /mnt/a/c /mnt/a/h/2d/2b/2c

$ btrfs subvolume snapshot -r /mnt /mnt/snap2
$ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

For both cases the incremental send would enter an infinite loop when building
path strings.

While solving these cases, this change also re-implements the code that detects
when directory moves/renames should be delayed. Instead of dealing with several
specific cases separately, it now handles all cases generically with a simple
detection algorithm. If a path loop is detected when applying a delayed
move/rename, the move/rename is delayed further by registering a new ancestor
inode as the dependency inode (so our rename happens after that ancestor is
renamed).

Tests for these cases are being added to xfstests too.

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: Chris Mason <clm@fb.com>

authored by

Filipe Manana and committed by
Chris Mason
f959492f a10c4076

+96 -94
+96 -94
fs/btrfs/send.c
··· 2940 2940 static int add_pending_dir_move(struct send_ctx *sctx, 2941 2941 u64 ino, 2942 2942 u64 ino_gen, 2943 - u64 parent_ino) 2943 + u64 parent_ino, 2944 + struct list_head *new_refs, 2945 + struct list_head *deleted_refs) 2944 2946 { 2945 2947 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2946 2948 struct rb_node *parent = NULL; ··· 2974 2972 } 2975 2973 } 2976 2974 2977 - list_for_each_entry(cur, &sctx->deleted_refs, list) { 2975 + list_for_each_entry(cur, deleted_refs, list) { 2978 2976 ret = dup_ref(cur, &pm->update_refs); 2979 2977 if (ret < 0) 2980 2978 goto out; 2981 2979 } 2982 - list_for_each_entry(cur, &sctx->new_refs, list) { 2980 + list_for_each_entry(cur, new_refs, list) { 2983 2981 ret = dup_ref(cur, &pm->update_refs); 2984 2982 if (ret < 0) 2985 2983 goto out; ··· 3022 3020 return NULL; 3023 3021 } 3024 3022 3023 + static int path_loop(struct send_ctx *sctx, struct fs_path *name, 3024 + u64 ino, u64 gen, u64 *ancestor_ino) 3025 + { 3026 + int ret = 0; 3027 + u64 parent_inode = 0; 3028 + u64 parent_gen = 0; 3029 + u64 start_ino = ino; 3030 + 3031 + *ancestor_ino = 0; 3032 + while (ino != BTRFS_FIRST_FREE_OBJECTID) { 3033 + fs_path_reset(name); 3034 + 3035 + if (is_waiting_for_rm(sctx, ino)) 3036 + break; 3037 + if (is_waiting_for_move(sctx, ino)) { 3038 + if (*ancestor_ino == 0) 3039 + *ancestor_ino = ino; 3040 + ret = get_first_ref(sctx->parent_root, ino, 3041 + &parent_inode, &parent_gen, name); 3042 + } else { 3043 + ret = __get_cur_name_and_parent(sctx, ino, gen, 3044 + &parent_inode, 3045 + &parent_gen, name); 3046 + if (ret > 0) { 3047 + ret = 0; 3048 + break; 3049 + } 3050 + } 3051 + if (ret < 0) 3052 + break; 3053 + if (parent_inode == start_ino) { 3054 + ret = 1; 3055 + if (*ancestor_ino == 0) 3056 + *ancestor_ino = ino; 3057 + break; 3058 + } 3059 + ino = parent_inode; 3060 + gen = parent_gen; 3061 + } 3062 + return ret; 3063 + } 3064 + 3025 3065 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 
3026 3066 { 3027 3067 struct fs_path *from_path = NULL; ··· 3075 3031 struct waiting_dir_move *dm = NULL; 3076 3032 u64 rmdir_ino = 0; 3077 3033 int ret; 3034 + u64 ancestor = 0; 3078 3035 3079 3036 name = fs_path_alloc(); 3080 3037 from_path = fs_path_alloc(); ··· 3102 3057 if (ret < 0) 3103 3058 goto out; 3104 3059 3060 + sctx->send_progress = sctx->cur_ino + 1; 3061 + ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); 3062 + if (ret) { 3063 + LIST_HEAD(deleted_refs); 3064 + ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); 3065 + ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, 3066 + &pm->update_refs, &deleted_refs); 3067 + if (ret < 0) 3068 + goto out; 3069 + if (rmdir_ino) { 3070 + dm = get_waiting_dir_move(sctx, pm->ino); 3071 + ASSERT(dm); 3072 + dm->rmdir_ino = rmdir_ino; 3073 + } 3074 + goto out; 3075 + } 3105 3076 fs_path_reset(name); 3106 3077 to_path = name; 3107 3078 name = NULL; 3108 - 3109 - sctx->send_progress = sctx->cur_ino + 1; 3110 3079 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 3111 3080 if (ret < 0) 3112 3081 goto out; ··· 3244 3185 static int wait_for_parent_move(struct send_ctx *sctx, 3245 3186 struct recorded_ref *parent_ref) 3246 3187 { 3247 - int ret; 3188 + int ret = 0; 3248 3189 u64 ino = parent_ref->dir; 3249 3190 u64 parent_ino_before, parent_ino_after; 3250 - u64 old_gen; 3251 3191 struct fs_path *path_before = NULL; 3252 3192 struct fs_path *path_after = NULL; 3253 3193 int len1, len2; 3254 - int register_upper_dirs; 3255 - u64 gen; 3256 - 3257 - if (is_waiting_for_move(sctx, ino)) 3258 - return 1; 3259 - 3260 - if (parent_ref->dir <= sctx->cur_ino) 3261 - return 0; 3262 - 3263 - ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, 3264 - NULL, NULL, NULL, NULL); 3265 - if (ret == -ENOENT) 3266 - return 0; 3267 - else if (ret < 0) 3268 - return ret; 3269 - 3270 - if (parent_ref->dir_gen != old_gen) 3271 - return 0; 3272 - 3273 - path_before = fs_path_alloc(); 3274 - if (!path_before) 3275 - return 
-ENOMEM; 3276 - 3277 - ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3278 - NULL, path_before); 3279 - if (ret == -ENOENT) { 3280 - ret = 0; 3281 - goto out; 3282 - } else if (ret < 0) { 3283 - goto out; 3284 - } 3285 3194 3286 3195 path_after = fs_path_alloc(); 3287 - if (!path_after) { 3196 + path_before = fs_path_alloc(); 3197 + if (!path_after || !path_before) { 3288 3198 ret = -ENOMEM; 3289 3199 goto out; 3290 3200 } 3291 3201 3292 - ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3293 - &gen, path_after); 3294 - if (ret == -ENOENT) { 3295 - ret = 0; 3296 - goto out; 3297 - } else if (ret < 0) { 3298 - goto out; 3299 - } 3300 - 3301 - len1 = fs_path_len(path_before); 3302 - len2 = fs_path_len(path_after); 3303 - if (parent_ino_before != parent_ino_after || len1 != len2 || 3304 - memcmp(path_before->start, path_after->start, len1)) { 3305 - ret = 1; 3306 - goto out; 3307 - } 3308 - ret = 0; 3309 - 3310 3202 /* 3311 - * Ok, our new most direct ancestor has a higher inode number but 3312 - * wasn't moved/renamed. So maybe some of the new ancestors higher in 3313 - * the hierarchy have an higher inode number too *and* were renamed 3314 - * or moved - in this case we need to wait for the ancestor's rename 3315 - * or move operation before we can do the move/rename for the current 3316 - * inode. 3203 + * Our current directory inode may not yet be renamed/moved because some 3204 + * ancestor (immediate or not) has to be renamed/moved first. So find if 3205 + * such ancestor exists and make sure our own rename/move happens after 3206 + * that ancestor is processed. 
3317 3207 */ 3318 - register_upper_dirs = 0; 3319 - ino = parent_ino_after; 3320 - again: 3321 - while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { 3322 - u64 parent_gen; 3208 + while (ino > BTRFS_FIRST_FREE_OBJECTID) { 3209 + if (is_waiting_for_move(sctx, ino)) { 3210 + ret = 1; 3211 + break; 3212 + } 3323 3213 3324 3214 fs_path_reset(path_before); 3325 3215 fs_path_reset(path_after); 3326 3216 3327 3217 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3328 - &parent_gen, path_after); 3218 + NULL, path_after); 3329 3219 if (ret < 0) 3330 3220 goto out; 3331 3221 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3332 3222 NULL, path_before); 3333 - if (ret == -ENOENT) { 3334 - ret = 0; 3335 - break; 3336 - } else if (ret < 0) { 3223 + if (ret < 0 && ret != -ENOENT) { 3337 3224 goto out; 3225 + } else if (ret == -ENOENT) { 3226 + ret = 1; 3227 + break; 3338 3228 } 3339 3229 3340 3230 len1 = fs_path_len(path_before); 3341 3231 len2 = fs_path_len(path_after); 3342 - if (parent_ino_before != parent_ino_after || len1 != len2 || 3343 - memcmp(path_before->start, path_after->start, len1)) { 3232 + if (ino > sctx->cur_ino && 3233 + (parent_ino_before != parent_ino_after || len1 != len2 || 3234 + memcmp(path_before->start, path_after->start, len1))) { 3344 3235 ret = 1; 3345 - if (register_upper_dirs) { 3346 - break; 3347 - } else { 3348 - register_upper_dirs = 1; 3349 - ino = parent_ref->dir; 3350 - gen = parent_ref->dir_gen; 3351 - goto again; 3352 - } 3353 - } else if (register_upper_dirs) { 3354 - ret = add_pending_dir_move(sctx, ino, gen, 3355 - parent_ino_after); 3356 - if (ret < 0 && ret != -EEXIST) 3357 - goto out; 3236 + break; 3358 3237 } 3359 - 3360 3238 ino = parent_ino_after; 3361 - gen = parent_gen; 3362 3239 } 3363 3240 3364 3241 out: 3365 3242 fs_path_free(path_before); 3366 3243 fs_path_free(path_after); 3244 + 3245 + if (ret == 1) { 3246 + ret = add_pending_dir_move(sctx, 3247 + sctx->cur_ino, 3248 + 
sctx->cur_inode_gen, 3249 + ino, 3250 + &sctx->new_refs, 3251 + &sctx->deleted_refs); 3252 + if (!ret) 3253 + ret = 1; 3254 + } 3367 3255 3368 3256 return ret; 3369 3257 } ··· 3472 3466 if (ret < 0) 3473 3467 goto out; 3474 3468 if (ret) { 3475 - ret = add_pending_dir_move(sctx, 3476 - sctx->cur_ino, 3477 - sctx->cur_inode_gen, 3478 - cur->dir); 3479 3469 *pending_move = 1; 3480 3470 } else { 3481 3471 ret = send_rename(sctx, valid_path,