Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
4#include "bkey_buf.h"
5#include "bkey_methods.h"
6#include "btree_update.h"
7#include "extents.h"
8#include "dirent.h"
9#include "fs.h"
10#include "keylist.h"
11#include "str_hash.h"
12#include "subvolume.h"
13
14#include <linux/dcache.h>
15
/*
 * Casefold @str into a buffer allocated from transaction memory.
 *
 * On success, *out_cf points at the casefolded name; the buffer comes from
 * bch2_trans_kmalloc() and thus lives until the transaction is reset.
 * Returns 0 on success, a negative error from allocation or
 * utf8_casefold(), or -EOPNOTSUPP when built without CONFIG_UNICODE.
 */
static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
			 const struct qstr *str, struct qstr *out_cf)
{
	*out_cf = (struct qstr) QSTR_INIT(NULL, 0);

#ifdef CONFIG_UNICODE
	unsigned char *buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
	int ret = PTR_ERR_OR_ZERO(buf);
	if (ret)
		return ret;

	ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1);
	/*
	 * NOTE(review): a 0 return here reports success but leaves *out_cf
	 * as the empty qstr set above - confirm utf8_casefold() can't
	 * legitimately return 0.
	 */
	if (ret <= 0)
		return ret;

	/* On success, ret is the length of the casefolded name: */
	*out_cf = (struct qstr) QSTR_INIT(buf, ret);
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}
37
38static inline int bch2_maybe_casefold(struct btree_trans *trans,
39 const struct bch_hash_info *info,
40 const struct qstr *str, struct qstr *out_cf)
41{
42 if (likely(!info->cf_encoding)) {
43 *out_cf = *str;
44 return 0;
45 } else {
46 return bch2_casefold(trans, info, str, out_cf);
47 }
48}
49
/*
 * Number of name bytes stored in @d, excluding trailing NUL padding.
 *
 * Dirent name storage is padded out with NULs to a multiple of
 * sizeof(u64); the padding length is recovered by inspecting the last
 * word of the value.
 */
static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
{
	/* Key too small to contain any name bytes at all: */
	if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
		return 0;

	unsigned bkey_u64s = bkey_val_u64s(d.k);
	unsigned bkey_bytes = bkey_u64s * sizeof(u64);
	u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
	/*
	 * Trailing NULs occupy the low-order bytes of the last word on big
	 * endian, the high-order bytes on little endian; an all-zero last
	 * word means a full 8 bytes of padding:
	 */
#if CPU_BIG_ENDIAN
	unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8;
#else
	unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8;
#endif

	/* Casefolded dirents keep their names in d_cf_name_block instead: */
	return bkey_bytes -
		(d.v->d_casefold
		 ? offsetof(struct bch_dirent, d_cf_name_block.d_names)
		 : offsetof(struct bch_dirent, d_name)) -
		trailing_nuls;
}
70
71struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
72{
73 if (d.v->d_casefold) {
74 unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
75 return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[0], name_len);
76 } else {
77 return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
78 }
79}
80
81static struct qstr bch2_dirent_get_casefold_name(struct bkey_s_c_dirent d)
82{
83 if (d.v->d_casefold) {
84 unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
85 unsigned cf_name_len = le16_to_cpu(d.v->d_cf_name_block.d_cf_name_len);
86 return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[name_len], cf_name_len);
87 } else {
88 return (struct qstr) QSTR_INIT(NULL, 0);
89 }
90}
91
92static inline struct qstr bch2_dirent_get_lookup_name(struct bkey_s_c_dirent d)
93{
94 return d.v->d_casefold
95 ? bch2_dirent_get_casefold_name(d)
96 : bch2_dirent_get_name(d);
97}
98
99static u64 bch2_dirent_hash(const struct bch_hash_info *info,
100 const struct qstr *name)
101{
102 struct bch_str_hash_ctx ctx;
103
104 bch2_str_hash_init(&ctx, info);
105 bch2_str_hash_update(&ctx, info, name->name, name->len);
106
107 /* [0,2) reserved for dots */
108 return max_t(u64, bch2_str_hash_end(&ctx, info), 2);
109}
110
111static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
112{
113 return bch2_dirent_hash(info, key);
114}
115
116static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
117{
118 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
119 struct qstr name = bch2_dirent_get_lookup_name(d);
120
121 return bch2_dirent_hash(info, &name);
122}
123
124static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
125{
126 struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
127 const struct qstr l_name = bch2_dirent_get_lookup_name(l);
128 const struct qstr *r_name = _r;
129
130 return !qstr_eq(l_name, *r_name);
131}
132
133static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
134{
135 struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
136 struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
137 const struct qstr l_name = bch2_dirent_get_lookup_name(l);
138 const struct qstr r_name = bch2_dirent_get_lookup_name(r);
139
140 return !qstr_eq(l_name, r_name);
141}
142
143static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
144{
145 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
146
147 if (d.v->d_type == DT_SUBVOL)
148 return le32_to_cpu(d.v->d_parent_subvol) == inum.subvol;
149 return true;
150}
151
/*
 * Hash table interface for the dirents btree: dirents live in an
 * open-addressed hash table keyed by (dir inode, name hash), with these
 * callbacks supplying hashing, comparison and visibility.
 */
const struct bch_hash_desc bch2_dirent_hash_desc = {
	.btree_id	= BTREE_ID_dirents,
	.key_type	= KEY_TYPE_dirent,
	.hash_key	= dirent_hash_key,
	.hash_bkey	= dirent_hash_bkey,
	.cmp_key	= dirent_cmp_key,
	.cmp_bkey	= dirent_cmp_bkey,
	.is_visible	= dirent_is_visible,
};
161
/*
 * Validate a dirent key, reporting structural problems via fsck errors.
 *
 * Some checks only apply to keys being newly committed
 * (BCH_VALIDATE_commit): keys already on disk may predate current limits
 * and must still be readable.
 */
int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
			 struct bkey_validate_context from)
{
	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
	unsigned name_block_len = bch2_dirent_name_bytes(d);
	struct qstr d_name = bch2_dirent_get_name(d);
	struct qstr d_cf_name = bch2_dirent_get_casefold_name(d);
	int ret = 0;

	bkey_fsck_err_on(!d_name.len,
			 c, dirent_empty_name,
			 "empty name");

	/* Both names (original + casefolded) must fit in the value: */
	bkey_fsck_err_on(d_name.len + d_cf_name.len > name_block_len,
			 c, dirent_val_too_big,
			 "dirent names exceed bkey size (%d + %d > %d)",
			 d_name.len, d_cf_name.len, name_block_len);

	/*
	 * Check new keys don't exceed the max length
	 * (older keys may be larger.)
	 */
	bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX,
			 c, dirent_name_too_long,
			 "dirent name too big (%u > %u)",
			 d_name.len, BCH_NAME_MAX);

	bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len),
			 c, dirent_name_embedded_nul,
			 "dirent has stray data after name's NUL");

	bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
			 (d_name.len == 2 && !memcmp(d_name.name, "..", 2)),
			 c, dirent_name_dot_or_dotdot,
			 "invalid name");

	bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len),
			 c, dirent_name_has_slash,
			 "name with /");

	/* A directory entry must not point back at its own directory: */
	bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
			 le64_to_cpu(d.v->d_inum) == d.k->p.inode,
			 c, dirent_to_itself,
			 "dirent points to own directory");

	if (d.v->d_casefold) {
		/*
		 * NOTE(review): this tests from.from == BKEY_VALIDATE_commit
		 * while the name-length check above tests
		 * from.flags & BCH_VALIDATE_commit - confirm the asymmetry
		 * is intentional.
		 */
		bkey_fsck_err_on(from.from == BKEY_VALIDATE_commit &&
				 d_cf_name.len > BCH_NAME_MAX,
				 c, dirent_cf_name_too_big,
				 "dirent w/ cf name too big (%u > %u)",
				 d_cf_name.len, BCH_NAME_MAX);

		bkey_fsck_err_on(d_cf_name.len != strnlen(d_cf_name.name, d_cf_name.len),
				 c, dirent_stray_data_after_cf_name,
				 "dirent has stray data after cf name's NUL");
	}
fsck_err:
	return ret;
}
221
222void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
223{
224 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
225 struct qstr d_name = bch2_dirent_get_name(d);
226
227 prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
228
229 if (d.v->d_type != DT_SUBVOL)
230 prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
231 else
232 prt_printf(out, "%u -> %u",
233 le32_to_cpu(d.v->d_parent_subvol),
234 le32_to_cpu(d.v->d_child_subvol));
235
236 prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
237}
238
/*
 * Allocate a dirent key from transaction memory, sized for the given name
 * lengths, and initialize everything except the name bytes themselves
 * (filled in by dirent_init_regular_name()/dirent_init_casefolded_name()).
 *
 * Returns an ERR_PTR on allocation failure.
 */
static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
				subvol_inum dir,
				u8 type,
				int name_len, int cf_name_len,
				u64 dst)
{
	struct bkey_i_dirent *dirent;
	unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len);

	/* bkey u64s counts are stored in a u8: */
	BUG_ON(u64s > U8_MAX);

	dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
	if (IS_ERR(dirent))
		return dirent;

	bkey_dirent_init(&dirent->k_i);
	dirent->k.u64s = u64s;

	/* Subvolume dirents store subvol ids instead of an inode number: */
	if (type != DT_SUBVOL) {
		dirent->v.d_inum = cpu_to_le64(dst);
	} else {
		dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
		dirent->v.d_child_subvol = cpu_to_le32(dst);
	}

	dirent->v.d_type = type;
	dirent->v.d_unused = 0;
	dirent->v.d_casefold = cf_name_len ? 1 : 0;

	return dirent;
}
270
/*
 * Copy @name into a non-casefolded dirent and zero the remainder of the
 * value, so the trailing padding is deterministic - required by
 * bch2_dirent_name_bytes(), which recovers the name length by stripping
 * trailing NULs.
 */
static void dirent_init_regular_name(struct bkey_i_dirent *dirent,
				     const struct qstr *name)
{
	EBUG_ON(dirent->v.d_casefold);

	memcpy(&dirent->v.d_name[0], name->name, name->len);
	memset(&dirent->v.d_name[name->len], 0,
	       bkey_val_bytes(&dirent->k) -
	       offsetof(struct bch_dirent, d_name) -
	       name->len);
}
282
283static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
284 const struct qstr *name,
285 const struct qstr *cf_name)
286{
287 EBUG_ON(!dirent->v.d_casefold);
288 EBUG_ON(!cf_name->len);
289
290 dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len);
291 dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_name->len);
292 memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
293 memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
294 memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
295 bkey_val_bytes(&dirent->k) -
296 offsetof(struct bch_dirent, d_cf_name_block.d_names) -
297 name->len + cf_name->len);
298
299 EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len);
300}
301
/*
 * Allocate and fully initialize a dirent key for @name (and @cf_name, if
 * non-NULL, for casefolded directories) pointing at @dst.
 *
 * Returns ERR_PTR(-ENAMETOOLONG) for oversized names, or the allocation
 * error from dirent_alloc_key().
 */
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
				subvol_inum dir,
				u8 type,
				const struct qstr *name,
				const struct qstr *cf_name,
				u64 dst)
{
	struct bkey_i_dirent *dirent;

	if (name->len > BCH_NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);

	dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
	if (IS_ERR(dirent))
		return dirent;

	if (cf_name)
		dirent_init_casefolded_name(dirent, name, cf_name);
	else
		dirent_init_regular_name(dirent, name);

	/* Sanity check: the stored name must round-trip: */
	EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len);

	return dirent;
}
327
/*
 * Create a (non-casefolded) dirent at an explicit (dir, snapshot)
 * position, bypassing subvolume -> snapshot resolution.
 *
 * On success *dir_offset is set to the offset the dirent was inserted at.
 */
int bch2_dirent_create_snapshot(struct btree_trans *trans,
			u32 dir_subvol, u64 dir, u32 snapshot,
			const struct bch_hash_info *hash_info,
			u8 type, const struct qstr *name, u64 dst_inum,
			u64 *dir_offset,
			enum btree_iter_update_trigger_flags flags)
{
	subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
	struct bkey_i_dirent *dirent;
	int ret;

	/* cf_name NULL: this path never creates casefolded dirents */
	dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum);
	ret = PTR_ERR_OR_ZERO(dirent);
	if (ret)
		return ret;

	dirent->k.p.inode	= dir;
	dirent->k.p.snapshot	= snapshot;

	ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
					dir_inum, snapshot, &dirent->k_i,
					flags|BTREE_UPDATE_internal_snapshot_node);
	*dir_offset = dirent->k.p.offset;

	return ret;
}
354
/*
 * Create a dirent in @dir pointing at @dst_inum.
 *
 * For casefolded directories the key stores both the original and the
 * casefolded name; hashing and lookup are done on the casefolded form.
 *
 * On success *dir_offset is the dirent's position and *i_size is bumped
 * by the key's size in bytes.
 *
 * NOTE(review): *i_size is updated before bch2_hash_set() can fail -
 * presumably a transaction restart makes the caller redo this from
 * scratch; confirm callers don't keep a stale *i_size on error.
 */
int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
		       const struct bch_hash_info *hash_info,
		       u8 type, const struct qstr *name, u64 dst_inum,
		       u64 *dir_offset,
		       u64 *i_size,
		       enum btree_iter_update_trigger_flags flags)
{
	struct bkey_i_dirent *dirent;
	int ret;

	if (hash_info->cf_encoding) {
		struct qstr cf_name;
		ret = bch2_casefold(trans, hash_info, name, &cf_name);
		if (ret)
			return ret;
		dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
	} else {
		dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
	}

	ret = PTR_ERR_OR_ZERO(dirent);
	if (ret)
		return ret;

	*i_size += bkey_bytes(&dirent->k);

	ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
			    dir, &dirent->k_i, flags);
	*dir_offset = dirent->k.p.offset;

	return ret;
}
387
/*
 * Resolve the target of dirent @d, as seen from directory @dir, into
 * *target.
 *
 * Returns > 0 (without filling *target) when @d points at a subvolume
 * whose parent is not @dir's subvolume - i.e. the dirent is not visible
 * from here; 0 on success; negative error from subvolume lookup.
 */
int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
			    struct bkey_s_c_dirent d, subvol_inum *target)
{
	struct bch_subvolume s;
	int ret = 0;

	if (d.v->d_type == DT_SUBVOL &&
	    le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
		return 1;

	if (likely(d.v->d_type != DT_SUBVOL)) {
		/* Ordinary dirent: target is in the same subvolume: */
		target->subvol	= dir.subvol;
		target->inum	= le64_to_cpu(d.v->d_inum);
	} else {
		/* Subvolume dirent: resolve the child subvolume's root inode: */
		target->subvol	= le32_to_cpu(d.v->d_child_subvol);

		ret = bch2_subvolume_get(trans, target->subvol, true, &s);

		target->inum	= le64_to_cpu(s.inode);
	}

	return ret;
}
411
/*
 * Rename a dirent: plain rename (BCH_RENAME), overwrite, or exchange
 * (BCH_RENAME_EXCHANGE), per @mode.
 *
 * Because dirents live in an open-addressed hash table, deleting src
 * while inserting dst needs care: a whiteout may be required so later
 * probes don't stop early, and in one collision case new_dst is inserted
 * directly at src's slot (see inline comments).
 *
 * Outputs: *src_inum/*dst_inum are the affected targets,
 * *src_offset/*dst_offset the resulting key positions; the directory
 * i_size counters are adjusted by the key bytes added and removed.
 */
int bch2_dirent_rename(struct btree_trans *trans,
		subvol_inum src_dir, struct bch_hash_info *src_hash, u64 *src_dir_i_size,
		subvol_inum dst_dir, struct bch_hash_info *dst_hash, u64 *dst_dir_i_size,
		const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
		const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
		enum bch_rename_mode mode)
{
	struct qstr src_name_lookup, dst_name_lookup;
	struct btree_iter src_iter = {};
	struct btree_iter dst_iter = {};
	struct bkey_s_c old_src, old_dst = bkey_s_c_null;
	struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
	struct bpos dst_pos =
		POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
	unsigned src_update_flags = 0;
	bool delete_src, delete_dst;
	int ret = 0;

	memset(src_inum, 0, sizeof(*src_inum));
	memset(dst_inum, 0, sizeof(*dst_inum));

	/* Lookup src: */
	ret = bch2_maybe_casefold(trans, src_hash, src_name, &src_name_lookup);
	if (ret)
		goto out;
	old_src = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
				   src_hash, src_dir, &src_name_lookup,
				   BTREE_ITER_intent);
	ret = bkey_err(old_src);
	if (ret)
		goto out;

	ret = bch2_dirent_read_target(trans, src_dir,
			bkey_s_c_to_dirent(old_src), src_inum);
	if (ret)
		goto out;

	/* Lookup dst: */
	ret = bch2_maybe_casefold(trans, dst_hash, dst_name, &dst_name_lookup);
	if (ret)
		goto out;
	if (mode == BCH_RENAME) {
		/*
		 * Note that we're _not_ checking if the target already exists -
		 * we're relying on the VFS to do that check for us for
		 * correctness:
		 */
		ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
				     dst_hash, dst_dir, &dst_name_lookup);
		if (ret)
			goto out;
	} else {
		old_dst = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
					   dst_hash, dst_dir, &dst_name_lookup,
					   BTREE_ITER_intent);
		ret = bkey_err(old_dst);
		if (ret)
			goto out;

		ret = bch2_dirent_read_target(trans, dst_dir,
				bkey_s_c_to_dirent(old_dst), dst_inum);
		if (ret)
			goto out;
	}

	if (mode != BCH_RENAME_EXCHANGE)
		*src_offset = dst_iter.pos.offset;

	/* Create new dst key: */
	new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
				    dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
	ret = PTR_ERR_OR_ZERO(new_dst);
	if (ret)
		goto out;

	dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
	new_dst->k.p = dst_iter.pos;

	/* Create new src key: */
	if (mode == BCH_RENAME_EXCHANGE) {
		new_src = dirent_create_key(trans, src_dir, 0, src_name,
					    src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
		ret = PTR_ERR_OR_ZERO(new_src);
		if (ret)
			goto out;

		dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
		new_src->k.p = src_iter.pos;
	} else {
		/* Not exchanging: src becomes a deletion (or whiteout): */
		new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
		ret = PTR_ERR_OR_ZERO(new_src);
		if (ret)
			goto out;

		bkey_init(&new_src->k);
		new_src->k.p = src_iter.pos;

		if (bkey_le(dst_pos, src_iter.pos) &&
		    bkey_lt(src_iter.pos, dst_iter.pos)) {
			/*
			 * We have a hash collision for the new dst key,
			 * and new_src - the key we're deleting - is between
			 * new_dst's hashed slot and the slot we're going to be
			 * inserting it into - oops.  This will break the hash
			 * table if we don't deal with it:
			 */
			if (mode == BCH_RENAME) {
				/*
				 * If we're not overwriting, we can just insert
				 * new_dst at the src position:
				 */
				new_src = new_dst;
				new_src->k.p = src_iter.pos;
				/*
				 * NOTE(review): this goto skips the subvol
				 * parent fixup and the i_size accounting
				 * below - confirm that's intended for this
				 * collision case.
				 */
				goto out_set_src;
			} else {
				/* If we're overwriting, we can't insert new_dst
				 * at a different slot because it has to
				 * overwrite old_dst - just make sure to use a
				 * whiteout when deleting src:
				 */
				new_src->k.type = KEY_TYPE_hash_whiteout;
			}
		} else {
			/* Check if we need a whiteout to delete src: */
			ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
						       src_hash, &src_iter);
			if (ret < 0)
				goto out;

			if (ret)
				new_src->k.type = KEY_TYPE_hash_whiteout;
		}
	}

	/* Dirents pointing at subvolumes record their new parent subvol: */
	if (new_dst->v.d_type == DT_SUBVOL)
		new_dst->v.d_parent_subvol = cpu_to_le32(dst_dir.subvol);

	if ((mode == BCH_RENAME_EXCHANGE) &&
	    new_src->v.d_type == DT_SUBVOL)
		new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);

	/* Directory i_size tracks total dirent key bytes: */
	if (old_dst.k)
		*dst_dir_i_size -= bkey_bytes(old_dst.k);
	*src_dir_i_size -= bkey_bytes(old_src.k);

	if (mode == BCH_RENAME_EXCHANGE)
		*src_dir_i_size += bkey_bytes(&new_src->k);
	*dst_dir_i_size += bkey_bytes(&new_dst->k);

	ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
	if (ret)
		goto out;
out_set_src:
	/*
	 * If we're deleting a subvolume we need to really delete the dirent,
	 * not just emit a whiteout in the current snapshot - there can only be
	 * single dirent that points to a given subvolume.
	 *
	 * IOW, we don't maintain multiple versions in different snapshots of
	 * dirents that point to subvolumes - dirents that point to subvolumes
	 * are only visible in one particular subvolume so it's not necessary,
	 * and it would be particularly confusing for fsck to have to deal with.
	 */
	delete_src = bkey_s_c_to_dirent(old_src).v->d_type == DT_SUBVOL &&
		new_src->k.p.snapshot != old_src.k->p.snapshot;

	delete_dst = old_dst.k &&
		bkey_s_c_to_dirent(old_dst).v->d_type == DT_SUBVOL &&
		new_dst->k.p.snapshot != old_dst.k->p.snapshot;

	if (!delete_src || !bkey_deleted(&new_src->k)) {
		ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
		if (ret)
			goto out;
	}

	if (delete_src) {
		bch2_btree_iter_set_snapshot(trans, &src_iter, old_src.k->p.snapshot);
		ret =   bch2_btree_iter_traverse(trans, &src_iter) ?:
			bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_internal_snapshot_node);
		if (ret)
			goto out;
	}

	if (delete_dst) {
		bch2_btree_iter_set_snapshot(trans, &dst_iter, old_dst.k->p.snapshot);
		ret =   bch2_btree_iter_traverse(trans, &dst_iter) ?:
			bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_internal_snapshot_node);
		if (ret)
			goto out;
	}

	if (mode == BCH_RENAME_EXCHANGE)
		*src_offset = new_src->k.p.offset;
	*dst_offset = new_dst->k.p.offset;
out:
	bch2_trans_iter_exit(trans, &src_iter);
	bch2_trans_iter_exit(trans, &dst_iter);
	return ret;
}
612
/*
 * Look up @name in @dir, setting *inum to the target on success.
 *
 * On success the iterator is left pointing at the dirent - the caller
 * must exit it; on error it is exited here.  A subvolume dirent that
 * isn't visible from @dir is reported as -ENOENT.
 */
int bch2_dirent_lookup_trans(struct btree_trans *trans,
			     struct btree_iter *iter,
			     subvol_inum dir,
			     const struct bch_hash_info *hash_info,
			     const struct qstr *name, subvol_inum *inum,
			     unsigned flags)
{
	struct qstr lookup_name;
	int ret = bch2_maybe_casefold(trans, hash_info, name, &lookup_name);
	if (ret)
		return ret;

	struct bkey_s_c k = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
					     hash_info, dir, &lookup_name, flags);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/* > 0 from read_target means the dirent isn't visible from @dir: */
	ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
	if (ret > 0)
		ret = -ENOENT;
err:
	if (ret)
		bch2_trans_iter_exit(trans, iter);
	return ret;
}
639
/*
 * Non-transactional wrapper around bch2_dirent_lookup_trans(): look up
 * @name in @dir, filling *inum on success.
 *
 * NOTE(review): despite the u64 return type, this returns the (int)
 * error code from the lookup, not an inode number - confirm callers
 * treat it as such.
 */
u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
		       const struct bch_hash_info *hash_info,
		       const struct qstr *name, subvol_inum *inum)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter = {};

	int ret = lockrestart_do(trans,
		bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);
	return ret;
}
653
/*
 * Check whether directory @dir is empty at @snapshot.
 *
 * Returns -BCH_ERR_ENOTEMPTY_dir_not_empty if any dirent is found,
 * ignoring subvolume dirents owned by other subvolumes (not visible
 * from @subvol); 0 if empty; or an iteration error.
 */
int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents,
					 SPOS(dir, 0, snapshot),
					 POS(dir, U64_MAX), 0, k, ret)
		if (k.k->type == KEY_TYPE_dirent) {
			struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
			if (d.v->d_type == DT_SUBVOL && le32_to_cpu(d.v->d_parent_subvol) != subvol)
				continue;
			ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
			break;
		}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
674
675int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
676{
677 u32 snapshot;
678
679 return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
680 bch2_empty_dir_snapshot(trans, dir.inum, dir.subvol, snapshot);
681}
682
/*
 * Feed a single dirent to the VFS dir_context.  Returns the dir_emit()
 * result: true to continue iterating, false to stop.
 */
static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subvol_inum target)
{
	struct qstr name = bch2_dirent_get_name(d);
	/*
	 * Although not required by the kernel code, updating ctx->pos is needed
	 * for the bcachefs FUSE driver. Without this update, the FUSE
	 * implementation will be stuck in an infinite loop when reading
	 * directories (via the bcachefs_fuse_readdir callback).
	 * In kernel space, ctx->pos is updated by the VFS code.
	 */
	ctx->pos = d.k->p.offset;
	bool ret = dir_emit(ctx, name.name,
		      name.len,
		      target.inum,
		      vfs_d_type(d.v->d_type));
	/* On success, advance past this entry so iteration resumes after it: */
	if (ret)
		ctx->pos = d.k->p.offset + 1;
	return ret;
}
702
/*
 * readdir: walk the dirents of @inum starting at ctx->pos, feeding each
 * visible entry to @ctx.
 *
 * Returns 0 or a negative error; a positive value from the iteration
 * (early stop requested by dir_emit()) is reported as success.
 */
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
{
	struct bkey_buf sk;
	bch2_bkey_buf_init(&sk);

	int ret = bch2_trans_run(c,
		for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents,
				   POS(inum.inum, ctx->pos),
				   POS(inum.inum, U64_MAX),
				   inum.subvol, 0, k, ({
			if (k.k->type != KEY_TYPE_dirent)
				continue;

			/* dir_emit() can fault and block: */
			bch2_bkey_buf_reassemble(&sk, c, k);
			struct bkey_s_c_dirent dirent = bkey_i_to_s_c_dirent(sk.k);

			subvol_inum target;
			int ret2 = bch2_dirent_read_target(trans, inum, dirent, &target);
			/* > 0: dirent not visible from this subvolume, skip: */
			if (ret2 > 0)
				continue;

			/* Drop btree locks around the (potentially faulting) emit: */
			ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target));
		})));

	bch2_bkey_buf_exit(&sk, c);

	return ret < 0 ? ret : 0;
}
732
733/* fsck */
734
/*
 * fsck helper: unpack the first inode key found at @inode_nr, iterating
 * across all snapshots.
 *
 * NOTE(review): if the iteration itself fails, ret is overwritten with
 * -BCH_ERR_ENOENT_inode below - confirm clobbering the original error is
 * acceptable here.
 */
static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inode_nr)
			break;
		if (!bkey_is_inode(k.k))
			continue;
		ret = bch2_inode_unpack(k, inode);
		goto found;
	}
	ret = -BCH_ERR_ENOENT_inode;
found:
	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
757
/*
 * fsck helper: delete the dirent at @pos, using the owning directory's
 * hash info so the hash table stays consistent (whiteouts as needed).
 */
int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bch_inode_unpacked dir_inode;
	struct bch_hash_info dir_hash_info;
	int ret;

	/* Need the directory inode to reconstruct its hash seed/type: */
	ret = lookup_first_inode(trans, pos.inode, &dir_inode);
	if (ret)
		goto err;

	dir_hash_info = bch2_hash_info_init(c, &dir_inode);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);

	ret =   bch2_btree_iter_traverse(trans, &iter) ?:
		bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				    &dir_hash_info, &iter,
				    BTREE_UPDATE_internal_snapshot_node);
	bch2_trans_iter_exit(trans, &iter);
err:
	bch_err_fn(c, ret);
	return ret;
}