Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
4#include "acl.h"
5#include "btree_update.h"
6#include "dirent.h"
7#include "inode.h"
8#include "namei.h"
9#include "subvolume.h"
10#include "xattr.h"
11
12#include <linux/posix_acl.h>
13
14static inline subvol_inum parent_inum(subvol_inum inum, struct bch_inode_unpacked *inode)
15{
16 return (subvol_inum) {
17 .subvol = inode->bi_parent_subvol ?: inum.subvol,
18 .inum = inode->bi_dir,
19 };
20}
21
22static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
23{
24 return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
25}
26
27int bch2_create_trans(struct btree_trans *trans,
28 subvol_inum dir,
29 struct bch_inode_unpacked *dir_u,
30 struct bch_inode_unpacked *new_inode,
31 const struct qstr *name,
32 uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
33 struct posix_acl *default_acl,
34 struct posix_acl *acl,
35 subvol_inum snapshot_src,
36 unsigned flags)
37{
38 struct bch_fs *c = trans->c;
39 struct btree_iter dir_iter = {};
40 struct btree_iter inode_iter = {};
41 subvol_inum new_inum = dir;
42 u64 now = bch2_current_time(c);
43 u64 cpu = raw_smp_processor_id();
44 u64 dir_target;
45 u32 snapshot;
46 unsigned dir_type = mode_to_type(mode);
47 int ret;
48
49 ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
50 if (ret)
51 goto err;
52
53 ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir,
54 BTREE_ITER_intent|BTREE_ITER_with_updates);
55 if (ret)
56 goto err;
57
58 if (!(flags & BCH_CREATE_SNAPSHOT)) {
59 /* Normal create path - allocate a new inode: */
60 bch2_inode_init_late(c, new_inode, now, uid, gid, mode, rdev, dir_u);
61
62 if (flags & BCH_CREATE_TMPFILE)
63 new_inode->bi_flags |= BCH_INODE_unlinked;
64
65 ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
66 if (ret)
67 goto err;
68
69 snapshot_src = (subvol_inum) { 0 };
70 } else {
71 /*
72 * Creating a snapshot - we're not allocating a new inode, but
73 * we do have to lookup the root inode of the subvolume we're
74 * snapshotting and update it (in the new snapshot):
75 */
76
77 if (!snapshot_src.inum) {
78 /* Inode wasn't specified, just snapshot: */
79 struct bch_subvolume s;
80 ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &s);
81 if (ret)
82 goto err;
83
84 snapshot_src.inum = le64_to_cpu(s.inode);
85 }
86
87 ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
88 BTREE_ITER_intent);
89 if (ret)
90 goto err;
91
92 if (new_inode->bi_subvol != snapshot_src.subvol) {
93 /* Not a subvolume root: */
94 ret = -EINVAL;
95 goto err;
96 }
97
98 /*
99 * If we're not root, we have to own the subvolume being
100 * snapshotted:
101 */
102 if (uid && new_inode->bi_uid != uid) {
103 ret = -EPERM;
104 goto err;
105 }
106
107 flags |= BCH_CREATE_SUBVOL;
108 }
109
110 new_inum.inum = new_inode->bi_inum;
111 dir_target = new_inode->bi_inum;
112
113 if (flags & BCH_CREATE_SUBVOL) {
114 u32 new_subvol, dir_snapshot;
115
116 ret = bch2_subvolume_create(trans, new_inode->bi_inum,
117 dir.subvol,
118 snapshot_src.subvol,
119 &new_subvol, &snapshot,
120 (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
121 if (ret)
122 goto err;
123
124 new_inode->bi_parent_subvol = dir.subvol;
125 new_inode->bi_subvol = new_subvol;
126 new_inum.subvol = new_subvol;
127 dir_target = new_subvol;
128 dir_type = DT_SUBVOL;
129
130 ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
131 if (ret)
132 goto err;
133
134 bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot);
135 ret = bch2_btree_iter_traverse(trans, &dir_iter);
136 if (ret)
137 goto err;
138 }
139
140 if (!(flags & BCH_CREATE_SNAPSHOT)) {
141 if (default_acl) {
142 ret = bch2_set_acl_trans(trans, new_inum, new_inode,
143 default_acl, ACL_TYPE_DEFAULT);
144 if (ret)
145 goto err;
146 }
147
148 if (acl) {
149 ret = bch2_set_acl_trans(trans, new_inum, new_inode,
150 acl, ACL_TYPE_ACCESS);
151 if (ret)
152 goto err;
153 }
154 }
155
156 if (!(flags & BCH_CREATE_TMPFILE)) {
157 struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
158 u64 dir_offset;
159
160 if (is_subdir_for_nlink(new_inode))
161 dir_u->bi_nlink++;
162 dir_u->bi_mtime = dir_u->bi_ctime = now;
163
164 ret = bch2_dirent_create(trans, dir, &dir_hash,
165 dir_type,
166 name,
167 dir_target,
168 &dir_offset,
169 STR_HASH_must_create|BTREE_ITER_with_updates) ?:
170 bch2_inode_write(trans, &dir_iter, dir_u);
171 if (ret)
172 goto err;
173
174 new_inode->bi_dir = dir_u->bi_inum;
175 new_inode->bi_dir_offset = dir_offset;
176 }
177
178 if (S_ISDIR(mode)) {
179 ret = bch2_maybe_propagate_has_case_insensitive(trans,
180 (subvol_inum) {
181 new_inode->bi_subvol ?: dir.subvol,
182 new_inode->bi_inum },
183 new_inode);
184 if (ret)
185 goto err;
186 }
187
188 if (S_ISDIR(mode) &&
189 !new_inode->bi_subvol)
190 new_inode->bi_depth = dir_u->bi_depth + 1;
191
192 inode_iter.flags &= ~BTREE_ITER_all_snapshots;
193 bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot);
194
195 ret = bch2_btree_iter_traverse(trans, &inode_iter) ?:
196 bch2_inode_write(trans, &inode_iter, new_inode);
197err:
198 bch2_trans_iter_exit(trans, &inode_iter);
199 bch2_trans_iter_exit(trans, &dir_iter);
200 return ret;
201}
202
203int bch2_link_trans(struct btree_trans *trans,
204 subvol_inum dir, struct bch_inode_unpacked *dir_u,
205 subvol_inum inum, struct bch_inode_unpacked *inode_u,
206 const struct qstr *name)
207{
208 struct bch_fs *c = trans->c;
209 struct btree_iter dir_iter = {};
210 struct btree_iter inode_iter = {};
211 struct bch_hash_info dir_hash;
212 u64 now = bch2_current_time(c);
213 u64 dir_offset = 0;
214 int ret;
215
216 if (dir.subvol != inum.subvol)
217 return -EXDEV;
218
219 ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_intent);
220 if (ret)
221 return ret;
222
223 inode_u->bi_ctime = now;
224 ret = bch2_inode_nlink_inc(inode_u);
225 if (ret)
226 goto err;
227
228 ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
229 if (ret)
230 goto err;
231
232 if (bch2_reinherit_attrs(inode_u, dir_u)) {
233 ret = -EXDEV;
234 goto err;
235 }
236
237 dir_u->bi_mtime = dir_u->bi_ctime = now;
238
239 dir_hash = bch2_hash_info_init(c, dir_u);
240
241 ret = bch2_dirent_create(trans, dir, &dir_hash,
242 mode_to_type(inode_u->bi_mode),
243 name, inum.inum,
244 &dir_offset,
245 STR_HASH_must_create);
246 if (ret)
247 goto err;
248
249 inode_u->bi_dir = dir.inum;
250 inode_u->bi_dir_offset = dir_offset;
251
252 ret = bch2_inode_write(trans, &dir_iter, dir_u) ?:
253 bch2_inode_write(trans, &inode_iter, inode_u);
254err:
255 bch2_trans_iter_exit(trans, &dir_iter);
256 bch2_trans_iter_exit(trans, &inode_iter);
257 return ret;
258}
259
260int bch2_unlink_trans(struct btree_trans *trans,
261 subvol_inum dir,
262 struct bch_inode_unpacked *dir_u,
263 struct bch_inode_unpacked *inode_u,
264 const struct qstr *name,
265 bool deleting_subvol)
266{
267 struct bch_fs *c = trans->c;
268 struct btree_iter dir_iter = {};
269 struct btree_iter dirent_iter = {};
270 struct btree_iter inode_iter = {};
271 struct bch_hash_info dir_hash;
272 subvol_inum inum;
273 u64 now = bch2_current_time(c);
274 struct bkey_s_c k;
275 int ret;
276
277 ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
278 if (ret)
279 goto err;
280
281 dir_hash = bch2_hash_info_init(c, dir_u);
282
283 ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
284 name, &inum, BTREE_ITER_intent);
285 if (ret)
286 goto err;
287
288 ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum,
289 BTREE_ITER_intent);
290 if (ret)
291 goto err;
292
293 if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
294 ret = bch2_empty_dir_trans(trans, inum);
295 if (ret)
296 goto err;
297 }
298
299 if (deleting_subvol && !inode_u->bi_subvol) {
300 ret = bch_err_throw(c, ENOENT_not_subvol);
301 goto err;
302 }
303
304 if (inode_u->bi_subvol) {
305 /* Recursive subvolume destroy not allowed (yet?) */
306 ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
307 if (ret)
308 goto err;
309 }
310
311 if (deleting_subvol || inode_u->bi_subvol) {
312 ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
313 if (ret)
314 goto err;
315
316 k = bch2_btree_iter_peek_slot(trans, &dirent_iter);
317 ret = bkey_err(k);
318 if (ret)
319 goto err;
320
321 /*
322 * If we're deleting a subvolume, we need to really delete the
323 * dirent, not just emit a whiteout in the current snapshot:
324 */
325 bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot);
326 ret = bch2_btree_iter_traverse(trans, &dirent_iter);
327 if (ret)
328 goto err;
329 } else {
330 bch2_inode_nlink_dec(trans, inode_u);
331 }
332
333 if (inode_u->bi_dir == dirent_iter.pos.inode &&
334 inode_u->bi_dir_offset == dirent_iter.pos.offset) {
335 inode_u->bi_dir = 0;
336 inode_u->bi_dir_offset = 0;
337 }
338
339 dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
340 dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
341
342 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
343 &dir_hash, &dirent_iter,
344 BTREE_UPDATE_internal_snapshot_node) ?:
345 bch2_inode_write(trans, &dir_iter, dir_u) ?:
346 bch2_inode_write(trans, &inode_iter, inode_u);
347err:
348 bch2_trans_iter_exit(trans, &inode_iter);
349 bch2_trans_iter_exit(trans, &dirent_iter);
350 bch2_trans_iter_exit(trans, &dir_iter);
351 return ret;
352}
353
354bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
355 struct bch_inode_unpacked *src_u)
356{
357 u64 src, dst;
358 unsigned id;
359 bool ret = false;
360
361 for (id = 0; id < Inode_opt_nr; id++) {
362 if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold)
363 continue;
364
365 /* Skip attributes that were explicitly set on this inode */
366 if (dst_u->bi_fields_set & (1 << id))
367 continue;
368
369 src = bch2_inode_opt_get(src_u, id);
370 dst = bch2_inode_opt_get(dst_u, id);
371
372 if (src == dst)
373 continue;
374
375 bch2_inode_opt_set(dst_u, id, src);
376 ret = true;
377 }
378
379 return ret;
380}
381
382static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent)
383{
384 struct btree_iter iter;
385 struct bkey_i_subvolume *s =
386 bch2_bkey_get_mut_typed(trans, &iter,
387 BTREE_ID_subvolumes, POS(0, subvol),
388 BTREE_ITER_cached, subvolume);
389 int ret = PTR_ERR_OR_ZERO(s);
390 if (ret)
391 return ret;
392
393 s->v.fs_path_parent = cpu_to_le32(new_parent);
394 bch2_trans_iter_exit(trans, &iter);
395 return 0;
396}
397
398int bch2_rename_trans(struct btree_trans *trans,
399 subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
400 subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
401 struct bch_inode_unpacked *src_inode_u,
402 struct bch_inode_unpacked *dst_inode_u,
403 const struct qstr *src_name,
404 const struct qstr *dst_name,
405 enum bch_rename_mode mode)
406{
407 struct bch_fs *c = trans->c;
408 struct btree_iter src_dir_iter = {};
409 struct btree_iter dst_dir_iter = {};
410 struct btree_iter src_inode_iter = {};
411 struct btree_iter dst_inode_iter = {};
412 struct bch_hash_info src_hash, dst_hash;
413 subvol_inum src_inum, dst_inum;
414 u64 src_offset, dst_offset;
415 u64 now = bch2_current_time(c);
416 int ret;
417
418 ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
419 BTREE_ITER_intent);
420 if (ret)
421 goto err;
422
423 src_hash = bch2_hash_info_init(c, src_dir_u);
424
425 if (!subvol_inum_eq(dst_dir, src_dir)) {
426 ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
427 BTREE_ITER_intent);
428 if (ret)
429 goto err;
430
431 dst_hash = bch2_hash_info_init(c, dst_dir_u);
432 } else {
433 dst_dir_u = src_dir_u;
434 dst_hash = src_hash;
435 }
436
437 ret = bch2_dirent_rename(trans,
438 src_dir, &src_hash,
439 dst_dir, &dst_hash,
440 src_name, &src_inum, &src_offset,
441 dst_name, &dst_inum, &dst_offset,
442 mode);
443 if (ret)
444 goto err;
445
446 ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
447 BTREE_ITER_intent);
448 if (ret)
449 goto err;
450
451 if (dst_inum.inum) {
452 ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
453 BTREE_ITER_intent);
454 if (ret)
455 goto err;
456 }
457
458 if (src_inode_u->bi_subvol &&
459 dst_dir.subvol != src_inode_u->bi_parent_subvol) {
460 ret = subvol_update_parent(trans, src_inode_u->bi_subvol, dst_dir.subvol);
461 if (ret)
462 goto err;
463 }
464
465 if (mode == BCH_RENAME_EXCHANGE &&
466 dst_inode_u->bi_subvol &&
467 src_dir.subvol != dst_inode_u->bi_parent_subvol) {
468 ret = subvol_update_parent(trans, dst_inode_u->bi_subvol, src_dir.subvol);
469 if (ret)
470 goto err;
471 }
472
473 /* Can't move across subvolumes, unless it's a subvolume root: */
474 if (src_dir.subvol != dst_dir.subvol &&
475 (!src_inode_u->bi_subvol ||
476 (dst_inum.inum && !dst_inode_u->bi_subvol))) {
477 ret = -EXDEV;
478 goto err;
479 }
480
481 if (src_inode_u->bi_parent_subvol)
482 src_inode_u->bi_parent_subvol = dst_dir.subvol;
483
484 if ((mode == BCH_RENAME_EXCHANGE) &&
485 dst_inode_u->bi_parent_subvol)
486 dst_inode_u->bi_parent_subvol = src_dir.subvol;
487
488 src_inode_u->bi_dir = dst_dir_u->bi_inum;
489 src_inode_u->bi_dir_offset = dst_offset;
490
491 if (mode == BCH_RENAME_EXCHANGE) {
492 dst_inode_u->bi_dir = src_dir_u->bi_inum;
493 dst_inode_u->bi_dir_offset = src_offset;
494 }
495
496 if (mode == BCH_RENAME_OVERWRITE &&
497 dst_inode_u->bi_dir == dst_dir_u->bi_inum &&
498 dst_inode_u->bi_dir_offset == src_offset) {
499 dst_inode_u->bi_dir = 0;
500 dst_inode_u->bi_dir_offset = 0;
501 }
502
503 if (mode == BCH_RENAME_OVERWRITE) {
504 if (S_ISDIR(src_inode_u->bi_mode) !=
505 S_ISDIR(dst_inode_u->bi_mode)) {
506 ret = -ENOTDIR;
507 goto err;
508 }
509
510 if (S_ISDIR(dst_inode_u->bi_mode)) {
511 ret = bch2_empty_dir_trans(trans, dst_inum);
512 if (ret)
513 goto err;
514 }
515 }
516
517 if (!subvol_inum_eq(dst_dir, src_dir)) {
518 if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
519 S_ISDIR(src_inode_u->bi_mode)) {
520 ret = -EXDEV;
521 goto err;
522 }
523
524 if (mode == BCH_RENAME_EXCHANGE &&
525 bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
526 S_ISDIR(dst_inode_u->bi_mode)) {
527 ret = -EXDEV;
528 goto err;
529 }
530
531 ret = bch2_maybe_propagate_has_case_insensitive(trans, src_inum, src_inode_u) ?:
532 (mode == BCH_RENAME_EXCHANGE
533 ? bch2_maybe_propagate_has_case_insensitive(trans, dst_inum, dst_inode_u)
534 : 0);
535 if (ret)
536 goto err;
537
538 if (is_subdir_for_nlink(src_inode_u)) {
539 src_dir_u->bi_nlink--;
540 dst_dir_u->bi_nlink++;
541 }
542
543 if (S_ISDIR(src_inode_u->bi_mode) &&
544 !src_inode_u->bi_subvol)
545 src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;
546
547 if (mode == BCH_RENAME_EXCHANGE &&
548 S_ISDIR(dst_inode_u->bi_mode) &&
549 !dst_inode_u->bi_subvol)
550 dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
551 }
552
553 if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
554 dst_dir_u->bi_nlink--;
555 src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
556 }
557
558 if (mode == BCH_RENAME_OVERWRITE)
559 bch2_inode_nlink_dec(trans, dst_inode_u);
560
561 src_dir_u->bi_mtime = now;
562 src_dir_u->bi_ctime = now;
563
564 if (src_dir.inum != dst_dir.inum) {
565 dst_dir_u->bi_mtime = now;
566 dst_dir_u->bi_ctime = now;
567 }
568
569 src_inode_u->bi_ctime = now;
570
571 if (dst_inum.inum)
572 dst_inode_u->bi_ctime = now;
573
574 ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
575 (src_dir.inum != dst_dir.inum
576 ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
577 : 0) ?:
578 bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
579 (dst_inum.inum
580 ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
581 : 0);
582err:
583 bch2_trans_iter_exit(trans, &dst_inode_iter);
584 bch2_trans_iter_exit(trans, &src_inode_iter);
585 bch2_trans_iter_exit(trans, &dst_dir_iter);
586 bch2_trans_iter_exit(trans, &src_dir_iter);
587 return ret;
588}
589
590/* inum_to_path */
591
592static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n)
593{
594 bch2_printbuf_make_room(out, n);
595
596 unsigned can_print = min(n, printbuf_remaining(out));
597
598 b += n;
599
600 for (unsigned i = 0; i < can_print; i++)
601 out->buf[out->pos++] = *((char *) --b);
602
603 printbuf_nul_terminate(out);
604}
605
/* Append the NUL-terminated string @s to @out with its bytes reversed. */
static inline void prt_str_reversed(struct printbuf *out, const char *s)
{
	unsigned len = strlen(s);

	prt_bytes_reversed(out, s, len);
}
610
/* Reverse the @n bytes starting at @b in place. */
static inline void reverse_bytes(void *b, size_t n)
{
	char *lo = (char *) b;
	char *hi = lo + n;

	for (; lo < hi; lo++) {
		char tmp;

		hi--;
		tmp = *lo;
		*lo = *hi;
		*hi = tmp;
	}
}
621
622static int __bch2_inum_to_path(struct btree_trans *trans,
623 u32 subvol, u64 inum, u32 snapshot,
624 struct printbuf *path)
625{
626 unsigned orig_pos = path->pos;
627 int ret = 0;
628
629 while (true) {
630 if (!snapshot) {
631 ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
632 if (ret)
633 goto disconnected;
634 }
635
636 struct bch_inode_unpacked inode;
637 ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
638 if (ret)
639 goto disconnected;
640
641 if (inode.bi_subvol == BCACHEFS_ROOT_SUBVOL &&
642 inode.bi_inum == BCACHEFS_ROOT_INO)
643 break;
644
645 if (!inode.bi_dir && !inode.bi_dir_offset) {
646 ret = bch_err_throw(trans->c, ENOENT_inode_no_backpointer);
647 goto disconnected;
648 }
649
650 inum = inode.bi_dir;
651 if (inode.bi_parent_subvol) {
652 subvol = inode.bi_parent_subvol;
653 snapshot = 0;
654 }
655
656 struct btree_iter d_iter;
657 struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter,
658 BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot),
659 0, dirent);
660 ret = bkey_err(d.s_c);
661 if (ret)
662 goto disconnected;
663
664 struct qstr dirent_name = bch2_dirent_get_name(d);
665 prt_bytes_reversed(path, dirent_name.name, dirent_name.len);
666
667 prt_char(path, '/');
668
669 bch2_trans_iter_exit(trans, &d_iter);
670 }
671
672 if (orig_pos == path->pos)
673 prt_char(path, '/');
674out:
675 ret = path->allocation_failure ? -ENOMEM : 0;
676 if (ret)
677 goto err;
678
679 reverse_bytes(path->buf + orig_pos, path->pos - orig_pos);
680 return 0;
681err:
682 return ret;
683disconnected:
684 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
685 goto err;
686
687 prt_str_reversed(path, "(disconnected)");
688 goto out;
689}
690
691int bch2_inum_to_path(struct btree_trans *trans,
692 subvol_inum inum,
693 struct printbuf *path)
694{
695 return __bch2_inum_to_path(trans, inum.subvol, inum.inum, 0, path);
696}
697
698int bch2_inum_snapshot_to_path(struct btree_trans *trans, u64 inum, u32 snapshot,
699 snapshot_id_list *snapshot_overwrites,
700 struct printbuf *path)
701{
702 return __bch2_inum_to_path(trans, 0, inum, snapshot, path);
703}
704
705/* fsck */
706
707static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
708 struct bkey_s_c_dirent d,
709 struct bch_inode_unpacked *target,
710 bool in_fsck)
711{
712 struct bch_fs *c = trans->c;
713 struct printbuf buf = PRINTBUF;
714 struct btree_iter bp_iter = {};
715 int ret = 0;
716
717 if (inode_points_to_dirent(target, d))
718 return 0;
719
720 if (!target->bi_dir &&
721 !target->bi_dir_offset) {
722 fsck_err_on(S_ISDIR(target->bi_mode),
723 trans, inode_dir_missing_backpointer,
724 "directory with missing backpointer\n%s",
725 (printbuf_reset(&buf),
726 bch2_bkey_val_to_text(&buf, c, d.s_c),
727 prt_printf(&buf, "\n"),
728 bch2_inode_unpacked_to_text(&buf, target),
729 buf.buf));
730
731 fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
732 trans, inode_unlinked_but_has_dirent,
733 "inode unlinked but has dirent\n%s",
734 (printbuf_reset(&buf),
735 bch2_bkey_val_to_text(&buf, c, d.s_c),
736 prt_printf(&buf, "\n"),
737 bch2_inode_unpacked_to_text(&buf, target),
738 buf.buf));
739
740 target->bi_flags &= ~BCH_INODE_unlinked;
741 target->bi_dir = d.k->p.inode;
742 target->bi_dir_offset = d.k->p.offset;
743 return __bch2_fsck_write_inode(trans, target);
744 }
745
746 struct bkey_s_c_dirent bp_dirent =
747 bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents,
748 SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot),
749 0, dirent);
750 ret = bkey_err(bp_dirent);
751 if (ret && !bch2_err_matches(ret, ENOENT))
752 goto err;
753
754 bool backpointer_exists = !ret;
755 ret = 0;
756
757 if (!backpointer_exists) {
758 if (fsck_err(trans, inode_wrong_backpointer,
759 "inode %llu:%u has wrong backpointer:\n"
760 "got %llu:%llu\n"
761 "should be %llu:%llu",
762 target->bi_inum, target->bi_snapshot,
763 target->bi_dir,
764 target->bi_dir_offset,
765 d.k->p.inode,
766 d.k->p.offset)) {
767 target->bi_dir = d.k->p.inode;
768 target->bi_dir_offset = d.k->p.offset;
769 ret = __bch2_fsck_write_inode(trans, target);
770 }
771 } else {
772 printbuf_reset(&buf);
773 bch2_bkey_val_to_text(&buf, c, d.s_c);
774 prt_newline(&buf);
775 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
776
777 if (S_ISDIR(target->bi_mode) || target->bi_subvol) {
778 /*
779 * XXX: verify connectivity of the other dirent
780 * up to the root before removing this one
781 *
782 * Additionally, bch2_lookup would need to cope with the
783 * dirent it found being removed - or should we remove
784 * the other one, even though the inode points to it?
785 */
786 if (in_fsck) {
787 if (fsck_err(trans, inode_dir_multiple_links,
788 "%s %llu:%u with multiple links\n%s",
789 S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
790 target->bi_inum, target->bi_snapshot, buf.buf))
791 ret = bch2_fsck_remove_dirent(trans, d.k->p);
792 } else {
793 bch2_fs_inconsistent(c,
794 "%s %llu:%u with multiple links\n%s",
795 S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
796 target->bi_inum, target->bi_snapshot, buf.buf);
797 }
798
799 goto out;
800 } else {
801 /*
802 * hardlinked file with nlink 0:
803 * We're just adjusting nlink here so check_nlinks() will pick
804 * it up, it ignores inodes with nlink 0
805 */
806 if (fsck_err_on(!target->bi_nlink,
807 trans, inode_multiple_links_but_nlink_0,
808 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
809 target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
810 target->bi_nlink++;
811 target->bi_flags &= ~BCH_INODE_unlinked;
812 ret = __bch2_fsck_write_inode(trans, target);
813 if (ret)
814 goto err;
815 }
816 }
817 }
818out:
819err:
820fsck_err:
821 bch2_trans_iter_exit(trans, &bp_iter);
822 printbuf_exit(&buf);
823 bch_err_fn(c, ret);
824 return ret;
825}
826
827int __bch2_check_dirent_target(struct btree_trans *trans,
828 struct btree_iter *dirent_iter,
829 struct bkey_s_c_dirent d,
830 struct bch_inode_unpacked *target,
831 bool in_fsck)
832{
833 struct bch_fs *c = trans->c;
834 struct printbuf buf = PRINTBUF;
835 int ret = 0;
836
837 ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck);
838 if (ret)
839 goto err;
840
841 if (fsck_err_on(d.v->d_type != inode_d_type(target),
842 trans, dirent_d_type_wrong,
843 "incorrect d_type: got %s, should be %s:\n%s",
844 bch2_d_type_str(d.v->d_type),
845 bch2_d_type_str(inode_d_type(target)),
846 (printbuf_reset(&buf),
847 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
848 struct bkey_i_dirent *n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
849 ret = PTR_ERR_OR_ZERO(n);
850 if (ret)
851 goto err;
852
853 bkey_reassemble(&n->k_i, d.s_c);
854 n->v.d_type = inode_d_type(target);
855 if (n->v.d_type == DT_SUBVOL) {
856 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
857 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
858 } else {
859 n->v.d_inum = cpu_to_le64(target->bi_inum);
860 }
861
862 ret = bch2_trans_update(trans, dirent_iter, &n->k_i,
863 BTREE_UPDATE_internal_snapshot_node);
864 if (ret)
865 goto err;
866 }
867err:
868fsck_err:
869 printbuf_exit(&buf);
870 bch_err_fn(c, ret);
871 return ret;
872}
873
874/*
875 * BCH_INODE_has_case_insensitive:
876 * We have to track whether directories have any descendent directory that is
877 * casefolded - for overlayfs:
878 */
879
880static int bch2_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum)
881{
882 struct btree_iter iter = {};
883 int ret = 0;
884
885 while (true) {
886 struct bch_inode_unpacked inode;
887 ret = bch2_inode_peek(trans, &iter, &inode, inum,
888 BTREE_ITER_intent|BTREE_ITER_with_updates);
889 if (ret)
890 break;
891
892 if (inode.bi_flags & BCH_INODE_has_case_insensitive)
893 break;
894
895 inode.bi_flags |= BCH_INODE_has_case_insensitive;
896 ret = bch2_inode_write(trans, &iter, &inode);
897 if (ret)
898 break;
899
900 bch2_trans_iter_exit(trans, &iter);
901 if (subvol_inum_eq(inum, BCACHEFS_ROOT_SUBVOL_INUM))
902 break;
903
904 inum = parent_inum(inum, &inode);
905 }
906
907 bch2_trans_iter_exit(trans, &iter);
908 return ret;
909}
910
911int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum,
912 struct bch_inode_unpacked *inode)
913{
914 if (!bch2_inode_casefold(trans->c, inode))
915 return 0;
916
917 inode->bi_flags |= BCH_INODE_has_case_insensitive;
918
919 return bch2_propagate_has_case_insensitive(trans, parent_inum(inum, inode));
920}
921
922int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
923 struct bch_inode_unpacked *inode,
924 snapshot_id_list *snapshot_overwrites,
925 bool *do_update)
926{
927 struct printbuf buf = PRINTBUF;
928 bool repairing_parents = false;
929 int ret = 0;
930
931 if (!S_ISDIR(inode->bi_mode)) {
932 /*
933 * Old versions set bi_casefold for non dirs, but that's
934 * unnecessary and wasteful
935 */
936 if (inode->bi_casefold) {
937 inode->bi_casefold = 0;
938 *do_update = true;
939 }
940 return 0;
941 }
942
943 if (trans->c->sb.version < bcachefs_metadata_version_inode_has_case_insensitive)
944 return 0;
945
946 if (bch2_inode_casefold(trans->c, inode) &&
947 !(inode->bi_flags & BCH_INODE_has_case_insensitive)) {
948 prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ",
949 inode->bi_inum, inode->bi_snapshot);
950
951 ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
952 snapshot_overwrites, &buf);
953 if (ret)
954 goto err;
955
956 if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
957 inode->bi_flags |= BCH_INODE_has_case_insensitive;
958 *do_update = true;
959 }
960 }
961
962 if (!(inode->bi_flags & BCH_INODE_has_case_insensitive))
963 goto out;
964
965 struct bch_inode_unpacked dir = *inode;
966 u32 snapshot = dir.bi_snapshot;
967
968 while (!(dir.bi_inum == BCACHEFS_ROOT_INO &&
969 dir.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
970 if (dir.bi_parent_subvol) {
971 ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot);
972 if (ret)
973 goto err;
974
975 snapshot_overwrites = NULL;
976 }
977
978 ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0);
979 if (ret)
980 goto err;
981
982 if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) {
983 prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");
984
985 ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
986 snapshot_overwrites, &buf);
987 if (ret)
988 goto err;
989
990 if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
991 dir.bi_flags |= BCH_INODE_has_case_insensitive;
992 ret = __bch2_fsck_write_inode(trans, &dir);
993 if (ret)
994 goto err;
995 }
996 }
997
998 /*
999 * We only need to check the first parent, unless we find an
1000 * inconsistency
1001 */
1002 if (!repairing_parents)
1003 break;
1004 }
1005out:
1006err:
1007fsck_err:
1008 printbuf_exit(&buf);
1009 if (ret)
1010 return ret;
1011
1012 if (repairing_parents) {
1013 return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
1014 -BCH_ERR_transaction_restart_nested;
1015 }
1016
1017 return 0;
1018}