fs/f2fs/recovery.c at master · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / fs / f2fs / recovery.c
at master 24 kB view raw
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * fs/f2fs/recovery.c
  4 *
  5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  6 *             http://www.samsung.com/
  7 */
  8#include <linux/unaligned.h>
  9#include <linux/fs.h>
 10#include <linux/f2fs_fs.h>
 11#include <linux/sched/mm.h>
 12#include "f2fs.h"
 13#include "node.h"
 14#include "segment.h"
 15
 16/*
 17 * Roll forward recovery scenarios.
 18 *
 19 * [Term] F: fsync_mark, D: dentry_mark
 20 *
 21 * 1. inode(x) | CP | inode(x) | dnode(F)
 22 * -> Update the latest inode(x).
 23 *
 24 * 2. inode(x) | CP | inode(F) | dnode(F)
 25 * -> No problem.
 26 *
 27 * 3. inode(x) | CP | dnode(F) | inode(x)
 28 * -> Recover to the latest dnode(F), and drop the last inode(x)
 29 *
 30 * 4. inode(x) | CP | dnode(F) | inode(F)
 31 * -> No problem.
 32 *
 33 * 5. CP | inode(x) | dnode(F)
 34 * -> The inode(DF) was missing. Should drop this dnode(F).
 35 *
 36 * 6. CP | inode(DF) | dnode(F)
 37 * -> No problem.
 38 *
 39 * 7. CP | dnode(F) | inode(DF)
 40 * -> If f2fs_iget fails, then goto next to find inode(DF).
 41 *
 42 * 8. CP | dnode(F) | inode(x)
 43 * -> If f2fs_iget fails, then goto next to find inode(DF).
 44 *    But it will fail due to no inode(DF).
 45 */
 46
/*
 * Slab cache backing struct fsync_inode_entry objects.  It is created by
 * f2fs_create_recovery_cache() and destroyed by
 * f2fs_destroy_recovery_cache().
 */
static struct kmem_cache *fsync_entry_slab;
 48
 49bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
 50{
 51	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
 52
 53	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
 54		return false;
 55	if (NM_I(sbi)->max_rf_node_blocks &&
 56		percpu_counter_sum_positive(&sbi->rf_node_block_count) >=
 57						NM_I(sbi)->max_rf_node_blocks)
 58		return false;
 59	return true;
 60}
 61
 62static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
 63								nid_t ino)
 64{
 65	struct fsync_inode_entry *entry;
 66
 67	list_for_each_entry(entry, head, list)
 68		if (entry->inode->i_ino == ino)
 69			return entry;
 70
 71	return NULL;
 72}
 73
 74static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
 75			struct list_head *head, nid_t ino, bool quota_inode)
 76{
 77	struct inode *inode;
 78	struct fsync_inode_entry *entry;
 79	int err;
 80
 81	inode = f2fs_iget_retry(sbi->sb, ino);
 82	if (IS_ERR(inode))
 83		return ERR_CAST(inode);
 84
 85	err = f2fs_dquot_initialize(inode);
 86	if (err)
 87		goto err_out;
 88
 89	if (quota_inode) {
 90		err = dquot_alloc_inode(inode);
 91		if (err)
 92			goto err_out;
 93	}
 94
 95	entry = f2fs_kmem_cache_alloc(fsync_entry_slab,
 96					GFP_F2FS_ZERO, true, NULL);
 97	entry->inode = inode;
 98	list_add_tail(&entry->list, head);
 99
100	return entry;
101err_out:
102	iput(inode);
103	return ERR_PTR(err);
104}
105
106static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
107{
108	if (drop) {
109		/* inode should not be recovered, drop it */
110		f2fs_inode_synced(entry->inode);
111	}
112	iput(entry->inode);
113	list_del(&entry->list);
114	kmem_cache_free(fsync_entry_slab, entry);
115}
116
117static int init_recovered_filename(const struct inode *dir,
118				   struct f2fs_inode *raw_inode,
119				   struct f2fs_filename *fname,
120				   struct qstr *usr_fname)
121{
122	int err;
123
124	memset(fname, 0, sizeof(*fname));
125	fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen);
126	fname->disk_name.name = raw_inode->i_name;
127
128	if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN))
129		return -ENAMETOOLONG;
130
131	if (!IS_ENCRYPTED(dir)) {
132		usr_fname->name = fname->disk_name.name;
133		usr_fname->len = fname->disk_name.len;
134		fname->usr_fname = usr_fname;
135	}
136
137	/* Compute the hash of the filename */
138	if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) {
139		/*
140		 * In this case the hash isn't computable without the key, so it
141		 * was saved on-disk.
142		 */
143		if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN)
144			return -EINVAL;
145		fname->hash = get_unaligned((f2fs_hash_t *)
146				&raw_inode->i_name[fname->disk_name.len]);
147	} else if (IS_CASEFOLDED(dir)) {
148		err = f2fs_init_casefolded_name(dir, fname);
149		if (err)
150			return err;
151		f2fs_hash_filename(dir, fname);
152		/* Case-sensitive match is fine for recovery */
153		f2fs_free_casefolded_name(fname);
154	} else {
155		f2fs_hash_filename(dir, fname);
156	}
157	return 0;
158}
159
160static int recover_dentry(struct inode *inode, struct folio *ifolio,
161						struct list_head *dir_list)
162{
163	struct f2fs_inode *raw_inode = F2FS_INODE(ifolio);
164	nid_t pino = le32_to_cpu(raw_inode->i_pino);
165	struct f2fs_dir_entry *de;
166	struct f2fs_filename fname;
167	struct qstr usr_fname;
168	struct folio *folio;
169	struct inode *dir, *einode;
170	struct fsync_inode_entry *entry;
171	int err = 0;
172	char *name;
173
174	entry = get_fsync_inode(dir_list, pino);
175	if (!entry) {
176		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list,
177							pino, false);
178		if (IS_ERR(entry)) {
179			dir = ERR_CAST(entry);
180			err = PTR_ERR(entry);
181			goto out;
182		}
183	}
184
185	dir = entry->inode;
186	err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname);
187	if (err)
188		goto out;
189retry:
190	de = __f2fs_find_entry(dir, &fname, &folio);
191	if (de && inode->i_ino == le32_to_cpu(de->ino))
192		goto out_put;
193
194	if (de) {
195		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
196		if (IS_ERR(einode)) {
197			WARN_ON(1);
198			err = PTR_ERR(einode);
199			if (err == -ENOENT)
200				err = -EEXIST;
201			goto out_put;
202		}
203
204		err = f2fs_dquot_initialize(einode);
205		if (err) {
206			iput(einode);
207			goto out_put;
208		}
209
210		err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode));
211		if (err) {
212			iput(einode);
213			goto out_put;
214		}
215		f2fs_delete_entry(de, folio, dir, einode);
216		iput(einode);
217		goto retry;
218	} else if (IS_ERR(folio)) {
219		err = PTR_ERR(folio);
220	} else {
221		err = f2fs_add_dentry(dir, &fname, inode,
222					inode->i_ino, inode->i_mode);
223	}
224	if (err == -ENOMEM)
225		goto retry;
226	goto out;
227
228out_put:
229	f2fs_folio_put(folio, false);
230out:
231	if (file_enc_name(inode))
232		name = "<encrypted>";
233	else
234		name = raw_inode->i_name;
235	f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
236		    __func__, ino_of_node(ifolio), name,
237		    IS_ERR(dir) ? 0 : dir->i_ino, err);
238	return err;
239}
240
241static int recover_quota_data(struct inode *inode, struct folio *folio)
242{
243	struct f2fs_inode *raw = F2FS_INODE(folio);
244	struct iattr attr;
245	uid_t i_uid = le32_to_cpu(raw->i_uid);
246	gid_t i_gid = le32_to_cpu(raw->i_gid);
247	int err;
248
249	memset(&attr, 0, sizeof(attr));
250
251	attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid));
252	attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid));
253
254	if (!vfsuid_eq(attr.ia_vfsuid, i_uid_into_vfsuid(&nop_mnt_idmap, inode)))
255		attr.ia_valid |= ATTR_UID;
256	if (!vfsgid_eq(attr.ia_vfsgid, i_gid_into_vfsgid(&nop_mnt_idmap, inode)))
257		attr.ia_valid |= ATTR_GID;
258
259	if (!attr.ia_valid)
260		return 0;
261
262	err = dquot_transfer(&nop_mnt_idmap, inode, &attr);
263	if (err)
264		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
265	return err;
266}
267
268static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
269{
270	if (ri->i_inline & F2FS_PIN_FILE)
271		set_inode_flag(inode, FI_PIN_FILE);
272	else
273		clear_inode_flag(inode, FI_PIN_FILE);
274	if (ri->i_inline & F2FS_DATA_EXIST)
275		set_inode_flag(inode, FI_DATA_EXIST);
276	else
277		clear_inode_flag(inode, FI_DATA_EXIST);
278}
279
280static int recover_inode(struct inode *inode, struct folio *folio)
281{
282	struct f2fs_inode *raw = F2FS_INODE(folio);
283	struct f2fs_inode_info *fi = F2FS_I(inode);
284	char *name;
285	int err;
286
287	inode->i_mode = le16_to_cpu(raw->i_mode);
288
289	err = recover_quota_data(inode, folio);
290	if (err)
291		return err;
292
293	i_uid_write(inode, le32_to_cpu(raw->i_uid));
294	i_gid_write(inode, le32_to_cpu(raw->i_gid));
295
296	if (raw->i_inline & F2FS_EXTRA_ATTR) {
297		if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
298			F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
299								i_projid)) {
300			projid_t i_projid;
301			kprojid_t kprojid;
302
303			i_projid = (projid_t)le32_to_cpu(raw->i_projid);
304			kprojid = make_kprojid(&init_user_ns, i_projid);
305
306			if (!projid_eq(kprojid, fi->i_projid)) {
307				err = f2fs_transfer_project_quota(inode,
308								kprojid);
309				if (err)
310					return err;
311				fi->i_projid = kprojid;
312			}
313		}
314	}
315
316	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
317	inode_set_atime(inode, le64_to_cpu(raw->i_atime),
318			le32_to_cpu(raw->i_atime_nsec));
319	inode_set_ctime(inode, le64_to_cpu(raw->i_ctime),
320			le32_to_cpu(raw->i_ctime_nsec));
321	inode_set_mtime(inode, le64_to_cpu(raw->i_mtime),
322			le32_to_cpu(raw->i_mtime_nsec));
323
324	fi->i_advise = raw->i_advise;
325	fi->i_flags = le32_to_cpu(raw->i_flags);
326	f2fs_set_inode_flags(inode);
327	fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures);
328
329	recover_inline_flags(inode, raw);
330
331	f2fs_mark_inode_dirty_sync(inode, true);
332
333	if (file_enc_name(inode))
334		name = "<encrypted>";
335	else
336		name = F2FS_INODE(folio)->i_name;
337
338	f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
339		    ino_of_node(folio), name, raw->i_inline);
340	return 0;
341}
342
343static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
344				unsigned int ra_blocks, unsigned int blkaddr,
345				unsigned int next_blkaddr)
346{
347	if (blkaddr + 1 == next_blkaddr)
348		ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
349							ra_blocks * 2);
350	else if (next_blkaddr % BLKS_PER_SEG(sbi))
351		ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
352							ra_blocks / 2);
353	return ra_blocks;
354}
355
356/* Detect looped node chain with Floyd's cycle detection algorithm. */
357static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
358		block_t *blkaddr_fast, bool *is_detecting)
359{
360	unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
361	int i;
362
363	if (!*is_detecting)
364		return 0;
365
366	for (i = 0; i < 2; i++) {
367		struct folio *folio;
368
369		if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) {
370			*is_detecting = false;
371			return 0;
372		}
373
374		folio = f2fs_get_tmp_folio(sbi, *blkaddr_fast);
375		if (IS_ERR(folio))
376			return PTR_ERR(folio);
377
378		if (!is_recoverable_dnode(folio)) {
379			f2fs_folio_put(folio, true);
380			*is_detecting = false;
381			return 0;
382		}
383
384		ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast,
385					next_blkaddr_of_node(folio));
386
387		*blkaddr_fast = next_blkaddr_of_node(folio);
388		f2fs_folio_put(folio, true);
389
390		f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks);
391	}
392
393	if (*blkaddr_fast == blkaddr) {
394		f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u."
395				" Run fsck to fix it.", __func__, blkaddr);
396		return -EINVAL;
397	}
398	return 0;
399}
400
/*
 * Walk the node chain that starts at the next free block of the
 * CURSEG_WARM_NODE segment and collect, in @head, one entry for every inode
 * that owns an fsync-marked dnode.
 *
 * @sbi:        filesystem instance
 * @head:       list receiving the fsync_inode_entry objects
 * @check_only: when true, inode pages are not recovered during the walk
 * @new_inode:  set to true (only when @check_only is true) if an fsynced
 *              dnode is seen whose inode lookup fails with -ENOENT
 *
 * For each entry, ->blkaddr is updated to the latest fsynced dnode block of
 * that inode and ->last_dentry to the latest fsynced inode block carrying a
 * dentry mark.
 *
 * Returns 0 when the walk ends at an invalid block address or at a
 * non-recoverable dnode, otherwise a negative errno (folio read failure,
 * inode page recovery failure, inode lookup failure other than -ENOENT, or
 * a looped chain reported by sanity_check_node_chain()).
 */
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
				bool check_only, bool *new_inode)
{
	struct curseg_info *curseg;
	block_t blkaddr, blkaddr_fast;
	bool is_detecting = true;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
	/* slow and fast cursors of the loop detector start at the same block */
	blkaddr_fast = blkaddr;

	while (1) {
		struct fsync_inode_entry *entry;
		struct folio *folio;

		/* end of chain: no folio is held here, so return directly */
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
			return 0;

		folio = f2fs_get_tmp_folio(sbi, blkaddr);
		if (IS_ERR(folio)) {
			err = PTR_ERR(folio);
			break;
		}

		if (!is_recoverable_dnode(folio)) {
			f2fs_folio_put(folio, true);
			break;
		}

		/* nodes without an fsync mark are only stepped over */
		if (!is_fsync_dnode(folio))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(folio));
		if (!entry) {
			bool quota_inode = false;

			/*
			 * An fsynced inode block with a dentry mark: recover
			 * the inode page first, and ask add_fsync_inode() to
			 * do the quota inode allocation for it.
			 */
			if (!check_only &&
					IS_INODE(folio) &&
					is_dent_dnode(folio)) {
				err = f2fs_recover_inode_page(sbi, folio);
				if (err) {
					f2fs_folio_put(folio, true);
					break;
				}
				quota_inode = true;
			}

			entry = add_fsync_inode(sbi, head, ino_of_node(folio),
								quota_inode);
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
				/*
				 * CP | dnode(F) | inode(DF)
				 * For this case, we should not give up now.
				 */
				if (err == -ENOENT) {
					if (check_only)
						*new_inode = true;
					/* err is overwritten by the sanity check below */
					goto next;
				}
				f2fs_folio_put(folio, true);
				break;
			}
		}
		/* remember the latest fsynced dnode block seen for this inode */
		entry->blkaddr = blkaddr;

		if (IS_INODE(folio) && is_dent_dnode(folio))
			entry->last_dentry = blkaddr;
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(folio);
		f2fs_folio_put(folio, true);

		err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast,
				&is_detecting);
		if (err)
			break;
	}
	return err;
}
483
484static void destroy_fsync_dnodes(struct list_head *head, int drop)
485{
486	struct fsync_inode_entry *entry, *tmp;
487
488	list_for_each_entry_safe(entry, tmp, head, list)
489		del_fsync_inode(entry, drop);
490}
491
/*
 * If @blkaddr is still marked valid in its segment, find the node that
 * references it through the segment summary and truncate that reference.
 *
 * @sbi:     filesystem instance
 * @blkaddr: data block address that recovery wants to assign to @dn
 * @dn:      dnode currently being recovered; its inode folio and node folio
 *           are reused when the summary points at them
 *
 * Returns 0 when the block is not valid, when the old reference was handled,
 * or when the old dnode could not be looked up (that failure is ignored).
 * Returns a negative errno when reading the summary or node folio fails,
 * when the owning inode cannot be loaded or its quota initialized, or
 * -EFSCORRUPTED when the summary's ofs_in_node is out of range.
 */
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
			block_t blkaddr, struct dnode_of_data *dn)
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	struct f2fs_summary_block *sum_node;
	struct f2fs_summary sum;
	struct folio *sum_folio, *node_folio;
	struct dnode_of_data tdn = *dn;
	nid_t ino, nid;
	struct inode *inode;
	unsigned int offset, ofs_in_node, max_addrs;
	block_t bidx;
	int i;

	/* nothing to undo when the block is not marked valid */
	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
		return 0;

	/* Get the previous summary */
	/* first from the in-memory data cursegs, if one of them owns segno */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);

		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
			goto got_it;
		}
	}

	/* otherwise from the summary folio of that segment */
	sum_folio = f2fs_get_sum_folio(sbi, segno);
	if (IS_ERR(sum_folio))
		return PTR_ERR(sum_folio);
	sum_node = SUM_BLK_PAGE_ADDR(sum_folio, segno);
	sum = sum_node->entries[blkoff];
	f2fs_folio_put(sum_folio, true);
got_it:
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	ofs_in_node = le16_to_cpu(sum.ofs_in_node);

	max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode);
	if (ofs_in_node >= max_addrs) {
		f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
			ofs_in_node, dn->inode->i_ino, nid, max_addrs);
		f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUMMARY);
		return -EFSCORRUPTED;
	}

	if (dn->inode->i_ino == nid) {
		/* the old owner is the inode block of the inode being recovered */
		tdn.nid = nid;
		if (!dn->inode_folio_locked)
			folio_lock(dn->inode_folio);
		tdn.node_folio = dn->inode_folio;
		tdn.ofs_in_node = ofs_in_node;
		goto truncate_out;
	} else if (dn->nid == nid) {
		/* the old owner is the very dnode being recovered */
		tdn.ofs_in_node = ofs_in_node;
		goto truncate_out;
	}

	/* Get the node page */
	node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
	if (IS_ERR(node_folio))
		return PTR_ERR(node_folio);

	offset = ofs_of_node(node_folio);
	ino = ino_of_node(node_folio);
	f2fs_folio_put(node_folio, true);

	if (ino != dn->inode->i_ino) {
		int ret;

		/* Deallocate previous index in the node page */
		inode = f2fs_iget_retry(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);

		ret = f2fs_dquot_initialize(inode);
		if (ret) {
			iput(inode);
			return ret;
		}
	} else {
		inode = dn->inode;
	}

	bidx = f2fs_start_bidx_of_node(offset, inode) +
				le16_to_cpu(sum.ofs_in_node);

	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_folio_locked)
		folio_unlock(dn->inode_folio);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	/* a failed lookup is not reported to the caller */
	if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		f2fs_truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	/* undo what was taken above: the extra inode ref or the dropped lock */
	if (ino != dn->inode->i_ino)
		iput(inode);
	else if (dn->inode_folio_locked)
		folio_lock(dn->inode_folio);
	return 0;

truncate_out:
	if (f2fs_data_blkaddr(&tdn) == blkaddr)
		f2fs_truncate_data_blocks_range(&tdn, 1);
	/* release the inode folio lock only if it was taken in this function */
	if (dn->inode->i_ino == nid && !dn->inode_folio_locked)
		folio_unlock(dn->inode_folio);
	return 0;
}
611
612static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn)
613{
614	int i, err = 0;
615
616	for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) {
617		err = f2fs_reserve_new_block(dn);
618		if (!err)
619			break;
620	}
621
622	return err;
623}
624
/*
 * Replay one fsynced node block (@folio) belonging to @inode.
 *
 * Step 1 recovers xattr state (inline xattrs for an inode block, or the
 * xattr node itself, in which case nothing else is done).  Step 2 recovers
 * inline data.  Step 3 walks every block address slot of the node and
 * brings the current dnode in line with the fsynced copy: slots are
 * truncated, reserved, or pointed at the fsynced data block, after any
 * older reference to that block has been removed.
 *
 * Returns 0 on success or a negative errno; -EFSCORRUPTED is returned for
 * out-of-range block addresses, a node-offset mismatch, or a destination
 * block that fails the DATA_GENERIC_ENHANCE_UPDATE check.  The result is
 * always logged.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct folio *folio)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int start = 0, end = 0, index;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(folio)) {
		err = f2fs_recover_inline_xattr(inode, folio);
		if (err)
			goto out;
	} else if (f2fs_has_xattr_block(ofs_of_node(folio))) {
		/* an xattr node carries no data indices: done either way */
		err = f2fs_recover_xattr_data(inode, folio);
		if (!err)
			recovered++;
		goto out;
	}

	/* step 2: recover inline data */
	err = f2fs_recover_inline_data(inode, folio);
	if (err) {
		/* a return of 1 is not an error; it ends recovery of this node */
		if (err == 1)
			err = 0;
		goto out;
	}

	/* step 3: recover data indices */
	start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode);
	end = start + ADDRS_PER_PAGE(folio, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
	err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			memalloc_retry_wait(GFP_NOFS);
			goto retry_dn;
		}
		goto out;
	}

	f2fs_folio_wait_writeback(dn.node_folio, NODE, true, true);

	err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
	if (err)
		goto err;

	f2fs_bug_on(sbi, ni.ino != ino_of_node(folio));

	/* the current dnode and the fsynced copy must sit at the same offset */
	if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) {
		f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
			  inode->i_ino, ofs_of_node(dn.node_folio),
			  ofs_of_node(folio));
		err = -EFSCORRUPTED;
		f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
		goto err;
	}

	for (index = start; index < end; index++, dn.ofs_in_node++) {
		block_t src, dest;

		/* src: address in the current dnode; dest: address in the fsynced copy */
		src = f2fs_data_blkaddr(&dn);
		dest = data_blkaddr(dn.inode, folio, dn.ofs_in_node);

		if (__is_valid_data_blkaddr(src) &&
			!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
			err = -EFSCORRUPTED;
			goto err;
		}

		if (__is_valid_data_blkaddr(dest) &&
			!f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
			err = -EFSCORRUPTED;
			goto err;
		}

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			f2fs_truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/* grow i_size to cover this index unless the file keeps its size */
		if (!file_keep_isize(inode) &&
			(i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT)))
			f2fs_i_size_write(inode,
				(loff_t)(index + 1) << PAGE_SHIFT);

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			f2fs_truncate_data_blocks_range(&dn, 1);

			err = f2fs_reserve_new_block_retry(&dn);
			if (err)
				goto err;
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
			if (src == NULL_ADDR) {
				err = f2fs_reserve_new_block_retry(&dn);
				if (err)
					goto err;
			}
retry_prev:
			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err) {
				if (err == -ENOMEM) {
					memalloc_retry_wait(GFP_NOFS);
					goto retry_prev;
				}
				goto err;
			}

			/* dest passing this check here is treated as corruption */
			if (f2fs_is_valid_blkaddr(sbi, dest,
					DATA_GENERIC_ENHANCE_UPDATE)) {
				f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
					dest, inode->i_ino, dn.ofs_in_node);
				err = -EFSCORRUPTED;
				goto err;
			}

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			recovered++;
		}
	}

	/* take over the footer of the fsynced copy, then restamp nid/ino/offset */
	copy_node_footer(dn.node_folio, folio);
	fill_node_footer(dn.node_folio, dn.nid, ni.ino,
					ofs_of_node(folio), false);
	folio_mark_dirty(dn.node_folio);
err:
	f2fs_put_dnode(&dn);
out:
	f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), "
		    "range (%u, %u), recovered = %d, err = %d",
		    inode->i_ino, nid_of_node(folio),
		    file_keep_isize(inode) ? "keep" : "recover",
		    start, end, recovered, err);
	return err;
}
778
/*
 * Second pass of roll-forward recovery: walk the warm-node chain again and
 * replay every node that belongs to an inode collected in @inode_list.
 *
 * @sbi:            filesystem instance
 * @inode_list:     inodes found by find_fsync_dnodes()
 * @tmp_inode_list: receives each entry once its last fsynced dnode
 *                  (entry->blkaddr) has been replayed
 * @dir_list:       parent directories touched while recovering dentries
 *
 * Returns 0 on success or the first negative errno from reading a node,
 * recovering an inode/dentry/dnode, or f2fs_allocate_new_segments().
 * Summary counters are logged before returning.
 */
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
		struct list_head *tmp_inode_list, struct list_head *dir_list)
{
	struct curseg_info *curseg;
	int err = 0;
	block_t blkaddr;
	unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
	unsigned int recoverable_dnode = 0;
	unsigned int fsynced_dnode = 0;
	unsigned int total_dnode = 0;
	unsigned int recovered_inode = 0;
	unsigned int recovered_dentry = 0;
	unsigned int recovered_dnode = 0;

	f2fs_notice(sbi, "do_recover_data: start to recover dnode");

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;
		struct folio *folio;

		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
			break;

		folio = f2fs_get_tmp_folio(sbi, blkaddr);
		if (IS_ERR(folio)) {
			err = PTR_ERR(folio);
			break;
		}

		if (!is_recoverable_dnode(folio)) {
			f2fs_folio_put(folio, true);
			break;
		}
		recoverable_dnode++;

		/* nodes of inodes that were not collected are skipped */
		entry = get_fsync_inode(inode_list, ino_of_node(folio));
		if (!entry)
			goto next;
		fsynced_dnode++;
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
		 * So, call recover_inode for the inode update.
		 */
		if (IS_INODE(folio)) {
			err = recover_inode(entry->inode, folio);
			if (err) {
				f2fs_folio_put(folio, true);
				break;
			}
			recovered_inode++;
		}
		/* only the last dentry-marked inode block restores the dentry */
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, folio, dir_list);
			if (err) {
				f2fs_folio_put(folio, true);
				break;
			}
			recovered_dentry++;
		}
		err = do_recover_data(sbi, entry->inode, folio);
		if (err) {
			f2fs_folio_put(folio, true);
			break;
		}
		recovered_dnode++;

		/* last fsynced dnode of this inode replayed: park the entry */
		if (entry->blkaddr == blkaddr)
			list_move_tail(&entry->list, tmp_inode_list);
next:
		ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
					next_blkaddr_of_node(folio));

		/* check next segment */
		blkaddr = next_blkaddr_of_node(folio);
		f2fs_folio_put(folio, true);

		f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
		total_dnode++;
	}
	if (!err)
		err = f2fs_allocate_new_segments(sbi);

	f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, "
		"total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d",
		recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode,
		recovered_dentry, recovered_dnode, err);
	return err;
}
872
/*
 * Entry point of roll-forward recovery.
 *
 * @sbi:        filesystem instance
 * @check_only: when true, only report whether there is fsynced data to
 *              recover; nothing is replayed
 *
 * Holds cp_global_sem for writing while the node chain is scanned and
 * replayed.  When data was replayed, SBI_IS_RECOVERED is set and, if no
 * error occurred, a CP_RECOVERY checkpoint is written.
 *
 * Returns 1 in check_only mode when recoverable data (or a new inode) was
 * found; otherwise 0 on success or a negative errno.
 */
int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	LIST_HEAD(inode_list);
	LIST_HEAD(tmp_inode_list);
	LIST_HEAD(dir_list);
	int err;
	int ret = 0;
	unsigned long s_flags = sbi->sb->s_flags;
	bool need_writecp = false;
	bool new_inode = false;

	f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
					"check_only: %d", check_only);

	if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
		f2fs_info(sbi, "recover fsync data on readonly fs");

	/* prevent checkpoint */
	f2fs_down_write(&sbi->cp_global_sem);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list, check_only, &new_inode);
	/*
	 * Skip on error, or when nothing was collected -- except in
	 * check_only mode with new_inode set, which still counts as a hit.
	 */
	if (err < 0 || (list_empty(&inode_list) && (!check_only || !new_inode)))
		goto skip;

	if (check_only) {
		ret = 1;
		goto skip;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
	else
		f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
skip:
	/* a non-zero err is passed as "drop" so the inodes are not recovered */
	destroy_fsync_dnodes(&inode_list, err);
	destroy_fsync_dnodes(&tmp_inode_list, err);

	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	/*
	 * If fsync data succeeds or there is no fsync data to recover,
	 * and the f2fs is not read only, check and fix zoned block devices'
	 * write pointer consistency.
	 */
	if (!err)
		err = f2fs_check_and_fix_write_pointer(sbi);

	if (!err)
		clear_sbi_flag(sbi, SBI_POR_DOING);

	f2fs_up_write(&sbi->cp_global_sem);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list, err);

	if (need_writecp) {
		set_sbi_flag(sbi, SBI_IS_RECOVERED);

		if (!err) {
			struct cp_control cpc = {
				.reason = CP_RECOVERY,
			};
			stat_inc_cp_call_count(sbi, TOTAL_CALL);
			err = f2fs_write_checkpoint(sbi, &cpc);
		}
	}

	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */

	/* the check_only verdict takes precedence over err */
	return ret ? ret : err;
}
956
957int __init f2fs_create_recovery_cache(void)
958{
959	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
960					sizeof(struct fsync_inode_entry));
961	return fsync_entry_slab ? 0 : -ENOMEM;
962}
963
/* Destroy the slab cache set up by f2fs_create_recovery_cache(). */
void f2fs_destroy_recovery_cache(void)
{
	kmem_cache_destroy(fsync_entry_slab);
}