Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (c) 2025 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include <linux/fs.h>
7#include <linux/fsnotify.h>
8#include <linux/mempool.h>
9#include <linux/fserror.h>
10
/* Element count handed to the mempool when fserror_init sets up the pool. */
#define FSERROR_DEFAULT_EVENT_POOL_SIZE (32)

/* Backing pool for every struct fserror_event allocated in this file. */
static struct mempool fserror_events_pool;
14
15void fserror_mount(struct super_block *sb)
16{
17 /*
18 * The pending error counter is biased by 1 so that we don't wake_var
19 * until we're actually trying to unmount.
20 */
21 refcount_set(&sb->s_pending_errors, 1);
22}
23
24void fserror_unmount(struct super_block *sb)
25{
26 /*
27 * If we don't drop the pending error count to zero, then wait for it
28 * to drop below 1, which means that the pending errors cleared and
29 * hopefully we didn't saturate with 1 billion+ concurrent events.
30 */
31 if (!refcount_dec_and_test(&sb->s_pending_errors))
32 wait_var_event(&sb->s_pending_errors,
33 refcount_read(&sb->s_pending_errors) < 1);
34}
35
36static inline void fserror_pending_dec(struct super_block *sb)
37{
38 if (refcount_dec_and_test(&sb->s_pending_errors))
39 wake_up_var(&sb->s_pending_errors);
40}
41
42static inline void fserror_free_event(struct fserror_event *event)
43{
44 fserror_pending_dec(event->sb);
45 mempool_free(event, &fserror_events_pool);
46}
47
48static void fserror_worker(struct work_struct *work)
49{
50 struct fserror_event *event =
51 container_of(work, struct fserror_event, work);
52 struct super_block *sb = event->sb;
53
54 if (sb->s_flags & SB_ACTIVE) {
55 struct fs_error_report report = {
56 /* send positive error number to userspace */
57 .error = -event->error,
58 .inode = event->inode,
59 .sb = event->sb,
60 };
61
62 if (sb->s_op->report_error)
63 sb->s_op->report_error(event);
64
65 fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL,
66 NULL, 0);
67 }
68
69 iput(event->inode);
70 fserror_free_event(event);
71}
72
73static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
74 gfp_t gfp_flags)
75{
76 struct fserror_event *event = NULL;
77
78 /*
79 * If pending_errors already reached zero or is no longer active,
80 * the superblock is being deactivated so there's no point in
81 * continuing.
82 *
83 * The order of the check of s_pending_errors and SB_ACTIVE are
84 * mandated by order of accesses in generic_shutdown_super and
85 * fserror_unmount. Barriers are implicitly provided by the refcount
86 * manipulations in this function and fserror_unmount.
87 */
88 if (!refcount_inc_not_zero(&sb->s_pending_errors))
89 return NULL;
90 if (!(sb->s_flags & SB_ACTIVE))
91 goto out_pending;
92
93 event = mempool_alloc(&fserror_events_pool, gfp_flags);
94 if (!event)
95 goto out_pending;
96
97 /* mempool_alloc doesn't support GFP_ZERO */
98 memset(event, 0, sizeof(*event));
99 event->sb = sb;
100 INIT_WORK(&event->work, fserror_worker);
101
102 return event;
103
104out_pending:
105 fserror_pending_dec(sb);
106 return NULL;
107}
108
109/**
110 * fserror_report - report a filesystem error of some kind
111 *
112 * @sb: superblock of the filesystem
113 * @inode: inode within that filesystem, if applicable
114 * @type: type of error encountered
115 * @pos: start of inode range affected, if applicable
116 * @len: length of inode range affected, if applicable
117 * @error: error number encountered, must be negative
118 * @gfp: memory allocation flags for conveying the event to a worker,
119 * since this function can be called from atomic contexts
120 *
121 * Report details of a filesystem error to the super_operations::report_error
122 * callback if present; and to fsnotify for distribution to userspace. @sb,
123 * @gfp, @type, and @error must all be specified. For file I/O errors, the
124 * @inode, @pos, and @len fields must also be specified. For file metadata
125 * errors, @inode must be specified. If @inode is not NULL, then @inode->i_sb
126 * must point to @sb.
127 *
128 * Reporting work is deferred to a workqueue to ensure that ->report_error is
129 * called from process context without any locks held. An active reference to
130 * the inode is maintained until event handling is complete, and unmount will
131 * wait for queued events to drain.
132 */
133void fserror_report(struct super_block *sb, struct inode *inode,
134 enum fserror_type type, loff_t pos, u64 len, int error,
135 gfp_t gfp)
136{
137 struct fserror_event *event;
138
139 /* sb and inode must be from the same filesystem */
140 WARN_ON_ONCE(inode && inode->i_sb != sb);
141
142 /* error number must be negative */
143 WARN_ON_ONCE(error >= 0);
144
145 event = fserror_alloc_event(sb, gfp);
146 if (!event)
147 goto lost;
148
149 event->type = type;
150 event->pos = pos;
151 event->len = len;
152 event->error = error;
153
154 /*
155 * Can't iput from non-sleeping context, so grabbing another reference
156 * to the inode must be the last thing before submitting the event.
157 */
158 if (inode) {
159 event->inode = igrab(inode);
160 if (!event->inode)
161 goto lost_event;
162 }
163
164 /*
165 * Use schedule_work here even if we're already in process context so
166 * that fsnotify and super_operations::report_error implementations are
167 * guaranteed to run in process context without any locks held. Since
168 * errors are supposed to be rare, the overhead shouldn't kill us any
169 * more than the failing device will.
170 */
171 schedule_work(&event->work);
172 return;
173
174lost_event:
175 fserror_free_event(event);
176lost:
177 if (inode)
178 pr_err_ratelimited(
179 "%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d",
180 sb->s_id, inode->i_ino, type, pos, len, error);
181 else
182 pr_err_ratelimited(
183 "%s: lost filesystem error report for type %u error %d",
184 sb->s_id, type, error);
185}
186EXPORT_SYMBOL_GPL(fserror_report);
187
188static int __init fserror_init(void)
189{
190 return mempool_init_kmalloc_pool(&fserror_events_pool,
191 FSERROR_DEFAULT_EVENT_POOL_SIZE,
192 sizeof(struct fserror_event));
193}
194fs_initcall(fserror_init);