fs/notify/fsnotify.c at v5.7-rc2 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / fs / notify / fsnotify.c
at v5.7-rc2 397 lines 12 kB view raw
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
  4 */
  5
  6#include <linux/dcache.h>
  7#include <linux/fs.h>
  8#include <linux/gfp.h>
  9#include <linux/init.h>
 10#include <linux/module.h>
 11#include <linux/mount.h>
 12#include <linux/srcu.h>
 13
 14#include <linux/fsnotify_backend.h>
 15#include "fsnotify.h"
 16
/*
 * Clear all of the marks on an inode when it is being evicted from core.
 *
 * Thin exported wrapper: it only forwards @inode to
 * fsnotify_clear_marks_by_inode().
 */
void __fsnotify_inode_delete(struct inode *inode)
{
	fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
 25
/*
 * Mount counterpart of __fsnotify_inode_delete(): forwards @mnt to
 * fsnotify_clear_marks_by_mount().  Unlike the inode variant, no
 * EXPORT_SYMBOL follows this definition.
 */
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
	fsnotify_clear_marks_by_mount(mnt);
}
 30
 31/**
 32 * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
 33 * @sb: superblock being unmounted.
 34 *
 35 * Called during unmount with no locks held, so needs to be safe against
 36 * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
 37 */
 38static void fsnotify_unmount_inodes(struct super_block *sb)
 39{
 40	struct inode *inode, *iput_inode = NULL;
 41
 42	spin_lock(&sb->s_inode_list_lock);
 43	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 44		/*
 45		 * We cannot __iget() an inode in state I_FREEING,
 46		 * I_WILL_FREE, or I_NEW which is fine because by that point
 47		 * the inode cannot have any associated watches.
 48		 */
 49		spin_lock(&inode->i_lock);
 50		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
 51			spin_unlock(&inode->i_lock);
 52			continue;
 53		}
 54
 55		/*
 56		 * If i_count is zero, the inode cannot have any watches and
 57		 * doing an __iget/iput with SB_ACTIVE clear would actually
 58		 * evict all inodes with zero i_count from icache which is
 59		 * unnecessarily violent and may in fact be illegal to do.
 60		 * However, we should have been called /after/ evict_inodes
 61		 * removed all zero refcount inodes, in any case.  Test to
 62		 * be sure.
 63		 */
 64		if (!atomic_read(&inode->i_count)) {
 65			spin_unlock(&inode->i_lock);
 66			continue;
 67		}
 68
 69		__iget(inode);
 70		spin_unlock(&inode->i_lock);
 71		spin_unlock(&sb->s_inode_list_lock);
 72
 73		if (iput_inode)
 74			iput(iput_inode);
 75
 76		/* for each watch, send FS_UNMOUNT and then remove it */
 77		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 78
 79		fsnotify_inode_delete(inode);
 80
 81		iput_inode = inode;
 82
 83		cond_resched();
 84		spin_lock(&sb->s_inode_list_lock);
 85	}
 86	spin_unlock(&sb->s_inode_list_lock);
 87
 88	if (iput_inode)
 89		iput(iput_inode);
 90	/* Wait for outstanding inode references from connectors */
 91	wait_var_event(&sb->s_fsnotify_inode_refs,
 92		       !atomic_long_read(&sb->s_fsnotify_inode_refs));
 93}
 94
/*
 * fsnotify teardown for a superblock: first walk the sb's inodes with
 * fsnotify_unmount_inodes(), then hand @sb to fsnotify_clear_marks_by_sb().
 */
void fsnotify_sb_delete(struct super_block *sb)
{
	fsnotify_unmount_inodes(sb);
	fsnotify_clear_marks_by_sb(sb);
}
100
101/*
102 * Given an inode, first check if we care what happens to our children.  Inotify
103 * and dnotify both tell their parents about events.  If we care about any event
104 * on a child we run all of our children and set a dentry flag saying that the
105 * parent cares.  Thus when an event happens on a child it can quickly tell if
106 * if there is a need to find a parent and send the event to the parent.
107 */
108void __fsnotify_update_child_dentry_flags(struct inode *inode)
109{
110	struct dentry *alias;
111	int watched;
112
113	if (!S_ISDIR(inode->i_mode))
114		return;
115
116	/* determine if the children should tell inode about their events */
117	watched = fsnotify_inode_watches_children(inode);
118
119	spin_lock(&inode->i_lock);
120	/* run all of the dentries associated with this inode.  Since this is a
121	 * directory, there damn well better only be one item on this list */
122	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
123		struct dentry *child;
124
125		/* run all of the children of the original inode and fix their
126		 * d_flags to indicate parental interest (their parent is the
127		 * original inode) */
128		spin_lock(&alias->d_lock);
129		list_for_each_entry(child, &alias->d_subdirs, d_child) {
130			if (!child->d_inode)
131				continue;
132
133			spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
134			if (watched)
135				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
136			else
137				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
138			spin_unlock(&child->d_lock);
139		}
140		spin_unlock(&alias->d_lock);
141	}
142	spin_unlock(&inode->i_lock);
143}
144
145/* Notify this dentry's parent about a child's events. */
146int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
147		    int data_type)
148{
149	struct dentry *parent;
150	struct inode *p_inode;
151	int ret = 0;
152
153	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
154		return 0;
155
156	parent = dget_parent(dentry);
157	p_inode = parent->d_inode;
158
159	if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
160		__fsnotify_update_child_dentry_flags(p_inode);
161	} else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
162		struct name_snapshot name;
163
164		/* we are notifying a parent so come up with the new mask which
165		 * specifies these are events which came from a child. */
166		mask |= FS_EVENT_ON_CHILD;
167
168		take_dentry_name_snapshot(&name, dentry);
169		ret = fsnotify(p_inode, mask, data, data_type, &name.name, 0);
170		release_dentry_name_snapshot(&name);
171	}
172
173	dput(parent);
174
175	return ret;
176}
177EXPORT_SYMBOL_GPL(fsnotify_parent);
178
179static int send_to_group(struct inode *to_tell,
180			 __u32 mask, const void *data,
181			 int data_is, u32 cookie,
182			 const struct qstr *file_name,
183			 struct fsnotify_iter_info *iter_info)
184{
185	struct fsnotify_group *group = NULL;
186	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
187	__u32 marks_mask = 0;
188	__u32 marks_ignored_mask = 0;
189	struct fsnotify_mark *mark;
190	int type;
191
192	if (WARN_ON(!iter_info->report_mask))
193		return 0;
194
195	/* clear ignored on inode modification */
196	if (mask & FS_MODIFY) {
197		fsnotify_foreach_obj_type(type) {
198			if (!fsnotify_iter_should_report_type(iter_info, type))
199				continue;
200			mark = iter_info->marks[type];
201			if (mark &&
202			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
203				mark->ignored_mask = 0;
204		}
205	}
206
207	fsnotify_foreach_obj_type(type) {
208		if (!fsnotify_iter_should_report_type(iter_info, type))
209			continue;
210		mark = iter_info->marks[type];
211		/* does the object mark tell us to do something? */
212		if (mark) {
213			group = mark->group;
214			marks_mask |= mark->mask;
215			marks_ignored_mask |= mark->ignored_mask;
216		}
217	}
218
219	pr_debug("%s: group=%p to_tell=%p mask=%x marks_mask=%x marks_ignored_mask=%x"
220		 " data=%p data_is=%d cookie=%d\n",
221		 __func__, group, to_tell, mask, marks_mask, marks_ignored_mask,
222		 data, data_is, cookie);
223
224	if (!(test_mask & marks_mask & ~marks_ignored_mask))
225		return 0;
226
227	return group->ops->handle_event(group, to_tell, mask, data, data_is,
228					file_name, cookie, iter_info);
229}
230
231static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
232{
233	struct fsnotify_mark_connector *conn;
234	struct hlist_node *node = NULL;
235
236	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
237	if (conn)
238		node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
239
240	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
241}
242
243static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
244{
245	struct hlist_node *node = NULL;
246
247	if (mark)
248		node = srcu_dereference(mark->obj_list.next,
249					&fsnotify_mark_srcu);
250
251	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
252}
253
254/*
255 * iter_info is a multi head priority queue of marks.
256 * Pick a subset of marks from queue heads, all with the
257 * same group and set the report_mask for selected subset.
258 * Returns the report_mask of the selected subset.
259 */
260static unsigned int fsnotify_iter_select_report_types(
261		struct fsnotify_iter_info *iter_info)
262{
263	struct fsnotify_group *max_prio_group = NULL;
264	struct fsnotify_mark *mark;
265	int type;
266
267	/* Choose max prio group among groups of all queue heads */
268	fsnotify_foreach_obj_type(type) {
269		mark = iter_info->marks[type];
270		if (mark &&
271		    fsnotify_compare_groups(max_prio_group, mark->group) > 0)
272			max_prio_group = mark->group;
273	}
274
275	if (!max_prio_group)
276		return 0;
277
278	/* Set the report mask for marks from same group as max prio group */
279	iter_info->report_mask = 0;
280	fsnotify_foreach_obj_type(type) {
281		mark = iter_info->marks[type];
282		if (mark &&
283		    fsnotify_compare_groups(max_prio_group, mark->group) == 0)
284			fsnotify_iter_set_report_type(iter_info, type);
285	}
286
287	return iter_info->report_mask;
288}
289
290/*
291 * Pop from iter_info multi head queue, the marks that were iterated in the
292 * current iteration step.
293 */
294static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
295{
296	int type;
297
298	fsnotify_foreach_obj_type(type) {
299		if (fsnotify_iter_should_report_type(iter_info, type))
300			iter_info->marks[type] =
301				fsnotify_next_mark(iter_info->marks[type]);
302	}
303}
304
305/*
306 * This is the main call to fsnotify.  The VFS calls into hook specific functions
307 * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
308 * out to all of the registered fsnotify_group.  Those groups can then use the
309 * notification event in whatever means they feel necessary.
310 */
311int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
312	     const struct qstr *file_name, u32 cookie)
313{
314	const struct path *path = fsnotify_data_path(data, data_is);
315	struct fsnotify_iter_info iter_info = {};
316	struct super_block *sb = to_tell->i_sb;
317	struct mount *mnt = NULL;
318	__u32 mnt_or_sb_mask = sb->s_fsnotify_mask;
319	int ret = 0;
320	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
321
322	if (path) {
323		mnt = real_mount(path->mnt);
324		mnt_or_sb_mask |= mnt->mnt_fsnotify_mask;
325	}
326	/* An event "on child" is not intended for a mount/sb mark */
327	if (mask & FS_EVENT_ON_CHILD)
328		mnt_or_sb_mask = 0;
329
330	/*
331	 * Optimization: srcu_read_lock() has a memory barrier which can
332	 * be expensive.  It protects walking the *_fsnotify_marks lists.
333	 * However, if we do not walk the lists, we do not have to do
334	 * SRCU because we have no references to any objects and do not
335	 * need SRCU to keep them "alive".
336	 */
337	if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks &&
338	    (!mnt || !mnt->mnt_fsnotify_marks))
339		return 0;
340	/*
341	 * if this is a modify event we may need to clear the ignored masks
342	 * otherwise return if neither the inode nor the vfsmount/sb care about
343	 * this type of event.
344	 */
345	if (!(mask & FS_MODIFY) &&
346	    !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
347		return 0;
348
349	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
350
351	iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
352		fsnotify_first_mark(&to_tell->i_fsnotify_marks);
353	iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
354		fsnotify_first_mark(&sb->s_fsnotify_marks);
355	if (mnt) {
356		iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
357			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
358	}
359
360	/*
361	 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
362	 * ignore masks are properly reflected for mount/sb mark notifications.
363	 * That's why this traversal is so complicated...
364	 */
365	while (fsnotify_iter_select_report_types(&iter_info)) {
366		ret = send_to_group(to_tell, mask, data, data_is, cookie,
367				    file_name, &iter_info);
368
369		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
370			goto out;
371
372		fsnotify_iter_next(&iter_info);
373	}
374	ret = 0;
375out:
376	srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx);
377
378	return ret;
379}
380EXPORT_SYMBOL_GPL(fsnotify);
381
382static __init int fsnotify_init(void)
383{
384	int ret;
385
386	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26);
387
388	ret = init_srcu_struct(&fsnotify_mark_srcu);
389	if (ret)
390		panic("initializing fsnotify_mark_srcu");
391
392	fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
393						    SLAB_PANIC);
394
395	return 0;
396}
397core_initcall(fsnotify_init);