/*
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2021, sin-ack <sin-ack@protonmail.com>
 * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
8
#include <AK/Checked.h>
#include <AK/Singleton.h>
#include <AK/StringView.h>
#include <Kernel/API/InodeWatcherEvent.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/Inode.h>
#include <Kernel/FileSystem/InodeWatcher.h>
#include <Kernel/FileSystem/OpenFileDescription.h>
#include <Kernel/FileSystem/VirtualFileSystem.h>
#include <Kernel/KBufferBuilder.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
#include <Kernel/Net/LocalSocket.h>
#include <Kernel/Process.h>
21
22namespace Kernel {
23
24static Singleton<SpinlockProtected<Inode::AllInstancesList, LockRank::None>> s_all_instances;
25
26SpinlockProtected<Inode::AllInstancesList, LockRank::None>& Inode::all_instances()
27{
28 return s_all_instances;
29}
30
31void Inode::sync_all()
32{
33 Vector<NonnullRefPtr<Inode>, 32> inodes;
34 Inode::all_instances().with([&](auto& all_inodes) {
35 for (auto& inode : all_inodes) {
36 if (inode.is_metadata_dirty())
37 inodes.append(inode);
38 }
39 });
40
41 for (auto& inode : inodes) {
42 VERIFY(inode->is_metadata_dirty());
43 (void)inode->flush_metadata();
44 }
45}
46
47void Inode::sync()
48{
49 if (is_metadata_dirty())
50 (void)flush_metadata();
51 fs().flush_writes();
52}
53
54ErrorOr<NonnullOwnPtr<KBuffer>> Inode::read_entire(OpenFileDescription* description) const
55{
56 auto builder = TRY(KBufferBuilder::try_create());
57
58 u8 buffer[4096];
59 off_t offset = 0;
60 for (;;) {
61 auto buf = UserOrKernelBuffer::for_kernel_buffer(buffer);
62 auto nread = TRY(read_bytes(offset, sizeof(buffer), buf, description));
63 VERIFY(nread <= sizeof(buffer));
64 if (nread == 0)
65 break;
66 TRY(builder.append((char const*)buffer, nread));
67 offset += nread;
68 if (nread < sizeof(buffer))
69 break;
70 }
71
72 auto entire_file = builder.build();
73 if (!entire_file)
74 return ENOMEM;
75 return entire_file.release_nonnull();
76}
77
78ErrorOr<NonnullRefPtr<Custody>> Inode::resolve_as_link(Credentials const& credentials, Custody& base, RefPtr<Custody>* out_parent, int options, int symlink_recursion_level) const
79{
80 // The default implementation simply treats the stored
81 // contents as a path and resolves that. That is, it
82 // behaves exactly how you would expect a symlink to work.
83 auto contents = TRY(read_entire());
84 return VirtualFileSystem::the().resolve_path(credentials, StringView { contents->bytes() }, base, out_parent, options, symlink_recursion_level);
85}
86
87Inode::Inode(FileSystem& fs, InodeIndex index)
88 : m_file_system(fs)
89 , m_index(index)
90{
91 Inode::all_instances().with([&](auto& all_inodes) { all_inodes.append(*this); });
92}
93
94Inode::~Inode()
95{
96 m_watchers.for_each([&](auto& watcher) {
97 watcher->unregister_by_inode({}, identifier());
98 });
99}
100
101void Inode::will_be_destroyed()
102{
103 MutexLocker locker(m_inode_lock);
104 if (m_metadata_dirty)
105 (void)flush_metadata();
106}
107
108ErrorOr<size_t> Inode::write_bytes(off_t offset, size_t length, UserOrKernelBuffer const& target_buffer, OpenFileDescription* open_description)
109{
110 MutexLocker locker(m_inode_lock);
111 TRY(prepare_to_write_data());
112 return write_bytes_locked(offset, length, target_buffer, open_description);
113}
114
115ErrorOr<size_t> Inode::read_bytes(off_t offset, size_t length, UserOrKernelBuffer& buffer, OpenFileDescription* open_description) const
116{
117 MutexLocker locker(m_inode_lock, Mutex::Mode::Shared);
118 return read_bytes_locked(offset, length, buffer, open_description);
119}
120
121ErrorOr<void> Inode::update_timestamps([[maybe_unused]] Optional<Time> atime, [[maybe_unused]] Optional<Time> ctime, [[maybe_unused]] Optional<Time> mtime)
122{
123 return ENOTIMPL;
124}
125
126ErrorOr<void> Inode::increment_link_count()
127{
128 return ENOTIMPL;
129}
130
131ErrorOr<void> Inode::decrement_link_count()
132{
133 return ENOTIMPL;
134}
135
136ErrorOr<void> Inode::set_shared_vmobject(Memory::SharedInodeVMObject& vmobject)
137{
138 MutexLocker locker(m_inode_lock);
139 m_shared_vmobject = TRY(vmobject.try_make_weak_ptr<Memory::SharedInodeVMObject>());
140 return {};
141}
142
143LockRefPtr<LocalSocket> Inode::bound_socket() const
144{
145 return m_bound_socket.strong_ref();
146}
147
148bool Inode::bind_socket(LocalSocket& socket)
149{
150 MutexLocker locker(m_inode_lock);
151 if (m_bound_socket)
152 return false;
153 m_bound_socket = socket;
154 return true;
155}
156
157bool Inode::unbind_socket()
158{
159 MutexLocker locker(m_inode_lock);
160 if (!m_bound_socket)
161 return false;
162 m_bound_socket = nullptr;
163 return true;
164}
165
166ErrorOr<void> Inode::register_watcher(Badge<InodeWatcher>, InodeWatcher& watcher)
167{
168 return m_watchers.with([&](auto& watchers) -> ErrorOr<void> {
169 VERIFY(!watchers.contains(&watcher));
170 TRY(watchers.try_set(&watcher));
171 return {};
172 });
173}
174
175void Inode::unregister_watcher(Badge<InodeWatcher>, InodeWatcher& watcher)
176{
177 m_watchers.with([&](auto& watchers) {
178 VERIFY(watchers.contains(&watcher));
179 watchers.remove(&watcher);
180 });
181}
182
183ErrorOr<NonnullRefPtr<FIFO>> Inode::fifo()
184{
185 MutexLocker locker(m_inode_lock);
186 VERIFY(metadata().is_fifo());
187
188 // FIXME: Release m_fifo when it is closed by all readers and writers
189 if (!m_fifo)
190 m_fifo = TRY(FIFO::try_create(metadata().uid));
191
192 return NonnullRefPtr { *m_fifo };
193}
194
195void Inode::set_metadata_dirty(bool metadata_dirty)
196{
197 MutexLocker locker(m_inode_lock);
198
199 if (metadata_dirty) {
200 // Sanity check.
201 VERIFY(!fs().is_readonly());
202 }
203
204 if (m_metadata_dirty == metadata_dirty)
205 return;
206
207 m_metadata_dirty = metadata_dirty;
208 if (m_metadata_dirty) {
209 // FIXME: Maybe we should hook into modification events somewhere else, I'm not sure where.
210 // We don't always end up on this particular code path, for instance when writing to an ext2fs file.
211 m_watchers.for_each([&](auto& watcher) {
212 watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::MetadataModified);
213 });
214 }
215}
216
217void Inode::did_add_child(InodeIdentifier, StringView name)
218{
219 m_watchers.for_each([&](auto& watcher) {
220 watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::ChildCreated, name);
221 });
222}
223
224void Inode::did_remove_child(InodeIdentifier, StringView name)
225{
226 if (name == "." || name == "..") {
227 // These are just aliases and are not interesting to userspace.
228 return;
229 }
230
231 m_watchers.for_each([&](auto& watcher) {
232 watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::ChildDeleted, name);
233 });
234}
235
236void Inode::did_modify_contents()
237{
238 // FIXME: What happens if this fails?
239 // ENOTIMPL would be a meaningless error to return here
240 auto now = kgettimeofday();
241 (void)update_timestamps({}, now, now);
242
243 m_watchers.for_each([&](auto& watcher) {
244 watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::ContentModified);
245 });
246}
247
248void Inode::did_delete_self()
249{
250 m_watchers.for_each([&](auto& watcher) {
251 watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::Deleted);
252 });
253}
254
255ErrorOr<void> Inode::prepare_to_write_data()
256{
257 VERIFY(m_inode_lock.is_locked());
258 if (fs().is_readonly())
259 return EROFS;
260 auto metadata = this->metadata();
261 if (metadata.is_setuid() || metadata.is_setgid()) {
262 dbgln("Inode::prepare_to_write_data(): Stripping SUID/SGID bits from {}", identifier());
263 return chmod(metadata.mode & ~(04000 | 02000));
264 }
265 return {};
266}
267
268LockRefPtr<Memory::SharedInodeVMObject> Inode::shared_vmobject() const
269{
270 MutexLocker locker(m_inode_lock);
271 return m_shared_vmobject.strong_ref();
272}
273
// Returns true if [start1, start1+len1) and [start2, start2+len2) overlap.
// A length of zero means "until infinity", so such a range overlaps anything
// that starts at or after its own start.
template<typename T>
static inline bool range_overlap(T start1, T len1, T start2, T len2)
{
    bool first_reaches_second = (start1 < start2 + len2) || len2 == 0;
    bool second_reaches_first = (start2 < start1 + len1) || len1 == 0;
    return first_reaches_second && second_reaches_first;
}
279
280static inline ErrorOr<void> normalize_flock(OpenFileDescription const& description, flock& lock)
281{
282 off_t start;
283 switch (lock.l_whence) {
284 case SEEK_SET:
285 start = lock.l_start;
286 break;
287 case SEEK_CUR:
288 start = description.offset() + lock.l_start;
289 break;
290 case SEEK_END:
291 // FIXME: Implement SEEK_END and negative lengths.
292 return ENOTSUP;
293 default:
294 return EINVAL;
295 }
296 lock = { lock.l_type, SEEK_SET, start, lock.l_len, 0 };
297 return {};
298}
299
300bool Inode::can_apply_flock(flock const& new_lock, Optional<OpenFileDescription const&> description) const
301{
302 VERIFY(new_lock.l_whence == SEEK_SET);
303
304 if (new_lock.l_type == F_UNLCK)
305 return true;
306
307 return m_flocks.with([&](auto& flocks) {
308 for (auto const& lock : flocks) {
309 if (!range_overlap(lock.start, lock.len, new_lock.l_start, new_lock.l_len))
310 continue;
311
312 // There are two cases where we can attempt downgrade:
313 //
314 // 1) We're the owner of this lock. The downgrade will immediately
315 // succeed.
316 // 2) We're not the owner of this lock. Our downgrade attempt will
317 // fail, and the thread will start blocking on an FlockBlocker.
318 //
319 // For the first case, we get the description from try_apply_flock
320 // below. For the second case, the check below would always be
321 // false, so there is no need to store the description in the
322 // blocker in the first place.
323 if (new_lock.l_type == F_RDLCK && lock.type == F_WRLCK)
324 return description.has_value() && lock.owner == &description.value() && lock.start == new_lock.l_start && lock.len == new_lock.l_len;
325
326 if (new_lock.l_type == F_WRLCK)
327 return false;
328 }
329 return true;
330 });
331}
332
333ErrorOr<bool> Inode::try_apply_flock(Process const& process, OpenFileDescription const& description, flock const& new_lock)
334{
335 return m_flocks.with([&](auto& flocks) -> ErrorOr<bool> {
336 if (!can_apply_flock(new_lock, description))
337 return false;
338
339 bool did_manipulate_lock = false;
340 for (size_t i = 0; i < flocks.size(); ++i) {
341 auto const& lock = flocks[i];
342
343 bool is_potential_downgrade = new_lock.l_type == F_RDLCK && lock.type == F_WRLCK;
344 bool is_potential_unlock = new_lock.l_type == F_UNLCK;
345
346 bool is_lock_owner = &description == lock.owner;
347 bool lock_range_exactly_matches = lock.start == new_lock.l_start && lock.len == new_lock.l_len;
348 bool can_manage_this_lock = is_lock_owner && lock_range_exactly_matches;
349
350 if ((is_potential_downgrade || is_potential_unlock) && can_manage_this_lock) {
351 flocks.remove(i);
352 did_manipulate_lock = true;
353 break;
354 }
355 }
356
357 if (new_lock.l_type != F_UNLCK)
358 TRY(flocks.try_append(Flock { new_lock.l_start, new_lock.l_len, &description, process.pid().value(), new_lock.l_type }));
359
360 if (did_manipulate_lock)
361 m_flock_blocker_set.unblock_all_blockers_whose_conditions_are_met();
362
363 // Judging by the Linux implementation, unlocking a non-existent lock
364 // also works.
365 return true;
366 });
367}
368
369ErrorOr<void> Inode::apply_flock(Process const& process, OpenFileDescription const& description, Userspace<flock const*> input_lock, ShouldBlock should_block)
370{
371 auto new_lock = TRY(copy_typed_from_user(input_lock));
372 TRY(normalize_flock(description, new_lock));
373
374 while (true) {
375 auto success = TRY(try_apply_flock(process, description, new_lock));
376 if (success)
377 return {};
378
379 if (should_block == ShouldBlock::No)
380 return EAGAIN;
381
382 if (Thread::current()->block<Thread::FlockBlocker>({}, *this, new_lock).was_interrupted())
383 return EINTR;
384 }
385}
386
387ErrorOr<void> Inode::get_flock(OpenFileDescription const& description, Userspace<flock*> reference_lock) const
388{
389 flock lookup = {};
390 TRY(copy_from_user(&lookup, reference_lock));
391 TRY(normalize_flock(description, lookup));
392
393 return m_flocks.with([&](auto& flocks) {
394 for (auto const& lock : flocks) {
395 if (!range_overlap(lock.start, lock.len, lookup.l_start, lookup.l_len))
396 continue;
397
398 // Locks with the same owner can't conflict with each other.
399 if (lock.pid == Process::current().pid())
400 continue;
401
402 if ((lookup.l_type == F_RDLCK && lock.type == F_WRLCK) || lookup.l_type == F_WRLCK) {
403 lookup = { lock.type, SEEK_SET, lock.start, lock.len, lock.pid };
404 return copy_to_user(reference_lock, &lookup);
405 }
406 }
407
408 lookup.l_type = F_UNLCK;
409 return copy_to_user(reference_lock, &lookup);
410 });
411}
412
413void Inode::remove_flocks_for_description(OpenFileDescription const& description)
414{
415 m_flocks.with([&](auto& flocks) {
416 flocks.remove_all_matching([&](auto& entry) { return entry.owner == &description; });
417 });
418}
419
420bool Inode::has_watchers() const
421{
422 return !m_watchers.with([&](auto& watchers) { return watchers.is_empty(); });
423}
424
425}