Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/Bitmap.h>
28#include <AK/BufferStream.h>
29#include <AK/HashMap.h>
30#include <AK/StdLibExtras.h>
31#include <AK/StringView.h>
32#include <Kernel/Devices/BlockDevice.h>
33#include <Kernel/FileSystem/Ext2FileSystem.h>
34#include <Kernel/FileSystem/FileDescription.h>
35#include <Kernel/FileSystem/ext2_fs.h>
36#include <Kernel/Process.h>
37#include <Kernel/UnixTypes.h>
38#include <LibC/errno_numbers.h>
39
40//#define EXT2_DEBUG
41
42namespace Kernel {
43
44static const size_t max_link_count = 65535;
45static const size_t max_block_size = 4096;
46static const ssize_t max_inline_symlink_length = 60;
47
48static u8 to_ext2_file_type(mode_t mode)
49{
50 if (is_regular_file(mode))
51 return EXT2_FT_REG_FILE;
52 if (is_directory(mode))
53 return EXT2_FT_DIR;
54 if (is_character_device(mode))
55 return EXT2_FT_CHRDEV;
56 if (is_block_device(mode))
57 return EXT2_FT_BLKDEV;
58 if (is_fifo(mode))
59 return EXT2_FT_FIFO;
60 if (is_socket(mode))
61 return EXT2_FT_SOCK;
62 if (is_symlink(mode))
63 return EXT2_FT_SYMLINK;
64 return EXT2_FT_UNKNOWN;
65}
66
67NonnullRefPtr<Ext2FS> Ext2FS::create(FileDescription& file_description)
68{
69 return adopt(*new Ext2FS(file_description));
70}
71
72Ext2FS::Ext2FS(FileDescription& file_description)
73 : FileBackedFS(file_description)
74{
75}
76
77Ext2FS::~Ext2FS()
78{
79}
80
81bool Ext2FS::flush_super_block()
82{
83 LOCKER(m_lock);
84 ASSERT((sizeof(ext2_super_block) % logical_block_size()) == 0);
85 bool success = raw_write_blocks(2, (sizeof(ext2_super_block) / logical_block_size()), (const u8*)&m_super_block);
86 ASSERT(success);
87 return true;
88}
89
90const ext2_group_desc& Ext2FS::group_descriptor(GroupIndex group_index) const
91{
92 // FIXME: Should this fail gracefully somehow?
93 ASSERT(group_index <= m_block_group_count);
94 return block_group_descriptors()[group_index - 1];
95}
96
97bool Ext2FS::initialize()
98{
99 LOCKER(m_lock);
100 ASSERT((sizeof(ext2_super_block) % logical_block_size()) == 0);
101 bool success = raw_read_blocks(2, (sizeof(ext2_super_block) / logical_block_size()), (u8*)&m_super_block);
102 ASSERT(success);
103
104 auto& super_block = this->super_block();
105#ifdef EXT2_DEBUG
106 klog() << "ext2fs: super block magic: " << String::format("%x", super_block.s_magic) << " (super block size: " << sizeof(ext2_super_block) << ")";
107#endif
108 if (super_block.s_magic != EXT2_SUPER_MAGIC)
109 return false;
110
111#ifdef EXT2_DEBUG
112 klog() << "ext2fs: " << super_block.s_inodes_count << " inodes, " << super_block.s_blocks_count << " blocks";
113 klog() << "ext2fs: block size = " << EXT2_BLOCK_SIZE(&super_block);
114 klog() << "ext2fs: first data block = " << super_block.s_first_data_block;
115 klog() << "ext2fs: inodes per block = " << inodes_per_block();
116 klog() << "ext2fs: inodes per group = " << inodes_per_group();
117 klog() << "ext2fs: free inodes = " << super_block.s_free_inodes_count;
118 klog() << "ext2fs: desc per block = " << EXT2_DESC_PER_BLOCK(&super_block);
119 klog() << "ext2fs: desc size = " << EXT2_DESC_SIZE(&super_block);
120#endif
121
122 set_block_size(EXT2_BLOCK_SIZE(&super_block));
123
124 ASSERT(block_size() <= (int)max_block_size);
125
126 m_block_group_count = ceil_div(super_block.s_blocks_count, super_block.s_blocks_per_group);
127
128 if (m_block_group_count == 0) {
129 klog() << "ext2fs: no block groups :(";
130 return false;
131 }
132
133 unsigned blocks_to_read = ceil_div(m_block_group_count * (unsigned)sizeof(ext2_group_desc), block_size());
134 BlockIndex first_block_of_bgdt = block_size() == 1024 ? 2 : 1;
135 m_cached_group_descriptor_table = KBuffer::create_with_size(block_size() * blocks_to_read, Region::Access::Read | Region::Access::Write, "Ext2FS: Block group descriptors");
136 read_blocks(first_block_of_bgdt, blocks_to_read, m_cached_group_descriptor_table.value().data());
137
138#ifdef EXT2_DEBUG
139 for (unsigned i = 1; i <= m_block_group_count; ++i) {
140 auto& group = group_descriptor(i);
141 klog() << "ext2fs: group[" << i << "] { block_bitmap: " << group.bg_block_bitmap << ", inode_bitmap: " << group.bg_inode_bitmap << ", inode_table: " << group.bg_inode_table << " }";
142 }
143#endif
144
145 return true;
146}
147
148const char* Ext2FS::class_name() const
149{
150 return "Ext2FS";
151}
152
153InodeIdentifier Ext2FS::root_inode() const
154{
155 return { fsid(), EXT2_ROOT_INO };
156}
157
158bool Ext2FS::read_block_containing_inode(unsigned inode, unsigned& block_index, unsigned& offset, u8* buffer) const
159{
160 LOCKER(m_lock);
161 auto& super_block = this->super_block();
162
163 if (inode != EXT2_ROOT_INO && inode < EXT2_FIRST_INO(&super_block))
164 return false;
165
166 if (inode > super_block.s_inodes_count)
167 return false;
168
169 auto& bgd = group_descriptor(group_index_from_inode(inode));
170
171 offset = ((inode - 1) % inodes_per_group()) * inode_size();
172 block_index = bgd.bg_inode_table + (offset >> EXT2_BLOCK_SIZE_BITS(&super_block));
173 offset &= block_size() - 1;
174
175 return read_block(block_index, buffer);
176}
177
178Ext2FS::BlockListShape Ext2FS::compute_block_list_shape(unsigned blocks)
179{
180 BlockListShape shape;
181 const unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&super_block());
182 unsigned blocks_remaining = blocks;
183
184 shape.direct_blocks = min((unsigned)EXT2_NDIR_BLOCKS, blocks_remaining);
185 blocks_remaining -= shape.direct_blocks;
186 if (!blocks_remaining)
187 return shape;
188
189 shape.indirect_blocks = min(blocks_remaining, entries_per_block);
190 blocks_remaining -= shape.indirect_blocks;
191 shape.meta_blocks += 1;
192 if (!blocks_remaining)
193 return shape;
194
195 shape.doubly_indirect_blocks = min(blocks_remaining, entries_per_block * entries_per_block);
196 blocks_remaining -= shape.doubly_indirect_blocks;
197 shape.meta_blocks += 1;
198 shape.meta_blocks += shape.doubly_indirect_blocks / entries_per_block;
199 if ((shape.doubly_indirect_blocks % entries_per_block) != 0)
200 shape.meta_blocks += 1;
201 if (!blocks_remaining)
202 return shape;
203
204 dbg() << "we don't know how to compute tind ext2fs blocks yet!";
205 ASSERT_NOT_REACHED();
206
207 shape.triply_indirect_blocks = min(blocks_remaining, entries_per_block * entries_per_block * entries_per_block);
208 blocks_remaining -= shape.triply_indirect_blocks;
209 if (!blocks_remaining)
210 return shape;
211
212 ASSERT_NOT_REACHED();
213
214 return {};
215}
216
217bool Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e2inode, const Vector<BlockIndex>& blocks)
218{
219 LOCKER(m_lock);
220
221 // NOTE: There is a mismatch between i_blocks and blocks.size() since i_blocks includes meta blocks and blocks.size() does not.
222 auto old_block_count = ceil_div(e2inode.i_size, block_size());
223
224 auto old_shape = compute_block_list_shape(old_block_count);
225 auto new_shape = compute_block_list_shape(blocks.size());
226
227 Vector<BlockIndex> new_meta_blocks;
228 if (new_shape.meta_blocks > old_shape.meta_blocks) {
229 new_meta_blocks = allocate_blocks(group_index_from_inode(inode_index), new_shape.meta_blocks - old_shape.meta_blocks);
230 }
231
232 e2inode.i_blocks = (blocks.size() + new_shape.meta_blocks) * (block_size() / 512);
233
234 bool inode_dirty = false;
235
236 unsigned output_block_index = 0;
237 unsigned remaining_blocks = blocks.size();
238 for (unsigned i = 0; i < new_shape.direct_blocks; ++i) {
239 if (e2inode.i_block[i] != blocks[output_block_index])
240 inode_dirty = true;
241 e2inode.i_block[i] = blocks[output_block_index];
242 ++output_block_index;
243 --remaining_blocks;
244 }
245 if (inode_dirty) {
246#ifdef EXT2_DEBUG
247 dbg() << "Ext2FS: Writing " << min((size_t)EXT2_NDIR_BLOCKS, blocks.size()) << " direct block(s) to i_block array of inode " << inode_index;
248 for (size_t i = 0; i < min((size_t)EXT2_NDIR_BLOCKS, blocks.size()); ++i)
249 dbg() << " + " << blocks[i];
250#endif
251 write_ext2_inode(inode_index, e2inode);
252 inode_dirty = false;
253 }
254
255 if (!remaining_blocks)
256 return true;
257
258 const unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&super_block());
259
260 bool ind_block_new = !e2inode.i_block[EXT2_IND_BLOCK];
261 if (ind_block_new) {
262 BlockIndex new_indirect_block = new_meta_blocks.take_last();
263 if (e2inode.i_block[EXT2_IND_BLOCK] != new_indirect_block)
264 inode_dirty = true;
265 e2inode.i_block[EXT2_IND_BLOCK] = new_indirect_block;
266 if (inode_dirty) {
267#ifdef EXT2_DEBUG
268 dbg() << "Ext2FS: Adding the indirect block to i_block array of inode " << inode_index;
269#endif
270 write_ext2_inode(inode_index, e2inode);
271 inode_dirty = false;
272 }
273 }
274
275 if (old_shape.indirect_blocks == new_shape.indirect_blocks) {
276 // No need to update the singly indirect block array.
277 remaining_blocks -= new_shape.indirect_blocks;
278 output_block_index += new_shape.indirect_blocks;
279 } else {
280 auto block_contents = ByteBuffer::create_uninitialized(block_size());
281 BufferStream stream(block_contents);
282 ASSERT(new_shape.indirect_blocks <= entries_per_block);
283 for (unsigned i = 0; i < new_shape.indirect_blocks; ++i) {
284 stream << blocks[output_block_index++];
285 --remaining_blocks;
286 }
287 stream.fill_to_end(0);
288 bool success = write_block(e2inode.i_block[EXT2_IND_BLOCK], block_contents.data());
289 ASSERT(success);
290 }
291
292 if (!remaining_blocks)
293 return true;
294
295 bool dind_block_dirty = false;
296
297 bool dind_block_new = !e2inode.i_block[EXT2_DIND_BLOCK];
298 if (dind_block_new) {
299 BlockIndex new_dindirect_block = new_meta_blocks.take_last();
300 if (e2inode.i_block[EXT2_DIND_BLOCK] != new_dindirect_block)
301 inode_dirty = true;
302 e2inode.i_block[EXT2_DIND_BLOCK] = new_dindirect_block;
303 if (inode_dirty) {
304#ifdef EXT2_DEBUG
305 dbg() << "Ext2FS: Adding the doubly-indirect block to i_block array of inode " << inode_index;
306#endif
307 write_ext2_inode(inode_index, e2inode);
308 inode_dirty = false;
309 }
310 }
311
312 if (old_shape.doubly_indirect_blocks == new_shape.doubly_indirect_blocks) {
313 // No need to update the doubly indirect block data.
314 remaining_blocks -= new_shape.doubly_indirect_blocks;
315 output_block_index += new_shape.doubly_indirect_blocks;
316 } else {
317 unsigned indirect_block_count = new_shape.doubly_indirect_blocks / entries_per_block;
318 if ((new_shape.doubly_indirect_blocks % entries_per_block) != 0)
319 indirect_block_count++;
320
321 auto dind_block_contents = ByteBuffer::create_uninitialized(block_size());
322 read_block(e2inode.i_block[EXT2_DIND_BLOCK], dind_block_contents.data());
323 if (dind_block_new) {
324 memset(dind_block_contents.data(), 0, dind_block_contents.size());
325 dind_block_dirty = true;
326 }
327 auto* dind_block_as_pointers = (unsigned*)dind_block_contents.data();
328
329 ASSERT(indirect_block_count <= entries_per_block);
330 for (unsigned i = 0; i < indirect_block_count; ++i) {
331 bool ind_block_dirty = false;
332
333 BlockIndex indirect_block_index = dind_block_as_pointers[i];
334
335 bool ind_block_new = !indirect_block_index;
336 if (ind_block_new) {
337 indirect_block_index = new_meta_blocks.take_last();
338 dind_block_as_pointers[i] = indirect_block_index;
339 dind_block_dirty = true;
340 }
341
342 auto ind_block_contents = ByteBuffer::create_uninitialized(block_size());
343 read_block(indirect_block_index, ind_block_contents.data());
344 if (ind_block_new) {
345 memset(ind_block_contents.data(), 0, dind_block_contents.size());
346 ind_block_dirty = true;
347 }
348 auto* ind_block_as_pointers = (unsigned*)ind_block_contents.data();
349
350 unsigned entries_to_write = new_shape.doubly_indirect_blocks - (i * entries_per_block);
351 if (entries_to_write > entries_per_block)
352 entries_to_write = entries_per_block;
353
354 ASSERT(entries_to_write <= entries_per_block);
355 for (unsigned j = 0; j < entries_to_write; ++j) {
356 BlockIndex output_block = blocks[output_block_index++];
357 if (ind_block_as_pointers[j] != output_block) {
358 ind_block_as_pointers[j] = output_block;
359 ind_block_dirty = true;
360 }
361 --remaining_blocks;
362 }
363 for (unsigned j = entries_to_write; j < entries_per_block; ++j) {
364 if (ind_block_as_pointers[j] != 0) {
365 ind_block_as_pointers[j] = 0;
366 ind_block_dirty = true;
367 }
368 }
369
370 if (ind_block_dirty) {
371 bool success = write_block(indirect_block_index, ind_block_contents.data());
372 ASSERT(success);
373 }
374 }
375 for (unsigned i = indirect_block_count; i < entries_per_block; ++i) {
376 if (dind_block_as_pointers[i] != 0) {
377 dind_block_as_pointers[i] = 0;
378 dind_block_dirty = true;
379 }
380 }
381
382 if (dind_block_dirty) {
383 bool success = write_block(e2inode.i_block[EXT2_DIND_BLOCK], dind_block_contents.data());
384 ASSERT(success);
385 }
386 }
387
388 if (!remaining_blocks)
389 return true;
390
391 // FIXME: Implement!
392 dbg() << "we don't know how to write tind ext2fs blocks yet!";
393 ASSERT_NOT_REACHED();
394}
395
396Vector<Ext2FS::BlockIndex> Ext2FS::block_list_for_inode(const ext2_inode& e2inode, bool include_block_list_blocks) const
397{
398 auto block_list = block_list_for_inode_impl(e2inode, include_block_list_blocks);
399 while (!block_list.is_empty() && block_list.last() == 0)
400 block_list.take_last();
401 return block_list;
402}
403
404Vector<Ext2FS::BlockIndex> Ext2FS::block_list_for_inode_impl(const ext2_inode& e2inode, bool include_block_list_blocks) const
405{
406 LOCKER(m_lock);
407 unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&super_block());
408
409 unsigned block_count = ceil_div(e2inode.i_size, block_size());
410
411 // If we are handling a symbolic link, the path is stored in the 60 bytes in
412 // the inode that are used for the 12 direct and 3 indirect block pointers,
413 // If the path is longer than 60 characters, a block is allocated, and the
414 // block contains the destination path. The file size corresponds to the
415 // path length of the destination.
416 if (is_symlink(e2inode.i_mode) && e2inode.i_blocks == 0)
417 block_count = 0;
418
419#ifdef EXT2_DEBUG
420 dbg() << "Ext2FS::block_list_for_inode(): i_size=" << e2inode.i_size << ", i_blocks=" << e2inode.i_blocks << ", block_count=" << block_count;
421#endif
422
423 unsigned blocks_remaining = block_count;
424 Vector<BlockIndex> list;
425
426 auto add_block = [&](BlockIndex bi) {
427 if (blocks_remaining) {
428 list.append(bi);
429 --blocks_remaining;
430 }
431 };
432
433 if (include_block_list_blocks) {
434 // This seems like an excessive over-estimate but w/e.
435 list.ensure_capacity(blocks_remaining * 2);
436 } else {
437 list.ensure_capacity(blocks_remaining);
438 }
439
440 unsigned direct_count = min(block_count, (unsigned)EXT2_NDIR_BLOCKS);
441 for (unsigned i = 0; i < direct_count; ++i) {
442 auto block_index = e2inode.i_block[i];
443 add_block(block_index);
444 }
445
446 if (!blocks_remaining)
447 return list;
448
449 auto process_block_array = [&](unsigned array_block_index, auto&& callback) {
450 if (include_block_list_blocks)
451 callback(array_block_index);
452 auto array_block = ByteBuffer::create_uninitialized(block_size());
453 read_block(array_block_index, array_block.data());
454 ASSERT(array_block);
455 auto* array = reinterpret_cast<const __u32*>(array_block.data());
456 unsigned count = min(blocks_remaining, entries_per_block);
457 for (BlockIndex i = 0; i < count; ++i)
458 callback(array[i]);
459 };
460
461 process_block_array(e2inode.i_block[EXT2_IND_BLOCK], [&](unsigned block_index) {
462 add_block(block_index);
463 });
464
465 if (!blocks_remaining)
466 return list;
467
468 process_block_array(e2inode.i_block[EXT2_DIND_BLOCK], [&](unsigned block_index) {
469 process_block_array(block_index, [&](unsigned block_index2) {
470 add_block(block_index2);
471 });
472 });
473
474 if (!blocks_remaining)
475 return list;
476
477 process_block_array(e2inode.i_block[EXT2_TIND_BLOCK], [&](unsigned block_index) {
478 process_block_array(block_index, [&](unsigned block_index2) {
479 process_block_array(block_index2, [&](unsigned block_index3) {
480 add_block(block_index3);
481 });
482 });
483 });
484
485 return list;
486}
487
488void Ext2FS::free_inode(Ext2FSInode& inode)
489{
490 LOCKER(m_lock);
491 ASSERT(inode.m_raw_inode.i_links_count == 0);
492#ifdef EXT2_DEBUG
493 dbg() << "Ext2FS: Inode " << inode.identifier() << " has no more links, time to delete!";
494#endif
495
496 struct timeval now;
497 kgettimeofday(now);
498 inode.m_raw_inode.i_dtime = now.tv_sec;
499 write_ext2_inode(inode.index(), inode.m_raw_inode);
500
501 auto block_list = block_list_for_inode(inode.m_raw_inode, true);
502
503 for (auto block_index : block_list) {
504 ASSERT(block_index <= super_block().s_blocks_count);
505 if (block_index)
506 set_block_allocation_state(block_index, false);
507 }
508
509 set_inode_allocation_state(inode.index(), false);
510
511 if (inode.is_directory()) {
512 auto& bgd = const_cast<ext2_group_desc&>(group_descriptor(group_index_from_inode(inode.index())));
513 --bgd.bg_used_dirs_count;
514 dbg() << "Ext2FS: Decremented bg_used_dirs_count to " << bgd.bg_used_dirs_count;
515 m_block_group_descriptors_dirty = true;
516 }
517}
518
519void Ext2FS::flush_block_group_descriptor_table()
520{
521 LOCKER(m_lock);
522 unsigned blocks_to_write = ceil_div(m_block_group_count * (unsigned)sizeof(ext2_group_desc), block_size());
523 unsigned first_block_of_bgdt = block_size() == 1024 ? 2 : 1;
524 write_blocks(first_block_of_bgdt, blocks_to_write, (const u8*)block_group_descriptors());
525}
526
527void Ext2FS::flush_writes()
528{
529 LOCKER(m_lock);
530 if (m_super_block_dirty) {
531 flush_super_block();
532 m_super_block_dirty = false;
533 }
534 if (m_block_group_descriptors_dirty) {
535 flush_block_group_descriptor_table();
536 m_block_group_descriptors_dirty = false;
537 }
538 for (auto& cached_bitmap : m_cached_bitmaps) {
539 if (cached_bitmap->dirty) {
540 write_block(cached_bitmap->bitmap_block_index, cached_bitmap->buffer.data());
541 cached_bitmap->dirty = false;
542#ifdef EXT2_DEBUG
543 dbg() << "Flushed bitmap block " << cached_bitmap->bitmap_block_index;
544#endif
545 }
546 }
547
548 FileBackedFS::flush_writes();
549
550 // Uncache Inodes that are only kept alive by the index-to-inode lookup cache.
551 // We don't uncache Inodes that are being watched by at least one InodeWatcher.
552
553 // FIXME: It would be better to keep a capped number of Inodes around.
554 // The problem is that they are quite heavy objects, and use a lot of heap memory
555 // for their (child name lookup) and (block list) caches.
556 Vector<InodeIndex> unused_inodes;
557 for (auto& it : m_inode_cache) {
558 if (it.value->ref_count() != 1)
559 continue;
560 if (it.value->has_watchers())
561 continue;
562 unused_inodes.append(it.key);
563 }
564 for (auto index : unused_inodes)
565 uncache_inode(index);
566}
567
568Ext2FSInode::Ext2FSInode(Ext2FS& fs, unsigned index)
569 : Inode(fs, index)
570{
571}
572
573Ext2FSInode::~Ext2FSInode()
574{
575 if (m_raw_inode.i_links_count == 0)
576 fs().free_inode(*this);
577}
578
579InodeMetadata Ext2FSInode::metadata() const
580{
581 LOCKER(m_lock);
582 InodeMetadata metadata;
583 metadata.inode = identifier();
584 metadata.size = m_raw_inode.i_size;
585 metadata.mode = m_raw_inode.i_mode;
586 metadata.uid = m_raw_inode.i_uid;
587 metadata.gid = m_raw_inode.i_gid;
588 metadata.link_count = m_raw_inode.i_links_count;
589 metadata.atime = m_raw_inode.i_atime;
590 metadata.ctime = m_raw_inode.i_ctime;
591 metadata.mtime = m_raw_inode.i_mtime;
592 metadata.dtime = m_raw_inode.i_dtime;
593 metadata.block_size = fs().block_size();
594 metadata.block_count = m_raw_inode.i_blocks;
595
596 if (Kernel::is_character_device(m_raw_inode.i_mode) || Kernel::is_block_device(m_raw_inode.i_mode)) {
597 unsigned dev = m_raw_inode.i_block[0];
598 if (!dev)
599 dev = m_raw_inode.i_block[1];
600 metadata.major_device = (dev & 0xfff00) >> 8;
601 metadata.minor_device = (dev & 0xff) | ((dev >> 12) & 0xfff00);
602 }
603 return metadata;
604}
605
606void Ext2FSInode::flush_metadata()
607{
608 LOCKER(m_lock);
609#ifdef EXT2_DEBUG
610 dbg() << "Ext2FS: flush_metadata for inode " << identifier();
611#endif
612 fs().write_ext2_inode(index(), m_raw_inode);
613 if (is_directory()) {
614 // Unless we're about to go away permanently, invalidate the lookup cache.
615 if (m_raw_inode.i_links_count != 0) {
616 // FIXME: This invalidation is way too hardcore. It's sad to throw away the whole cache.
617 m_lookup_cache.clear();
618 }
619 }
620 set_metadata_dirty(false);
621}
622
623RefPtr<Inode> Ext2FS::get_inode(InodeIdentifier inode) const
624{
625 LOCKER(m_lock);
626 ASSERT(inode.fsid() == fsid());
627
628 {
629 auto it = m_inode_cache.find(inode.index());
630 if (it != m_inode_cache.end())
631 return (*it).value;
632 }
633
634 if (!get_inode_allocation_state(inode.index())) {
635 m_inode_cache.set(inode.index(), nullptr);
636 return nullptr;
637 }
638
639 unsigned block_index;
640 unsigned offset;
641 u8 block[max_block_size];
642 if (!read_block_containing_inode(inode.index(), block_index, offset, block))
643 return {};
644
645 auto new_inode = adopt(*new Ext2FSInode(const_cast<Ext2FS&>(*this), inode.index()));
646 memcpy(&new_inode->m_raw_inode, reinterpret_cast<ext2_inode*>(block + offset), sizeof(ext2_inode));
647 m_inode_cache.set(inode.index(), new_inode);
648 return new_inode;
649}
650
651ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDescription* description) const
652{
653 Locker inode_locker(m_lock);
654 ASSERT(offset >= 0);
655 if (m_raw_inode.i_size == 0)
656 return 0;
657
658 // Symbolic links shorter than 60 characters are store inline inside the i_block array.
659 // This avoids wasting an entire block on short links. (Most links are short.)
660 if (is_symlink() && size() < max_inline_symlink_length) {
661 ASSERT(offset == 0);
662 ssize_t nread = min((off_t)size() - offset, static_cast<off_t>(count));
663 memcpy(buffer, ((const u8*)m_raw_inode.i_block) + offset, (size_t)nread);
664 return nread;
665 }
666
667 Locker fs_locker(fs().m_lock);
668
669 if (m_block_list.is_empty())
670 m_block_list = fs().block_list_for_inode(m_raw_inode);
671
672 if (m_block_list.is_empty()) {
673 klog() << "ext2fs: read_bytes: empty block list for inode " << index();
674 return -EIO;
675 }
676
677 const int block_size = fs().block_size();
678
679 size_t first_block_logical_index = offset / block_size;
680 size_t last_block_logical_index = (offset + count) / block_size;
681 if (last_block_logical_index >= m_block_list.size())
682 last_block_logical_index = m_block_list.size() - 1;
683
684 int offset_into_first_block = offset % block_size;
685
686 ssize_t nread = 0;
687 size_t remaining_count = min((off_t)count, (off_t)size() - offset);
688 u8* out = buffer;
689
690#ifdef EXT2_DEBUG
691 dbg() << "Ext2FS: Reading up to " << count << " bytes " << offset << " bytes into inode " << identifier() << " to " << (const void*)buffer;
692#endif
693
694 u8 block[max_block_size];
695
696 for (size_t bi = first_block_logical_index; remaining_count && bi <= last_block_logical_index; ++bi) {
697 auto block_index = m_block_list[bi];
698 ASSERT(block_index);
699 bool success = fs().read_block(block_index, block, description);
700 if (!success) {
701 klog() << "ext2fs: read_bytes: read_block(" << block_index << ") failed (lbi: " << bi << ")";
702 return -EIO;
703 }
704
705 size_t offset_into_block = (bi == first_block_logical_index) ? offset_into_first_block : 0;
706 size_t num_bytes_to_copy = min(block_size - offset_into_block, remaining_count);
707 memcpy(out, block + offset_into_block, num_bytes_to_copy);
708 remaining_count -= num_bytes_to_copy;
709 nread += num_bytes_to_copy;
710 out += num_bytes_to_copy;
711 }
712
713 return nread;
714}
715
716KResult Ext2FSInode::resize(u64 new_size)
717{
718 u64 old_size = size();
719 if (old_size == new_size)
720 return KSuccess;
721
722 u64 block_size = fs().block_size();
723 size_t blocks_needed_before = ceil_div(old_size, block_size);
724 size_t blocks_needed_after = ceil_div(new_size, block_size);
725
726#ifdef EXT2_DEBUG
727 dbg() << "Ext2FSInode::resize(): blocks needed before (size was " << old_size << "): " << blocks_needed_before;
728 dbg() << "Ext2FSInode::resize(): blocks needed after (size is " << new_size << "): " << blocks_needed_after;
729#endif
730
731 if (blocks_needed_after > blocks_needed_before) {
732 u32 additional_blocks_needed = blocks_needed_after - blocks_needed_before;
733 if (additional_blocks_needed > fs().super_block().s_free_blocks_count)
734 return KResult(-ENOSPC);
735 }
736
737 auto block_list = fs().block_list_for_inode(m_raw_inode);
738 if (blocks_needed_after > blocks_needed_before) {
739 auto new_blocks = fs().allocate_blocks(fs().group_index_from_inode(index()), blocks_needed_after - blocks_needed_before);
740 block_list.append(move(new_blocks));
741 } else if (blocks_needed_after < blocks_needed_before) {
742#ifdef EXT2_DEBUG
743 dbg() << "Ext2FS: Shrinking inode " << identifier() << ". Old block list is " << block_list.size() << " entries:";
744 for (auto block_index : block_list) {
745 dbg() << " # " << block_index;
746 }
747#endif
748 while (block_list.size() != blocks_needed_after) {
749 auto block_index = block_list.take_last();
750 if (block_index)
751 fs().set_block_allocation_state(block_index, false);
752 }
753 }
754
755 bool success = fs().write_block_list_for_inode(index(), m_raw_inode, block_list);
756 if (!success)
757 return KResult(-EIO);
758
759 m_raw_inode.i_size = new_size;
760 set_metadata_dirty(true);
761
762 m_block_list = move(block_list);
763 return KSuccess;
764}
765
766ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, FileDescription* description)
767{
768 ASSERT(offset >= 0);
769 ASSERT(count >= 0);
770
771 Locker inode_locker(m_lock);
772 Locker fs_locker(fs().m_lock);
773
774 auto result = prepare_to_write_data();
775 if (result.is_error())
776 return result;
777
778 if (is_symlink()) {
779 ASSERT(offset == 0);
780 if (max((size_t)(offset + count), (size_t)m_raw_inode.i_size) < max_inline_symlink_length) {
781#ifdef EXT2_DEBUG
782 dbg() << "Ext2FS: write_bytes poking into i_block array for inline symlink '" << StringView(data, count) << " ' (" << count << " bytes)";
783#endif
784 memcpy(((u8*)m_raw_inode.i_block) + offset, data, (size_t)count);
785 if ((size_t)(offset + count) > (size_t)m_raw_inode.i_size)
786 m_raw_inode.i_size = offset + count;
787 set_metadata_dirty(true);
788 return count;
789 }
790 }
791
792 const size_t block_size = fs().block_size();
793 u64 old_size = size();
794 u64 new_size = max(static_cast<u64>(offset) + count, (u64)size());
795
796 auto resize_result = resize(new_size);
797 if (resize_result.is_error())
798 return resize_result;
799
800 if (m_block_list.is_empty())
801 m_block_list = fs().block_list_for_inode(m_raw_inode);
802
803 if (m_block_list.is_empty()) {
804 dbg() << "Ext2FSInode::write_bytes(): empty block list for inode " << index();
805 return -EIO;
806 }
807
808 size_t first_block_logical_index = offset / block_size;
809 size_t last_block_logical_index = (offset + count) / block_size;
810 if (last_block_logical_index >= m_block_list.size())
811 last_block_logical_index = m_block_list.size() - 1;
812
813 size_t offset_into_first_block = offset % block_size;
814
815 size_t last_logical_block_index_in_file = new_size / block_size;
816
817 ssize_t nwritten = 0;
818 size_t remaining_count = min((off_t)count, (off_t)new_size - offset);
819 const u8* in = data;
820
821#ifdef EXT2_DEBUG
822 dbg() << "Ext2FS: Writing " << count << " bytes " << offset << " bytes into inode " << identifier() << " from " << (const void*)data;
823#endif
824
825 auto buffer_block = ByteBuffer::create_uninitialized(block_size);
826 for (size_t bi = first_block_logical_index; remaining_count && bi <= last_block_logical_index; ++bi) {
827 size_t offset_into_block = (bi == first_block_logical_index) ? offset_into_first_block : 0;
828 size_t num_bytes_to_copy = min(block_size - offset_into_block, remaining_count);
829
830 ByteBuffer block;
831 if (offset_into_block != 0 || num_bytes_to_copy != block_size) {
832 block = ByteBuffer::create_uninitialized(block_size);
833 bool success = fs().read_block(m_block_list[bi], block.data(), description);
834 if (!success) {
835 dbg() << "Ext2FS: In write_bytes, read_block(" << m_block_list[bi] << ") failed (bi: " << bi << ")";
836 return -EIO;
837 }
838 } else
839 block = buffer_block;
840
841 memcpy(block.data() + offset_into_block, in, num_bytes_to_copy);
842 if (bi == last_logical_block_index_in_file && num_bytes_to_copy < block_size) {
843 size_t padding_start = new_size % block_size;
844 size_t padding_bytes = block_size - padding_start;
845#ifdef EXT2_DEBUG
846 dbg() << "Ext2FS: Padding last block of file with zero x " << padding_bytes << " (new_size=" << new_size << ", offset_into_block=" << offset_into_block << ", num_bytes_to_copy=" << num_bytes_to_copy << ")";
847#endif
848 memset(block.data() + padding_start, 0, padding_bytes);
849 }
850#ifdef EXT2_DEBUG
851 dbg() << "Ext2FS: Writing block " << m_block_list[bi] << " (offset_into_block: " << offset_into_block << ")";
852#endif
853 bool success = fs().write_block(m_block_list[bi], block.data(), description);
854 if (!success) {
855 dbg() << "Ext2FS: write_block(" << m_block_list[bi] << ") failed (bi: " << bi << ")";
856 ASSERT_NOT_REACHED();
857 return -EIO;
858 }
859 remaining_count -= num_bytes_to_copy;
860 nwritten += num_bytes_to_copy;
861 in += num_bytes_to_copy;
862 }
863
864#ifdef EXT2_DEBUG
865 dbg() << "Ext2FS: After write, i_size=" << m_raw_inode.i_size << ", i_blocks=" << m_raw_inode.i_blocks << " (" << m_block_list.size() << " blocks in list)";
866#endif
867
868 if (old_size != new_size)
869 inode_size_changed(old_size, new_size);
870 inode_contents_changed(offset, count, data);
871 return nwritten;
872}
873
874bool Ext2FSInode::traverse_as_directory(Function<bool(const FS::DirectoryEntry&)> callback) const
875{
876 LOCKER(m_lock);
877 ASSERT(is_directory());
878
879#ifdef EXT2_DEBUG
880 dbg() << "Ext2FS: Traversing as directory: " << identifier();
881#endif
882
883 auto buffer = read_entire();
884 ASSERT(buffer);
885 auto* entry = reinterpret_cast<ext2_dir_entry_2*>(buffer.data());
886
887 while (entry < buffer.end_pointer()) {
888 if (entry->inode != 0) {
889#ifdef EXT2_DEBUG
890 dbg() << "Ext2Inode::traverse_as_directory: " << entry->inode << ", name_len: " << entry->name_len << ", rec_len: " << entry->rec_len << ", file_type: " << entry->file_type << ", name: " << String(entry->name, entry->name_len);
891#endif
892 if (!callback({ entry->name, entry->name_len, { fsid(), entry->inode }, entry->file_type }))
893 break;
894 }
895 entry = (ext2_dir_entry_2*)((char*)entry + entry->rec_len);
896 }
897 return true;
898}
899
900bool Ext2FSInode::write_directory(const Vector<FS::DirectoryEntry>& entries)
901{
902 LOCKER(m_lock);
903
904 int directory_size = 0;
905 for (auto& entry : entries)
906 directory_size += EXT2_DIR_REC_LEN(entry.name_length);
907
908 auto block_size = fs().block_size();
909
910 int blocks_needed = ceil_div(directory_size, block_size);
911 int occupied_size = blocks_needed * block_size;
912
913#ifdef EXT2_DEBUG
914 dbg() << "Ext2FS: New directory inode " << identifier() << " contents to write (size " << directory_size << ", occupied " << occupied_size << "):";
915#endif
916
917 auto directory_data = ByteBuffer::create_uninitialized(occupied_size);
918
919 BufferStream stream(directory_data);
920 for (size_t i = 0; i < entries.size(); ++i) {
921 auto& entry = entries[i];
922
923 int record_length = EXT2_DIR_REC_LEN(entry.name_length);
924 if (i == entries.size() - 1)
925 record_length += occupied_size - directory_size;
926
927#ifdef EXT2_DEBUG
928 dbg() << "* Inode: " << entry.inode
929 << ", name_len: " << u16(entry.name_length)
930 << ", rec_len: " << u16(record_length)
931 << ", file_type: " << u8(entry.file_type)
932 << ", name: " << entry.name;
933#endif
934
935 stream << u32(entry.inode.index());
936 stream << u16(record_length);
937 stream << u8(entry.name_length);
938 stream << u8(entry.file_type);
939 stream << entry.name;
940
941 int padding = record_length - entry.name_length - 8;
942 for (int j = 0; j < padding; ++j)
943 stream << u8(0);
944 }
945
946 stream.fill_to_end(0);
947
948 ssize_t nwritten = write_bytes(0, directory_data.size(), directory_data.data(), nullptr);
949 if (nwritten < 0)
950 return false;
951 set_metadata_dirty(true);
952 return static_cast<size_t>(nwritten) == directory_data.size();
953}
954
955KResult Ext2FSInode::add_child(InodeIdentifier child_id, const StringView& name, mode_t mode)
956{
957 LOCKER(m_lock);
958 ASSERT(is_directory());
959
960 if (name.length() > EXT2_NAME_LEN)
961 return KResult(-ENAMETOOLONG);
962
963#ifdef EXT2_DEBUG
964 dbg() << "Ext2FSInode::add_child(): Adding inode " << child_id.index() << " with name '" << name << "' and mode " << mode << " to directory " << index();
965#endif
966
967 Vector<FS::DirectoryEntry> entries;
968 bool name_already_exists = false;
969 traverse_as_directory([&](auto& entry) {
970 if (name == entry.name) {
971 name_already_exists = true;
972 return false;
973 }
974 entries.append(entry);
975 return true;
976 });
977 if (name_already_exists) {
978 dbg() << "Ext2FSInode::add_child(): Name '" << name << "' already exists in inode " << index();
979 return KResult(-EEXIST);
980 }
981
982 auto child_inode = fs().get_inode(child_id);
983 if (child_inode) {
984 auto result = child_inode->increment_link_count();
985 if (result.is_error())
986 return result;
987 }
988
989 entries.empend(name.characters_without_null_termination(), name.length(), child_id, to_ext2_file_type(mode));
990 bool success = write_directory(entries);
991 if (success)
992 m_lookup_cache.set(name, child_id.index());
993 return KSuccess;
994}
995
996KResult Ext2FSInode::remove_child(const StringView& name)
997{
998 LOCKER(m_lock);
999#ifdef EXT2_DEBUG
1000 dbg() << "Ext2FSInode::remove_child(" << name << ") in inode " << index();
1001#endif
1002 ASSERT(is_directory());
1003
1004 auto it = m_lookup_cache.find(name);
1005 if (it == m_lookup_cache.end())
1006 return KResult(-ENOENT);
1007 auto child_inode_index = (*it).value;
1008
1009 InodeIdentifier child_id { fsid(), child_inode_index };
1010
1011#ifdef EXT2_DEBUG
1012 dbg() << "Ext2FSInode::remove_child(): Removing '" << name << "' in directory " << index();
1013#endif
1014
1015 Vector<FS::DirectoryEntry> entries;
1016 traverse_as_directory([&](auto& entry) {
1017 if (name != entry.name)
1018 entries.append(entry);
1019 return true;
1020 });
1021
1022 bool success = write_directory(entries);
1023 if (!success) {
1024 // FIXME: Plumb error from write_directory().
1025 return KResult(-EIO);
1026 }
1027
1028 m_lookup_cache.remove(name);
1029
1030 auto child_inode = fs().get_inode(child_id);
1031 child_inode->decrement_link_count();
1032 return KSuccess;
1033}
1034
1035unsigned Ext2FS::inodes_per_block() const
1036{
1037 return EXT2_INODES_PER_BLOCK(&super_block());
1038}
1039
1040unsigned Ext2FS::inodes_per_group() const
1041{
1042 return EXT2_INODES_PER_GROUP(&super_block());
1043}
1044
1045unsigned Ext2FS::inode_size() const
1046{
1047 return EXT2_INODE_SIZE(&super_block());
1048}
1049unsigned Ext2FS::blocks_per_group() const
1050{
1051 return EXT2_BLOCKS_PER_GROUP(&super_block());
1052}
1053
1054bool Ext2FS::write_ext2_inode(unsigned inode, const ext2_inode& e2inode)
1055{
1056 LOCKER(m_lock);
1057 unsigned block_index;
1058 unsigned offset;
1059 u8 block[max_block_size];
1060 if (!read_block_containing_inode(inode, block_index, offset, block))
1061 return false;
1062 memcpy(reinterpret_cast<ext2_inode*>(block + offset), &e2inode, inode_size());
1063 bool success = write_block(block_index, block);
1064 ASSERT(success);
1065 return success;
1066}
1067
1068Vector<Ext2FS::BlockIndex> Ext2FS::allocate_blocks(GroupIndex preferred_group_index, size_t count)
1069{
1070 LOCKER(m_lock);
1071#ifdef EXT2_DEBUG
1072 dbg() << "Ext2FS: allocate_blocks(preferred group: " << preferred_group_index << ", count: " << count << ")";
1073#endif
1074 if (count == 0)
1075 return {};
1076
1077 Vector<BlockIndex> blocks;
1078#ifdef EXT2_DEBUG
1079 dbg() << "Ext2FS: allocate_blocks:";
1080#endif
1081 blocks.ensure_capacity(count);
1082
1083 GroupIndex group_index = preferred_group_index;
1084
1085 if (!group_descriptor(preferred_group_index).bg_free_blocks_count) {
1086 group_index = 1;
1087 }
1088
1089 while (blocks.size() < count) {
1090
1091 bool found_a_group = false;
1092 if (group_descriptor(group_index).bg_free_blocks_count) {
1093 found_a_group = true;
1094 } else {
1095 if (group_index == preferred_group_index)
1096 group_index = 1;
1097 for (; group_index < m_block_group_count; ++group_index) {
1098 if (group_descriptor(group_index).bg_free_blocks_count) {
1099 found_a_group = true;
1100 break;
1101 }
1102 }
1103 }
1104
1105 ASSERT(found_a_group);
1106 auto& bgd = group_descriptor(group_index);
1107 auto& cached_bitmap = get_bitmap_block(bgd.bg_block_bitmap);
1108
1109 int blocks_in_group = min(blocks_per_group(), super_block().s_blocks_count);
1110 auto block_bitmap = Bitmap::wrap(cached_bitmap.buffer.data(), blocks_in_group);
1111
1112 BlockIndex first_block_in_group = (group_index - 1) * blocks_per_group() + first_block_index();
1113 size_t free_region_size = 0;
1114 auto first_unset_bit_index = block_bitmap.find_longest_range_of_unset_bits(count - blocks.size(), free_region_size);
1115 ASSERT(first_unset_bit_index.has_value());
1116#ifdef EXT2_DEBUG
1117 dbg() << "Ext2FS: allocating free region of size: " << free_region_size << "[" << group_index << "]";
1118#endif
1119 for (size_t i = 0; i < free_region_size; ++i) {
1120 BlockIndex block_index = (first_unset_bit_index.value() + i) + first_block_in_group;
1121 set_block_allocation_state(block_index, true);
1122 blocks.unchecked_append(block_index);
1123#ifdef EXT2_DEBUG
1124 dbg() << " allocated > " << block_index;
1125#endif
1126 }
1127 }
1128
1129 ASSERT(blocks.size() == count);
1130 return blocks;
1131}
1132
1133unsigned Ext2FS::find_a_free_inode(GroupIndex preferred_group, off_t expected_size)
1134{
1135 LOCKER(m_lock);
1136#ifdef EXT2_DEBUG
1137 dbg() << "Ext2FS: find_a_free_inode(preferred_group: " << preferred_group << ", expected_size: " << String::format("%ld", expected_size) << ")";
1138#endif
1139
1140 unsigned needed_blocks = ceil_div(expected_size, block_size());
1141
1142#ifdef EXT2_DEBUG
1143 dbg() << "Ext2FS: minimum needed blocks: " << needed_blocks;
1144#endif
1145
1146 unsigned group_index = 0;
1147
1148 // FIXME: We shouldn't refuse to allocate an inode if there is no group that can house the whole thing.
1149 // In those cases we should just spread it across multiple groups.
1150 auto is_suitable_group = [this, needed_blocks](GroupIndex group_index) {
1151 auto& bgd = group_descriptor(group_index);
1152 return bgd.bg_free_inodes_count && bgd.bg_free_blocks_count >= needed_blocks;
1153 };
1154
1155 if (preferred_group && is_suitable_group(preferred_group)) {
1156 group_index = preferred_group;
1157 } else {
1158 for (unsigned i = 1; i <= m_block_group_count; ++i) {
1159 if (is_suitable_group(i))
1160 group_index = i;
1161 }
1162 }
1163
1164 if (!group_index) {
1165 klog() << "Ext2FS: find_a_free_inode: no suitable group found for new inode with " << needed_blocks << " blocks needed :(";
1166 return 0;
1167 }
1168
1169#ifdef EXT2_DEBUG
1170 dbg() << "Ext2FS: find_a_free_inode: found suitable group [" << group_index << "] for new inode with " << needed_blocks << " blocks needed :^)";
1171#endif
1172
1173 auto& bgd = group_descriptor(group_index);
1174 unsigned inodes_in_group = min(inodes_per_group(), super_block().s_inodes_count);
1175 unsigned first_free_inode_in_group = 0;
1176
1177 unsigned first_inode_in_group = (group_index - 1) * inodes_per_group() + 1;
1178
1179 auto& cached_bitmap = get_bitmap_block(bgd.bg_inode_bitmap);
1180 auto inode_bitmap = Bitmap::wrap(cached_bitmap.buffer.data(), inodes_in_group);
1181 for (size_t i = 0; i < inode_bitmap.size(); ++i) {
1182 if (inode_bitmap.get(i))
1183 continue;
1184 first_free_inode_in_group = first_inode_in_group + i;
1185 break;
1186 }
1187
1188 if (!first_free_inode_in_group) {
1189 klog() << "Ext2FS: first_free_inode_in_group returned no inode, despite bgd claiming there are inodes :(";
1190 return 0;
1191 }
1192
1193 unsigned inode = first_free_inode_in_group;
1194#ifdef EXT2_DEBUG
1195 dbg() << "Ext2FS: found suitable inode " << inode;
1196#endif
1197
1198 ASSERT(get_inode_allocation_state(inode) == false);
1199 return inode;
1200}
1201
1202Ext2FS::GroupIndex Ext2FS::group_index_from_block_index(BlockIndex block_index) const
1203{
1204 if (!block_index)
1205 return 0;
1206 return (block_index - 1) / blocks_per_group() + 1;
1207}
1208
1209unsigned Ext2FS::group_index_from_inode(unsigned inode) const
1210{
1211 if (!inode)
1212 return 0;
1213 return (inode - 1) / inodes_per_group() + 1;
1214}
1215
1216bool Ext2FS::get_inode_allocation_state(InodeIndex index) const
1217{
1218 LOCKER(m_lock);
1219 if (index == 0)
1220 return true;
1221 unsigned group_index = group_index_from_inode(index);
1222 auto& bgd = group_descriptor(group_index);
1223 unsigned index_in_group = index - ((group_index - 1) * inodes_per_group());
1224 unsigned bit_index = (index_in_group - 1) % inodes_per_group();
1225
1226 auto& cached_bitmap = const_cast<Ext2FS&>(*this).get_bitmap_block(bgd.bg_inode_bitmap);
1227 return cached_bitmap.bitmap(inodes_per_group()).get(bit_index);
1228}
1229
1230bool Ext2FS::set_inode_allocation_state(InodeIndex inode_index, bool new_state)
1231{
1232 LOCKER(m_lock);
1233 unsigned group_index = group_index_from_inode(inode_index);
1234 auto& bgd = group_descriptor(group_index);
1235 unsigned index_in_group = inode_index - ((group_index - 1) * inodes_per_group());
1236 unsigned bit_index = (index_in_group - 1) % inodes_per_group();
1237
1238 auto& cached_bitmap = get_bitmap_block(bgd.bg_inode_bitmap);
1239
1240 bool current_state = cached_bitmap.bitmap(inodes_per_group()).get(bit_index);
1241#ifdef EXT2_DEBUG
1242 dbg() << "Ext2FS: set_inode_allocation_state(" << inode_index << ") " << String::format("%u", current_state) << " -> " << String::format("%u", new_state);
1243#endif
1244
1245 if (current_state == new_state) {
1246 ASSERT_NOT_REACHED();
1247 return true;
1248 }
1249
1250 cached_bitmap.bitmap(inodes_per_group()).set(bit_index, new_state);
1251 cached_bitmap.dirty = true;
1252
1253 // Update superblock
1254#ifdef EXT2_DEBUG
1255 dbg() << "Ext2FS: superblock free inode count " << m_super_block.s_free_inodes_count << " -> " << (m_super_block.s_free_inodes_count - 1);
1256#endif
1257 if (new_state)
1258 --m_super_block.s_free_inodes_count;
1259 else
1260 ++m_super_block.s_free_inodes_count;
1261 m_super_block_dirty = true;
1262
1263 // Update BGD
1264 auto& mutable_bgd = const_cast<ext2_group_desc&>(bgd);
1265 if (new_state)
1266 --mutable_bgd.bg_free_inodes_count;
1267 else
1268 ++mutable_bgd.bg_free_inodes_count;
1269#ifdef EXT2_DEBUG
1270 dbg() << "Ext2FS: group free inode count " << bgd.bg_free_inodes_count << " -> " << (bgd.bg_free_inodes_count - 1);
1271#endif
1272
1273 m_block_group_descriptors_dirty = true;
1274 return true;
1275}
1276
1277Ext2FS::BlockIndex Ext2FS::first_block_index() const
1278{
1279 return block_size() == 1024 ? 1 : 0;
1280}
1281
1282Ext2FS::CachedBitmap& Ext2FS::get_bitmap_block(BlockIndex bitmap_block_index)
1283{
1284 for (auto& cached_bitmap : m_cached_bitmaps) {
1285 if (cached_bitmap->bitmap_block_index == bitmap_block_index)
1286 return *cached_bitmap;
1287 }
1288
1289 auto block = KBuffer::create_with_size(block_size(), Region::Access::Read | Region::Access::Write, "Ext2FS: Cached bitmap block");
1290 bool success = read_block(bitmap_block_index, block.data());
1291 ASSERT(success);
1292 m_cached_bitmaps.append(make<CachedBitmap>(bitmap_block_index, move(block)));
1293 return *m_cached_bitmaps.last();
1294}
1295
1296bool Ext2FS::set_block_allocation_state(BlockIndex block_index, bool new_state)
1297{
1298 ASSERT(block_index != 0);
1299 LOCKER(m_lock);
1300#ifdef EXT2_DEBUG
1301 dbg() << "Ext2FS: set_block_allocation_state(block=" << block_index << ", state=" << String::format("%u", new_state) << ")";
1302#endif
1303
1304 GroupIndex group_index = group_index_from_block_index(block_index);
1305 auto& bgd = group_descriptor(group_index);
1306 BlockIndex index_in_group = (block_index - first_block_index()) - ((group_index - 1) * blocks_per_group());
1307 unsigned bit_index = index_in_group % blocks_per_group();
1308
1309 auto& cached_bitmap = get_bitmap_block(bgd.bg_block_bitmap);
1310
1311 bool current_state = cached_bitmap.bitmap(blocks_per_group()).get(bit_index);
1312#ifdef EXT2_DEBUG
1313 dbg() << "Ext2FS: block " << block_index << " state: " << String::format("%u", current_state) << " -> " << String::format("%u", new_state) << " (in bitmap block " << bgd.bg_block_bitmap << ")";
1314#endif
1315
1316 if (current_state == new_state) {
1317 ASSERT_NOT_REACHED();
1318 return true;
1319 }
1320
1321 cached_bitmap.bitmap(blocks_per_group()).set(bit_index, new_state);
1322 cached_bitmap.dirty = true;
1323
1324 // Update superblock
1325#ifdef EXT2_DEBUG
1326 dbg() << "Ext2FS: superblock free block count " << m_super_block.s_free_blocks_count << " -> " << (m_super_block.s_free_blocks_count - 1);
1327#endif
1328 if (new_state)
1329 --m_super_block.s_free_blocks_count;
1330 else
1331 ++m_super_block.s_free_blocks_count;
1332 m_super_block_dirty = true;
1333
1334 // Update BGD
1335 auto& mutable_bgd = const_cast<ext2_group_desc&>(bgd);
1336 if (new_state)
1337 --mutable_bgd.bg_free_blocks_count;
1338 else
1339 ++mutable_bgd.bg_free_blocks_count;
1340#ifdef EXT2_DEBUG
1341 dbg() << "Ext2FS: group " << group_index << " free block count " << bgd.bg_free_blocks_count << " -> " << (bgd.bg_free_blocks_count - 1);
1342#endif
1343
1344 m_block_group_descriptors_dirty = true;
1345 return true;
1346}
1347
1348KResult Ext2FS::create_directory(InodeIdentifier parent_id, const String& name, mode_t mode, uid_t uid, gid_t gid)
1349{
1350 LOCKER(m_lock);
1351 ASSERT(parent_id.fsid() == fsid());
1352
1353 // Fix up the mode to definitely be a directory.
1354 // FIXME: This is a bit on the hackish side.
1355 mode &= ~0170000;
1356 mode |= 0040000;
1357
1358 // NOTE: When creating a new directory, make the size 1 block.
1359 // There's probably a better strategy here, but this works for now.
1360 auto inode_or_error = create_inode(parent_id, name, mode, block_size(), 0, uid, gid);
1361 if (inode_or_error.is_error())
1362 return inode_or_error.error();
1363
1364 auto& inode = inode_or_error.value();
1365
1366#ifdef EXT2_DEBUG
1367 dbg() << "Ext2FS: create_directory: created new directory named '" << name << "' with inode " << inode->identifier();
1368#endif
1369
1370 Vector<DirectoryEntry> entries;
1371 entries.empend(".", inode->identifier(), EXT2_FT_DIR);
1372 entries.empend("..", parent_id, EXT2_FT_DIR);
1373
1374 bool success = static_cast<Ext2FSInode&>(*inode).write_directory(entries);
1375 ASSERT(success);
1376
1377 auto parent_inode = get_inode(parent_id);
1378 auto result = parent_inode->increment_link_count();
1379 if (result.is_error())
1380 return result;
1381
1382 auto& bgd = const_cast<ext2_group_desc&>(group_descriptor(group_index_from_inode(inode->identifier().index())));
1383 ++bgd.bg_used_dirs_count;
1384#ifdef EXT2_DEBUG
1385 dbg() << "Ext2FS: incremented bg_used_dirs_count " << bgd.bg_used_dirs_count - 1 << " -> " << bgd.bg_used_dirs_count;
1386#endif
1387
1388 m_block_group_descriptors_dirty = true;
1389
1390 return KSuccess;
1391}
1392
1393KResultOr<NonnullRefPtr<Inode>> Ext2FS::create_inode(InodeIdentifier parent_id, const String& name, mode_t mode, off_t size, dev_t dev, uid_t uid, gid_t gid)
1394{
1395 LOCKER(m_lock);
1396 ASSERT(parent_id.fsid() == fsid());
1397 auto parent_inode = get_inode(parent_id);
1398 ASSERT(parent_inode);
1399
1400 if (static_cast<const Ext2FSInode&>(*parent_inode).m_raw_inode.i_links_count == 0)
1401 return KResult(-ENOENT);
1402
1403#ifdef EXT2_DEBUG
1404 dbg() << "Ext2FS: Adding inode '" << name << "' (mode " << String::format("%o", mode) << ") to parent directory " << parent_inode->identifier();
1405#endif
1406
1407 size_t needed_blocks = ceil_div(size, block_size());
1408 if ((size_t)needed_blocks > super_block().s_free_blocks_count) {
1409 dbg() << "Ext2FS: create_inode: not enough free blocks";
1410 return KResult(-ENOSPC);
1411 }
1412
1413 // NOTE: This doesn't commit the inode allocation just yet!
1414 auto inode_id = find_a_free_inode(0, size);
1415 if (!inode_id) {
1416 klog() << "Ext2FS: create_inode: allocate_inode failed";
1417 return KResult(-ENOSPC);
1418 }
1419
1420 // Try adding it to the directory first, in case the name is already in use.
1421 auto result = parent_inode->add_child({ fsid(), inode_id }, name, mode);
1422 if (result.is_error())
1423 return result;
1424
1425 auto blocks = allocate_blocks(group_index_from_inode(inode_id), needed_blocks);
1426 ASSERT(blocks.size() == needed_blocks);
1427
1428 // Looks like we're good, time to update the inode bitmap and group+global inode counters.
1429 bool success = set_inode_allocation_state(inode_id, true);
1430 ASSERT(success);
1431
1432 unsigned initial_links_count;
1433 if (is_directory(mode))
1434 initial_links_count = 2; // (parent directory + "." entry in self)
1435 else
1436 initial_links_count = 1;
1437
1438 struct timeval now;
1439 kgettimeofday(now);
1440 ext2_inode e2inode;
1441 memset(&e2inode, 0, sizeof(ext2_inode));
1442 e2inode.i_mode = mode;
1443 e2inode.i_uid = uid;
1444 e2inode.i_gid = gid;
1445 e2inode.i_size = size;
1446 e2inode.i_atime = now.tv_sec;
1447 e2inode.i_ctime = now.tv_sec;
1448 e2inode.i_mtime = now.tv_sec;
1449 e2inode.i_dtime = 0;
1450 e2inode.i_links_count = initial_links_count;
1451
1452 if (is_character_device(mode))
1453 e2inode.i_block[0] = dev;
1454 else if (is_block_device(mode))
1455 e2inode.i_block[1] = dev;
1456
1457 success = write_block_list_for_inode(inode_id, e2inode, blocks);
1458 ASSERT(success);
1459
1460#ifdef EXT2_DEBUG
1461 dbg() << "Ext2FS: writing initial metadata for inode " << inode_id;
1462#endif
1463 e2inode.i_flags = 0;
1464 success = write_ext2_inode(inode_id, e2inode);
1465 ASSERT(success);
1466
1467 // We might have cached the fact that this inode didn't exist. Wipe the slate.
1468 m_inode_cache.remove(inode_id);
1469
1470 auto inode = get_inode({ fsid(), inode_id });
1471 // If we've already computed a block list, no sense in throwing it away.
1472 static_cast<Ext2FSInode&>(*inode).m_block_list = move(blocks);
1473 return inode.release_nonnull();
1474}
1475
1476void Ext2FSInode::populate_lookup_cache() const
1477{
1478 LOCKER(m_lock);
1479 if (!m_lookup_cache.is_empty())
1480 return;
1481 HashMap<String, unsigned> children;
1482
1483 traverse_as_directory([&children](auto& entry) {
1484 children.set(String(entry.name, entry.name_length), entry.inode.index());
1485 return true;
1486 });
1487
1488 if (!m_lookup_cache.is_empty())
1489 return;
1490 m_lookup_cache = move(children);
1491}
1492
1493RefPtr<Inode> Ext2FSInode::lookup(StringView name)
1494{
1495 ASSERT(is_directory());
1496 populate_lookup_cache();
1497 LOCKER(m_lock);
1498 auto it = m_lookup_cache.find(name.hash(), [&](auto& entry) { return entry.key == name; });
1499 if (it != m_lookup_cache.end())
1500 return fs().get_inode({ fsid(), (*it).value });
1501 return {};
1502}
1503
1504void Ext2FSInode::one_ref_left()
1505{
1506 // FIXME: I would like to not live forever, but uncached Ext2FS is fucking painful right now.
1507}
1508
1509int Ext2FSInode::set_atime(time_t t)
1510{
1511 LOCKER(m_lock);
1512 if (fs().is_readonly())
1513 return -EROFS;
1514 m_raw_inode.i_atime = t;
1515 set_metadata_dirty(true);
1516 return 0;
1517}
1518
1519int Ext2FSInode::set_ctime(time_t t)
1520{
1521 LOCKER(m_lock);
1522 if (fs().is_readonly())
1523 return -EROFS;
1524 m_raw_inode.i_ctime = t;
1525 set_metadata_dirty(true);
1526 return 0;
1527}
1528
1529int Ext2FSInode::set_mtime(time_t t)
1530{
1531 LOCKER(m_lock);
1532 if (fs().is_readonly())
1533 return -EROFS;
1534 m_raw_inode.i_mtime = t;
1535 set_metadata_dirty(true);
1536 return 0;
1537}
1538
1539KResult Ext2FSInode::increment_link_count()
1540{
1541 LOCKER(m_lock);
1542 if (fs().is_readonly())
1543 return KResult(-EROFS);
1544 if (m_raw_inode.i_links_count == max_link_count)
1545 return KResult(-EMLINK);
1546 ++m_raw_inode.i_links_count;
1547 set_metadata_dirty(true);
1548 return KSuccess;
1549}
1550
1551KResult Ext2FSInode::decrement_link_count()
1552{
1553 LOCKER(m_lock);
1554 if (fs().is_readonly())
1555 return KResult(-EROFS);
1556 ASSERT(m_raw_inode.i_links_count);
1557 --m_raw_inode.i_links_count;
1558 if (ref_count() == 1 && m_raw_inode.i_links_count == 0)
1559 fs().uncache_inode(index());
1560 set_metadata_dirty(true);
1561 return KSuccess;
1562}
1563
1564void Ext2FS::uncache_inode(InodeIndex index)
1565{
1566 LOCKER(m_lock);
1567 m_inode_cache.remove(index);
1568}
1569
1570size_t Ext2FSInode::directory_entry_count() const
1571{
1572 ASSERT(is_directory());
1573 LOCKER(m_lock);
1574 populate_lookup_cache();
1575 return m_lookup_cache.size();
1576}
1577
1578KResult Ext2FSInode::chmod(mode_t mode)
1579{
1580 LOCKER(m_lock);
1581 if (m_raw_inode.i_mode == mode)
1582 return KSuccess;
1583 m_raw_inode.i_mode = mode;
1584 set_metadata_dirty(true);
1585 return KSuccess;
1586}
1587
1588KResult Ext2FSInode::chown(uid_t uid, gid_t gid)
1589{
1590 LOCKER(m_lock);
1591 if (m_raw_inode.i_uid == uid && m_raw_inode.i_gid == gid)
1592 return KSuccess;
1593 m_raw_inode.i_uid = uid;
1594 m_raw_inode.i_gid = gid;
1595 set_metadata_dirty(true);
1596 return KSuccess;
1597}
1598
1599KResult Ext2FSInode::truncate(u64 size)
1600{
1601 LOCKER(m_lock);
1602 if (static_cast<u64>(m_raw_inode.i_size) == size)
1603 return KSuccess;
1604 auto result = resize(size);
1605 if (result.is_error())
1606 return result;
1607 set_metadata_dirty(true);
1608 return KSuccess;
1609}
1610
1611unsigned Ext2FS::total_block_count() const
1612{
1613 LOCKER(m_lock);
1614 return super_block().s_blocks_count;
1615}
1616
1617unsigned Ext2FS::free_block_count() const
1618{
1619 LOCKER(m_lock);
1620 return super_block().s_free_blocks_count;
1621}
1622
1623unsigned Ext2FS::total_inode_count() const
1624{
1625 LOCKER(m_lock);
1626 return super_block().s_inodes_count;
1627}
1628
1629unsigned Ext2FS::free_inode_count() const
1630{
1631 LOCKER(m_lock);
1632 return super_block().s_free_inodes_count;
1633}
1634
1635KResult Ext2FS::prepare_to_unmount() const
1636{
1637 LOCKER(m_lock);
1638
1639 for (auto& it : m_inode_cache) {
1640 if (it.value->ref_count() > 1)
1641 return KResult(-EBUSY);
1642 }
1643
1644 m_inode_cache.clear();
1645 return KSuccess;
1646}
1647
1648}