Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/Bitmap.h>
28#include <AK/BufferStream.h>
29#include <AK/HashMap.h>
30#include <AK/StdLibExtras.h>
31#include <Kernel/Devices/BlockDevice.h>
32#include <Kernel/FileSystem/Ext2FileSystem.h>
33#include <Kernel/FileSystem/FileDescription.h>
34#include <Kernel/FileSystem/ext2_fs.h>
35#include <Kernel/Process.h>
36#include <Kernel/UnixTypes.h>
37#include <LibC/errno_numbers.h>
38
39//#define EXT2_DEBUG
40
41namespace Kernel {
42
43static const size_t max_link_count = 65535;
44static const size_t max_block_size = 4096;
45static const ssize_t max_inline_symlink_length = 60;
46
47static u8 to_ext2_file_type(mode_t mode)
48{
49 if (is_regular_file(mode))
50 return EXT2_FT_REG_FILE;
51 if (is_directory(mode))
52 return EXT2_FT_DIR;
53 if (is_character_device(mode))
54 return EXT2_FT_CHRDEV;
55 if (is_block_device(mode))
56 return EXT2_FT_BLKDEV;
57 if (is_fifo(mode))
58 return EXT2_FT_FIFO;
59 if (is_socket(mode))
60 return EXT2_FT_SOCK;
61 if (is_symlink(mode))
62 return EXT2_FT_SYMLINK;
63 return EXT2_FT_UNKNOWN;
64}
65
66NonnullRefPtr<Ext2FS> Ext2FS::create(BlockDevice& device)
67{
68 return adopt(*new Ext2FS(device));
69}
70
71Ext2FS::Ext2FS(BlockDevice& device)
72 : DiskBackedFS(device)
73{
74}
75
76Ext2FS::~Ext2FS()
77{
78}
79
80bool Ext2FS::flush_super_block()
81{
82 LOCKER(m_lock);
83 bool success = device().write_blocks(2, 1, (const u8*)&m_super_block);
84 ASSERT(success);
85 return true;
86}
87
88const ext2_group_desc& Ext2FS::group_descriptor(GroupIndex group_index) const
89{
90 // FIXME: Should this fail gracefully somehow?
91 ASSERT(group_index <= m_block_group_count);
92 return block_group_descriptors()[group_index - 1];
93}
94
95bool Ext2FS::initialize()
96{
97 LOCKER(m_lock);
98 bool success = const_cast<BlockDevice&>(device()).read_blocks(2, 1, (u8*)&m_super_block);
99 ASSERT(success);
100
101 auto& super_block = this->super_block();
102#ifdef EXT2_DEBUG
103 kprintf("ext2fs: super block magic: %x (super block size: %u)\n", super_block.s_magic, sizeof(ext2_super_block));
104#endif
105 if (super_block.s_magic != EXT2_SUPER_MAGIC)
106 return false;
107
108#ifdef EXT2_DEBUG
109 kprintf("ext2fs: %u inodes, %u blocks\n", super_block.s_inodes_count, super_block.s_blocks_count);
110 kprintf("ext2fs: block size = %u\n", EXT2_BLOCK_SIZE(&super_block));
111 kprintf("ext2fs: first data block = %u\n", super_block.s_first_data_block);
112 kprintf("ext2fs: inodes per block = %u\n", inodes_per_block());
113 kprintf("ext2fs: inodes per group = %u\n", inodes_per_group());
114 kprintf("ext2fs: free inodes = %u\n", super_block.s_free_inodes_count);
115 kprintf("ext2fs: desc per block = %u\n", EXT2_DESC_PER_BLOCK(&super_block));
116 kprintf("ext2fs: desc size = %u\n", EXT2_DESC_SIZE(&super_block));
117#endif
118
119 set_block_size(EXT2_BLOCK_SIZE(&super_block));
120
121 ASSERT(block_size() <= (int)max_block_size);
122
123 m_block_group_count = ceil_div(super_block.s_blocks_count, super_block.s_blocks_per_group);
124
125 if (m_block_group_count == 0) {
126 kprintf("ext2fs: no block groups :(\n");
127 return false;
128 }
129
130 unsigned blocks_to_read = ceil_div(m_block_group_count * (unsigned)sizeof(ext2_group_desc), block_size());
131 BlockIndex first_block_of_bgdt = block_size() == 1024 ? 2 : 1;
132 m_cached_group_descriptor_table = KBuffer::create_with_size(block_size() * blocks_to_read, Region::Access::Read | Region::Access::Write, "Ext2FS: Block group descriptors");
133 read_blocks(first_block_of_bgdt, blocks_to_read, m_cached_group_descriptor_table.value().data());
134
135#ifdef EXT2_DEBUG
136 for (unsigned i = 1; i <= m_block_group_count; ++i) {
137 auto& group = group_descriptor(i);
138 kprintf("ext2fs: group[%u] { block_bitmap: %u, inode_bitmap: %u, inode_table: %u }\n",
139 i,
140 group.bg_block_bitmap,
141 group.bg_inode_bitmap,
142 group.bg_inode_table);
143 }
144#endif
145
146 return true;
147}
148
149const char* Ext2FS::class_name() const
150{
151 return "Ext2FS";
152}
153
154InodeIdentifier Ext2FS::root_inode() const
155{
156 return { fsid(), EXT2_ROOT_INO };
157}
158
159bool Ext2FS::read_block_containing_inode(unsigned inode, unsigned& block_index, unsigned& offset, u8* buffer) const
160{
161 LOCKER(m_lock);
162 auto& super_block = this->super_block();
163
164 if (inode != EXT2_ROOT_INO && inode < EXT2_FIRST_INO(&super_block))
165 return false;
166
167 if (inode > super_block.s_inodes_count)
168 return false;
169
170 auto& bgd = group_descriptor(group_index_from_inode(inode));
171
172 offset = ((inode - 1) % inodes_per_group()) * inode_size();
173 block_index = bgd.bg_inode_table + (offset >> EXT2_BLOCK_SIZE_BITS(&super_block));
174 offset &= block_size() - 1;
175
176 return read_block(block_index, buffer);
177}
178
179Ext2FS::BlockListShape Ext2FS::compute_block_list_shape(unsigned blocks)
180{
181 BlockListShape shape;
182 const unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&super_block());
183 unsigned blocks_remaining = blocks;
184
185 shape.direct_blocks = min((unsigned)EXT2_NDIR_BLOCKS, blocks_remaining);
186 blocks_remaining -= shape.direct_blocks;
187 if (!blocks_remaining)
188 return shape;
189
190 shape.indirect_blocks = min(blocks_remaining, entries_per_block);
191 blocks_remaining -= shape.indirect_blocks;
192 shape.meta_blocks += 1;
193 if (!blocks_remaining)
194 return shape;
195
196 shape.doubly_indirect_blocks = min(blocks_remaining, entries_per_block * entries_per_block);
197 blocks_remaining -= shape.doubly_indirect_blocks;
198 shape.meta_blocks += 1;
199 shape.meta_blocks += shape.doubly_indirect_blocks / entries_per_block;
200 if ((shape.doubly_indirect_blocks % entries_per_block) != 0)
201 shape.meta_blocks += 1;
202 if (!blocks_remaining)
203 return shape;
204
205 dbg() << "we don't know how to compute tind ext2fs blocks yet!";
206 ASSERT_NOT_REACHED();
207
208 shape.triply_indirect_blocks = min(blocks_remaining, entries_per_block * entries_per_block * entries_per_block);
209 blocks_remaining -= shape.triply_indirect_blocks;
210 if (!blocks_remaining)
211 return shape;
212
213 ASSERT_NOT_REACHED();
214
215 return {};
216}
217
218bool Ext2FS::write_block_list_for_inode(InodeIndex inode_index, ext2_inode& e2inode, const Vector<BlockIndex>& blocks)
219{
220 LOCKER(m_lock);
221
222 // NOTE: There is a mismatch between i_blocks and blocks.size() since i_blocks includes meta blocks and blocks.size() does not.
223 auto old_block_count = ceil_div(e2inode.i_size, block_size());
224
225 auto old_shape = compute_block_list_shape(old_block_count);
226 auto new_shape = compute_block_list_shape(blocks.size());
227
228 Vector<BlockIndex> new_meta_blocks;
229 if (new_shape.meta_blocks > old_shape.meta_blocks) {
230 new_meta_blocks = allocate_blocks(group_index_from_inode(inode_index), new_shape.meta_blocks - old_shape.meta_blocks);
231 }
232
233 e2inode.i_blocks = (blocks.size() + new_shape.meta_blocks) * (block_size() / 512);
234
235 bool inode_dirty = false;
236
237 unsigned output_block_index = 0;
238 unsigned remaining_blocks = blocks.size();
239 for (unsigned i = 0; i < new_shape.direct_blocks; ++i) {
240 if (e2inode.i_block[i] != blocks[output_block_index])
241 inode_dirty = true;
242 e2inode.i_block[i] = blocks[output_block_index];
243 ++output_block_index;
244 --remaining_blocks;
245 }
246 if (inode_dirty) {
247#ifdef EXT2_DEBUG
248 dbgprintf("Ext2FS: Writing %u direct block(s) to i_block array of inode %u\n", min(EXT2_NDIR_BLOCKS, blocks.size()), inode_index);
249 for (int i = 0; i < min(EXT2_NDIR_BLOCKS, blocks.size()); ++i)
250 dbgprintf(" + %u\n", blocks[i]);
251#endif
252 write_ext2_inode(inode_index, e2inode);
253 inode_dirty = false;
254 }
255
256 if (!remaining_blocks)
257 return true;
258
259 const unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&super_block());
260
261 bool ind_block_new = !e2inode.i_block[EXT2_IND_BLOCK];
262 if (ind_block_new) {
263 BlockIndex new_indirect_block = new_meta_blocks.take_last();
264 if (e2inode.i_block[EXT2_IND_BLOCK] != new_indirect_block)
265 inode_dirty = true;
266 e2inode.i_block[EXT2_IND_BLOCK] = new_indirect_block;
267 if (inode_dirty) {
268#ifdef EXT2_DEBUG
269 dbgprintf("Ext2FS: Adding the indirect block to i_block array of inode %u\n", inode_index);
270#endif
271 write_ext2_inode(inode_index, e2inode);
272 inode_dirty = false;
273 }
274 }
275
276 if (old_shape.indirect_blocks == new_shape.indirect_blocks) {
277 // No need to update the singly indirect block array.
278 remaining_blocks -= new_shape.indirect_blocks;
279 output_block_index += new_shape.indirect_blocks;
280 } else {
281 auto block_contents = ByteBuffer::create_uninitialized(block_size());
282 BufferStream stream(block_contents);
283 ASSERT(new_shape.indirect_blocks <= entries_per_block);
284 for (unsigned i = 0; i < new_shape.indirect_blocks; ++i) {
285 stream << blocks[output_block_index++];
286 --remaining_blocks;
287 }
288 stream.fill_to_end(0);
289 bool success = write_block(e2inode.i_block[EXT2_IND_BLOCK], block_contents.data());
290 ASSERT(success);
291 }
292
293 if (!remaining_blocks)
294 return true;
295
296 bool dind_block_dirty = false;
297
298 bool dind_block_new = !e2inode.i_block[EXT2_DIND_BLOCK];
299 if (dind_block_new) {
300 BlockIndex new_dindirect_block = new_meta_blocks.take_last();
301 if (e2inode.i_block[EXT2_DIND_BLOCK] != new_dindirect_block)
302 inode_dirty = true;
303 e2inode.i_block[EXT2_DIND_BLOCK] = new_dindirect_block;
304 if (inode_dirty) {
305#ifdef EXT2_DEBUG
306 dbgprintf("Ext2FS: Adding the doubly-indirect block to i_block array of inode %u\n", inode_index);
307#endif
308 write_ext2_inode(inode_index, e2inode);
309 inode_dirty = false;
310 }
311 }
312
313 if (old_shape.doubly_indirect_blocks == new_shape.doubly_indirect_blocks) {
314 // No need to update the doubly indirect block data.
315 remaining_blocks -= new_shape.doubly_indirect_blocks;
316 output_block_index += new_shape.doubly_indirect_blocks;
317 } else {
318 unsigned indirect_block_count = new_shape.doubly_indirect_blocks / entries_per_block;
319 if ((new_shape.doubly_indirect_blocks % entries_per_block) != 0)
320 indirect_block_count++;
321
322 auto dind_block_contents = ByteBuffer::create_uninitialized(block_size());
323 read_block(e2inode.i_block[EXT2_DIND_BLOCK], dind_block_contents.data());
324 if (dind_block_new) {
325 memset(dind_block_contents.data(), 0, dind_block_contents.size());
326 dind_block_dirty = true;
327 }
328 auto* dind_block_as_pointers = (unsigned*)dind_block_contents.data();
329
330 ASSERT(indirect_block_count <= entries_per_block);
331 for (unsigned i = 0; i < indirect_block_count; ++i) {
332 bool ind_block_dirty = false;
333
334 BlockIndex indirect_block_index = dind_block_as_pointers[i];
335
336 bool ind_block_new = !indirect_block_index;
337 if (ind_block_new) {
338 indirect_block_index = new_meta_blocks.take_last();
339 dind_block_as_pointers[i] = indirect_block_index;
340 dind_block_dirty = true;
341 }
342
343 auto ind_block_contents = ByteBuffer::create_uninitialized(block_size());
344 read_block(indirect_block_index, ind_block_contents.data());
345 if (ind_block_new) {
346 memset(ind_block_contents.data(), 0, dind_block_contents.size());
347 ind_block_dirty = true;
348 }
349 auto* ind_block_as_pointers = (unsigned*)ind_block_contents.data();
350
351 unsigned entries_to_write = new_shape.doubly_indirect_blocks - (i * entries_per_block);
352 if (entries_to_write > entries_per_block)
353 entries_to_write = entries_per_block;
354
355 ASSERT(entries_to_write <= entries_per_block);
356 for (unsigned j = 0; j < entries_to_write; ++j) {
357 BlockIndex output_block = blocks[output_block_index++];
358 if (ind_block_as_pointers[j] != output_block) {
359 ind_block_as_pointers[j] = output_block;
360 ind_block_dirty = true;
361 }
362 --remaining_blocks;
363 }
364 for (unsigned j = entries_to_write; j < entries_per_block; ++j) {
365 if (ind_block_as_pointers[j] != 0) {
366 ind_block_as_pointers[j] = 0;
367 ind_block_dirty = true;
368 }
369 }
370
371 if (ind_block_dirty) {
372 bool success = write_block(indirect_block_index, ind_block_contents.data());
373 ASSERT(success);
374 }
375 }
376 for (unsigned i = indirect_block_count; i < entries_per_block; ++i) {
377 if (dind_block_as_pointers[i] != 0) {
378 dind_block_as_pointers[i] = 0;
379 dind_block_dirty = true;
380 }
381 }
382
383 if (dind_block_dirty) {
384 bool success = write_block(e2inode.i_block[EXT2_DIND_BLOCK], dind_block_contents.data());
385 ASSERT(success);
386 }
387 }
388
389 if (!remaining_blocks)
390 return true;
391
392 // FIXME: Implement!
393 dbg() << "we don't know how to write tind ext2fs blocks yet!";
394 ASSERT_NOT_REACHED();
395}
396
397Vector<Ext2FS::BlockIndex> Ext2FS::block_list_for_inode(const ext2_inode& e2inode, bool include_block_list_blocks) const
398{
399 auto block_list = block_list_for_inode_impl(e2inode, include_block_list_blocks);
400 while (!block_list.is_empty() && block_list.last() == 0)
401 block_list.take_last();
402 return block_list;
403}
404
405Vector<Ext2FS::BlockIndex> Ext2FS::block_list_for_inode_impl(const ext2_inode& e2inode, bool include_block_list_blocks) const
406{
407 LOCKER(m_lock);
408 unsigned entries_per_block = EXT2_ADDR_PER_BLOCK(&super_block());
409
410 unsigned block_count = ceil_div(e2inode.i_size, block_size());
411
412#ifdef EXT2_DEBUG
413 dbgprintf("Ext2FS::block_list_for_inode(): i_size=%u, i_blocks=%u, block_count=%u\n", e2inode.i_size, e2inode.i_blocks, block_count);
414#endif
415
416 unsigned blocks_remaining = block_count;
417 Vector<BlockIndex> list;
418
419 auto add_block = [&](BlockIndex bi) {
420 if (blocks_remaining) {
421 list.append(bi);
422 --blocks_remaining;
423 }
424 };
425
426 if (include_block_list_blocks) {
427 // This seems like an excessive over-estimate but w/e.
428 list.ensure_capacity(blocks_remaining * 2);
429 } else {
430 list.ensure_capacity(blocks_remaining);
431 }
432
433 unsigned direct_count = min(block_count, (unsigned)EXT2_NDIR_BLOCKS);
434 for (unsigned i = 0; i < direct_count; ++i) {
435 auto block_index = e2inode.i_block[i];
436 add_block(block_index);
437 }
438
439 if (!blocks_remaining)
440 return list;
441
442 auto process_block_array = [&](unsigned array_block_index, auto&& callback) {
443 if (include_block_list_blocks)
444 callback(array_block_index);
445 auto array_block = ByteBuffer::create_uninitialized(block_size());
446 read_block(array_block_index, array_block.data());
447 ASSERT(array_block);
448 auto* array = reinterpret_cast<const __u32*>(array_block.data());
449 unsigned count = min(blocks_remaining, entries_per_block);
450 for (BlockIndex i = 0; i < count; ++i)
451 callback(array[i]);
452 };
453
454 process_block_array(e2inode.i_block[EXT2_IND_BLOCK], [&](unsigned block_index) {
455 add_block(block_index);
456 });
457
458 if (!blocks_remaining)
459 return list;
460
461 process_block_array(e2inode.i_block[EXT2_DIND_BLOCK], [&](unsigned block_index) {
462 process_block_array(block_index, [&](unsigned block_index2) {
463 add_block(block_index2);
464 });
465 });
466
467 if (!blocks_remaining)
468 return list;
469
470 process_block_array(e2inode.i_block[EXT2_TIND_BLOCK], [&](unsigned block_index) {
471 process_block_array(block_index, [&](unsigned block_index2) {
472 process_block_array(block_index2, [&](unsigned block_index3) {
473 add_block(block_index3);
474 });
475 });
476 });
477
478 return list;
479}
480
481void Ext2FS::free_inode(Ext2FSInode& inode)
482{
483 LOCKER(m_lock);
484 ASSERT(inode.m_raw_inode.i_links_count == 0);
485#ifdef EXT2_DEBUG
486 dbg() << "Ext2FS: Inode " << inode.identifier() << " has no more links, time to delete!";
487#endif
488
489 struct timeval now;
490 kgettimeofday(now);
491 inode.m_raw_inode.i_dtime = now.tv_sec;
492 write_ext2_inode(inode.index(), inode.m_raw_inode);
493
494 auto block_list = block_list_for_inode(inode.m_raw_inode, true);
495
496 for (auto block_index : block_list) {
497 if (block_index)
498 set_block_allocation_state(block_index, false);
499 }
500
501 set_inode_allocation_state(inode.index(), false);
502
503 if (inode.is_directory()) {
504 auto& bgd = const_cast<ext2_group_desc&>(group_descriptor(group_index_from_inode(inode.index())));
505 --bgd.bg_used_dirs_count;
506 dbg() << "Ext2FS: Decremented bg_used_dirs_count to " << bgd.bg_used_dirs_count;
507 m_block_group_descriptors_dirty = true;
508 }
509}
510
511void Ext2FS::flush_block_group_descriptor_table()
512{
513 LOCKER(m_lock);
514 unsigned blocks_to_write = ceil_div(m_block_group_count * (unsigned)sizeof(ext2_group_desc), block_size());
515 unsigned first_block_of_bgdt = block_size() == 1024 ? 2 : 1;
516 write_blocks(first_block_of_bgdt, blocks_to_write, (const u8*)block_group_descriptors());
517}
518
519void Ext2FS::flush_writes()
520{
521 LOCKER(m_lock);
522 if (m_super_block_dirty) {
523 flush_super_block();
524 m_super_block_dirty = false;
525 }
526 if (m_block_group_descriptors_dirty) {
527 flush_block_group_descriptor_table();
528 m_block_group_descriptors_dirty = false;
529 }
530 for (auto& cached_bitmap : m_cached_bitmaps) {
531 if (cached_bitmap->dirty) {
532 write_block(cached_bitmap->bitmap_block_index, cached_bitmap->buffer.data());
533 cached_bitmap->dirty = false;
534#ifdef EXT2_DEBUG
535 dbg() << "Flushed bitmap block " << cached_bitmap->bitmap_block_index;
536#endif
537 }
538 }
539
540 DiskBackedFS::flush_writes();
541
542 // Uncache Inodes that are only kept alive by the index-to-inode lookup cache.
543 // We don't uncache Inodes that are being watched by at least one InodeWatcher.
544
545 // FIXME: It would be better to keep a capped number of Inodes around.
546 // The problem is that they are quite heavy objects, and use a lot of heap memory
547 // for their (child name lookup) and (block list) caches.
548 Vector<InodeIndex> unused_inodes;
549 for (auto& it : m_inode_cache) {
550 if (it.value->ref_count() != 1)
551 continue;
552 if (it.value->has_watchers())
553 continue;
554 unused_inodes.append(it.key);
555 }
556 for (auto index : unused_inodes)
557 uncache_inode(index);
558}
559
560Ext2FSInode::Ext2FSInode(Ext2FS& fs, unsigned index)
561 : Inode(fs, index)
562{
563}
564
565Ext2FSInode::~Ext2FSInode()
566{
567 if (m_raw_inode.i_links_count == 0)
568 fs().free_inode(*this);
569}
570
571InodeMetadata Ext2FSInode::metadata() const
572{
573 LOCKER(m_lock);
574 InodeMetadata metadata;
575 metadata.inode = identifier();
576 metadata.size = m_raw_inode.i_size;
577 metadata.mode = m_raw_inode.i_mode;
578 metadata.uid = m_raw_inode.i_uid;
579 metadata.gid = m_raw_inode.i_gid;
580 metadata.link_count = m_raw_inode.i_links_count;
581 metadata.atime = m_raw_inode.i_atime;
582 metadata.ctime = m_raw_inode.i_ctime;
583 metadata.mtime = m_raw_inode.i_mtime;
584 metadata.dtime = m_raw_inode.i_dtime;
585 metadata.block_size = fs().block_size();
586 metadata.block_count = m_raw_inode.i_blocks;
587
588 if (Kernel::is_character_device(m_raw_inode.i_mode) || Kernel::is_block_device(m_raw_inode.i_mode)) {
589 unsigned dev = m_raw_inode.i_block[0];
590 if (!dev)
591 dev = m_raw_inode.i_block[1];
592 metadata.major_device = (dev & 0xfff00) >> 8;
593 metadata.minor_device = (dev & 0xff) | ((dev >> 12) & 0xfff00);
594 }
595 return metadata;
596}
597
598void Ext2FSInode::flush_metadata()
599{
600 LOCKER(m_lock);
601#ifdef EXT2_DEBUG
602 dbg() << "Ext2FS: flush_metadata for inode " << identifier();
603#endif
604 fs().write_ext2_inode(index(), m_raw_inode);
605 if (is_directory()) {
606 // Unless we're about to go away permanently, invalidate the lookup cache.
607 if (m_raw_inode.i_links_count != 0) {
608 // FIXME: This invalidation is way too hardcore. It's sad to throw away the whole cache.
609 m_lookup_cache.clear();
610 }
611 }
612 set_metadata_dirty(false);
613}
614
615RefPtr<Inode> Ext2FS::get_inode(InodeIdentifier inode) const
616{
617 LOCKER(m_lock);
618 ASSERT(inode.fsid() == fsid());
619
620 {
621 auto it = m_inode_cache.find(inode.index());
622 if (it != m_inode_cache.end())
623 return (*it).value;
624 }
625
626 if (!get_inode_allocation_state(inode.index())) {
627 m_inode_cache.set(inode.index(), nullptr);
628 return nullptr;
629 }
630
631 unsigned block_index;
632 unsigned offset;
633 u8 block[max_block_size];
634 if (!read_block_containing_inode(inode.index(), block_index, offset, block))
635 return {};
636
637 auto new_inode = adopt(*new Ext2FSInode(const_cast<Ext2FS&>(*this), inode.index()));
638 memcpy(&new_inode->m_raw_inode, reinterpret_cast<ext2_inode*>(block + offset), sizeof(ext2_inode));
639 m_inode_cache.set(inode.index(), new_inode);
640 return new_inode;
641}
642
643ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDescription* description) const
644{
645 Locker inode_locker(m_lock);
646 ASSERT(offset >= 0);
647 if (m_raw_inode.i_size == 0)
648 return 0;
649
650 // Symbolic links shorter than 60 characters are store inline inside the i_block array.
651 // This avoids wasting an entire block on short links. (Most links are short.)
652 if (is_symlink() && size() < max_inline_symlink_length) {
653 ASSERT(offset == 0);
654 ssize_t nread = min((off_t)size() - offset, static_cast<off_t>(count));
655 memcpy(buffer, ((const u8*)m_raw_inode.i_block) + offset, (size_t)nread);
656 return nread;
657 }
658
659 Locker fs_locker(fs().m_lock);
660
661 if (m_block_list.is_empty())
662 m_block_list = fs().block_list_for_inode(m_raw_inode);
663
664 if (m_block_list.is_empty()) {
665 kprintf("ext2fs: read_bytes: empty block list for inode %u\n", index());
666 return -EIO;
667 }
668
669 const int block_size = fs().block_size();
670
671 size_t first_block_logical_index = offset / block_size;
672 size_t last_block_logical_index = (offset + count) / block_size;
673 if (last_block_logical_index >= m_block_list.size())
674 last_block_logical_index = m_block_list.size() - 1;
675
676 int offset_into_first_block = offset % block_size;
677
678 ssize_t nread = 0;
679 size_t remaining_count = min((off_t)count, (off_t)size() - offset);
680 u8* out = buffer;
681
682#ifdef EXT2_DEBUG
683 dbg() << "Ext2FS: Reading up to " << count << " bytes " << offset << " bytes into inode " << identifier() << " to " << (const void*)buffer;
684#endif
685
686 u8 block[max_block_size];
687
688 for (size_t bi = first_block_logical_index; remaining_count && bi <= last_block_logical_index; ++bi) {
689 auto block_index = m_block_list[bi];
690 ASSERT(block_index);
691 bool success = fs().read_block(block_index, block, description);
692 if (!success) {
693 kprintf("ext2fs: read_bytes: read_block(%u) failed (lbi: %u)\n", block_index, bi);
694 return -EIO;
695 }
696
697 size_t offset_into_block = (bi == first_block_logical_index) ? offset_into_first_block : 0;
698 size_t num_bytes_to_copy = min(block_size - offset_into_block, remaining_count);
699 memcpy(out, block + offset_into_block, num_bytes_to_copy);
700 remaining_count -= num_bytes_to_copy;
701 nread += num_bytes_to_copy;
702 out += num_bytes_to_copy;
703 }
704
705 return nread;
706}
707
708KResult Ext2FSInode::resize(u64 new_size)
709{
710 u64 old_size = size();
711 if (old_size == new_size)
712 return KSuccess;
713
714 u64 block_size = fs().block_size();
715 size_t blocks_needed_before = ceil_div(old_size, block_size);
716 size_t blocks_needed_after = ceil_div(new_size, block_size);
717
718#ifdef EXT2_DEBUG
719 dbgprintf("Ext2FSInode::resize(): blocks needed before (size was %Q): %d\n", old_size, blocks_needed_before);
720 dbgprintf("Ext2FSInode::resize(): blocks needed after (size is %Q): %d\n", new_size, blocks_needed_after);
721#endif
722
723 if (blocks_needed_after > blocks_needed_before) {
724 u32 additional_blocks_needed = blocks_needed_after - blocks_needed_before;
725 if (additional_blocks_needed > fs().super_block().s_free_blocks_count)
726 return KResult(-ENOSPC);
727 }
728
729 auto block_list = fs().block_list_for_inode(m_raw_inode);
730 if (blocks_needed_after > blocks_needed_before) {
731 auto new_blocks = fs().allocate_blocks(fs().group_index_from_inode(index()), blocks_needed_after - blocks_needed_before);
732 block_list.append(move(new_blocks));
733 } else if (blocks_needed_after < blocks_needed_before) {
734#ifdef EXT2_DEBUG
735 dbg() << "Ext2FS: Shrinking inode " << identifier() << ". Old block list is " << block_list.size() << " entries:";
736 for (auto block_index : block_list) {
737 dbg() << " # " << block_index;
738 }
739#endif
740 while (block_list.size() != blocks_needed_after) {
741 auto block_index = block_list.take_last();
742 if (block_index)
743 fs().set_block_allocation_state(block_index, false);
744 }
745 }
746
747 bool success = fs().write_block_list_for_inode(index(), m_raw_inode, block_list);
748 if (!success)
749 return KResult(-EIO);
750
751 m_raw_inode.i_size = new_size;
752 set_metadata_dirty(true);
753
754 m_block_list = move(block_list);
755 return KSuccess;
756}
757
758ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, FileDescription* description)
759{
760 ASSERT(offset >= 0);
761 ASSERT(count >= 0);
762
763 Locker inode_locker(m_lock);
764 Locker fs_locker(fs().m_lock);
765
766 if (is_symlink()) {
767 ASSERT(offset == 0);
768 if (max((size_t)(offset + count), (size_t)m_raw_inode.i_size) < max_inline_symlink_length) {
769#ifdef EXT2_DEBUG
770 dbg() << "Ext2FS: write_bytes poking into i_block array for inline symlink '" << StringView(data, count) << " ' (" << count << " bytes)";
771#endif
772 memcpy(((u8*)m_raw_inode.i_block) + offset, data, (size_t)count);
773 if ((size_t)(offset + count) > (size_t)m_raw_inode.i_size)
774 m_raw_inode.i_size = offset + count;
775 set_metadata_dirty(true);
776 return count;
777 }
778 }
779
780 const size_t block_size = fs().block_size();
781 u64 old_size = size();
782 u64 new_size = max(static_cast<u64>(offset) + count, (u64)size());
783
784 auto resize_result = resize(new_size);
785 if (resize_result.is_error())
786 return resize_result;
787
788 if (m_block_list.is_empty())
789 m_block_list = fs().block_list_for_inode(m_raw_inode);
790
791 if (m_block_list.is_empty()) {
792 dbg() << "Ext2FSInode::write_bytes(): empty block list for inode " << index();
793 return -EIO;
794 }
795
796 size_t first_block_logical_index = offset / block_size;
797 size_t last_block_logical_index = (offset + count) / block_size;
798 if (last_block_logical_index >= m_block_list.size())
799 last_block_logical_index = m_block_list.size() - 1;
800
801 size_t offset_into_first_block = offset % block_size;
802
803 size_t last_logical_block_index_in_file = new_size / block_size;
804
805 ssize_t nwritten = 0;
806 size_t remaining_count = min((off_t)count, (off_t)new_size - offset);
807 const u8* in = data;
808
809#ifdef EXT2_DEBUG
810 dbg() << "Ext2FS: Writing " << count << " bytes " << offset << " bytes into inode " << identifier() << " from " << (const void*)data;
811#endif
812
813 auto buffer_block = ByteBuffer::create_uninitialized(block_size);
814 for (size_t bi = first_block_logical_index; remaining_count && bi <= last_block_logical_index; ++bi) {
815 size_t offset_into_block = (bi == first_block_logical_index) ? offset_into_first_block : 0;
816 size_t num_bytes_to_copy = min(block_size - offset_into_block, remaining_count);
817
818 ByteBuffer block;
819 if (offset_into_block != 0 || num_bytes_to_copy != block_size) {
820 block = ByteBuffer::create_uninitialized(block_size);
821 bool success = fs().read_block(m_block_list[bi], block.data(), description);
822 if (!success) {
823 dbg() << "Ext2FS: In write_bytes, read_block(" << m_block_list[bi] << ") failed (bi: " << bi << ")";
824 return -EIO;
825 }
826 } else
827 block = buffer_block;
828
829 memcpy(block.data() + offset_into_block, in, num_bytes_to_copy);
830 if (bi == last_logical_block_index_in_file && num_bytes_to_copy < block_size) {
831 size_t padding_start = new_size % block_size;
832 size_t padding_bytes = block_size - padding_start;
833#ifdef EXT2_DEBUG
834 dbg() << "Ext2FS: Padding last block of file with zero x " << padding_bytes << " (new_size=" << new_size << ", offset_into_block=" << offset_into_block << ", num_bytes_to_copy=" << num_bytes_to_copy << ")";
835#endif
836 memset(block.data() + padding_start, 0, padding_bytes);
837 }
838#ifdef EXT2_DEBUG
839 dbg() << "Ext2FS: Writing block " << m_block_list[bi] << " (offset_into_block: " << offset_into_block << ")";
840#endif
841 bool success = fs().write_block(m_block_list[bi], block.data(), description);
842 if (!success) {
843 dbg() << "Ext2FS: write_block(" << m_block_list[bi] << ") failed (bi: " << bi << ")";
844 ASSERT_NOT_REACHED();
845 return -EIO;
846 }
847 remaining_count -= num_bytes_to_copy;
848 nwritten += num_bytes_to_copy;
849 in += num_bytes_to_copy;
850 }
851
852#ifdef EXT2_DEBUG
853 dbg() << "Ext2FS: After write, i_size=" << m_raw_inode.i_size << ", i_blocks=" << m_raw_inode.i_blocks << " (" << m_block_list.size() << " blocks in list)";
854#endif
855
856 if (old_size != new_size)
857 inode_size_changed(old_size, new_size);
858 inode_contents_changed(offset, count, data);
859 return nwritten;
860}
861
862bool Ext2FSInode::traverse_as_directory(Function<bool(const FS::DirectoryEntry&)> callback) const
863{
864 LOCKER(m_lock);
865 ASSERT(is_directory());
866
867#ifdef EXT2_DEBUG
868 dbg() << "Ext2FS: Traversing as directory: " << identifier();
869#endif
870
871 auto buffer = read_entire();
872 ASSERT(buffer);
873 auto* entry = reinterpret_cast<ext2_dir_entry_2*>(buffer.data());
874
875 while (entry < buffer.end_pointer()) {
876 if (entry->inode != 0) {
877#ifdef EXT2_DEBUG
878 dbgprintf("Ext2Inode::traverse_as_directory: %u, name_len: %u, rec_len: %u, file_type: %u, name: %s\n", entry->inode, entry->name_len, entry->rec_len, entry->file_type, String(entry->name, entry->name_len).characters());
879#endif
880 if (!callback({ entry->name, entry->name_len, { fsid(), entry->inode }, entry->file_type }))
881 break;
882 }
883 entry = (ext2_dir_entry_2*)((char*)entry + entry->rec_len);
884 }
885 return true;
886}
887
888bool Ext2FSInode::write_directory(const Vector<FS::DirectoryEntry>& entries)
889{
890 LOCKER(m_lock);
891
892 int directory_size = 0;
893 for (auto& entry : entries)
894 directory_size += EXT2_DIR_REC_LEN(entry.name_length);
895
896 auto block_size = fs().block_size();
897
898 int blocks_needed = ceil_div(directory_size, block_size);
899 int occupied_size = blocks_needed * block_size;
900
901#ifdef EXT2_DEBUG
902 dbg() << "Ext2FS: New directory inode " << identifier() << " contents to write (size " << directory_size << ", occupied " << occupied_size << "):";
903#endif
904
905 auto directory_data = ByteBuffer::create_uninitialized(occupied_size);
906
907 BufferStream stream(directory_data);
908 for (size_t i = 0; i < entries.size(); ++i) {
909 auto& entry = entries[i];
910
911 int record_length = EXT2_DIR_REC_LEN(entry.name_length);
912 if (i == entries.size() - 1)
913 record_length += occupied_size - directory_size;
914
915#ifdef EXT2_DEBUG
916 dbg() << "* Inode: " << entry.inode
917 << ", name_len: " << u16(entry.name_length)
918 << ", rec_len: " << u16(record_length)
919 << ", file_type: " << u8(entry.file_type)
920 << ", name: " << entry.name;
921#endif
922
923 stream << u32(entry.inode.index());
924 stream << u16(record_length);
925 stream << u8(entry.name_length);
926 stream << u8(entry.file_type);
927 stream << entry.name;
928
929 int padding = record_length - entry.name_length - 8;
930 for (int j = 0; j < padding; ++j)
931 stream << u8(0);
932 }
933
934 stream.fill_to_end(0);
935
936 ssize_t nwritten = write_bytes(0, directory_data.size(), directory_data.data(), nullptr);
937 if (nwritten < 0)
938 return false;
939 set_metadata_dirty(true);
940 return static_cast<size_t>(nwritten) == directory_data.size();
941}
942
943KResult Ext2FSInode::add_child(InodeIdentifier child_id, const StringView& name, mode_t mode)
944{
945 LOCKER(m_lock);
946 ASSERT(is_directory());
947
948 if (name.length() > EXT2_NAME_LEN)
949 return KResult(-ENAMETOOLONG);
950
951#ifdef EXT2_DEBUG
952 dbg() << "Ext2FSInode::add_child(): Adding inode " << child_id.index() << " with name '" << name << " and mode " << mode << " to directory " << index();
953#endif
954
955 Vector<FS::DirectoryEntry> entries;
956 bool name_already_exists = false;
957 traverse_as_directory([&](auto& entry) {
958 if (name == entry.name) {
959 name_already_exists = true;
960 return false;
961 }
962 entries.append(entry);
963 return true;
964 });
965 if (name_already_exists) {
966 dbg() << "Ext2FSInode::add_child(): Name '" << name << "' already exists in inode " << index();
967 return KResult(-EEXIST);
968 }
969
970 auto child_inode = fs().get_inode(child_id);
971 if (child_inode) {
972 auto result = child_inode->increment_link_count();
973 if (result.is_error())
974 return result;
975 }
976
977 entries.empend(name.characters_without_null_termination(), name.length(), child_id, to_ext2_file_type(mode));
978 bool success = write_directory(entries);
979 if (success)
980 m_lookup_cache.set(name, child_id.index());
981 return KSuccess;
982}
983
984KResult Ext2FSInode::remove_child(const StringView& name)
985{
986 LOCKER(m_lock);
987#ifdef EXT2_DEBUG
988 dbg() << "Ext2FSInode::remove_child(" << name << ") in inode " << index();
989#endif
990 ASSERT(is_directory());
991
992 auto it = m_lookup_cache.find(name);
993 if (it == m_lookup_cache.end())
994 return KResult(-ENOENT);
995 auto child_inode_index = (*it).value;
996
997 InodeIdentifier child_id { fsid(), child_inode_index };
998
999#ifdef EXT2_DEBUG
1000 dbg() << "Ext2FSInode::remove_child(): Removing '" << name << "' in directory " << index();
1001#endif
1002
1003 Vector<FS::DirectoryEntry> entries;
1004 traverse_as_directory([&](auto& entry) {
1005 if (name != entry.name)
1006 entries.append(entry);
1007 return true;
1008 });
1009
1010 bool success = write_directory(entries);
1011 if (!success) {
1012 // FIXME: Plumb error from write_directory().
1013 return KResult(-EIO);
1014 }
1015
1016 m_lookup_cache.remove(name);
1017
1018 auto child_inode = fs().get_inode(child_id);
1019 child_inode->decrement_link_count();
1020 return KSuccess;
1021}
1022
1023unsigned Ext2FS::inodes_per_block() const
1024{
1025 return EXT2_INODES_PER_BLOCK(&super_block());
1026}
1027
1028unsigned Ext2FS::inodes_per_group() const
1029{
1030 return EXT2_INODES_PER_GROUP(&super_block());
1031}
1032
1033unsigned Ext2FS::inode_size() const
1034{
1035 return EXT2_INODE_SIZE(&super_block());
1036}
1037unsigned Ext2FS::blocks_per_group() const
1038{
1039 return EXT2_BLOCKS_PER_GROUP(&super_block());
1040}
1041
1042bool Ext2FS::write_ext2_inode(unsigned inode, const ext2_inode& e2inode)
1043{
1044 LOCKER(m_lock);
1045 unsigned block_index;
1046 unsigned offset;
1047 u8 block[max_block_size];
1048 if (!read_block_containing_inode(inode, block_index, offset, block))
1049 return false;
1050 memcpy(reinterpret_cast<ext2_inode*>(block + offset), &e2inode, inode_size());
1051 bool success = write_block(block_index, block);
1052 ASSERT(success);
1053 return success;
1054}
1055
1056Ext2FS::BlockIndex Ext2FS::allocate_block(GroupIndex preferred_group_index)
1057{
1058 LOCKER(m_lock);
1059#ifdef EXT2_DEBUG
1060 dbg() << "Ext2FS: allocate_block() preferred_group_index: " << preferred_group_index;
1061#endif
1062 bool found_a_group = false;
1063 GroupIndex group_index = preferred_group_index;
1064
1065 if (group_descriptor(preferred_group_index).bg_free_blocks_count) {
1066 found_a_group = true;
1067 } else {
1068 for (group_index = 1; group_index < m_block_group_count; ++group_index) {
1069 if (group_descriptor(group_index).bg_free_blocks_count) {
1070 found_a_group = true;
1071 break;
1072 }
1073 }
1074 }
1075 ASSERT(found_a_group);
1076 auto& bgd = group_descriptor(group_index);
1077 auto& cached_bitmap = get_bitmap_block(bgd.bg_block_bitmap);
1078
1079 int blocks_in_group = min(blocks_per_group(), super_block().s_blocks_count);
1080 auto block_bitmap = Bitmap::wrap(cached_bitmap.buffer.data(), blocks_in_group);
1081
1082 BlockIndex first_block_in_group = (group_index - 1) * blocks_per_group() + first_block_index();
1083 int first_unset_bit_index = block_bitmap.find_first_unset();
1084 ASSERT(first_unset_bit_index != -1);
1085 BlockIndex block_index = (unsigned)first_unset_bit_index + first_block_in_group;
1086 set_block_allocation_state(block_index, true);
1087 return block_index;
1088}
1089
1090Vector<Ext2FS::BlockIndex> Ext2FS::allocate_blocks(GroupIndex preferred_group_index, size_t count)
1091{
1092 LOCKER(m_lock);
1093#ifdef EXT2_DEBUG
1094 dbgprintf("Ext2FS: allocate_blocks(preferred group: %u, count: %u)\n", preferred_group_index, count);
1095#endif
1096 if (count == 0)
1097 return {};
1098
1099 Vector<BlockIndex> blocks;
1100#ifdef EXT2_DEBUG
1101 dbg() << "Ext2FS: allocate_blocks:";
1102#endif
1103 blocks.ensure_capacity(count);
1104
1105 bool found_a_group = false;
1106 GroupIndex group_index = preferred_group_index;
1107
1108 if (!group_descriptor(preferred_group_index).bg_free_blocks_count) {
1109 group_index = 1;
1110 }
1111
1112 while (blocks.size() < count) {
1113 if (group_descriptor(group_index).bg_free_blocks_count) {
1114 found_a_group = true;
1115 } else {
1116 if (group_index == preferred_group_index)
1117 group_index = 1;
1118 for (; group_index < m_block_group_count; ++group_index) {
1119 if (group_descriptor(group_index).bg_free_blocks_count) {
1120 found_a_group = true;
1121 break;
1122 }
1123 }
1124 }
1125 ASSERT(found_a_group);
1126 auto& bgd = group_descriptor(group_index);
1127 auto& cached_bitmap = get_bitmap_block(bgd.bg_block_bitmap);
1128
1129 int blocks_in_group = min(blocks_per_group(), super_block().s_blocks_count);
1130 auto block_bitmap = Bitmap::wrap(cached_bitmap.buffer.data(), blocks_in_group);
1131
1132 BlockIndex first_block_in_group = (group_index - 1) * blocks_per_group() + first_block_index();
1133 size_t free_region_size = 0;
1134 auto first_unset_bit_index = block_bitmap.find_longest_range_of_unset_bits(count - blocks.size(), free_region_size);
1135 ASSERT(first_unset_bit_index.has_value());
1136#ifdef EXT2_DEBUG
1137 dbg() << "Ext2FS: allocating free region of size: " << free_region_size << "[" << group_index << "]";
1138#endif
1139 for (size_t i = 0; i < free_region_size; ++i) {
1140 BlockIndex block_index = (first_unset_bit_index.value() + i) + first_block_in_group;
1141 set_block_allocation_state(block_index, true);
1142 blocks.unchecked_append(block_index);
1143#ifdef EXT2_DEBUG
1144 dbg() << " allocated > " << block_index;
1145#endif
1146 }
1147 }
1148
1149 ASSERT(blocks.size() == count);
1150 return blocks;
1151}
1152
1153unsigned Ext2FS::find_a_free_inode(GroupIndex preferred_group, off_t expected_size)
1154{
1155 LOCKER(m_lock);
1156#ifdef EXT2_DEBUG
1157 dbgprintf("Ext2FS: find_a_free_inode(preferred_group: %u, expected_size: %ld)\n", preferred_group, expected_size);
1158#endif
1159
1160 unsigned needed_blocks = ceil_div(expected_size, block_size());
1161
1162#ifdef EXT2_DEBUG
1163 dbgprintf("Ext2FS: minimum needed blocks: %u\n", needed_blocks);
1164#endif
1165
1166 unsigned group_index = 0;
1167
1168 // FIXME: We shouldn't refuse to allocate an inode if there is no group that can house the whole thing.
1169 // In those cases we should just spread it across multiple groups.
1170 auto is_suitable_group = [this, needed_blocks](GroupIndex group_index) {
1171 auto& bgd = group_descriptor(group_index);
1172 return bgd.bg_free_inodes_count && bgd.bg_free_blocks_count >= needed_blocks;
1173 };
1174
1175 if (preferred_group && is_suitable_group(preferred_group)) {
1176 group_index = preferred_group;
1177 } else {
1178 for (unsigned i = 1; i <= m_block_group_count; ++i) {
1179 if (is_suitable_group(i))
1180 group_index = i;
1181 }
1182 }
1183
1184 if (!group_index) {
1185 kprintf("Ext2FS: find_a_free_inode: no suitable group found for new inode with %u blocks needed :(\n", needed_blocks);
1186 return 0;
1187 }
1188
1189#ifdef EXT2_DEBUG
1190 dbgprintf("Ext2FS: find_a_free_inode: found suitable group [%u] for new inode with %u blocks needed :^)\n", group_index, needed_blocks);
1191#endif
1192
1193 auto& bgd = group_descriptor(group_index);
1194 unsigned inodes_in_group = min(inodes_per_group(), super_block().s_inodes_count);
1195 unsigned first_free_inode_in_group = 0;
1196
1197 unsigned first_inode_in_group = (group_index - 1) * inodes_per_group() + 1;
1198
1199 auto& cached_bitmap = get_bitmap_block(bgd.bg_inode_bitmap);
1200 auto inode_bitmap = Bitmap::wrap(cached_bitmap.buffer.data(), inodes_in_group);
1201 for (size_t i = 0; i < inode_bitmap.size(); ++i) {
1202 if (inode_bitmap.get(i))
1203 continue;
1204 first_free_inode_in_group = first_inode_in_group + i;
1205 break;
1206 }
1207
1208 if (!first_free_inode_in_group) {
1209 kprintf("Ext2FS: first_free_inode_in_group returned no inode, despite bgd claiming there are inodes :(\n");
1210 return 0;
1211 }
1212
1213 unsigned inode = first_free_inode_in_group;
1214#ifdef EXT2_DEBUG
1215 dbgprintf("Ext2FS: found suitable inode %u\n", inode);
1216#endif
1217
1218 ASSERT(get_inode_allocation_state(inode) == false);
1219 return inode;
1220}
1221
1222Ext2FS::GroupIndex Ext2FS::group_index_from_block_index(BlockIndex block_index) const
1223{
1224 if (!block_index)
1225 return 0;
1226 return (block_index - 1) / blocks_per_group() + 1;
1227}
1228
1229unsigned Ext2FS::group_index_from_inode(unsigned inode) const
1230{
1231 if (!inode)
1232 return 0;
1233 return (inode - 1) / inodes_per_group() + 1;
1234}
1235
1236bool Ext2FS::get_inode_allocation_state(InodeIndex index) const
1237{
1238 LOCKER(m_lock);
1239 if (index == 0)
1240 return true;
1241 unsigned group_index = group_index_from_inode(index);
1242 auto& bgd = group_descriptor(group_index);
1243 unsigned index_in_group = index - ((group_index - 1) * inodes_per_group());
1244 unsigned bit_index = (index_in_group - 1) % inodes_per_group();
1245
1246 auto& cached_bitmap = const_cast<Ext2FS&>(*this).get_bitmap_block(bgd.bg_inode_bitmap);
1247 return cached_bitmap.bitmap(inodes_per_group()).get(bit_index);
1248}
1249
1250bool Ext2FS::set_inode_allocation_state(InodeIndex inode_index, bool new_state)
1251{
1252 LOCKER(m_lock);
1253 unsigned group_index = group_index_from_inode(inode_index);
1254 auto& bgd = group_descriptor(group_index);
1255 unsigned index_in_group = inode_index - ((group_index - 1) * inodes_per_group());
1256 unsigned bit_index = (index_in_group - 1) % inodes_per_group();
1257
1258 auto& cached_bitmap = get_bitmap_block(bgd.bg_inode_bitmap);
1259
1260 bool current_state = cached_bitmap.bitmap(inodes_per_group()).get(bit_index);
1261#ifdef EXT2_DEBUG
1262 dbgprintf("Ext2FS: set_inode_allocation_state(%u) %u -> %u\n", inode_index, current_state, new_state);
1263#endif
1264
1265 if (current_state == new_state) {
1266 ASSERT_NOT_REACHED();
1267 return true;
1268 }
1269
1270 cached_bitmap.bitmap(inodes_per_group()).set(bit_index, new_state);
1271 cached_bitmap.dirty = true;
1272
1273 // Update superblock
1274#ifdef EXT2_DEBUG
1275 dbgprintf("Ext2FS: superblock free inode count %u -> %u\n", m_super_block.s_free_inodes_count, m_super_block.s_free_inodes_count - 1);
1276#endif
1277 if (new_state)
1278 --m_super_block.s_free_inodes_count;
1279 else
1280 ++m_super_block.s_free_inodes_count;
1281 m_super_block_dirty = true;
1282
1283 // Update BGD
1284 auto& mutable_bgd = const_cast<ext2_group_desc&>(bgd);
1285 if (new_state)
1286 --mutable_bgd.bg_free_inodes_count;
1287 else
1288 ++mutable_bgd.bg_free_inodes_count;
1289#ifdef EXT2_DEBUG
1290 dbgprintf("Ext2FS: group free inode count %u -> %u\n", bgd.bg_free_inodes_count, bgd.bg_free_inodes_count - 1);
1291#endif
1292
1293 m_block_group_descriptors_dirty = true;
1294 return true;
1295}
1296
1297Ext2FS::BlockIndex Ext2FS::first_block_index() const
1298{
1299 return block_size() == 1024 ? 1 : 0;
1300}
1301
1302Ext2FS::CachedBitmap& Ext2FS::get_bitmap_block(BlockIndex bitmap_block_index)
1303{
1304 for (auto& cached_bitmap : m_cached_bitmaps) {
1305 if (cached_bitmap->bitmap_block_index == bitmap_block_index)
1306 return *cached_bitmap;
1307 }
1308
1309 auto block = KBuffer::create_with_size(block_size(), Region::Access::Read | Region::Access::Write, "Ext2FS: Cached bitmap block");
1310 bool success = read_block(bitmap_block_index, block.data());
1311 ASSERT(success);
1312 m_cached_bitmaps.append(make<CachedBitmap>(bitmap_block_index, move(block)));
1313 return *m_cached_bitmaps.last();
1314}
1315
1316bool Ext2FS::set_block_allocation_state(BlockIndex block_index, bool new_state)
1317{
1318 ASSERT(block_index != 0);
1319 LOCKER(m_lock);
1320#ifdef EXT2_DEBUG
1321 dbgprintf("Ext2FS: set_block_allocation_state(block=%u, state=%u)\n", block_index, new_state);
1322#endif
1323
1324 GroupIndex group_index = group_index_from_block_index(block_index);
1325 auto& bgd = group_descriptor(group_index);
1326 BlockIndex index_in_group = (block_index - first_block_index()) - ((group_index - 1) * blocks_per_group());
1327 unsigned bit_index = index_in_group % blocks_per_group();
1328
1329 auto& cached_bitmap = get_bitmap_block(bgd.bg_block_bitmap);
1330
1331 bool current_state = cached_bitmap.bitmap(blocks_per_group()).get(bit_index);
1332#ifdef EXT2_DEBUG
1333 dbgprintf("Ext2FS: block %u state: %u -> %u (in bitmap block %u)\n", block_index, current_state, new_state, bgd.bg_block_bitmap);
1334#endif
1335
1336 if (current_state == new_state) {
1337 ASSERT_NOT_REACHED();
1338 return true;
1339 }
1340
1341 cached_bitmap.bitmap(blocks_per_group()).set(bit_index, new_state);
1342 cached_bitmap.dirty = true;
1343
1344 // Update superblock
1345#ifdef EXT2_DEBUG
1346 dbgprintf("Ext2FS: superblock free block count %u -> %u\n", m_super_block.s_free_blocks_count, m_super_block.s_free_blocks_count - 1);
1347#endif
1348 if (new_state)
1349 --m_super_block.s_free_blocks_count;
1350 else
1351 ++m_super_block.s_free_blocks_count;
1352 m_super_block_dirty = true;
1353
1354 // Update BGD
1355 auto& mutable_bgd = const_cast<ext2_group_desc&>(bgd);
1356 if (new_state)
1357 --mutable_bgd.bg_free_blocks_count;
1358 else
1359 ++mutable_bgd.bg_free_blocks_count;
1360#ifdef EXT2_DEBUG
1361 dbgprintf("Ext2FS: group %u free block count %u -> %u\n", group_index, bgd.bg_free_blocks_count, bgd.bg_free_blocks_count - 1);
1362#endif
1363
1364 m_block_group_descriptors_dirty = true;
1365 return true;
1366}
1367
1368KResult Ext2FS::create_directory(InodeIdentifier parent_id, const String& name, mode_t mode, uid_t uid, gid_t gid)
1369{
1370 LOCKER(m_lock);
1371 ASSERT(parent_id.fsid() == fsid());
1372
1373 // Fix up the mode to definitely be a directory.
1374 // FIXME: This is a bit on the hackish side.
1375 mode &= ~0170000;
1376 mode |= 0040000;
1377
1378 // NOTE: When creating a new directory, make the size 1 block.
1379 // There's probably a better strategy here, but this works for now.
1380 auto inode_or_error = create_inode(parent_id, name, mode, block_size(), 0, uid, gid);
1381 if (inode_or_error.is_error())
1382 return inode_or_error.error();
1383
1384 auto& inode = inode_or_error.value();
1385
1386#ifdef EXT2_DEBUG
1387 dbgprintf("Ext2FS: create_directory: created new directory named '%s' with inode %u\n", name.characters(), inode->identifier().index());
1388#endif
1389
1390 Vector<DirectoryEntry> entries;
1391 entries.empend(".", inode->identifier(), EXT2_FT_DIR);
1392 entries.empend("..", parent_id, EXT2_FT_DIR);
1393
1394 bool success = static_cast<Ext2FSInode&>(*inode).write_directory(entries);
1395 ASSERT(success);
1396
1397 auto parent_inode = get_inode(parent_id);
1398 auto result = parent_inode->increment_link_count();
1399 if (result.is_error())
1400 return result;
1401
1402 auto& bgd = const_cast<ext2_group_desc&>(group_descriptor(group_index_from_inode(inode->identifier().index())));
1403 ++bgd.bg_used_dirs_count;
1404#ifdef EXT2_DEBUG
1405 dbgprintf("Ext2FS: incremented bg_used_dirs_count %u -> %u\n", bgd.bg_used_dirs_count - 1, bgd.bg_used_dirs_count);
1406#endif
1407
1408 m_block_group_descriptors_dirty = true;
1409
1410 return KSuccess;
1411}
1412
1413KResultOr<NonnullRefPtr<Inode>> Ext2FS::create_inode(InodeIdentifier parent_id, const String& name, mode_t mode, off_t size, dev_t dev, uid_t uid, gid_t gid)
1414{
1415 LOCKER(m_lock);
1416 ASSERT(parent_id.fsid() == fsid());
1417 auto parent_inode = get_inode(parent_id);
1418 ASSERT(parent_inode);
1419
1420 if (static_cast<const Ext2FSInode&>(*parent_inode).m_raw_inode.i_links_count == 0)
1421 return KResult(-ENOENT);
1422
1423#ifdef EXT2_DEBUG
1424 dbgprintf("Ext2FS: Adding inode '%s' (mode %o) to parent directory %u:\n", name.characters(), mode, parent_inode->identifier().index());
1425#endif
1426
1427 size_t needed_blocks = ceil_div(size, block_size());
1428 if ((size_t)needed_blocks > super_block().s_free_blocks_count) {
1429 dbg() << "Ext2FS: create_inode: not enough free blocks";
1430 return KResult(-ENOSPC);
1431 }
1432
1433 // NOTE: This doesn't commit the inode allocation just yet!
1434 auto inode_id = find_a_free_inode(0, size);
1435 if (!inode_id) {
1436 kprintf("Ext2FS: create_inode: allocate_inode failed\n");
1437 return KResult(-ENOSPC);
1438 }
1439
1440 // Try adding it to the directory first, in case the name is already in use.
1441 auto result = parent_inode->add_child({ fsid(), inode_id }, name, mode);
1442 if (result.is_error())
1443 return result;
1444
1445 auto blocks = allocate_blocks(group_index_from_inode(inode_id), needed_blocks);
1446 ASSERT(blocks.size() == needed_blocks);
1447
1448 // Looks like we're good, time to update the inode bitmap and group+global inode counters.
1449 bool success = set_inode_allocation_state(inode_id, true);
1450 ASSERT(success);
1451
1452 unsigned initial_links_count;
1453 if (is_directory(mode))
1454 initial_links_count = 2; // (parent directory + "." entry in self)
1455 else
1456 initial_links_count = 1;
1457
1458 struct timeval now;
1459 kgettimeofday(now);
1460 ext2_inode e2inode;
1461 memset(&e2inode, 0, sizeof(ext2_inode));
1462 e2inode.i_mode = mode;
1463 e2inode.i_uid = uid;
1464 e2inode.i_gid = gid;
1465 e2inode.i_size = size;
1466 e2inode.i_atime = now.tv_sec;
1467 e2inode.i_ctime = now.tv_sec;
1468 e2inode.i_mtime = now.tv_sec;
1469 e2inode.i_dtime = 0;
1470 e2inode.i_links_count = initial_links_count;
1471
1472 if (is_character_device(mode))
1473 e2inode.i_block[0] = dev;
1474 else if (is_block_device(mode))
1475 e2inode.i_block[1] = dev;
1476
1477 success = write_block_list_for_inode(inode_id, e2inode, blocks);
1478 ASSERT(success);
1479
1480#ifdef EXT2_DEBUG
1481 dbgprintf("Ext2FS: writing initial metadata for inode %u\n", inode_id);
1482#endif
1483 e2inode.i_flags = 0;
1484 success = write_ext2_inode(inode_id, e2inode);
1485 ASSERT(success);
1486
1487 // We might have cached the fact that this inode didn't exist. Wipe the slate.
1488 m_inode_cache.remove(inode_id);
1489
1490 auto inode = get_inode({ fsid(), inode_id });
1491 // If we've already computed a block list, no sense in throwing it away.
1492 static_cast<Ext2FSInode&>(*inode).m_block_list = move(blocks);
1493 return inode.release_nonnull();
1494}
1495
1496void Ext2FSInode::populate_lookup_cache() const
1497{
1498 LOCKER(m_lock);
1499 if (!m_lookup_cache.is_empty())
1500 return;
1501 HashMap<String, unsigned> children;
1502
1503 traverse_as_directory([&children](auto& entry) {
1504 children.set(String(entry.name, entry.name_length), entry.inode.index());
1505 return true;
1506 });
1507
1508 if (!m_lookup_cache.is_empty())
1509 return;
1510 m_lookup_cache = move(children);
1511}
1512
1513RefPtr<Inode> Ext2FSInode::lookup(StringView name)
1514{
1515 ASSERT(is_directory());
1516 populate_lookup_cache();
1517 LOCKER(m_lock);
1518 auto it = m_lookup_cache.find(name.hash(), [&](auto& entry) { return entry.key == name; });
1519 if (it != m_lookup_cache.end())
1520 return fs().get_inode({ fsid(), (*it).value });
1521 return {};
1522}
1523
1524void Ext2FSInode::one_ref_left()
1525{
1526 // FIXME: I would like to not live forever, but uncached Ext2FS is fucking painful right now.
1527}
1528
1529int Ext2FSInode::set_atime(time_t t)
1530{
1531 LOCKER(m_lock);
1532 if (fs().is_readonly())
1533 return -EROFS;
1534 m_raw_inode.i_atime = t;
1535 set_metadata_dirty(true);
1536 return 0;
1537}
1538
1539int Ext2FSInode::set_ctime(time_t t)
1540{
1541 LOCKER(m_lock);
1542 if (fs().is_readonly())
1543 return -EROFS;
1544 m_raw_inode.i_ctime = t;
1545 set_metadata_dirty(true);
1546 return 0;
1547}
1548
1549int Ext2FSInode::set_mtime(time_t t)
1550{
1551 LOCKER(m_lock);
1552 if (fs().is_readonly())
1553 return -EROFS;
1554 m_raw_inode.i_mtime = t;
1555 set_metadata_dirty(true);
1556 return 0;
1557}
1558
1559KResult Ext2FSInode::increment_link_count()
1560{
1561 LOCKER(m_lock);
1562 if (fs().is_readonly())
1563 return KResult(-EROFS);
1564 if (m_raw_inode.i_links_count == max_link_count)
1565 return KResult(-EMLINK);
1566 ++m_raw_inode.i_links_count;
1567 set_metadata_dirty(true);
1568 return KSuccess;
1569}
1570
1571KResult Ext2FSInode::decrement_link_count()
1572{
1573 LOCKER(m_lock);
1574 if (fs().is_readonly())
1575 return KResult(-EROFS);
1576 ASSERT(m_raw_inode.i_links_count);
1577 --m_raw_inode.i_links_count;
1578 if (ref_count() == 1 && m_raw_inode.i_links_count == 0)
1579 fs().uncache_inode(index());
1580 set_metadata_dirty(true);
1581 return KSuccess;
1582}
1583
1584void Ext2FS::uncache_inode(InodeIndex index)
1585{
1586 LOCKER(m_lock);
1587 m_inode_cache.remove(index);
1588}
1589
1590size_t Ext2FSInode::directory_entry_count() const
1591{
1592 ASSERT(is_directory());
1593 LOCKER(m_lock);
1594 populate_lookup_cache();
1595 return m_lookup_cache.size();
1596}
1597
1598KResult Ext2FSInode::chmod(mode_t mode)
1599{
1600 LOCKER(m_lock);
1601 if (m_raw_inode.i_mode == mode)
1602 return KSuccess;
1603 m_raw_inode.i_mode = mode;
1604 set_metadata_dirty(true);
1605 return KSuccess;
1606}
1607
1608KResult Ext2FSInode::chown(uid_t uid, gid_t gid)
1609{
1610 LOCKER(m_lock);
1611 if (m_raw_inode.i_uid == uid && m_raw_inode.i_gid == gid)
1612 return KSuccess;
1613 m_raw_inode.i_uid = uid;
1614 m_raw_inode.i_gid = gid;
1615 set_metadata_dirty(true);
1616 return KSuccess;
1617}
1618
1619KResult Ext2FSInode::truncate(u64 size)
1620{
1621 LOCKER(m_lock);
1622 if (static_cast<u64>(m_raw_inode.i_size) == size)
1623 return KSuccess;
1624 auto result = resize(size);
1625 if (result.is_error())
1626 return result;
1627 set_metadata_dirty(true);
1628 return KSuccess;
1629}
1630
1631unsigned Ext2FS::total_block_count() const
1632{
1633 LOCKER(m_lock);
1634 return super_block().s_blocks_count;
1635}
1636
1637unsigned Ext2FS::free_block_count() const
1638{
1639 LOCKER(m_lock);
1640 return super_block().s_free_blocks_count;
1641}
1642
1643unsigned Ext2FS::total_inode_count() const
1644{
1645 LOCKER(m_lock);
1646 return super_block().s_inodes_count;
1647}
1648
1649unsigned Ext2FS::free_inode_count() const
1650{
1651 LOCKER(m_lock);
1652 return super_block().s_free_inodes_count;
1653}
1654
1655KResult Ext2FS::prepare_to_unmount() const
1656{
1657 LOCKER(m_lock);
1658
1659 for (auto& it : m_inode_cache) {
1660 if (it.value->ref_count() > 1)
1661 return KResult(-EBUSY);
1662 }
1663
1664 m_inode_cache.clear();
1665 return KSuccess;
1666}
1667
1668}