Serenity Operating System
at master 156 lines 6.8 kB view raw
1/* 2 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/Debug.h> 8#include <AK/Demangle.h> 9#include <AK/OwnPtr.h> 10#include <AK/QuickSort.h> 11#include <AK/Vector.h> 12#include <LibCore/ArgsParser.h> 13#include <LibCore/MappedFile.h> 14#include <LibCore/System.h> 15#include <LibELF/Image.h> 16#include <LibMain/Main.h> 17#include <LibX86/Disassembler.h> 18#include <LibX86/ELFSymbolProvider.h> 19#include <string.h> 20 21ErrorOr<int> serenity_main(Main::Arguments args) 22{ 23 StringView path {}; 24 25 Core::ArgsParser args_parser; 26 args_parser.set_general_help( 27 "Disassemble an executable, and show human-readable " 28 "assembly code for each function."); 29 args_parser.add_positional_argument(path, "Path to i386 binary file", "path"); 30 args_parser.parse(args); 31 32 RefPtr<Core::MappedFile> file; 33 u8 const* asm_data = nullptr; 34 size_t asm_size = 0; 35 if ((TRY(Core::System::stat(path))).st_size > 0) { 36 file = TRY(Core::MappedFile::map(path)); 37 asm_data = static_cast<u8 const*>(file->data()); 38 asm_size = file->size(); 39 } 40 41 struct Symbol { 42 size_t value; 43 size_t size; 44 StringView name; 45 46 size_t address() const { return value; } 47 size_t address_end() const { return value + size; } 48 49 bool contains(size_t virtual_address) { return address() <= virtual_address && virtual_address < address_end(); } 50 }; 51 Vector<Symbol> symbols; 52 53 size_t file_offset = 0; 54 Vector<Symbol>::Iterator current_symbol = symbols.begin(); 55 OwnPtr<X86::ELFSymbolProvider> symbol_provider; // nullptr for non-ELF disassembly. 56 OwnPtr<ELF::Image> elf; 57 if (asm_size >= 4 && strncmp(reinterpret_cast<char const*>(asm_data), "\u007fELF", 4) == 0) { 58 elf = make<ELF::Image>(asm_data, asm_size); 59 if (elf->is_valid()) { 60 symbol_provider = make<X86::ELFSymbolProvider>(*elf); 61 elf->for_each_section_of_type(SHT_PROGBITS, [&](ELF::Image::Section const& section) { 62 // FIXME: Disassemble all SHT_PROGBITS sections, not just .text. 63 if (section.name() != ".text") 64 return IterationDecision::Continue; 65 asm_data = reinterpret_cast<u8 const*>(section.raw_data()); 66 asm_size = section.size(); 67 file_offset = section.address(); 68 return IterationDecision::Break; 69 }); 70 symbols.ensure_capacity(elf->symbol_count() + 1); 71 symbols.append({ 0, 0, StringView() }); // Sentinel. 72 elf->for_each_symbol([&](ELF::Image::Symbol const& symbol) { 73 symbols.append({ symbol.value(), symbol.size(), symbol.name() }); 74 return IterationDecision::Continue; 75 }); 76 quick_sort(symbols, [](auto& a, auto& b) { 77 if (a.value != b.value) 78 return a.value < b.value; 79 if (a.size != b.size) 80 return a.size < b.size; 81 return a.name < b.name; 82 }); 83 if constexpr (DISASM_DUMP_DEBUG) { 84 for (size_t i = 0; i < symbols.size(); ++i) 85 dbgln("{}: {:p}, {}", symbols[i].name, symbols[i].value, symbols[i].size); 86 } 87 } 88 } 89 90 X86::SimpleInstructionStream stream(asm_data, asm_size); 91 X86::Disassembler disassembler(stream); 92 93 bool is_first_symbol = true; 94 bool current_instruction_is_in_symbol = false; 95 96 for (;;) { 97 auto offset = stream.offset(); 98 auto insn = disassembler.next(); 99 if (!insn.has_value()) 100 break; 101 102 // Prefix regions of instructions belonging to a symbol with the symbol's name. 103 // Separate regions of instructions belonging to distinct symbols with newlines, 104 // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines. 105 // Interesting cases: 106 // - More than 1 symbol covering a region of instructions (ICF, D1/D2) 107 // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after) 108 // Invariant: current_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address. 109 size_t virtual_offset = file_offset + offset; 110 if (current_symbol < symbols.end() && !current_symbol->contains(virtual_offset)) { 111 if (!is_first_symbol && current_instruction_is_in_symbol) { 112 // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline. 113 outln(); 114 current_instruction_is_in_symbol = (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)); 115 } 116 117 // Try to find symbol covering current instruction, if one exists. 118 while (current_symbol + 1 < symbols.end() && !(current_symbol + 1)->contains(virtual_offset) && (current_symbol + 1)->address() <= virtual_offset) { 119 ++current_symbol; 120 if (!is_first_symbol) 121 outln("\n({} ({:p}-{:p}))\n", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end()); 122 } 123 while (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)) { 124 if (!is_first_symbol && !current_instruction_is_in_symbol) 125 outln(); 126 ++current_symbol; 127 current_instruction_is_in_symbol = true; 128 outln("{} ({:p}-{:p}):", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end()); 129 } 130 131 is_first_symbol = false; 132 } 133 134 size_t length = insn.value().length(); 135 StringBuilder builder; 136 builder.appendff("{:p} ", virtual_offset); 137 for (size_t i = 0; i < 7; i++) { 138 if (i < length) 139 builder.appendff("{:02x} ", asm_data[offset + i]); 140 else 141 builder.append(" "sv); 142 } 143 builder.append(" "sv); 144 builder.append(insn.value().to_deprecated_string(virtual_offset, symbol_provider)); 145 outln("{}", builder.string_view()); 146 147 for (size_t bytes_printed = 7; bytes_printed < length; bytes_printed += 7) { 148 builder.clear(); 149 builder.appendff("{:p} ", virtual_offset + bytes_printed); 150 for (size_t i = bytes_printed; i < bytes_printed + 7 && i < length; i++) 151 builder.appendff(" {:02x}", asm_data[offset + i]); 152 outln("{}", builder.string_view()); 153 } 154 } 155 return 0; 156}