Serenity Operating System
1/*
2 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/Debug.h>
8#include <AK/Demangle.h>
9#include <AK/OwnPtr.h>
10#include <AK/QuickSort.h>
11#include <AK/Vector.h>
12#include <LibCore/ArgsParser.h>
13#include <LibCore/MappedFile.h>
14#include <LibCore/System.h>
15#include <LibELF/Image.h>
16#include <LibMain/Main.h>
17#include <LibX86/Disassembler.h>
18#include <LibX86/ELFSymbolProvider.h>
19#include <string.h>
20
21ErrorOr<int> serenity_main(Main::Arguments args)
22{
23 StringView path {};
24
25 Core::ArgsParser args_parser;
26 args_parser.set_general_help(
27 "Disassemble an executable, and show human-readable "
28 "assembly code for each function.");
29 args_parser.add_positional_argument(path, "Path to i386 binary file", "path");
30 args_parser.parse(args);
31
32 RefPtr<Core::MappedFile> file;
33 u8 const* asm_data = nullptr;
34 size_t asm_size = 0;
35 if ((TRY(Core::System::stat(path))).st_size > 0) {
36 file = TRY(Core::MappedFile::map(path));
37 asm_data = static_cast<u8 const*>(file->data());
38 asm_size = file->size();
39 }
40
41 struct Symbol {
42 size_t value;
43 size_t size;
44 StringView name;
45
46 size_t address() const { return value; }
47 size_t address_end() const { return value + size; }
48
49 bool contains(size_t virtual_address) { return address() <= virtual_address && virtual_address < address_end(); }
50 };
51 Vector<Symbol> symbols;
52
53 size_t file_offset = 0;
54 Vector<Symbol>::Iterator current_symbol = symbols.begin();
55 OwnPtr<X86::ELFSymbolProvider> symbol_provider; // nullptr for non-ELF disassembly.
56 OwnPtr<ELF::Image> elf;
57 if (asm_size >= 4 && strncmp(reinterpret_cast<char const*>(asm_data), "\u007fELF", 4) == 0) {
58 elf = make<ELF::Image>(asm_data, asm_size);
59 if (elf->is_valid()) {
60 symbol_provider = make<X86::ELFSymbolProvider>(*elf);
61 elf->for_each_section_of_type(SHT_PROGBITS, [&](ELF::Image::Section const& section) {
62 // FIXME: Disassemble all SHT_PROGBITS sections, not just .text.
63 if (section.name() != ".text")
64 return IterationDecision::Continue;
65 asm_data = reinterpret_cast<u8 const*>(section.raw_data());
66 asm_size = section.size();
67 file_offset = section.address();
68 return IterationDecision::Break;
69 });
70 symbols.ensure_capacity(elf->symbol_count() + 1);
71 symbols.append({ 0, 0, StringView() }); // Sentinel.
72 elf->for_each_symbol([&](ELF::Image::Symbol const& symbol) {
73 symbols.append({ symbol.value(), symbol.size(), symbol.name() });
74 return IterationDecision::Continue;
75 });
76 quick_sort(symbols, [](auto& a, auto& b) {
77 if (a.value != b.value)
78 return a.value < b.value;
79 if (a.size != b.size)
80 return a.size < b.size;
81 return a.name < b.name;
82 });
83 if constexpr (DISASM_DUMP_DEBUG) {
84 for (size_t i = 0; i < symbols.size(); ++i)
85 dbgln("{}: {:p}, {}", symbols[i].name, symbols[i].value, symbols[i].size);
86 }
87 }
88 }
89
90 X86::SimpleInstructionStream stream(asm_data, asm_size);
91 X86::Disassembler disassembler(stream);
92
93 bool is_first_symbol = true;
94 bool current_instruction_is_in_symbol = false;
95
96 for (;;) {
97 auto offset = stream.offset();
98 auto insn = disassembler.next();
99 if (!insn.has_value())
100 break;
101
102 // Prefix regions of instructions belonging to a symbol with the symbol's name.
103 // Separate regions of instructions belonging to distinct symbols with newlines,
104 // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
105 // Interesting cases:
106 // - More than 1 symbol covering a region of instructions (ICF, D1/D2)
107 // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
108 // Invariant: current_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address.
109 size_t virtual_offset = file_offset + offset;
110 if (current_symbol < symbols.end() && !current_symbol->contains(virtual_offset)) {
111 if (!is_first_symbol && current_instruction_is_in_symbol) {
112 // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
113 outln();
114 current_instruction_is_in_symbol = (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset));
115 }
116
117 // Try to find symbol covering current instruction, if one exists.
118 while (current_symbol + 1 < symbols.end() && !(current_symbol + 1)->contains(virtual_offset) && (current_symbol + 1)->address() <= virtual_offset) {
119 ++current_symbol;
120 if (!is_first_symbol)
121 outln("\n({} ({:p}-{:p}))\n", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end());
122 }
123 while (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)) {
124 if (!is_first_symbol && !current_instruction_is_in_symbol)
125 outln();
126 ++current_symbol;
127 current_instruction_is_in_symbol = true;
128 outln("{} ({:p}-{:p}):", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end());
129 }
130
131 is_first_symbol = false;
132 }
133
134 size_t length = insn.value().length();
135 StringBuilder builder;
136 builder.appendff("{:p} ", virtual_offset);
137 for (size_t i = 0; i < 7; i++) {
138 if (i < length)
139 builder.appendff("{:02x} ", asm_data[offset + i]);
140 else
141 builder.append(" "sv);
142 }
143 builder.append(" "sv);
144 builder.append(insn.value().to_deprecated_string(virtual_offset, symbol_provider));
145 outln("{}", builder.string_view());
146
147 for (size_t bytes_printed = 7; bytes_printed < length; bytes_printed += 7) {
148 builder.clear();
149 builder.appendff("{:p} ", virtual_offset + bytes_printed);
150 for (size_t i = bytes_printed; i < bytes_printed + 7 && i < length; i++)
151 builder.appendff(" {:02x}", asm_data[offset + i]);
152 outln("{}", builder.string_view());
153 }
154 }
155 return 0;
156}