1From 405801d8a8be734425eca4f3eebc56287804ac93 Mon Sep 17 00:00:00 2001
2From: Jakub Konka <kubkon@jakubkonka.com>
3Date: Sun, 5 Feb 2023 10:04:34 +0100
4Subject: [PATCH] macho: temp fix alignment and enable some logs
5
6---
7 src/link/MachO/Object.zig | 80 ++++++++++++++++++++++++++------------
8 src/link/MachO/ZldAtom.zig | 29 +++++++-------
9 src/link/MachO/zld.zig | 22 +++++------
10 3 files changed, 79 insertions(+), 52 deletions(-)
11
12diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
13index 401184da515..05638c1f858 100644
14--- a/src/link/MachO/Object.zig
15+++ b/src/link/MachO/Object.zig
16@@ -54,12 +54,18 @@ atom_by_index_table: []AtomIndex = undefined,
17 /// Can be undefined as set together with in_symtab.
18 globals_lookup: []i64 = undefined,
19
20+/// All relocs sorted and flattened.
21+relocs: std.ArrayListUnmanaged(macho.relocation_info) = .{},
22+sect_relocs_lookup: std.ArrayListUnmanaged(u32) = .{},
23+
24 atoms: std.ArrayListUnmanaged(AtomIndex) = .{},
25
26 pub fn deinit(self: *Object, gpa: Allocator) void {
27 self.atoms.deinit(gpa);
28 gpa.free(self.name);
29 gpa.free(self.contents);
30+ self.relocs.deinit(gpa);
31+ self.sect_relocs_lookup.deinit(gpa);
32 if (self.in_symtab) |_| {
33 gpa.free(self.source_symtab_lookup);
34 gpa.free(self.source_address_lookup);
35@@ -101,6 +107,10 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
36 return error.MismatchedCpuArchitecture;
37 }
38
39+ const nsects = self.getSourceSections().len;
40+ try self.sect_relocs_lookup.resize(allocator, nsects);
41+ mem.set(u32, self.sect_relocs_lookup.items, 0);
42+
43 var it = LoadCommandIterator{
44 .ncmds = self.header.ncmds,
45 .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
46@@ -110,13 +120,11 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
47 .SYMTAB => {
48 const symtab = cmd.cast(macho.symtab_command).?;
49 self.in_symtab = @ptrCast(
50- [*]const macho.nlist_64,
51- @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]),
52+ [*]align(1) const macho.nlist_64,
53+ self.contents.ptr + symtab.symoff,
54 )[0..symtab.nsyms];
55 self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize];
56
57- const nsects = self.getSourceSections().len;
58-
59 self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects);
60 self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
61 self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
62@@ -192,6 +200,17 @@ const SymbolAtIndex = struct {
63 return mem.sliceTo(@ptrCast([*:0]const u8, ctx.in_strtab.?.ptr + off), 0);
64 }
65
66+ fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 {
67+ const sym = self.getSymbol(ctx);
68+ if (!sym.ext()) {
69+ const sym_name = self.getSymbolName(ctx);
70+ if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 0;
71+ return 1;
72+ }
73+ if (sym.weakDef() or sym.pext()) return 2;
74+ return 3;
75+ }
76+
77 /// Performs lexicographic-like check.
78 /// * lhs and rhs defined
79 /// * if lhs == rhs
80@@ -206,23 +225,15 @@ const SymbolAtIndex = struct {
81 if (lhs.sect() and rhs.sect()) {
82 if (lhs.n_value == rhs.n_value) {
83 if (lhs.n_sect == rhs.n_sect) {
84- if (lhs.ext() and rhs.ext()) {
85- if ((lhs.pext() or lhs.weakDef()) and (rhs.pext() or rhs.weakDef())) {
86- return false;
87- } else return rhs.pext() or rhs.weakDef();
88- } else {
89- const lhs_name = lhs_index.getSymbolName(ctx);
90- const lhs_temp = mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L");
91- const rhs_name = rhs_index.getSymbolName(ctx);
92- const rhs_temp = mem.startsWith(u8, rhs_name, "l") or mem.startsWith(u8, rhs_name, "L");
93- if (lhs_temp and rhs_temp) {
94- return false;
95- } else return rhs_temp;
96- }
97+ const lhs_senior = lhs_index.getSymbolSeniority(ctx);
98+ const rhs_senior = rhs_index.getSymbolSeniority(ctx);
99+ if (lhs_senior == rhs_senior) {
100+ return lessThanByNStrx(ctx, lhs_index, rhs_index);
101+ } else return lhs_senior < rhs_senior;
102 } else return lhs.n_sect < rhs.n_sect;
103 } else return lhs.n_value < rhs.n_value;
104 } else if (lhs.undf() and rhs.undf()) {
105- return false;
106+ return lessThanByNStrx(ctx, lhs_index, rhs_index);
107 } else return rhs.undf();
108 }
109
110@@ -393,6 +404,16 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void {
111 zld.sections.items(.header)[out_sect_id].sectName(),
112 });
113
114+ // Parse all relocs for the input section, and sort in descending order.
115+ // Previously, I have wrongly assumed the compilers output relocations for each
116+ // section in a sorted manner which is simply not true.
117+ const start = @intCast(u32, self.relocs.items.len);
118+ if (self.getSourceRelocs(section.header)) |relocs| {
119+ try self.relocs.appendUnalignedSlice(gpa, relocs);
120+ std.sort.sort(macho.relocation_info, self.relocs.items[start..], {}, relocGreaterThan);
121+ }
122+ self.sect_relocs_lookup.items[section.id] = start;
123+
124 const cpu_arch = zld.options.target.cpu.arch;
125 const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1);
126 const sect_start_index = sect_sym_index + sect_loc.index;
127@@ -559,7 +580,7 @@ pub fn getSourceSections(self: Object) []const macho.section_64 {
128 } else unreachable;
129 }
130
131-pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry {
132+pub fn parseDataInCode(self: Object) ?[]align(1) const macho.data_in_code_entry {
133 var it = LoadCommandIterator{
134 .ncmds = self.header.ncmds,
135 .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
136@@ -569,10 +590,7 @@ pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry {
137 .DATA_IN_CODE => {
138 const dice = cmd.cast(macho.linkedit_data_command).?;
139 const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry));
140- return @ptrCast(
141- [*]const macho.data_in_code_entry,
142- @alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]),
143- )[0..ndice];
144+ return @ptrCast([*]align(1) const macho.data_in_code_entry, self.contents.ptr + dice.dataoff)[0..ndice];
145 },
146 else => {},
147 }
148@@ -632,11 +650,23 @@ pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 {
149 return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)];
150 }
151
152-pub fn getRelocs(self: Object, sect: macho.section_64) []align(1) const macho.relocation_info {
153- if (sect.nreloc == 0) return &[0]macho.relocation_info{};
154+fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info {
155+ if (sect.nreloc == 0) return null;
156 return @ptrCast([*]align(1) const macho.relocation_info, self.contents.ptr + sect.reloff)[0..sect.nreloc];
157 }
158
159+pub fn getRelocs(self: Object, sect_id: u16) []const macho.relocation_info {
160+ const sect = self.getSourceSection(sect_id);
161+ const start = self.sect_relocs_lookup.items[sect_id];
162+ const len = sect.nreloc;
163+ return self.relocs.items[start..][0..len];
164+}
165+
166+fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool {
167+ _ = ctx;
168+ return lhs.r_address > rhs.r_address;
169+}
170+
171 pub fn getSymbolName(self: Object, index: u32) []const u8 {
172 const strtab = self.in_strtab.?;
173 const sym = self.symtab[index];
174diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig
175index 817aa816625..b42309598d7 100644
176--- a/src/link/MachO/ZldAtom.zig
177+++ b/src/link/MachO/ZldAtom.zig
178@@ -465,7 +465,7 @@ pub fn resolveRelocs(
179 zld: *Zld,
180 atom_index: AtomIndex,
181 atom_code: []u8,
182- atom_relocs: []align(1) const macho.relocation_info,
183+ atom_relocs: []const macho.relocation_info,
184 reverse_lookup: []u32,
185 ) !void {
186 const arch = zld.options.target.cpu.arch;
187@@ -540,7 +540,7 @@ fn resolveRelocsArm64(
188 zld: *Zld,
189 atom_index: AtomIndex,
190 atom_code: []u8,
191- atom_relocs: []align(1) const macho.relocation_info,
192+ atom_relocs: []const macho.relocation_info,
193 reverse_lookup: []u32,
194 context: RelocContext,
195 ) !void {
196@@ -579,7 +579,6 @@ fn resolveRelocsArm64(
197 }
198
199 const target = parseRelocTarget(zld, atom_index, rel, reverse_lookup);
200- const rel_offset = @intCast(u32, rel.r_address - context.base_offset);
201
202 log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{
203 @tagName(rel_type),
204@@ -589,6 +588,7 @@ fn resolveRelocsArm64(
205 target.file,
206 });
207
208+ const rel_offset = @intCast(u32, rel.r_address - context.base_offset);
209 const source_addr = blk: {
210 const source_sym = zld.getSymbol(atom.getSymbolWithLoc());
211 break :blk source_sym.n_value + rel_offset;
212@@ -596,7 +596,7 @@ fn resolveRelocsArm64(
213 const is_tlv = is_tlv: {
214 const source_sym = zld.getSymbol(atom.getSymbolWithLoc());
215 const header = zld.sections.items(.header)[source_sym.n_sect - 1];
216- break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES;
217+ break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES;
218 };
219 const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv);
220
221@@ -831,7 +831,7 @@ fn resolveRelocsX86(
222 zld: *Zld,
223 atom_index: AtomIndex,
224 atom_code: []u8,
225- atom_relocs: []align(1) const macho.relocation_info,
226+ atom_relocs: []const macho.relocation_info,
227 reverse_lookup: []u32,
228 context: RelocContext,
229 ) !void {
230@@ -877,7 +877,7 @@ fn resolveRelocsX86(
231 const is_tlv = is_tlv: {
232 const source_sym = zld.getSymbol(atom.getSymbolWithLoc());
233 const header = zld.sections.items(.header)[source_sym.n_sect - 1];
234- break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES;
235+ break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES;
236 };
237
238 log.debug(" | source_addr = 0x{x}", .{source_addr});
239@@ -1015,27 +1015,24 @@ pub fn getAtomCode(zld: *Zld, atom_index: AtomIndex) []const u8 {
240 return code[offset..][0..code_len];
241 }
242
243-pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []align(1) const macho.relocation_info {
244+pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []const macho.relocation_info {
245 const atom = zld.getAtomPtr(atom_index);
246 assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs.
247 const object = zld.objects.items[atom.getFile().?];
248
249- const source_sect = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
250- const source_sect = object.getSourceSection(source_sym.n_sect - 1);
251- assert(!source_sect.isZerofill());
252- break :blk source_sect;
253+ const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
254+ break :blk source_sym.n_sect - 1;
255 } else blk: {
256 // If there was no matching symbol present in the source symtab, this means
257 // we are dealing with either an entire section, or part of it, but also
258 // starting at the beginning.
259 const nbase = @intCast(u32, object.in_symtab.?.len);
260 const sect_id = @intCast(u16, atom.sym_index - nbase);
261- const source_sect = object.getSourceSection(sect_id);
262- assert(!source_sect.isZerofill());
263- break :blk source_sect;
264+ break :blk sect_id;
265 };
266-
267- const relocs = object.getRelocs(source_sect);
268+ const source_sect = object.getSourceSection(source_sect_id);
269+ assert(!source_sect.isZerofill());
270+ const relocs = object.getRelocs(source_sect_id);
271
272 if (atom.cached_relocs_start == -1) {
273 const indexes = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
274diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig
275index 3a2ea79c6ec..cee3f302c08 100644
276--- a/src/link/MachO/zld.zig
277+++ b/src/link/MachO/zld.zig
278@@ -396,7 +396,7 @@ pub const Zld = struct {
279 break :blk null;
280 }
281
282- switch (sect.@"type"()) {
283+ switch (sect.type()) {
284 macho.S_4BYTE_LITERALS,
285 macho.S_8BYTE_LITERALS,
286 macho.S_16BYTE_LITERALS,
287@@ -1701,7 +1701,7 @@ pub const Zld = struct {
288 break :outer;
289 }
290 }
291- switch (header.@"type"()) {
292+ switch (header.type()) {
293 macho.S_NON_LAZY_SYMBOL_POINTERS => {
294 try self.writeGotPointer(count, buffer.writer());
295 },
296@@ -1718,7 +1718,7 @@ pub const Zld = struct {
297 break :outer;
298 }
299 }
300- if (header.@"type"() == macho.S_SYMBOL_STUBS) {
301+ if (header.type() == macho.S_SYMBOL_STUBS) {
302 try self.writeStubCode(atom_index, count, buffer.writer());
303 } else if (mem.eql(u8, header.sectName(), "__stub_helper")) {
304 try self.writeStubHelperCode(atom_index, buffer.writer());
305@@ -1802,7 +1802,7 @@ pub const Zld = struct {
306 for (slice.items(.header)) |*header, sect_id| {
307 if (header.size == 0) continue;
308 if (self.requiresThunks()) {
309- if (header.isCode() and !(header.@"type"() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue;
310+ if (header.isCode() and !(header.type() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue;
311 }
312
313 var atom_index = slice.items(.first_atom_index)[sect_id];
314@@ -1830,7 +1830,7 @@ pub const Zld = struct {
315 if (self.requiresThunks()) {
316 for (slice.items(.header)) |header, sect_id| {
317 if (!header.isCode()) continue;
318- if (header.@"type"() == macho.S_SYMBOL_STUBS) continue;
319+ if (header.type() == macho.S_SYMBOL_STUBS) continue;
320 if (mem.eql(u8, header.sectName(), "__stub_helper")) continue;
321
322 // Create jump/branch range extenders if needed.
323@@ -1994,10 +1994,10 @@ pub const Zld = struct {
324 const section_precedence: u4 = blk: {
325 if (header.isCode()) {
326 if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0;
327- if (header.@"type"() == macho.S_SYMBOL_STUBS) break :blk 0x1;
328+ if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1;
329 break :blk 0x2;
330 }
331- switch (header.@"type"()) {
332+ switch (header.type()) {
333 macho.S_NON_LAZY_SYMBOL_POINTERS,
334 macho.S_LAZY_SYMBOL_POINTERS,
335 => break :blk 0x0,
336@@ -2121,7 +2121,7 @@ pub const Zld = struct {
337
338 // Finally, unpack the rest.
339 for (slice.items(.header)) |header, sect_id| {
340- switch (header.@"type"()) {
341+ switch (header.type()) {
342 macho.S_LITERAL_POINTERS,
343 macho.S_REGULAR,
344 macho.S_MOD_INIT_FUNC_POINTERS,
345@@ -2252,7 +2252,7 @@ pub const Zld = struct {
346 // Finally, unpack the rest.
347 const slice = self.sections.slice();
348 for (slice.items(.header)) |header, sect_id| {
349- switch (header.@"type"()) {
350+ switch (header.type()) {
351 macho.S_LITERAL_POINTERS,
352 macho.S_REGULAR,
353 macho.S_MOD_INIT_FUNC_POINTERS,
354@@ -2707,10 +2707,10 @@ pub const Zld = struct {
355 }
356
357 fn filterDataInCode(
358- dices: []const macho.data_in_code_entry,
359+ dices: []align(1) const macho.data_in_code_entry,
360 start_addr: u64,
361 end_addr: u64,
362- ) []const macho.data_in_code_entry {
363+ ) []align(1) const macho.data_in_code_entry {
364 const Predicate = struct {
365 addr: u64,
366
367