Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
1From 405801d8a8be734425eca4f3eebc56287804ac93 Mon Sep 17 00:00:00 2001 2From: Jakub Konka <kubkon@jakubkonka.com> 3Date: Sun, 5 Feb 2023 10:04:34 +0100 4Subject: [PATCH] macho: temp fix alignment and enable some logs 5 6--- 7 src/link/MachO/Object.zig | 80 ++++++++++++++++++++++++++------------ 8 src/link/MachO/ZldAtom.zig | 29 +++++++------- 9 src/link/MachO/zld.zig | 22 +++++------ 10 3 files changed, 79 insertions(+), 52 deletions(-) 11 12diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig 13index 401184da515..05638c1f858 100644 14--- a/src/link/MachO/Object.zig 15+++ b/src/link/MachO/Object.zig 16@@ -54,12 +54,18 @@ atom_by_index_table: []AtomIndex = undefined, 17 /// Can be undefined as set together with in_symtab. 18 globals_lookup: []i64 = undefined, 19 20+/// All relocs sorted and flattened. 21+relocs: std.ArrayListUnmanaged(macho.relocation_info) = .{}, 22+sect_relocs_lookup: std.ArrayListUnmanaged(u32) = .{}, 23+ 24 atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, 25 26 pub fn deinit(self: *Object, gpa: Allocator) void { 27 self.atoms.deinit(gpa); 28 gpa.free(self.name); 29 gpa.free(self.contents); 30+ self.relocs.deinit(gpa); 31+ self.sect_relocs_lookup.deinit(gpa); 32 if (self.in_symtab) |_| { 33 gpa.free(self.source_symtab_lookup); 34 gpa.free(self.source_address_lookup); 35@@ -101,6 +107,10 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) 36 return error.MismatchedCpuArchitecture; 37 } 38 39+ const nsects = self.getSourceSections().len; 40+ try self.sect_relocs_lookup.resize(allocator, nsects); 41+ mem.set(u32, self.sect_relocs_lookup.items, 0); 42+ 43 var it = LoadCommandIterator{ 44 .ncmds = self.header.ncmds, 45 .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], 46@@ -110,13 +120,11 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) 47 .SYMTAB => { 48 const symtab = cmd.cast(macho.symtab_command).?; 49 self.in_symtab = @ptrCast( 50- [*]const macho.nlist_64, 51- @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), 52+ [*]align(1) const macho.nlist_64, 53+ self.contents.ptr + symtab.symoff, 54 )[0..symtab.nsyms]; 55 self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; 56 57- const nsects = self.getSourceSections().len; 58- 59 self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); 60 self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); 61 self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); 62@@ -192,6 +200,17 @@ const SymbolAtIndex = struct { 63 return mem.sliceTo(@ptrCast([*:0]const u8, ctx.in_strtab.?.ptr + off), 0); 64 } 65 66+ fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 { 67+ const sym = self.getSymbol(ctx); 68+ if (!sym.ext()) { 69+ const sym_name = self.getSymbolName(ctx); 70+ if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 0; 71+ return 1; 72+ } 73+ if (sym.weakDef() or sym.pext()) return 2; 74+ return 3; 75+ } 76+ 77 /// Performs lexicographic-like check. 78 /// * lhs and rhs defined 79 /// * if lhs == rhs 80@@ -206,23 +225,15 @@ const SymbolAtIndex = struct { 81 if (lhs.sect() and rhs.sect()) { 82 if (lhs.n_value == rhs.n_value) { 83 if (lhs.n_sect == rhs.n_sect) { 84- if (lhs.ext() and rhs.ext()) { 85- if ((lhs.pext() or lhs.weakDef()) and (rhs.pext() or rhs.weakDef())) { 86- return false; 87- } else return rhs.pext() or rhs.weakDef(); 88- } else { 89- const lhs_name = lhs_index.getSymbolName(ctx); 90- const lhs_temp = mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); 91- const rhs_name = rhs_index.getSymbolName(ctx); 92- const rhs_temp = mem.startsWith(u8, rhs_name, "l") or mem.startsWith(u8, rhs_name, "L"); 93- if (lhs_temp and rhs_temp) { 94- return false; 95- } else return rhs_temp; 96- } 97+ const lhs_senior = lhs_index.getSymbolSeniority(ctx); 98+ const rhs_senior = rhs_index.getSymbolSeniority(ctx); 99+ if (lhs_senior == rhs_senior) { 100+ return lessThanByNStrx(ctx, lhs_index, rhs_index); 101+ } else return lhs_senior < rhs_senior; 102 } else return lhs.n_sect < rhs.n_sect; 103 } else return lhs.n_value < rhs.n_value; 104 } else if (lhs.undf() and rhs.undf()) { 105- return false; 106+ return lessThanByNStrx(ctx, lhs_index, rhs_index); 107 } else return rhs.undf(); 108 } 109 110@@ -393,6 +404,16 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { 111 zld.sections.items(.header)[out_sect_id].sectName(), 112 }); 113 114+ // Parse all relocs for the input section, and sort in descending order. 115+ // Previously, I have wrongly assumed the compilers output relocations for each 116+ // section in a sorted manner which is simply not true. 117+ const start = @intCast(u32, self.relocs.items.len); 118+ if (self.getSourceRelocs(section.header)) |relocs| { 119+ try self.relocs.appendUnalignedSlice(gpa, relocs); 120+ std.sort.sort(macho.relocation_info, self.relocs.items[start..], {}, relocGreaterThan); 121+ } 122+ self.sect_relocs_lookup.items[section.id] = start; 123+ 124 const cpu_arch = zld.options.target.cpu.arch; 125 const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); 126 const sect_start_index = sect_sym_index + sect_loc.index; 127@@ -559,7 +580,7 @@ pub fn getSourceSections(self: Object) []const macho.section_64 { 128 } else unreachable; 129 } 130 131-pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { 132+pub fn parseDataInCode(self: Object) ?[]align(1) const macho.data_in_code_entry { 133 var it = LoadCommandIterator{ 134 .ncmds = self.header.ncmds, 135 .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], 136@@ -569,10 +590,7 @@ pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { 137 .DATA_IN_CODE => { 138 const dice = cmd.cast(macho.linkedit_data_command).?; 139 const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); 140- return @ptrCast( 141- [*]const macho.data_in_code_entry, 142- @alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), 143- )[0..ndice]; 144+ return @ptrCast([*]align(1) const macho.data_in_code_entry, self.contents.ptr + dice.dataoff)[0..ndice]; 145 }, 146 else => {}, 147 } 148@@ -632,11 +650,23 @@ pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 { 149 return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; 150 } 151 152-pub fn getRelocs(self: Object, sect: macho.section_64) []align(1) const macho.relocation_info { 153- if (sect.nreloc == 0) return &[0]macho.relocation_info{}; 154+fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info { 155+ if (sect.nreloc == 0) return null; 156 return @ptrCast([*]align(1) const macho.relocation_info, self.contents.ptr + sect.reloff)[0..sect.nreloc]; 157 } 158 159+pub fn getRelocs(self: Object, sect_id: u16) []const macho.relocation_info { 160+ const sect = self.getSourceSection(sect_id); 161+ const start = self.sect_relocs_lookup.items[sect_id]; 162+ const len = sect.nreloc; 163+ return self.relocs.items[start..][0..len]; 164+} 165+ 166+fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { 167+ _ = ctx; 168+ return lhs.r_address > rhs.r_address; 169+} 170+ 171 pub fn getSymbolName(self: Object, index: u32) []const u8 { 172 const strtab = self.in_strtab.?; 173 const sym = self.symtab[index]; 174diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig 175index 817aa816625..b42309598d7 100644 176--- a/src/link/MachO/ZldAtom.zig 177+++ b/src/link/MachO/ZldAtom.zig 178@@ -465,7 +465,7 @@ pub fn resolveRelocs( 179 zld: *Zld, 180 atom_index: AtomIndex, 181 atom_code: []u8, 182- atom_relocs: []align(1) const macho.relocation_info, 183+ atom_relocs: []const macho.relocation_info, 184 reverse_lookup: []u32, 185 ) !void { 186 const arch = zld.options.target.cpu.arch; 187@@ -540,7 +540,7 @@ fn resolveRelocsArm64( 188 zld: *Zld, 189 atom_index: AtomIndex, 190 atom_code: []u8, 191- atom_relocs: []align(1) const macho.relocation_info, 192+ atom_relocs: []const macho.relocation_info, 193 reverse_lookup: []u32, 194 context: RelocContext, 195 ) !void { 196@@ -579,7 +579,6 @@ fn resolveRelocsArm64( 197 } 198 199 const target = parseRelocTarget(zld, atom_index, rel, reverse_lookup); 200- const rel_offset = @intCast(u32, rel.r_address - context.base_offset); 201 202 log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ 203 @tagName(rel_type), 204@@ -589,6 +588,7 @@ fn resolveRelocsArm64( 205 target.file, 206 }); 207 208+ const rel_offset = @intCast(u32, rel.r_address - context.base_offset); 209 const source_addr = blk: { 210 const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); 211 break :blk source_sym.n_value + rel_offset; 212@@ -596,7 +596,7 @@ fn resolveRelocsArm64( 213 const is_tlv = is_tlv: { 214 const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); 215 const header = zld.sections.items(.header)[source_sym.n_sect - 1]; 216- break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; 217+ break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; 218 }; 219 const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv); 220 221@@ -831,7 +831,7 @@ fn resolveRelocsX86( 222 zld: *Zld, 223 atom_index: AtomIndex, 224 atom_code: []u8, 225- atom_relocs: []align(1) const macho.relocation_info, 226+ atom_relocs: []const macho.relocation_info, 227 reverse_lookup: []u32, 228 context: RelocContext, 229 ) !void { 230@@ -877,7 +877,7 @@ fn resolveRelocsX86( 231 const is_tlv = is_tlv: { 232 const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); 233 const header = zld.sections.items(.header)[source_sym.n_sect - 1]; 234- break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; 235+ break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; 236 }; 237 238 log.debug(" | source_addr = 0x{x}", .{source_addr}); 239@@ -1015,27 +1015,24 @@ pub fn getAtomCode(zld: *Zld, atom_index: AtomIndex) []const u8 { 240 return code[offset..][0..code_len]; 241 } 242 243-pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []align(1) const macho.relocation_info { 244+pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []const macho.relocation_info { 245 const atom = zld.getAtomPtr(atom_index); 246 assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. 247 const object = zld.objects.items[atom.getFile().?]; 248 249- const source_sect = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { 250- const source_sect = object.getSourceSection(source_sym.n_sect - 1); 251- assert(!source_sect.isZerofill()); 252- break :blk source_sect; 253+ const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { 254+ break :blk source_sym.n_sect - 1; 255 } else blk: { 256 // If there was no matching symbol present in the source symtab, this means 257 // we are dealing with either an entire section, or part of it, but also 258 // starting at the beginning. 259 const nbase = @intCast(u32, object.in_symtab.?.len); 260 const sect_id = @intCast(u16, atom.sym_index - nbase); 261- const source_sect = object.getSourceSection(sect_id); 262- assert(!source_sect.isZerofill()); 263- break :blk source_sect; 264+ break :blk sect_id; 265 }; 266- 267- const relocs = object.getRelocs(source_sect); 268+ const source_sect = object.getSourceSection(source_sect_id); 269+ assert(!source_sect.isZerofill()); 270+ const relocs = object.getRelocs(source_sect_id); 271 272 if (atom.cached_relocs_start == -1) { 273 const indexes = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { 274diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig 275index 3a2ea79c6ec..cee3f302c08 100644 276--- a/src/link/MachO/zld.zig 277+++ b/src/link/MachO/zld.zig 278@@ -396,7 +396,7 @@ pub const Zld = struct { 279 break :blk null; 280 } 281 282- switch (sect.@"type"()) { 283+ switch (sect.type()) { 284 macho.S_4BYTE_LITERALS, 285 macho.S_8BYTE_LITERALS, 286 macho.S_16BYTE_LITERALS, 287@@ -1701,7 +1701,7 @@ pub const Zld = struct { 288 break :outer; 289 } 290 } 291- switch (header.@"type"()) { 292+ switch (header.type()) { 293 macho.S_NON_LAZY_SYMBOL_POINTERS => { 294 try self.writeGotPointer(count, buffer.writer()); 295 }, 296@@ -1718,7 +1718,7 @@ pub const Zld = struct { 297 break :outer; 298 } 299 } 300- if (header.@"type"() == macho.S_SYMBOL_STUBS) { 301+ if (header.type() == macho.S_SYMBOL_STUBS) { 302 try self.writeStubCode(atom_index, count, buffer.writer()); 303 } else if (mem.eql(u8, header.sectName(), "__stub_helper")) { 304 try self.writeStubHelperCode(atom_index, buffer.writer()); 305@@ -1802,7 +1802,7 @@ pub const Zld = struct { 306 for (slice.items(.header)) |*header, sect_id| { 307 if (header.size == 0) continue; 308 if (self.requiresThunks()) { 309- if (header.isCode() and !(header.@"type"() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; 310+ if (header.isCode() and !(header.type() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; 311 } 312 313 var atom_index = slice.items(.first_atom_index)[sect_id]; 314@@ -1830,7 +1830,7 @@ pub const Zld = struct { 315 if (self.requiresThunks()) { 316 for (slice.items(.header)) |header, sect_id| { 317 if (!header.isCode()) continue; 318- if (header.@"type"() == macho.S_SYMBOL_STUBS) continue; 319+ if (header.type() == macho.S_SYMBOL_STUBS) continue; 320 if (mem.eql(u8, header.sectName(), "__stub_helper")) continue; 321 322 // Create jump/branch range extenders if needed. 323@@ -1994,10 +1994,10 @@ pub const Zld = struct { 324 const section_precedence: u4 = blk: { 325 if (header.isCode()) { 326 if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; 327- if (header.@"type"() == macho.S_SYMBOL_STUBS) break :blk 0x1; 328+ if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; 329 break :blk 0x2; 330 } 331- switch (header.@"type"()) { 332+ switch (header.type()) { 333 macho.S_NON_LAZY_SYMBOL_POINTERS, 334 macho.S_LAZY_SYMBOL_POINTERS, 335 => break :blk 0x0, 336@@ -2121,7 +2121,7 @@ pub const Zld = struct { 337 338 // Finally, unpack the rest. 339 for (slice.items(.header)) |header, sect_id| { 340- switch (header.@"type"()) { 341+ switch (header.type()) { 342 macho.S_LITERAL_POINTERS, 343 macho.S_REGULAR, 344 macho.S_MOD_INIT_FUNC_POINTERS, 345@@ -2252,7 +2252,7 @@ pub const Zld = struct { 346 // Finally, unpack the rest. 347 const slice = self.sections.slice(); 348 for (slice.items(.header)) |header, sect_id| { 349- switch (header.@"type"()) { 350+ switch (header.type()) { 351 macho.S_LITERAL_POINTERS, 352 macho.S_REGULAR, 353 macho.S_MOD_INIT_FUNC_POINTERS, 354@@ -2707,10 +2707,10 @@ pub const Zld = struct { 355 } 356 357 fn filterDataInCode( 358- dices: []const macho.data_in_code_entry, 359+ dices: []align(1) const macho.data_in_code_entry, 360 start_addr: u64, 361 end_addr: u64, 362- ) []const macho.data_in_code_entry { 363+ ) []align(1) const macho.data_in_code_entry { 364 const Predicate = struct { 365 addr: u64, 366 367