Strategies for finding binary dependencies
1// © Vlad-Stefan Harbuz <vlad@vlad.website>
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::fs::File;
6use std::io::{self, BufRead};
7
8use clap::Parser;
9use elf::ElfStream;
10use elf::endian::AnyEndian;
11use walkdir::WalkDir;
12
13/// Examine shared objects within a directory containing many extracted Python wheel files
14#[derive(Parser, Debug)]
15#[command(version, about, long_about = None)]
16struct Args {
17 /// Target directory
18 #[arg(short, long)]
19 dir: String,
20}
21
/// Opens `filename` and returns a buffered iterator over its lines.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened. Errors that occur while
/// reading are reported per item by the returned iterator.
fn read_lines(filename: &str) -> io::Result<io::Lines<io::BufReader<File>>> {
    File::open(filename).map(|handle| io::BufReader::new(handle).lines())
}
26
27fn get_needed_libs(
28 elf_file: &mut ElfStream<AnyEndian, std::fs::File>,
29) -> Option<impl Iterator<Item = &str>> {
30 let dyns = elf_file.dynamic().expect("failed to get .dynamic")?;
31 let needed_dt_vals: Vec<u64> = dyns
32 .iter()
33 .filter(|d| d.d_tag == elf::abi::DT_NEEDED)
34 .map(|d| d.d_val())
35 .collect();
36
37 let (_dynsyms, dynstrs) = elf_file
38 .dynamic_symbol_table()
39 .expect("failed to get .dynsym and string table")?;
40
41 Some(needed_dt_vals.into_iter().map(move |dt_val| {
42 dynstrs
43 .get(dt_val as usize)
44 .expect("failed to get name from string table")
45 }))
46}
47
48fn get_wheel_name(wheel_dir_path: &str) -> Option<String> {
49 for dist_info_candidate in WalkDir::new(wheel_dir_path).min_depth(1).max_depth(1) {
50 let dist_info_candidate = dist_info_candidate.expect("could not read file");
51 let dist_info_candidate_path = dist_info_candidate
52 .path()
53 .to_str()
54 .expect("could not convert path to str");
55 if dist_info_candidate_path.ends_with(".dist-info") {
56 let metadata_path = format!("{}/METADATA", dist_info_candidate_path);
57 let metadata_lines = read_lines(&metadata_path).expect("could not read METADATA");
58 for line in metadata_lines.map_while(Result::ok) {
59 if line.starts_with("Name: ") {
60 return Some(line.replace("Name: ", ""));
61 }
62 }
63 }
64 }
65 None
66}
67
68fn main() {
69 let args = Args::parse();
70
71 let mut found_libs = HashMap::new();
72 let mut n_so_files = 0;
73 let mut n_bad_so_files = 0;
74 let mut n_needed_libs = 0;
75
76 for wheel_dir in WalkDir::new(args.dir).min_depth(1).max_depth(1) {
77 let wheel_dir = wheel_dir.expect("could not read file");
78 let wheel_dir_meta = wheel_dir.metadata().expect("could not get file metadata");
79 if wheel_dir_meta.is_file() {
80 continue;
81 }
82 let wheel_dir_path = wheel_dir
83 .path()
84 .to_str()
85 .expect("could not convert path to str");
86
87 let name = get_wheel_name(wheel_dir_path);
88
89 if let Some(name) = name {
90 // println!("→ {}", name);
91
92 for file in WalkDir::new(wheel_dir_path) {
93 let file = file.expect("could not read file");
94 let filename = file
95 .file_name()
96 .to_str()
97 .expect("could not covert filename to str");
98 let path = file.path().to_str().expect("could not convert path to str");
99 let is_so = path.ends_with(".so") || path.contains(".so.");
100 if is_so {
101 // println!("{}", filename);
102 n_so_files += 1;
103 let pathbuf = std::path::PathBuf::from(path);
104 let file_stream = File::open(pathbuf).expect("could not open file");
105 let elf_file = ElfStream::<AnyEndian, _>::open_stream(file_stream);
106 if let Ok(mut elf_file) = elf_file {
107 let needed_libs = get_needed_libs(&mut elf_file);
108 if let Some(needed_libs) = needed_libs {
109 for name in needed_libs {
110 n_needed_libs += 1;
111 let libname =
112 name.split(".so").next().expect("could not get libname");
113 // println!("\t{}", libname);
114 found_libs
115 .entry(libname.to_owned())
116 .and_modify(|entry| *entry += 1)
117 .or_insert(1);
118 }
119 }
120 } else {
121 eprintln!("Could not open ELF stream for {}", path);
122 n_bad_so_files += 1;
123 }
124 }
125 }
126 } else {
127 panic!("Could not get name for {}", wheel_dir_path);
128 }
129 }
130
131 let mut found_libs_vec: Vec<(&String, &u32)> = found_libs.iter().collect();
132 found_libs_vec.sort_by(|a, b| b.1.cmp(a.1));
133 for (name, count) in found_libs_vec {
134 println!("{},{}", name, count);
135 }
136
137 eprintln!("Found {} .so files", n_so_files);
138 eprintln!("Found {} bad .so files", n_bad_so_files);
139 eprintln!("Found {} needed libs", n_needed_libs);
140}