Strategies for finding binary dependencies
at main 140 lines 5.2 kB view raw
1// © Vlad-Stefan Harbuz <vlad@vlad.website> 2// SPDX-License-Identifier: Apache-2.0 3 4use std::collections::HashMap; 5use std::fs::File; 6use std::io::{self, BufRead}; 7 8use clap::Parser; 9use elf::ElfStream; 10use elf::endian::AnyEndian; 11use walkdir::WalkDir; 12 13/// Examine shared objects within a directory containing many extracted Python wheel files 14#[derive(Parser, Debug)] 15#[command(version, about, long_about = None)] 16struct Args { 17 /// Target directory 18 #[arg(short, long)] 19 dir: String, 20} 21 22fn read_lines(filename: &str) -> io::Result<io::Lines<io::BufReader<File>>> { 23 let file = File::open(filename)?; 24 Ok(io::BufReader::new(file).lines()) 25} 26 27fn get_needed_libs( 28 elf_file: &mut ElfStream<AnyEndian, std::fs::File>, 29) -> Option<impl Iterator<Item = &str>> { 30 let dyns = elf_file.dynamic().expect("failed to get .dynamic")?; 31 let needed_dt_vals: Vec<u64> = dyns 32 .iter() 33 .filter(|d| d.d_tag == elf::abi::DT_NEEDED) 34 .map(|d| d.d_val()) 35 .collect(); 36 37 let (_dynsyms, dynstrs) = elf_file 38 .dynamic_symbol_table() 39 .expect("failed to get .dynsym and string table")?; 40 41 Some(needed_dt_vals.into_iter().map(move |dt_val| { 42 dynstrs 43 .get(dt_val as usize) 44 .expect("failed to get name from string table") 45 })) 46} 47 48fn get_wheel_name(wheel_dir_path: &str) -> Option<String> { 49 for dist_info_candidate in WalkDir::new(wheel_dir_path).min_depth(1).max_depth(1) { 50 let dist_info_candidate = dist_info_candidate.expect("could not read file"); 51 let dist_info_candidate_path = dist_info_candidate 52 .path() 53 .to_str() 54 .expect("could not convert path to str"); 55 if dist_info_candidate_path.ends_with(".dist-info") { 56 let metadata_path = format!("{}/METADATA", dist_info_candidate_path); 57 let metadata_lines = read_lines(&metadata_path).expect("could not read METADATA"); 58 for line in metadata_lines.map_while(Result::ok) { 59 if line.starts_with("Name: ") { 60 return Some(line.replace("Name: ", "")); 61 } 62 } 63 } 64 } 65 None 66} 67 68fn main() { 69 let args = Args::parse(); 70 71 let mut found_libs = HashMap::new(); 72 let mut n_so_files = 0; 73 let mut n_bad_so_files = 0; 74 let mut n_needed_libs = 0; 75 76 for wheel_dir in WalkDir::new(args.dir).min_depth(1).max_depth(1) { 77 let wheel_dir = wheel_dir.expect("could not read file"); 78 let wheel_dir_meta = wheel_dir.metadata().expect("could not get file metadata"); 79 if wheel_dir_meta.is_file() { 80 continue; 81 } 82 let wheel_dir_path = wheel_dir 83 .path() 84 .to_str() 85 .expect("could not convert path to str"); 86 87 let name = get_wheel_name(wheel_dir_path); 88 89 if let Some(name) = name { 90 // println!("→ {}", name); 91 92 for file in WalkDir::new(wheel_dir_path) { 93 let file = file.expect("could not read file"); 94 let filename = file 95 .file_name() 96 .to_str() 97 .expect("could not covert filename to str"); 98 let path = file.path().to_str().expect("could not convert path to str"); 99 let is_so = path.ends_with(".so") || path.contains(".so."); 100 if is_so { 101 // println!("{}", filename); 102 n_so_files += 1; 103 let pathbuf = std::path::PathBuf::from(path); 104 let file_stream = File::open(pathbuf).expect("could not open file"); 105 let elf_file = ElfStream::<AnyEndian, _>::open_stream(file_stream); 106 if let Ok(mut elf_file) = elf_file { 107 let needed_libs = get_needed_libs(&mut elf_file); 108 if let Some(needed_libs) = needed_libs { 109 for name in needed_libs { 110 n_needed_libs += 1; 111 let libname = 112 name.split(".so").next().expect("could not get libname"); 113 // println!("\t{}", libname); 114 found_libs 115 .entry(libname.to_owned()) 116 .and_modify(|entry| *entry += 1) 117 .or_insert(1); 118 } 119 } 120 } else { 121 eprintln!("Could not open ELF stream for {}", path); 122 n_bad_so_files += 1; 123 } 124 } 125 } 126 } else { 127 panic!("Could not get name for {}", wheel_dir_path); 128 } 129 } 130 131 let mut found_libs_vec: Vec<(&String, &u32)> = found_libs.iter().collect(); 132 found_libs_vec.sort_by(|a, b| b.1.cmp(a.1)); 133 for (name, count) in found_libs_vec { 134 println!("{},{}", name, count); 135 } 136 137 eprintln!("Found {} .so files", n_so_files); 138 eprintln!("Found {} bad .so files", n_bad_so_files); 139 eprintln!("Found {} needed libs", n_needed_libs); 140}