// © Vlad-Stefan Harbuz // SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; use std::fs::File; use std::io::{self, BufRead}; use clap::Parser; use elf::ElfStream; use elf::endian::AnyEndian; use walkdir::WalkDir; /// Examine shared objects within a directory containing many extracted Python wheel files #[derive(Parser, Debug)] #[command(version, about, long_about = None)] struct Args { /// Target directory #[arg(short, long)] dir: String, } fn read_lines(filename: &str) -> io::Result>> { let file = File::open(filename)?; Ok(io::BufReader::new(file).lines()) } fn get_needed_libs( elf_file: &mut ElfStream, ) -> Option> { let dyns = elf_file.dynamic().expect("failed to get .dynamic")?; let needed_dt_vals: Vec = dyns .iter() .filter(|d| d.d_tag == elf::abi::DT_NEEDED) .map(|d| d.d_val()) .collect(); let (_dynsyms, dynstrs) = elf_file .dynamic_symbol_table() .expect("failed to get .dynsym and string table")?; Some(needed_dt_vals.into_iter().map(move |dt_val| { dynstrs .get(dt_val as usize) .expect("failed to get name from string table") })) } fn get_wheel_name(wheel_dir_path: &str) -> Option { for dist_info_candidate in WalkDir::new(wheel_dir_path).min_depth(1).max_depth(1) { let dist_info_candidate = dist_info_candidate.expect("could not read file"); let dist_info_candidate_path = dist_info_candidate .path() .to_str() .expect("could not convert path to str"); if dist_info_candidate_path.ends_with(".dist-info") { let metadata_path = format!("{}/METADATA", dist_info_candidate_path); let metadata_lines = read_lines(&metadata_path).expect("could not read METADATA"); for line in metadata_lines.map_while(Result::ok) { if line.starts_with("Name: ") { return Some(line.replace("Name: ", "")); } } } } None } fn main() { let args = Args::parse(); let mut found_libs = HashMap::new(); let mut n_so_files = 0; let mut n_bad_so_files = 0; let mut n_needed_libs = 0; for wheel_dir in WalkDir::new(args.dir).min_depth(1).max_depth(1) { let wheel_dir = wheel_dir.expect("could not read file"); let wheel_dir_meta = wheel_dir.metadata().expect("could not get file metadata"); if wheel_dir_meta.is_file() { continue; } let wheel_dir_path = wheel_dir .path() .to_str() .expect("could not convert path to str"); let name = get_wheel_name(wheel_dir_path); if let Some(name) = name { // println!("→ {}", name); for file in WalkDir::new(wheel_dir_path) { let file = file.expect("could not read file"); let filename = file .file_name() .to_str() .expect("could not covert filename to str"); let path = file.path().to_str().expect("could not convert path to str"); let is_so = path.ends_with(".so") || path.contains(".so."); if is_so { // println!("{}", filename); n_so_files += 1; let pathbuf = std::path::PathBuf::from(path); let file_stream = File::open(pathbuf).expect("could not open file"); let elf_file = ElfStream::::open_stream(file_stream); if let Ok(mut elf_file) = elf_file { let needed_libs = get_needed_libs(&mut elf_file); if let Some(needed_libs) = needed_libs { for name in needed_libs { n_needed_libs += 1; let libname = name.split(".so").next().expect("could not get libname"); // println!("\t{}", libname); found_libs .entry(libname.to_owned()) .and_modify(|entry| *entry += 1) .or_insert(1); } } } else { eprintln!("Could not open ELF stream for {}", path); n_bad_so_files += 1; } } } } else { panic!("Could not get name for {}", wheel_dir_path); } } let mut found_libs_vec: Vec<(&String, &u32)> = found_libs.iter().collect(); found_libs_vec.sort_by(|a, b| b.1.cmp(a.1)); for (name, count) in found_libs_vec { println!("{},{}", name, count); } eprintln!("Found {} .so files", n_so_files); eprintln!("Found {} bad .so files", n_bad_so_files); eprintln!("Found {} needed libs", n_needed_libs); }