code complexity & repetition analysis tool
1use crate::error::{MccabreError, Result};
2use crate::tokenizer::Language;
3use ignore::WalkBuilder;
4use std::path::{Path, PathBuf};
5use std::{fs, io};
6
7/// File entry with source code and metadata
8#[derive(Debug, Clone)]
9pub struct SourceFile {
10 pub path: PathBuf,
11 pub content: String,
12 pub language: Language,
13}
14
/// File loader that respects .gitignore and supports various input types.
///
/// The loader carries only its configuration, so it is cheap to clone and can be
/// printed with `{:?}` for diagnostics.
#[derive(Debug, Clone)]
pub struct FileLoader {
    /// Whether to respect .gitignore files
    respect_gitignore: bool,
}
20
21impl Default for FileLoader {
22 fn default() -> Self {
23 Self { respect_gitignore: true }
24 }
25}
26
27impl FileLoader {
28 pub fn new() -> Self {
29 Self::default()
30 }
31
32 /// Enable or disable gitignore awareness
33 pub fn with_gitignore(mut self, respect: bool) -> Self {
34 self.respect_gitignore = respect;
35 self
36 }
37
38 /// Load files from a path (file, directory, or list)
39 pub fn load<P: AsRef<Path>>(&self, path: P) -> Result<Vec<SourceFile>> {
40 let path = path.as_ref();
41
42 if path.is_file() {
43 let file = self.load_file(path)?;
44 Ok(vec![file])
45 } else if path.is_dir() {
46 self.load_directory(path)
47 } else {
48 Err(MccabreError::FileRead {
49 path: path.to_path_buf(),
50 source: std::io::Error::new(std::io::ErrorKind::NotFound, "Path is neither a file nor a directory"),
51 })
52 }
53 }
54
55 /// Load multiple paths
56 pub fn load_multiple<P: AsRef<Path>>(&self, paths: &[P]) -> Result<Vec<SourceFile>> {
57 let mut files = Vec::new();
58
59 for path in paths {
60 let mut loaded = self.load(path)?;
61 files.append(&mut loaded);
62 }
63
64 files.sort_by(|a, b| a.path.cmp(&b.path));
65 files.dedup_by(|a, b| a.path == b.path);
66
67 Ok(files)
68 }
69
70 /// Load a single file
71 fn load_file(&self, path: &Path) -> Result<SourceFile> {
72 let language = Language::from_path(path)?;
73 let content =
74 fs::read_to_string(path).map_err(|e| MccabreError::FileRead { path: path.to_path_buf(), source: e })?;
75
76 Ok(SourceFile { path: path.to_path_buf(), content, language })
77 }
78
79 /// Load all supported files from a directory
80 fn load_directory(&self, dir: &Path) -> Result<Vec<SourceFile>> {
81 let mut files = Vec::new();
82
83 let walker = WalkBuilder::new(dir)
84 .standard_filters(self.respect_gitignore)
85 .hidden(false)
86 .parents(true)
87 .build();
88
89 for entry in walker {
90 let entry = entry.map_err(|e| MccabreError::Io(io::Error::other(e.to_string())))?;
91 let path = entry.path();
92
93 if !path.is_file() {
94 continue;
95 }
96
97 match self.load_file(path) {
98 Ok(file) => files.push(file),
99 Err(MccabreError::UnsupportedFileType(_)) => continue,
100 Err(e) => return Err(e),
101 }
102 }
103
104 Ok(files)
105 }
106}
107
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// True when any loaded file has a path ending in `name`.
    fn contains_file(files: &[SourceFile], name: &str) -> bool {
        files.iter().any(|f| f.path.ends_with(name))
    }

    /// A single supported file loads as exactly one entry with its content and language.
    #[test]
    fn test_load_single_file() -> Result<()> {
        let dir = TempDir::new().unwrap();
        let source_path = dir.path().join("test.rs");
        fs::write(&source_path, "fn main() {}").unwrap();

        let loaded = FileLoader::new().load(&source_path)?;

        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].content, "fn main() {}");
        assert_eq!(loaded[0].language, Language::Rust);

        Ok(())
    }

    /// Loading a directory returns the supported files and skips the `.txt` file.
    #[test]
    fn test_load_directory() -> Result<()> {
        let dir = TempDir::new().unwrap();
        let fixtures = [
            ("file1.rs", "fn test1() {}"),
            ("file2.js", "function test2() {}"),
            ("readme.txt", "Not code"),
        ];
        for (name, body) in fixtures {
            fs::write(dir.path().join(name), body).unwrap();
        }

        let loaded = FileLoader::new().load(dir.path())?;

        assert_eq!(loaded.len(), 2);
        assert!(contains_file(&loaded, "file1.rs"));
        assert!(contains_file(&loaded, "file2.js"));

        Ok(())
    }

    /// Exercises the loader with gitignore handling both on and off.
    ///
    /// NOTE(review): there is no assertion that `excluded.rs` is absent when
    /// gitignore handling is on; presumably because the `ignore` crate only
    /// applies `.gitignore` inside a git repository by default and the temp
    /// directory is not one. Confirm before tightening this test.
    #[test]
    fn test_gitignore_respected() -> Result<()> {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        fs::write(root.join("included.rs"), "fn included() {}").unwrap();

        let build_dir = root.join("build");
        fs::create_dir(&build_dir).unwrap();
        fs::write(build_dir.join("excluded.rs"), "fn excluded() {}").unwrap();

        fs::write(root.join(".gitignore"), "build/\n").unwrap();

        let files_with = FileLoader::new().with_gitignore(true).load(root)?;
        let files_without = FileLoader::new().with_gitignore(false).load(root)?;

        assert!(contains_file(&files_with, "included.rs"));

        assert!(contains_file(&files_without, "included.rs"));
        assert!(contains_file(&files_without, "excluded.rs"));

        Ok(())
    }

    /// Loading a lone file with an unknown extension reports `UnsupportedFileType`.
    #[test]
    fn test_unsupported_file_type() {
        let dir = TempDir::new().unwrap();
        let unknown = dir.path().join("test.xyz");
        fs::write(&unknown, "random content").unwrap();

        let outcome = FileLoader::new().load(&unknown);

        assert!(matches!(outcome, Err(MccabreError::UnsupportedFileType(_))));
    }

    /// Two distinct files passed to `load_multiple` both come back.
    #[test]
    fn test_load_multiple() -> Result<()> {
        let dir = TempDir::new().unwrap();
        let rust_file = dir.path().join("test1.rs");
        let js_file = dir.path().join("test2.js");

        fs::write(&rust_file, "fn test1() {}").unwrap();
        fs::write(&js_file, "function test2() {}").unwrap();

        let loaded = FileLoader::new().load_multiple(&[&rust_file, &js_file])?;

        assert_eq!(loaded.len(), 2);

        Ok(())
    }
}
203}