code complexity & repetition analysis tool

core utilities & commands

+1640 -15
+163
crates/cli/src/commands/analyze.rs
··· 1 + use anyhow::Result; 2 + use mccabre_core::{ 3 + cloner::CloneDetector, 4 + complexity::{CyclomaticMetrics, LocMetrics}, 5 + config::Config, 6 + loader::FileLoader, 7 + reporter::{FileReport, Report}, 8 + }; 9 + use owo_colors::OwoColorize; 10 + use std::path::PathBuf; 11 + 12 + pub fn run( 13 + path: PathBuf, json: bool, threshold: Option<usize>, min_tokens: Option<usize>, config_path: Option<PathBuf>, 14 + respect_gitignore: bool, 15 + ) -> Result<()> { 16 + let config = if let Some(config_path) = config_path { 17 + Config::from_file(config_path)? 18 + } else { 19 + Config::load_default()? 20 + }; 21 + 22 + let config = config.merge_with_cli(threshold, min_tokens, Some(respect_gitignore)); 23 + let loader = FileLoader::new().with_gitignore(config.files.respect_gitignore); 24 + let files = loader.load(&path)?; 25 + 26 + if files.is_empty() { 27 + eprintln!("{}", "No supported files found".yellow()); 28 + return Ok(()); 29 + } 30 + 31 + let mut file_reports = Vec::new(); 32 + 33 + for file in &files { 34 + let loc = LocMetrics::calculate(&file.content, file.language)?; 35 + let cyclomatic = CyclomaticMetrics::calculate(&file.content, file.language)?; 36 + 37 + file_reports.push(FileReport { path: file.path.clone(), loc, cyclomatic }); 38 + } 39 + 40 + let clones = if config.clones.enabled { 41 + let detector = CloneDetector::new(config.clones.min_tokens); 42 + let files_for_clone_detection: Vec<_> = files 43 + .iter() 44 + .map(|f| (f.path.clone(), f.content.clone(), f.language)) 45 + .collect(); 46 + detector.detect_across_files(&files_for_clone_detection)? 47 + } else { 48 + Vec::new() 49 + }; 50 + 51 + let report = Report::new(file_reports, clones); 52 + 53 + if json { 54 + println!("{}", report.to_json()?); 55 + } else { 56 + print_pretty_report(&report, &config); 57 + } 58 + 59 + Ok(()) 60 + } 61 + 62 + fn print_pretty_report(report: &Report, config: &Config) { 63 + println!("{}", "=".repeat(80).cyan()); 64 + println!("{}", "MCCABRE CODE ANALYSIS REPORT".cyan().bold()); 65 + println!("{}", "=".repeat(80).cyan()); 66 + println!(); 67 + 68 + println!("{}", "SUMMARY".green().bold()); 69 + println!("{}", "-".repeat(80).cyan()); 70 + println!("Total files analyzed: {}", report.summary.total_files.bold()); 71 + println!( 72 + "Total physical LOC: {}", 73 + report.summary.total_physical_loc.bold() 74 + ); 75 + println!( 76 + "Total logical LOC: {}", 77 + report.summary.total_logical_loc.bold() 78 + ); 79 + println!( 80 + "Average complexity: {}", 81 + format!("{:.2}", report.summary.avg_complexity).bold() 82 + ); 83 + println!("Maximum complexity: {}", report.summary.max_complexity.bold()); 84 + println!( 85 + "High complexity files: {}", 86 + report.summary.high_complexity_files.bold() 87 + ); 88 + println!("Clone groups detected: {}", report.summary.total_clones.bold()); 89 + println!(); 90 + 91 + if !report.files.is_empty() { 92 + println!("{}", "FILE METRICS".green().bold()); 93 + println!("{}", "-".repeat(80).cyan()); 94 + 95 + for file in &report.files { 96 + println!("{} {}", "FILE:".blue().bold(), file.path.display().bold()); 97 + 98 + let complexity_value = file.cyclomatic.file_complexity; 99 + let complexity_text = format!("Cyclomatic Complexity: {}", complexity_value); 100 + 101 + if complexity_value > config.complexity.error_threshold { 102 + println!(" {}", complexity_text.red().bold()); 103 + } else if complexity_value > config.complexity.warning_threshold { 104 + println!(" {}", complexity_text.yellow()); 105 + } else { 106 + println!(" {}", complexity_text.green()); 107 + } 108 + println!(" Physical LOC: {}", file.loc.physical); 109 + println!(" Logical LOC: {}", file.loc.logical); 110 + println!(" Comment lines: {}", file.loc.comments); 111 + println!(" Blank lines: {}", file.loc.blank); 112 + println!(); 113 + 114 + if !file.cyclomatic.functions.is_empty() { 115 + println!(" {}:", "Functions".magenta()); 116 + for func in &file.cyclomatic.functions { 117 + let func_text = format!( 118 + " - {} (line {}): complexity {}", 119 + func.name, func.line, func.complexity 120 + ); 121 + 122 + if func.complexity > config.complexity.error_threshold { 123 + println!("{}", func_text.red()); 124 + } else if func.complexity > config.complexity.warning_threshold { 125 + println!("{}", func_text.yellow()); 126 + } else { 127 + println!("{}", func_text); 128 + } 129 + } 130 + println!(); 131 + } 132 + } 133 + } 134 + 135 + if !report.clones.is_empty() { 136 + println!("{}", "DETECTED CLONES".green().bold()); 137 + println!("{}", "-".repeat(80).cyan()); 138 + 139 + for clone in &report.clones { 140 + println!( 141 + "{} {} {} {} {} {}", 142 + "Clone Group".yellow(), 143 + format!("#{}", clone.id).yellow().bold(), 144 + "(length:".dimmed(), 145 + format!("{} tokens", clone.length).bold(), 146 + format!("{} occurrences)", clone.locations.len()).bold(), 147 + "".dimmed() 148 + ); 149 + 150 + for loc in &clone.locations { 151 + println!( 152 + " {} {}:{}", 153 + "-".dimmed(), 154 + loc.file.display(), 155 + format!("{}-{}", loc.start_line, loc.end_line).dimmed() 156 + ); 157 + } 158 + println!(); 159 + } 160 + } 161 + 162 + println!("{}", "=".repeat(80).cyan()); 163 + }
+82
crates/cli/src/commands/clones.rs
··· 1 + use anyhow::Result; 2 + use mccabre_core::{cloner::CloneDetector, config::Config, loader::FileLoader, reporter::Report}; 3 + use owo_colors::OwoColorize; 4 + use std::path::PathBuf; 5 + 6 + pub fn run( 7 + path: PathBuf, json: bool, min_tokens: Option<usize>, config_path: Option<PathBuf>, respect_gitignore: bool, 8 + ) -> Result<()> { 9 + let config = if let Some(config_path) = config_path { 10 + Config::from_file(config_path)? 11 + } else { 12 + Config::load_default()? 13 + }; 14 + 15 + let config = config.merge_with_cli(None, min_tokens, Some(respect_gitignore)); 16 + let loader = FileLoader::new().with_gitignore(config.files.respect_gitignore); 17 + let files = loader.load(&path)?; 18 + 19 + if files.is_empty() { 20 + eprintln!("{}", "No supported files found".yellow()); 21 + return Ok(()); 22 + } 23 + 24 + let detector = CloneDetector::new(config.clones.min_tokens); 25 + let files_for_clone_detection: Vec<_> = files 26 + .iter() 27 + .map(|f| (f.path.clone(), f.content.clone(), f.language)) 28 + .collect(); 29 + let clones = detector.detect_across_files(&files_for_clone_detection)?; 30 + 31 + let report = Report::new(Vec::new(), clones); 32 + 33 + if json { 34 + println!("{}", report.to_json()?); 35 + } else { 36 + print_clones_report(&report); 37 + } 38 + 39 + Ok(()) 40 + } 41 + 42 + fn print_clones_report(report: &Report) { 43 + println!("{}", "=".repeat(80).cyan()); 44 + println!("{}", "CLONE DETECTION REPORT".cyan().bold()); 45 + println!("{}\n", "=".repeat(80).cyan()); 46 + 47 + if report.clones.is_empty() { 48 + println!("{}", "No clones detected!".green().bold()); 49 + } else { 50 + println!( 51 + "{} {} {}", 52 + "Found".green().bold(), 53 + report.clones.len().to_string().yellow().bold(), 54 + "clone groups".green().bold() 55 + ); 56 + println!(); 57 + 58 + for clone in &report.clones { 59 + println!( 60 + "{} {} {} {} {} {}", 61 + "Clone Group".yellow(), 62 + format!("#{}", clone.id).yellow().bold(), 63 + "(length:".dimmed(), 64 + format!("{} tokens", clone.length).bold(), 65 + format!("{} occurrences)", clone.locations.len()).bold(), 66 + "".dimmed() 67 + ); 68 + 69 + for loc in &clone.locations { 70 + println!( 71 + " {} {}:{}", 72 + "-".dimmed(), 73 + loc.file.display(), 74 + format!("{}-{}", loc.start_line, loc.end_line).dimmed() 75 + ); 76 + } 77 + println!(); 78 + } 79 + } 80 + 81 + println!("{}", "=".repeat(80).cyan()); 82 + }
+117
crates/cli/src/commands/complexity.rs
··· 1 + use anyhow::Result; 2 + use mccabre_core::{ 3 + complexity::{CyclomaticMetrics, LocMetrics}, 4 + config::Config, 5 + loader::FileLoader, 6 + reporter::{FileReport, Report}, 7 + }; 8 + use owo_colors::OwoColorize; 9 + use std::path::PathBuf; 10 + 11 + pub fn run( 12 + path: PathBuf, json: bool, threshold: Option<usize>, config_path: Option<PathBuf>, respect_gitignore: bool, 13 + ) -> Result<()> { 14 + let config = if let Some(config_path) = config_path { 15 + Config::from_file(config_path)? 16 + } else { 17 + Config::load_default()? 18 + }; 19 + 20 + let config = config.merge_with_cli(threshold, None, Some(respect_gitignore)); 21 + let loader = FileLoader::new().with_gitignore(config.files.respect_gitignore); 22 + let files = loader.load(&path)?; 23 + 24 + if files.is_empty() { 25 + eprintln!("{}", "No supported files found".yellow()); 26 + return Ok(()); 27 + } 28 + 29 + let mut file_reports = Vec::new(); 30 + 31 + for file in &files { 32 + let loc = LocMetrics::calculate(&file.content, file.language)?; 33 + let cyclomatic = CyclomaticMetrics::calculate(&file.content, file.language)?; 34 + 35 + file_reports.push(FileReport { path: file.path.clone(), loc, cyclomatic }); 36 + } 37 + 38 + let report = Report::new(file_reports, Vec::new()); 39 + 40 + if json { 41 + println!("{}", report.to_json()?); 42 + } else { 43 + print_complexity_report(&report, &config); 44 + } 45 + 46 + Ok(()) 47 + } 48 + 49 + fn print_complexity_report(report: &Report, config: &Config) { 50 + println!("{}", "=".repeat(80).cyan()); 51 + println!("{}", "COMPLEXITY ANALYSIS".cyan().bold()); 52 + println!("{}\n", "=".repeat(80).cyan()); 53 + 54 + println!("{}", "SUMMARY".green().bold()); 55 + println!("{}", "-".repeat(80).cyan()); 56 + println!("Total files analyzed: {}", report.summary.total_files.bold()); 57 + println!( 58 + "Total physical LOC: {}", 59 + report.summary.total_physical_loc.bold() 60 + ); 61 + println!( 62 + "Total logical LOC: {}", 63 + report.summary.total_logical_loc.bold() 64 + ); 65 + println!( 66 + "Average complexity: {}", 67 + format!("{:.2}", report.summary.avg_complexity).bold() 68 + ); 69 + println!("Maximum complexity: {}", report.summary.max_complexity.bold()); 70 + println!( 71 + "High complexity files: {}\n", 72 + report.summary.high_complexity_files.bold() 73 + ); 74 + 75 + println!("{}", "FILE METRICS".green().bold()); 76 + println!("{}", "-".repeat(80).cyan()); 77 + 78 + for file in &report.files { 79 + println!("{} {}", "FILE:".blue().bold(), file.path.display().bold()); 80 + 81 + let complexity_value = file.cyclomatic.file_complexity; 82 + let complexity_text = format!("Cyclomatic Complexity: {}", complexity_value); 83 + 84 + if complexity_value > config.complexity.error_threshold { 85 + println!(" {}", complexity_text.red().bold()); 86 + } else if complexity_value > config.complexity.warning_threshold { 87 + println!(" {}", complexity_text.yellow()); 88 + } else { 89 + println!(" {}", complexity_text.green()); 90 + } 91 + println!(" Physical LOC: {}", file.loc.physical); 92 + println!(" Logical LOC: {}", file.loc.logical); 93 + println!(" Comment lines: {}", file.loc.comments); 94 + println!(" Blank lines: {}\n", file.loc.blank); 95 + 96 + if !file.cyclomatic.functions.is_empty() { 97 + println!(" {}:", "Functions".magenta()); 98 + for func in &file.cyclomatic.functions { 99 + let func_text = format!( 100 + " - {} (line {}): complexity {}", 101 + func.name, func.line, func.complexity 102 + ); 103 + 104 + if func.complexity > config.complexity.error_threshold { 105 + println!("{}", func_text.red()); 106 + } else if func.complexity > config.complexity.warning_threshold { 107 + println!("{}", func_text.yellow()); 108 + } else { 109 + println!("{}", func_text); 110 + } 111 + } 112 + println!(); 113 + } 114 + } 115 + 116 + println!("{}", "=".repeat(80).cyan()); 117 + }
+39
crates/cli/src/commands/dump_config.rs
··· 1 + use anyhow::Result; 2 + use mccabre_core::config::Config; 3 + use owo_colors::OwoColorize; 4 + use std::path::PathBuf; 5 + 6 + pub fn run(config_path: Option<PathBuf>) -> Result<()> { 7 + let config = if let Some(path) = config_path { 8 + println!("{} {}", "Loading config from:".blue(), path.display()); 9 + Config::from_file(&path)? 10 + } else { 11 + println!("{}", "Using default configuration".blue()); 12 + Config::load_default()? 13 + }; 14 + 15 + println!(); 16 + println!("{}", "CONFIGURATION".green().bold()); 17 + println!("{}", "=".repeat(80).cyan()); 18 + println!(); 19 + 20 + println!("{}", "Complexity Settings:".yellow().bold()); 21 + println!(" Warning threshold: {}", config.complexity.warning_threshold); 22 + println!(" Error threshold: {}", config.complexity.error_threshold); 23 + println!(); 24 + 25 + println!("{}", "Clone Detection Settings:".yellow().bold()); 26 + println!(" Enabled: {}", config.clones.enabled); 27 + println!(" Minimum tokens: {}", config.clones.min_tokens); 28 + println!(); 29 + 30 + println!("{}", "File Settings:".yellow().bold()); 31 + println!(" Respect .gitignore: {}", config.files.respect_gitignore); 32 + println!(); 33 + 34 + println!("{}", "=".repeat(80).cyan()); 35 + println!(); 36 + println!("{}", "To save this configuration, create a mccabre.toml file.".dimmed()); 37 + 38 + Ok(()) 39 + }
+4
crates/cli/src/commands/mod.rs
··· 1 + pub mod analyze; 2 + pub mod clones; 3 + pub mod complexity; 4 + pub mod dump_config;
+116 -2
crates/cli/src/main.rs
··· 1 - fn main() { 2 - println!("Hello, world!"); 1 + mod commands; 2 + 3 + use anyhow::Result; 4 + use clap::{Parser, Subcommand}; 5 + use std::path::PathBuf; 6 + 7 + #[derive(Parser)] 8 + #[command(name = "mccabre")] 9 + #[command(about = "Code complexity & clone detection tool", long_about = None)] 10 + #[command(version)] 11 + struct Cli { 12 + #[command(subcommand)] 13 + command: Commands, 14 + } 15 + 16 + #[derive(Subcommand)] 17 + enum Commands { 18 + /// Run full analysis (complexity + clones + LOC) 19 + Analyze { 20 + /// Path to file or directory to analyze 21 + #[arg(value_name = "PATH", default_value = ".")] 22 + path: PathBuf, 23 + 24 + /// Output in JSON format 25 + #[arg(short, long)] 26 + json: bool, 27 + 28 + /// Complexity threshold for warnings 29 + #[arg(long)] 30 + threshold: Option<usize>, 31 + 32 + /// Minimum tokens for clone detection 33 + #[arg(long, default_value = "30")] 34 + min_tokens: usize, 35 + 36 + /// Path to config file 37 + #[arg(short, long)] 38 + config: Option<PathBuf>, 39 + 40 + /// Disable gitignore awareness 41 + #[arg(long)] 42 + no_gitignore: bool, 43 + }, 44 + 45 + /// Analyze cyclomatic complexity and LOC only 46 + Complexity { 47 + /// Path to file or directory to analyze 48 + #[arg(value_name = "PATH", default_value = ".")] 49 + path: PathBuf, 50 + 51 + /// Output in JSON format 52 + #[arg(short, long)] 53 + json: bool, 54 + 55 + /// Complexity threshold for warnings 56 + #[arg(long)] 57 + threshold: Option<usize>, 58 + 59 + /// Path to config file 60 + #[arg(short, long)] 61 + config: Option<PathBuf>, 62 + 63 + /// Disable gitignore awareness 64 + #[arg(long)] 65 + no_gitignore: bool, 66 + }, 67 + 68 + /// Detect code clones only 69 + Clones { 70 + /// Path to file or directory to analyze 71 + #[arg(value_name = "PATH", default_value = ".")] 72 + path: PathBuf, 73 + 74 + /// Output in JSON format 75 + #[arg(short, long)] 76 + json: bool, 77 + 78 + /// Minimum tokens for clone detection 79 + #[arg(long, default_value = "30")] 80 + min_tokens: usize, 81 + 82 + /// Path to config file 83 + #[arg(short, long)] 84 + config: Option<PathBuf>, 85 + 86 + /// Disable gitignore awareness 87 + #[arg(long)] 88 + no_gitignore: bool, 89 + }, 90 + 91 + /// Display current configuration 92 + DumpConfig { 93 + /// Path to config file (if not specified, shows defaults) 94 + #[arg(short, long)] 95 + config: Option<PathBuf>, 96 + }, 97 + } 98 + 99 + fn main() -> Result<()> { 100 + let cli = Cli::parse(); 101 + 102 + match cli.command { 103 + Commands::Analyze { path, json, threshold, min_tokens, config, no_gitignore } => { 104 + commands::analyze::run(path, json, threshold, Some(min_tokens), config, !no_gitignore) 105 + } 106 + 107 + Commands::Complexity { path, json, threshold, config, no_gitignore } => { 108 + commands::complexity::run(path, json, threshold, config, !no_gitignore) 109 + } 110 + 111 + Commands::Clones { path, json, min_tokens, config, no_gitignore } => { 112 + commands::clones::run(path, json, Some(min_tokens), config, !no_gitignore) 113 + } 114 + 115 + Commands::DumpConfig { config } => commands::dump_config::run(config), 116 + } 3 117 }
+185
crates/core/src/config.rs
··· 1 + use crate::error::{MccabreError, Result}; 2 + use serde::{Deserialize, Serialize}; 3 + use std::fs; 4 + use std::path::Path; 5 + 6 + /// Configuration for mccabre analysis 7 + #[derive(Debug, Clone, Serialize, Deserialize, Default)] 8 + pub struct Config { 9 + /// Cyclomatic complexity thresholds 10 + #[serde(default)] 11 + pub complexity: ComplexityConfig, 12 + 13 + /// Clone detection settings 14 + #[serde(default)] 15 + pub clones: CloneConfig, 16 + 17 + /// File filtering settings 18 + #[serde(default)] 19 + pub files: FileConfig, 20 + } 21 + 22 + #[derive(Debug, Clone, Serialize, Deserialize)] 23 + pub struct ComplexityConfig { 24 + /// Threshold for warning level (default: 10) 25 + #[serde(default = "default_warning_threshold")] 26 + pub warning_threshold: usize, 27 + 28 + /// Threshold for error level (default: 20) 29 + #[serde(default = "default_error_threshold")] 30 + pub error_threshold: usize, 31 + } 32 + 33 + #[derive(Debug, Clone, Serialize, Deserialize)] 34 + pub struct CloneConfig { 35 + /// Minimum number of tokens for clone detection (default: 30) 36 + #[serde(default = "default_min_tokens")] 37 + pub min_tokens: usize, 38 + 39 + /// Whether to enable clone detection (default: true) 40 + #[serde(default = "default_true")] 41 + pub enabled: bool, 42 + } 43 + 44 + #[derive(Debug, Clone, Serialize, Deserialize)] 45 + pub struct FileConfig { 46 + /// Whether to respect .gitignore (default: true) 47 + #[serde(default = "default_true")] 48 + pub respect_gitignore: bool, 49 + } 50 + 51 + impl Default for ComplexityConfig { 52 + fn default() -> Self { 53 + Self { warning_threshold: default_warning_threshold(), error_threshold: default_error_threshold() } 54 + } 55 + } 56 + 57 + impl Default for CloneConfig { 58 + fn default() -> Self { 59 + Self { min_tokens: default_min_tokens(), enabled: default_true() } 60 + } 61 + } 62 + 63 + impl Default for FileConfig { 64 + fn default() -> Self { 65 + Self { respect_gitignore: default_true() } 66 + } 67 + } 68 + 69 + fn default_warning_threshold() -> usize { 70 + 10 71 + } 72 + 73 + fn default_error_threshold() -> usize { 74 + 20 75 + } 76 + 77 + fn default_min_tokens() -> usize { 78 + 30 79 + } 80 + 81 + fn default_true() -> bool { 82 + true 83 + } 84 + 85 + impl Config { 86 + /// Load configuration from a TOML file 87 + pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> { 88 + let content = fs::read_to_string(path.as_ref()) 89 + .map_err(|e| MccabreError::FileRead { path: path.as_ref().to_path_buf(), source: e })?; 90 + 91 + toml::from_str(&content).map_err(|e| MccabreError::InvalidConfig(e.to_string())) 92 + } 93 + 94 + /// Try to load configuration from default locations 95 + /// Looks for: mccabre.toml, .mccabre.toml, .mccabre/config.toml 96 + pub fn load_default() -> Result<Self> { 97 + let candidates = vec!["mccabre.toml", ".mccabre.toml", ".mccabre/config.toml"]; 98 + 99 + for path in candidates { 100 + if Path::new(path).exists() { 101 + return Self::from_file(path); 102 + } 103 + } 104 + 105 + Ok(Self::default()) 106 + } 107 + 108 + /// Save configuration to a TOML file 109 + pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<()> { 110 + let content = toml::to_string_pretty(self).map_err(|e| MccabreError::InvalidConfig(e.to_string()))?; 111 + 112 + fs::write(path.as_ref(), content) 113 + .map_err(|e| MccabreError::FileRead { path: path.as_ref().to_path_buf(), source: e })?; 114 + 115 + Ok(()) 116 + } 117 + 118 + /// Merge with CLI overrides 119 + pub fn merge_with_cli( 120 + mut self, complexity_threshold: Option<usize>, min_tokens: Option<usize>, respect_gitignore: Option<bool>, 121 + ) -> Self { 122 + if let Some(threshold) = complexity_threshold { 123 + self.complexity.warning_threshold = threshold; 124 + } 125 + 126 + if let Some(min) = min_tokens { 127 + self.clones.min_tokens = min; 128 + } 129 + 130 + if let Some(respect) = respect_gitignore { 131 + self.files.respect_gitignore = respect; 132 + } 133 + 134 + self 135 + } 136 + } 137 + 138 + #[cfg(test)] 139 + mod tests { 140 + use super::*; 141 + use tempfile::TempDir; 142 + 143 + #[test] 144 + fn test_default_config() { 145 + let config = Config::default(); 146 + assert_eq!(config.complexity.warning_threshold, 10); 147 + assert_eq!(config.complexity.error_threshold, 20); 148 + assert_eq!(config.clones.min_tokens, 30); 149 + assert!(config.clones.enabled); 150 + assert!(config.files.respect_gitignore); 151 + } 152 + 153 + #[test] 154 + fn test_save_and_load() { 155 + let temp_dir = TempDir::new().unwrap(); 156 + let config_path = temp_dir.path().join("test_config.toml"); 157 + 158 + let config = Config::default(); 159 + config.save(&config_path).unwrap(); 160 + 161 + let loaded = Config::from_file(&config_path).unwrap(); 162 + assert_eq!(loaded.complexity.warning_threshold, config.complexity.warning_threshold); 163 + assert_eq!(loaded.clones.min_tokens, config.clones.min_tokens); 164 + } 165 + 166 + #[test] 167 + fn test_merge_with_cli() { 168 + let mut config = Config::default(); 169 + config = config.merge_with_cli(Some(15), Some(40), Some(false)); 170 + 171 + assert_eq!(config.complexity.warning_threshold, 15); 172 + assert_eq!(config.clones.min_tokens, 40); 173 + assert!(!config.files.respect_gitignore); 174 + } 175 + 176 + #[test] 177 + fn test_partial_cli_override() { 178 + let mut config = Config::default(); 179 + config = config.merge_with_cli(Some(25), None, None); 180 + 181 + assert_eq!(config.complexity.warning_threshold, 25); 182 + assert_eq!(config.clones.min_tokens, 30); 183 + assert!(config.files.respect_gitignore); 184 + } 185 + }
+23
crates/core/src/error.rs
··· 1 + use std::io; 2 + use std::path::PathBuf; 3 + use thiserror::Error; 4 + 5 + #[derive(Error, Debug)] 6 + pub enum MccabreError { 7 + #[error("Failed to read file {path}: {source}")] 8 + FileRead { path: PathBuf, source: io::Error }, 9 + 10 + #[error("Unsupported file type: {0}")] 11 + UnsupportedFileType(String), 12 + 13 + #[error("Invalid configuration: {0}")] 14 + InvalidConfig(String), 15 + 16 + #[error("Tokenization failed: {0}")] 17 + TokenizationError(String), 18 + 19 + #[error("IO error: {0}")] 20 + Io(#[from] io::Error), 21 + } 22 + 23 + pub type Result<T> = std::result::Result<T, MccabreError>;
+8 -13
crates/core/src/lib.rs
··· 1 - pub fn add(left: u64, right: u64) -> u64 { 2 - left + right 3 - } 1 + pub mod cloner; 2 + pub mod complexity; 3 + pub mod config; 4 + pub mod error; 5 + pub mod loader; 6 + pub mod reporter; 7 + pub mod tokenizer; 4 8 5 - #[cfg(test)] 6 - mod tests { 7 - use super::*; 8 - 9 - #[test] 10 - fn it_works() { 11 - let result = add(2, 2); 12 - assert_eq!(result, 4); 13 - } 14 - } 9 + pub use error::{MccabreError, Result};
+203
crates/core/src/loader.rs
··· 1 + use crate::error::{MccabreError, Result}; 2 + use crate::tokenizer::Language; 3 + use ignore::WalkBuilder; 4 + use std::path::{Path, PathBuf}; 5 + use std::{fs, io}; 6 + 7 + /// File entry with source code and metadata 8 + #[derive(Debug, Clone)] 9 + pub struct SourceFile { 10 + pub path: PathBuf, 11 + pub content: String, 12 + pub language: Language, 13 + } 14 + 15 + /// File loader that respects .gitignore and supports various input types 16 + pub struct FileLoader { 17 + /// Whether to respect .gitignore files 18 + respect_gitignore: bool, 19 + } 20 + 21 + impl Default for FileLoader { 22 + fn default() -> Self { 23 + Self { respect_gitignore: true } 24 + } 25 + } 26 + 27 + impl FileLoader { 28 + pub fn new() -> Self { 29 + Self::default() 30 + } 31 + 32 + /// Enable or disable gitignore awareness 33 + pub fn with_gitignore(mut self, respect: bool) -> Self { 34 + self.respect_gitignore = respect; 35 + self 36 + } 37 + 38 + /// Load files from a path (file, directory, or list) 39 + pub fn load<P: AsRef<Path>>(&self, path: P) -> Result<Vec<SourceFile>> { 40 + let path = path.as_ref(); 41 + 42 + if path.is_file() { 43 + let file = self.load_file(path)?; 44 + Ok(vec![file]) 45 + } else if path.is_dir() { 46 + self.load_directory(path) 47 + } else { 48 + Err(MccabreError::FileRead { 49 + path: path.to_path_buf(), 50 + source: std::io::Error::new(std::io::ErrorKind::NotFound, "Path is neither a file nor a directory"), 51 + }) 52 + } 53 + } 54 + 55 + /// Load multiple paths 56 + pub fn load_multiple<P: AsRef<Path>>(&self, paths: &[P]) -> Result<Vec<SourceFile>> { 57 + let mut files = Vec::new(); 58 + 59 + for path in paths { 60 + let mut loaded = self.load(path)?; 61 + files.append(&mut loaded); 62 + } 63 + 64 + files.sort_by(|a, b| a.path.cmp(&b.path)); 65 + files.dedup_by(|a, b| a.path == b.path); 66 + 67 + Ok(files) 68 + } 69 + 70 + /// Load a single file 71 + fn load_file(&self, path: &Path) -> Result<SourceFile> { 72 + let language = Language::from_path(path)?; 73 + let content = 74 + fs::read_to_string(path).map_err(|e| MccabreError::FileRead { path: path.to_path_buf(), source: e })?; 75 + 76 + Ok(SourceFile { path: path.to_path_buf(), content, language }) 77 + } 78 + 79 + /// Load all supported files from a directory 80 + fn load_directory(&self, dir: &Path) -> Result<Vec<SourceFile>> { 81 + let mut files = Vec::new(); 82 + 83 + let walker = WalkBuilder::new(dir) 84 + .standard_filters(self.respect_gitignore) 85 + .hidden(false) 86 + .parents(true) 87 + .build(); 88 + 89 + for entry in walker { 90 + let entry = entry.map_err(|e| MccabreError::Io(io::Error::other(e.to_string())))?; 91 + let path = entry.path(); 92 + 93 + if !path.is_file() { 94 + continue; 95 + } 96 + 97 + match self.load_file(path) { 98 + Ok(file) => files.push(file), 99 + Err(MccabreError::UnsupportedFileType(_)) => continue, 100 + Err(e) => return Err(e), 101 + } 102 + } 103 + 104 + Ok(files) 105 + } 106 + } 107 + 108 + #[cfg(test)] 109 + mod tests { 110 + use super::*; 111 + use std::fs; 112 + use tempfile::TempDir; 113 + 114 + #[test] 115 + fn test_load_single_file() -> Result<()> { 116 + let temp_dir = TempDir::new().unwrap(); 117 + let file_path = temp_dir.path().join("test.rs"); 118 + fs::write(&file_path, "fn main() {}").unwrap(); 119 + 120 + let loader = FileLoader::new(); 121 + let files = loader.load(&file_path)?; 122 + 123 + assert_eq!(files.len(), 1); 124 + assert_eq!(files[0].content, "fn main() {}"); 125 + assert_eq!(files[0].language, Language::Rust); 126 + 127 + Ok(()) 128 + } 129 + 130 + #[test] 131 + fn test_load_directory() -> Result<()> { 132 + let temp_dir = TempDir::new().unwrap(); 133 + fs::write(temp_dir.path().join("file1.rs"), "fn test1() {}").unwrap(); 134 + fs::write(temp_dir.path().join("file2.js"), "function test2() {}").unwrap(); 135 + fs::write(temp_dir.path().join("readme.txt"), "Not code").unwrap(); 136 + 137 + let loader = FileLoader::new(); 138 + let files = loader.load(temp_dir.path())?; 139 + 140 + assert_eq!(files.len(), 2); 141 + 142 + let has_rust = files.iter().any(|f| f.path.ends_with("file1.rs")); 143 + let has_js = files.iter().any(|f| f.path.ends_with("file2.js")); 144 + assert!(has_rust); 145 + assert!(has_js); 146 + 147 + Ok(()) 148 + } 149 + 150 + #[test] 151 + fn test_gitignore_respected() -> Result<()> { 152 + let temp_dir = TempDir::new().unwrap(); 153 + fs::write(temp_dir.path().join("included.rs"), "fn included() {}").unwrap(); 154 + 155 + let ignored_dir = temp_dir.path().join("build"); 156 + fs::create_dir(&ignored_dir).unwrap(); 157 + fs::write(ignored_dir.join("excluded.rs"), "fn excluded() {}").unwrap(); 158 + 159 + fs::write(temp_dir.path().join(".gitignore"), "build/\n").unwrap(); 160 + 161 + let loader_with_gitignore = FileLoader::new().with_gitignore(true); 162 + let files_with = loader_with_gitignore.load(temp_dir.path())?; 163 + 164 + let loader_without_gitignore = FileLoader::new().with_gitignore(false); 165 + let files_without = loader_without_gitignore.load(temp_dir.path())?; 166 + 167 + assert!(files_with.iter().any(|f| f.path.ends_with("included.rs"))); 168 + 169 + assert!(files_without.iter().any(|f| f.path.ends_with("included.rs"))); 170 + assert!(files_without.iter().any(|f| f.path.ends_with("excluded.rs"))); 171 + 172 + Ok(()) 173 + } 174 + 175 + #[test] 176 + fn test_unsupported_file_type() { 177 + let temp_dir = TempDir::new().unwrap(); 178 + let file_path = temp_dir.path().join("test.xyz"); 179 + fs::write(&file_path, "random content").unwrap(); 180 + 181 + let loader = FileLoader::new(); 182 + let result = loader.load(&file_path); 183 + 184 + assert!(matches!(result, Err(MccabreError::UnsupportedFileType(_)))); 185 + } 186 + 187 + #[test] 188 + fn test_load_multiple() -> Result<()> { 189 + let temp_dir = TempDir::new().unwrap(); 190 + let file1 = temp_dir.path().join("test1.rs"); 191 + let file2 = temp_dir.path().join("test2.js"); 192 + 193 + fs::write(&file1, "fn test1() {}").unwrap(); 194 + fs::write(&file2, "function test2() {}").unwrap(); 195 + 196 + let loader = FileLoader::new(); 197 + let files = loader.load_multiple(&[&file1, &file2])?; 198 + 199 + assert_eq!(files.len(), 2); 200 + 201 + Ok(()) 202 + } 203 + }
+259
crates/core/src/reporter.rs
··· 1 + use crate::cloner::Clone; 2 + use crate::complexity::{CyclomaticMetrics, LocMetrics, Severity}; 3 + use serde::{Deserialize, Serialize}; 4 + use std::path::PathBuf; 5 + 6 + /// Complete analysis report for a codebase 7 + #[derive(Debug, Clone, Serialize, Deserialize)] 8 + pub struct Report { 9 + /// Per-file analysis results 10 + pub files: Vec<FileReport>, 11 + /// Detected clones across all files 12 + pub clones: Vec<Clone>, 13 + /// Summary statistics 14 + pub summary: Summary, 15 + } 16 + 17 + #[derive(Debug, Clone, Serialize, Deserialize)] 18 + pub struct FileReport { 19 + /// File path 20 + pub path: PathBuf, 21 + /// Lines of code metrics 22 + pub loc: LocMetrics, 23 + /// Cyclomatic complexity metrics 24 + pub cyclomatic: CyclomaticMetrics, 25 + } 26 + 27 + #[derive(Debug, Clone, Serialize, Deserialize)] 28 + pub struct Summary { 29 + /// Total number of files analyzed 30 + pub total_files: usize, 31 + /// Total physical lines of code 32 + pub total_physical_loc: usize, 33 + /// Total logical lines of code 34 + pub total_logical_loc: usize, 35 + /// Average cyclomatic complexity 36 + pub avg_complexity: f64, 37 + /// Maximum cyclomatic complexity 38 + pub max_complexity: usize, 39 + /// Number of files with high complexity 40 + pub high_complexity_files: usize, 41 + /// Total number of clone groups 42 + pub total_clones: usize, 43 + } 44 + 45 + impl Report { 46 + pub fn new(files: Vec<FileReport>, clones: Vec<Clone>) -> Self { 47 + let summary = Summary::from_files(&files, &clones); 48 + Self { files, clones, summary } 49 + } 50 + 51 + /// Serialize to JSON 52 + pub fn to_json(&self) -> serde_json::Result<String> { 53 + serde_json::to_string_pretty(self) 54 + } 55 + 56 + /// Generate plaintext report 57 + pub fn to_plaintext(&self) -> String { 58 + let mut output = String::new(); 59 + 60 + output.push_str(&"=".repeat(80)); 61 + output.push('\n'); 62 + output.push_str("MCCABRE CODE ANALYSIS REPORT\n"); 63 + output.push_str(&"=".repeat(80)); 64 + output.push_str("\n\n"); 65 + 66 + output.push_str("SUMMARY\n"); 67 + output.push_str(&"-".repeat(80)); 68 + output.push('\n'); 69 + output.push_str(&format!("Total files analyzed: {}\n", self.summary.total_files)); 70 + output.push_str(&format!( 71 + "Total physical LOC: {}\n", 72 + self.summary.total_physical_loc 73 + )); 74 + output.push_str(&format!( 75 + "Total logical LOC: {}\n", 76 + self.summary.total_logical_loc 77 + )); 78 + output.push_str(&format!( 79 + "Average complexity: {:.2}\n", 80 + self.summary.avg_complexity 81 + )); 82 + output.push_str(&format!( 83 + "Maximum complexity: {}\n", 84 + self.summary.max_complexity 85 + )); 86 + output.push_str(&format!( 87 + "High complexity files: {}\n", 88 + self.summary.high_complexity_files 89 + )); 90 + output.push_str(&format!( 91 + "Clone groups detected: {}\n\n", 92 + self.summary.total_clones 93 + )); 94 + 95 + if !self.files.is_empty() { 96 + output.push_str("FILE METRICS\n"); 97 + output.push_str(&"-".repeat(80)); 98 + output.push('\n'); 99 + 100 + for file in &self.files { 101 + output.push_str(&format!("FILE: {}\n", file.path.display())); 102 + output.push_str(&format!( 103 + " Cyclomatic Complexity: {} ({})\n", 104 + file.cyclomatic.file_complexity, 105 + match file.cyclomatic.severity() { 106 + Severity::Low => "low", 107 + Severity::Moderate => "moderate", 108 + Severity::High => "high", 109 + Severity::VeryHigh => "very high", 110 + } 111 + )); 112 + output.push_str(&format!(" Physical LOC: {}\n", file.loc.physical)); 113 + output.push_str(&format!(" Logical LOC: {}\n", file.loc.logical)); 114 + output.push_str(&format!(" Comment lines: {}\n", file.loc.comments)); 115 + output.push_str(&format!(" Blank lines: {}\n\n", file.loc.blank)); 116 + 117 + if !file.cyclomatic.functions.is_empty() { 118 + output.push_str(" Functions:\n"); 119 + for func in &file.cyclomatic.functions { 120 + output.push_str(&format!( 121 + " - {} (line {}): complexity {}\n", 122 + func.name, func.line, func.complexity 123 + )); 124 + } 125 + output.push('\n'); 126 + } 127 + } 128 + } 129 + 130 + if !self.clones.is_empty() { 131 + output.push_str("DETECTED CLONES\n"); 132 + output.push_str(&"-".repeat(80)); 133 + output.push('\n'); 134 + 135 + for clone in &self.clones { 136 + output.push_str(&format!( 137 + "Clone Group #{} (length: {} tokens, {} occurrences)\n", 138 + clone.id, 139 + clone.length, 140 + clone.locations.len() 141 + )); 142 + 143 + for loc in &clone.locations { 144 + output.push_str(&format!( 145 + " - {}:{}-{}\n", 146 + loc.file.display(), 147 + loc.start_line, 148 + loc.end_line 149 + )); 150 + } 151 + output.push('\n'); 152 + } 153 + } 154 + 155 + output.push_str(&"=".repeat(80)); 156 + output.push('\n'); 157 + 158 + output 159 + } 160 + } 161 + 162 + impl Summary { 163 + fn from_files(files: &[FileReport], clones: &[Clone]) -> Self { 164 + let total_files = files.len(); 165 + let total_physical_loc = files.iter().map(|f| f.loc.physical).sum(); 166 + let total_logical_loc = files.iter().map(|f| f.loc.logical).sum(); 167 + 168 + let complexities: Vec<usize> = files.iter().map(|f| f.cyclomatic.file_complexity).collect(); 169 + let avg_complexity = if !complexities.is_empty() { 170 + complexities.iter().sum::<usize>() as f64 / complexities.len() as f64 171 + } else { 172 + 0.0 173 + }; 174 + 175 + let max_complexity = complexities.iter().max().copied().unwrap_or(0); 176 + 177 + let high_complexity_files = files 178 + .iter() 179 + .filter(|f| matches!(f.cyclomatic.severity(), Severity::High | Severity::VeryHigh)) 180 + .count(); 181 + 182 + let total_clones = clones.len(); 183 + 184 + Self { 185 + total_files, 186 + total_physical_loc, 187 + total_logical_loc, 188 + avg_complexity, 189 + max_complexity, 190 + high_complexity_files, 191 + total_clones, 192 + } 193 + } 194 + } 195 + 196 + #[cfg(test)] 197 + mod tests { 198 + use super::*; 199 + use crate::complexity::FunctionComplexity; 200 + 201 + #[test] 202 + fn test_empty_report() { 203 + let report = Report::new(vec![], vec![]); 204 + assert_eq!(report.summary.total_files, 0); 205 + assert_eq!(report.summary.total_clones, 0); 206 + } 207 + 208 + #[test] 209 + fn test_report_summary() { 210 + let files = vec![ 211 + FileReport { 212 + path: PathBuf::from("test1.rs"), 213 + loc: LocMetrics { physical: 100, logical: 80, comments: 10, blank: 10 }, 214 + cyclomatic: CyclomaticMetrics { file_complexity: 5, functions: vec![] }, 215 + }, 216 + FileReport { 217 + path: PathBuf::from("test2.rs"), 218 + loc: LocMetrics { physical: 50, logical: 40, comments: 5, blank: 5 }, 219 + cyclomatic: CyclomaticMetrics { file_complexity: 15, functions: vec![] }, 220 + }, 221 + ]; 222 + 223 + let report = Report::new(files, vec![]); 224 + 225 + assert_eq!(report.summary.total_files, 2); 226 + assert_eq!(report.summary.total_physical_loc, 150); 227 + assert_eq!(report.summary.total_logical_loc, 120); 228 + assert_eq!(report.summary.avg_complexity, 10.0); 229 + assert_eq!(report.summary.max_complexity, 15); 230 + } 231 + 232 + #[test] 233 + fn test_to_json() { 234 + let report = Report::new(vec![], vec![]); 235 + let json = report.to_json().unwrap(); 236 + assert!(json.contains("files")); 237 + assert!(json.contains("clones")); 238 + assert!(json.contains("summary")); 239 + } 240 + 241 + #[test] 242 + fn test_to_plaintext() { 243 + let files = vec![FileReport { 244 + path: PathBuf::from("test.rs"), 245 + loc: LocMetrics { physical: 10, logical: 8, comments: 1, blank: 1 }, 246 + cyclomatic: CyclomaticMetrics { 247 + file_complexity: 3, 248 + functions: vec![FunctionComplexity { name: "test".to_string(), complexity: 3, line: 1 }], 249 + }, 250 + }]; 251 + 252 + let report = Report::new(files, vec![]); 253 + let plaintext = report.to_plaintext(); 254 + 255 + assert!(plaintext.contains("MCCABRE CODE ANALYSIS REPORT")); 256 + assert!(plaintext.contains("test.rs")); 257 + assert!(plaintext.contains("Cyclomatic Complexity")); 258 + } 259 + }
+441
crates/core/src/tokenizer.rs
··· 1 + use crate::error::{MccabreError, Result}; 2 + use std::path::Path; 3 + 4 + #[derive(Debug, Clone, Copy, PartialEq, Eq)] 5 + pub enum Language { 6 + Rust, 7 + JavaScript, 8 + TypeScript, 9 + Go, 10 + Java, 11 + Cpp, 12 + } 13 + 14 + impl Language { 15 + /// Detect language from file extension 16 + pub fn from_path(path: &Path) -> Result<Self> { 17 + let extension = path 18 + .extension() 19 + .and_then(|e| e.to_str()) 20 + .ok_or_else(|| MccabreError::UnsupportedFileType(path.to_string_lossy().to_string()))?; 21 + 22 + match extension { 23 + "rs" => Ok(Language::Rust), 24 + "js" | "jsx" | "mjs" | "cjs" => Ok(Language::JavaScript), 25 + "ts" | "tsx" => Ok(Language::TypeScript), 26 + "go" => Ok(Language::Go), 27 + "java" => Ok(Language::Java), 28 + "cpp" | "cc" | "cxx" | "c++" | "hpp" | "h" | "hh" | "hxx" => Ok(Language::Cpp), 29 + _ => Err(MccabreError::UnsupportedFileType(extension.to_string())), 30 + } 31 + } 32 + 33 + /// Get single-line comment prefix 34 + pub fn single_line_comment(&self) -> &'static str { 35 + match self { 36 + Language::Rust 37 + | Language::JavaScript 38 + | Language::TypeScript 39 + | Language::Go 40 + | Language::Java 41 + | Language::Cpp => "//", 42 + } 43 + } 44 + 45 + /// Get multi-line comment delimiters (start, end) 46 + pub fn multi_line_comment(&self) -> (&'static str, &'static str) { 47 + match self { 48 + Language::Rust 49 + | Language::JavaScript 50 + | Language::TypeScript 51 + | Language::Go 52 + | Language::Java 53 + | Language::Cpp => ("/*", "*/"), 54 + } 55 + } 56 + } 57 + 58 + #[derive(Debug, Clone, PartialEq, Eq)] 59 + pub enum TokenType { 60 + If, 61 + Else, 62 + ElseIf, 63 + While, 64 + For, 65 + Loop, 66 + Match, 67 + Switch, 68 + Case, 69 + Default, 70 + Catch, 71 + 72 + LogicalAnd, 73 + LogicalOr, 74 + Ternary, 75 + 76 + Operator(String), 77 + 78 + Identifier(String), 79 + Literal(String), 80 + 81 + LeftBrace, 82 + RightBrace, 83 + LeftParen, 84 + RightParen, 85 + LeftBracket, 86 + RightBracket, 87 + Semicolon, 88 + Comma, 89 + 90 + Comment, 91 + Whitespace, 92 + Newline, 93 + Unknown(char), 94 + } 95 + 96 + impl TokenType { 97 + /// Returns true if this token contributes to cyclomatic complexity 98 + pub fn is_decision_point(&self) -> bool { 99 + matches!( 100 + self, 101 + TokenType::If 102 + | TokenType::ElseIf 103 + | TokenType::While 104 + | TokenType::For 105 + | TokenType::Loop 106 + | TokenType::Match 107 + | TokenType::Switch 108 + | TokenType::Case 109 + | TokenType::Catch 110 + | TokenType::LogicalAnd 111 + | TokenType::LogicalOr 112 + | TokenType::Ternary 113 + ) 114 + } 115 + 116 + /// Returns true if this token should be included in clone detection 117 + pub fn is_significant(&self) -> bool { 118 + !matches!(self, TokenType::Comment | TokenType::Whitespace | TokenType::Newline) 119 + } 120 + } 121 + 122 + #[derive(Debug, Clone)] 123 + pub struct Token { 124 + pub token_type: TokenType, 125 + pub line: usize, 126 + pub column: usize, 127 + pub text: String, 128 + } 129 + 130 + pub struct Tokenizer { 131 + source: Vec<char>, 132 + position: usize, 133 + line: usize, 134 + column: usize, 135 + _language: Language, 136 + } 137 + 138 + impl Tokenizer { 139 + pub fn new(source: &str, language: Language) -> Self { 140 + Self { source: source.chars().collect(), position: 0, line: 1, column: 1, _language: language } 141 + } 142 + 143 + pub fn tokenize(mut self) -> Result<Vec<Token>> { 144 + let mut tokens = Vec::new(); 145 + 146 + while !self.is_at_end() { 147 + if let Some(token) = self.next_token()? { 148 + tokens.push(token); 149 + } 150 + } 151 + 152 + Ok(tokens) 153 + } 154 + 155 + fn next_token(&mut self) -> Result<Option<Token>> { 156 + let start_line = self.line; 157 + let start_column = self.column; 158 + let start_pos = self.position; 159 + let ch = self.current()?; 160 + 161 + if ch.is_whitespace() { 162 + if ch == '\n' { 163 + self.advance(); 164 + return Ok(Some(Token { 165 + token_type: TokenType::Newline, 166 + line: start_line, 167 + column: start_column, 168 + text: "\n".to_string(), 169 + })); 170 + } else { 171 + while !self.is_at_end() && self.current()?.is_whitespace() && self.current()? != '\n' { 172 + self.advance(); 173 + } 174 + return Ok(Some(Token { 175 + token_type: TokenType::Whitespace, 176 + line: start_line, 177 + column: start_column, 178 + text: " ".to_string(), 179 + })); 180 + } 181 + } 182 + 183 + if ch == '/' { 184 + if self.peek() == Some('/') { 185 + while !self.is_at_end() && self.current()? != '\n' { 186 + self.advance(); 187 + } 188 + return Ok(Some(Token { 189 + token_type: TokenType::Comment, 190 + line: start_line, 191 + column: start_column, 192 + text: "//".to_string(), 193 + })); 194 + } else if self.peek() == Some('*') { 195 + self.advance(); 196 + self.advance(); 197 + while !self.is_at_end() { 198 + if self.current()? == '*' && self.peek() == Some('/') { 199 + self.advance(); 200 + self.advance(); 201 + break; 202 + } 203 + self.advance(); 204 + } 205 + return Ok(Some(Token { 206 + token_type: TokenType::Comment, 207 + line: start_line, 208 + column: start_column, 209 + text: "/**/".to_string(), 210 + })); 211 + } 212 + } 213 + 214 + if ch == '"' || ch == '\'' { 215 + let quote = ch; 216 + self.advance(); 217 + while !self.is_at_end() && self.current()? != quote { 218 + if self.current()? == '\\' { 219 + self.advance(); 220 + if !self.is_at_end() { 221 + self.advance(); 222 + } 223 + } else { 224 + self.advance(); 225 + } 226 + } 227 + if !self.is_at_end() { 228 + self.advance(); 229 + } 230 + let text: String = self.source[start_pos..self.position].iter().collect(); 231 + return Ok(Some(Token { 232 + token_type: TokenType::Literal(text.clone()), 233 + line: start_line, 234 + column: start_column, 235 + text, 236 + })); 237 + } 238 + 239 + if ch.is_ascii_digit() { 240 + while !self.is_at_end() 241 + && (self.current()?.is_ascii_alphanumeric() || self.current()? == '.' || self.current()? == '_') 242 + { 243 + self.advance(); 244 + } 245 + let text: String = self.source[start_pos..self.position].iter().collect(); 246 + return Ok(Some(Token { 247 + token_type: TokenType::Literal(text.clone()), 248 + line: start_line, 249 + column: start_column, 250 + text, 251 + })); 252 + } 253 + 254 + if ch.is_alphabetic() || ch == '_' { 255 + while !self.is_at_end() && (self.current()?.is_alphanumeric() || self.current()? == '_') { 256 + self.advance(); 257 + } 258 + let text: String = self.source[start_pos..self.position].iter().collect(); 259 + let token_type = self.classify_keyword(&text); 260 + return Ok(Some(Token { token_type, line: start_line, column: start_column, text })); 261 + } 262 + 263 + let token_type = match ch { 264 + '{' => { 265 + self.advance(); 266 + TokenType::LeftBrace 267 + } 268 + '}' => { 269 + self.advance(); 270 + TokenType::RightBrace 271 + } 272 + '(' => { 273 + self.advance(); 274 + TokenType::LeftParen 275 + } 276 + ')' => { 277 + self.advance(); 278 + TokenType::RightParen 279 + } 280 + '[' => { 281 + self.advance(); 282 + TokenType::LeftBracket 283 + } 284 + ']' => { 285 + self.advance(); 286 + TokenType::RightBracket 287 + } 288 + ';' => { 289 + self.advance(); 290 + TokenType::Semicolon 291 + } 292 + ',' => { 293 + self.advance(); 294 + TokenType::Comma 295 + } 296 + '?' => { 297 + self.advance(); 298 + TokenType::Ternary 299 + } 300 + '&' if self.peek() == Some('&') => { 301 + self.advance(); 302 + self.advance(); 303 + TokenType::LogicalAnd 304 + } 305 + '|' if self.peek() == Some('|') => { 306 + self.advance(); 307 + self.advance(); 308 + TokenType::LogicalOr 309 + } 310 + _ => { 311 + let op_chars = "+-*/%=<>!&|^~"; 312 + if op_chars.contains(ch) { 313 + while !self.is_at_end() && op_chars.contains(self.current()?) { 314 + self.advance(); 315 + } 316 + let text: String = self.source[start_pos..self.position].iter().collect(); 317 + TokenType::Operator(text) 318 + } else { 319 + self.advance(); 320 + TokenType::Unknown(ch) 321 + } 322 + } 323 + }; 324 + 325 + let text: String = self.source[start_pos..self.position].iter().collect(); 326 + Ok(Some(Token { token_type, line: start_line, column: start_column, text })) 327 + } 328 + 329 + fn classify_keyword(&self, word: &str) -> TokenType { 330 + match word { 331 + "if" => TokenType::If, 332 + "else" => TokenType::Else, 333 + "elif" => TokenType::ElseIf, 334 + "while" => TokenType::While, 335 + "for" => TokenType::For, 336 + "loop" => TokenType::Loop, 337 + "match" => TokenType::Match, 338 + "switch" => TokenType::Switch, 339 + "case" => TokenType::Case, 340 + "default" => TokenType::Default, 341 + "catch" => TokenType::Catch, 342 + _ => TokenType::Identifier(word.to_string()), 343 + } 344 + } 345 + 346 + fn current(&self) -> Result<char> { 347 + self.source 348 + .get(self.position) 349 + .copied() 350 + .ok_or_else(|| MccabreError::TokenizationError("Unexpected end of input".to_string())) 351 + } 352 + 353 + fn peek(&self) -> Option<char> { 354 + self.source.get(self.position + 1).copied() 355 + } 356 + 357 + fn advance(&mut self) { 358 + if let Some(ch) = self.source.get(self.position) { 359 + if *ch == '\n' { 360 + self.line += 1; 361 + self.column = 1; 362 + } else { 363 + self.column += 1; 364 + } 365 + self.position += 1; 366 + } 367 + } 368 + 369 + fn is_at_end(&self) -> bool { 370 + self.position >= self.source.len() 371 + } 372 + } 373 + 374 + #[cfg(test)] 375 + mod tests { 376 + use super::*; 377 + 378 + #[test] 379 + fn test_language_detection() { 380 + assert_eq!(Language::from_path(Path::new("test.rs")).unwrap(), Language::Rust); 381 + assert_eq!(Language::from_path(Path::new("test.js")).unwrap(), Language::JavaScript); 382 + assert_eq!(Language::from_path(Path::new("test.ts")).unwrap(), Language::TypeScript); 383 + assert_eq!(Language::from_path(Path::new("test.go")).unwrap(), Language::Go); 384 + assert_eq!(Language::from_path(Path::new("test.java")).unwrap(), Language::Java); 385 + assert_eq!(Language::from_path(Path::new("test.cpp")).unwrap(), Language::Cpp); 386 + } 387 + 388 + #[test] 389 + fn test_tokenize_simple() { 390 + let source = "if (x > 5) { return true; }"; 391 + let tokenizer = Tokenizer::new(source, Language::Rust); 392 + let tokens = tokenizer.tokenize().unwrap(); 393 + 394 + let significant: Vec<_> = tokens.iter().filter(|t| t.token_type.is_significant()).collect(); 395 + 396 + assert!(!significant.is_empty()); 397 + assert!(tokens.iter().any(|t| matches!(t.token_type, TokenType::If))); 398 + } 399 + 400 + #[test] 401 + fn test_decision_points() { 402 + let source = "if (x && y || z) { while (true) { } }"; 403 + let tokenizer = Tokenizer::new(source, Language::Rust); 404 + let tokens = tokenizer.tokenize().unwrap(); 405 + let decision_count = tokens.iter().filter(|t| t.token_type.is_decision_point()).count(); 406 + assert_eq!(decision_count, 4); 407 + } 408 + 409 + #[test] 410 + fn test_comments() { 411 + let source = r#" 412 + // Single line comment 413 + /* Multi-line 414 + comment */ 415 + let x = 5; 416 + "#; 417 + let tokenizer = Tokenizer::new(source, Language::Rust); 418 + let tokens = tokenizer.tokenize().unwrap(); 419 + 420 + let comments: Vec<_> = tokens 421 + .iter() 422 + .filter(|t| matches!(t.token_type, TokenType::Comment)) 423 + .collect(); 424 + 425 + assert_eq!(comments.len(), 2); 426 + } 427 + 428 + #[test] 429 + fn test_strings() { 430 + let source = r#"let s = "hello \"world\""; let c = 'x';"#; 431 + let tokenizer = Tokenizer::new(source, Language::Rust); 432 + let tokens = tokenizer.tokenize().unwrap(); 433 + 434 + let literals: Vec<_> = tokens 435 + .iter() 436 + .filter(|t| matches!(t.token_type, TokenType::Literal(_))) 437 + .collect(); 438 + 439 + assert!(literals.len() >= 2); 440 + } 441 + }