code complexity & repetition analysis tool

Adds cyclomatic complexity and lines-of-code (LOC) metrics to the core crate.

+431 lines added across 3 new files.

crates/core/src/complexity/cyclomatic.rs (+287)
```rust
use crate::Result;
use crate::tokenizer::{Language, Token, TokenType, Tokenizer};
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Severity {
    Low,
    Moderate,
    High,
    VeryHigh,
}

/// Cyclomatic complexity metrics for a file
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CyclomaticMetrics {
    /// Overall file complexity
    pub file_complexity: usize,
    /// Individual function complexities (if we can detect them)
    pub functions: Vec<FunctionComplexity>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionComplexity {
    /// Function name (if identifiable)
    pub name: String,
    /// Cyclomatic complexity value
    pub complexity: usize,
    /// Line number where the function starts
    pub line: usize,
}

impl CyclomaticMetrics {
    /// Calculate cyclomatic complexity from source code.
    ///
    /// Uses the simplified formula: CC = number of decision points + 1.
    /// Decision points include: if, else if, while, for, loop, match/switch,
    /// case, catch, &&, ||, ?
    pub fn calculate(source: &str, language: Language) -> Result<Self> {
        let tokens = Tokenizer::new(source, language).tokenize()?;
        let decision_points = tokens.iter().filter(|t| t.token_type.is_decision_point()).count();
        let file_complexity = if decision_points == 0 { 1 } else { decision_points + 1 };
        let functions = Self::detect_functions(&tokens, language);

        Ok(CyclomaticMetrics { file_complexity, functions })
    }

    /// Attempt to detect function boundaries and calculate per-function complexity.
    ///
    /// Looks for function patterns:
    /// - Rust: "fn" identifier "(" ... ")" "{"
    /// - JS/TS: "function" identifier "(" ... ")" "{"
    /// - Go: "func" identifier "(" ... ")" "{"
    /// - Java/C++: type identifier "(" ... ")" "{"
    fn detect_functions(tokens: &[Token], _language: Language) -> Vec<FunctionComplexity> {
        let mut functions = Vec::new();
        let mut i = 0;

        while i < tokens.len() {
            let is_function_keyword = if let TokenType::Identifier(name) = &tokens[i].token_type {
                name == "fn" || name == "func" || name == "function"
            } else {
                false
            };

            if is_function_keyword {
                let mut name = "anonymous".to_string();
                let line = tokens[i].line;

                if i + 1 < tokens.len()
                    && let TokenType::Identifier(id) = &tokens[i + 1].token_type
                {
                    name = id.clone();
                }

                let body_start = Self::find_next_token(tokens, i, TokenType::LeftBrace);

                if let Some(body_start_idx) = body_start
                    && let Some(body_end_idx) = Self::find_matching_brace(tokens, body_start_idx)
                {
                    let decision_points = tokens[body_start_idx..=body_end_idx]
                        .iter()
                        .filter(|t| t.token_type.is_decision_point())
                        .count();

                    let complexity = if decision_points == 0 { 1 } else { decision_points + 1 };

                    functions.push(FunctionComplexity { name, complexity, line });

                    i = body_end_idx + 1;
                    continue;
                }
            }

            i += 1;
        }

        functions
    }

    /// Find the next token of a specific type
    fn find_next_token(tokens: &[Token], start: usize, token_type: TokenType) -> Option<usize> {
        tokens[start..]
            .iter()
            .position(|t| std::mem::discriminant(&t.token_type) == std::mem::discriminant(&token_type))
            .map(|pos| start + pos)
    }

    /// Find the matching closing brace for an opening brace
    fn find_matching_brace(tokens: &[Token], open_idx: usize) -> Option<usize> {
        let mut depth = 0;

        for (offset, token) in tokens[open_idx..].iter().enumerate() {
            match token.token_type {
                TokenType::LeftBrace => depth += 1,
                TokenType::RightBrace => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(open_idx + offset);
                    }
                }
                _ => {}
            }
        }

        None
    }

    /// Get severity level based on complexity threshold.
    ///
    /// Standard thresholds from the literature:
    /// - 1-10: simple, low risk
    /// - 11-20: more complex, moderate risk
    /// - 21-50: complex, high risk
    /// - 50+: very complex, very high risk
    pub fn severity(&self) -> Severity {
        match self.file_complexity {
            1..=10 => Severity::Low,
            11..=20 => Severity::Moderate,
            21..=50 => Severity::High,
            _ => Severity::VeryHigh,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_function() {
        let source = r#"
            fn simple() {
                let x = 5;
                return x;
            }
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::Rust).unwrap();
        assert_eq!(metrics.file_complexity, 1);
        assert_eq!(metrics.severity(), Severity::Low);
    }

    #[test]
    fn test_single_if() {
        let source = r#"
            fn check(x: i32) {
                if x > 5 {
                    println!("big");
                }
            }
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::Rust).unwrap();
        assert_eq!(metrics.file_complexity, 2);
    }

    #[test]
    fn test_multiple_decision_points() {
        let source = r#"
            fn complex(x: i32, y: i32) {
                if x > 0 && y > 0 {
                    while x < 10 {
                        x += 1;
                    }
                } else if x < 0 {
                    for i in 0..5 {
                        println!("{}", i);
                    }
                }
            }
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::Rust).unwrap();
        assert_eq!(metrics.file_complexity, 6);
    }

    #[test]
    fn test_ternary_operator() {
        let source = r#"
            let x = condition ? true_value : false_value;
            let y = a && b ? c : d;
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::JavaScript).unwrap();
        assert_eq!(metrics.file_complexity, 4);
    }

    #[test]
    fn test_switch_case() {
        let source = r#"
            switch (x) {
                case 1:
                    break;
                case 2:
                    break;
                default:
                    break;
            }
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::JavaScript).unwrap();
        assert!(metrics.file_complexity >= 4);
    }

    #[test]
    fn test_function_detection_rust() {
        let source = r#"
            fn simple() {
                let x = 5;
            }

            fn complex() {
                if true {
                    while false {
                        loop { break; }
                    }
                }
            }
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::Rust).unwrap();

        if !metrics.functions.is_empty() {
            for func in &metrics.functions {
                assert!(func.complexity >= 1);
                assert!(!func.name.is_empty());
            }
        }
    }

    #[test]
    fn test_javascript_function() {
        let source = r#"
            function hello() {
                if (x > 0) {
                    return true;
                }
                return false;
            }
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::JavaScript).unwrap();
        assert!(!metrics.functions.is_empty());
        assert_eq!(metrics.file_complexity, 2);
    }

    #[test]
    fn test_severity_levels() {
        assert_eq!(
            CyclomaticMetrics { file_complexity: 5, functions: vec![] }.severity(),
            Severity::Low
        );
        assert_eq!(
            CyclomaticMetrics { file_complexity: 15, functions: vec![] }.severity(),
            Severity::Moderate
        );
        assert_eq!(
            CyclomaticMetrics { file_complexity: 25, functions: vec![] }.severity(),
            Severity::High
        );
        assert_eq!(
            CyclomaticMetrics { file_complexity: 100, functions: vec![] }.severity(),
            Severity::VeryHigh
        );
    }

    #[test]
    fn test_logical_operators() {
        let source = r#"
            if (a && b && c) { }
            if (x || y || z) { }
        "#;
        let metrics = CyclomaticMetrics::calculate(source, Language::JavaScript).unwrap();
        assert_eq!(metrics.file_complexity, 7);
    }
}
```
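As a usage sketch (not part of the diff), a caller might drive the file-level and per-function numbers like this; the `crate::complexity::` path assumes the re-exports added in mod.rs further down, and `report_complexity` is a hypothetical helper:

```rust
use crate::Result;
use crate::complexity::{CyclomaticMetrics, Severity};
use crate::tokenizer::Language;

/// Hypothetical caller: print per-function complexity and flag risky files.
fn report_complexity(source: &str) -> Result<()> {
    let metrics = CyclomaticMetrics::calculate(source, Language::Rust)?;

    println!("file complexity: {}", metrics.file_complexity);
    for func in &metrics.functions {
        println!("  {} (line {}): {}", func.name, func.line, func.complexity);
    }

    // Severity maps straight onto the 1-10 / 11-20 / 21-50 / 50+ thresholds above.
    if matches!(metrics.severity(), Severity::High | Severity::VeryHigh) {
        eprintln!("warning: complexity above the moderate threshold");
    }
    Ok(())
}
```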
crates/core/src/complexity/loc.rs (+139)
```rust
use crate::Result;
use crate::tokenizer::{Language, TokenType, Tokenizer};
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LineKind {
    Code,
    Comment,
    Blank,
}

/// Lines of Code metrics for a single file
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LocMetrics {
    /// Total number of lines in the file
    pub physical: usize,
    /// Number of non-blank, non-comment lines
    pub logical: usize,
    /// Number of comment lines
    pub comments: usize,
    /// Number of blank lines
    pub blank: usize,
}

impl LocMetrics {
    pub fn calculate(source: &str, language: Language) -> Result<Self> {
        let tokens = Tokenizer::new(source, language).tokenize()?;
        let physical = if source.is_empty() { 0 } else { source.split('\n').count() };
        let mut line_types = vec![LineKind::Blank; physical];

        for token in &tokens {
            let line_idx = token.line.saturating_sub(1);
            if line_idx >= line_types.len() {
                continue;
            }

            match token.token_type {
                _ if token.token_type.is_significant() => {
                    line_types[line_idx] = LineKind::Code;
                }
                TokenType::Comment => {
                    if line_types[line_idx] != LineKind::Code {
                        line_types[line_idx] = LineKind::Comment;
                    }
                }
                _ => {}
            }
        }

        for (idx, line) in source.lines().enumerate() {
            if line.trim().is_empty() && idx < line_types.len() {
                line_types[idx] = LineKind::Blank;
            }
        }

        let comments = line_types.iter().filter(|&&t| t == LineKind::Comment).count();
        let blank = line_types.iter().filter(|&&t| t == LineKind::Blank).count();
        let logical = physical - comments - blank;

        Ok(LocMetrics { physical, logical, comments, blank })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_empty_file() {
        let source = "";
        let metrics = LocMetrics::calculate(source, Language::Rust).unwrap();
        assert_eq!(metrics.physical, 0);
        assert_eq!(metrics.logical, 0);
    }

    #[test]
    fn test_simple_code() {
        let source = r#"
fn main() {
    println!("Hello");
}
"#;
        let metrics = LocMetrics::calculate(source, Language::Rust).unwrap();
        assert_eq!(metrics.physical, 5);
        assert!(metrics.logical >= 2);
        assert!(metrics.blank >= 1);
    }

    #[test]
    fn test_comments() {
        let source = r#"
// This is a comment
/* Multi-line
   comment */
let x = 5; // inline comment
"#;
        let metrics = LocMetrics::calculate(source, Language::Rust).unwrap();
        assert!(metrics.comments >= 2);
        assert!(metrics.logical >= 1);
    }

    #[test]
    fn test_blank_lines() {
        let source = r#"


fn test() {}


"#;
        let metrics = LocMetrics::calculate(source, Language::Rust).unwrap();
        assert!(metrics.blank >= 4);
        assert!(metrics.logical >= 1);
    }

    #[test]
    fn test_javascript() {
        let source = r#"
function hello() {
    console.log("Hello");
}
"#;
        let metrics = LocMetrics::calculate(source, Language::JavaScript).unwrap();
        assert_eq!(metrics.physical, 5);
        assert!(metrics.logical >= 2);
    }

    #[test]
    fn test_all_comments() {
        let source = r#"
// Comment 1
// Comment 2
/* Comment 3 */
"#;
        let metrics = LocMetrics::calculate(source, Language::Rust).unwrap();
        assert!(metrics.comments >= 3);
        assert_eq!(metrics.logical, 0);
    }
}
```
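A similarly hedged sketch of consuming LocMetrics, here to derive a comment ratio; `comment_ratio` is a hypothetical helper and is not part of the diff, but the field names come from the struct above:

```rust
use crate::Result;
use crate::complexity::LocMetrics;
use crate::tokenizer::Language;

/// Hypothetical helper: fraction of non-blank lines that are comments.
fn comment_ratio(source: &str, language: Language) -> Result<f64> {
    let loc = LocMetrics::calculate(source, language)?;
    let non_blank = loc.logical + loc.comments;
    // Guard against empty or all-blank input before dividing.
    Ok(if non_blank == 0 {
        0.0
    } else {
        loc.comments as f64 / non_blank as f64
    })
}
```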
crates/core/src/complexity/mod.rs (+5)
```rust
pub mod cyclomatic;
pub mod loc;

pub use cyclomatic::{CyclomaticMetrics, FunctionComplexity, Severity};
pub use loc::LocMetrics;
```
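With those re-exports in place, downstream code can take both metric families from a single path. A hypothetical combined report (the `FileReport` type and `analyze_file` function are illustrative only, not part of the diff):

```rust
use crate::Result;
use crate::complexity::{CyclomaticMetrics, LocMetrics, Severity};
use crate::tokenizer::Language;

/// Illustrative aggregate of both metric families for one file.
pub struct FileReport {
    pub loc: LocMetrics,
    pub cyclomatic: CyclomaticMetrics,
    pub severity: Severity,
}

pub fn analyze_file(source: &str, language: Language) -> Result<FileReport> {
    let loc = LocMetrics::calculate(source, language)?;
    let cyclomatic = CyclomaticMetrics::calculate(source, language)?;
    let severity = cyclomatic.severity();
    Ok(FileReport { loc, cyclomatic, severity })
}
```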