code complexity & repetition analysis tool
use crate::error::{MccabreError, Result};
use std::path::Path;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    Rust,
    JavaScript,
    TypeScript,
    Go,
    Java,
    Cpp,
}

impl Language {
    /// Detect language from file extension
    pub fn from_path(path: &Path) -> Result<Self> {
        let extension = path
            .extension()
            .and_then(|e| e.to_str())
            .ok_or_else(|| MccabreError::UnsupportedFileType(path.to_string_lossy().to_string()))?;

        match extension {
            "rs" => Ok(Language::Rust),
            "js" | "jsx" | "mjs" | "cjs" => Ok(Language::JavaScript),
            "ts" | "tsx" => Ok(Language::TypeScript),
            "go" => Ok(Language::Go),
            "java" => Ok(Language::Java),
            "cpp" | "cc" | "cxx" | "c++" | "hpp" | "h" | "hh" | "hxx" => Ok(Language::Cpp),
            _ => Err(MccabreError::UnsupportedFileType(extension.to_string())),
        }
    }

    /// Get single-line comment prefix
    pub fn single_line_comment(&self) -> &'static str {
        match self {
            Language::Rust
            | Language::JavaScript
            | Language::TypeScript
            | Language::Go
            | Language::Java
            | Language::Cpp => "//",
        }
    }

    /// Get multi-line comment delimiters (start, end)
    pub fn multi_line_comment(&self) -> (&'static str, &'static str) {
        match self {
            Language::Rust
            | Language::JavaScript
            | Language::TypeScript
            | Language::Go
            | Language::Java
            | Language::Cpp => ("/*", "*/"),
        }
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    If,
    Else,
    ElseIf,
    While,
    For,
    Loop,
    Match,
    Switch,
    Case,
    Default,
    Catch,

    LogicalAnd,
    LogicalOr,
    Ternary,

    Operator(String),

    Identifier(String),
    Literal(String),

    LeftBrace,
    RightBrace,
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    Semicolon,
    Comma,

    Comment,
    Whitespace,
    Newline,
    Unknown(char),
}

impl TokenType {
    /// Returns true if this token contributes to cyclomatic complexity
    pub fn is_decision_point(&self) -> bool {
        matches!(
            self,
            TokenType::If
                | TokenType::ElseIf
                | TokenType::While
                | TokenType::For
                | TokenType::Loop
                | TokenType::Match
                | TokenType::Switch
                | TokenType::Case
                | TokenType::Catch
                | TokenType::LogicalAnd
                | TokenType::LogicalOr
                | TokenType::Ternary
        )
    }

    /// Returns true if this token should be included in clone detection
    pub fn is_significant(&self) -> bool {
        !matches!(self, TokenType::Comment | TokenType::Whitespace | TokenType::Newline)
    }
}
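
// A minimal sketch (hypothetical helper, not part of the original API) of how
// a caller might turn the decision-point count into McCabe cyclomatic
// complexity: V(G) = D + 1 for a single-entry, single-exit function body,
// where D is the number of decision points in its token stream.
#[allow(dead_code)]
pub fn cyclomatic_complexity(tokens: &[Token]) -> usize {
    1 + tokens.iter().filter(|t| t.token_type.is_decision_point()).count()
}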

#[derive(Debug, Clone)]
pub struct Token {
    pub token_type: TokenType,
    pub line: usize,
    pub column: usize,
    pub text: String,
}

pub struct Tokenizer {
    source: Vec<char>,
    position: usize,
    line: usize,
    column: usize,
    _language: Language,
}

impl Tokenizer {
    pub fn new(source: &str, language: Language) -> Self {
        Self { source: source.chars().collect(), position: 0, line: 1, column: 1, _language: language }
    }

    pub fn tokenize(mut self) -> Result<Vec<Token>> {
        let mut tokens = Vec::new();

        while !self.is_at_end() {
            if let Some(token) = self.next_token()? {
                tokens.push(token);
            }
        }

        Ok(tokens)
    }
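
    // Dispatch order for the scanner below: newline / other whitespace runs
    // first, then `//` and `/* ... */` comments, then string and character
    // literals (honoring backslash escapes), then numeric literals, then
    // identifiers and keywords, and finally punctuation, `?`, `&&`, `||`,
    // and operator runs. Anything unrecognized becomes TokenType::Unknown.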
    fn next_token(&mut self) -> Result<Option<Token>> {
        let start_line = self.line;
        let start_column = self.column;
        let start_pos = self.position;
        let ch = self.current()?;

        if ch.is_whitespace() {
            if ch == '\n' {
                self.advance();
                return Ok(Some(Token {
                    token_type: TokenType::Newline,
                    line: start_line,
                    column: start_column,
                    text: "\n".to_string(),
                }));
            } else {
                while !self.is_at_end() && self.current()?.is_whitespace() && self.current()? != '\n' {
                    self.advance();
                }
                return Ok(Some(Token {
                    token_type: TokenType::Whitespace,
                    line: start_line,
                    column: start_column,
                    text: " ".to_string(),
                }));
            }
        }

        if ch == '/' {
            if self.peek() == Some('/') {
                while !self.is_at_end() && self.current()? != '\n' {
                    self.advance();
                }
                return Ok(Some(Token {
                    token_type: TokenType::Comment,
                    line: start_line,
                    column: start_column,
                    text: "//".to_string(),
                }));
            } else if self.peek() == Some('*') {
                self.advance();
                self.advance();
                while !self.is_at_end() {
                    if self.current()? == '*' && self.peek() == Some('/') {
                        self.advance();
                        self.advance();
                        break;
                    }
                    self.advance();
                }
                return Ok(Some(Token {
                    token_type: TokenType::Comment,
                    line: start_line,
                    column: start_column,
                    text: "/**/".to_string(),
                }));
            }
        }

        if ch == '"' || ch == '\'' {
            let quote = ch;
            self.advance();
            while !self.is_at_end() && self.current()? != quote {
                if self.current()? == '\\' {
                    self.advance();
                    if !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.advance();
                }
            }
            if !self.is_at_end() {
                self.advance();
            }
            let text: String = self.source[start_pos..self.position].iter().collect();
            return Ok(Some(Token {
                token_type: TokenType::Literal(text.clone()),
                line: start_line,
                column: start_column,
                text,
            }));
        }

        if ch.is_ascii_digit() {
            while !self.is_at_end()
                && (self.current()?.is_ascii_alphanumeric() || self.current()? == '.' || self.current()? == '_')
            {
                self.advance();
            }
            let text: String = self.source[start_pos..self.position].iter().collect();
            return Ok(Some(Token {
                token_type: TokenType::Literal(text.clone()),
                line: start_line,
                column: start_column,
                text,
            }));
        }

        if ch.is_alphabetic() || ch == '_' {
            while !self.is_at_end() && (self.current()?.is_alphanumeric() || self.current()? == '_') {
                self.advance();
            }
            let text: String = self.source[start_pos..self.position].iter().collect();
            let token_type = self.classify_keyword(&text);
            return Ok(Some(Token { token_type, line: start_line, column: start_column, text }));
        }

        let token_type = match ch {
            '{' => {
                self.advance();
                TokenType::LeftBrace
            }
            '}' => {
                self.advance();
                TokenType::RightBrace
            }
            '(' => {
                self.advance();
                TokenType::LeftParen
            }
            ')' => {
                self.advance();
                TokenType::RightParen
            }
            '[' => {
                self.advance();
                TokenType::LeftBracket
            }
            ']' => {
                self.advance();
                TokenType::RightBracket
            }
            ';' => {
                self.advance();
                TokenType::Semicolon
            }
            ',' => {
                self.advance();
                TokenType::Comma
            }
            '?' => {
                self.advance();
                TokenType::Ternary
            }
            '&' if self.peek() == Some('&') => {
                self.advance();
                self.advance();
                TokenType::LogicalAnd
            }
            '|' if self.peek() == Some('|') => {
                self.advance();
                self.advance();
                TokenType::LogicalOr
            }
            _ => {
                let op_chars = "+-*/%=<>!&|^~";
                if op_chars.contains(ch) {
                    while !self.is_at_end() && op_chars.contains(self.current()?) {
                        self.advance();
                    }
                    let text: String = self.source[start_pos..self.position].iter().collect();
                    TokenType::Operator(text)
                } else {
                    self.advance();
                    TokenType::Unknown(ch)
                }
            }
        };

        let text: String = self.source[start_pos..self.position].iter().collect();
        Ok(Some(Token { token_type, line: start_line, column: start_column, text }))
    }

    fn classify_keyword(&self, word: &str) -> TokenType {
        match word {
            "if" => TokenType::If,
            "else" => TokenType::Else,
            // Note: "elif" never occurs in the supported languages ("else if"
            // is two tokens, so its `if` is counted); kept as a harmless extra.
            "elif" => TokenType::ElseIf,
            "while" => TokenType::While,
            "for" => TokenType::For,
            "loop" => TokenType::Loop,
            "match" => TokenType::Match,
            "switch" => TokenType::Switch,
            "case" => TokenType::Case,
            "default" => TokenType::Default,
            "catch" => TokenType::Catch,
            _ => TokenType::Identifier(word.to_string()),
        }
    }

    fn current(&self) -> Result<char> {
        self.source
            .get(self.position)
            .copied()
            .ok_or_else(|| MccabreError::TokenizationError("Unexpected end of input".to_string()))
    }

    fn peek(&self) -> Option<char> {
        self.source.get(self.position + 1).copied()
    }

    fn advance(&mut self) {
        if let Some(ch) = self.source.get(self.position) {
            if *ch == '\n' {
                self.line += 1;
                self.column = 1;
            } else {
                self.column += 1;
            }
            self.position += 1;
        }
    }

    fn is_at_end(&self) -> bool {
        self.position >= self.source.len()
    }
}
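
// A minimal end-to-end usage sketch (hypothetical helper, not part of the
// original API): detect the language from the file extension, tokenize the
// source, and keep only the tokens that feed clone detection.
#[allow(dead_code)]
pub fn significant_tokens(path: &Path, source: &str) -> Result<Vec<Token>> {
    let language = Language::from_path(path)?;
    let tokens = Tokenizer::new(source, language).tokenize()?;
    Ok(tokens.into_iter().filter(|t| t.token_type.is_significant()).collect())
}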

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_language_detection() {
        assert_eq!(Language::from_path(Path::new("test.rs")).unwrap(), Language::Rust);
        assert_eq!(Language::from_path(Path::new("test.js")).unwrap(), Language::JavaScript);
        assert_eq!(Language::from_path(Path::new("test.ts")).unwrap(), Language::TypeScript);
        assert_eq!(Language::from_path(Path::new("test.go")).unwrap(), Language::Go);
        assert_eq!(Language::from_path(Path::new("test.java")).unwrap(), Language::Java);
        assert_eq!(Language::from_path(Path::new("test.cpp")).unwrap(), Language::Cpp);
    }

    #[test]
    fn test_tokenize_simple() {
        let source = "if (x > 5) { return true; }";
        let tokenizer = Tokenizer::new(source, Language::Rust);
        let tokens = tokenizer.tokenize().unwrap();

        let significant: Vec<_> = tokens.iter().filter(|t| t.token_type.is_significant()).collect();

        assert!(!significant.is_empty());
        assert!(tokens.iter().any(|t| matches!(t.token_type, TokenType::If)));
    }

    #[test]
    fn test_decision_points() {
        let source = "if (x && y || z) { while (true) { } }";
        let tokenizer = Tokenizer::new(source, Language::Rust);
        let tokens = tokenizer.tokenize().unwrap();
        let decision_count = tokens.iter().filter(|t| t.token_type.is_decision_point()).count();
        assert_eq!(decision_count, 4);
    }

    #[test]
    fn test_comments() {
        let source = r#"
// Single line comment
/* Multi-line
   comment */
let x = 5;
"#;
        let tokenizer = Tokenizer::new(source, Language::Rust);
        let tokens = tokenizer.tokenize().unwrap();

        let comments: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t.token_type, TokenType::Comment))
            .collect();

        assert_eq!(comments.len(), 2);
    }

    #[test]
    fn test_strings() {
        let source = r#"let s = "hello \"world\""; let c = 'x';"#;
        let tokenizer = Tokenizer::new(source, Language::Rust);
        let tokens = tokenizer.tokenize().unwrap();

        let literals: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t.token_type, TokenType::Literal(_)))
            .collect();

        assert!(literals.len() >= 2);
    }
}
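
// Additional test sketches (not part of the original suite) covering two
// behaviors the existing tests leave implicit: `?` counting as a decision
// point, and line/column bookkeeping across newlines.
#[cfg(test)]
mod extra_tests {
    use super::*;

    #[test]
    fn ternary_is_a_decision_point() {
        let tokens = Tokenizer::new("let y = cond ? 1 : 0;", Language::JavaScript)
            .tokenize()
            .unwrap();
        // The ternary `?` should be the only decision point in this snippet.
        assert_eq!(tokens.iter().filter(|t| t.token_type.is_decision_point()).count(), 1);
    }

    #[test]
    fn line_and_column_tracking() {
        let tokens = Tokenizer::new("a\nb", Language::Rust).tokenize().unwrap();
        let idents: Vec<_> = tokens
            .iter()
            .filter(|t| matches!(t.token_type, TokenType::Identifier(_)))
            .collect();
        // `a` starts at line 1, column 1; `b` starts at line 2, column 1.
        assert_eq!((idents[0].line, idents[0].column), (1, 1));
        assert_eq!((idents[1].line, idents[1].column), (2, 1));
    }
}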