Context-free grammar library
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Latest changes

+204 -107
+3
Cargo.toml
··· 15 15 ] 16 16 17 17 resolver = "2" 18 + 19 + [workspace.dependencies] 20 + miniserde = "0.1"
+11 -11
cfg-classify/src/lib.rs
··· 16 16 pub mod useful; 17 17 18 18 pub trait CfgClassifyExt { 19 - #[cfg(feature = "ll")] 20 - fn ll_parse_table(&self) -> ll::LlParseTable; 21 - #[cfg(feature = "lr")] 22 - fn lr0_fsm_builder(&mut self) -> lr::Lr0FsmBuilder; 23 - #[cfg(feature = "lr")] 24 - fn lr0_closure_builder(&mut self) -> lr::Lr0ClosureBuilder; 25 - fn recursion(&self) -> recursive::Recursion; 19 + #[cfg(feature = "ll")] 20 + fn ll_parse_table(&self) -> ll::LlParseTable<'_>; 21 + #[cfg(feature = "lr")] 22 + fn lr0_fsm_builder(&mut self) -> lr::Lr0FsmBuilder<'_>; 23 + #[cfg(feature = "lr")] 24 + fn lr0_closure_builder(&mut self) -> lr::Lr0ClosureBuilder<'_>; 25 + fn recursion(&self) -> recursive::Recursion<'_>; 26 26 fn make_proper(&mut self) -> bool; 27 27 fn usefulness(&mut self) -> useful::Usefulness; 28 28 fn usefulness_with_roots(&mut self, roots: &[Symbol]) -> useful::Usefulness; ··· 30 30 31 31 impl CfgClassifyExt for Cfg { 32 32 #[cfg(feature = "ll")] 33 - fn ll_parse_table(&self) -> ll::LlParseTable { 33 + fn ll_parse_table(&self) -> ll::LlParseTable<'_> { 34 34 ll::LlParseTable::new(self) 35 35 } 36 36 37 - fn recursion(&self) -> recursive::Recursion { 37 + fn recursion(&self) -> recursive::Recursion<'_> { 38 38 recursive::Recursion::new(self) 39 39 } 40 40 41 41 #[cfg(feature = "lr")] 42 - fn lr0_fsm_builder(&mut self) -> lr::Lr0FsmBuilder { 42 + fn lr0_fsm_builder(&mut self) -> lr::Lr0FsmBuilder<'_> { 43 43 lr::Lr0FsmBuilder::new(self) 44 44 } 45 45 46 46 #[cfg(feature = "lr")] 47 - fn lr0_closure_builder(&mut self) -> lr::Lr0ClosureBuilder { 47 + fn lr0_closure_builder(&mut self) -> lr::Lr0ClosureBuilder<'_> { 48 48 lr::Lr0ClosureBuilder::new(self) 49 49 } 50 50
+2 -3
cfg-grammar/Cargo.toml
··· 15 15 name = "cfg_grammar" 16 16 17 17 [dependencies] 18 - bit-vec = "0.8" 18 + bit-vec = { version = "0.8", features = ["miniserde"] } 19 19 cfg-symbol = { version = "0.0.1", path = "../cfg-symbol/" } 20 20 cfg-history = { version = "0.0.1", path = "../cfg-history/" } 21 21 smallvec = { version = "1.13", features = ["const_generics"], optional = true } ··· 23 23 24 24 # serialize 25 25 serde = { version = "1.0", features = ["derive"], optional = true } 26 - miniserde = { version = "0.1", optional = true } 26 + miniserde = { workspace = true } 27 27 28 28 [features] 29 29 serde = ["dep:serde", "cfg-symbol/serde", "smallvec/serde", "bit-vec/serde", "cfg-history/serde"] 30 - miniserde = ["dep:miniserde", "cfg-symbol/miniserde", "bit-vec/miniserde"]
+2 -2
cfg-grammar/src/cfg.rs
··· 410 410 } 411 411 412 412 /// Starts building a new rule. 413 - pub fn rule(&mut self, lhs: Symbol) -> RuleBuilder { 413 + pub fn rule(&mut self, lhs: Symbol) -> RuleBuilder<'_> { 414 414 RuleBuilder::new(self).rule(lhs) 415 415 } 416 416 417 417 /// Starts building a new precedenced rule. 418 - pub fn precedenced_rule(&mut self, lhs: Symbol) -> PrecedencedRuleBuilder { 418 + pub fn precedenced_rule(&mut self, lhs: Symbol) -> PrecedencedRuleBuilder<'_> { 419 419 PrecedencedRuleBuilder::new(self, lhs) 420 420 } 421 421
+1 -2
cfg-grammar/src/symbol_bit_set.rs
··· 8 8 use crate::local_prelude::*; 9 9 10 10 /// A set of symbols in the form of a bit vector. 11 - #[cfg_attr(feature = "miniserde", derive(miniserde::Serialize, miniserde::Deserialize))] 12 - #[derive(Clone, Debug)] 11 + #[derive(miniserde::Serialize, miniserde::Deserialize, Clone, Debug)] 13 12 pub struct SymbolBitSet { 14 13 bit_vec: BitVec, 15 14 }
+2 -1
cfg-history/Cargo.toml
··· 7 7 cfg-symbol = { version = "0.0.1", path = "../cfg-symbol/" } 8 8 smallvec = { version = "1.15", optional = true } 9 9 serde = { version = "1.0", features = ["derive"], optional = true } 10 - miniserde = { version = "0.1", optional = true } 10 + miniserde = { workspace = true } 11 + nanoserde = { version = "0.2", optional = true } 11 12 12 13 [features] 13 14 default = ["earley"]
+12 -8
cfg-history/src/earley/mod.rs
··· 10 10 11 11 use rule_dot::RuleDot; 12 12 13 - #[cfg_attr(feature = "miniserde", derive(miniserde::Serialize, miniserde::Deserialize))] 14 - #[derive(Copy, Clone, Default, Debug, Eq, PartialEq)] 13 + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 14 + #[cfg_attr(feature = "nanoserde", derive(nanoserde::SerBin, nanoserde::DeBin))] 15 + #[derive(miniserde::Serialize, miniserde::Deserialize, Copy, Clone, Default, Debug, Eq, PartialEq)] 15 16 pub struct ExternalOrigin { pub id: u32 } 16 - #[cfg_attr(feature = "miniserde", derive(miniserde::Serialize, miniserde::Deserialize))] 17 - #[derive(Copy, Clone, Default, Debug, Eq, PartialEq)] 17 + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 18 + #[cfg_attr(feature = "nanoserde", derive(nanoserde::SerBin, nanoserde::DeBin))] 19 + #[derive(miniserde::Serialize, miniserde::Deserialize, Copy, Clone, Default, Debug, Eq, PartialEq)] 18 20 pub struct EventId { pub id: u32 } 19 - #[cfg_attr(feature = "miniserde", derive(miniserde::Serialize, miniserde::Deserialize))] 20 - #[derive(Copy, Clone, Default, Debug, Eq, PartialEq)] 21 + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 22 + #[cfg_attr(feature = "nanoserde", derive(nanoserde::SerBin, nanoserde::DeBin))] 23 + #[derive(miniserde::Serialize, miniserde::Deserialize, Copy, Clone, Default, Debug, Eq, PartialEq)] 21 24 pub struct MinimalDistance { pub distance: u32 } 22 25 pub type NullingEliminated = Option<(Symbol, bool)>; 23 - #[cfg_attr(feature = "miniserde", derive(miniserde::Serialize, miniserde::Deserialize))] 24 - #[derive(Copy, Clone, Default, Debug, Eq, PartialEq)] 26 + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 27 + #[cfg_attr(feature = "nanoserde", derive(nanoserde::SerBin, nanoserde::DeBin))] 28 + #[derive(miniserde::Serialize, miniserde::Deserialize, Copy, Clone, Default, Debug, Eq, PartialEq)] 25 29 pub struct ExternalDottedRule { 26 30 id: u32, 27 31 pos: u32,
+40 -18
cfg-load/src/advanced.rs
··· 1 1 #![deny(unsafe_code)] 2 2 3 3 use cfg_history::RootHistoryNode; 4 - use cfg_regexp::{CfgRegexpExt, LexerMap}; 4 + use cfg_regexp::CfgRegexpExt; 5 5 use cfg_symbol_bit_matrix::Remap; 6 6 use tiny_earley::{grammar, forest, Recognizer, Symbol}; 7 7 8 8 use cfg_grammar::{Cfg, SymbolBitSet}; 9 9 use cfg_sequence::CfgSequenceExt; 10 - use std::{collections::{BTreeSet, HashMap, HashSet}, convert::AsRef, fmt, iter, str::Chars}; 10 + use std::{collections::{BTreeMap, BTreeSet, HashMap, HashSet}, convert::AsRef, iter, str::Chars}; 11 11 12 12 use elsa::FrozenIndexSet; 13 + 14 + pub use cfg_regexp::LexerMap; 13 15 14 16 use crate::LoadError; 15 17 pub struct StringInterner { ··· 91 93 struct LexerVal(Vec<Rule>); 92 94 93 95 struct Evaluator { 94 - symbols: [Symbol; 23], 96 + symbols: [Symbol; 24], 95 97 tokens: Vec<(Token, usize, usize)>, 96 98 } 97 99 ··· 100 102 101 103 fn leaf(&self, terminal: Symbol, values: u32) -> Self::Elem { 102 104 #[allow(unused_variables)] 103 - let [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2] = 105 + let [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2, tilde_op] = 104 106 self.symbols; 105 107 if terminal == ident { 106 108 self.tokens[values as usize].0.ident() ··· 113 115 114 116 fn product(&self, action_num: u32, args: Vec<Self::Elem>) -> Self::Elem { 115 117 #[allow(unused_variables)] 116 - let [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2] = 118 + let [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2, tilde_op] = 117 119 self.symbols; 118 120 // let mut iter = args.into_iter(); 119 121 match ( ··· 175 177 (12, Value::Alt(alt), Value::Ident(action), _) => { 176 178 Value::Alt2(alt, Some(action)) 177 179 } 178 - // alt ::= alt fragment; 179 - (13, Value::Alt(mut alt), Value::Fragment(fragment), _) => { 180 + // alt ::= alt tilde_op fragment; 181 + (13, Value::Alt(mut alt), _, Value::Fragment(fragment)) => { 180 182 alt.push(fragment); 181 183 Value::Alt(alt) 182 184 } ··· 248 250 RParen, 249 251 GtOp, 250 252 LexerKeyword, 253 + TildeOp, 251 254 Error(usize, usize), 252 255 } 253 256 ··· 332 335 self.advance(); 333 336 Token::Mul 334 337 } 338 + '~' => { 339 + self.advance(); 340 + Token::TildeOp 341 + } 335 342 '(' => { 336 343 self.advance(); 337 344 Token::LParen ··· 407 414 } 408 415 409 416 pub trait CfgLoadAdvancedExt { 410 - fn load_advanced(grammar: &str) -> Result<(Cfg, LexerMap, SymbolBitSet), LoadError>; 417 + fn load_advanced(grammar: &str) -> Result<AdvancedGrammar, LoadError>; 411 418 } 412 419 413 420 // fn lexeme_set(cfg: &Cfg, lexeme_origin: usize) -> Result<SymbolBitSet, LoadError> { ··· 422 429 // } 423 430 // } 424 431 432 + pub struct AdvancedGrammar { 433 + pub cfg: Cfg, 434 + pub lexer_map: LexerMap, 435 + pub sbs: SymbolBitSet, 436 + pub actions: BTreeMap<usize, Option<String>>, 437 + } 438 + 425 439 impl CfgLoadAdvancedExt for Cfg { 426 - fn load_advanced(grammar: &str) -> Result<(Cfg, LexerMap, SymbolBitSet), LoadError> { 440 + fn load_advanced(grammar: &str) -> Result<AdvancedGrammar, LoadError> { 427 441 use tiny_earley::Grammar; 428 442 let bnf_grammar = grammar! { 429 - S = [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2] 443 + S = [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2, tilde_op] 430 444 R = { 431 445 start ::= start decl; // 2 432 446 start ::= decl; // 3 ··· 439 453 rhs ::= alt2; // 10 440 454 alt2 ::= alt; // 11 441 455 alt2 ::= alt action; // 12 442 - alt ::= alt fragment; // 13 456 + alt ::= alt tilde_op fragment; // 13 443 457 alt ::= fragment; // 14 444 458 fragment ::= ident op_plus; // 15 445 459 fragment ::= ident op_mul; // 16 ··· 454 468 }; 455 469 let symbols = bnf_grammar.symbols(); 456 470 #[allow(unused_variables)] 457 - let [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2] = bnf_grammar.symbols(); 471 + let [start, rule, alt, rhs, bnf_op, ident, pipe, op_mul, op_plus, semicolon, fragment, string, decl, action, lexer_keyword, lexer, lbrace, rbrace, rules, gt_op, lparen, rparen, alt2, tilde_op] = bnf_grammar.symbols(); 458 472 let mut recognizer = Recognizer::new(&bnf_grammar); 459 473 let tokens = Lexer::tokenize(grammar); 460 474 for (i, &(ref ch, line, col)) in tokens.iter().enumerate() { ··· 471 485 Token::LParen => lparen, 472 486 Token::RParen => rparen, 473 487 Token::GtOp => gt_op, 488 + Token::TildeOp => tilde_op, 474 489 Token::LexerKeyword => lexer_keyword, 475 490 Token::Whitespace => continue, 476 491 &Token::Error(line_no, col_no) => return Err(LoadError::Parse { reason: "failed to tokenize".to_string(), line: line_no as u32, col: col_no as u32, token: None }), ··· 503 518 let mut intern_empty = true; 504 519 let mut lexer_classes = LexerMap::new(); 505 520 let mut lhs_in_parser = HashSet::new(); 521 + let mut actions = BTreeMap::new(); 506 522 // check_for_lexical_error(&rules, lexer.as_ref()); 507 523 for (idx, (rule, is_lexer)) in rules.into_iter().zip(iter::repeat(false)).chain(lexer.unwrap_or(LexerVal(vec![])).0.into_iter().zip(iter::repeat(true))).enumerate() { 508 524 let lhs = intern.get_or_intern(&rule.lhs[..]); ··· 526 542 } 527 543 match &*arg { 528 544 &Fragment::Call { ref func, ref arg } => { 529 - return Err(LoadError::Lex { reason: format!("expected Regex(string)") }); 545 + return Err(LoadError::Lex { reason: format!("expected Regex(string), found {:?}({:?})", func, arg) }); 530 546 } 531 547 &Fragment::Lex { ref string } => { 532 548 let (mut regexp_cfg, classes) = Cfg::from_regexp(string).map_err(|err| LoadError::Lex { reason: err.to_string() })?; 533 549 let mut remap = Remap::new(&mut regexp_cfg); 534 550 let mut sym_map = HashMap::new(); 535 - for (class, sym) in classes.0 { 536 - let new_sym = *lexer_classes.0.entry(class).or_insert_with(|| cfg.next_sym(None)); 551 + for (class, sym) in classes.classes { 552 + let new_sym = *lexer_classes.classes.entry(class).or_insert_with(|| cfg.next_sym(None)); 537 553 sym_map.insert(sym, new_sym); 538 554 } 539 555 remap.remap_symbols(|sym| { ··· 544 560 Ok(regexp_cfg.roots()[0]) 545 561 } 546 562 &Fragment::Rhs { ref ident, rep } => { 547 - return Err(LoadError::Lex { reason: format!("expected Regex(string)") }); 563 + return Err(LoadError::Lex { reason: format!("expected Regex(string), found {:?} repeated {:?}", ident, rep) }); 548 564 } 549 565 } 550 566 } ··· 555 571 let rhs_sym = *sym_map.entry(id).or_insert_with(|| { 556 572 cfg.sym_source_mut().next_sym(Some(name.clone().into())) 557 573 }); 558 - let child: Vec<_> = string.chars().map(|ch| *lexer_classes.0.entry(ch.into()).or_insert_with(|| cfg.next_sym(None))).collect(); 574 + let child: Vec<_> = string.chars().map(|ch| *lexer_classes.classes.entry(ch.into()).or_insert_with(|| cfg.next_sym(None))).collect(); 559 575 cfg.rule(rhs_sym).rhs(&child[..]); 560 576 set_of_lexical.insert(rhs_sym); 561 577 Ok(rhs_sym) ··· 583 599 set_of_lexical.insert(lhs_sym); 584 600 } 585 601 cfg.rule(lhs_sym).history(RootHistoryNode::Origin { origin: idx + 1 }.into()).rhs(rhs_syms?); 602 + actions.insert(idx + 1, rule.action); 586 603 } 587 604 let mut sbs = SymbolBitSet::new(); 588 605 sbs.reset(cfg.sym_source()); 589 606 for sym in set_of_lexical { 590 607 sbs.set(sym, true); 591 608 } 592 - Ok((cfg, lexer_classes, sbs)) 609 + Ok(AdvancedGrammar { 610 + cfg, 611 + lexer_map: lexer_classes, 612 + sbs, 613 + actions, 614 + }) 593 615 } else { 594 616 return Err(LoadError::Eval { reason: format!("evaluation failed: Expected Value::Rules, got {:?}", result) }); 595 617 }
+1 -1
cfg-load/src/basic.rs
··· 5 5 6 6 use cfg_grammar::Cfg; 7 7 use cfg_sequence::CfgSequenceExt; 8 - use std::{collections::HashMap, convert::AsRef, fmt::{self, Write}, str::Chars}; 8 + use std::{collections::HashMap, convert::AsRef, fmt::Write, str::Chars}; 9 9 10 10 use elsa::FrozenIndexSet; 11 11
+2 -2
cfg-load/src/lib.rs
··· 3 3 4 4 use std::fmt; 5 5 6 - pub use crate::basic::{CfgLoadExt}; 7 - pub use crate::advanced::{CfgLoadAdvancedExt}; 6 + pub use crate::basic::CfgLoadExt; 7 + pub use crate::advanced::CfgLoadAdvancedExt; 8 8 9 9 #[derive(Debug, Clone)] 10 10 pub enum LoadError {
+22 -22
cfg-load/tests/test_load.rs
··· 13 13 14 14 #[test] 15 15 fn test_load_advanced() { 16 - let (cfg, _, bs) = Cfg::load_advanced(r#" 17 - start ::= a b c d; 16 + let grammar = Cfg::load_advanced(r#" 17 + start ::= a ~ b ~ c ~ d; 18 18 a ::= y; 19 19 "#).unwrap(); 20 - assert_eq!(cfg.rules().count(), 2); 21 - let (cfg, _, bs) = Cfg::load_advanced(r#" 22 - start ::= a b c d; 20 + assert_eq!(grammar.cfg.rules().count(), 2); 21 + let grammar = Cfg::load_advanced(r#" 22 + start ::= a ~ b ~ c ~ d; 23 23 a ::= "y"; 24 24 "#).unwrap(); 25 - assert_eq!(cfg.rules().count(), 3); 26 - let (cfg, _, bs) = Cfg::load_advanced(r#" 27 - start ::= a b c d; 25 + assert_eq!(grammar.cfg.rules().count(), 3); 26 + let grammar = Cfg::load_advanced(r#" 27 + start ::= a ~ b ~ c ~ d; 28 28 a ::= "y"; 29 29 lexer { 30 30 x ::= "z"; 31 31 } 32 32 "#).unwrap(); 33 - assert_eq!(cfg.rules().count(), 5); 34 - let (cfg, lm, bs) = Cfg::load_advanced(r#" 35 - start ::= a b c d; 33 + assert_eq!(grammar.cfg.rules().count(), 5); 34 + let grammar = Cfg::load_advanced(r#" 35 + start ::= a ~ b ~ c ~ d; 36 36 a ::= "y"; 37 37 lexer { 38 38 x ::= "zzt"; 39 39 x ::= Regexp("test"); 40 40 } 41 41 "#).unwrap(); 42 - assert_eq!(cfg.rules().count(), 7); 43 - assert_eq!(format!("{:?}", lm.0.keys().flat_map(|cl| cl.iter().next()).collect::<Vec<_>>()), "[('e', 'f'), ('s', 't'), ('t', 'u'), ('y', 'z'), ('z', '{')]"); 44 - assert_eq!(cfg.to_bnf(), r#"start ::= a b c d; 42 + assert_eq!(grammar.cfg.rules().count(), 7); 43 + assert_eq!(format!("{:?}", grammar.lexer_map.classes.keys().flat_map(|cl| cl.iter().next()).collect::<Vec<_>>()), "[('e', 'f'), ('s', 't'), ('t', 'u'), ('y', 'z'), ('z', '{')]"); 44 + assert_eq!(grammar.cfg.to_bnf(), r#"start ::= a b c d; 45 45 __lex0 ::= g6; 46 46 a ::= __lex0; 47 47 __lex1 ::= g9 g9 g10; ··· 49 49 g13 ::= g10 g11 g12 g10; 50 50 x ::= g13; 51 51 "#); 52 - assert_eq!(bs.iter().count(), 3); 52 + assert_eq!(grammar.sbs.iter().count(), 3); 53 53 } 54 54 55 55 #[test] 56 56 fn test_err() { 57 57 assert!(Cfg::load_advanced(r#" 58 - start ::= a b c d; 58 + start ::= a ~ b ~ c ~ d; 59 59 x ::= "y"; 60 60 lexer { 61 61 x ::= "zzt"; ··· 66 66 67 67 #[test] 68 68 fn test_big() { 69 - let (cfg, lm, bs) = Cfg::load_advanced(r#" 70 - start ::= a b c d; 69 + let grammar = Cfg::load_advanced(r#" 70 + start ::= a ~ b ~ c ~ d; 71 71 a ::= "y"; 72 72 lexer { 73 73 x ::= "zzt"; 74 74 x ::= Regexp("test(er)?"); 75 75 } 76 76 "#).unwrap(); 77 - assert_eq!(cfg.rules().count(), 11); 78 - assert_eq!(format!("{:?}", lm.0.keys().flat_map(|cl| cl.iter().next()).collect::<Vec<_>>()), "[('e', 'f'), ('r', 's'), ('s', 't'), ('t', 'u'), ('y', 'z'), ('z', '{')]"); 79 - assert_eq!(cfg.to_bnf(), r#"start ::= a b c d; 77 + assert_eq!(grammar.cfg.rules().count(), 11); 78 + assert_eq!(format!("{:?}", grammar.lexer_map.classes.keys().flat_map(|cl| cl.iter().next()).collect::<Vec<_>>()), "[('e', 'f'), ('r', 's'), ('s', 't'), ('t', 'u'), ('y', 'z'), ('z', '{')]"); 79 + assert_eq!(grammar.cfg.to_bnf(), r#"start ::= a b c d; 80 80 __lex0 ::= g6; 81 81 a ::= __lex0; 82 82 __lex1 ::= g9 g9 g10; ··· 88 88 g17 ::= g10 g11 g13 g10 g15; 89 89 x ::= g17; 90 90 "#); 91 - assert_eq!(bs.iter().count(), 3); 91 + assert_eq!(grammar.sbs.iter().count(), 3); 92 92 }
-1
cfg-predict-distance/src/lib.rs
··· 1 1 //! Calculation of minimum distance from one part of the grammar to another. 2 2 3 3 use cfg_grammar::*; 4 - use cfg_history::{earley::History, HistoryId}; 5 4 use cfg_symbol::Symbol; 6 5 7 6 /// Calculation of minimum distance from one part of the grammar to another.
+1
cfg-regexp/Cargo.toml
··· 5 5 6 6 [dependencies] 7 7 regex-syntax = "0.8" 8 + regex-automata = "0.4" 8 9 log = "0.4" 9 10 cfg-grammar = { version = "0.0.1", path = "../cfg-grammar/" } 10 11 cfg-sequence = { version = "0.0.1", path = "../cfg-sequence/" }
+68 -14
cfg-regexp/src/lib.rs
··· 8 8 use cfg_grammar::Cfg; 9 9 use cfg_sequence::CfgSequenceExt; 10 10 use cfg_symbol::Symbol; 11 - use regex_syntax::hir::{self, Class, Hir, HirKind}; 11 + use regex_syntax::hir::{Class, Hir, HirKind}; 12 12 use regex_syntax::Parser; 13 13 14 14 pub trait CfgRegexpExt: Sized { ··· 64 64 } 65 65 66 66 #[derive(Debug, Clone)] 67 - pub struct LexerMap(pub BTreeMap<LexerClasses, Symbol>); 67 + pub struct LexerMap { 68 + pub classes: BTreeMap<LexerClasses, Symbol>, 69 + ascii: Vec<Vec<Symbol>>, 70 + ranges: BTreeMap<char, Vec<Symbol>>, 71 + } 68 72 69 73 impl LexerMap { 70 74 pub fn new() -> Self { 71 - LexerMap(BTreeMap::new()) 75 + LexerMap { 76 + classes: BTreeMap::new(), 77 + ascii: vec![], 78 + ranges: BTreeMap::new(), 79 + } 80 + } 81 + 82 + pub fn compute(&mut self) { 83 + let mut result = vec![vec![]; 256]; 84 + for (lexer_classes, &symbol) in &self.classes { 85 + for &class in &lexer_classes.set { 86 + if class.0.is_ascii() { 87 + for ascii in class.0 as u32 ..= (class.1 as u32).min(256) { 88 + result[ascii as usize].push(symbol); 89 + } 90 + } 91 + } 92 + } 93 + self.ascii = result; 94 + let mut ranges = BTreeMap::new(); 95 + for (lexer_classes, &symbol) in &self.classes { 96 + for &class in &lexer_classes.set { 97 + ranges.entry(class.0).or_insert(vec![]).push((true, symbol)); 98 + ranges.entry(char::from_u32(class.1 as u32 + 1).unwrap()).or_insert(vec![]).push((false, symbol)); 99 + } 100 + } 101 + let mut result = BTreeMap::new(); 102 + let mut work = BTreeSet::new(); 103 + for (ch, changes) in ranges { 104 + for (is_added, symbol) in changes { 105 + if is_added { 106 + work.insert(symbol); 107 + } else { 108 + work.remove(&symbol); 109 + } 110 + } 111 + result.entry(ch).or_insert(vec![]).extend(work.iter().copied()); 112 + } 113 + self.ranges = result; 114 + } 115 + 116 + pub fn get(&self, ch: char) -> &[Symbol] { 117 + if ch.is_ascii() { 118 + &self.ascii[ch as usize][..] 119 + } else { 120 + self.ranges.range(..=ch).next_back().map(|(_, v)| &v[..]).unwrap_or(&[]) 121 + } 72 122 } 73 123 } 74 124 75 125 impl Translator { 76 126 fn cfg_from_hir(hir: Hir) -> (Cfg, LexerMap) { 77 127 let cfg = Cfg::new(); 78 - let class_map = LexerMap(BTreeMap::new()); 128 + let class_map = LexerMap::new(); 79 129 let mut this = Self { cfg, class_map }; 80 130 let x = this.walk_hir(&hir, 0); 81 131 let lhs = match (x.len(), x.get(0).map_or(0, |y| y.len())) { ··· 111 161 HirKind::Literal(lit) => { 112 162 let mut syms = vec![]; 113 163 for &byte in &lit.0 { 114 - syms.push(*self.class_map.0.entry(byte.into()).or_insert_with(|| self.cfg.next_sym(None))); 164 + syms.push(*self.class_map.classes.entry(byte.into()).or_insert_with(|| self.cfg.next_sym(None))); 115 165 } 116 166 println!("{indent}Literal: {:?}", lit); 117 167 vec![syms] 118 168 } 119 169 HirKind::Class(class) => { 120 - let sym = *self.class_map.0.entry(class.clone().into()).or_insert_with(|| self.cfg.next_sym(None)); 170 + let sym = *self.class_map.classes.entry(class.clone().into()).or_insert_with(|| self.cfg.next_sym(None)); 121 171 println!("{indent}Class: {:?}", class); 122 172 vec![vec![sym]] 123 173 } ··· 156 206 println!("{indent}Concat:"); 157 207 let mut result = vec![]; 158 208 for expr in exprs { 159 - let mut x = self.walk_hir(expr, depth + 1); 209 + let x = self.walk_hir(expr, depth + 1); 160 210 match x.len() { 161 211 0 => {} 162 212 1 => { ··· 181 231 } 182 232 alternatives 183 233 } 184 - HirKind::Look(look) => { 234 + HirKind::Look(_look) => { 185 235 unimplemented!() 186 236 } 187 237 HirKind::Empty => { ··· 196 246 mod tests { 197 247 use super::*; 198 248 199 - use regex_syntax::hir::{self, Hir, HirKind}; 200 - use regex_syntax::Parser; 201 - 202 249 #[test] 203 250 fn it_works() { 204 251 let result = add(2, 2); ··· 206 253 207 254 208 255 let pattern = r"(?i)(foo|bar)\d+"; 209 - let (result, map) = Cfg::from_regexp(pattern).unwrap(); 210 - assert_eq!(result.rules().count(), 4); 211 - assert_eq!(map.0, BTreeMap::new()); 256 + let (result, mut map) = Cfg::from_regexp(pattern).unwrap(); 257 + assert_eq!(result.rules().count(), 5); 258 + map.compute(); 259 + assert_eq!(map.get('b').len(), 1); 260 + assert_eq!(map.get('c').len(), 0); 261 + assert_eq!(map.get('B').len(), 1); 262 + assert_eq!(map.get('D').len(), 0); 263 + assert_eq!(map.get('o').len(), 1); 264 + assert_eq!(map.get('🯰').len(), 1); 265 + assert_eq!(map.get('🯹').len(), 1); 212 266 } 213 267 }
-1
cfg-sequence/Cargo.toml
··· 18 18 19 19 [features] 20 20 serde = ["cfg-symbol/serde"] 21 - miniserde = ["cfg-symbol/miniserde"]
-1
cfg-sequence/src/builder.rs
··· 5 5 use crate::destination::SequenceDestination; 6 6 use crate::{Separator, Sequence}; 7 7 use cfg_history::earley::History; 8 - use cfg_history::HistoryId; 9 8 use cfg_symbol::Symbol; 10 9 11 10 /// Sequence rule builder.
+2 -2
cfg-sequence/src/ext.rs
··· 4 4 use crate::{builder::SequenceRuleBuilder, destination::SequenceDestination, rewrite::SequencesToProductions}; 5 5 6 6 pub trait CfgSequenceExt { 7 - fn sequence(&mut self, lhs: Symbol) -> SequenceRuleBuilder<SequencesToProductions>; 7 + fn sequence(&mut self, lhs: Symbol) -> SequenceRuleBuilder<SequencesToProductions<'_>>; 8 8 } 9 9 10 10 impl CfgSequenceExt for Cfg { 11 - fn sequence(&mut self, lhs: Symbol) -> SequenceRuleBuilder<SequencesToProductions> { 11 + fn sequence(&mut self, lhs: Symbol) -> SequenceRuleBuilder<SequencesToProductions<'_>> { 12 12 SequencesToProductions::new(self).sequence(lhs) 13 13 } 14 14 }
+1 -1
cfg-sequence/src/lib.rs
··· 11 11 12 12 use std::ops::{Bound, RangeBounds}; 13 13 14 - use cfg_history::{earley::History, HistoryId}; 14 + use cfg_history::earley::History; 15 15 use cfg_symbol::Symbol; 16 16 17 17 use self::Separator::*;
+2 -2
cfg-symbol-bit-matrix/src/remap_symbols.rs
··· 120 120 } 121 121 122 122 pub trait CfgRemapSymbolsExt { 123 - fn remap(&mut self) -> Remap; 123 + fn remap(&mut self) -> Remap<'_>; 124 124 } 125 125 126 126 impl CfgRemapSymbolsExt for Cfg { 127 - fn remap(&mut self) -> Remap { 127 + fn remap(&mut self) -> Remap<'_> { 128 128 Remap::new(self) 129 129 } 130 130 }
+2 -1
cfg-symbol/Cargo.toml
··· 17 17 [dependencies] 18 18 # serialize 19 19 serde = { version = "1.0", features = ["derive"], optional = true } 20 - miniserde = { version = "0.1", optional = true } 20 + miniserde = { workspace = true } 21 + nanoserde = { version = "0.2", optional = true }
+1 -4
cfg-symbol/src/intern.rs
··· 5 5 #[cfg(feature = "serde")] 6 6 use serde::{Deserialize, Serialize}; 7 7 8 - #[cfg(feature = "miniserde")] 9 - use miniserde::{Deserialize as MiniDeserialize, Serialize as MiniSerialize}; 10 - 11 8 /// Contains maps for translation between internal and external symbols. 12 9 #[derive(Clone, Default, Debug)] 13 10 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 14 - #[cfg_attr(feature = "miniserde", derive(MiniSerialize, MiniDeserialize))] 11 + #[derive(miniserde::Serialize, miniserde::Deserialize)] 15 12 pub struct Mapping { 16 13 /// An array of internal symbols, indexed by external symbol ID. 17 14 pub to_internal: Vec<Option<Symbol>>,
+4 -6
cfg-symbol/src/source.rs
··· 36 36 } 37 37 38 38 /// A source of numeric symbols. 39 - #[cfg_attr(feature = "miniserde", derive(miniserde::Serialize, miniserde::Deserialize))] 39 + #[derive(miniserde::Serialize, miniserde::Deserialize)] 40 40 #[derive(Clone, Debug, Default)] 41 41 pub struct SymbolSource<T: SymbolPrimitive = NonZeroU32> { 42 42 next_symbol: Symbol<T>, ··· 87 87 self.names.push(name.map(|cow| cow.into())); 88 88 ret 89 89 } 90 - pub fn name_of(&self, sym: Symbol) -> Cow<str> { 90 + pub fn name_of(&self, sym: Symbol) -> Cow<'_, str> { 91 91 match self.names.get(sym.usize()) { 92 92 Some(Some(name)) => { 93 93 Cow::Borrowed(&name.name[..]) ··· 168 168 } 169 169 } 170 170 171 - #[cfg(feature = "miniserde")] 172 171 mod miniserde_impls { 173 - use crate::{Symbol, SymbolSource, SymbolPrimitive}; 174 172 use super::SymbolName; 175 173 use std::rc::Rc; 176 174 use miniserde::de::{Deserialize, Visitor}; 177 175 use miniserde::{de, ser, Serialize}; 178 - use miniserde::{make_place, Error, Result}; 176 + use miniserde::{make_place, Result}; 179 177 180 178 make_place!(Place); 181 179 ··· 193 191 } 194 192 195 193 impl Serialize for SymbolName { 196 - fn begin(&self) -> ser::Fragment { 194 + fn begin(&self) -> ser::Fragment<'_> { 197 195 ser::Fragment::Str(self.name.to_string().into()) 198 196 } 199 197 }
+18 -3
cfg-symbol/src/symbol.rs
··· 57 57 } 58 58 } 59 59 60 - #[cfg(feature = "miniserde")] 61 60 mod miniserde_impls { 62 61 use super::{Symbol, SymbolPrimitive}; 63 62 use std::num::NonZeroU32; ··· 70 69 impl<T: SymbolPrimitive> Visitor for Place<Symbol<T>> { 71 70 fn nonnegative(&mut self, n: u64) -> Result<()> { 72 71 if n < T::MAX as u64 { 73 - if let Some(Ok(nonzero_num)) = NonZeroU32::new(n as u32).map(|n| TryInto::<T>::try_into(n)) { 72 + if let Some(Ok(nonzero_num)) = NonZeroU32::new((n + 1) as u32).map(|n| TryInto::<T>::try_into(n)) { 74 73 self.out = Some(Symbol { n: nonzero_num }); 75 74 Ok(()) 76 75 } else { ··· 89 88 } 90 89 91 90 impl<T: SymbolPrimitive> Serialize for Symbol<T> { 92 - fn begin(&self) -> ser::Fragment { 91 + fn begin(&self) -> ser::Fragment<'_> { 93 92 let n: u32 = (*self).into(); 94 93 ser::Fragment::U64(n as u64) 95 94 } 96 95 } 97 96 } 97 + 98 + #[cfg(feature = "nanoserde")] 99 + impl nanoserde::DeBin for Symbol<NonZeroU32> { 100 + fn de_bin(offset: &mut usize, bytes: &[u8]) -> Result<Self, nanoserde::DeBinErr> { 101 + Ok(Symbol { 102 + n: u32::de_bin(offset, bytes)?.try_into().unwrap() 103 + }) 104 + } 105 + } 106 + 107 + #[cfg(feature = "nanoserde")] 108 + impl nanoserde::SerBin for Symbol<NonZeroU32> { 109 + fn ser_bin(&self, output: &mut Vec<u8>) { 110 + u32::ser_bin(&self.n.get(), output); 111 + } 112 + }
-1
cfg/Cargo.toml
··· 38 38 [features] 39 39 default = ["cfg-classify", "cfg-generate", "cfg-predict-sets", "cfg-predict-distance", "cfg-generate", "cfg-sequence", "cfg-history", "cfg-symbol-bit-matrix"] 40 40 serde = ["cfg-grammar/serde"] 41 - miniserde = ["cfg-grammar/miniserde", "cfg-history/miniserde"] 42 41 ll = ["cfg-classify/ll"] 43 42 lr = ["cfg-classify/lr"] 44 43 weighted-generation = ["cfg-generate/weighted", "cfg-generate", "rand"]
docs/graph.png

This is a binary file and will not be displayed.

graph.png

This is a binary file and will not be displayed.

+7
tokei.sh
··· 1 + #!/usr/bin/bash 2 + tokei cfg{-grammar,-regexp,-load,-history,-sequence,-symbol,-symbol-bit-matrix,-predict-sets,}/src 3 + 4 + if [ $? -eq 127 ]; then 5 + echo "please run cargo install tokei (exit code 127)" 6 + fi 7 +