// Lightweight tagged data library.
// (Source viewer header: at main, 275 lines, 9.4 kB, view raw.)
1//! Different tag parsers and their strategies. 2 3mod adapters; 4 5use crate::error::ParseError; 6use crate::label::DefaultLabel; 7use crate::label::Label; 8pub use crate::parse::adapters::*; 9#[cfg(doc)] 10use crate::storage::Storage; 11use crate::storage::StorageLock; 12use crate::tag::*; 13#[cfg(doc)] 14use crate::TagManager; 15use std::hash::BuildHasher; 16use std::marker::PhantomData; 17use std::ops::Not as _; 18use std::sync::Arc; 19use std::sync::Mutex; 20use string_interner::backend::Backend as InternerBackend; 21use string_interner::DefaultSymbol; 22use string_interner::Symbol; 23 24/// Types that provide a strategy for parsing tags. 25/// 26/// `Parser`s are required to be [`Send`] and [`Sync`] as we want [`TagManager`] 27/// to be [`Send`] and [`Sync`]. For basic parsers that don't maintain 28/// any internal state, this is trivial, but more complex parsers may 29/// need to establish internal synchronization of their state in the case 30/// that they are performing concurrent parses. 31pub trait Parser { 32 /// The type of [`Tag`] produced by the [`Parser`]. 33 type Tag: Tag; 34 35 /// Parse a given string to produce a new [`Tag`]. 36 fn parse<B, H>( 37 &self, 38 storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>, 39 key_value_separator: KeyValueSep, 40 path_separator: PathSep, 41 raw: &str, 42 ) -> Result<Self::Tag, ParseError> 43 where 44 B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>, 45 H: BuildHasher; 46} 47 48// Implement Parser for any Parser wrapped in `Arc<Mutex<_>>`, to enable 49// passing externally-synchronized parsers in addition to trivially-synchronized ones, 50// in cases where the parsers maintain internal state. 
51impl<P> Parser for Arc<Mutex<P>> 52where 53 P: Parser, 54{ 55 type Tag = P::Tag; 56 57 fn parse<B, H>( 58 &self, 59 storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>, 60 key_value_separator: KeyValueSep, 61 path_separator: PathSep, 62 raw: &str, 63 ) -> Result<Self::Tag, ParseError> 64 where 65 B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>, 66 H: BuildHasher, 67 { 68 let internal_parser = self.lock().map_err(|_| ParseError::CouldNotLock)?; 69 internal_parser.parse(storage, key_value_separator, path_separator, raw) 70 } 71} 72 73/// The policy to use for splitting on separators in a [`KeyValue`]. 74#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 75pub enum KvPolicy { 76 /// Don't allow ambiguous separators. Only one separator is permitted. 77 NoAmbiguousSep, 78 79 /// Split keys and values on the first occurence of the separator. 80 SplitOnFirstSep, 81 82 /// Split keys and values on the last occurence of the separator. 83 SplitOnLastSep, 84} 85 86/// The policy to use for permitting "single-part" [`MultipartTag`]s. 87#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 88pub enum MultipartPolicy { 89 /// Permit single-part tags. 90 PermitOnePart, 91 92 /// Do not permit single-part tags. 93 RequireMultipart, 94} 95 96/// Helper macro to construct tag parsers. 97/// 98/// This macro: 99/// 100/// 1. Defines each parser as either an empty struct or tuple struct with only public fields. 101/// 2. Implements a `parse` inherent method, which calls `check_empty` and then whatever closure 102/// is provided by the macro to implement the actual parsing behavior. 103/// 3. Implements the `Parser` trait, with `Parser::parse` just delegating to the `parse` 104/// inherent method. 105/// 106/// The syntax of each parser-defining pattern is: 107/// 108/// ```text 109/// <doc_comment> 110/// <struct_name>(<field_types>)? => <tag_type> { 111/// <parser_closure> 112/// } 113/// ``` 114macro_rules! 
parsers { 115 ( 116 $( 117 $( #[$($attrss:meta)*] )* 118 $struct:ident { $($field_name:ident: $field_ty:tt),* } => $tag:ident { 119 $parser:expr 120 } 121 )* 122 ) => { 123 $( 124 parsers! { 125 @single 126 $( #[$($attrss)*] )* 127 $struct { $($field_name: $field_ty),* } => $tag { 128 $parser 129 } 130 } 131 )* 132 }; 133 134 ( 135 @single 136 $(#[$($attrss:meta)*] )* 137 $struct:ident { $($field_name:ident: $field_ty:tt),* } => $tag:ident { 138 $parser:expr 139 } 140 ) => { 141 $( #[$($attrss)*] )* 142 #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 143 pub struct $struct<L: Label = DefaultLabel, S: Symbol = DefaultSymbol> { 144 _label: PhantomData<L>, 145 _symbol: PhantomData<S>, 146 $( $field_name: $field_ty ),* 147 } 148 149 impl<L, S> $struct<L, S> where L: Label, S: Symbol { 150 /// Construct a new parser. 151 #[allow(clippy::new_without_default)] 152 pub fn new($( $field_name: $field_ty ),*) -> Self { 153 Self { 154 _label: PhantomData, 155 _symbol: PhantomData, 156 $($field_name),* 157 } 158 } 159 160 /// Parse a token with the given `interner` and `separator`. 
161 #[allow(clippy::redundant_closure_call)] 162 pub fn parse<B, H>( 163 &self, 164 storage: &mut StorageLock<'_, L, B, H>, 165 key_value_separator: KeyValueSep, 166 path_separator: PathSep, 167 raw: &str 168 ) -> Result<$tag<L, S>, ParseError> 169 where 170 S: Symbol, 171 B: InternerBackend<Symbol = S>, 172 H: BuildHasher 173 { 174 check_empty(raw)?; 175 ($parser)(self, storage, key_value_separator, path_separator, raw) 176 } 177 } 178 179 impl<L: Label, S: Symbol> Parser for $struct<L, S> { 180 type Tag = $tag<L, S>; 181 182 fn parse<B, H>( 183 &self, 184 storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>, 185 key_value_separator: KeyValueSep, 186 path_separator: PathSep, 187 raw: &str 188 ) -> Result<Self::Tag, ParseError> 189 where 190 B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>, 191 H: BuildHasher 192 { 193 self.parse(storage, key_value_separator, path_separator, raw) 194 } 195 } 196 }; 197} 198 199/// Validate that the raw tag isn't empty, error out if it is. 200fn check_empty(raw: &str) -> Result<(), ParseError> { 201 raw.is_empty() 202 .not() 203 .then_some(()) 204 .ok_or(ParseError::EmptyTag) 205} 206 207parsers! { 208 /// No internal structure, `':'` default separator. 209 Plain {} => PlainTag { 210 |_this, interner, _key_value_separator, _path_separator, raw| Ok(PlainTag::new(interner, raw)) 211 } 212 213 /// Key-value parser, `':'` default separator. 
214 KeyValue { policy: KvPolicy } => KeyValueTag { 215 |this: &KeyValue<L, S>, interner, key_value_separator: KeyValueSep, _path_separator, raw: &str| { 216 match this.policy { 217 KvPolicy::NoAmbiguousSep => { 218 let mut parts_iter = raw.split(key_value_separator.0); 219 let key = parts_iter.next().ok_or(ParseError::MissingKey)?; 220 let value = parts_iter.next().ok_or(ParseError::MissingValue)?; 221 match parts_iter.next() { 222 Some(_) => Err(ParseError::AmbiguousKeyValueTag), 223 None => Ok(KeyValueTag::new(interner, key, value)) 224 } 225 } 226 KvPolicy::SplitOnFirstSep => { 227 match raw.split_once(key_value_separator.0) { 228 None => Err(ParseError::MissingValue), 229 Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)), 230 } 231 } 232 KvPolicy::SplitOnLastSep => { 233 match raw.rsplit_once(key_value_separator.0) { 234 None => Err(ParseError::MissingValue), 235 Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)), 236 } 237 } 238 } 239 } 240 } 241 242 /// Multipart parser, splits parts on separator, `':'` default separator. 243 Multipart { policy: MultipartPolicy } => MultipartTag { 244 |this: &Multipart<L, S>, interner, _key_value_separator, path_separator: PathSep, raw: &str| { 245 match this.policy { 246 MultipartPolicy::PermitOnePart => Ok(MultipartTag::new(interner, raw.split(path_separator.0))), 247 MultipartPolicy::RequireMultipart => { 248 let parts = raw.split(path_separator.0); 249 250 if parts.clone().count() < 2 { 251 return Err(ParseError::SinglePartMultipart); 252 } 253 254 Ok(MultipartTag::new(interner, parts)) 255 }, 256 } 257 } 258 } 259} 260 261/* # SAFETY 262 * 263 * There's no data to sync for any of these; the only fields involved 264 * are read-only once the type is created (they just set configuration). 265 * Since there's nothing to sync, there's no worry about deriving this. 
266 */ 267 268unsafe impl<L: Label, S: Symbol> Send for Plain<L, S> {} 269unsafe impl<L: Label, S: Symbol> Sync for Plain<L, S> {} 270 271unsafe impl<L: Label, S: Symbol> Send for KeyValue<L, S> {} 272unsafe impl<L: Label, S: Symbol> Sync for KeyValue<L, S> {} 273 274unsafe impl<L: Label, S: Symbol> Send for Multipart<L, S> {} 275unsafe impl<L: Label, S: Symbol> Sync for Multipart<L, S> {}