Lightweight tagged data library.
1//! Different tag parsers and their strategies.
2
3mod adapters;
4
5use crate::error::ParseError;
6use crate::label::DefaultLabel;
7use crate::label::Label;
8pub use crate::parse::adapters::*;
9#[cfg(doc)]
10use crate::storage::Storage;
11use crate::storage::StorageLock;
12use crate::tag::*;
13#[cfg(doc)]
14use crate::TagManager;
15use std::hash::BuildHasher;
16use std::marker::PhantomData;
17use std::ops::Not as _;
18use std::sync::Arc;
19use std::sync::Mutex;
20use string_interner::backend::Backend as InternerBackend;
21use string_interner::DefaultSymbol;
22use string_interner::Symbol;
23
24/// Types that provide a strategy for parsing tags.
25///
26/// `Parser`s are required to be [`Send`] and [`Sync`] as we want [`TagManager`]
27/// to be [`Send`] and [`Sync`]. For basic parsers that don't maintain
28/// any internal state, this is trivial, but more complex parsers may
29/// need to establish internal synchronization of their state in the case
30/// that they are performing concurrent parses.
31pub trait Parser {
32 /// The type of [`Tag`] produced by the [`Parser`].
33 type Tag: Tag;
34
35 /// Parse a given string to produce a new [`Tag`].
36 fn parse<B, H>(
37 &self,
38 storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>,
39 key_value_separator: KeyValueSep,
40 path_separator: PathSep,
41 raw: &str,
42 ) -> Result<Self::Tag, ParseError>
43 where
44 B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>,
45 H: BuildHasher;
46}
47
48// Implement Parser for any Parser wrapped in `Arc<Mutex<_>>`, to enable
49// passing externally-synchronized parsers in addition to trivially-synchronized ones,
50// in cases where the parsers maintain internal state.
51impl<P> Parser for Arc<Mutex<P>>
52where
53 P: Parser,
54{
55 type Tag = P::Tag;
56
57 fn parse<B, H>(
58 &self,
59 storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>,
60 key_value_separator: KeyValueSep,
61 path_separator: PathSep,
62 raw: &str,
63 ) -> Result<Self::Tag, ParseError>
64 where
65 B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>,
66 H: BuildHasher,
67 {
68 let internal_parser = self.lock().map_err(|_| ParseError::CouldNotLock)?;
69 internal_parser.parse(storage, key_value_separator, path_separator, raw)
70 }
71}
72
/// How a [`KeyValue`] parser splits a raw tag when looking for the separator.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum KvPolicy {
    /// Reject ambiguous input: exactly one separator is permitted.
    NoAmbiguousSep,

    /// Split key from value at the first occurrence of the separator.
    SplitOnFirstSep,

    /// Split key from value at the last occurrence of the separator.
    SplitOnLastSep,
}
85
/// Whether a [`MultipartTag`] made of just a single part is acceptable.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum MultipartPolicy {
    /// Single-part tags are accepted.
    PermitOnePart,

    /// Single-part tags are rejected.
    RequireMultipart,
}
95
/// Helper macro to construct tag parsers.
///
/// For each parser definition, this macro:
///
/// 1. Defines a struct, generic over a label type `L` and a symbol type `S`,
///    which holds the listed configuration fields (the fields are private).
/// 2. Implements a `new` constructor taking one argument per listed field.
/// 3. Implements a `parse` inherent method, which calls `check_empty` and then
///    whatever closure is provided to the macro to implement the actual
///    parsing behavior.
/// 4. Implements the `Parser` trait, with `Parser::parse` just delegating to
///    the `parse` inherent method.
///
/// The syntax of each parser-defining pattern is:
///
/// ```text
/// <doc_comment>
/// <struct_name> { <field_name>: <field_type>, ... } => <tag_type> {
///     <parser_closure>
/// }
/// ```
///
/// The closure is invoked as
/// `closure(self, storage, key_value_separator, path_separator, raw)` and has
/// to evaluate to `Result<<tag_type><L, S>, ParseError>`. Each `<field_type>`
/// must be a single token tree.
macro_rules! parsers {
    // Entry point: any number of parser definitions, each of which is expanded
    // on its own through the `@single` rule below.
    (
        $(
            $( #[$attrs:meta] )*
            $struct:ident { $($field_name:ident: $field_ty:tt),* } => $tag:ident {
                $parser:expr
            }
        )*
    ) => {
        $(
            parsers! {
                @single
                $( #[$attrs] )*
                $struct { $($field_name: $field_ty),* } => $tag {
                    $parser
                }
            }
        )*
    };

    // Expansion of one parser definition.
    (
        @single
        $( #[$attrs:meta] )*
        $struct:ident { $($field_name:ident: $field_ty:tt),* } => $tag:ident {
            $parser:expr
        }
    ) => {
        $( #[$attrs] )*
        #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
        pub struct $struct<L: Label = DefaultLabel, S: Symbol = DefaultSymbol> {
            // Markers only: they tie the parser to the label and symbol types
            // of the tags it produces without storing an `L` or `S` value.
            _label: PhantomData<L>,
            _symbol: PhantomData<S>,
            $( $field_name: $field_ty ),*
        }

        impl<L, S> $struct<L, S> where L: Label, S: Symbol {
            /// Construct a new parser.
            // Parsers declared with fields take arguments here, so `Default`
            // isn't implemented across the board for generated parsers.
            #[allow(clippy::new_without_default)]
            pub fn new($( $field_name: $field_ty ),*) -> Self {
                Self {
                    _label: PhantomData,
                    _symbol: PhantomData,
                    $($field_name),*
                }
            }

            /// Parse `raw` into a tag, using the given `storage` and separators.
            ///
            /// # Errors
            ///
            /// Returns whatever `check_empty` reports for `raw`, or otherwise
            /// any [`ParseError`] produced by this parser's strategy.
            // The parsing strategy is spliced in as a closure expression and
            // called on the spot, which is what this lint would flag.
            #[allow(clippy::redundant_closure_call)]
            pub fn parse<B, H>(
                &self,
                storage: &mut StorageLock<'_, L, B, H>,
                key_value_separator: KeyValueSep,
                path_separator: PathSep,
                raw: &str,
            ) -> Result<$tag<L, S>, ParseError>
            where
                B: InternerBackend<Symbol = S>,
                H: BuildHasher,
            {
                check_empty(raw)?;
                ($parser)(self, storage, key_value_separator, path_separator, raw)
            }
        }

        impl<L: Label, S: Symbol> Parser for $struct<L, S> {
            type Tag = $tag<L, S>;

            fn parse<B, H>(
                &self,
                storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>,
                key_value_separator: KeyValueSep,
                path_separator: PathSep,
                raw: &str,
            ) -> Result<Self::Tag, ParseError>
            where
                B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>,
                H: BuildHasher,
            {
                // Method-call syntax picks the inherent `parse` defined above
                // over this trait method, so this delegates rather than recurses.
                self.parse(storage, key_value_separator, path_separator, raw)
            }
        }
    };
}
198
199/// Validate that the raw tag isn't empty, error out if it is.
200fn check_empty(raw: &str) -> Result<(), ParseError> {
201 raw.is_empty()
202 .not()
203 .then_some(())
204 .ok_or(ParseError::EmptyTag)
205}
206
207parsers! {
208 /// No internal structure, `':'` default separator.
209 Plain {} => PlainTag {
210 |_this, interner, _key_value_separator, _path_separator, raw| Ok(PlainTag::new(interner, raw))
211 }
212
213 /// Key-value parser, `':'` default separator.
214 KeyValue { policy: KvPolicy } => KeyValueTag {
215 |this: &KeyValue<L, S>, interner, key_value_separator: KeyValueSep, _path_separator, raw: &str| {
216 match this.policy {
217 KvPolicy::NoAmbiguousSep => {
218 let mut parts_iter = raw.split(key_value_separator.0);
219 let key = parts_iter.next().ok_or(ParseError::MissingKey)?;
220 let value = parts_iter.next().ok_or(ParseError::MissingValue)?;
221 match parts_iter.next() {
222 Some(_) => Err(ParseError::AmbiguousKeyValueTag),
223 None => Ok(KeyValueTag::new(interner, key, value))
224 }
225 }
226 KvPolicy::SplitOnFirstSep => {
227 match raw.split_once(key_value_separator.0) {
228 None => Err(ParseError::MissingValue),
229 Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)),
230 }
231 }
232 KvPolicy::SplitOnLastSep => {
233 match raw.rsplit_once(key_value_separator.0) {
234 None => Err(ParseError::MissingValue),
235 Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)),
236 }
237 }
238 }
239 }
240 }
241
242 /// Multipart parser, splits parts on separator, `':'` default separator.
243 Multipart { policy: MultipartPolicy } => MultipartTag {
244 |this: &Multipart<L, S>, interner, _key_value_separator, path_separator: PathSep, raw: &str| {
245 match this.policy {
246 MultipartPolicy::PermitOnePart => Ok(MultipartTag::new(interner, raw.split(path_separator.0))),
247 MultipartPolicy::RequireMultipart => {
248 let parts = raw.split(path_separator.0);
249
250 if parts.clone().count() < 2 {
251 return Err(ParseError::SinglePartMultipart);
252 }
253
254 Ok(MultipartTag::new(interner, parts))
255 },
256 }
257 }
258 }
259}
260
261/* # SAFETY
262 *
263 * There's no data to sync for any of these; the only fields involved
264 * are read-only once the type is created (they just set configuration).
265 * Since there's nothing to sync, there's no worry about deriving this.
266 */
267
268unsafe impl<L: Label, S: Symbol> Send for Plain<L, S> {}
269unsafe impl<L: Label, S: Symbol> Sync for Plain<L, S> {}
270
271unsafe impl<L: Label, S: Symbol> Send for KeyValue<L, S> {}
272unsafe impl<L: Label, S: Symbol> Sync for KeyValue<L, S> {}
273
274unsafe impl<L: Label, S: Symbol> Send for Multipart<L, S> {}
275unsafe impl<L: Label, S: Symbol> Sync for Multipart<L, S> {}