at main 465 lines 15 kB view raw
//! EditorWriter - HTML generation for markdown with visible formatting.
//!
//! Refactored to use grouped state structs for clarity.
//! Generic over TextBuffer - works with ropey (local) or can be adapted for Loro (collab).

mod embed;
mod events;
mod state;
mod syntax;
mod tags;
#[cfg(test)]
mod tests;

pub use embed::EditorImageResolver;
pub use state::*;

use std::collections::HashMap;
use std::fmt::{self, Write as FmtWrite};
use std::ops::Range;

use markdown_weaver::Event;
use smol_str::SmolStr;

use crate::offset_map::OffsetMapping;
use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator};
use crate::syntax::SyntaxSpanInfo;

/// Result of rendering with EditorWriter.
///
/// Every field is a per-paragraph collection.
///
/// NOTE(review): the vectors look like they are meant to be indexed in
/// parallel (entry `i` of each describing paragraph `i`) - confirm against the
/// code that builds this value before relying on equal lengths.
#[derive(Debug, Clone, Default)]
pub struct WriterResult {
    /// HTML segments, one per paragraph
    pub html_segments: Vec<String>,
    /// Offset mappings per paragraph
    pub offset_maps_by_paragraph: Vec<Vec<OffsetMapping>>,
    /// Paragraph boundaries: (byte_range, char_range)
    pub paragraph_ranges: Vec<(Range<usize>, Range<usize>)>,
    /// Syntax spans per paragraph
    pub syntax_spans_by_paragraph: Vec<Vec<SyntaxSpanInfo>>,
    /// Collected refs per paragraph
    pub collected_refs_by_paragraph: Vec<Vec<weaver_common::ExtractedRef>>,
}

/// Segmented HTML output writer.
44#[derive(Debug, Clone, Default)] 45pub struct SegmentedWriter { 46 segments: Vec<String>, 47 current: String, 48} 49 50impl SegmentedWriter { 51 pub fn new() -> Self { 52 Self::default() 53 } 54 55 pub fn write_str(&mut self, s: &str) -> fmt::Result { 56 self.current.push_str(s); 57 Ok(()) 58 } 59 60 pub fn new_segment(&mut self) { 61 if !self.current.is_empty() { 62 self.segments.push(std::mem::take(&mut self.current)); 63 } 64 } 65 66 pub fn into_segments(mut self) -> Vec<String> { 67 self.new_segment(); 68 self.segments 69 } 70 71 pub fn current_len(&self) -> usize { 72 self.current.len() 73 } 74} 75 76impl FmtWrite for SegmentedWriter { 77 fn write_str(&mut self, s: &str) -> fmt::Result { 78 self.current.push_str(s); 79 Ok(()) 80 } 81} 82 83impl markdown_weaver_escape::StrWrite for SegmentedWriter { 84 type Error = fmt::Error; 85 86 fn write_str(&mut self, s: &str) -> fmt::Result { 87 self.current.push_str(s); 88 Ok(()) 89 } 90 91 fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> fmt::Result { 92 std::fmt::Write::write_fmt(&mut self.current, args) 93 } 94} 95 96/// HTML writer that preserves markdown formatting characters. 
97/// 98/// Generic over: 99/// - `T`: Text buffer for efficient offset conversions 100/// - `I`: Iterator of markdown events with byte ranges 101/// - `E`: Embed content provider (optional) 102/// - `R`: Image resolver (optional) 103/// - `W`: Wikilink validator (optional) 104pub struct EditorWriter<'a, T, I, E = (), R = (), W = ()> 105where 106 T: crate::TextBuffer, 107 I: Iterator<Item = (Event<'a>, Range<usize>)>, 108{ 109 // === Input === 110 source: &'a str, 111 text_buffer: &'a T, 112 events: I, 113 114 // === Output === 115 writer: SegmentedWriter, 116 117 // === Position tracking === 118 last_byte_offset: usize, 119 last_char_offset: usize, 120 121 // === Rendering flags === 122 end_newline: bool, 123 in_non_writing_block: bool, 124 125 // === Grouped state === 126 pub(crate) table: TableContext, 127 pub(crate) code_block: CodeBlockContext, 128 pub(crate) node_ids: NodeIdGenerator, 129 pub(crate) current_node: CurrentNodeState, 130 pub(crate) paragraphs: ParagraphTracker, 131 pub(crate) current_para: ParagraphBuildState, 132 pub(crate) weaver_block: WeaverBlockContext, 133 pub(crate) footnotes: FootnoteContext, 134 pub(crate) utf16: Utf16Tracker, 135 136 // === Per-paragraph results === 137 offset_maps_by_para: Vec<Vec<OffsetMapping>>, 138 syntax_spans_by_para: Vec<Vec<SyntaxSpanInfo>>, 139 refs_by_para: Vec<Vec<weaver_common::ExtractedRef>>, 140 141 // === External resolvers === 142 embed_provider: Option<E>, 143 image_resolver: Option<R>, 144 wikilink_validator: Option<W>, 145 entry_index: Option<&'a weaver_common::EntryIndex>, 146 147 // === Misc === 148 numbers: HashMap<SmolStr, usize>, 149 pending_blockquote_range: Option<Range<usize>>, 150 ref_collector: weaver_common::RefCollector, 151} 152 153impl<'a, T, I, E, R, W> EditorWriter<'a, T, I, E, R, W> 154where 155 T: crate::TextBuffer, 156 I: Iterator<Item = (Event<'a>, Range<usize>)>, 157{ 158 /// Create a new EditorWriter. 
159 /// 160 /// `source` is the markdown source text (should match text_buffer content). 161 /// `text_buffer` provides efficient offset conversions. 162 /// `events` is the markdown parser event iterator. 163 pub fn new(source: &'a str, text_buffer: &'a T, events: I) -> Self { 164 Self { 165 source, 166 text_buffer, 167 events, 168 writer: SegmentedWriter::new(), 169 last_byte_offset: 0, 170 last_char_offset: 0, 171 end_newline: true, 172 in_non_writing_block: false, 173 table: TableContext::default(), 174 code_block: CodeBlockContext::default(), 175 node_ids: NodeIdGenerator::default(), 176 current_node: CurrentNodeState::default(), 177 paragraphs: ParagraphTracker::default(), 178 current_para: ParagraphBuildState::default(), 179 weaver_block: WeaverBlockContext::default(), 180 footnotes: FootnoteContext::default(), 181 utf16: Utf16Tracker::new(), 182 offset_maps_by_para: Vec::new(), 183 syntax_spans_by_para: Vec::new(), 184 refs_by_para: Vec::new(), 185 embed_provider: None, 186 image_resolver: None, 187 wikilink_validator: None, 188 entry_index: None, 189 numbers: HashMap::new(), 190 pending_blockquote_range: None, 191 ref_collector: weaver_common::RefCollector::new(), 192 } 193 } 194 195 /// Set a static node ID prefix for all paragraphs. 196 pub fn with_node_id_prefix(mut self, prefix: &str) -> Self { 197 self.node_ids.prefix = Some(SmolStr::new(prefix)); 198 self.node_ids.next_node_id = 0; 199 self 200 } 201 202 /// Use auto-incrementing paragraph prefixes starting from `base`. 203 pub fn with_auto_incrementing_prefix(mut self, base: usize) -> Self { 204 use smol_str::format_smolstr; 205 self.node_ids.auto_increment_base = Some(base); 206 self.node_ids.prefix = Some(format_smolstr!("p-{}", base)); 207 self.node_ids.next_node_id = 0; 208 self 209 } 210 211 /// Override prefix for a specific paragraph index. 
212 pub fn with_static_prefix_at_index(mut self, index: usize, prefix: &str) -> Self { 213 self.node_ids.static_override = Some((index, SmolStr::new(prefix))); 214 if index == 0 { 215 self.node_ids.prefix = Some(SmolStr::new(prefix)); 216 self.node_ids.next_node_id = 0; 217 } 218 self 219 } 220 221 /// Set initial offsets (for rendering a subset of the document). 222 pub fn with_offsets( 223 mut self, 224 byte_offset: usize, 225 char_offset: usize, 226 node_id_offset: usize, 227 syn_id_offset: usize, 228 ) -> Self { 229 self.last_byte_offset = byte_offset; 230 self.last_char_offset = char_offset; 231 self.node_ids.next_node_id = node_id_offset; 232 self.node_ids.next_syn_id = syn_id_offset; 233 self 234 } 235 236 /// Set embed content provider. 237 pub fn with_embed_provider<E2: EmbedContentProvider>( 238 self, 239 provider: E2, 240 ) -> EditorWriter<'a, T, I, E2, R, W> { 241 EditorWriter { 242 source: self.source, 243 text_buffer: self.text_buffer, 244 events: self.events, 245 writer: self.writer, 246 last_byte_offset: self.last_byte_offset, 247 last_char_offset: self.last_char_offset, 248 end_newline: self.end_newline, 249 in_non_writing_block: self.in_non_writing_block, 250 table: self.table, 251 code_block: self.code_block, 252 node_ids: self.node_ids, 253 current_node: self.current_node, 254 paragraphs: self.paragraphs, 255 current_para: self.current_para, 256 weaver_block: self.weaver_block, 257 footnotes: self.footnotes, 258 utf16: self.utf16, 259 offset_maps_by_para: self.offset_maps_by_para, 260 syntax_spans_by_para: self.syntax_spans_by_para, 261 refs_by_para: self.refs_by_para, 262 embed_provider: Some(provider), 263 image_resolver: self.image_resolver, 264 wikilink_validator: self.wikilink_validator, 265 entry_index: self.entry_index, 266 numbers: self.numbers, 267 pending_blockquote_range: self.pending_blockquote_range, 268 ref_collector: self.ref_collector, 269 } 270 } 271 272 /// Set image resolver. 
273 pub fn with_image_resolver<R2: ImageResolver>( 274 self, 275 resolver: R2, 276 ) -> EditorWriter<'a, T, I, E, R2, W> { 277 EditorWriter { 278 source: self.source, 279 text_buffer: self.text_buffer, 280 events: self.events, 281 writer: self.writer, 282 last_byte_offset: self.last_byte_offset, 283 last_char_offset: self.last_char_offset, 284 end_newline: self.end_newline, 285 in_non_writing_block: self.in_non_writing_block, 286 table: self.table, 287 code_block: self.code_block, 288 node_ids: self.node_ids, 289 current_node: self.current_node, 290 paragraphs: self.paragraphs, 291 current_para: self.current_para, 292 weaver_block: self.weaver_block, 293 footnotes: self.footnotes, 294 utf16: self.utf16, 295 offset_maps_by_para: self.offset_maps_by_para, 296 syntax_spans_by_para: self.syntax_spans_by_para, 297 refs_by_para: self.refs_by_para, 298 embed_provider: self.embed_provider, 299 image_resolver: Some(resolver), 300 wikilink_validator: self.wikilink_validator, 301 entry_index: self.entry_index, 302 numbers: self.numbers, 303 pending_blockquote_range: self.pending_blockquote_range, 304 ref_collector: self.ref_collector, 305 } 306 } 307 308 /// Set wikilink validator. 
309 pub fn with_wikilink_validator<W2: WikilinkValidator>( 310 self, 311 validator: W2, 312 ) -> EditorWriter<'a, T, I, E, R, W2> { 313 EditorWriter { 314 source: self.source, 315 text_buffer: self.text_buffer, 316 events: self.events, 317 writer: self.writer, 318 last_byte_offset: self.last_byte_offset, 319 last_char_offset: self.last_char_offset, 320 end_newline: self.end_newline, 321 in_non_writing_block: self.in_non_writing_block, 322 table: self.table, 323 code_block: self.code_block, 324 node_ids: self.node_ids, 325 current_node: self.current_node, 326 paragraphs: self.paragraphs, 327 current_para: self.current_para, 328 weaver_block: self.weaver_block, 329 footnotes: self.footnotes, 330 utf16: self.utf16, 331 offset_maps_by_para: self.offset_maps_by_para, 332 syntax_spans_by_para: self.syntax_spans_by_para, 333 refs_by_para: self.refs_by_para, 334 embed_provider: self.embed_provider, 335 image_resolver: self.image_resolver, 336 wikilink_validator: Some(validator), 337 entry_index: self.entry_index, 338 numbers: self.numbers, 339 pending_blockquote_range: self.pending_blockquote_range, 340 ref_collector: self.ref_collector, 341 } 342 } 343 344 /// Set entry index for wikilink resolution. 345 pub fn with_entry_index(mut self, index: &'a weaver_common::EntryIndex) -> Self { 346 self.entry_index = Some(index); 347 self 348 } 349} 350 351// Core helper methods 352impl<'a, T, I, E, R, W> EditorWriter<'a, T, I, E, R, W> 353where 354 T: crate::TextBuffer, 355 I: Iterator<Item = (Event<'a>, Range<usize>)>, 356{ 357 /// Write a string to the output. 358 #[inline] 359 pub(crate) fn write(&mut self, s: &str) -> fmt::Result { 360 if !s.is_empty() { 361 self.end_newline = s.ends_with('\n'); 362 } 363 self.writer.write_str(s) 364 } 365 366 /// Write a newline. 367 #[inline] 368 pub(crate) fn write_newline(&mut self) -> fmt::Result { 369 self.end_newline = true; 370 self.writer.write_str("\n") 371 } 372 373 /// Generate a unique node ID. 
374 pub(crate) fn gen_node_id(&mut self) -> SmolStr { 375 self.node_ids.next_node() 376 } 377 378 /// Generate a unique syntax span ID. 379 pub(crate) fn gen_syn_id(&mut self) -> SmolStr { 380 self.node_ids.next_syn() 381 } 382 383 /// Start tracking a new text container node. 384 pub(crate) fn begin_node(&mut self, node_id: SmolStr) { 385 self.current_node.begin(node_id); 386 } 387 388 /// Stop tracking current node. 389 pub(crate) fn end_node(&mut self) { 390 self.current_node.end(); 391 } 392 393 /// Compute UTF-16 length for a text slice (fast path for ASCII). 394 #[inline] 395 pub(crate) fn utf16_len_for_slice(text: &str) -> usize { 396 let byte_len = text.len(); 397 let char_len = text.chars().count(); 398 399 if byte_len == char_len { 400 char_len 401 } else { 402 text.encode_utf16().count() 403 } 404 } 405 406 /// Record an offset mapping. 407 pub(crate) fn record_mapping(&mut self, byte_range: Range<usize>, char_range: Range<usize>) { 408 if let Some(ref node_id) = self.current_node.id { 409 let text_slice = &self.source[byte_range.clone()]; 410 let utf16_len = Self::utf16_len_for_slice(text_slice); 411 412 // Record UTF-16 checkpoint 413 let last = self.utf16.last(); 414 let new_utf16 = last.1 + utf16_len; 415 if char_range.end > last.0 { 416 self.utf16.checkpoint(char_range.end, new_utf16); 417 } 418 419 let mapping = OffsetMapping { 420 byte_range, 421 char_range: char_range.clone(), 422 node_id: node_id.clone(), 423 char_offset_in_node: self.current_node.char_offset, 424 child_index: None, 425 utf16_len, 426 }; 427 self.current_para.offset_maps.push(mapping); 428 self.current_node.char_offset += utf16_len; 429 } 430 } 431 432 /// Finalize the current paragraph. 
433 pub(crate) fn finalize_paragraph( 434 &mut self, 435 byte_range: Range<usize>, 436 char_range: Range<usize>, 437 ) { 438 self.paragraphs.ranges.push((byte_range, char_range)); 439 440 let (maps, spans, refs) = self.current_para.take_all(); 441 self.offset_maps_by_para.push(maps); 442 self.syntax_spans_by_para.push(spans); 443 self.refs_by_para.push(refs); 444 445 self.node_ids.next_paragraph(); 446 self.writer.new_segment(); 447 } 448 449 /// Consume events until End tag without writing. 450 pub(crate) fn consume_until_end(&mut self) { 451 let mut nest = 0; 452 while let Some((event, _)) = self.events.next() { 453 match event { 454 Event::Start(_) => nest += 1, 455 Event::End(_) => { 456 if nest == 0 { 457 break; 458 } 459 nest -= 1; 460 } 461 _ => {} 462 } 463 } 464 } 465}