// atproto blogging
//! EditorWriter - HTML generation for markdown with visible formatting.
//!
//! Refactored to use grouped state structs for clarity.
//! Generic over `TextBuffer` - works with ropey (local) or can be adapted for Loro (collab).
5
6mod embed;
7mod events;
8mod state;
9mod syntax;
10mod tags;
11#[cfg(test)]
12mod tests;
13
14pub use embed::EditorImageResolver;
15pub use state::*;
16
17use std::collections::HashMap;
18use std::fmt::{self, Write as FmtWrite};
19use std::ops::Range;
20
21use markdown_weaver::Event;
22use smol_str::SmolStr;
23
24use crate::offset_map::OffsetMapping;
25use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator};
26use crate::syntax::SyntaxSpanInfo;
27
28/// Result of rendering with EditorWriter.
29#[derive(Debug, Clone, Default)]
30pub struct WriterResult {
31 /// HTML segments, one per paragraph
32 pub html_segments: Vec<String>,
33 /// Offset mappings per paragraph
34 pub offset_maps_by_paragraph: Vec<Vec<OffsetMapping>>,
35 /// Paragraph boundaries: (byte_range, char_range)
36 pub paragraph_ranges: Vec<(Range<usize>, Range<usize>)>,
37 /// Syntax spans per paragraph
38 pub syntax_spans_by_paragraph: Vec<Vec<SyntaxSpanInfo>>,
39 /// Collected refs per paragraph
40 pub collected_refs_by_paragraph: Vec<Vec<weaver_common::ExtractedRef>>,
41}
42
/// HTML output sink that is split into segments.
///
/// Text is appended to an open buffer; `new_segment` closes that buffer and
/// stores it as a finished segment.
#[derive(Debug, Clone, Default)]
pub struct SegmentedWriter {
    /// Finished segments, in the order they were closed.
    segments: Vec<String>,
    /// Text written since the last segment boundary.
    current: String,
}

impl SegmentedWriter {
    /// Creates a writer with no finished segments and an empty open buffer.
    pub fn new() -> Self {
        Self {
            segments: Vec::new(),
            current: String::new(),
        }
    }

    /// Appends `s` to the open buffer. Always returns `Ok(())`.
    pub fn write_str(&mut self, s: &str) -> fmt::Result {
        self.current += s;
        Ok(())
    }

    /// Closes the open buffer and stores it as a finished segment.
    ///
    /// An empty open buffer is left untouched, so empty segments are never stored.
    pub fn new_segment(&mut self) {
        if self.current.is_empty() {
            return;
        }
        let finished = std::mem::take(&mut self.current);
        self.segments.push(finished);
    }

    /// Consumes the writer and returns all segments.
    ///
    /// Any text still in the open buffer becomes the final segment.
    pub fn into_segments(mut self) -> Vec<String> {
        self.new_segment();
        let Self { segments, .. } = self;
        segments
    }

    /// Length in bytes of the text currently in the open buffer.
    pub fn current_len(&self) -> usize {
        self.current.as_str().len()
    }
}
75
76impl FmtWrite for SegmentedWriter {
77 fn write_str(&mut self, s: &str) -> fmt::Result {
78 self.current.push_str(s);
79 Ok(())
80 }
81}
82
83impl markdown_weaver_escape::StrWrite for SegmentedWriter {
84 type Error = fmt::Error;
85
86 fn write_str(&mut self, s: &str) -> fmt::Result {
87 self.current.push_str(s);
88 Ok(())
89 }
90
91 fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> fmt::Result {
92 std::fmt::Write::write_fmt(&mut self.current, args)
93 }
94}
95
96/// HTML writer that preserves markdown formatting characters.
97///
98/// Generic over:
99/// - `T`: Text buffer for efficient offset conversions
100/// - `I`: Iterator of markdown events with byte ranges
101/// - `E`: Embed content provider (optional)
102/// - `R`: Image resolver (optional)
103/// - `W`: Wikilink validator (optional)
104pub struct EditorWriter<'a, T, I, E = (), R = (), W = ()>
105where
106 T: crate::TextBuffer,
107 I: Iterator<Item = (Event<'a>, Range<usize>)>,
108{
109 // === Input ===
110 source: &'a str,
111 text_buffer: &'a T,
112 events: I,
113
114 // === Output ===
115 writer: SegmentedWriter,
116
117 // === Position tracking ===
118 last_byte_offset: usize,
119 last_char_offset: usize,
120
121 // === Rendering flags ===
122 end_newline: bool,
123 in_non_writing_block: bool,
124
125 // === Grouped state ===
126 pub(crate) table: TableContext,
127 pub(crate) code_block: CodeBlockContext,
128 pub(crate) node_ids: NodeIdGenerator,
129 pub(crate) current_node: CurrentNodeState,
130 pub(crate) paragraphs: ParagraphTracker,
131 pub(crate) current_para: ParagraphBuildState,
132 pub(crate) weaver_block: WeaverBlockContext,
133 pub(crate) footnotes: FootnoteContext,
134 pub(crate) utf16: Utf16Tracker,
135
136 // === Per-paragraph results ===
137 offset_maps_by_para: Vec<Vec<OffsetMapping>>,
138 syntax_spans_by_para: Vec<Vec<SyntaxSpanInfo>>,
139 refs_by_para: Vec<Vec<weaver_common::ExtractedRef>>,
140
141 // === External resolvers ===
142 embed_provider: Option<E>,
143 image_resolver: Option<R>,
144 wikilink_validator: Option<W>,
145 entry_index: Option<&'a weaver_common::EntryIndex>,
146
147 // === Misc ===
148 numbers: HashMap<SmolStr, usize>,
149 pending_blockquote_range: Option<Range<usize>>,
150 ref_collector: weaver_common::RefCollector,
151}
152
153impl<'a, T, I, E, R, W> EditorWriter<'a, T, I, E, R, W>
154where
155 T: crate::TextBuffer,
156 I: Iterator<Item = (Event<'a>, Range<usize>)>,
157{
158 /// Create a new EditorWriter.
159 ///
160 /// `source` is the markdown source text (should match text_buffer content).
161 /// `text_buffer` provides efficient offset conversions.
162 /// `events` is the markdown parser event iterator.
163 pub fn new(source: &'a str, text_buffer: &'a T, events: I) -> Self {
164 Self {
165 source,
166 text_buffer,
167 events,
168 writer: SegmentedWriter::new(),
169 last_byte_offset: 0,
170 last_char_offset: 0,
171 end_newline: true,
172 in_non_writing_block: false,
173 table: TableContext::default(),
174 code_block: CodeBlockContext::default(),
175 node_ids: NodeIdGenerator::default(),
176 current_node: CurrentNodeState::default(),
177 paragraphs: ParagraphTracker::default(),
178 current_para: ParagraphBuildState::default(),
179 weaver_block: WeaverBlockContext::default(),
180 footnotes: FootnoteContext::default(),
181 utf16: Utf16Tracker::new(),
182 offset_maps_by_para: Vec::new(),
183 syntax_spans_by_para: Vec::new(),
184 refs_by_para: Vec::new(),
185 embed_provider: None,
186 image_resolver: None,
187 wikilink_validator: None,
188 entry_index: None,
189 numbers: HashMap::new(),
190 pending_blockquote_range: None,
191 ref_collector: weaver_common::RefCollector::new(),
192 }
193 }
194
195 /// Set a static node ID prefix for all paragraphs.
196 pub fn with_node_id_prefix(mut self, prefix: &str) -> Self {
197 self.node_ids.prefix = Some(SmolStr::new(prefix));
198 self.node_ids.next_node_id = 0;
199 self
200 }
201
202 /// Use auto-incrementing paragraph prefixes starting from `base`.
203 pub fn with_auto_incrementing_prefix(mut self, base: usize) -> Self {
204 use smol_str::format_smolstr;
205 self.node_ids.auto_increment_base = Some(base);
206 self.node_ids.prefix = Some(format_smolstr!("p-{}", base));
207 self.node_ids.next_node_id = 0;
208 self
209 }
210
211 /// Override prefix for a specific paragraph index.
212 pub fn with_static_prefix_at_index(mut self, index: usize, prefix: &str) -> Self {
213 self.node_ids.static_override = Some((index, SmolStr::new(prefix)));
214 if index == 0 {
215 self.node_ids.prefix = Some(SmolStr::new(prefix));
216 self.node_ids.next_node_id = 0;
217 }
218 self
219 }
220
221 /// Set initial offsets (for rendering a subset of the document).
222 pub fn with_offsets(
223 mut self,
224 byte_offset: usize,
225 char_offset: usize,
226 node_id_offset: usize,
227 syn_id_offset: usize,
228 ) -> Self {
229 self.last_byte_offset = byte_offset;
230 self.last_char_offset = char_offset;
231 self.node_ids.next_node_id = node_id_offset;
232 self.node_ids.next_syn_id = syn_id_offset;
233 self
234 }
235
236 /// Set embed content provider.
237 pub fn with_embed_provider<E2: EmbedContentProvider>(
238 self,
239 provider: E2,
240 ) -> EditorWriter<'a, T, I, E2, R, W> {
241 EditorWriter {
242 source: self.source,
243 text_buffer: self.text_buffer,
244 events: self.events,
245 writer: self.writer,
246 last_byte_offset: self.last_byte_offset,
247 last_char_offset: self.last_char_offset,
248 end_newline: self.end_newline,
249 in_non_writing_block: self.in_non_writing_block,
250 table: self.table,
251 code_block: self.code_block,
252 node_ids: self.node_ids,
253 current_node: self.current_node,
254 paragraphs: self.paragraphs,
255 current_para: self.current_para,
256 weaver_block: self.weaver_block,
257 footnotes: self.footnotes,
258 utf16: self.utf16,
259 offset_maps_by_para: self.offset_maps_by_para,
260 syntax_spans_by_para: self.syntax_spans_by_para,
261 refs_by_para: self.refs_by_para,
262 embed_provider: Some(provider),
263 image_resolver: self.image_resolver,
264 wikilink_validator: self.wikilink_validator,
265 entry_index: self.entry_index,
266 numbers: self.numbers,
267 pending_blockquote_range: self.pending_blockquote_range,
268 ref_collector: self.ref_collector,
269 }
270 }
271
272 /// Set image resolver.
273 pub fn with_image_resolver<R2: ImageResolver>(
274 self,
275 resolver: R2,
276 ) -> EditorWriter<'a, T, I, E, R2, W> {
277 EditorWriter {
278 source: self.source,
279 text_buffer: self.text_buffer,
280 events: self.events,
281 writer: self.writer,
282 last_byte_offset: self.last_byte_offset,
283 last_char_offset: self.last_char_offset,
284 end_newline: self.end_newline,
285 in_non_writing_block: self.in_non_writing_block,
286 table: self.table,
287 code_block: self.code_block,
288 node_ids: self.node_ids,
289 current_node: self.current_node,
290 paragraphs: self.paragraphs,
291 current_para: self.current_para,
292 weaver_block: self.weaver_block,
293 footnotes: self.footnotes,
294 utf16: self.utf16,
295 offset_maps_by_para: self.offset_maps_by_para,
296 syntax_spans_by_para: self.syntax_spans_by_para,
297 refs_by_para: self.refs_by_para,
298 embed_provider: self.embed_provider,
299 image_resolver: Some(resolver),
300 wikilink_validator: self.wikilink_validator,
301 entry_index: self.entry_index,
302 numbers: self.numbers,
303 pending_blockquote_range: self.pending_blockquote_range,
304 ref_collector: self.ref_collector,
305 }
306 }
307
308 /// Set wikilink validator.
309 pub fn with_wikilink_validator<W2: WikilinkValidator>(
310 self,
311 validator: W2,
312 ) -> EditorWriter<'a, T, I, E, R, W2> {
313 EditorWriter {
314 source: self.source,
315 text_buffer: self.text_buffer,
316 events: self.events,
317 writer: self.writer,
318 last_byte_offset: self.last_byte_offset,
319 last_char_offset: self.last_char_offset,
320 end_newline: self.end_newline,
321 in_non_writing_block: self.in_non_writing_block,
322 table: self.table,
323 code_block: self.code_block,
324 node_ids: self.node_ids,
325 current_node: self.current_node,
326 paragraphs: self.paragraphs,
327 current_para: self.current_para,
328 weaver_block: self.weaver_block,
329 footnotes: self.footnotes,
330 utf16: self.utf16,
331 offset_maps_by_para: self.offset_maps_by_para,
332 syntax_spans_by_para: self.syntax_spans_by_para,
333 refs_by_para: self.refs_by_para,
334 embed_provider: self.embed_provider,
335 image_resolver: self.image_resolver,
336 wikilink_validator: Some(validator),
337 entry_index: self.entry_index,
338 numbers: self.numbers,
339 pending_blockquote_range: self.pending_blockquote_range,
340 ref_collector: self.ref_collector,
341 }
342 }
343
344 /// Set entry index for wikilink resolution.
345 pub fn with_entry_index(mut self, index: &'a weaver_common::EntryIndex) -> Self {
346 self.entry_index = Some(index);
347 self
348 }
349}
350
351// Core helper methods
352impl<'a, T, I, E, R, W> EditorWriter<'a, T, I, E, R, W>
353where
354 T: crate::TextBuffer,
355 I: Iterator<Item = (Event<'a>, Range<usize>)>,
356{
357 /// Write a string to the output.
358 #[inline]
359 pub(crate) fn write(&mut self, s: &str) -> fmt::Result {
360 if !s.is_empty() {
361 self.end_newline = s.ends_with('\n');
362 }
363 self.writer.write_str(s)
364 }
365
366 /// Write a newline.
367 #[inline]
368 pub(crate) fn write_newline(&mut self) -> fmt::Result {
369 self.end_newline = true;
370 self.writer.write_str("\n")
371 }
372
373 /// Generate a unique node ID.
374 pub(crate) fn gen_node_id(&mut self) -> SmolStr {
375 self.node_ids.next_node()
376 }
377
378 /// Generate a unique syntax span ID.
379 pub(crate) fn gen_syn_id(&mut self) -> SmolStr {
380 self.node_ids.next_syn()
381 }
382
383 /// Start tracking a new text container node.
384 pub(crate) fn begin_node(&mut self, node_id: SmolStr) {
385 self.current_node.begin(node_id);
386 }
387
388 /// Stop tracking current node.
389 pub(crate) fn end_node(&mut self) {
390 self.current_node.end();
391 }
392
393 /// Compute UTF-16 length for a text slice (fast path for ASCII).
394 #[inline]
395 pub(crate) fn utf16_len_for_slice(text: &str) -> usize {
396 let byte_len = text.len();
397 let char_len = text.chars().count();
398
399 if byte_len == char_len {
400 char_len
401 } else {
402 text.encode_utf16().count()
403 }
404 }
405
406 /// Record an offset mapping.
407 pub(crate) fn record_mapping(&mut self, byte_range: Range<usize>, char_range: Range<usize>) {
408 if let Some(ref node_id) = self.current_node.id {
409 let text_slice = &self.source[byte_range.clone()];
410 let utf16_len = Self::utf16_len_for_slice(text_slice);
411
412 // Record UTF-16 checkpoint
413 let last = self.utf16.last();
414 let new_utf16 = last.1 + utf16_len;
415 if char_range.end > last.0 {
416 self.utf16.checkpoint(char_range.end, new_utf16);
417 }
418
419 let mapping = OffsetMapping {
420 byte_range,
421 char_range: char_range.clone(),
422 node_id: node_id.clone(),
423 char_offset_in_node: self.current_node.char_offset,
424 child_index: None,
425 utf16_len,
426 };
427 self.current_para.offset_maps.push(mapping);
428 self.current_node.char_offset += utf16_len;
429 }
430 }
431
432 /// Finalize the current paragraph.
433 pub(crate) fn finalize_paragraph(
434 &mut self,
435 byte_range: Range<usize>,
436 char_range: Range<usize>,
437 ) {
438 self.paragraphs.ranges.push((byte_range, char_range));
439
440 let (maps, spans, refs) = self.current_para.take_all();
441 self.offset_maps_by_para.push(maps);
442 self.syntax_spans_by_para.push(spans);
443 self.refs_by_para.push(refs);
444
445 self.node_ids.next_paragraph();
446 self.writer.new_segment();
447 }
448
449 /// Consume events until End tag without writing.
450 pub(crate) fn consume_until_end(&mut self) {
451 let mut nest = 0;
452 while let Some((event, _)) = self.events.next() {
453 match event {
454 Event::Start(_) => nest += 1,
455 Event::End(_) => {
456 if nest == 0 {
457 break;
458 }
459 nest -= 1;
460 }
461 _ => {}
462 }
463 }
464 }
465}