//! EditorWriter - HTML generation for markdown with visible formatting.
//!
//! Refactored to use grouped state structs for clarity.
//! Generic over TextBuffer - works with ropey (local) or can be adapted for Loro (collab).
mod embed;
mod events;
mod state;
mod syntax;
mod tags;
#[cfg(test)]
mod tests;
pub use embed::EditorImageResolver;
pub use state::*;
use std::collections::HashMap;
use std::fmt::{self, Write as FmtWrite};
use std::ops::Range;
use markdown_weaver::Event;
use smol_str::SmolStr;
use crate::offset_map::OffsetMapping;
use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator};
use crate::syntax::SyntaxSpanInfo;
/// Result of rendering with EditorWriter.
#[derive(Debug, Clone, Default)]
pub struct WriterResult {
/// HTML segments, one per paragraph
pub html_segments: Vec,
/// Offset mappings per paragraph
pub offset_maps_by_paragraph: Vec>,
/// Paragraph boundaries: (byte_range, char_range)
pub paragraph_ranges: Vec<(Range, Range)>,
/// Syntax spans per paragraph
pub syntax_spans_by_paragraph: Vec>,
/// Collected refs per paragraph
pub collected_refs_by_paragraph: Vec>,
}
/// Segmented HTML output writer.
#[derive(Debug, Clone, Default)]
pub struct SegmentedWriter {
segments: Vec,
current: String,
}
impl SegmentedWriter {
pub fn new() -> Self {
Self::default()
}
pub fn write_str(&mut self, s: &str) -> fmt::Result {
self.current.push_str(s);
Ok(())
}
pub fn new_segment(&mut self) {
if !self.current.is_empty() {
self.segments.push(std::mem::take(&mut self.current));
}
}
pub fn into_segments(mut self) -> Vec {
self.new_segment();
self.segments
}
pub fn current_len(&self) -> usize {
self.current.len()
}
}
impl FmtWrite for SegmentedWriter {
fn write_str(&mut self, s: &str) -> fmt::Result {
self.current.push_str(s);
Ok(())
}
}
impl markdown_weaver_escape::StrWrite for SegmentedWriter {
type Error = fmt::Error;
fn write_str(&mut self, s: &str) -> fmt::Result {
self.current.push_str(s);
Ok(())
}
fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> fmt::Result {
std::fmt::Write::write_fmt(&mut self.current, args)
}
}
/// HTML writer that preserves markdown formatting characters.
///
/// Generic over:
/// - `T`: Text buffer for efficient offset conversions
/// - `I`: Iterator of markdown events with byte ranges
/// - `E`: Embed content provider (optional)
/// - `R`: Image resolver (optional)
/// - `W`: Wikilink validator (optional)
pub struct EditorWriter<'a, T, I, E = (), R = (), W = ()>
where
T: crate::TextBuffer,
I: Iterator- , Range)>,
{
// === Input ===
source: &'a str,
text_buffer: &'a T,
events: I,
// === Output ===
writer: SegmentedWriter,
// === Position tracking ===
last_byte_offset: usize,
last_char_offset: usize,
// === Rendering flags ===
end_newline: bool,
in_non_writing_block: bool,
// === Grouped state ===
pub(crate) table: TableContext,
pub(crate) code_block: CodeBlockContext,
pub(crate) node_ids: NodeIdGenerator,
pub(crate) current_node: CurrentNodeState,
pub(crate) paragraphs: ParagraphTracker,
pub(crate) current_para: ParagraphBuildState,
pub(crate) weaver_block: WeaverBlockContext,
pub(crate) footnotes: FootnoteContext,
pub(crate) utf16: Utf16Tracker,
// === Per-paragraph results ===
offset_maps_by_para: Vec>,
syntax_spans_by_para: Vec>,
refs_by_para: Vec>,
// === External resolvers ===
embed_provider: Option,
image_resolver: Option,
wikilink_validator: Option,
entry_index: Option<&'a weaver_common::EntryIndex>,
// === Misc ===
numbers: HashMap,
pending_blockquote_range: Option>,
ref_collector: weaver_common::RefCollector,
}
impl<'a, T, I, E, R, W> EditorWriter<'a, T, I, E, R, W>
where
T: crate::TextBuffer,
I: Iterator
- , Range)>,
{
/// Create a new EditorWriter.
///
/// `source` is the markdown source text (should match text_buffer content).
/// `text_buffer` provides efficient offset conversions.
/// `events` is the markdown parser event iterator.
pub fn new(source: &'a str, text_buffer: &'a T, events: I) -> Self {
Self {
source,
text_buffer,
events,
writer: SegmentedWriter::new(),
last_byte_offset: 0,
last_char_offset: 0,
end_newline: true,
in_non_writing_block: false,
table: TableContext::default(),
code_block: CodeBlockContext::default(),
node_ids: NodeIdGenerator::default(),
current_node: CurrentNodeState::default(),
paragraphs: ParagraphTracker::default(),
current_para: ParagraphBuildState::default(),
weaver_block: WeaverBlockContext::default(),
footnotes: FootnoteContext::default(),
utf16: Utf16Tracker::new(),
offset_maps_by_para: Vec::new(),
syntax_spans_by_para: Vec::new(),
refs_by_para: Vec::new(),
embed_provider: None,
image_resolver: None,
wikilink_validator: None,
entry_index: None,
numbers: HashMap::new(),
pending_blockquote_range: None,
ref_collector: weaver_common::RefCollector::new(),
}
}
/// Set a static node ID prefix for all paragraphs.
pub fn with_node_id_prefix(mut self, prefix: &str) -> Self {
self.node_ids.prefix = Some(SmolStr::new(prefix));
self.node_ids.next_node_id = 0;
self
}
/// Use auto-incrementing paragraph prefixes starting from `base`.
pub fn with_auto_incrementing_prefix(mut self, base: usize) -> Self {
use smol_str::format_smolstr;
self.node_ids.auto_increment_base = Some(base);
self.node_ids.prefix = Some(format_smolstr!("p-{}", base));
self.node_ids.next_node_id = 0;
self
}
/// Override prefix for a specific paragraph index.
pub fn with_static_prefix_at_index(mut self, index: usize, prefix: &str) -> Self {
self.node_ids.static_override = Some((index, SmolStr::new(prefix)));
if index == 0 {
self.node_ids.prefix = Some(SmolStr::new(prefix));
self.node_ids.next_node_id = 0;
}
self
}
/// Set initial offsets (for rendering a subset of the document).
pub fn with_offsets(
mut self,
byte_offset: usize,
char_offset: usize,
node_id_offset: usize,
syn_id_offset: usize,
) -> Self {
self.last_byte_offset = byte_offset;
self.last_char_offset = char_offset;
self.node_ids.next_node_id = node_id_offset;
self.node_ids.next_syn_id = syn_id_offset;
self
}
/// Set embed content provider.
pub fn with_embed_provider(
self,
provider: E2,
) -> EditorWriter<'a, T, I, E2, R, W> {
EditorWriter {
source: self.source,
text_buffer: self.text_buffer,
events: self.events,
writer: self.writer,
last_byte_offset: self.last_byte_offset,
last_char_offset: self.last_char_offset,
end_newline: self.end_newline,
in_non_writing_block: self.in_non_writing_block,
table: self.table,
code_block: self.code_block,
node_ids: self.node_ids,
current_node: self.current_node,
paragraphs: self.paragraphs,
current_para: self.current_para,
weaver_block: self.weaver_block,
footnotes: self.footnotes,
utf16: self.utf16,
offset_maps_by_para: self.offset_maps_by_para,
syntax_spans_by_para: self.syntax_spans_by_para,
refs_by_para: self.refs_by_para,
embed_provider: Some(provider),
image_resolver: self.image_resolver,
wikilink_validator: self.wikilink_validator,
entry_index: self.entry_index,
numbers: self.numbers,
pending_blockquote_range: self.pending_blockquote_range,
ref_collector: self.ref_collector,
}
}
/// Set image resolver.
pub fn with_image_resolver(
self,
resolver: R2,
) -> EditorWriter<'a, T, I, E, R2, W> {
EditorWriter {
source: self.source,
text_buffer: self.text_buffer,
events: self.events,
writer: self.writer,
last_byte_offset: self.last_byte_offset,
last_char_offset: self.last_char_offset,
end_newline: self.end_newline,
in_non_writing_block: self.in_non_writing_block,
table: self.table,
code_block: self.code_block,
node_ids: self.node_ids,
current_node: self.current_node,
paragraphs: self.paragraphs,
current_para: self.current_para,
weaver_block: self.weaver_block,
footnotes: self.footnotes,
utf16: self.utf16,
offset_maps_by_para: self.offset_maps_by_para,
syntax_spans_by_para: self.syntax_spans_by_para,
refs_by_para: self.refs_by_para,
embed_provider: self.embed_provider,
image_resolver: Some(resolver),
wikilink_validator: self.wikilink_validator,
entry_index: self.entry_index,
numbers: self.numbers,
pending_blockquote_range: self.pending_blockquote_range,
ref_collector: self.ref_collector,
}
}
/// Set wikilink validator.
pub fn with_wikilink_validator(
self,
validator: W2,
) -> EditorWriter<'a, T, I, E, R, W2> {
EditorWriter {
source: self.source,
text_buffer: self.text_buffer,
events: self.events,
writer: self.writer,
last_byte_offset: self.last_byte_offset,
last_char_offset: self.last_char_offset,
end_newline: self.end_newline,
in_non_writing_block: self.in_non_writing_block,
table: self.table,
code_block: self.code_block,
node_ids: self.node_ids,
current_node: self.current_node,
paragraphs: self.paragraphs,
current_para: self.current_para,
weaver_block: self.weaver_block,
footnotes: self.footnotes,
utf16: self.utf16,
offset_maps_by_para: self.offset_maps_by_para,
syntax_spans_by_para: self.syntax_spans_by_para,
refs_by_para: self.refs_by_para,
embed_provider: self.embed_provider,
image_resolver: self.image_resolver,
wikilink_validator: Some(validator),
entry_index: self.entry_index,
numbers: self.numbers,
pending_blockquote_range: self.pending_blockquote_range,
ref_collector: self.ref_collector,
}
}
/// Set entry index for wikilink resolution.
pub fn with_entry_index(mut self, index: &'a weaver_common::EntryIndex) -> Self {
self.entry_index = Some(index);
self
}
}
// Core helper methods
impl<'a, T, I, E, R, W> EditorWriter<'a, T, I, E, R, W>
where
T: crate::TextBuffer,
I: Iterator
- , Range)>,
{
/// Write a string to the output.
#[inline]
pub(crate) fn write(&mut self, s: &str) -> fmt::Result {
if !s.is_empty() {
self.end_newline = s.ends_with('\n');
}
self.writer.write_str(s)
}
/// Write a newline.
#[inline]
pub(crate) fn write_newline(&mut self) -> fmt::Result {
self.end_newline = true;
self.writer.write_str("\n")
}
/// Generate a unique node ID.
pub(crate) fn gen_node_id(&mut self) -> SmolStr {
self.node_ids.next_node()
}
/// Generate a unique syntax span ID.
pub(crate) fn gen_syn_id(&mut self) -> SmolStr {
self.node_ids.next_syn()
}
/// Start tracking a new text container node.
pub(crate) fn begin_node(&mut self, node_id: SmolStr) {
self.current_node.begin(node_id);
}
/// Stop tracking current node.
pub(crate) fn end_node(&mut self) {
self.current_node.end();
}
/// Compute UTF-16 length for a text slice (fast path for ASCII).
#[inline]
pub(crate) fn utf16_len_for_slice(text: &str) -> usize {
let byte_len = text.len();
let char_len = text.chars().count();
if byte_len == char_len {
char_len
} else {
text.encode_utf16().count()
}
}
/// Record an offset mapping.
pub(crate) fn record_mapping(&mut self, byte_range: Range, char_range: Range) {
if let Some(ref node_id) = self.current_node.id {
let text_slice = &self.source[byte_range.clone()];
let utf16_len = Self::utf16_len_for_slice(text_slice);
// Record UTF-16 checkpoint
let last = self.utf16.last();
let new_utf16 = last.1 + utf16_len;
if char_range.end > last.0 {
self.utf16.checkpoint(char_range.end, new_utf16);
}
let mapping = OffsetMapping {
byte_range,
char_range: char_range.clone(),
node_id: node_id.clone(),
char_offset_in_node: self.current_node.char_offset,
child_index: None,
utf16_len,
};
self.current_para.offset_maps.push(mapping);
self.current_node.char_offset += utf16_len;
}
}
/// Finalize the current paragraph.
pub(crate) fn finalize_paragraph(
&mut self,
byte_range: Range,
char_range: Range,
) {
self.paragraphs.ranges.push((byte_range, char_range));
let (maps, spans, refs) = self.current_para.take_all();
self.offset_maps_by_para.push(maps);
self.syntax_spans_by_para.push(spans);
self.refs_by_para.push(refs);
self.node_ids.next_paragraph();
self.writer.new_segment();
}
/// Consume events until End tag without writing.
pub(crate) fn consume_until_end(&mut self) {
let mut nest = 0;
while let Some((event, _)) = self.events.next() {
match event {
Event::Start(_) => nest += 1,
Event::End(_) => {
if nest == 0 {
break;
}
nest -= 1;
}
_ => {}
}
}
}
}