richtext.js at main · slices.network/tools

slices.network / tools
Tools for the Atmosphere tools.slices.network
quickslice atproto html
tools / richtext.js
at main 477 lines 14 kB view raw
  1// richtext.js - Shared richtext parsing and rendering for tools.slices.network
  2
  3/**
  4 * Helper functions for consistent facet type detection.
  5 * Handles both $type (stored facets) and __typename (GraphQL responses).
  6 */
  7export function isFacetType(type, facetType) {
  8  if (!type) return false;
  9  const normalized = type.toLowerCase();
 10  return normalized.includes(facetType.toLowerCase());
 11}
 12
 13export const FacetTypes = {
 14  isLink: (type) => isFacetType(type, 'link'),
 15  isBold: (type) => isFacetType(type, 'bold'),
 16  isItalic: (type) => isFacetType(type, 'italic'),
 17  isCode: (type) => isFacetType(type, 'code') && !isFacetType(type, 'codeblock'),
 18  isCodeBlock: (type) => isFacetType(type, 'codeblock'),
 19};
 20
 21export const BlockTypes = {
 22  isParagraph: (type) => isFacetType(type, 'paragraph'),
 23  isHeading: (type) => isFacetType(type, 'heading'),
 24  isCodeBlock: (type) => isFacetType(type, 'codeblock'),
 25  isQuote: (type) => isFacetType(type, 'quote'),
 26  isTangledEmbed: (type) => isFacetType(type, 'tangledembed'),
 27  isImageEmbed: (type) => isFacetType(type, 'imageembed'),
 28};
 29
 30/**
 31 * Parse markdown-style text into facets for AT Protocol storage.
 32 * Detects: code blocks, bold, italic, inline code, URLs
 33 * Returns { text, facets } where text preserves delimiters for editing.
 34 */
 35export function parseFacets(text) {
 36  if (!text) return { text: "", facets: [] };
 37
 38  const facets = [];
 39  const encoder = new TextEncoder();
 40
 41  // Track which character positions are already claimed by a facet
 42  const claimedPositions = new Set();
 43
 44  // Helper to check if a range overlaps with claimed positions
 45  function isRangeClaimed(start, end) {
 46    for (let i = start; i < end; i++) {
 47      if (claimedPositions.has(i)) return true;
 48    }
 49    return false;
 50  }
 51
 52  // Helper to claim a range
 53  function claimRange(start, end) {
 54    for (let i = start; i < end; i++) {
 55      claimedPositions.add(i);
 56    }
 57  }
 58
 59  // Helper to get byte offset for a character position
 60  function getByteOffset(str, charIndex) {
 61    return encoder.encode(str.slice(0, charIndex)).length;
 62  }
 63
 64  // Process code blocks FIRST (highest priority) - they should not be parsed for other patterns
 65  const codeBlockRegex = /```(\w*)\n([\s\S]*?)```/g;
 66  let codeBlockMatch;
 67  while ((codeBlockMatch = codeBlockRegex.exec(text)) !== null) {
 68    const start = codeBlockMatch.index;
 69    const end = start + codeBlockMatch[0].length;
 70
 71    if (!isRangeClaimed(start, end)) {
 72      claimRange(start, end);
 73      const lang = codeBlockMatch[1] || undefined;
 74      facets.push({
 75        index: {
 76          byteStart: getByteOffset(text, start),
 77          byteEnd: getByteOffset(text, end),
 78        },
 79        features: [
 80          {
 81            $type: "network.slices.tools.richtext.facet#codeBlock",
 82            lang,
 83          },
 84        ],
 85      });
 86    }
 87  }
 88
 89  // Bold: **text**
 90  const boldRegex = /\*\*(.+?)\*\*/g;
 91  let boldMatch;
 92  while ((boldMatch = boldRegex.exec(text)) !== null) {
 93    const start = boldMatch.index;
 94    const end = start + boldMatch[0].length;
 95
 96    if (!isRangeClaimed(start, end)) {
 97      claimRange(start, end);
 98      facets.push({
 99        index: {
100          byteStart: getByteOffset(text, start),
101          byteEnd: getByteOffset(text, end),
102        },
103        features: [{ $type: "network.slices.tools.richtext.facet#bold" }],
104      });
105    }
106  }
107
108  // Italic: *text* or _text_ (but not inside bold)
109  const italicRegex = /(?<!\*)\*([^*]+)\*(?!\*)|_([^_]+)_/g;
110  let italicMatch;
111  while ((italicMatch = italicRegex.exec(text)) !== null) {
112    const start = italicMatch.index;
113    const end = start + italicMatch[0].length;
114
115    if (!isRangeClaimed(start, end)) {
116      claimRange(start, end);
117      facets.push({
118        index: {
119          byteStart: getByteOffset(text, start),
120          byteEnd: getByteOffset(text, end),
121        },
122        features: [{ $type: "network.slices.tools.richtext.facet#italic" }],
123      });
124    }
125  }
126
127  // Inline code: `code`
128  const codeRegex = /`([^`]+)`/g;
129  let codeMatch;
130  while ((codeMatch = codeRegex.exec(text)) !== null) {
131    const start = codeMatch.index;
132    const end = start + codeMatch[0].length;
133
134    if (!isRangeClaimed(start, end)) {
135      claimRange(start, end);
136      facets.push({
137        index: {
138          byteStart: getByteOffset(text, start),
139          byteEnd: getByteOffset(text, end),
140        },
141        features: [{ $type: "network.slices.tools.richtext.facet#code" }],
142      });
143    }
144  }
145
146  // URLs
147  const urlRegex = /https?:\/\/[^\s<>\[\]()]+/g;
148  let urlMatch;
149  while ((urlMatch = urlRegex.exec(text)) !== null) {
150    const start = urlMatch.index;
151    const end = start + urlMatch[0].length;
152
153    if (!isRangeClaimed(start, end)) {
154      claimRange(start, end);
155      facets.push({
156        index: {
157          byteStart: getByteOffset(text, start),
158          byteEnd: getByteOffset(text, end),
159        },
160        features: [
161          {
162            $type: "network.slices.tools.richtext.facet#link",
163            uri: urlMatch[0],
164          },
165        ],
166      });
167    }
168  }
169
170  // Sort facets by byte position
171  facets.sort((a, b) => a.index.byteStart - b.index.byteStart);
172
173  return { text, facets };
174}
175
/**
 * Render faceted text as HTML.
 *
 * Falls back to parseFacets when no facets are provided (legacy content) and
 * strips markdown delimiters from bold, italic, code and code-block facets
 * for display. Only the first feature of each facet is rendered.
 *
 * Facets are treated as untrusted input:
 *  - byte offsets are clamped to the encoded text, and empty or malformed
 *    facets are ignored;
 *  - a facet that starts inside an already rendered facet is skipped, so
 *    overlapping facets can never emit the same text twice;
 *  - a link whose URI is missing or uses a scheme other than http, https or
 *    mailto is rendered as plain escaped text rather than as an anchor.
 *
 * @param {string} text - Source text the facet byte offsets refer to.
 * @param {Array<object>} [facets] - Facets with `index.byteStart`,
 *   `index.byteEnd` and `features`; parsed from `text` when empty or missing.
 * @param {{ escapeHtml?: (text: string) => string }} [options] - Optional
 *   replacement for the default HTML escaper.
 * @returns {string} HTML string; "" for falsy `text`.
 */
export function renderFacetedText(text, facets, options = {}) {
  if (!text) return "";

  const { escapeHtml = defaultEscapeHtml } = options;

  // If no facets provided, parse on the fly (legacy support)
  const activeFacets =
    !facets || facets.length === 0 ? parseFacets(text).facets : facets;

  if (activeFacets.length === 0) {
    return escapeHtml(text);
  }

  const decoder = new TextDecoder();
  const bytes = new TextEncoder().encode(text);
  const decodeRange = (from, to) => decoder.decode(bytes.slice(from, to));
  const clamp = (value) =>
    Number.isFinite(value) ? Math.min(Math.max(value, 0), bytes.length) : 0;

  // Browsers ignore whitespace/control characters around and inside the
  // scheme, so strip them before checking it. Scheme-less (relative) URIs
  // are allowed; anything else must be on the allowlist.
  const SAFE_SCHEMES = ["http", "https", "mailto"];
  const isSafeLinkUri = (uri) => {
    if (typeof uri !== "string") return false;
    const compact = uri.replace(/[\u0000-\u0020\u007f]/g, "");
    const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compact);
    return !scheme || SAFE_SCHEMES.includes(scheme[1].toLowerCase());
  };

  // Normalize, drop unusable facets, and sort by start position.
  const spans = activeFacets
    .filter((facet) => facet?.index)
    .map((facet) => ({
      start: clamp(facet.index.byteStart),
      end: clamp(facet.index.byteEnd),
      feature: facet.features?.[0],
    }))
    .filter((span) => span.end > span.start)
    .sort((a, b) => a.start - b.start);

  let result = "";
  let lastEnd = 0;

  for (const { start, end, feature } of spans) {
    // Overlaps text that was already emitted; rendering it would duplicate it.
    if (start < lastEnd) continue;

    // Add text before this facet
    if (start > lastEnd) {
      result += escapeHtml(decodeRange(lastEnd, start));
    }

    const facetText = decodeRange(start, end);
    const type = feature?.$type || feature?.__typename || "";

    if (FacetTypes.isLink(type)) {
      if (isSafeLinkUri(feature.uri)) {
        result += `<a href="${escapeHtml(feature.uri)}" target="_blank" rel="noopener" class="facet-link">${escapeHtml(facetText)}</a>`;
      } else {
        result += escapeHtml(facetText);
      }
    } else if (FacetTypes.isBold(type)) {
      // Strip ** delimiters
      const content = facetText.replace(/^\*\*|\*\*$/g, "");
      result += `<strong class="facet-bold">${escapeHtml(content)}</strong>`;
    } else if (FacetTypes.isItalic(type)) {
      // Strip * or _ delimiters
      const content = facetText.replace(/^\*|\*$|^_|_$/g, "");
      result += `<em class="facet-italic">${escapeHtml(content)}</em>`;
    } else if (FacetTypes.isCodeBlock(type)) {
      // Strip ``` delimiters and extract content
      const match = facetText.match(/^```(\w*)\n([\s\S]*?)```$/);
      if (match) {
        const lang = match[1] || "";
        const code = match[2];
        const langClass = lang ? ` language-${escapeHtml(lang)}` : "";
        result += `<pre class="facet-codeblock${langClass}"><code>${escapeHtml(code)}</code></pre>`;
      } else {
        result += `<pre class="facet-codeblock"><code>${escapeHtml(facetText)}</code></pre>`;
      }
    } else if (FacetTypes.isCode(type)) {
      // Strip ` delimiters
      const content = facetText.replace(/^`|`$/g, "");
      result += `<code class="facet-code">${escapeHtml(content)}</code>`;
    } else {
      result += escapeHtml(facetText);
    }

    lastEnd = end;
  }

  // Add remaining text
  if (lastEnd < bytes.length) {
    result += escapeHtml(decodeRange(lastEnd, bytes.length));
  }

  return result;
}
264
/**
 * Convert text + facets to HTML for contenteditable editing.
 *
 * The text is cut at every facet boundary and each resulting segment is
 * wrapped in the tags of all facets that cover it. Overlapping or nested
 * facets (for example bold inside a link) therefore become nested tags, and
 * no part of the text is ever emitted twice. Markdown delimiters are not
 * stripped here.
 *
 * Only link, bold, italic and inline-code facets produce tags; other facet
 * types are emitted as plain escaped text. Byte offsets are clamped to the
 * encoded text and empty or malformed facets are ignored. A link whose URI is
 * missing or uses a scheme other than http, https or mailto is emitted as
 * plain text, and a segment is wrapped in at most one anchor.
 *
 * @param {string} text - Source text the facet byte offsets refer to.
 * @param {Array<object>} [facets] - Facets with `index.byteStart`,
 *   `index.byteEnd` and `features` (only the first feature is used).
 * @returns {string} HTML string with formatting tags; "" for falsy `text`.
 */
export function facetsToDom(text, facets = []) {
  if (!text) return "";

  if (!facets || facets.length === 0) {
    return escapeHtmlForDom(text);
  }

  const decoder = new TextDecoder();
  const bytes = new TextEncoder().encode(text);
  const total = bytes.length;
  const clamp = (value) =>
    Number.isFinite(value) ? Math.min(Math.max(value, 0), total) : 0;

  // Browsers ignore whitespace/control characters around and inside the
  // scheme, so strip them before checking it. Scheme-less (relative) URIs
  // are allowed; anything else must be on the allowlist.
  const SAFE_SCHEMES = ["http", "https", "mailto"];
  const isSafeLinkUri = (uri) => {
    if (typeof uri !== "string") return false;
    const compact = uri.replace(/[\u0000-\u0020\u007f]/g, "");
    const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compact);
    return !scheme || SAFE_SCHEMES.includes(scheme[1].toLowerCase());
  };

  // Map a facet feature to its opening/closing tag, or null for plain text.
  const tagFor = (feature) => {
    const type = feature?.$type || feature?.__typename || "";
    if (FacetTypes.isLink(type)) {
      if (!isSafeLinkUri(feature.uri)) return null;
      return {
        open: `<a href="${escapeHtmlForDom(feature.uri)}" class="facet-link">`,
        close: "</a>",
        isLink: true,
      };
    }
    if (FacetTypes.isBold(type)) return { open: "<strong>", close: "</strong>" };
    if (FacetTypes.isItalic(type)) return { open: "<em>", close: "</em>" };
    if (FacetTypes.isCode(type)) return { open: "<code>", close: "</code>" };
    return null;
  };

  const spans = [];
  for (const facet of facets) {
    if (!facet?.index) continue;
    const start = clamp(facet.index.byteStart);
    const end = clamp(facet.index.byteEnd);
    const tag = tagFor(facet.features?.[0]);
    if (tag && end > start) {
      spans.push({ start, end, ...tag });
    }
  }
  // Earlier-starting, longer spans first so they end up as the outer tags.
  spans.sort((a, b) => a.start - b.start || b.end - a.end);

  // Every facet boundary is a cut point; segments between cuts are uniform.
  const cuts = [
    ...new Set([0, total, ...spans.flatMap(({ start, end }) => [start, end])]),
  ].sort((a, b) => a - b);

  let result = "";
  for (let i = 0; i + 1 < cuts.length; i++) {
    const from = cuts[i];
    const to = cuts[i + 1];
    let html = escapeHtmlForDom(decoder.decode(bytes.slice(from, to)));

    const covering = spans.filter((s) => s.start <= from && s.end >= to);
    let linked = false;
    // Wrap from the innermost span outwards.
    for (let j = covering.length - 1; j >= 0; j--) {
      const span = covering[j];
      if (span.isLink) {
        if (linked) continue; // nested anchors are invalid HTML
        linked = true;
      }
      html = span.open + html + span.close;
    }

    result += html;
  }

  return result;
}
326
/**
 * Escape &, <, > and " so the text can be placed in HTML content or in a
 * double-quoted attribute value.
 */
function escapeHtmlForDom(text) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
  };
  return text.replace(/[&<>"]/g, (ch) => entities[ch]);
}
334
/**
 * Read a contenteditable element back into plain text plus facets.
 *
 * Walks the DOM depth-first, concatenating text nodes and emitting one facet
 * per enclosing formatting tag (strong/b, em/i, code, a) for each text node.
 * Facets sharing a feature are then merged, and bare http(s) URLs in the text
 * that are not already fully inside a link or code facet get a link facet.
 *
 * @param {Node} element - Root node to read.
 * @returns {{ text: string, facets: Array<object> }} Collected text and its
 *   facets (UTF-8 byte offsets), sorted by `index.byteStart`.
 */
export function domToFacets(element) {
  const utf8 = new TextEncoder();
  const byteLength = (str) => utf8.encode(str).length;
  const collected = [];
  let text = "";

  // Feature object for a formatting element, or null for any other tag.
  const featureFor = (el) => {
    switch (el.tagName.toLowerCase()) {
      case "strong":
      case "b":
        return { $type: "network.slices.tools.richtext.facet#bold" };
      case "em":
      case "i":
        return { $type: "network.slices.tools.richtext.facet#italic" };
      case "code":
        return { $type: "network.slices.tools.richtext.facet#code" };
      case "a":
        return {
          $type: "network.slices.tools.richtext.facet#link",
          uri: el.getAttribute("href") || "",
        };
      default:
        return null;
    }
  };

  const visit = (node, inherited) => {
    if (node.nodeType === Node.TEXT_NODE) {
      const chunk = node.textContent || "";
      if (!chunk) return;

      const byteStart = byteLength(text);
      text += chunk;
      const byteEnd = byteLength(text);

      // One facet per format that is active around this text node.
      for (const feature of inherited) {
        collected.push({
          index: { byteStart, byteEnd },
          features: [feature],
        });
      }
      return;
    }

    if (node.nodeType !== Node.ELEMENT_NODE) return;

    const own = featureFor(node);
    const active = own ? [...inherited, own] : inherited;
    for (const child of node.childNodes) {
      visit(child, active);
    }
  };

  visit(element, []);

  // Collapse runs of the same feature split across neighbouring text nodes.
  const result = mergeFacets(collected);

  // Auto-link bare URLs that are not already inside a link or code facet.
  const urlPattern = /https?:\/\/[^\s<>\[\]()]+/g;
  for (
    let hit = urlPattern.exec(text);
    hit !== null;
    hit = urlPattern.exec(text)
  ) {
    const byteStart = byteLength(text.slice(0, hit.index));
    const byteEnd = byteLength(text.slice(0, hit.index + hit[0].length));

    const covered = result.some(({ features, index }) => {
      const type = features[0]?.$type || '';
      if (!FacetTypes.isLink(type) && !FacetTypes.isCode(type)) return false;
      return index.byteStart <= byteStart && index.byteEnd >= byteEnd;
    });
    if (covered) continue;

    result.push({
      index: { byteStart, byteEnd },
      features: [{
        $type: "network.slices.tools.richtext.facet#link",
        uri: hit[0],
      }],
    });
  }

  // URL facets were appended at the end; restore start-position order.
  result.sort((a, b) => a.index.byteStart - b.index.byteStart);

  return { text, facets: result };
}
421
/**
 * Collapse facets that carry the same feature and touch or overlap.
 *
 * Facets are grouped by their first feature's `$type` plus `uri`; within a
 * group, any facet starting at or before the end of the previous one is
 * folded into it. Input facets are not modified. The result is sorted by
 * `index.byteStart`.
 */
function mergeFacets(facets) {
  if (facets.length === 0) return [];

  const byStart = (a, b) => a.index.byteStart - b.index.byteStart;

  // Bucket facets by feature identity, keeping first-seen bucket order.
  const buckets = new Map();
  for (const facet of facets) {
    const feature = facet.features[0];
    const key = (feature?.$type || "") + (feature?.uri || "");
    const bucket = buckets.get(key);
    if (bucket) {
      bucket.push(facet);
    } else {
      buckets.set(key, [facet]);
    }
  }

  const merged = [];
  for (const bucket of buckets.values()) {
    bucket.sort(byStart);

    let open = null;
    for (const facet of bucket) {
      if (open && facet.index.byteStart <= open.index.byteEnd) {
        // Overlapping or adjacent: extend the facet currently being built.
        open.index.byteEnd = Math.max(open.index.byteEnd, facet.index.byteEnd);
        continue;
      }
      if (open) {
        merged.push(open);
      }
      // Copy so the caller's facet and its index object stay untouched.
      open = { ...facet, index: { ...facet.index } };
    }
    if (open) {
      merged.push(open);
    }
  }

  return merged.sort(byStart);
}
466
/**
 * Default HTML escape function: replaces &, <, >, " and ' with entities.
 */
function defaultEscapeHtml(text) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;",
  };
  return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}