···242242/// Prelude with the extension traits you're likely to want and some other stuff
243243pub mod prelude {
244244 pub use crate::client::AgentSession;
245245+ #[cfg(feature = "api")]
245246 pub use crate::client::AgentSessionExt;
246247 pub use crate::client::BasicClient;
247248 pub use crate::common::http_client::HttpClient;
+332-13
crates/jacquard/src/richtext.rs
···33//! Provides parsing and building of rich text with facets (mentions, links, tags)
44//! and detection of embed candidates (record and external embeds).
5566+#[cfg(feature = "api_bluesky")]
77+use crate::api::app_bsky::richtext::facet::Facet;
68use crate::common::CowStr;
79use jacquard_common::IntoStatic;
810use jacquard_common::types::did::{DID_REGEX, Did};
···3234 LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
33353436static TRAILING_PUNCT_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{P}+$").unwrap());
3737+3838+/// Default domains that support at-URI extraction from URLs
3939+/// (bsky.app URL patterns like /profile/{actor}/post/{rkey})
4040+#[cfg(feature = "api_bluesky")]
4141+static DEFAULT_EMBED_DOMAINS: &[&str] = &["bsky.app", "deer.social"];
35423643/// Marker type indicating all facets are resolved (no handles pending DID resolution)
3744pub struct Resolved;
···119126}
120127121128/// Entry point for parsing text with automatic facet detection
129129+///
130130+/// Uses default embed domains (bsky.app, deer.social) for at-URI extraction.
131131+/// For custom domains, use [`parse_with_domains`].
122132pub fn parse(text: impl Into<String>) -> RichTextBuilder<Unresolved> {
133133+ #[cfg(feature = "api_bluesky")]
134134+ {
135135+ parse_with_domains(text, DEFAULT_EMBED_DOMAINS)
136136+ }
137137+ #[cfg(not(feature = "api_bluesky"))]
138138+ {
139139+ parse_with_domains(text, &[])
140140+ }
141141+}
142142+143143+/// Parse text with custom embed domains for at-URI extraction
144144+///
145145+/// This allows specifying additional domains (beyond bsky.app and deer.social)
146146+/// that use the same URL patterns for records (e.g., /profile/{actor}/post/{rkey}).
147147+#[cfg(feature = "api_bluesky")]
148148+pub fn parse_with_domains(
149149+ text: impl Into<String>,
150150+ embed_domains: &[&str],
151151+) -> RichTextBuilder<Unresolved> {
123152 let text = text.into();
124153 let mut facet_candidates = Vec::new();
154154+ let mut embed_candidates = Vec::new();
125155126156 // Step 1: Detect and strip markdown links first
127157 let (text_processed, markdown_facets) = detect_markdown_links(&text);
158158+159159+ // Check markdown links for embed candidates
160160+ for facet in &markdown_facets {
161161+ if let FacetCandidate::MarkdownLink { url, .. } = facet {
162162+ if let Some(embed) = classify_embed(url, embed_domains) {
163163+ embed_candidates.push(embed);
164164+ }
165165+ }
166166+ }
167167+128168 facet_candidates.extend(markdown_facets);
129169130170 // Step 2: Detect mentions
···133173134174 // Step 3: Detect URLs
135175 let url_facets = detect_urls(&text_processed);
176176+177177+ // Check URLs for embed candidates
178178+ for facet in &url_facets {
179179+ if let FacetCandidate::Link { range } = facet {
180180+ let url = &text_processed[range.clone()];
181181+ if let Some(embed) = classify_embed(url, embed_domains) {
182182+ embed_candidates.push(embed);
183183+ }
184184+ }
185185+ }
186186+136187 facet_candidates.extend(url_facets);
137188138189 // Step 4: Detect tags
···142193 RichTextBuilder {
143194 text: text_processed,
144195 facet_candidates,
145145- #[cfg(feature = "api_bluesky")]
146146- embed_candidates: Vec::new(),
196196+ embed_candidates,
197197+ _state: PhantomData,
198198+ }
199199+}
200200+201201+/// Parse text without embed detection (no api_bluesky feature)
202202+#[cfg(not(feature = "api_bluesky"))]
203203+pub fn parse_with_domains(
204204+ text: impl Into<String>,
205205+ _embed_domains: &[&str],
206206+) -> RichTextBuilder<Unresolved> {
207207+ let text = text.into();
208208+ let mut facet_candidates = Vec::new();
209209+210210+ // Step 1: Detect and strip markdown links first
211211+ let (text_processed, markdown_facets) = detect_markdown_links(&text);
212212+ facet_candidates.extend(markdown_facets);
213213+214214+ // Step 2: Detect mentions
215215+ let mention_facets = detect_mentions(&text_processed);
216216+ facet_candidates.extend(mention_facets);
217217+218218+ // Step 3: Detect URLs
219219+ let url_facets = detect_urls(&text_processed);
220220+ facet_candidates.extend(url_facets);
221221+222222+ // Step 4: Detect tags
223223+ let tag_facets = detect_tags(&text_processed);
224224+ facet_candidates.extend(tag_facets);
225225+226226+ RichTextBuilder {
227227+ text: text_processed,
228228+ facet_candidates,
147229 _state: PhantomData,
148230 }
149231}
···408490 facets
409491}
/// Decides whether `url` should become an embed candidate.
///
/// Outcomes, in order of precedence:
/// - an `at://` string that parses as an at-URI becomes a record embed;
/// - an `http://` / `https://` URL from which [`extract_at_uri_from_url`]
///   can derive an at-URI (using `embed_domains`) becomes a record embed;
/// - any other `http://` / `https://` URL becomes an external embed;
/// - everything else (including an `at://` string that fails to parse)
///   yields `None`.
#[cfg(feature = "api_bluesky")]
fn classify_embed(url: &str, embed_domains: &[&str]) -> Option<EmbedCandidate<'static>> {
    use crate::types::aturi::AtUri;

    // An at:// string can never match the http(s) check below, so a failed
    // parse here is equivalent to falling through to the final `None`.
    if url.starts_with("at://") {
        return AtUri::new(url).ok().map(|parsed| EmbedCandidate::Record {
            at_uri: parsed.into_static(),
            strong_ref: None,
        });
    }

    // Guard clause: only HTTP(S) URLs are considered from here on.
    if !(url.starts_with("http://") || url.starts_with("https://")) {
        return None;
    }

    let candidate = match extract_at_uri_from_url(url, embed_domains) {
        // The URL maps onto a record on one of the configured domains.
        Some(at_uri) => EmbedCandidate::Record {
            at_uri,
            strong_ref: None,
        },
        // Otherwise, it's an external embed.
        None => EmbedCandidate::External {
            url: CowStr::from(url.to_string()),
            metadata: None,
        },
    };

    Some(candidate)
}
/// Extracts an at-URI from a URL with bsky.app-style path patterns.
///
/// Supports these patterns:
/// - `https://{domain}/profile/{handle|did}/post/{rkey}` → `at://{actor}/app.bsky.feed.post/{rkey}`
/// - `https://{domain}/profile/{handle|did}/lists/{rkey}` → `at://{actor}/app.bsky.graph.list/{rkey}`
/// - `https://{domain}/profile/{handle|did}/feed/{rkey}` → `at://{actor}/app.bsky.feed.generator/{rkey}`
/// - `https://{domain}/starter-pack/{handle|did}/{rkey}` → `at://{actor}/app.bsky.graph.starterpack/{rkey}`
/// - `https://{domain}/profile/{handle|did}/{collection}/{rkey}` → `at://{actor}/{collection}/{rkey}`
///   (only if `collection` contains a dot, i.e. looks like an NSID)
///
/// Only works for domains in the provided `embed_domains` list. The domain
/// comparison ignores ASCII case, so entries such as `"Bsky.App"` match.
///
/// Returns `None` when the URL does not parse, has no domain host, is on a
/// domain that is not listed, has an unrecognised path shape, or when the
/// assembled string is rejected by `AtUri::new`.
#[cfg(feature = "api_bluesky")]
fn extract_at_uri_from_url(
    url: &str,
    embed_domains: &[&str],
) -> Option<crate::types::aturi::AtUri<'static>> {
    use crate::types::aturi::AtUri;

    // Parse URL
    let url_parsed = url::Url::parse(url).ok()?;

    // Check if domain is in the allowed list. Host names are case-insensitive
    // and the `url` crate normalises http(s) hosts to lowercase, so an exact
    // comparison would silently reject mixed-case entries in `embed_domains`.
    let domain = url_parsed.domain()?;
    let domain_allowed = embed_domains
        .iter()
        .any(|allowed| allowed.eq_ignore_ascii_case(domain));
    if !domain_allowed {
        return None;
    }

    // Empty segments (leading, trailing or doubled slashes) are dropped.
    let path = url_parsed.path();
    let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();

    let at_uri_str = match segments.as_slice() {
        // Known shortcuts
        ["profile", actor, "post", rkey] => {
            format!("at://{}/app.bsky.feed.post/{}", actor, rkey)
        }
        ["profile", actor, "lists", rkey] => {
            format!("at://{}/app.bsky.graph.list/{}", actor, rkey)
        }
        ["profile", actor, "feed", rkey] => {
            format!("at://{}/app.bsky.feed.generator/{}", actor, rkey)
        }
        ["starter-pack", actor, rkey] => {
            format!("at://{}/app.bsky.graph.starterpack/{}", actor, rkey)
        }
        // Generic pattern: /profile/{actor}/{collection}/{rkey}
        // Accept if collection looks like it could be an NSID (contains dots)
        ["profile", actor, collection, rkey] if collection.contains('.') => {
            format!("at://{}/{}/{}", actor, collection, rkey)
        }
        _ => return None,
    };

    // `AtUri::new` performs the final validation; the result is made owned
    // because `at_uri_str` does not outlive this function.
    AtUri::new(&at_uri_str).ok().map(|u| u.into_static())
}
581581+411582use jacquard_common::types::string::AtStrError;
412583use thiserror::Error;
413584···429600 /// Invalid byte range
430601 #[error("Invalid byte range {start}..{end} for text of length {text_len}")]
431602 InvalidRange {
603603+ /// Range start position
432604 start: usize,
605605+ /// Range end position
433606 end: usize,
607607+ /// Total text length
434608 text_len: usize,
435609 },
436610···446620#[cfg(feature = "api_bluesky")]
447621impl RichTextBuilder<Resolved> {
448622 /// Build the richtext (sync - all facets must be resolved)
449449- pub fn build(
450450- self,
451451- ) -> Result<
452452- (
453453- String,
454454- Option<Vec<crate::api::app_bsky::richtext::facet::Facet<'static>>>,
455455- ),
456456- RichTextError,
457457- > {
623623+ pub fn build(self) -> Result<(String, Option<Vec<Facet<'static>>>), RichTextError> {
458624 use std::collections::BTreeMap;
459625 if self.facet_candidates.is_empty() {
460626 return Ok((self.text, None));
···475641 let text_len = self.text.len();
476642477643 for candidate in candidates {
644644+ use crate::api::app_bsky::richtext::facet::{ByteSlice, Facet};
645645+478646 let (range, feature) = match candidate {
479647 FacetCandidate::MarkdownLink { display_range, url } => {
480648 // MarkdownLink stores URL directly, use display_range for index
···574742 });
575743 }
576744577577- facets.push(crate::api::app_bsky::richtext::facet::Facet {
578578- index: crate::api::app_bsky::richtext::facet::ByteSlice {
745745+ facets.push(Facet {
746746+ index: ByteSlice {
747747+ byte_start: range.start as i64,
748748+ byte_end: range.end as i64,
749749+ extra_data: BTreeMap::new(),
750750+ },
751751+ features: vec![feature],
752752+ extra_data: BTreeMap::new(),
753753+ });
754754+755755+ last_end = range.end;
756756+ }
757757+758758+ Ok((self.text, Some(facets.into_static())))
759759+ }
760760+}
761761+762762+#[cfg(feature = "api_bluesky")]
763763+impl RichTextBuilder<Unresolved> {
764764+ /// Build richtext, resolving handles to DIDs using the provided resolver
765765+ pub async fn build_async<R>(
766766+ self,
767767+ resolver: &R,
768768+ ) -> Result<(String, Option<Vec<Facet<'static>>>), RichTextError>
769769+ where
770770+ R: jacquard_identity::resolver::IdentityResolver + Sync,
771771+ {
772772+ use crate::api::app_bsky::richtext::facet::{
773773+ ByteSlice, FacetFeaturesItem, Link, Mention, Tag,
774774+ };
775775+ use std::collections::BTreeMap;
776776+777777+ if self.facet_candidates.is_empty() {
778778+ return Ok((self.text, None));
779779+ }
780780+781781+ // Sort facets by start position
782782+ let mut candidates = self.facet_candidates;
783783+ candidates.sort_by_key(|fc| match fc {
784784+ FacetCandidate::MarkdownLink { display_range, .. } => display_range.start,
785785+ FacetCandidate::Mention { range, .. } => range.start,
786786+ FacetCandidate::Link { range } => range.start,
787787+ FacetCandidate::Tag { range } => range.start,
788788+ });
789789+790790+ // Resolve handles and convert to Facet types
791791+ let mut facets = Vec::with_capacity(candidates.len());
792792+ let mut last_end = 0;
793793+ let text_len = self.text.len();
794794+795795+ for candidate in candidates {
796796+ let (range, feature) = match candidate {
797797+ FacetCandidate::MarkdownLink { display_range, url } => {
798798+ // MarkdownLink stores URL directly, use display_range for index
799799+800800+ let feature = FacetFeaturesItem::Link(Box::new(Link {
801801+ uri: crate::types::uri::Uri::new_owned(&url)?,
802802+ extra_data: BTreeMap::new(),
803803+ }));
804804+ (display_range, feature)
805805+ }
806806+ FacetCandidate::Mention { range, did } => {
807807+ let did = if let Some(did) = did {
808808+ // Already resolved
809809+ did
810810+ } else {
811811+ // Extract handle from text and resolve
812812+ if range.end > text_len {
813813+ return Err(RichTextError::InvalidRange {
814814+ start: range.start,
815815+ end: range.end,
816816+ text_len,
817817+ });
818818+ }
819819+820820+ let handle_str = self.text[range.clone()].trim_start_matches('@');
821821+ let handle = jacquard_common::types::handle::Handle::new(handle_str)?;
822822+823823+ resolver.resolve_handle(&handle).await?
824824+ };
825825+826826+ let feature = FacetFeaturesItem::Mention(Box::new(Mention {
827827+ did,
828828+ extra_data: BTreeMap::new(),
829829+ }));
830830+ (range, feature)
831831+ }
832832+ FacetCandidate::Link { range } => {
833833+ // Extract URL from text[range] and normalize
834834+835835+ if range.end > text_len {
836836+ return Err(RichTextError::InvalidRange {
837837+ start: range.start,
838838+ end: range.end,
839839+ text_len,
840840+ });
841841+ }
842842+843843+ let mut url = self.text[range.clone()].to_string();
844844+845845+ // Prepend https:// if URL doesn't have a scheme
846846+ if !url.starts_with("http://") && !url.starts_with("https://") {
847847+ url = format!("https://{}", url);
848848+ }
849849+850850+ let feature = FacetFeaturesItem::Link(Box::new(Link {
851851+ uri: crate::types::uri::Uri::new_owned(&url)?,
852852+ extra_data: BTreeMap::new(),
853853+ }));
854854+ (range, feature)
855855+ }
856856+ FacetCandidate::Tag { range } => {
857857+ // Extract tag from text[range] (includes #), strip # and trailing punct
858858+859859+ use smol_str::ToSmolStr;
860860+ if range.end > text_len {
861861+ return Err(RichTextError::InvalidRange {
862862+ start: range.start,
863863+ end: range.end,
864864+ text_len,
865865+ });
866866+ }
867867+868868+ let tag_with_hash = &self.text[range.clone()];
869869+ // Strip # prefix (could be # or #)
870870+ let tag = tag_with_hash
871871+ .trim_start_matches('#')
872872+ .trim_start_matches('#');
873873+874874+ let feature = FacetFeaturesItem::Tag(Box::new(Tag {
875875+ tag: CowStr::from(tag.to_smolstr()),
876876+ extra_data: BTreeMap::new(),
877877+ }));
878878+ (range, feature)
879879+ }
880880+ };
881881+882882+ // Check overlap
883883+ if range.start < last_end {
884884+ return Err(RichTextError::OverlappingFacets(range.start, range.end));
885885+ }
886886+887887+ // Validate range
888888+ if range.end > text_len {
889889+ return Err(RichTextError::InvalidRange {
890890+ start: range.start,
891891+ end: range.end,
892892+ text_len,
893893+ });
894894+ }
895895+896896+ facets.push(Facet {
897897+ index: ByteSlice {
579898 byte_start: range.start as i64,
580899 byte_end: range.end as i64,
581900 extra_data: BTreeMap::new(),