pydantic model generator for atproto lexicons
1//! type conversion from lexicon types to python type annotations
2
3use std::collections::HashSet;
4
5use atrium_lex::lexicon::{
6 LexArrayItem, LexObject, LexObjectProperty, LexRecord, LexRef, LexRefUnion, LexUserType,
7};
8use atrium_lex::LexiconDoc;
9use heck::ToPascalCase;
10
/// context for resolving refs within a document
///
/// holds only a borrowed nsid, so copying the context is as cheap as copying
/// a string slice
#[derive(Debug, Clone, Copy)]
pub struct RefContext<'a> {
    /// nsid of the current document (e.g., "fm.plyr.track")
    pub nsid: &'a str,
}
16
17impl<'a> RefContext<'a> {
18 pub fn new(nsid: &'a str) -> Self {
19 Self { nsid }
20 }
21
22 /// resolve a ref string to a python class name
23 ///
24 /// - `#localDef` -> class in same document
25 /// - `com.example.foo` -> external nsid main def
26 /// - `com.example.foo#bar` -> external nsid specific def
27 pub fn resolve_ref(&self, ref_str: &str) -> String {
28 if let Some(local_name) = ref_str.strip_prefix('#') {
29 // local ref within same document
30 to_class_name(self.nsid, local_name)
31 } else if let Some((nsid, def_name)) = ref_str.split_once('#') {
32 // external ref with specific def
33 to_class_name(nsid, def_name)
34 } else {
35 // external ref to main def
36 to_class_name(ref_str, "main")
37 }
38 }
39}
40
41/// convert lexicon property to python type annotation
42pub fn property_to_python(prop: &LexObjectProperty, ctx: &RefContext) -> String {
43 match prop {
44 LexObjectProperty::Boolean(_) => "bool".into(),
45 LexObjectProperty::Integer(_) => "int".into(),
46 LexObjectProperty::String(_) => "str".into(),
47 LexObjectProperty::Bytes(_) => "bytes".into(),
48 LexObjectProperty::CidLink(_) => "str".into(),
49 LexObjectProperty::Blob(_) => "dict[str, Any]".into(),
50 LexObjectProperty::Unknown(_) => "Any".into(),
51 LexObjectProperty::Ref(r) => ref_to_python(r, ctx),
52 LexObjectProperty::Union(u) => union_to_python(u, ctx),
53 LexObjectProperty::Array(arr) => {
54 let item_type = array_item_to_python(&arr.items, ctx);
55 format!("list[{item_type}]")
56 }
57 }
58}
59
60/// convert a ref to python type
61fn ref_to_python(r: &LexRef, ctx: &RefContext) -> String {
62 ctx.resolve_ref(&r.r#ref)
63}
64
65/// convert a union to python type
66fn union_to_python(u: &LexRefUnion, ctx: &RefContext) -> String {
67 if u.refs.is_empty() {
68 return "Any".into();
69 }
70
71 let types: Vec<String> = u.refs.iter().map(|r| ctx.resolve_ref(r)).collect();
72
73 if types.len() == 1 {
74 types.into_iter().next().unwrap()
75 } else {
76 types.join(" | ")
77 }
78}
79
80/// convert array item type to python
81fn array_item_to_python(item: &LexArrayItem, ctx: &RefContext) -> String {
82 match item {
83 LexArrayItem::Boolean(_) => "bool".into(),
84 LexArrayItem::Integer(_) => "int".into(),
85 LexArrayItem::String(_) => "str".into(),
86 LexArrayItem::Bytes(_) => "bytes".into(),
87 LexArrayItem::CidLink(_) => "str".into(),
88 LexArrayItem::Blob(_) => "dict[str, Any]".into(),
89 LexArrayItem::Unknown(_) => "Any".into(),
90 LexArrayItem::Ref(r) => ref_to_python(r, ctx),
91 LexArrayItem::Union(u) => union_to_python(u, ctx),
92 }
93}
94
95/// generate python class name from nsid and def name
96pub fn to_class_name(nsid: &str, def_name: &str) -> String {
97 let mut parts: Vec<&str> = nsid.split('.').collect();
98 if def_name != "main" {
99 parts.push(def_name);
100 }
101 parts.iter().map(|p| p.to_pascal_case()).collect()
102}
103
104/// collect all external ref nsids from a document
105pub fn collect_external_refs(doc: &LexiconDoc) -> HashSet<String> {
106 let mut refs = HashSet::new();
107
108 for def in doc.defs.values() {
109 match def {
110 LexUserType::Record(LexRecord { record, .. }) => {
111 let atrium_lex::lexicon::LexRecordRecord::Object(obj) = record;
112 collect_refs_from_object(obj, &mut refs);
113 }
114 LexUserType::Object(obj) => {
115 collect_refs_from_object(obj, &mut refs);
116 }
117 _ => {}
118 }
119 }
120
121 // filter to only external refs (not starting with #)
122 refs.into_iter()
123 .filter(|r| !r.starts_with('#'))
124 .map(|r| {
125 // extract nsid from ref (strip #defName if present)
126 r.split_once('#').map(|(nsid, _)| nsid.to_string()).unwrap_or(r)
127 })
128 .collect()
129}
130
131fn collect_refs_from_object(obj: &LexObject, refs: &mut HashSet<String>) {
132 for prop in obj.properties.values() {
133 collect_refs_from_property(prop, refs);
134 }
135}
136
137fn collect_refs_from_property(prop: &LexObjectProperty, refs: &mut HashSet<String>) {
138 match prop {
139 LexObjectProperty::Ref(r) => {
140 refs.insert(r.r#ref.clone());
141 }
142 LexObjectProperty::Union(u) => {
143 for r in &u.refs {
144 refs.insert(r.clone());
145 }
146 }
147 LexObjectProperty::Array(arr) => {
148 collect_refs_from_array_item(&arr.items, refs);
149 }
150 _ => {}
151 }
152}
153
154fn collect_refs_from_array_item(item: &LexArrayItem, refs: &mut HashSet<String>) {
155 match item {
156 LexArrayItem::Ref(r) => {
157 refs.insert(r.r#ref.clone());
158 }
159 LexArrayItem::Union(u) => {
160 for r in &u.refs {
161 refs.insert(r.clone());
162 }
163 }
164 _ => {}
165 }
166}