···66and this project adheres to
77[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8899-<!-- ## [Unreleased] -->
99+## [Unreleased]
1010+1111+### Added
1212+1313+- `Atex.Lexicon` module that provides the `deflexicon` macro, which takes in a
1414+ JSON Lexicon definition and converts it into a series of schemas for each
1515+ definition within it.
10161117## [0.3.0] - 2025-06-29
1218
+309
lib/atex/lexicon.ex
···11+defmodule Atex.Lexicon do
22+ @moduledoc """
33+ Provides the `deflexicon` macro for defining a module with types and schemas from an entire lexicon definition.
44+55+ Should it also define structs, with functions to convert from input case to snake case?
66+ """
77+88+ alias Atex.Lexicon.Validators
# `use Atex.Lexicon` brings everything needed to declare a lexicon module into
# scope of the caller: the Peri schema DSL, the lexicon validators, and the
# macros defined in this module (notably `deflexicon`). No options are read.
defmacro __using__(_opts) do
  quote do
    import Peri
    import Atex.Lexicon.Validators
    import Atex.Lexicon
  end
end
@doc """
Defines an `id/0` function and one `defschema` per schema derived from the
given lexicon definition.

`lexicon` is the lexicon document written as a map literal. Because a macro
only receives quoted code, the argument is evaluated at compile time to get
the real map, whose keys are then atomised and which is passed through
`Atex.Lexicon.Schema.lexicon!/1` before any schema is generated.
"""
defmacro deflexicon(lexicon) do
  # Better way to get the real map, without having to eval? (custom function to compose one from quoted?)
  {raw_lexicon, _binding} = Code.eval_quoted(lexicon)

  parsed =
    raw_lexicon
    |> Recase.Enumerable.atomize_keys()
    |> Atex.Lexicon.Schema.lexicon!()

  # TODO: support returning typedefs
  # A single definition may expand to several schemas (or none), hence the
  # nested generator.
  schema_defs =
    for {def_name, def} <- parsed.defs,
        {schema_key, quoted_schema} <- def_to_schema(parsed.id, def_name, def) do
      quote do
        defschema unquote(schema_key), unquote(quoted_schema)
      end
    end

  quote do
    def id, do: unquote(Atex.NSID.to_atom(parsed.id))

    unquote_splicing(schema_defs)
  end
end
# TODO: generate typedefs
#
# Turns one lexicon definition into a list of `{schema_key, quoted_schema}`
# pairs, each of which becomes a `defschema` in the module calling
# `deflexicon`.
#
#   * `nsid`        - id of the lexicon being compiled; handed to
#                     `field_to_schema/2` for reference resolution.
#   * `def_name`    - name of the definition; becomes the schema key.
#   * `lexicon_def` - the definition itself, with atom keys.
#
# A definition may yield zero, one or several schemas (XRPC definitions emit
# one per params/input/output/message part that is present).
@spec def_to_schema(
        nsid :: String.t(),
        def_name :: String.t() | atom(),
        lexicon_def :: map()
      ) :: list({key :: atom(), quoted :: term()})

defp def_to_schema(nsid, def_name, %{type: "record", record: record}) do
  # TODO: record rkey format validator
  # A record is a wrapper around its inner `record` definition.
  def_to_schema(nsid, def_name, record)
end

defp def_to_schema(nsid, def_name, %{type: "object", properties: properties} = def) do
  # `required` and `nullable` are both optional on an object definition, so a
  # missing (or nil) list is treated as empty instead of failing to match.
  required = name_set(def[:required])
  nullable = name_set(def[:nullable])

  fields =
    Enum.map(properties, fn {key, field} ->
      # Property keys and the entries of `required`/`nullable` may not share a
      # representation (atom vs. string), so membership is checked on the
      # string form of both.
      name = to_string(key)
      schema = field_to_schema(field, nsid)

      schema =
        if MapSet.member?(nullable, name),
          do: quote(do: {:either, {{:literal, nil}, unquote(schema)}}),
          else: schema

      schema =
        if MapSet.member?(required, name),
          do: quote(do: {:required, unquote(schema)}),
          else: schema

      {key, schema}
    end)

  # `{:%{}, [], pairs}` is the quoted form of a map literal.
  [{atomise(def_name), {:%{}, [], fields}}]
end

# TODO: validating errors?
defp def_to_schema(nsid, _def_name, %{type: "query"} = def) do
  Enum.reject(
    [params_schema(nsid, def), body_schema(nsid, "output", def[:output])],
    &is_nil/1
  )
end

defp def_to_schema(nsid, _def_name, %{type: "procedure"} = def) do
  # TODO: better keys for these
  Enum.reject(
    [
      params_schema(nsid, def),
      body_schema(nsid, "output", def[:output]),
      # The request body gets its own `:input` key so it cannot clash with the
      # `:output` schema.
      body_schema(nsid, "input", def[:input])
    ],
    &is_nil/1
  )
end

defp def_to_schema(nsid, _def_name, %{type: "subscription"} = def) do
  Enum.reject(
    [params_schema(nsid, def), body_schema(nsid, "message", def[:message])],
    &is_nil/1
  )
end

defp def_to_schema(_nsid, def_name, %{type: "token"}) do
  # TODO: make it a validator that expects the nsid + key.
  [{atomise(def_name), :string}]
end

defp def_to_schema(nsid, def_name, %{type: type} = def)
     when type in [
            "blob",
            "array",
            "boolean",
            "integer",
            "string",
            "bytes",
            "cid-link",
            "unknown"
          ] do
  # Field-level types used as a top-level definition map straight onto the
  # field schema.
  [{atomise(def_name), field_to_schema(def, nsid)}]
end

# Builds the `:params` schema of an XRPC definition by treating its
# `parameters` as an object definition; returns nil when there are none.
defp params_schema(nsid, def) do
  case def[:parameters] do
    nil ->
      nil

    params ->
      [schema] =
        def_to_schema(nsid, "params", %{
          type: "object",
          required: params[:required],
          nullable: [],
          properties: params.properties
        })

      schema
  end
end

# Builds the schema for an XRPC body (`input`, `output` or `message`) under
# `key`; returns nil when the body or its `schema` is absent.
defp body_schema(nsid, key, body) do
  if body && body[:schema] do
    [schema] = def_to_schema(nsid, key, body[:schema])
    schema
  end
end

# Normalises an optional list of property names into a set of strings.
defp name_set(names), do: MapSet.new(names || [], &to_string/1)
# Converts one field definition into the quoted schema that ends up inside a
# generated `defschema`.
#
#   * `field_def` - the field definition, with atom keys; dispatch is on `:type`.
#   * `nsid`      - id of the lexicon being compiled; only used by `ref` and
#                   `union` fields to resolve their references.
#
# Returns nil for field types that have no clause below.
@spec field_to_schema(field_def :: %{type: String.t()}, nsid :: String.t()) :: Peri.schema_def()
defp field_to_schema(%{type: "string"} = field, _nsid) do
  schema =
    case const_or_enum(field) do
      nil ->
        field
        |> Map.take([
          :format,
          :maxLength,
          :minLength,
          :maxGraphemes,
          :minGraphemes
        ])
        |> Enum.map(fn {k, v} -> {Recase.to_snake(k), v} end)
        |> then(&{:custom, {Validators.String, :validate, [&1]}})

      # A `const`/`enum` fixes the allowed values, so the other string
      # constraints are not applied.
      fixed_schema ->
        fixed_schema
    end

  schema
  |> maybe_default(field)
  |> Macro.escape()
end

defp field_to_schema(%{type: "boolean"} = field, _nsid) do
  (const(field) || :boolean)
  |> maybe_default(field)
  |> Macro.escape()
end

defp field_to_schema(%{type: "integer"} = field, _nsid) do
  schema =
    case const_or_enum(field) do
      nil ->
        field
        |> Map.take([:maximum, :minimum])
        |> Keyword.new()
        # Same `{module, function, args}` shape as the string validator; the
        # function name must be present for the tuple to be callable.
        |> then(&{:custom, {Validators.Integer, :validate, [&1]}})

      fixed_schema ->
        fixed_schema
    end

  schema
  |> maybe_default(field)
  |> Macro.escape()
end

defp field_to_schema(%{type: "array", items: items} = field, nsid) do
  inner_schema = field_to_schema(items, nsid)

  field
  |> Map.take([:maxLength, :minLength])
  |> Enum.map(fn {k, v} -> {Recase.to_snake(k), v} end)
  |> then(&Validators.array(inner_schema, &1))
  |> Macro.escape()
  # Can't unquote the inner_schema beforehand as that would risk evaluating `get_schema`s which don't exist yet.
  # There's probably a better way to do this lol.
  #
  # `inner_schema` was already quoted, so the escape above escaped it a second
  # time; evaluating that escaped form yields the original quoted schema,
  # which is then put back in place.
  |> then(fn {:custom, {:{}, c, [Validators.Array, :validate, [quoted_inner_schema | args]]}} ->
    {inner_schema, _} = Code.eval_quoted(quoted_inner_schema)
    {:custom, {:{}, c, [Validators.Array, :validate, [inner_schema | args]]}}
  end)
end

defp field_to_schema(%{type: "blob"} = field, _nsid) do
  field
  |> Map.take([:accept, :maxSize])
  |> Enum.map(fn {k, v} -> {Recase.to_snake(k), v} end)
  |> Validators.blob()
  |> Macro.escape()
end

defp field_to_schema(%{type: "bytes"} = field, _nsid) do
  field
  |> Map.take([:maxLength, :minLength])
  |> Enum.map(fn {k, v} -> {Recase.to_snake(k), v} end)
  |> Validators.bytes()
  |> Macro.escape()
end

defp field_to_schema(%{type: "cid-link"}, _nsid) do
  Macro.escape(Validators.cid_link())
end

# TODO: do i need to make sure these two deal with brands? Check objects in atp.tools
defp field_to_schema(%{type: "ref", ref: ref}, nsid) do
  quoted_ref_schema(nsid, ref)
end

defp field_to_schema(%{type: "union", refs: refs}, nsid) do
  quoted_refs = Enum.map(refs, &quoted_ref_schema(nsid, &1))

  quote do
    {:oneof, unquote(quoted_refs)}
  end
end

# TODO: apparently should be a data object, not a primitive?
defp field_to_schema(%{type: "unknown"}, _nsid) do
  :any
end

defp field_to_schema(_field_def, _nsid), do: nil

# Builds a quoted `Module.get_schema(fragment)` call for a reference, so the
# referenced schema is only looked up when the generated code runs.
# NOTE(review): the module/fragment pair comes from the `Atex.NSID` helpers,
# which are not visible here; presumably a `#fragment` shorthand is resolved
# against `nsid` - confirm.
defp quoted_ref_schema(nsid, ref) do
  {module, fragment} =
    nsid
    |> Atex.NSID.expand_possible_fragment_shorthand(ref)
    |> Atex.NSID.to_atom_with_fragment()

  quote do
    unquote(module).get_schema(unquote(fragment))
  end
end
# Pairs `schema` with the field's default value when it declares one that is
# not nil; otherwise returns `schema` untouched.
defp maybe_default(schema, field) do
  case field[:default] do
    nil -> schema
    default -> {schema, {:default, default}}
  end
end
# Returns the fixed-value schema of a field: its `const` when present,
# otherwise its `enum`, otherwise nil.
defp const_or_enum(field) do
  case const(field) do
    nil -> enum(field)
    literal -> literal
  end
end

# `{:literal, value}` for a field carrying a `:const` key, nil for anything else.
defp const(field) do
  case field do
    %{const: value} -> {:literal, value}
    _other -> nil
  end
end

# `{:enum, values}` for a field carrying an `:enum` key, nil for anything else.
defp enum(field) do
  case field do
    %{enum: values} -> {:enum, values}
    _other -> nil
  end
end
# Normalises a definition name to an atom: atoms pass through unchanged and
# binaries are converted. Any other input matches no clause.
defp atomise(name) when is_atom(name) do
  name
end

defp atomise(name) when is_binary(name) do
  String.to_atom(name)
end
309309+end
defmodule Sh.Comet.V0.Feed.Track do
  @moduledoc """
  Lexicon module for `sh.comet.v0.feed.track`, defined through `deflexicon`.

  The `deflexicon` call at the bottom of this module should result in something
  similar to the following output:

      import Peri
      import Atex.Lexicon.Validators

      @type main() :: %{}

  The hand-written typespecs and the commented-out `defschema` calls below
  sketch that intended output in more detail.
  """

  # Stands in for importing Atex.Lexicon, Atex.Lexicon.Validators and Peri
  # individually.
  use Atex.Lexicon

  # TODO: need an example with `nullable` fields to demonstrate how those are handled (and also the weird extra types in lexicon defs like union)

  @type main() :: %{
          required(:audio) => Atex.Lexicon.Validators.blob_t(),
          required(:title) => String.t(),
          required(:createdAt) => String.t(),
          # TODO: check if peri replaces with `nil` or omits them completely.
          optional(:description) => String.t(),
          optional(:descriptionFacets) => Sh.Comet.V0.Richtext.Facet.main(),
          optional(:explicit) => boolean(),
          optional(:image) => Atex.Lexicon.Validators.blob_t(),
          optional(:link) => Sh.Comet.V0.Feed.Defs.link(),
          optional(:releasedAt) => String.t(),
          optional(:tags) => list(String.t())
        }

  @type view() :: %{
          required(:uri) => String.t(),
          required(:cid) => String.t(),
          required(:author) => Sh.Comet.V0.Actor.Profile.viewFull(),
          required(:audio) => String.t(),
          required(:record) => main(),
          required(:indexedAt) => String.t(),
          optional(:image) => String.t(),
          optional(:commentCount) => integer(),
          optional(:likeCount) => integer(),
          optional(:playCount) => integer(),
          optional(:repostCount) => integer(),
          optional(:viewer) => Sh.Comet.V0.Feed.Defs.viewerState()
        }

  # Should probably be a separate validator for all rkey formats.
  # defschema :main_rkey, string(format: :tid)

  # defschema :main, %{
  #   audio: {:required, blob(accept: ["audio/ogg"], max_size: 100_000_000)},
  #   title: {:required, string(min_length: 1, max_length: 2560, max_graphemes: 256)},
  #   createdAt: {:required, string(format: :datetime)},
  #   description: string(max_length: 20000, max_graphemes: 2000),
  #   # This is `ref`
  #   descriptionFacets: Sh.Comet.V0.Richtext.Facet.get_schema(:main),
  #   explicit: :boolean,
  #   image: blob(accept: ["image/png", "image/jpeg"], max_size: 1_000_000),
  #   link: Sh.Comet.V0.Feed.Defs.get_schema(:link),
  #   releasedAt: string(format: :datetime),
  #   tags: array(string(max_graphemes: 64, max_length: 640), max_length: 8)
  # }

  # defschema :view, %{
  #   uri: {:required, string(format: :at_uri)},
  #   cid: {:required, string(format: :cid)},
  #   author: {:required, Sh.Comet.V0.Actor.Profile.get_schema(:viewFull)},
  #   audio: {:required, string(format: :uri)},
  #   record: {:required, get_schema(:main)},
  #   indexedAt: {:required, string(format: :datetime)},
  #   image: string(format: :uri),
  #   commentCount: :integer,
  #   likeCount: :integer,
  #   playCount: :integer,
  #   repostCount: :integer,
  #   viewer: Sh.Comet.V0.Feed.Defs.get_schema(:viewerState)
  # }

  # The lexicon document has to stay an inline literal: `deflexicon` evaluates
  # its argument at compile time.
  deflexicon(%{
    "id" => "sh.comet.v0.feed.track",
    "lexicon" => 1,
    "defs" => %{
      "main" => %{
        "type" => "record",
        "key" => "tid",
        "description" =>
          "A Comet audio track. TODO: should probably have some sort of pre-calculated waveform, or have a query to get one from a blob?",
        "record" => %{
          "type" => "object",
          "required" => ["audio", "title", "createdAt"],
          "properties" => %{
            "audio" => %{
              "type" => "blob",
              "accept" => ["audio/ogg"],
              "maxSize" => 100_000_000,
              "description" =>
                "Audio of the track, ideally encoded as 96k Opus. Limited to 100mb."
            },
            "createdAt" => %{
              "type" => "string",
              "format" => "datetime",
              "description" => "Timestamp for when the track entry was originally created."
            },
            "description" => %{
              "type" => "string",
              "maxGraphemes" => 2000,
              "maxLength" => 20000,
              "description" => "Description of the track."
            },
            "descriptionFacets" => %{
              "type" => "ref",
              "ref" => "sh.comet.v0.richtext.facet",
              "description" => "Annotations of the track's description."
            },
            "explicit" => %{
              "type" => "boolean",
              "description" =>
                "Whether the track contains explicit content that may objectionable to some people, usually swearing or adult themes."
            },
            "image" => %{
              "type" => "blob",
              "accept" => ["image/png", "image/jpeg"],
              "maxSize" => 1_000_000,
              "description" => "Image to be displayed representing the track."
            },
            "link" => %{"type" => "ref", "ref" => "sh.comet.v0.feed.defs#link"},
            "releasedAt" => %{
              "type" => "string",
              "format" => "datetime",
              "description" =>
                "Timestamp for when the track was released. If in the future, may be used to implement pre-savable tracks."
            },
            "tags" => %{
              "type" => "array",
              "maxLength" => 8,
              "description" => "Hashtags for the track, usually for genres.",
              "items" => %{
                "type" => "string",
                "maxGraphemes" => 64,
                "maxLength" => 640
              }
            },
            "title" => %{
              "type" => "string",
              "maxGraphemes" => 256,
              "maxLength" => 2560,
              "minLength" => 1,
              "description" =>
                "Title of the track. Usually shouldn't include the creator's name."
            }
          }
        }
      },
      "view" => %{
        "type" => "object",
        "required" => ["uri", "cid", "author", "audio", "record", "indexedAt"],
        "properties" => %{
          "audio" => %{
            "type" => "string",
            "format" => "uri",
            "description" =>
              "URL pointing to where the audio data for the track can be fetched. May be re-encoded from the original blob."
          },
          "author" => %{
            "type" => "ref",
            "ref" => "sh.comet.v0.actor.profile#viewFull"
          },
          "cid" => %{"type" => "string", "format" => "cid"},
          "commentCount" => %{"type" => "integer"},
          "image" => %{
            "type" => "string",
            "format" => "uri",
            "description" => "URL pointing to where the image for the track can be fetched."
          },
          "indexedAt" => %{"type" => "string", "format" => "datetime"},
          "likeCount" => %{"type" => "integer"},
          "playCount" => %{"type" => "integer"},
          "record" => %{"type" => "ref", "ref" => "#main"},
          "repostCount" => %{"type" => "integer"},
          "uri" => %{"type" => "string", "format" => "at-uri"},
          "viewer" => %{
            "type" => "ref",
            "ref" => "sh.comet.v0.feed.defs#viewerState"
          }
        }
      }
    }
  })
end
+70
lib/atproto/sh/comet/v0/richtext/facet.ex
defmodule Sh.Comet.V0.Richtext.Facet do
  @moduledoc """
  Lexicon module for `sh.comet.v0.richtext.facet`, defined through `deflexicon`.

  The lexicon declares the `byteSlice`, `link`, `main`, `mention`, `tag` and
  `timestamp` object definitions.
  """

  use Atex.Lexicon

  # The lexicon document has to stay an inline literal: `deflexicon` evaluates
  # its argument at compile time.
  deflexicon(%{
    "id" => "sh.comet.v0.richtext.facet",
    "lexicon" => 1,
    "defs" => %{
      "byteSlice" => %{
        "type" => "object",
        "required" => ["byteStart", "byteEnd"],
        "description" =>
          "Specifies the sub-string range a facet feature applies to. Start index is inclusive, end index is exclusive. Indices are zero-indexed, counting bytes of the UTF-8 encoded text. NOTE: some languages, like Javascript, use UTF-16 or Unicode codepoints for string slice indexing; in these languages, convert to byte arrays before working with facets.",
        "properties" => %{
          "byteEnd" => %{"type" => "integer", "minimum" => 0},
          "byteStart" => %{"type" => "integer", "minimum" => 0}
        }
      },
      "link" => %{
        "type" => "object",
        "required" => ["uri"],
        "description" =>
          "Facet feature for a URL. The text URL may have been simplified or truncated, but the facet reference should be a complete URL.",
        "properties" => %{"uri" => %{"type" => "string", "format" => "uri"}}
      },
      "main" => %{
        "type" => "object",
        "required" => ["index", "features"],
        "description" => "Annotation of a sub-string within rich text.",
        "properties" => %{
          "features" => %{
            "type" => "array",
            "items" => %{
              "type" => "union",
              "refs" => ["#mention", "#link", "#tag"]
            }
          },
          "index" => %{"type" => "ref", "ref" => "#byteSlice"}
        }
      },
      "mention" => %{
        "type" => "object",
        "required" => ["did"],
        "description" =>
          "Facet feature for mention of another account. The text is usually a handle, including a '@' prefix, but the facet reference is a DID.",
        "properties" => %{"did" => %{"type" => "string", "format" => "did"}}
      },
      "tag" => %{
        "type" => "object",
        "required" => ["tag"],
        "description" =>
          "Facet feature for a hashtag. The text usually includes a '#' prefix, but the facet reference should not (except in the case of 'double hash tags').",
        "properties" => %{
          "tag" => %{"type" => "string", "maxGraphemes" => 64, "maxLength" => 640}
        }
      },
      "timestamp" => %{
        "type" => "object",
        "description" =>
          "Facet feature for a timestamp in a track. The text usually is in the format of 'hh:mm:ss' with the hour section being omitted if unnecessary.",
        "properties" => %{
          "timestamp" => %{
            "type" => "integer",
            "minimum" => 0,
            "description" => "Reference time, in seconds."
          }
        }
      }
    }
  })
end