Parse and validate AT Protocol Lexicons with DTO generation for Laravel
1<?php
2
3namespace SocialDept\AtpSchema\Parser;
4
5use Illuminate\Support\Facades\Cache;
6use Illuminate\Support\Facades\Http;
7use SocialDept\AtpSchema\Contracts\SchemaRepository;
8use SocialDept\AtpSchema\Data\LexiconDocument;
9use SocialDept\AtpSchema\Exceptions\SchemaNotFoundException;
10use SocialDept\AtpSchema\Exceptions\SchemaParseException;
11
12class SchemaLoader implements SchemaRepository
13{
14 /**
15 * In-memory cache of loaded schemas for current request.
16 *
17 * @var array<string, array>
18 */
19 protected array $memoryCache = [];
20
21 /**
22 * Schema source directories.
23 *
24 * @var array<string>
25 */
26 protected array $sources;
27
28 /**
29 * Whether to use Laravel cache.
30 */
31 protected bool $useCache;
32
33 /**
34 * Cache TTL in seconds.
35 */
36 protected int $cacheTtl;
37
38 /**
39 * Cache key prefix.
40 */
41 protected string $cachePrefix;
42
43 /**
44 * Whether DNS resolution is enabled.
45 */
46 protected bool $dnsResolutionEnabled;
47
48 /**
49 * HTTP timeout for schema fetching.
50 */
51 protected int $httpTimeout;
52
53 /**
54 * Whether the atp-resolver package is available.
55 */
56 protected bool $hasResolver = false;
57
58 /**
59 * Whether we've shown the resolver warning.
60 */
61 protected static bool $resolverWarningShown = false;
62
63 /**
64 * Create a new SchemaLoader instance.
65 *
66 * @param array<string> $sources
67 */
68 public function __construct(
69 array $sources,
70 bool $useCache = true,
71 int $cacheTtl = 3600,
72 string $cachePrefix = 'schema',
73 bool $dnsResolutionEnabled = true,
74 int $httpTimeout = 10
75 ) {
76 $this->sources = $sources;
77 $this->useCache = $useCache;
78 $this->cacheTtl = $cacheTtl;
79 $this->cachePrefix = $cachePrefix;
80 $this->dnsResolutionEnabled = $dnsResolutionEnabled;
81 $this->httpTimeout = $httpTimeout;
82 $this->hasResolver = class_exists('SocialDept\\AtpResolver\\Resolver');
83 }
84
85 /**
86 * Find schema by NSID (nullable version).
87 */
88 public function find(string $nsid): ?LexiconDocument
89 {
90 try {
91 return $this->load($nsid);
92 } catch (SchemaNotFoundException) {
93 return null;
94 }
95 }
96
97 /**
98 * Load schema by NSID.
99 */
100 public function load(string $nsid): LexiconDocument
101 {
102 // Check memory cache first
103 if (isset($this->memoryCache[$nsid])) {
104 return $this->memoryCache[$nsid];
105 }
106
107 // Check Laravel cache
108 if ($this->useCache) {
109 $cacheKey = $this->getCacheKey($nsid);
110 $cached = Cache::get($cacheKey);
111
112 if ($cached !== null) {
113 // Cache stores raw arrays, convert to LexiconDocument
114 $document = LexiconDocument::fromArray($cached);
115 $this->memoryCache[$nsid] = $document;
116
117 return $document;
118 }
119 }
120
121 // Load raw array data from sources
122 $data = $this->loadFromSources($nsid);
123
124 // Parse into LexiconDocument
125 $document = LexiconDocument::fromArray($data);
126
127 // Cache both in memory (as object) and Laravel cache (as array)
128 $this->memoryCache[$nsid] = $document;
129
130 if ($this->useCache) {
131 Cache::put($this->getCacheKey($nsid), $data, $this->cacheTtl);
132 }
133
134 return $document;
135 }
136
137 /**
138 * Load raw schema array by NSID.
139 */
140 protected function loadRaw(string $nsid): array
141 {
142 $document = $this->load($nsid);
143
144 return $document->toArray();
145 }
146
147 /**
148 * Get all available schema NSIDs.
149 *
150 * @return array<string>
151 */
152 public function all(): array
153 {
154 $nsids = [];
155
156 // Scan all source directories for lexicon files
157 foreach ($this->sources as $source) {
158 if (! is_dir($source)) {
159 continue;
160 }
161
162 // Recursively scan for .json files
163 $files = new \RecursiveIteratorIterator(
164 new \RecursiveDirectoryIterator($source, \RecursiveDirectoryIterator::SKIP_DOTS)
165 );
166
167 foreach ($files as $file) {
168 if ($file->isFile() && $file->getExtension() === 'json') {
169 // Try to parse the NSID from the file
170 try {
171 $contents = file_get_contents($file->getPathname());
172 $data = json_decode($contents, true);
173
174 if (isset($data['id'])) {
175 $nsids[] = $data['id'];
176 }
177 } catch (\Exception $e) {
178 // Skip invalid files
179 continue;
180 }
181 }
182 }
183 }
184
185 return array_unique($nsids);
186 }
187
188 /**
189 * Check if schema exists.
190 */
191 public function exists(string $nsid): bool
192 {
193 try {
194 $this->load($nsid);
195
196 return true;
197 } catch (SchemaNotFoundException) {
198 return false;
199 }
200 }
201
202 /**
203 * Load schema from configured sources.
204 */
205 protected function loadFromSources(string $nsid): array
206 {
207 foreach ($this->sources as $source) {
208 // Try to load from this source
209 $schema = $this->loadFromSource($nsid, $source);
210
211 if ($schema !== null) {
212 return $schema;
213 }
214 }
215
216 // Try DNS resolution as fallback if enabled
217 if ($this->dnsResolutionEnabled) {
218 $schema = $this->loadViaDns($nsid);
219
220 if ($schema !== null) {
221 return $schema;
222 }
223 }
224
225 throw SchemaNotFoundException::forNsid($nsid);
226 }
227
228 /**
229 * Load schema from a specific source directory.
230 */
231 protected function loadFromSource(string $nsid, string $source): ?array
232 {
233 // Try NSID-based path (app.bsky.feed.post -> app/bsky/feed/post.json)
234 $nsidPath = $this->nsidToPath($nsid);
235 $jsonPath = $source.'/'.$nsidPath.'.json';
236
237 if (file_exists($jsonPath)) {
238 return $this->loadJsonFile($jsonPath, $nsid);
239 }
240
241 // Try PHP file
242 $phpPath = $source.'/'.$nsidPath.'.php';
243
244 if (file_exists($phpPath)) {
245 return $this->loadPhpFile($phpPath, $nsid);
246 }
247
248 // Try flat structure (app.bsky.feed.post.json)
249 $flatJsonPath = $source.'/'.$nsid.'.json';
250
251 if (file_exists($flatJsonPath)) {
252 return $this->loadJsonFile($flatJsonPath, $nsid);
253 }
254
255 $flatPhpPath = $source.'/'.$nsid.'.php';
256
257 if (file_exists($flatPhpPath)) {
258 return $this->loadPhpFile($flatPhpPath, $nsid);
259 }
260
261 return null;
262 }
263
264 /**
265 * Convert NSID to file path (app.bsky.feed.post -> app/bsky/feed/post).
266 */
267 protected function nsidToPath(string $nsid): string
268 {
269 return str_replace('.', '/', $nsid);
270 }
271
272 /**
273 * Load and parse JSON file.
274 */
275 protected function loadJsonFile(string $path, string $nsid): array
276 {
277 $contents = file_get_contents($path);
278
279 if ($contents === false) {
280 throw SchemaNotFoundException::forFile($path);
281 }
282
283 $data = json_decode($contents, true);
284
285 if (json_last_error() !== JSON_ERROR_NONE) {
286 throw SchemaParseException::invalidJson($nsid, json_last_error_msg());
287 }
288
289 if (! is_array($data)) {
290 throw SchemaParseException::malformed($nsid, 'Schema must be a JSON object');
291 }
292
293 return $data;
294 }
295
296 /**
297 * Load PHP file returning array.
298 */
299 protected function loadPhpFile(string $path, string $nsid): array
300 {
301 $data = include $path;
302
303 if (! is_array($data)) {
304 throw SchemaParseException::malformed($nsid, 'PHP file must return an array');
305 }
306
307 return $data;
308 }
309
310 /**
311 * Get cache key for NSID.
312 */
313 protected function getCacheKey(string $nsid): string
314 {
315 return "{$this->cachePrefix}:parsed:{$nsid}";
316 }
317
318 /**
319 * Clear cached schema.
320 */
321 public function clearCache(?string $nsid = null): void
322 {
323 if ($nsid === null) {
324 // Clear all memory cache
325 $this->memoryCache = [];
326
327 // Note: Can't easily clear all Laravel cache entries with prefix
328 // Users should call Cache::flush() or use cache tags if needed
329 return;
330 }
331
332 // Clear specific NSID from memory cache
333 unset($this->memoryCache[$nsid]);
334
335 // Clear from Laravel cache
336 if ($this->useCache) {
337 Cache::forget($this->getCacheKey($nsid));
338 }
339 }
340
341 /**
342 * Get all cached NSIDs from memory.
343 *
344 * @return array<string>
345 */
346 public function getCachedNsids(): array
347 {
348 return array_keys($this->memoryCache);
349 }
350
351 /**
352 * Load schema via DNS resolution following AT Protocol spec.
353 *
354 * AT Protocol DNS-based lexicon discovery:
355 * 1. Query DNS TXT record at _lexicon.<authority-domain>
356 * 2. Extract DID from TXT record (format: did=<DID>)
357 * 3. Resolve DID to PDS endpoint (requires atp-resolver package)
358 * 4. Fetch lexicon from repository via com.atproto.repo.getRecord
359 */
360 protected function loadViaDns(string $nsid): ?array
361 {
362 // Check if atp-resolver is available
363 if (! $this->hasResolver) {
364 $this->showResolverWarning();
365
366 return null;
367 }
368
369 try {
370 $nsidParsed = Nsid::parse($nsid);
371
372 // Step 1: Query DNS TXT record for DID
373 $did = $this->queryLexiconDid($nsidParsed);
374 if ($did === null) {
375 return null;
376 }
377
378 // Step 2: Resolve DID to PDS endpoint
379 $pdsUrl = $this->resolvePdsEndpoint($did);
380 if ($pdsUrl === null) {
381 return null;
382 }
383
384 // Step 3: Fetch lexicon schema from repository
385 return $this->fetchLexiconFromRepository($pdsUrl, $did, $nsid);
386 } catch (\Exception $e) {
387 // Silently fail and return null - will try other sources or fail with main error
388 return null;
389 }
390 }
391
392 /**
393 * Query DNS TXT record for lexicon DID.
394 *
395 * Queries _lexicon.<authority-domain> for TXT record containing did=<DID>
396 */
397 protected function queryLexiconDid(Nsid $nsid): ?string
398 {
399 // Convert authority to domain (e.g., pub.leaflet -> leaflet.pub)
400 $authority = $nsid->getAuthority();
401 $parts = explode('.', $authority);
402 $domain = implode('.', array_reverse($parts));
403
404 // Query DNS TXT record at _lexicon.<domain>
405 $hostname = "_lexicon.{$domain}";
406
407 try {
408 $records = dns_get_record($hostname, DNS_TXT);
409
410 if ($records === false || empty($records)) {
411 return null;
412 }
413
414 // Look for TXT record with did= prefix
415 foreach ($records as $record) {
416 if (isset($record['txt']) && str_starts_with($record['txt'], 'did=')) {
417 return substr($record['txt'], 4); // Remove 'did=' prefix
418 }
419 }
420 } catch (\Exception $e) {
421 // DNS query failed
422 return null;
423 }
424
425 return null;
426 }
427
428 /**
429 * Resolve DID to PDS endpoint using atp-resolver.
430 */
431 protected function resolvePdsEndpoint(string $did): ?string
432 {
433 if (! $this->hasResolver) {
434 return null;
435 }
436
437 try {
438 // Get resolver from Laravel container if available
439 if (function_exists('app') && app()->has(\SocialDept\AtpResolver\Resolver::class)) {
440 $resolver = app(\SocialDept\AtpResolver\Resolver::class);
441 } else {
442 // Can't instantiate without dependencies
443 return null;
444 }
445
446 // Use the resolvePds method which handles DID resolution and PDS extraction
447 return $resolver->resolvePds($did);
448 } catch (\Exception $e) {
449 return null;
450 }
451 }
452
453 /**
454 * Fetch lexicon schema from AT Protocol repository.
455 */
456 protected function fetchLexiconFromRepository(string $pdsUrl, string $did, string $nsid): ?array
457 {
458 try {
459 // Construct XRPC call to com.atproto.repo.getRecord
460 $response = Http::timeout($this->httpTimeout)
461 ->get("{$pdsUrl}/xrpc/com.atproto.repo.getRecord", [
462 'repo' => $did,
463 'collection' => 'com.atproto.lexicon.schema',
464 'rkey' => $nsid,
465 ]);
466
467 if ($response->successful()) {
468 $data = $response->json();
469
470 // Extract the lexicon schema from the record value
471 if (isset($data['value']) && is_array($data['value']) && isset($data['value']['lexicon'])) {
472 return $data['value'];
473 }
474 }
475 } catch (\Exception $e) {
476 return null;
477 }
478
479 return null;
480 }
481
482 /**
483 * Show warning about missing atp-resolver package.
484 */
485 protected function showResolverWarning(): void
486 {
487 if (self::$resolverWarningShown) {
488 return;
489 }
490
491 if (function_exists('logger')) {
492 logger()->warning(
493 'DNS-based lexicon resolution requires the socialdept/atp-resolver package. '.
494 'Install it with: composer require socialdept/atp-resolver '.
495 'Falling back to local lexicon sources only.'
496 );
497 }
498
499 self::$resolverWarningShown = true;
500 }
501}