Parse and validate AT Protocol Lexicons with DTO generation for Laravel
1<?php
2
3namespace SocialDept\AtpSchema\Validation\Rules;
4
5use Closure;
6use Illuminate\Contracts\Validation\ValidationRule;
7
/**
 * Validation rule that accepts only well-formed BCP 47 (RFC 5646) language tags.
 *
 * The check is purely syntactic: subtags are not looked up in the IANA
 * Language Subtag Registry.
 */
class Language implements ValidationRule
{
    /**
     * Run the validation rule.
     *
     * @param  string  $attribute  Name of the attribute under validation (used in messages).
     * @param  mixed  $value  Value to validate; anything that is not a string fails.
     * @param  Closure  $fail  Callback invoked with the failure message.
     */
    public function validate(string $attribute, mixed $value, Closure $fail): void
    {
        if (! is_string($value)) {
            $fail("The {$attribute} must be a string.");

            return;
        }

        if (! $this->isValidBcp47($value)) {
            $fail("The {$attribute} is not a valid BCP 47 language code.");
        }
    }

    /**
     * Check whether a string is a well-formed BCP 47 language tag.
     *
     * Format: language[-extlang][-script][-region][-variant]*[-extension]*[-x-privateuse]
     * Examples: en, en-US, zh-Hans, zh-cmn-Hans-CN, de-CH-1901,
     *           en-u-ca-gregory, en-x-a-b, x-private, en-GB-oed (grandfathered)
     *
     * Matching is case-insensitive, as required by RFC 5646.
     *
     * @param  string  $value  Candidate language tag.
     * @return bool True when the whole string matches the RFC 5646 tag grammar.
     */
    protected function isValidBcp47(string $value): bool
    {
        // Modifiers: x = free-spacing with comments, i = case-insensitive,
        // D = "$" matches only at the very end, so a trailing newline is rejected.
        $pattern = '/^(?:
            # Irregular grandfathered tags. The regular ones (art-lojban,
            # zh-min-nan, ...) already fit the langtag grammar below.
            en-GB-oed
            | i-(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)
            | sgn-(?:BE-FR|BE-NL|CH-DE)
            |
            # Regular language tag
            (?:[a-z]{2,3}(?:-[a-z]{3}){0,3}|[a-z]{4,8})    # Primary language, up to 3 extlang subtags
            (?:-[a-z]{4})?                                 # Script (optional)
            (?:-(?:[a-z]{2}|[0-9]{3}))?                    # Region (optional)
            (?:-(?:[a-z0-9]{5,8}|[0-9][a-z0-9]{3}))*       # Variant (optional, repeatable)
            (?:-[0-9a-wy-z](?:-[a-z0-9]{2,8})+)*           # Extension: singleton plus 1+ subtags (repeatable)
            (?:-x(?:-[a-z0-9]{1,8})+)?                     # Private use: x plus 1+ subtags (optional)
            |
            # Tag consisting only of private-use subtags
            x(?:-[a-z0-9]{1,8})+
        )$/xiD';

        // preg_match() returns 1 on match, 0 on no match and false on error;
        // only an actual match counts as valid.
        return preg_match($pattern, $value) === 1;
    }
}
61}