+41
atproto/syntax/language.go
+41
atproto/syntax/language.go
···
1
+
package syntax
2
+
3
+
import (
4
+
"fmt"
5
+
"regexp"
6
+
)
7
+
8
+
// Language represents a language specifier in string format, as would pass Lexicon syntax validation.
9
+
//
10
+
// Always use [ParseLanguage] instead of wrapping strings directly, especially when working with network input.
11
+
//
12
+
// The syntax is BCP-47. This is a partial/naive parsing implementation, designed for fast validation and exact-string passthrough with no normalization. For actually working with BCP-47 language specifiers in atproto code bases, we recommend the golang.org/x/text/language package.
13
+
type Language string // NOTE: the zero value "" is not a valid language; ParseLanguage rejects it
14
+
15
+
func ParseLanguage(raw string) (Language, error) {
16
+
if len(raw) > 128 {
17
+
return "", fmt.Errorf("Language is too long (128 chars max)")
18
+
}
19
+
var langRegex = regexp.MustCompile(`^(i|[a-z]{2,3})(-[a-zA-Z0-9]+)*$`)
20
+
if !langRegex.MatchString(raw) {
21
+
return "", fmt.Errorf("Language syntax didn't validate via regex")
22
+
}
23
+
return Language(raw), nil
24
+
}
25
+
26
+
func (l Language) String() string {
27
+
return string(l)
28
+
}
29
+
30
+
func (l Language) MarshalText() ([]byte, error) {
31
+
return []byte(l.String()), nil
32
+
}
33
+
34
+
func (l *Language) UnmarshalText(text []byte) error {
35
+
lang, err := ParseLanguage(string(text))
36
+
if err != nil {
37
+
return err
38
+
}
39
+
*l = lang
40
+
return nil
41
+
}
+50
atproto/syntax/language_test.go
+50
atproto/syntax/language_test.go
···
1
+
package syntax
2
+
3
+
import (
4
+
"bufio"
5
+
"fmt"
6
+
"os"
7
+
"testing"
8
+
9
+
"github.com/stretchr/testify/assert"
10
+
)
11
+
12
+
func TestInteropLanguagesValid(t *testing.T) {
13
+
assert := assert.New(t)
14
+
file, err := os.Open("testdata/language_syntax_valid.txt")
15
+
assert.NoError(err)
16
+
defer file.Close()
17
+
scanner := bufio.NewScanner(file)
18
+
for scanner.Scan() {
19
+
line := scanner.Text()
20
+
if len(line) == 0 || line[0] == '#' {
21
+
continue
22
+
}
23
+
_, err := ParseLanguage(line)
24
+
if err != nil {
25
+
fmt.Println("GOOD: " + line)
26
+
}
27
+
assert.NoError(err)
28
+
}
29
+
assert.NoError(scanner.Err())
30
+
}
31
+
32
+
func TestInteropLanguagesInvalid(t *testing.T) {
33
+
assert := assert.New(t)
34
+
file, err := os.Open("testdata/language_syntax_invalid.txt")
35
+
assert.NoError(err)
36
+
defer file.Close()
37
+
scanner := bufio.NewScanner(file)
38
+
for scanner.Scan() {
39
+
line := scanner.Text()
40
+
if len(line) == 0 || line[0] == '#' {
41
+
continue
42
+
}
43
+
_, err := ParseLanguage(line)
44
+
if err == nil {
45
+
fmt.Println("BAD: " + line)
46
+
}
47
+
assert.Error(err)
48
+
}
49
+
assert.NoError(scanner.Err())
50
+
}
+10
atproto/syntax/testdata/language_syntax_invalid.txt
+10
atproto/syntax/testdata/language_syntax_invalid.txt
+18
atproto/syntax/testdata/language_syntax_valid.txt
+18
atproto/syntax/testdata/language_syntax_valid.txt
···
1
+
ja
2
+
ban
3
+
pt-BR
4
+
hy-Latn-IT-arevela
5
+
en-GB
6
+
zh-Hant
7
+
sgn-BE-NL
8
+
es-419
9
+
en-GB-boont-r-extended-sequence-x-private
10
+
11
+
# grandfathered
12
+
zh-hakka
13
+
i-default
14
+
i-navajo
15
+
16
+
# https://github.com/sebinsua/ietf-language-tag-regex/blob/master/test.js
17
+
de-CH-1901
18
+
qaa-Qaaa-QM-x-southern