package keyword

import (
	"regexp"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestTokenizeText checks that TokenizeText lowercases its input, folds
// diacritics ("Gdańsk" -> "gdansk"), keeps non-Latin letters such as Thai,
// and splits tokens on punctuation, symbols, underscores and whitespace.
func TestTokenizeText(t *testing.T) {
	assert := assert.New(t)

	fixtures := []struct {
		text string
		out  []string
	}{
		{text: "", out: []string{}},
		{text: "Hello, โลก!", out: []string{"hello", "โลก"}},
		{text: "Gdańsk", out: []string{"gdansk"}},
		{text: " foo1;bar2,baz3...", out: []string{"foo1", "bar2", "baz3"}},
		{text: "foo*bar", out: []string{"foo", "bar"}},
		{text: "foo-bar", out: []string{"foo", "bar"}},
		{text: "foo_bar", out: []string{"foo", "bar"}},
	}

	for _, fix := range fixtures {
		assert.Equal(fix.out, TokenizeText(fix.text))
	}
}

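// BenchmarkTokenizeText is an illustrative sketch rather than part of the
// original suite: it assumes TokenizeText is pure and safe to call in a
// loop, and simply measures tokenization of a short mixed-punctuation string
// taken from the fixtures above.
func BenchmarkTokenizeText(b *testing.B) {
	for i := 0; i < b.N; i++ {
		TokenizeText(" foo1;bar2,baz3...")
	}
}
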
// TestTokenizeTextWithCensorChars checks that TokenizeTextSkippingCensorChars
// keeps the censor characters '*', '-', '_' and '#' inside tokens (so
// "foo*bar" stays whole) while still splitting on other punctuation such as
// ',' and '&'.
func TestTokenizeTextWithCensorChars(t *testing.T) {
	assert := assert.New(t)

	fixtures := []struct {
		text string
		out  []string
	}{
		{text: "", out: []string{}},
		{text: "Hello, โลก!", out: []string{"hello", "โลก"}},
		{text: "Gdańsk", out: []string{"gdansk"}},
		{text: " foo1;bar2,baz3...", out: []string{"foo1", "bar2", "baz3"}},
		{text: "foo*bar,foo&bar", out: []string{"foo*bar", "foo", "bar"}},
		{text: "foo-bar,foo&bar", out: []string{"foo-bar", "foo", "bar"}},
		{text: "foo_bar,foo&bar", out: []string{"foo_bar", "foo", "bar"}},
		{text: "foo#bar,foo&bar", out: []string{"foo#bar", "foo", "bar"}},
	}

	for _, fix := range fixtures {
		assert.Equal(fix.out, TokenizeTextSkippingCensorChars(fix.text))
	}
}

// TestTokenizeTextWithCustomRegex checks that TokenizeTextWithRegex strips the
// characters matched by a caller-supplied pattern before tokenizing; the
// pattern used here keeps letters, numbers, whitespace and '&', so "foo&bar"
// survives as a single token while '*' and ',' still act as separators.
func TestTokenizeTextWithCustomRegex(t *testing.T) {
	assert := assert.New(t)

	fixtures := []struct {
		text string
		out  []string
	}{
		{text: "", out: []string{}},
		{text: "Hello, โลก!", out: []string{"hello", "โลก"}},
		{text: "Gdańsk", out: []string{"gdansk"}},
		{text: " foo1;bar2,baz3...", out: []string{"foo1", "bar2", "baz3"}},
		{text: "foo*bar", out: []string{"foo", "bar"}},
		{text: "foo&bar,foo*bar", out: []string{"foo&bar", "foo", "bar"}},
	}

	regex := regexp.MustCompile(`[^\pL\pN\s&]`)
	for _, fix := range fixtures {
		assert.Equal(fix.out, TokenizeTextWithRegex(fix.text, regex))
	}
}

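// BenchmarkTokenizeTextWithRegex is an illustrative sketch, not part of the
// original fixtures: it mirrors the test above by compiling the custom
// pattern once and reusing the same *regexp.Regexp across iterations, which
// avoids recompiling the pattern on every call.
func BenchmarkTokenizeTextWithRegex(b *testing.B) {
	regex := regexp.MustCompile(`[^\pL\pN\s&]`)
	for i := 0; i < b.N; i++ {
		TokenizeTextWithRegex("foo&bar,foo*bar", regex)
	}
}
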
// TestTokenizeIdentifier checks identifier tokenization: a dotted, hyphenated
// handle is split into its segments, while "@a-b-c" yields no keywords at all.
func TestTokenizeIdentifier(t *testing.T) {
	assert := assert.New(t)

	fixtures := []struct {
		ident string
		out   []string
	}{
		{ident: "", out: []string{}},
		{ident: "the-handle.example.com", out: []string{"the", "handle", "example", "com"}},
		{ident: "@a-b-c", out: []string{}},
	}

	for _, fix := range fixtures {
		assert.Equal(fix.out, TokenizeIdentifier(fix.ident))
	}
}
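
// BenchmarkTokenizeIdentifier rounds out the sketches above; like them it is
// an added illustration, not part of the original suite, and only exercises
// the dotted-handle case from the fixtures.
func BenchmarkTokenizeIdentifier(b *testing.B) {
	for i := 0; i < b.N; i++ {
		TokenizeIdentifier("the-handle.example.com")
	}
}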