1//go:build localsearch
2
3package search
4
5import (
6 "context"
7 "crypto/tls"
8 "io"
9 "log/slog"
10 "net/http"
11 "testing"
12
13 appbsky "github.com/bluesky-social/indigo/api/bsky"
14 "github.com/bluesky-social/indigo/atproto/identity"
15 "github.com/bluesky-social/indigo/atproto/syntax"
16
17 "github.com/ipfs/go-cid"
18 es "github.com/opensearch-project/opensearch-go/v2"
19 "github.com/stretchr/testify/assert"
20 "gorm.io/driver/sqlite"
21 "gorm.io/gorm"
22)
23
// Index names used by the local-search integration tests in this file.
// testServer deletes and re-creates both of them on every call.
var (
	// testPostIndex is the index the tests write posts to and query.
	testPostIndex string = "palomar_test_post"
	// testProfileIndex is the profile index handed to the test server.
	testProfileIndex string = "palomar_test_profile"
)
28
29func testEsClient(t *testing.T) *es.Client {
30 cfg := es.Config{
31 Addresses: []string{"http://localhost:9200"},
32 Username: "admin",
33 Password: "0penSearch-Pal0mar",
34 CACert: nil,
35 Transport: &http.Transport{
36 MaxIdleConnsPerHost: 5,
37 TLSClientConfig: &tls.Config{
38 InsecureSkipVerify: true,
39 },
40 },
41 }
42 escli, err := es.NewClient(cfg)
43 if err != nil {
44 t.Fatal(err)
45 }
46 info, err := escli.Info()
47 if err != nil {
48 t.Fatal(err)
49 }
50 info.Body.Close()
51 return escli
52
53}
54
55func testServer(ctx context.Context, t *testing.T, escli *es.Client, dir identity.Directory) *Server {
56 db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared"), &gorm.Config{})
57 if err != nil {
58 t.Fatal(err)
59 }
60
61 srv, err := NewServer(
62 db,
63 escli,
64 dir,
65 Config{
66 RelayHost: "wss://relay.invalid",
67 PostIndex: testPostIndex,
68 ProfileIndex: testProfileIndex,
69 Logger: slog.Default(),
70 RelaySyncRateLimit: 1,
71 IndexMaxConcurrency: 1,
72 },
73 )
74 if err != nil {
75 t.Fatal(err)
76 }
77
78 // NOTE: skipping errors
79 resp, _ := srv.escli.Indices.Delete([]string{testPostIndex, testProfileIndex})
80 defer resp.Body.Close()
81 io.ReadAll(resp.Body)
82
83 if err := srv.EnsureIndices(ctx); err != nil {
84 t.Fatal(err)
85 }
86
87 return srv
88}
89
90func TestJapaneseRegressions(t *testing.T) {
91 assert := assert.New(t)
92 ctx := context.Background()
93 escli := testEsClient(t)
94 dir := identity.NewMockDirectory()
95 srv := testServer(ctx, t, escli, &dir)
96 ident := identity.Identity{
97 DID: syntax.DID("did:plc:abc111"),
98 Handle: syntax.Handle("handle.example.com"),
99 }
100
101 res, err := DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20)
102 if err != nil {
103 t.Fatal(err)
104 }
105 assert.Equal(0, len(res.Hits.Hits))
106
107 p1 := appbsky.FeedPost{Text: "basic english post", CreatedAt: "2024-01-02T03:04:05.006Z"}
108 assert.NoError(srv.indexPost(ctx, &ident, &p1, "app.bsky.feed.post/3kpnillluoh2y", cid.Undef))
109
110 // https://github.com/bluesky-social/indigo/issues/302
111 p2 := appbsky.FeedPost{Text: "学校から帰って熱いお風呂に入ったら力一杯がんばる", CreatedAt: "2024-01-02T03:04:05.006Z"}
112 assert.NoError(srv.indexPost(ctx, &ident, &p2, "app.bsky.feed.post/3kpnillluo222", cid.Undef))
113 p3 := appbsky.FeedPost{Text: "熱力学", CreatedAt: "2024-01-02T03:04:05.006Z"}
114 assert.NoError(srv.indexPost(ctx, &ident, &p3, "app.bsky.feed.post/3kpnillluo333", cid.Undef))
115 p4 := appbsky.FeedPost{Text: "東京都", CreatedAt: "2024-01-02T03:04:05.006Z"}
116 assert.NoError(srv.indexPost(ctx, &ident, &p4, "app.bsky.feed.post/3kpnillluo444", cid.Undef))
117 p5 := appbsky.FeedPost{Text: "京都", CreatedAt: "2024-01-02T03:04:05.006Z"}
118 assert.NoError(srv.indexPost(ctx, &ident, &p5, "app.bsky.feed.post/3kpnillluo555", cid.Undef))
119 p6 := appbsky.FeedPost{Text: "パリ", CreatedAt: "2024-01-02T03:04:05.006Z"}
120 assert.NoError(srv.indexPost(ctx, &ident, &p6, "app.bsky.feed.post/3kpnillluo666", cid.Undef))
121 p7 := appbsky.FeedPost{Text: "ハリー・ポッター", CreatedAt: "2024-01-02T03:04:05.006Z"}
122 assert.NoError(srv.indexPost(ctx, &ident, &p7, "app.bsky.feed.post/3kpnillluo777", cid.Undef))
123 p8 := appbsky.FeedPost{Text: "ハリ", CreatedAt: "2024-01-02T03:04:05.006Z"}
124 assert.NoError(srv.indexPost(ctx, &ident, &p8, "app.bsky.feed.post/3kpnillluo223", cid.Undef))
125 p9 := appbsky.FeedPost{Text: "multilingual 多言語", CreatedAt: "2024-01-02T03:04:05.006Z"}
126 assert.NoError(srv.indexPost(ctx, &ident, &p9, "app.bsky.feed.post/3kpnillluo224", cid.Undef))
127
128 _, err = srv.escli.Indices.Refresh()
129 assert.NoError(err)
130
131 // expect all to be indexed
132 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "*", 0, 20)
133 if err != nil {
134 t.Fatal(err)
135 }
136 assert.Equal(9, len(res.Hits.Hits))
137
138 // check that english matches (single post)
139 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20)
140 if err != nil {
141 t.Fatal(err)
142 }
143 assert.Equal(1, len(res.Hits.Hits))
144
145 // "thermodynamics"; should return only one match
146 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "熱力学", 0, 20)
147 if err != nil {
148 t.Fatal(err)
149 }
150 assert.Equal(1, len(res.Hits.Hits))
151
152 // "Kyoto"; should return only one match
153 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "京都", 0, 20)
154 if err != nil {
155 t.Fatal(err)
156 }
157 assert.Equal(1, len(res.Hits.Hits))
158
159 // "Paris"; should return only one match
160 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "パリ", 0, 20)
161 if err != nil {
162 t.Fatal(err)
163 }
164 assert.Equal(1, len(res.Hits.Hits))
165
166 // should return only one match
167 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "ハリー", 0, 20)
168 if err != nil {
169 t.Fatal(err)
170 }
171 assert.Equal(1, len(res.Hits.Hits))
172
173 // part of a word; should match none
174 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "ハ", 0, 20)
175 if err != nil {
176 t.Fatal(err)
177 }
178 assert.Equal(0, len(res.Hits.Hits))
179
180 // should match both ways, and together
181 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "multilingual", 0, 20)
182 if err != nil {
183 t.Fatal(err)
184 }
185 assert.Equal(1, len(res.Hits.Hits))
186
187 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "多言語", 0, 20)
188 if err != nil {
189 t.Fatal(err)
190 }
191 assert.Equal(1, len(res.Hits.Hits))
192 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "multilingual 多言語", 0, 20)
193 if err != nil {
194 t.Fatal(err)
195 }
196 assert.Equal(1, len(res.Hits.Hits))
197 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "\"multilingual 多言語\"", 0, 20)
198 if err != nil {
199 t.Fatal(err)
200 }
201 assert.Equal(1, len(res.Hits.Hits))
202}
203
204func TestParsedQuery(t *testing.T) {
205 assert := assert.New(t)
206 ctx := context.Background()
207 escli := testEsClient(t)
208 dir := identity.NewMockDirectory()
209 srv := testServer(ctx, t, escli, &dir)
210 ident := identity.Identity{
211 DID: syntax.DID("did:plc:abc111"),
212 Handle: syntax.Handle("handle.example.com"),
213 }
214 other := identity.Identity{
215 DID: syntax.DID("did:plc:abc222"),
216 Handle: syntax.Handle("other.example.com"),
217 }
218 dir.Insert(ident)
219 dir.Insert(other)
220
221 res, err := DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20)
222 if err != nil {
223 t.Fatal(err)
224 }
225 assert.Equal(0, len(res.Hits.Hits))
226
227 p1 := appbsky.FeedPost{Text: "basic english post", CreatedAt: "2024-01-02T03:04:05.006Z"}
228 assert.NoError(srv.indexPost(ctx, &ident, &p1, "app.bsky.feed.post/3kpnillluoh2y", cid.Undef))
229 p2 := appbsky.FeedPost{Text: "another english post", CreatedAt: "2024-01-02T03:04:05.006Z"}
230 assert.NoError(srv.indexPost(ctx, &ident, &p2, "app.bsky.feed.post/3kpnilllu2222", cid.Undef))
231 p3 := appbsky.FeedPost{
232 Text: "#cat post with hashtag",
233 CreatedAt: "2024-01-02T03:04:05.006Z",
234 Facets: []*appbsky.RichtextFacet{
235 &appbsky.RichtextFacet{
236 Features: []*appbsky.RichtextFacet_Features_Elem{
237 &appbsky.RichtextFacet_Features_Elem{
238 RichtextFacet_Tag: &appbsky.RichtextFacet_Tag{
239 Tag: "trick",
240 },
241 },
242 },
243 Index: &appbsky.RichtextFacet_ByteSlice{
244 ByteStart: 0,
245 ByteEnd: 4,
246 },
247 },
248 },
249 }
250 assert.NoError(srv.indexPost(ctx, &ident, &p3, "app.bsky.feed.post/3kpnilllu3333", cid.Undef))
251 p4 := appbsky.FeedPost{
252 Text: "@other.example.com post with mention",
253 CreatedAt: "2024-01-02T03:04:05.006Z",
254 Facets: []*appbsky.RichtextFacet{
255 &appbsky.RichtextFacet{
256 Features: []*appbsky.RichtextFacet_Features_Elem{
257 &appbsky.RichtextFacet_Features_Elem{
258 RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{
259 Did: "did:plc:abc222",
260 },
261 },
262 },
263 Index: &appbsky.RichtextFacet_ByteSlice{
264 ByteStart: 0,
265 ByteEnd: 18,
266 },
267 },
268 },
269 }
270 assert.NoError(srv.indexPost(ctx, &ident, &p4, "app.bsky.feed.post/3kpnilllu4444", cid.Undef))
271 p5 := appbsky.FeedPost{
272 Text: "https://bsky.app... post with hashtag #cat",
273 CreatedAt: "2024-01-02T03:04:05.006Z",
274 Facets: []*appbsky.RichtextFacet{
275 &appbsky.RichtextFacet{
276 Features: []*appbsky.RichtextFacet_Features_Elem{
277 &appbsky.RichtextFacet_Features_Elem{
278 RichtextFacet_Link: &appbsky.RichtextFacet_Link{
279 Uri: "htTPS://www.en.wikipedia.org/wiki/CBOR?q=3&a=1&utm_campaign=123",
280 },
281 },
282 },
283 Index: &appbsky.RichtextFacet_ByteSlice{
284 ByteStart: 0,
285 ByteEnd: 19,
286 },
287 },
288 },
289 }
290 assert.NoError(srv.indexPost(ctx, &ident, &p5, "app.bsky.feed.post/3kpnilllu5555", cid.Undef))
291 p6 := appbsky.FeedPost{
292 Text: "post with lang (deutsch)",
293 CreatedAt: "2024-01-02T03:04:05.006Z",
294 Langs: []string{"ja", "de-DE"},
295 }
296 assert.NoError(srv.indexPost(ctx, &ident, &p6, "app.bsky.feed.post/3kpnilllu6666", cid.Undef))
297 p7 := appbsky.FeedPost{Text: "post with old date", CreatedAt: "2020-05-03T03:04:05.006Z"}
298 assert.NoError(srv.indexPost(ctx, &ident, &p7, "app.bsky.feed.post/3kpnilllu7777", cid.Undef))
299
300 _, err = srv.escli.Indices.Refresh()
301 assert.NoError(err)
302
303 // expect all to be indexed
304 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "*", 0, 20)
305 if err != nil {
306 t.Fatal(err)
307 }
308 assert.Equal(7, len(res.Hits.Hits))
309
310 // check that english matches both
311 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20)
312 if err != nil {
313 t.Fatal(err)
314 }
315 assert.Equal(2, len(res.Hits.Hits))
316
317 // phrase only matches one
318 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "\"basic english\"", 0, 20)
319 if err != nil {
320 t.Fatal(err)
321 }
322 assert.Equal(1, len(res.Hits.Hits))
323
324 // posts-by
325 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "from:handle.example.com", 0, 20)
326 if err != nil {
327 t.Fatal(err)
328 }
329 assert.Equal(7, len(res.Hits.Hits))
330 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "from:@handle.example.com", 0, 20)
331 if err != nil {
332 t.Fatal(err)
333 }
334 assert.Equal(7, len(res.Hits.Hits))
335
336 // hashtag query
337 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "post #trick", 0, 20)
338 if err != nil {
339 t.Fatal(err)
340 }
341 assert.Equal(1, len(res.Hits.Hits))
342 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "post #Trick", 0, 20)
343 if err != nil {
344 t.Fatal(err)
345 }
346 assert.Equal(1, len(res.Hits.Hits))
347 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "post #trick #allMustMatch", 0, 20)
348 if err != nil {
349 t.Fatal(err)
350 }
351 assert.Equal(0, len(res.Hits.Hits))
352
353 // mention query
354 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "@other.example.com", 0, 20)
355 if err != nil {
356 t.Fatal(err)
357 }
358 assert.Equal(1, len(res.Hits.Hits))
359
360 // URL and domain queries
361 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "https://en.wikipedia.org/wiki/CBOR?a=1&q=3", 0, 20)
362 if err != nil {
363 t.Fatal(err)
364 }
365 assert.Equal(1, len(res.Hits.Hits))
366 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "\"https://en.wikipedia.org/wiki/CBOR?a=1&q=3\"", 0, 20)
367 if err != nil {
368 t.Fatal(err)
369 }
370 assert.Equal(0, len(res.Hits.Hits))
371 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "https://en.wikipedia.org/wiki/CBOR", 0, 20)
372 if err != nil {
373 t.Fatal(err)
374 }
375 assert.Equal(0, len(res.Hits.Hits))
376 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "domain:en.wikipedia.org", 0, 20)
377 if err != nil {
378 t.Fatal(err)
379 }
380 assert.Equal(1, len(res.Hits.Hits))
381
382 // lang filter
383 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "lang:de", 0, 20)
384 if err != nil {
385 t.Fatal(err)
386 }
387 assert.Equal(1, len(res.Hits.Hits))
388
389 // date range filters
390 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "since:2023-01-01T00:00:00Z", 0, 20)
391 if err != nil {
392 t.Fatal(err)
393 }
394 assert.Equal(6, len(res.Hits.Hits))
395 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "since:2023-01-01", 0, 20)
396 if err != nil {
397 t.Fatal(err)
398 }
399 assert.Equal(6, len(res.Hits.Hits))
400 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "until:2023-01-01", 0, 20)
401 if err != nil {
402 t.Fatal(err)
403 }
404 assert.Equal(1, len(res.Hits.Hits))
405 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "until:asdf", 0, 20)
406 if err != nil {
407 t.Fatal(err)
408 }
409 assert.Equal(7, len(res.Hits.Hits))
410}