fork of indigo with slightly nicer lexgen
at main 12 kB view raw
1//go:build localsearch 2 3package search 4 5import ( 6 "context" 7 "crypto/tls" 8 "io" 9 "log/slog" 10 "net/http" 11 "testing" 12 13 appbsky "github.com/bluesky-social/indigo/api/bsky" 14 "github.com/bluesky-social/indigo/atproto/identity" 15 "github.com/bluesky-social/indigo/atproto/syntax" 16 17 "github.com/ipfs/go-cid" 18 es "github.com/opensearch-project/opensearch-go/v2" 19 "github.com/stretchr/testify/assert" 20 "gorm.io/driver/sqlite" 21 "gorm.io/gorm" 22) 23 24var ( 25 testPostIndex = "palomar_test_post" 26 testProfileIndex = "palomar_test_profile" 27) 28 29func testEsClient(t *testing.T) *es.Client { 30 cfg := es.Config{ 31 Addresses: []string{"http://localhost:9200"}, 32 Username: "admin", 33 Password: "0penSearch-Pal0mar", 34 CACert: nil, 35 Transport: &http.Transport{ 36 MaxIdleConnsPerHost: 5, 37 TLSClientConfig: &tls.Config{ 38 InsecureSkipVerify: true, 39 }, 40 }, 41 } 42 escli, err := es.NewClient(cfg) 43 if err != nil { 44 t.Fatal(err) 45 } 46 info, err := escli.Info() 47 if err != nil { 48 t.Fatal(err) 49 } 50 info.Body.Close() 51 return escli 52 53} 54 55func testServer(ctx context.Context, t *testing.T, escli *es.Client, dir identity.Directory) *Server { 56 db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared"), &gorm.Config{}) 57 if err != nil { 58 t.Fatal(err) 59 } 60 61 srv, err := NewServer( 62 db, 63 escli, 64 dir, 65 Config{ 66 RelayHost: "wss://relay.invalid", 67 PostIndex: testPostIndex, 68 ProfileIndex: testProfileIndex, 69 Logger: slog.Default(), 70 RelaySyncRateLimit: 1, 71 IndexMaxConcurrency: 1, 72 }, 73 ) 74 if err != nil { 75 t.Fatal(err) 76 } 77 78 // NOTE: skipping errors 79 resp, _ := srv.escli.Indices.Delete([]string{testPostIndex, testProfileIndex}) 80 defer resp.Body.Close() 81 io.ReadAll(resp.Body) 82 83 if err := srv.EnsureIndices(ctx); err != nil { 84 t.Fatal(err) 85 } 86 87 return srv 88} 89 90func TestJapaneseRegressions(t *testing.T) { 91 assert := assert.New(t) 92 ctx := context.Background() 93 escli := testEsClient(t) 94 dir := identity.NewMockDirectory() 95 srv := testServer(ctx, t, escli, &dir) 96 ident := identity.Identity{ 97 DID: syntax.DID("did:plc:abc111"), 98 Handle: syntax.Handle("handle.example.com"), 99 } 100 101 res, err := DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20) 102 if err != nil { 103 t.Fatal(err) 104 } 105 assert.Equal(0, len(res.Hits.Hits)) 106 107 p1 := appbsky.FeedPost{Text: "basic english post", CreatedAt: "2024-01-02T03:04:05.006Z"} 108 assert.NoError(srv.indexPost(ctx, &ident, &p1, "app.bsky.feed.post/3kpnillluoh2y", cid.Undef)) 109 110 // https://github.com/bluesky-social/indigo/issues/302 111 p2 := appbsky.FeedPost{Text: "学校から帰って熱いお風呂に入ったら力一杯がんばる", CreatedAt: "2024-01-02T03:04:05.006Z"} 112 assert.NoError(srv.indexPost(ctx, &ident, &p2, "app.bsky.feed.post/3kpnillluo222", cid.Undef)) 113 p3 := appbsky.FeedPost{Text: "熱力学", CreatedAt: "2024-01-02T03:04:05.006Z"} 114 assert.NoError(srv.indexPost(ctx, &ident, &p3, "app.bsky.feed.post/3kpnillluo333", cid.Undef)) 115 p4 := appbsky.FeedPost{Text: "東京都", CreatedAt: "2024-01-02T03:04:05.006Z"} 116 assert.NoError(srv.indexPost(ctx, &ident, &p4, "app.bsky.feed.post/3kpnillluo444", cid.Undef)) 117 p5 := appbsky.FeedPost{Text: "京都", CreatedAt: "2024-01-02T03:04:05.006Z"} 118 assert.NoError(srv.indexPost(ctx, &ident, &p5, "app.bsky.feed.post/3kpnillluo555", cid.Undef)) 119 p6 := appbsky.FeedPost{Text: "パリ", CreatedAt: "2024-01-02T03:04:05.006Z"} 120 assert.NoError(srv.indexPost(ctx, &ident, &p6, "app.bsky.feed.post/3kpnillluo666", cid.Undef)) 121 p7 := appbsky.FeedPost{Text: "ハリー・ポッター", CreatedAt: "2024-01-02T03:04:05.006Z"} 122 assert.NoError(srv.indexPost(ctx, &ident, &p7, "app.bsky.feed.post/3kpnillluo777", cid.Undef)) 123 p8 := appbsky.FeedPost{Text: "ハリ", CreatedAt: "2024-01-02T03:04:05.006Z"} 124 assert.NoError(srv.indexPost(ctx, &ident, &p8, "app.bsky.feed.post/3kpnillluo223", cid.Undef)) 125 p9 := appbsky.FeedPost{Text: "multilingual 多言語", CreatedAt: "2024-01-02T03:04:05.006Z"} 126 assert.NoError(srv.indexPost(ctx, &ident, &p9, "app.bsky.feed.post/3kpnillluo224", cid.Undef)) 127 128 _, err = srv.escli.Indices.Refresh() 129 assert.NoError(err) 130 131 // expect all to be indexed 132 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "*", 0, 20) 133 if err != nil { 134 t.Fatal(err) 135 } 136 assert.Equal(9, len(res.Hits.Hits)) 137 138 // check that english matches (single post) 139 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20) 140 if err != nil { 141 t.Fatal(err) 142 } 143 assert.Equal(1, len(res.Hits.Hits)) 144 145 // "thermodynamics"; should return only one match 146 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "熱力学", 0, 20) 147 if err != nil { 148 t.Fatal(err) 149 } 150 assert.Equal(1, len(res.Hits.Hits)) 151 152 // "Kyoto"; should return only one match 153 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "京都", 0, 20) 154 if err != nil { 155 t.Fatal(err) 156 } 157 assert.Equal(1, len(res.Hits.Hits)) 158 159 // "Paris"; should return only one match 160 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "パリ", 0, 20) 161 if err != nil { 162 t.Fatal(err) 163 } 164 assert.Equal(1, len(res.Hits.Hits)) 165 166 // should return only one match 167 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "ハリー", 0, 20) 168 if err != nil { 169 t.Fatal(err) 170 } 171 assert.Equal(1, len(res.Hits.Hits)) 172 173 // part of a word; should match none 174 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "ハ", 0, 20) 175 if err != nil { 176 t.Fatal(err) 177 } 178 assert.Equal(0, len(res.Hits.Hits)) 179 180 // should match both ways, and together 181 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "multilingual", 0, 20) 182 if err != nil { 183 t.Fatal(err) 184 } 185 assert.Equal(1, len(res.Hits.Hits)) 186 187 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "多言語", 0, 20) 188 if err != nil { 189 t.Fatal(err) 190 } 191 assert.Equal(1, len(res.Hits.Hits)) 192 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "multilingual 多言語", 0, 20) 193 if err != nil { 194 t.Fatal(err) 195 } 196 assert.Equal(1, len(res.Hits.Hits)) 197 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "\"multilingual 多言語\"", 0, 20) 198 if err != nil { 199 t.Fatal(err) 200 } 201 assert.Equal(1, len(res.Hits.Hits)) 202} 203 204func TestParsedQuery(t *testing.T) { 205 assert := assert.New(t) 206 ctx := context.Background() 207 escli := testEsClient(t) 208 dir := identity.NewMockDirectory() 209 srv := testServer(ctx, t, escli, &dir) 210 ident := identity.Identity{ 211 DID: syntax.DID("did:plc:abc111"), 212 Handle: syntax.Handle("handle.example.com"), 213 } 214 other := identity.Identity{ 215 DID: syntax.DID("did:plc:abc222"), 216 Handle: syntax.Handle("other.example.com"), 217 } 218 dir.Insert(ident) 219 dir.Insert(other) 220 221 res, err := DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20) 222 if err != nil { 223 t.Fatal(err) 224 } 225 assert.Equal(0, len(res.Hits.Hits)) 226 227 p1 := appbsky.FeedPost{Text: "basic english post", CreatedAt: "2024-01-02T03:04:05.006Z"} 228 assert.NoError(srv.indexPost(ctx, &ident, &p1, "app.bsky.feed.post/3kpnillluoh2y", cid.Undef)) 229 p2 := appbsky.FeedPost{Text: "another english post", CreatedAt: "2024-01-02T03:04:05.006Z"} 230 assert.NoError(srv.indexPost(ctx, &ident, &p2, "app.bsky.feed.post/3kpnilllu2222", cid.Undef)) 231 p3 := appbsky.FeedPost{ 232 Text: "#cat post with hashtag", 233 CreatedAt: "2024-01-02T03:04:05.006Z", 234 Facets: []*appbsky.RichtextFacet{ 235 &appbsky.RichtextFacet{ 236 Features: []*appbsky.RichtextFacet_Features_Elem{ 237 &appbsky.RichtextFacet_Features_Elem{ 238 RichtextFacet_Tag: &appbsky.RichtextFacet_Tag{ 239 Tag: "trick", 240 }, 241 }, 242 }, 243 Index: &appbsky.RichtextFacet_ByteSlice{ 244 ByteStart: 0, 245 ByteEnd: 4, 246 }, 247 }, 248 }, 249 } 250 assert.NoError(srv.indexPost(ctx, &ident, &p3, "app.bsky.feed.post/3kpnilllu3333", cid.Undef)) 251 p4 := appbsky.FeedPost{ 252 Text: "@other.example.com post with mention", 253 CreatedAt: "2024-01-02T03:04:05.006Z", 254 Facets: []*appbsky.RichtextFacet{ 255 &appbsky.RichtextFacet{ 256 Features: []*appbsky.RichtextFacet_Features_Elem{ 257 &appbsky.RichtextFacet_Features_Elem{ 258 RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ 259 Did: "did:plc:abc222", 260 }, 261 }, 262 }, 263 Index: &appbsky.RichtextFacet_ByteSlice{ 264 ByteStart: 0, 265 ByteEnd: 18, 266 }, 267 }, 268 }, 269 } 270 assert.NoError(srv.indexPost(ctx, &ident, &p4, "app.bsky.feed.post/3kpnilllu4444", cid.Undef)) 271 p5 := appbsky.FeedPost{ 272 Text: "https://bsky.app... post with hashtag #cat", 273 CreatedAt: "2024-01-02T03:04:05.006Z", 274 Facets: []*appbsky.RichtextFacet{ 275 &appbsky.RichtextFacet{ 276 Features: []*appbsky.RichtextFacet_Features_Elem{ 277 &appbsky.RichtextFacet_Features_Elem{ 278 RichtextFacet_Link: &appbsky.RichtextFacet_Link{ 279 Uri: "htTPS://www.en.wikipedia.org/wiki/CBOR?q=3&a=1&utm_campaign=123", 280 }, 281 }, 282 }, 283 Index: &appbsky.RichtextFacet_ByteSlice{ 284 ByteStart: 0, 285 ByteEnd: 19, 286 }, 287 }, 288 }, 289 } 290 assert.NoError(srv.indexPost(ctx, &ident, &p5, "app.bsky.feed.post/3kpnilllu5555", cid.Undef)) 291 p6 := appbsky.FeedPost{ 292 Text: "post with lang (deutsch)", 293 CreatedAt: "2024-01-02T03:04:05.006Z", 294 Langs: []string{"ja", "de-DE"}, 295 } 296 assert.NoError(srv.indexPost(ctx, &ident, &p6, "app.bsky.feed.post/3kpnilllu6666", cid.Undef)) 297 p7 := appbsky.FeedPost{Text: "post with old date", CreatedAt: "2020-05-03T03:04:05.006Z"} 298 assert.NoError(srv.indexPost(ctx, &ident, &p7, "app.bsky.feed.post/3kpnilllu7777", cid.Undef)) 299 300 _, err = srv.escli.Indices.Refresh() 301 assert.NoError(err) 302 303 // expect all to be indexed 304 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "*", 0, 20) 305 if err != nil { 306 t.Fatal(err) 307 } 308 assert.Equal(7, len(res.Hits.Hits)) 309 310 // check that english matches both 311 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "english", 0, 20) 312 if err != nil { 313 t.Fatal(err) 314 } 315 assert.Equal(2, len(res.Hits.Hits)) 316 317 // phrase only matches one 318 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "\"basic english\"", 0, 20) 319 if err != nil { 320 t.Fatal(err) 321 } 322 assert.Equal(1, len(res.Hits.Hits)) 323 324 // posts-by 325 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "from:handle.example.com", 0, 20) 326 if err != nil { 327 t.Fatal(err) 328 } 329 assert.Equal(7, len(res.Hits.Hits)) 330 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "from:@handle.example.com", 0, 20) 331 if err != nil { 332 t.Fatal(err) 333 } 334 assert.Equal(7, len(res.Hits.Hits)) 335 336 // hashtag query 337 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "post #trick", 0, 20) 338 if err != nil { 339 t.Fatal(err) 340 } 341 assert.Equal(1, len(res.Hits.Hits)) 342 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "post #Trick", 0, 20) 343 if err != nil { 344 t.Fatal(err) 345 } 346 assert.Equal(1, len(res.Hits.Hits)) 347 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "post #trick #allMustMatch", 0, 20) 348 if err != nil { 349 t.Fatal(err) 350 } 351 assert.Equal(0, len(res.Hits.Hits)) 352 353 // mention query 354 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "@other.example.com", 0, 20) 355 if err != nil { 356 t.Fatal(err) 357 } 358 assert.Equal(1, len(res.Hits.Hits)) 359 360 // URL and domain queries 361 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "https://en.wikipedia.org/wiki/CBOR?a=1&q=3", 0, 20) 362 if err != nil { 363 t.Fatal(err) 364 } 365 assert.Equal(1, len(res.Hits.Hits)) 366 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "\"https://en.wikipedia.org/wiki/CBOR?a=1&q=3\"", 0, 20) 367 if err != nil { 368 t.Fatal(err) 369 } 370 assert.Equal(0, len(res.Hits.Hits)) 371 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "https://en.wikipedia.org/wiki/CBOR", 0, 20) 372 if err != nil { 373 t.Fatal(err) 374 } 375 assert.Equal(0, len(res.Hits.Hits)) 376 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "domain:en.wikipedia.org", 0, 20) 377 if err != nil { 378 t.Fatal(err) 379 } 380 assert.Equal(1, len(res.Hits.Hits)) 381 382 // lang filter 383 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "lang:de", 0, 20) 384 if err != nil { 385 t.Fatal(err) 386 } 387 assert.Equal(1, len(res.Hits.Hits)) 388 389 // date range filters 390 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "since:2023-01-01T00:00:00Z", 0, 20) 391 if err != nil { 392 t.Fatal(err) 393 } 394 assert.Equal(6, len(res.Hits.Hits)) 395 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "since:2023-01-01", 0, 20) 396 if err != nil { 397 t.Fatal(err) 398 } 399 assert.Equal(6, len(res.Hits.Hits)) 400 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "until:2023-01-01", 0, 20) 401 if err != nil { 402 t.Fatal(err) 403 } 404 assert.Equal(1, len(res.Hits.Hits)) 405 res, err = DoSearchPosts(ctx, &dir, escli, testPostIndex, "until:asdf", 0, 20) 406 if err != nil { 407 t.Fatal(err) 408 } 409 assert.Equal(7, len(res.Hits.Hits)) 410}