+94
automod/keyword/cmd/kw-cli/main.go
+94
automod/keyword/cmd/kw-cli/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"bufio"
5
+
"context"
6
+
"fmt"
7
+
"log/slog"
8
+
"os"
9
+
10
+
"github.com/bluesky-social/indigo/automod/keyword"
11
+
"github.com/bluesky-social/indigo/automod/setstore"
12
+
13
+
"github.com/urfave/cli/v2"
14
+
)
15
+
16
+
func main() {
17
+
app := cli.App{
18
+
Name: "kw-cli",
19
+
Usage: "informal debugging CLI tool for keyword matching",
20
+
}
21
+
app.Commands = []*cli.Command{
22
+
&cli.Command{
23
+
Name: "fuzzy",
24
+
Usage: "reads lines of text from stdin, runs regex fuzzy matching, outputs matches",
25
+
Action: runFuzzy,
26
+
},
27
+
&cli.Command{
28
+
Name: "tokens",
29
+
Usage: "reads lines of text from stdin, tokenizes and matches against set",
30
+
Action: runTokens,
31
+
Flags: []cli.Flag{
32
+
&cli.StringFlag{
33
+
Name: "json-set-file",
34
+
Usage: "path to JSON file containing bad word sets",
35
+
Value: "automod/rules/example_sets.json",
36
+
},
37
+
&cli.StringFlag{
38
+
Name: "set-name",
39
+
Usage: "which set within the set file to use",
40
+
Value: "bad-words",
41
+
},
42
+
&cli.BoolFlag{
43
+
Name: "identifiers",
44
+
Usage: "whether to parse the line as identifiers (instead of text)",
45
+
},
46
+
},
47
+
},
48
+
}
49
+
h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})
50
+
slog.SetDefault(slog.New(h))
51
+
app.RunAndExitOnError()
52
+
}
53
+
54
+
func runFuzzy(cctx *cli.Context) error {
55
+
scanner := bufio.NewScanner(os.Stdin)
56
+
for scanner.Scan() {
57
+
line := scanner.Text()
58
+
word := keyword.SlugContainsExplicitSlur(keyword.Slugify(line))
59
+
if word != "" {
60
+
fmt.Printf("MATCH\t%s\t%s\n", word, line)
61
+
}
62
+
}
63
+
return nil
64
+
}
65
+
66
+
func runTokens(cctx *cli.Context) error {
67
+
ctx := context.Background()
68
+
sets := setstore.NewMemSetStore()
69
+
if err := sets.LoadFromFileJSON(cctx.String("json-set-file")); err != nil {
70
+
return err
71
+
}
72
+
setName := cctx.String("set-name")
73
+
identMode := cctx.Bool("identifiers")
74
+
scanner := bufio.NewScanner(os.Stdin)
75
+
for scanner.Scan() {
76
+
line := scanner.Text()
77
+
var tokens []string
78
+
if identMode {
79
+
tokens = keyword.TokenizeIdentifier(line)
80
+
} else {
81
+
tokens = keyword.TokenizeText(line)
82
+
}
83
+
for _, tok := range tokens {
84
+
match, err := sets.InSet(ctx, setName, tok)
85
+
if err != nil {
86
+
return err
87
+
}
88
+
if match {
89
+
fmt.Printf("MATCH\t%s\t%s\n", tok, line)
90
+
}
91
+
}
92
+
}
93
+
return nil
94
+
}