fork of indigo with slightly nicer lexgen

keywords: CLI stdin/stdout helper tool

Changed files
+94
automod
keyword
cmd
kw-cli
+94
automod/keyword/cmd/kw-cli/main.go
··· 1 + package main 2 + 3 + import ( 4 + "bufio" 5 + "context" 6 + "fmt" 7 + "log/slog" 8 + "os" 9 + 10 + "github.com/bluesky-social/indigo/automod/keyword" 11 + "github.com/bluesky-social/indigo/automod/setstore" 12 + 13 + "github.com/urfave/cli/v2" 14 + ) 15 + 16 + func main() { 17 + app := cli.App{ 18 + Name: "kw-cli", 19 + Usage: "informal debugging CLI tool for keyword matching", 20 + } 21 + app.Commands = []*cli.Command{ 22 + &cli.Command{ 23 + Name: "fuzzy", 24 + Usage: "reads lines of text from stdin, runs regex fuzzy matching, outputs matches", 25 + Action: runFuzzy, 26 + }, 27 + &cli.Command{ 28 + Name: "tokens", 29 + Usage: "reads lines of text from stdin, tokenizes and matches against set", 30 + Action: runTokens, 31 + Flags: []cli.Flag{ 32 + &cli.StringFlag{ 33 + Name: "json-set-file", 34 + Usage: "path to JSON file containing bad word sets", 35 + Value: "automod/rules/example_sets.json", 36 + }, 37 + &cli.StringFlag{ 38 + Name: "set-name", 39 + Usage: "which set within the set file to use", 40 + Value: "bad-words", 41 + }, 42 + &cli.BoolFlag{ 43 + Name: "identifiers", 44 + Usage: "whether to parse the line as identifiers (instead of text)", 45 + }, 46 + }, 47 + }, 48 + } 49 + h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}) 50 + slog.SetDefault(slog.New(h)) 51 + app.RunAndExitOnError() 52 + } 53 + 54 + func runFuzzy(cctx *cli.Context) error { 55 + scanner := bufio.NewScanner(os.Stdin) 56 + for scanner.Scan() { 57 + line := scanner.Text() 58 + word := keyword.SlugContainsExplicitSlur(keyword.Slugify(line)) 59 + if word != "" { 60 + fmt.Printf("MATCH\t%s\t%s\n", word, line) 61 + } 62 + } 63 + return nil 64 + } 65 + 66 + func runTokens(cctx *cli.Context) error { 67 + ctx := context.Background() 68 + sets := setstore.NewMemSetStore() 69 + if err := sets.LoadFromFileJSON(cctx.String("json-set-file")); err != nil { 70 + return err 71 + } 72 + setName := cctx.String("set-name") 73 + identMode := cctx.Bool("identifiers") 74 + scanner := bufio.NewScanner(os.Stdin) 75 + for scanner.Scan() { 76 + line := scanner.Text() 77 + var tokens []string 78 + if identMode { 79 + tokens = keyword.TokenizeIdentifier(line) 80 + } else { 81 + tokens = keyword.TokenizeText(line) 82 + } 83 + for _, tok := range tokens { 84 + match, err := sets.InSet(ctx, setName, tok) 85 + if err != nil { 86 + return err 87 + } 88 + if match { 89 + fmt.Printf("MATCH\t%s\t%s\n", tok, line) 90 + } 91 + } 92 + } 93 + return nil 94 + }