+3
automod/visual/doc.go
+3
automod/visual/doc.go
+147
automod/visual/hiveai.go
+147
automod/visual/hiveai.go
···
1
+
package visual
2
+
3
+
import (
4
+
"log/slog"
5
+
"bytes"
6
+
"context"
7
+
"encoding/json"
8
+
"fmt"
9
+
"io"
10
+
"mime/multipart"
11
+
"net/http"
12
+
13
+
lexutil "github.com/bluesky-social/indigo/lex/util"
14
+
"github.com/bluesky-social/indigo/util"
15
+
16
+
"github.com/carlmjohnson/versioninfo"
17
+
)
18
+
19
+
// HiveAILabeler classifies media blobs by submitting them to the Hive AI
// (thehive.ai) HTTP API.
type HiveAILabeler struct {
	// Client is the HTTP client used to issue requests to the API.
	Client http.Client

	// ApiToken is the credential sent in the "Authorization: Token ..."
	// request header.
	ApiToken string
}
23
+
24
+
// Types mirroring the nested JSON envelope returned by the Hive AI
// classification endpoint. Only the fields declared here are decoded.
//
// schema: https://docs.thehive.ai/reference/classification
type (
	// HiveAIResp is the top-level response object.
	HiveAIResp struct {
		Status []HiveAIResp_Status `json:"status"`
	}

	// HiveAIResp_Status is one element of the top-level "status" array.
	HiveAIResp_Status struct {
		Response HiveAIResp_Response `json:"response"`
	}

	// HiveAIResp_Response holds the classifier outputs of one status entry.
	HiveAIResp_Response struct {
		Output []HiveAIResp_Out `json:"output"`
	}

	// HiveAIResp_Out is a single classifier output: the "time" value from
	// the response together with the scored classes.
	HiveAIResp_Out struct {
		Time    float64            `json:"time"`
		Classes []HiveAIResp_Class `json:"classes"`
	}

	// HiveAIResp_Class pairs a classifier class name with its score.
	HiveAIResp_Class struct {
		Class string  `json:"class"`
		Score float64 `json:"score"`
	}
)
46
+
47
+
func NewHiveAILabeler(token string) HiveAILabeler {
48
+
return HiveAILabeler{
49
+
Client: *util.RobustHTTPClient(),
50
+
ApiToken: token,
51
+
}
52
+
}
53
+
54
+
func (resp *HiveAIResp) SummarizeLabels() []string {
55
+
var labels []string
56
+
57
+
for _, status := range resp.Status {
58
+
for _, out := range status.Response.Output {
59
+
for _, cls := range out.Classes {
60
+
// TODO(bnewbold): lots more upstream tags could be included here.
61
+
// for example, "sexy" for not nude but still explicit/suggestive,
62
+
// or lolicon (animated, not nude, "sugggestive"
63
+
64
+
// sexual: https://docs.thehive.ai/docs/sexual-content
65
+
// note: won't apply "nude" if "porn" already applied
66
+
if cls.Class == "yes_sexual_activity" && cls.Score >= 0.90 {
67
+
// NOTE: will include "hentai"
68
+
labels = append(labels, "porn")
69
+
} else if cls.Class == "animal_genitalia_and_human" && cls.Score >= 0.90 {
70
+
labels = append(labels, "porn")
71
+
} else if cls.Class == "yes_male_nudity" && cls.Score >= 0.90 {
72
+
labels = append(labels, "nude")
73
+
} else if cls.Class == "yes_female_nudity" && cls.Score >= 0.90 {
74
+
labels = append(labels, "nude")
75
+
}
76
+
77
+
// gore and violence: https://docs.thehive.ai/docs/class-descriptions-violence-gore
78
+
if cls.Class == "very_bloody" && cls.Score >= 0.90 {
79
+
labels = append(labels, "gore")
80
+
}
81
+
if cls.Class == "human_corpse" && cls.Score >= 0.90 {
82
+
labels = append(labels, "corpse")
83
+
}
84
+
if cls.Class == "yes_self_harm" && cls.Score >= 0.90 {
85
+
labels = append(labels, "self-harm")
86
+
}
87
+
}
88
+
}
89
+
}
90
+
91
+
return labels
92
+
}
93
+
94
+
func (hal *HiveAILabeler) LabelBlob(ctx context.Context, blob lexutil.LexBlob, blobBytes []byte) ([]string, error) {
95
+
96
+
slog.Info("sending blob to thehive.ai", "cid", blob.Ref, "mimetype", blob.MimeType, "size", len(blobBytes))
97
+
98
+
// generic HTTP form file upload, then parse the response JSON
99
+
body := &bytes.Buffer{}
100
+
writer := multipart.NewWriter(body)
101
+
part, err := writer.CreateFormFile("media", blob.Ref.String())
102
+
if err != nil {
103
+
return nil, err
104
+
}
105
+
_, err = part.Write(blobBytes)
106
+
if err != nil {
107
+
return nil, err
108
+
}
109
+
err = writer.Close()
110
+
if err != nil {
111
+
return nil, err
112
+
}
113
+
114
+
req, err := http.NewRequest("POST", "https://api.thehive.ai/api/v2/task/sync", body)
115
+
if err != nil {
116
+
return nil, err
117
+
}
118
+
119
+
req.Header.Set("Authorization", fmt.Sprintf("Token %s", hal.ApiToken))
120
+
req.Header.Add("Content-Type", writer.FormDataContentType())
121
+
req.Header.Set("Accept", "application/json")
122
+
req.Header.Set("User-Agent", "labelmaker/"+versioninfo.Short())
123
+
124
+
res, err := hal.Client.Do(req)
125
+
if err != nil {
126
+
return nil, fmt.Errorf("HiveAI request failed: %v", err)
127
+
}
128
+
defer res.Body.Close()
129
+
if res.StatusCode != 200 {
130
+
return nil, fmt.Errorf("HiveAI request failed statusCode=%d", res.StatusCode)
131
+
}
132
+
133
+
respBytes, err := io.ReadAll(res.Body)
134
+
if err != nil {
135
+
return nil, fmt.Errorf("failed to read HiveAI resp body: %v", err)
136
+
}
137
+
138
+
slog.Debug("HiveAI raw result", "cid", blob.Ref, "body", string(respBytes))
139
+
140
+
var respObj HiveAIResp
141
+
if err := json.Unmarshal(respBytes, &respObj); err != nil {
142
+
return nil, fmt.Errorf("failed to parse HiveAI resp JSON: %v", err)
143
+
}
144
+
respJson, _ := json.Marshal(respObj.Status[0].Response.Output[0])
145
+
slog.Info("HiveAI result", "cid", blob.Ref, "json", string(respJson))
146
+
return respObj.SummarizeLabels(), nil
147
+
}
+42
automod/visual/hiveai_test.go
+42
automod/visual/hiveai_test.go
···
1
+
package visual
2
+
3
+
import (
4
+
"encoding/json"
5
+
"io"
6
+
"os"
7
+
"reflect"
8
+
"testing"
9
+
)
10
+
11
+
func TestHiveParse(t *testing.T) {
12
+
file, err := os.Open("testdata/hiveai_resp_example.json")
13
+
if err != nil {
14
+
t.Fatal(err)
15
+
}
16
+
17
+
respBytes, err := io.ReadAll(file)
18
+
if err != nil {
19
+
t.Fatal(err)
20
+
}
21
+
22
+
var respObj HiveAIResp
23
+
if err := json.Unmarshal(respBytes, &respObj); err != nil {
24
+
t.Fatal(err)
25
+
}
26
+
27
+
classes := respObj.Status[0].Response.Output[0].Classes
28
+
if len(classes) <= 10 {
29
+
t.Fatal("didn't get expected class count")
30
+
}
31
+
for _, c := range classes {
32
+
if c.Class == "" || c.Score == 0.0 {
33
+
t.Fatal("got null/empty class in resp")
34
+
}
35
+
}
36
+
37
+
labels := respObj.SummarizeLabels()
38
+
expected := []string{"porn"}
39
+
if !reflect.DeepEqual(labels, expected) {
40
+
t.Fatal("didn't summarize to expected labels")
41
+
}
42
+
}
+401
automod/visual/testdata/hiveai_resp_example.json
+401
automod/visual/testdata/hiveai_resp_example.json
···
1
+
{
2
+
"id": "02122580-c37f-11ed-81d2-000000000000",
3
+
"code": 200,
4
+
"project_id": 12345,
5
+
"user_id": 12345,
6
+
"created_on": "2023-03-15T22:16:18.408Z",
7
+
"status": [
8
+
{
9
+
"status": {
10
+
"code": "0",
11
+
"message": "SUCCESS"
12
+
},
13
+
"response": {
14
+
"input": {
15
+
"id": "02122580-c37f-11ed-81d2-000000000000",
16
+
"charge": 0.003,
17
+
"model": "mod55_dense",
18
+
"model_version": 1,
19
+
"model_type": "CATEGORIZATION",
20
+
"created_on": "2023-03-15T22:16:18.136Z",
21
+
"media": {
22
+
"url": null,
23
+
"filename": "bafkreiam7k6mvkyuoybq4ynhljvj5xa75sdbhjbolzjf5j2udx7vj5gnsy",
24
+
"type": "PHOTO",
25
+
"mime_type": "jpeg",
26
+
"mimetype": "image/jpeg",
27
+
"width": 800,
28
+
"height": 800,
29
+
"num_frames": 1,
30
+
"duration": 0
31
+
},
32
+
"user_id": 12345,
33
+
"project_id": 12345,
34
+
"config_version": 1,
35
+
"config_tag": "default"
36
+
},
37
+
"output": [
38
+
{
39
+
"time": 0,
40
+
"classes": [
41
+
{
42
+
"class": "general_not_nsfw_not_suggestive",
43
+
"score": 0.9998097218132356
44
+
},
45
+
{
46
+
"class": "general_nsfw",
47
+
"score": 8.857344804177162e-05
48
+
},
49
+
{
50
+
"class": "general_suggestive",
51
+
"score": 0.00010170473872266839
52
+
},
53
+
{
54
+
"class": "no_female_underwear",
55
+
"score": 0.9999923079040384
56
+
},
57
+
{
58
+
"class": "yes_female_underwear",
59
+
"score": 7.692095961599136e-06
60
+
},
61
+
{
62
+
"class": "no_male_underwear",
63
+
"score": 0.9999984904867634
64
+
},
65
+
{
66
+
"class": "yes_male_underwear",
67
+
"score": 1.5095132367094679e-06
68
+
},
69
+
{
70
+
"class": "no_sex_toy",
71
+
"score": 0.9999970970762551
72
+
},
73
+
{
74
+
"class": "yes_sex_toy",
75
+
"score": 2.9029237450490604e-06
76
+
},
77
+
{
78
+
"class": "no_female_nudity",
79
+
"score": 0.9999739028909301
80
+
},
81
+
{
82
+
"class": "yes_female_nudity",
83
+
"score": 2.60971090699536e-05
84
+
},
85
+
{
86
+
"class": "no_male_nudity",
87
+
"score": 0.9999711373083747
88
+
},
89
+
{
90
+
"class": "yes_male_nudity",
91
+
"score": 2.8862691625255323e-05
92
+
},
93
+
{
94
+
"class": "no_female_swimwear",
95
+
"score": 0.9999917609899659
96
+
},
97
+
{
98
+
"class": "yes_female_swimwear",
99
+
"score": 8.239010034025379e-06
100
+
},
101
+
{
102
+
"class": "no_male_shirtless",
103
+
"score": 0.9999583350744331
104
+
},
105
+
{
106
+
"class": "yes_male_shirtless",
107
+
"score": 4.166492556688088e-05
108
+
},
109
+
{
110
+
"class": "no_text",
111
+
"score": 0.9958378716447616
112
+
},
113
+
{
114
+
"class": "text",
115
+
"score": 0.0041621283552384265
116
+
},
117
+
{
118
+
"class": "animated",
119
+
"score": 0.46755478950048235
120
+
},
121
+
{
122
+
"class": "hybrid",
123
+
"score": 0.0011440363434524984
124
+
},
125
+
{
126
+
"class": "natural",
127
+
"score": 0.5313011741560651
128
+
},
129
+
{
130
+
"class": "animated_gun",
131
+
"score": 2.0713000782979496e-05
132
+
},
133
+
{
134
+
"class": "gun_in_hand",
135
+
"score": 1.5844730446534659e-06
136
+
},
137
+
{
138
+
"class": "gun_not_in_hand",
139
+
"score": 1.0338973818006654e-06
140
+
},
141
+
{
142
+
"class": "no_gun",
143
+
"score": 0.9999766686287906
144
+
},
145
+
{
146
+
"class": "culinary_knife_in_hand",
147
+
"score": 3.8063500083369785e-06
148
+
},
149
+
{
150
+
"class": "culinary_knife_not_in_hand",
151
+
"score": 7.94057948996249e-07
152
+
},
153
+
{
154
+
"class": "knife_in_hand",
155
+
"score": 4.5578955723278505e-07
156
+
},
157
+
{
158
+
"class": "knife_not_in_hand",
159
+
"score": 3.842124714748908e-07
160
+
},
161
+
{
162
+
"class": "no_knife",
163
+
"score": 0.999994559590014
164
+
},
165
+
{
166
+
"class": "a_little_bloody",
167
+
"score": 2.1317745626539786e-07
168
+
},
169
+
{
170
+
"class": "no_blood",
171
+
"score": 0.9999793341236429
172
+
},
173
+
{
174
+
"class": "other_blood",
175
+
"score": 2.0322054269591763e-05
176
+
},
177
+
{
178
+
"class": "very_bloody",
179
+
"score": 1.306446309561673e-07
180
+
},
181
+
{
182
+
"class": "no_pills",
183
+
"score": 0.9999989592376954
184
+
},
185
+
{
186
+
"class": "yes_pills",
187
+
"score": 1.0407623044588633e-06
188
+
},
189
+
{
190
+
"class": "no_smoking",
191
+
"score": 0.9999939101969173
192
+
},
193
+
{
194
+
"class": "yes_smoking",
195
+
"score": 6.089803082758281e-06
196
+
},
197
+
{
198
+
"class": "illicit_injectables",
199
+
"score": 6.925695592003094e-07
200
+
},
201
+
{
202
+
"class": "medical_injectables",
203
+
"score": 8.587808234452378e-07
204
+
},
205
+
{
206
+
"class": "no_injectables",
207
+
"score": 0.9999984486496174
208
+
},
209
+
{
210
+
"class": "no_nazi",
211
+
"score": 0.9999987449628097
212
+
},
213
+
{
214
+
"class": "yes_nazi",
215
+
"score": 1.2550371902234279e-06
216
+
},
217
+
{
218
+
"class": "no_kkk",
219
+
"score": 0.999999762417549
220
+
},
221
+
{
222
+
"class": "yes_kkk",
223
+
"score": 2.3758245111050425e-07
224
+
},
225
+
{
226
+
"class": "no_middle_finger",
227
+
"score": 0.9999881515231847
228
+
},
229
+
{
230
+
"class": "yes_middle_finger",
231
+
"score": 1.184847681536747e-05
232
+
},
233
+
{
234
+
"class": "no_terrorist",
235
+
"score": 0.9999998870793229
236
+
},
237
+
{
238
+
"class": "yes_terrorist",
239
+
"score": 1.1292067715380635e-07
240
+
},
241
+
{
242
+
"class": "no_overlay_text",
243
+
"score": 0.9996453363440359
244
+
},
245
+
{
246
+
"class": "yes_overlay_text",
247
+
"score": 0.0003546636559640924
248
+
},
249
+
{
250
+
"class": "no_sexual_activity",
251
+
"score": 0.9999563580374798
252
+
},
253
+
{
254
+
"class": "yes_sexual_activity",
255
+
"score": 0.99,
256
+
"realScore": 4.364196252012032e-05
257
+
},
258
+
{
259
+
"class": "hanging",
260
+
"score": 3.6435135762510905e-07
261
+
},
262
+
{
263
+
"class": "no_hanging_no_noose",
264
+
"score": 0.9999980779196416
265
+
},
266
+
{
267
+
"class": "noose",
268
+
"score": 1.5577290007796094e-06
269
+
},
270
+
{
271
+
"class": "no_realistic_nsfw",
272
+
"score": 0.9999944341007805
273
+
},
274
+
{
275
+
"class": "yes_realistic_nsfw",
276
+
"score": 5.565899219571182e-06
277
+
},
278
+
{
279
+
"class": "animated_corpse",
280
+
"score": 5.276802046755426e-07
281
+
},
282
+
{
283
+
"class": "human_corpse",
284
+
"score": 2.5449360984211012e-08
285
+
},
286
+
{
287
+
"class": "no_corpse",
288
+
"score": 0.9999994468704343
289
+
},
290
+
{
291
+
"class": "no_self_harm",
292
+
"score": 0.9999994515625507
293
+
},
294
+
{
295
+
"class": "yes_self_harm",
296
+
"score": 5.484374493605692e-07
297
+
},
298
+
{
299
+
"class": "no_drawing",
300
+
"score": 0.9978276028816608
301
+
},
302
+
{
303
+
"class": "yes_drawing",
304
+
"score": 0.0021723971183392485
305
+
},
306
+
{
307
+
"class": "no_emaciated_body",
308
+
"score": 0.9999998146500432
309
+
},
310
+
{
311
+
"class": "yes_emaciated_body",
312
+
"score": 1.853499568724518e-07
313
+
},
314
+
{
315
+
"class": "no_child_present",
316
+
"score": 0.9999970498515446
317
+
},
318
+
{
319
+
"class": "yes_child_present",
320
+
"score": 2.950148455380443e-06
321
+
},
322
+
{
323
+
"class": "no_sexual_intent",
324
+
"score": 0.9999963861546292
325
+
},
326
+
{
327
+
"class": "yes_sexual_intent",
328
+
"score": 3.613845370766111e-06
329
+
},
330
+
{
331
+
"class": "animal_genitalia_and_human",
332
+
"score": 2.255472023465222e-08
333
+
},
334
+
{
335
+
"class": "animal_genitalia_only",
336
+
"score": 4.6783185199931176e-07
337
+
},
338
+
{
339
+
"class": "animated_animal_genitalia",
340
+
"score": 6.707857419436447e-07
341
+
},
342
+
{
343
+
"class": "no_animal_genitalia",
344
+
"score": 0.9999988388276858
345
+
},
346
+
{
347
+
"class": "no_gambling",
348
+
"score": 0.9999960939687145
349
+
},
350
+
{
351
+
"class": "yes_gambling",
352
+
"score": 3.906031285604864e-06
353
+
},
354
+
{
355
+
"class": "no_undressed",
356
+
"score": 0.99999923356218
357
+
},
358
+
{
359
+
"class": "yes_undressed",
360
+
"score": 7.664378199789045e-07
361
+
},
362
+
{
363
+
"class": "no_confederate",
364
+
"score": 0.9999925456900376
365
+
},
366
+
{
367
+
"class": "yes_confederate",
368
+
"score": 7.454309962453175e-06
369
+
},
370
+
{
371
+
"class": "animated_alcohol",
372
+
"score": 1.8109949948066074e-06
373
+
},
374
+
{
375
+
"class": "no_alcohol",
376
+
"score": 0.9999916620957963
377
+
},
378
+
{
379
+
"class": "yes_alcohol",
380
+
"score": 5.88781463445443e-06
381
+
},
382
+
{
383
+
"class": "yes_drinking_alcohol",
384
+
"score": 6.390945746578106e-07
385
+
},
386
+
{
387
+
"class": "no_religious_icon",
388
+
"score": 0.9999862158580689
389
+
},
390
+
{
391
+
"class": "yes_religious_icon",
392
+
"score": 1.3784141931119298e-05
393
+
}
394
+
]
395
+
}
396
+
]
397
+
}
398
+
}
399
+
],
400
+
"from_cache": false
401
+
}