fork of indigo with slightly nicer lexgen

update hive labeling to match appview auto-labeling code

Changed files
+140 -75
automod
-1
automod/visual/doc.go
··· 1 1 // automod helpers for visual content (image blobs) 2 2 package visual 3 -
+94 -28
automod/visual/hiveai.go
··· 1 1 package visual 2 2 3 3 import ( 4 - "log/slog" 5 4 "bytes" 6 5 "context" 7 6 "encoding/json" 8 7 "fmt" 9 8 "io" 9 + "log/slog" 10 10 "mime/multipart" 11 11 "net/http" 12 12 ··· 51 51 } 52 52 } 53 53 54 + // Simple direct mappings from individual classes to labels 55 + // 56 + // hive gore and violence: https://docs.thehive.ai/docs/class-descriptions-violence-gore 57 + func summarizeSimpleLabels(cl []HiveAIResp_Class) []string { 58 + var labels []string 59 + 60 + for _, cls := range cl { 61 + if cls.Class == "very_bloody" && cls.Score >= 0.90 { 62 + labels = append(labels, "gore") 63 + } 64 + if cls.Class == "human_corpse" && cls.Score >= 0.90 { 65 + labels = append(labels, "corpse") 66 + } 67 + if cls.Class == "hanging" && cls.Score >= 0.90 { 68 + labels = append(labels, "corpse") 69 + } 70 + if cls.Class == "yes_self_harm" && cls.Score >= 0.96 { 71 + labels = append(labels, "self-harm") 72 + } 73 + } 74 + return labels 75 + } 76 + 77 + // Matches only one (or none) of: porn, sexual, nudity 78 + // 79 + // porn: sexual and nudity. including both explicit activity or full-frontal and suggestive/intent 80 + // sexual: sexually suggestive, not explicit; may include some forms of nudity 81 + // nudity: non-sexual nudity (eg, artistic, possibly some photographic) 82 + // 83 + // hive docs/definitions: https://docs.thehive.ai/docs/sexual-content 84 + func summarizeSexualLabels(cl []HiveAIResp_Class) string { 85 + 86 + scores := make(map[string]float64) 87 + for _, cls := range cl { 88 + scores[cls.Class] = cls.Score 89 + } 90 + 91 + // first check if porn... 
92 + for _, pornClass := range []string{"yes_sexual_activity", "animal_genitalia_and_human", "yes_realistic_nsfw"} { 93 + if scores[pornClass] >= 0.9 { 94 + return "porn" 95 + } 96 + } 97 + if scores["general_nsfw"] >= 0.9 { 98 + // special case for some anime examples 99 + if scores["animated_animal_genitalia"] >= 0.5 { 100 + return "porn" 101 + } 102 + 103 + // special case for some pornographic/explicit classic drawings 104 + if scores["yes_undressed"] >= 0.9 && scores["yes_sexual_activity"] >= 0.9 { 105 + return "porn" 106 + } 107 + } 108 + 109 + // then check for sexual suggestive (which may include nudity)... 110 + for _, sexualClass := range []string{"yes_sexual_intent", "yes_sex_toy"} { 111 + if scores[sexualClass] >= 0.9 { 112 + return "sexual" 113 + } 114 + } 115 + if scores["yes_undressed"] >= 0.9 { 116 + // special case for bondage examples 117 + if scores["yes_sex_toy"] > 0.75 { 118 + return "sexual" 119 + } 120 + } 121 + 122 + // then non-sexual nudity... 123 + for _, nudityClass := range []string{"yes_male_nudity", "yes_female_nudity", "yes_undressed"} { 124 + if scores[nudityClass] >= 0.9 { 125 + return "nudity" 126 + } 127 + } 128 + 129 + // then finally flag remaining "underwear" images in to sexually suggestive 130 + // (after non-sexual content already labeled above) 131 + for _, underwearClass := range []string{"yes_male_underwear", "yes_female_underwear"} { 132 + if scores[underwearClass] >= 0.9 { 133 + return "sexual" 134 + } 135 + } 136 + 137 + return "" 138 + } 139 + 54 140 func (resp *HiveAIResp) SummarizeLabels() []string { 55 141 var labels []string 56 142 57 143 for _, status := range resp.Status { 58 144 for _, out := range status.Response.Output { 59 - for _, cls := range out.Classes { 60 - // TODO(bnewbold): lots more upstream tags could be included here. 
61 - // for example, "sexy" for not nude but still explicit/suggestive, 62 - // or lolicon (animated, not nude, "sugggestive" 145 + simple := summarizeSimpleLabels(out.Classes) 146 + if len(simple) > 0 { 147 + labels = append(labels, simple...) 148 + } 63 149 64 - // sexual: https://docs.thehive.ai/docs/sexual-content 65 - // note: won't apply "nude" if "porn" already applied 66 - if cls.Class == "yes_sexual_activity" && cls.Score >= 0.90 { 67 - // NOTE: will include "hentai" 68 - labels = append(labels, "porn") 69 - } else if cls.Class == "animal_genitalia_and_human" && cls.Score >= 0.90 { 70 - labels = append(labels, "porn") 71 - } else if cls.Class == "yes_male_nudity" && cls.Score >= 0.90 { 72 - labels = append(labels, "nude") 73 - } else if cls.Class == "yes_female_nudity" && cls.Score >= 0.90 { 74 - labels = append(labels, "nude") 75 - } 76 - 77 - // gore and violence: https://docs.thehive.ai/docs/class-descriptions-violence-gore 78 - if cls.Class == "very_bloody" && cls.Score >= 0.90 { 79 - labels = append(labels, "gore") 80 - } 81 - if cls.Class == "human_corpse" && cls.Score >= 0.90 { 82 - labels = append(labels, "corpse") 83 - } 84 - if cls.Class == "yes_self_harm" && cls.Score >= 0.90 { 85 - labels = append(labels, "self-harm") 86 - } 150 + sexual := summarizeSexualLabels(out.Classes) 151 + if sexual != "" { 152 + labels = append(labels, sexual) 87 153 } 88 154 } 89 155 }
+46 -46
automod/visual/testdata/hiveai_resp_example.json
··· 44 44 }, 45 45 { 46 46 "class": "general_nsfw", 47 - "score": 8.857344804177162e-05 47 + "score": 8.857344804177162e-5 48 48 }, 49 49 { 50 50 "class": "general_suggestive", ··· 56 56 }, 57 57 { 58 58 "class": "yes_female_underwear", 59 - "score": 7.692095961599136e-06 59 + "score": 7.692095961599136e-6 60 60 }, 61 61 { 62 62 "class": "no_male_underwear", ··· 64 64 }, 65 65 { 66 66 "class": "yes_male_underwear", 67 - "score": 1.5095132367094679e-06 67 + "score": 1.5095132367094679e-6 68 68 }, 69 69 { 70 70 "class": "no_sex_toy", ··· 72 72 }, 73 73 { 74 74 "class": "yes_sex_toy", 75 - "score": 2.9029237450490604e-06 75 + "score": 2.9029237450490604e-6 76 76 }, 77 77 { 78 78 "class": "no_female_nudity", ··· 80 80 }, 81 81 { 82 82 "class": "yes_female_nudity", 83 - "score": 2.60971090699536e-05 83 + "score": 2.60971090699536e-5 84 84 }, 85 85 { 86 86 "class": "no_male_nudity", ··· 88 88 }, 89 89 { 90 90 "class": "yes_male_nudity", 91 - "score": 2.8862691625255323e-05 91 + "score": 2.8862691625255323e-5 92 92 }, 93 93 { 94 94 "class": "no_female_swimwear", ··· 96 96 }, 97 97 { 98 98 "class": "yes_female_swimwear", 99 - "score": 8.239010034025379e-06 99 + "score": 8.239010034025379e-6 100 100 }, 101 101 { 102 102 "class": "no_male_shirtless", ··· 104 104 }, 105 105 { 106 106 "class": "yes_male_shirtless", 107 - "score": 4.166492556688088e-05 107 + "score": 4.166492556688088e-5 108 108 }, 109 109 { 110 110 "class": "no_text", ··· 128 128 }, 129 129 { 130 130 "class": "animated_gun", 131 - "score": 2.0713000782979496e-05 131 + "score": 2.0713000782979496e-5 132 132 }, 133 133 { 134 134 "class": "gun_in_hand", 135 - "score": 1.5844730446534659e-06 135 + "score": 1.5844730446534659e-6 136 136 }, 137 137 { 138 138 "class": "gun_not_in_hand", 139 - "score": 1.0338973818006654e-06 139 + "score": 1.0338973818006654e-6 140 140 }, 141 141 { 142 142 "class": "no_gun", ··· 144 144 }, 145 145 { 146 146 "class": "culinary_knife_in_hand", 147 - "score": 3.8063500083369785e-06 147 + 
"score": 3.8063500083369785e-6 148 148 }, 149 149 { 150 150 "class": "culinary_knife_not_in_hand", 151 - "score": 7.94057948996249e-07 151 + "score": 7.94057948996249e-7 152 152 }, 153 153 { 154 154 "class": "knife_in_hand", 155 - "score": 4.5578955723278505e-07 155 + "score": 4.5578955723278505e-7 156 156 }, 157 157 { 158 158 "class": "knife_not_in_hand", 159 - "score": 3.842124714748908e-07 159 + "score": 3.842124714748908e-7 160 160 }, 161 161 { 162 162 "class": "no_knife", ··· 164 164 }, 165 165 { 166 166 "class": "a_little_bloody", 167 - "score": 2.1317745626539786e-07 167 + "score": 2.1317745626539786e-7 168 168 }, 169 169 { 170 170 "class": "no_blood", ··· 172 172 }, 173 173 { 174 174 "class": "other_blood", 175 - "score": 2.0322054269591763e-05 175 + "score": 2.0322054269591763e-5 176 176 }, 177 177 { 178 178 "class": "very_bloody", 179 - "score": 1.306446309561673e-07 179 + "score": 1.306446309561673e-7 180 180 }, 181 181 { 182 182 "class": "no_pills", ··· 184 184 }, 185 185 { 186 186 "class": "yes_pills", 187 - "score": 1.0407623044588633e-06 187 + "score": 1.0407623044588633e-6 188 188 }, 189 189 { 190 190 "class": "no_smoking", ··· 192 192 }, 193 193 { 194 194 "class": "yes_smoking", 195 - "score": 6.089803082758281e-06 195 + "score": 6.089803082758281e-6 196 196 }, 197 197 { 198 198 "class": "illicit_injectables", 199 - "score": 6.925695592003094e-07 199 + "score": 6.925695592003094e-7 200 200 }, 201 201 { 202 202 "class": "medical_injectables", 203 - "score": 8.587808234452378e-07 203 + "score": 8.587808234452378e-7 204 204 }, 205 205 { 206 206 "class": "no_injectables", ··· 212 212 }, 213 213 { 214 214 "class": "yes_nazi", 215 - "score": 1.2550371902234279e-06 215 + "score": 1.2550371902234279e-6 216 216 }, 217 217 { 218 218 "class": "no_kkk", ··· 220 220 }, 221 221 { 222 222 "class": "yes_kkk", 223 - "score": 2.3758245111050425e-07 223 + "score": 2.3758245111050425e-7 224 224 }, 225 225 { 226 226 "class": "no_middle_finger", ··· 228 228 }, 229 229 { 
230 230 "class": "yes_middle_finger", 231 - "score": 1.184847681536747e-05 231 + "score": 1.184847681536747e-5 232 232 }, 233 233 { 234 234 "class": "no_terrorist", ··· 236 236 }, 237 237 { 238 238 "class": "yes_terrorist", 239 - "score": 1.1292067715380635e-07 239 + "score": 1.1292067715380635e-7 240 240 }, 241 241 { 242 242 "class": "no_overlay_text", ··· 253 253 { 254 254 "class": "yes_sexual_activity", 255 255 "score": 0.99, 256 - "realScore": 4.364196252012032e-05 256 + "realScore": 4.364196252012032e-5 257 257 }, 258 258 { 259 259 "class": "hanging", 260 - "score": 3.6435135762510905e-07 260 + "score": 3.6435135762510905e-7 261 261 }, 262 262 { 263 263 "class": "no_hanging_no_noose", ··· 265 265 }, 266 266 { 267 267 "class": "noose", 268 - "score": 1.5577290007796094e-06 268 + "score": 1.5577290007796094e-6 269 269 }, 270 270 { 271 271 "class": "no_realistic_nsfw", ··· 273 273 }, 274 274 { 275 275 "class": "yes_realistic_nsfw", 276 - "score": 5.565899219571182e-06 276 + "score": 5.565899219571182e-6 277 277 }, 278 278 { 279 279 "class": "animated_corpse", 280 - "score": 5.276802046755426e-07 280 + "score": 5.276802046755426e-7 281 281 }, 282 282 { 283 283 "class": "human_corpse", 284 - "score": 2.5449360984211012e-08 284 + "score": 2.5449360984211012e-8 285 285 }, 286 286 { 287 287 "class": "no_corpse", ··· 293 293 }, 294 294 { 295 295 "class": "yes_self_harm", 296 - "score": 5.484374493605692e-07 296 + "score": 5.484374493605692e-7 297 297 }, 298 298 { 299 299 "class": "no_drawing", ··· 309 309 }, 310 310 { 311 311 "class": "yes_emaciated_body", 312 - "score": 1.853499568724518e-07 312 + "score": 1.853499568724518e-7 313 313 }, 314 314 { 315 315 "class": "no_child_present", ··· 317 317 }, 318 318 { 319 319 "class": "yes_child_present", 320 - "score": 2.950148455380443e-06 320 + "score": 2.950148455380443e-6 321 321 }, 322 322 { 323 323 "class": "no_sexual_intent", ··· 325 325 }, 326 326 { 327 327 "class": "yes_sexual_intent", 328 - "score": 
3.613845370766111e-06 328 + "score": 3.613845370766111e-6 329 329 }, 330 330 { 331 331 "class": "animal_genitalia_and_human", 332 - "score": 2.255472023465222e-08 332 + "score": 2.255472023465222e-8 333 333 }, 334 334 { 335 335 "class": "animal_genitalia_only", 336 - "score": 4.6783185199931176e-07 336 + "score": 4.6783185199931176e-7 337 337 }, 338 338 { 339 339 "class": "animated_animal_genitalia", 340 - "score": 6.707857419436447e-07 340 + "score": 6.707857419436447e-7 341 341 }, 342 342 { 343 343 "class": "no_animal_genitalia", ··· 349 349 }, 350 350 { 351 351 "class": "yes_gambling", 352 - "score": 3.906031285604864e-06 352 + "score": 3.906031285604864e-6 353 353 }, 354 354 { 355 355 "class": "no_undressed", ··· 357 357 }, 358 358 { 359 359 "class": "yes_undressed", 360 - "score": 7.664378199789045e-07 360 + "score": 7.664378199789045e-7 361 361 }, 362 362 { 363 363 "class": "no_confederate", ··· 365 365 }, 366 366 { 367 367 "class": "yes_confederate", 368 - "score": 7.454309962453175e-06 368 + "score": 7.454309962453175e-6 369 369 }, 370 370 { 371 371 "class": "animated_alcohol", 372 - "score": 1.8109949948066074e-06 372 + "score": 1.8109949948066074e-6 373 373 }, 374 374 { 375 375 "class": "no_alcohol", ··· 377 377 }, 378 378 { 379 379 "class": "yes_alcohol", 380 - "score": 5.88781463445443e-06 380 + "score": 5.88781463445443e-6 381 381 }, 382 382 { 383 383 "class": "yes_drinking_alcohol", 384 - "score": 6.390945746578106e-07 384 + "score": 6.390945746578106e-7 385 385 }, 386 386 { 387 387 "class": "no_religious_icon", ··· 389 389 }, 390 390 { 391 391 "class": "yes_religious_icon", 392 - "score": 1.3784141931119298e-05 392 + "score": 1.3784141931119298e-5 393 393 } 394 394 ] 395 395 }