import { AppBskyActorGetProfile, AppBskyEmbedExternal, AppBskyEmbedImages, AppBskyEmbedVideo, AppBskyFeedDefs, AppBskyFeedPost } from "@atproto/api";
import { ESCALATION_THRESHOLD, ESCALATE_AND_LABEL_THRESHOLD, keywordList, LABEL_THRESHOLD, overridesList, redis, VERBOSE, CRAWL_THREAD, DRY_RUN, MISCELLANEOUS_LABEL, NO_ALT_TEXT_LABEL } from "../main.ts";
import { Agent } from "@atproto/api";
import { retry } from "@atproto/common";
import { CredentialSession } from "@atproto/api";
import { hasAlreadyHandled } from "./redis.ts";

/** Points added to every category when two or more categories scored on the
    same post (unless a pooled "bonus-points-only" score is present, in which
    case that score is added instead).
    The idea: if a "political" post is detected, it's more likely that other
    keywords are related to their intended subjects. For example, a post that
    says "vote trump" should definitely be labeled under the Trump label, and
    maybe the Election label. But either separately could refer to a
    "trump card" or to a PTA or corporate election, or benefits election,
    which we don't care about.
    This value can be tweaked as needed. */
export const CATEGORY_SHARE_POINTS = 25;

/** The "bonus points" added to every category if the post came to our attention via a report.
 *  If a maybe-political post is reported, it's much more likely that it is indeed political.
 *  If it still doesn't meet the criteria, the report is not dismissed. */
export const REPORT_BONUS_POINTS = 25;

/** If this post was reached by crawling, another post in the thread matched a keyword.
 *  This one is therefore more likely to be political, so it gets bonus points. */
export const CRAWLED_BONUS_POINTS = 10;

/** Agent pointed at the public Bluesky API host; no login is performed in this file.
 *  Used below to fetch post threads when crawling. */
export const publicAgent = new Agent(new CredentialSession(new URL("https://public.api.bsky.app"), fetch, undefined));

/**
 * Escapes every regular-expression metacharacter in `string` so the result
 * can be embedded in a `RegExp` source and match the input literally.
 */
function escapeRegExp(string: string): string {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    // Prefix each metacharacter that was found with a backslash.
    return string.replace(metaCharacters, (found) => "\\" + found);
}

/** Matches one word boundary in post text: the start of the text, the end of
 *  the text, or a single space/punctuation character. */
const WORD_SEPARATOR_PATTERN = /(?:^|[ ",!.?:_~#+=$%&*)(\[\]{}<>'’-]|$)/;
/** Source text of the separator pattern (without the surrounding slashes),
 *  for splicing into dynamically built regular expressions. */
const WSP_STRING = WORD_SEPARATOR_PATTERN.source;

/** Optional flags describing how a post reached the auditor. */
export interface AuditPostOpts {
  /** Whether the post is being audited because of a report. */
  isReport?: boolean;
  /** Whether this post was found by crawling from another post. */
  crawled?: boolean;
  /**
   * Only meaningful together with `crawled`: whether the post the crawl
   * started from received a label. The crawl bonus points are not applied
   * when the initial post was left unlabeled.
   */
  rootLabeled?: boolean;
}

/** Values used for any option the caller leaves out. */
const defaultAuditOpts: Required<AuditPostOpts> = {
  isReport: false,
  crawled: false,
  rootLabeled: false,
};

/**
 * Scores a post against `keywordList` and emits the matching Ozone moderation
 * events (label, escalate, comment, acknowledge).
 *
 * Text considered: the post text, external-embed title/description, image and
 * video alt text, `bridgyOriginalText` when present on the record, and tags.
 *
 * Keyword syntax handled per entry (checked in order; the first hit ends the entry):
 *  - `word` / `some phrase` — matches as a whole word or phrase in the text.
 *  - `-word`      — if present in the text, the entry is abandoned.
 *  - `$label`     — matches when the post carries that label.
 *  - `-$label`    — if the post carries that label, the entry is abandoned.
 *  - `$lang:xx`   — the post's `langs` must include `xx`, otherwise the entry is abandoned.
 *  - `-$lang:xx`  — if the post's `langs` include `xx`, the entry is abandoned.
 *
 * Label prefixes handled after scoring:
 *  - `or:<label>` — becomes `<label>` when nothing else scored, otherwise its
 *    score is pooled as bonus points for the other categories.
 *  - `bo:<label>` — adds its score to `<label>` only if `<label>` already scored.
 *
 * With `DRY_RUN` set, no moderation event is emitted; the intended action is
 * logged instead. The "already handled" marker is still written to redis.
 *
 * @param post        The post view to audit.
 * @param agent       Agent used to emit Ozone moderation events.
 * @param me          Profile response whose `data.did` is used as `createdBy`.
 * @param zzzoptions  How the post was reached (report / crawl); see {@link AuditPostOpts}.
 * @returns Resolves once all events for this post (and any crawled thread posts) are emitted.
 */
export async function auditPostForKeywords(post: AppBskyFeedDefs.PostView, agent: Agent, me: AppBskyActorGetProfile.Response, zzzoptions: Partial<AuditPostOpts> = {}): Promise<void> {
  const opts: AuditPostOpts = {...defaultAuditOpts, ...zzzoptions};
  const record = post.record as AppBskyFeedPost.Record;
  const scores: Record<string, number> = {};
  const matchedKeywords: Record<string, string[]> = {};
  /** Pseudo-category holding pooled bonus points; never emitted as a label. */
  const BONUS_ONLY = "bonus-points-only";
  /** Strong reference to the audited post, shared by every emitted event. */
  const subject = {
    $type: "com.atproto.repo.strongRef" as const,
    uri: post.uri,
    cid: post.cid,
  };

  const alreadyHandled = await hasAlreadyHandled(post.uri);
  if (alreadyHandled && VERBOSE) console.log("Already handled this one:", post.uri);

  // deno-lint-ignore no-explicit-any
  const overrideData = overridesList.find((v: any) => v.subject == post.author.did);
  if (overrideData?.skip || alreadyHandled) {
    // Skipped users and recently handled posts are never scored, but a report
    // about them is still escalated so that it does not get lost.
    const reason = overrideData?.skip ? "user is skipped in overrides" : "post was recently handled";
    if (opts.isReport) {
      if (DRY_RUN) {
        console.info(` * Escalated due to report (${reason})`);
      } else {
        await agent.tools.ozone.moderation.emitEvent({
          event: {
            $type: "tools.ozone.moderation.defs#modEventEscalate",
            comment: `[Automated] Escalated due to report (${reason})`,
          },
          subject,
          createdBy: me.data.did,
        });
      }
    }
    return;
  }
  if (overrideData?.score && MISCELLANEOUS_LABEL) {
    // Per-user override points go to the "or:" pseudo-label, which is resolved
    // into a real label or into bonus points after keyword scoring.
    const pooled = "or:" + MISCELLANEOUS_LABEL;
    scores[pooled] = (scores[pooled] ?? 0) + overrideData.score;
  }

  // ---- Collect every piece of text we can inspect without extra API calls ----
  // It would be nice if we could check records (quotes) here,
  // but that would hit the rate limit really fast.
  // Looking for the reply chain would do the same.
  const badAltText = ["alt text", "screenshot"];
  const allTextList: string[] = [record.text];

  /** Adds alt text to the searchable text and flags missing/placeholder alt text. */
  const noteAltText = (alt: string | undefined, kind: "image" | "video"): void => {
    if (alt) allTextList.push(alt);
    if (!alt || badAltText.includes(alt.toLowerCase())) {
      if (NO_ALT_TEXT_LABEL) scores[NO_ALT_TEXT_LABEL] = LABEL_THRESHOLD;
      if (DRY_RUN || VERBOSE) console.info(" * Missing alt text");
    } else if (DRY_RUN || VERBOSE) {
      console.info(" * Has alt text for " + kind);
    }
  };

  const embed = record.embed;
  if (AppBskyEmbedExternal.isMain(embed)) {
    allTextList.push(embed.external.title, embed.external.description);
  } else if (AppBskyEmbedImages.isMain(embed)) {
    for (const image of embed.images) noteAltText(image.alt, "image");
  } else if (AppBskyEmbedVideo.isMain(embed)) {
    noteAltText(embed.alt, "video");
  }
  if (Object.keys(post.record).includes("bridgyOriginalText")) {
    // deno-lint-ignore no-explicit-any
    allTextList.push((post.record as any).bridgyOriginalText);
    // TODO: if BridgyFed ever adds a CW field, I can add it here
  }
  // `for...of`, not `for...in`: we want the tag strings, not their array indices.
  for (const tag of record.tags ?? []) {
    allTextList.push("#" + tag);
  }
  const lowerText = allTextList.join(" ").toLowerCase();
  const splitPost = lowerText.split(WORD_SEPARATOR_PATTERN).filter((v) => v != "");

  /** True when `needle` occurs in the text as a whole word or whole phrase. */
  const textContains = (needle: string): boolean =>
    splitPost.includes(needle) ||
    // The regex handles multi-word phrases; the cheap substring test runs first.
    (lowerText.includes(needle) &&
      new RegExp(WSP_STRING + escapeRegExp(needle) + WSP_STRING).test(lowerText));
  /** True when the post view carries a label with this value. */
  const hasLabel = (val: string): boolean => post.labels?.some((l) => l.val === val) ?? false;
  /** True when the record declares this language code. */
  const hasLang = (code: string): boolean => record.langs?.includes(code) ?? false;
  /** Credits `points` to `label` and remembers which keyword caused it. */
  const recordMatch = (label: string, points: number, keyword: string): void => {
    scores[label] = (scores[label] ?? 0) + points;
    (matchedKeywords[label] ??= []).push(keyword);
  };

  // ---- Keyword scoring: at most one hit per keywordList entry ----
  for (const entry of keywordList) {
    // null = entry has no $lang: keyword so far; false = one was seen and did not match.
    let matchesLang: boolean | null = null;
    for (const keyword of entry.keywords) {
      // Skip the entry if this keyword is already contained in what matched for the label.
      if (matchedKeywords[entry.label]?.join(", ").includes(keyword)) break;

      if (keyword.startsWith("-$lang:")) {
        // Negated language check (i.e. dem,-$lang:de): posts in this language never match.
        if (hasLang(keyword.slice("-$lang:".length))) break;
      } else if (keyword.startsWith("-$")) {
        // Negated label check. Strip both prefix characters ("-$") to get the label value.
        if (hasLabel(keyword.slice(2))) break;
      } else if (keyword.startsWith("-")) {
        // Negative keyword: it _must not_ be present for the entry to match.
        if (textContains(keyword.slice(1))) break;
        continue;
      } else if (keyword.startsWith("$")) {
        if (keyword.startsWith("$lang:")) {
          matchesLang ??= false;
          if (hasLang(keyword.slice("$lang:".length))) matchesLang = true;
        }
        if (hasLabel(keyword.slice(1))) {
          recordMatch(entry.label, entry.score, keyword);
          break;
        }
      }
      // NOTE(review): a non-matching `$lang:` keyword abandons the entry right here,
      // so several `$lang:` keywords in one entry are not treated as alternatives — confirm intent.
      if (matchesLang === false) break;

      // NOTE(review): `$…`/`-$…` keywords that did not end the entry also reach this
      // literal text match, as they did before; practically they never match.
      if (textContains(keyword)) {
        recordMatch(entry.label, entry.score, keyword);
        break;
      }
    }
  }

  // ---- Resolve "or:<label>" pseudo-labels ----
  // A label in keywords.tsv with the or: prefix, i.e. or:miscellaneous-uspol, means:
  // if no other category scored, use the prefixed label itself; otherwise its
  // score is pooled as bonus points for the other categories.
  for (const [fullLabel, score] of Object.entries(scores).filter(([k]) => k.startsWith("or:"))) {
    const label = fullLabel.slice("or:".length);
    // Override-only scores have no keyword list, hence the fallback.
    const keywords = matchedKeywords[fullLabel] ?? [];
    const categoryCount = Object.keys(scores).length;
    if ((categoryCount >= 2 && !scores[BONUS_ONLY]) || categoryCount >= 3) {
      // Another category exists: convert to bonus points.
      scores[BONUS_ONLY] = (scores[BONUS_ONLY] ?? 0) + score;
      (matchedKeywords[BONUS_ONLY] ??= []).push(...keywords);
    } else {
      // No other category: promote to the real label.
      scores[label] = score;
      if (keywords.length > 0) matchedKeywords[label] = keywords;
    }
    delete scores[fullLabel];
    delete matchedKeywords[fullLabel];
  }

  // ---- Resolve "bo:<label>" bonus entries ----
  // Bonus points for one specific label, applied only if another keyword under
  // that label already matched (so bonus points alone can't label a post).
  for (const [fullLabel, score] of Object.entries(scores).filter(([k]) => k.startsWith("bo:"))) {
    const label = fullLabel.slice("bo:".length);
    if ((scores[label] ?? 0) > 0) {
      scores[label] += score;
      (matchedKeywords[label] ??= []).push(...(matchedKeywords[fullLabel] ?? []));
    }
    delete scores[fullLabel];
    delete matchedKeywords[fullLabel];
  }

  // Drop anything still carrying an unrecognised prefix.
  for (const fullLabel of Object.keys(scores).filter((k) => k.includes(":"))) {
    delete scores[fullLabel];
    delete matchedKeywords[fullLabel];
  }

  // ---- Nothing scored: a report is still escalated, everything else is ignored ----
  if (!Object.values(scores).some((v) => v > 0)) {
    if (opts.isReport) {
      if (DRY_RUN) {
        console.info(" * Escalated due to report");
      } else {
        await agent.tools.ozone.moderation.emitEvent({
          event: {
            $type: "tools.ozone.moderation.defs#modEventEscalate",
            comment: "[Automated] Escalated due to report",
          },
          subject,
          createdBy: me.data.did,
        });
      }
    }
    return;
  }

  // ---- Only bonus points matched: ignore unless the post was reported ----
  if (Object.keys(scores).length == 1 && scores[BONUS_ONLY]) {
    if (!opts.isReport) return;
    const bonusKeywords = (matchedKeywords[BONUS_ONLY] ?? []).join(", ");
    if (DRY_RUN) {
      console.info(" * Escalated due to report");
      console.info(" * Bonus points matched: " + bonusKeywords);
    } else {
      await agent.tools.ozone.moderation.emitEvent({
        event: {
          $type: "tools.ozone.moderation.defs#modEventEscalate",
          comment: `[Automated] Escalated due to report (only bonus points matched: ${bonusKeywords})`,
        },
        subject,
        createdBy: me.data.did,
      });
    }
    // Processing continues below, as before: the post is still marked as handled and commented on.
  }

  // ---- Score adjustments ----
  // With multiple categories present, each real category gets either the pooled
  // bonus points (when there are any) or CATEGORY_SHARE_POINTS.
  if (Object.keys(scores).length >= 2) {
    const pooledBonus = scores[BONUS_ONLY] ?? 0;
    const addend = pooledBonus > 0 ? pooledBonus : CATEGORY_SHARE_POINTS;
    for (const key of Object.keys(scores)) {
      if (key == BONUS_ONLY) continue;
      scores[key] += addend;
    }
  }
  delete scores[BONUS_ONLY];
  if (opts.isReport) {
    for (const key of Object.keys(scores)) scores[key] += REPORT_BONUS_POINTS;
  }
  if (opts.crawled && opts.rootLabeled) {
    if (VERBOSE) console.log("Reached by crawling:", post.uri);
    for (const key of Object.keys(scores)) scores[key] += CRAWLED_BONUS_POINTS;
  }

  // ---- Sort categories into confidence buckets and build the mod comment ----
  let comment = "[Automated] Confidence levels:";
  const escalatingLabels: string[] = [];
  const likelyLabels: string[] = [];
  const certainLabels: string[] = [];
  if (NO_ALT_TEXT_LABEL && scores[NO_ALT_TEXT_LABEL]) {
    comment = Object.keys(scores).length == 1
      ? "[Automated] Missing alt text!"
      : "[Automated] Missing alt text!\r\nConfidence levels:";
    certainLabels.push(NO_ALT_TEXT_LABEL);
  }
  for (const [key, score] of Object.entries(scores)) {
    if (NO_ALT_TEXT_LABEL && key == NO_ALT_TEXT_LABEL) continue; // already handled above
    const kws = matchedKeywords[key] ?? ["unknown"];
    comment += `\r\n${key}: ${score} (matched: ${kws.join(", ")}); `;
    if (score >= LABEL_THRESHOLD) {
      certainLabels.push(key);
    } else if (score >= ESCALATE_AND_LABEL_THRESHOLD) {
      likelyLabels.push(key);
    } else if (score >= ESCALATION_THRESHOLD) {
      escalatingLabels.push(key);
    }
  }
  if (matchedKeywords[BONUS_ONLY]?.length) {
    comment += `\r\nBonus point keywords: ${matchedKeywords[BONUS_ONLY].join(", ")}`;
  }

  // Remember this post for a day so it is not processed again.
  await retry(async () => await redis?.pipelineCommands([
    ["MULTI"],
    ["HSET", "alreadyHandled", post.uri, "true"],
    ["HEXPIRE", "alreadyHandled", 86400, "NX", "FIELDS", 1, post.uri],
    ["EXEC"],
  ]), {maxRetries: 3});

  // ---- Emit (or, in a dry run, just log) the moderation events ----
  const combinedLabels = [...certainLabels, ...likelyLabels];
  if (DRY_RUN) {
    if (certainLabels.length > 0) console.info(" * Auto label:", comment);
    else if (likelyLabels.length > 0) console.info(" * Auto label and escalate:", comment);
    else if (escalatingLabels.length > 0) console.info(" * Escalate:", comment);
    else console.info(" * No action:", comment);
  } else {
    await agent.tools.ozone.moderation.emitEvent({
      event: {
        $type: "tools.ozone.moderation.defs#modEventTag",
        add: ["auto-handled"],
        remove: [],
      },
      subject,
      createdBy: me.data.did,
    });
    // One event carries the comment; its type depends on the strongest bucket that is non-empty.
    await agent.tools.ozone.moderation.emitEvent({
      event: {
        $type: combinedLabels.length > 0
          ? "tools.ozone.moderation.defs#modEventLabel"
          : escalatingLabels.length > 0
          ? "tools.ozone.moderation.defs#modEventEscalate"
          : "tools.ozone.moderation.defs#modEventComment",
        comment: comment,
        createLabelVals: combinedLabels.length == 0 ? undefined : combinedLabels,
        negateLabelVals: combinedLabels.length == 0 ? undefined : [],
      },
      subject,
      createdBy: me.data.did,
    });
    if (certainLabels.length > 0) {
      // Confident enough: close the subject out.
      await agent.tools.ozone.moderation.emitEvent({
        event: {
          $type: "tools.ozone.moderation.defs#modEventAcknowledge",
        },
        subject,
        createdBy: me.data.did,
      });
    } else if (likelyLabels.length > 0) {
      // Labeled, but a human should double-check.
      await agent.tools.ozone.moderation.emitEvent({
        event: {
          $type: "tools.ozone.moderation.defs#modEventEscalate",
          comment: "[Automated] Escalated due to labels: " + likelyLabels.join(","),
        },
        subject,
        createdBy: me.data.did,
      });
    }
  }

  // ---- Optionally audit the rest of the thread (one level of crawling only) ----
  if (CRAWL_THREAD && !opts.crawled) {
    try {
      // If the parent has already/recently been handled, there's not
      // much use in crawling the thread again.
      if (await hasAlreadyHandled(record.reply?.parent?.uri ?? "__defnot__") == true) return;
      const thread = await publicAgent.app.bsky.feed.getPostThread({
        uri: post.uri,
        // depth: 6,
        // parentHeight: 80,
      });
      if (thread.success) {
        const allPosts: AppBskyFeedDefs.PostView[] = [];
        /** Collects the posts of all ancestors and replies; the node's own post is not added. */
        const crawl = (node: AppBskyFeedDefs.ThreadViewPost): void => {
          if (AppBskyFeedDefs.isThreadViewPost(node.parent)) {
            allPosts.push(node.parent.post);
            crawl(node.parent);
          }
          for (const reply of node.replies ?? []) {
            if (AppBskyFeedDefs.isThreadViewPost(reply)) {
              allPosts.push(reply.post);
              crawl(reply);
            }
          }
        };
        // The starting post itself is not added: it has just been audited.
        if (AppBskyFeedDefs.isThreadViewPost(thread.data.thread)) {
          crawl(thread.data.thread);
        }
        const rootLabeled = certainLabels.length > 0;
        for (const threadPost of allPosts) {
          await auditPostForKeywords(threadPost, agent, me, {
            crawled: true,
            rootLabeled: rootLabeled,
            isReport: false, //opts.isReport,
          });
        }
      }
    } catch (e) {
      console.error("Failed while crawling %s:", post.uri, e);
    }
  }
}