/**
 * Spec Parser — Markdown → Clause[]
 *
 * Splits a Markdown document on heading boundaries.
 * Each heading + its body = one Clause.
 * Tracks section hierarchy for nested headings.
 */

import type { Clause } from './models/clause.js';
import { normalizeText } from './normalizer.js';
import { clauseSemhash, contextSemhashCold, clauseId } from './semhash.js';

interface RawSection {
  heading: string;
  level: number;
  startLine: number; // 1-indexed
  endLine: number;   // 1-indexed, inclusive
  rawText: string;
  sectionPath: string[];
}

/**
 * Parse a Markdown document into an array of Clauses.
 */
export function parseSpec(content: string, docId: string): Clause[] {
  const lines = content.split('\n');
  const sections = extractSections(lines);

  if (sections.length === 0) {
    // No headings found — treat entire document as one clause
    if (content.trim().length === 0) return [];
    const normalizedText = normalizeText(content);
    const semhash = clauseSemhash(normalizedText);
    const sectionPath: string[] = [];
    const id = clauseId(docId, sectionPath, normalizedText);
    const ctxHash = contextSemhashCold(normalizedText, sectionPath, '', '');
    return [{
      clause_id: id,
      source_doc_id: docId,
      source_line_range: [1, lines.length],
      raw_text: content,
      normalized_text: normalizedText,
      section_path: sectionPath,
      clause_semhash: semhash,
      context_semhash_cold: ctxHash,
    }];
  }

  // Build clauses without context hashes first
  const preClauses: Omit<Clause, 'context_semhash_cold'>[] = sections.map(sec => {
    const normalized = normalizeText(sec.rawText);
    const semhash = clauseSemhash(normalized);
    const id = clauseId(docId, sec.sectionPath, normalized);
    return {
      clause_id: id,
      source_doc_id: docId,
      source_line_range: [sec.startLine, sec.endLine] as [number, number],
      raw_text: sec.rawText,
      normalized_text: normalized,
      section_path: sec.sectionPath,
      clause_semhash: semhash,
    };
  });

  // Now compute context hashes with neighbor awareness
  const clauses: Clause[] = preClauses.map((pc, i) => {
    const prev = i > 0 ? preClauses[i - 1].clause_semhash : '';
    const next = i < preClauses.length - 1 ? preClauses[i + 1].clause_semhash : '';
    const ctxHash = contextSemhashCold(pc.normalized_text, pc.section_path, prev, next);
    return { ...pc, context_semhash_cold: ctxHash };
  });

  return clauses;
}

/**
 * Extract sections from Markdown lines.
 * A section = heading line through (but not including) the next heading of same or higher level,
 * or end of file.
 */
function extractSections(lines: string[]): RawSection[] {
  const headingPattern = /^(#{1,6})\s+(.+)/;
  const headingIndices: { index: number; level: number; text: string }[] = [];

  for (let i = 0; i < lines.length; i++) {
    const match = lines[i].match(headingPattern);
    if (match) {
      headingIndices.push({
        index: i,
        level: match[1].length,
        text: match[2].trim(),
      });
    }
  }

  if (headingIndices.length === 0) return [];

  // Build sections with proper section_path tracking
  const sections: RawSection[] = [];
  const pathStack: { level: number; text: string }[] = [];

  // Capture pre-heading content as a preamble section
  if (headingIndices.length > 0 && headingIndices[0].index > 0) {
    const preambleText = lines.slice(0, headingIndices[0].index).join('\n').trim();
    if (preambleText.length > 0) {
      sections.push({
        heading: '(preamble)',
        level: 0,
        startLine: 1,
        endLine: headingIndices[0].index,
        rawText: preambleText,
        sectionPath: ['(preamble)'],
      });
    }
  }

  for (let h = 0; h < headingIndices.length; h++) {
    const { index, level, text } = headingIndices[h];
    const startLine = index + 1; // 1-indexed
    const endLine = h < headingIndices.length - 1
      ? headingIndices[h + 1].index // line before next heading (0-indexed), = next heading 1-indexed - 1
      : lines.length;

    // Update section path stack
    while (pathStack.length > 0 && pathStack[pathStack.length - 1].level >= level) {
      pathStack.pop();
    }
    pathStack.push({ level, text });
    const sectionPath = pathStack.map(p => p.text);

    // Extract raw text for this section
    const rawText = lines.slice(index, endLine).join('\n');

    sections.push({
      heading: text,
      level,
      startLine,
      endLine,
      rawText,
      sectionPath: [...sectionPath],
    });
  }

  return sections;
}