#!/usr/bin/env bash
#
# Local AI Coding Environment Setup for macOS Apple Silicon.
#
# Installs and wires together:
#   - llama.cpp (via Homebrew) serving OpenAI-compatible endpoints
#   - Qwen 2.5 Coder 32B  (chat)         on port 8080
#   - Qwen 2.5 Coder 1.5B (autocomplete) on port 8081
#   - Aider (terminal coding agent) configured against the chat endpoint
#
# Generated helper commands (written to ~/.local/bin):
#   llama-start / llama-stop   manage both model servers
#   ai-code [dir]              full coding agent (auto-starts chat server)
#   ai-ask "question"          quick Q&A, no file edits
#   ai-pipe "prompt"           pipe code on stdin through the model
set -euo pipefail

# =============================================================================
# Local AI Coding Environment Setup for macOS Apple Silicon
# llama.cpp + Qwen 2.5 Coder 32B (chat) + Qwen 2.5 Coder 1.5B (autocomplete)
# + Aider (terminal coding agent) or OpenCode
# =============================================================================

# --- ANSI styling for log output ---------------------------------------------
BOLD="\033[1m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
RED="\033[0;31m"
RESET="\033[0m"

# --- Configuration -----------------------------------------------------------
MODELS_DIR="$HOME/.local/share/llama-models"
CHAT_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/qwen2.5-coder-32b-instruct-q4_k_m.gguf"
CHAT_MODEL_FILE="qwen2.5-coder-32b-instruct-q4_k_m.gguf"
AUTOCOMPLETE_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
AUTOCOMPLETE_MODEL_FILE="qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
CHAT_PORT=8080
AUTOCOMPLETE_PORT=8081
AIDER_CONFIG_DIR="$HOME/.aider"
AIDER_CONFIG_FILE="$AIDER_CONFIG_DIR/aider.conf.yml"

# log/warn print a status line; err prints a message and aborts the install.
log() { echo -e "${GREEN}${BOLD}[✓]${RESET} $1"; }
warn() { echo -e "${YELLOW}${BOLD}[!]${RESET} $1"; }
err() { echo -e "${RED}${BOLD}[✗]${RESET} $1"; exit 1; }

# -----------------------------------------------------------------------------
# Pre-flight checks
# -----------------------------------------------------------------------------
echo -e "\n${BOLD}🔧 Local AI Coding Environment Installer (llama.cpp)${RESET}\n"

[[ "$(uname)" == "Darwin" ]] || err "This script is for macOS only."
[[ "$(uname -m)" == "arm64" ]] || warn "Not running on Apple Silicon — performance may vary."

# 1073741824 = bytes per GiB
MEM_GB=$(( $(sysctl -n hw.memsize) / 1073741824 ))
if (( MEM_GB < 32 )); then
  warn "You have ${MEM_GB}GB RAM. The 32B model needs ~20GB; you may experience swapping."
fi

# -----------------------------------------------------------------------------
# 1. Install Homebrew (if missing)
# -----------------------------------------------------------------------------
if ! command -v brew &>/dev/null; then
  log "Installing Homebrew..."
  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  eval "$(/opt/homebrew/bin/brew shellenv)"
else
  log "Homebrew already installed."
fi

# -----------------------------------------------------------------------------
# 2. Build / Install llama.cpp
# -----------------------------------------------------------------------------
if ! command -v llama-server &>/dev/null; then
  log "Installing llama.cpp via Homebrew..."
  brew install llama.cpp
else
  log "llama.cpp already installed."
fi

# Verify Metal support (best-effort: greps the CLI help text for the flag)
if llama-server --help 2>&1 | grep -qi metal; then
  log "Metal (GPU) acceleration available."
else
  warn "Metal flag not detected — model will run on CPU only."
fi

# -----------------------------------------------------------------------------
# 3. Download Qwen GGUF models from HuggingFace
# -----------------------------------------------------------------------------
mkdir -p "$MODELS_DIR"

# download_model URL FILE — fetch FILE into $MODELS_DIR unless already present.
# curl -f makes HTTP errors fail the transfer instead of saving an HTML error
# page as the model file (which the existence check would then trust forever).
download_model() {
  local url="$1" file="$2"
  if [ -f "$MODELS_DIR/$file" ]; then
    log "Model already downloaded: $file"
  else
    log "Downloading $file (this may take a while)..."
    curl -fL --progress-bar -o "$MODELS_DIR/$file" "$url"
    log "Downloaded: $file"
  fi
}

download_model "$CHAT_MODEL_URL" "$CHAT_MODEL_FILE"
download_model "$AUTOCOMPLETE_MODEL_URL" "$AUTOCOMPLETE_MODEL_FILE"

# -----------------------------------------------------------------------------
# 4. Install Python & Aider
# -----------------------------------------------------------------------------
if ! command -v jq &>/dev/null; then
  log "Installing jq..."
  brew install jq
else
  log "jq already installed."
fi

if ! command -v python3 &>/dev/null; then
  log "Installing Python 3..."
  brew install python@3.12
fi

if ! command -v pipx &>/dev/null; then
  log "Installing pipx..."
  brew install pipx
  pipx ensurepath
fi

if ! command -v aider &>/dev/null; then
  log "Installing Aider..."
  pipx install aider-chat
else
  log "Aider already installed. Upgrading..."
  pipx upgrade aider-chat
fi

# -----------------------------------------------------------------------------
# 5. Create llama.cpp server launcher scripts
# -----------------------------------------------------------------------------
LAUNCH_DIR="$HOME/.local/bin"
mkdir -p "$LAUNCH_DIR"

# --- Chat server launcher ---
# Unquoted heredoc: model path and port are baked in at setup time;
# \$-escaped expansions are evaluated when the launcher itself runs.
cat > "$LAUNCH_DIR/llama-chat-server" << SCRIPT
#!/usr/bin/env bash
# Start llama.cpp server with Qwen 2.5 Coder 32B for chat
# Exposed as OpenAI-compatible API on port ${CHAT_PORT}
MODEL="$MODELS_DIR/$CHAT_MODEL_FILE"
exec llama-server \\
  --model "\$MODEL" \\
  --port ${CHAT_PORT} \\
  --host 127.0.0.1 \\
  --ctx-size 16384 \\
  --n-gpu-layers 99 \\
  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
  --mlock \\
  "\$@"
SCRIPT
chmod +x "$LAUNCH_DIR/llama-chat-server"

# --- Autocomplete server launcher ---
cat > "$LAUNCH_DIR/llama-complete-server" << SCRIPT
#!/usr/bin/env bash
# Start llama.cpp server with Qwen 2.5 Coder 1.5B for autocomplete
# Exposed as OpenAI-compatible API on port ${AUTOCOMPLETE_PORT}
MODEL="$MODELS_DIR/$AUTOCOMPLETE_MODEL_FILE"
exec llama-server \\
  --model "\$MODEL" \\
  --port ${AUTOCOMPLETE_PORT} \\
  --host 127.0.0.1 \\
  --ctx-size 4096 \\
  --n-gpu-layers 99 \\
  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
  --mlock \\
  "\$@"
SCRIPT
chmod +x "$LAUNCH_DIR/llama-complete-server"

# --- Combined server manager ---
cat > "$LAUNCH_DIR/llama-start" << 'SCRIPT'
#!/usr/bin/env bash
# Start both llama.cpp servers (chat + autocomplete)
BOLD="\033[1m"; GREEN="\033[0;32m"; RED="\033[0;31m"; RESET="\033[0m"

CHAT_PID=""
COMPLETE_PID=""

# Kill both children on Ctrl+C / TERM, then exit cleanly.
cleanup() {
  echo -e "\n${RED}Shutting down servers...${RESET}"
  [ -n "$CHAT_PID" ] && kill "$CHAT_PID" 2>/dev/null
  [ -n "$COMPLETE_PID" ] && kill "$COMPLETE_PID" 2>/dev/null
  wait 2>/dev/null
  echo -e "${GREEN}Done.${RESET}"
  exit 0
}
trap cleanup SIGINT SIGTERM

echo -e "${BOLD}Starting llama.cpp servers...${RESET}\n"
echo -e "${GREEN}[1/2]${RESET} Chat model (32B) on :8080..."
llama-chat-server &>/tmp/llama-chat.log &
CHAT_PID=$!
echo -e "${GREEN}[2/2]${RESET} Autocomplete model (1.5B) on :8081..."
llama-complete-server &>/tmp/llama-complete.log &
COMPLETE_PID=$!

# Poll /health on both ports; large models can take minutes to load.
echo -ne "\nWaiting for servers..."
READY=0
for i in $(seq 1 60); do
  CHAT_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8080/health 2>/dev/null || true)
  COMP_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8081/health 2>/dev/null || true)
  if [[ "$CHAT_OK" == "200" && "$COMP_OK" == "200" ]]; then
    READY=1
    echo -e " ${GREEN}ready!${RESET}"
    break
  fi
  echo -n "."
  sleep 2
done
# Don't claim success if the poll loop expired without both servers healthy.
if [[ "$READY" != "1" ]]; then
  echo -e " ${RED}timed out${RESET} — servers may still be loading; check the logs below."
fi

echo ""
echo -e "${BOLD}Servers running:${RESET}"
echo -e "  Chat (32B):          http://127.0.0.1:8080"
echo -e "  Autocomplete (1.5B): http://127.0.0.1:8081"
echo -e "  Logs: /tmp/llama-chat.log, /tmp/llama-complete.log"
echo -e "\n  Press Ctrl+C to stop both servers.\n"
wait
SCRIPT
chmod +x "$LAUNCH_DIR/llama-start"

# --- Stop servers ---
cat > "$LAUNCH_DIR/llama-stop" << 'SCRIPT'
#!/usr/bin/env bash
# Stop all running llama-server processes
pkill -f "llama-server" 2>/dev/null && echo "Servers stopped." || echo "No servers running."
SCRIPT
chmod +x "$LAUNCH_DIR/llama-stop"

# -----------------------------------------------------------------------------
# 6. Configure Aider to use llama.cpp OpenAI-compatible API
# -----------------------------------------------------------------------------
mkdir -p "$AIDER_CONFIG_DIR"

cat > "$AIDER_CONFIG_FILE" << 'EOF'
# =============================================================================
# Aider Configuration — Qwen 2.5 Coder via llama.cpp
# =============================================================================

# Point Aider at llama.cpp's OpenAI-compatible endpoint
# The model name can be anything — llama.cpp ignores it and uses the loaded model
model: openai/qwen2.5-coder-32b

# Architect mode for better code planning
architect: true
editor-model: openai/qwen2.5-coder-32b

# Git integration
auto-commits: true
dirty-commits: true
attribute-author: false
attribute-committer: false

# UI preferences
pretty: true
stream: true
dark-mode: true

# Code style
code-theme: monokai
show-diffs: true

# Disable analytics
analytics-disable: true
EOF

# Environment file for API base URL
cat > "$AIDER_CONFIG_DIR/.env" << 'EOF'
# llama.cpp serves an OpenAI-compatible API — no real key needed
OPENAI_API_KEY=sk-not-needed
OPENAI_API_BASE=http://127.0.0.1:8080/v1
EOF

log "Aider config written to ${AIDER_CONFIG_FILE}"
log "Aider env written to ${AIDER_CONFIG_DIR}/.env"

# -----------------------------------------------------------------------------
# 7. Create main launcher: ai-code
# -----------------------------------------------------------------------------
cat > "$LAUNCH_DIR/ai-code" << 'SCRIPT'
#!/usr/bin/env bash
# Launch Aider with local Qwen 2.5 Coder 32B via llama.cpp
# Usage: ai-code [directory] [aider flags...]
#
# Starts llama.cpp servers automatically if not already running.
BOLD="\033[1m"; GREEN="\033[0;32m"; RESET="\033[0m"

# Check if chat server is running
if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
  echo -e "${BOLD}Starting llama.cpp chat server...${RESET}"
  llama-chat-server &>/tmp/llama-chat.log &
  echo -n "Waiting for model to load"
  for i in $(seq 1 120); do
    if curl -s http://127.0.0.1:8080/health &>/dev/null; then
      echo -e " ${GREEN}ready!${RESET}"
      break
    fi
    echo -n "."
    sleep 2
  done
fi

DIR="${1:-.}"
shift 2>/dev/null || true
cd "$DIR" || exit 1

# Initialize git repo if needed
if [ ! -d .git ]; then
  echo "Initializing git repo..."
  git init
  git add -A
  git commit -m "Initial commit (before AI edits)" --allow-empty
fi

# Export API settings from the env file. 'set -a' auto-exports every sourced
# assignment and is safe for values containing spaces, unlike
# 'export $(grep ... | xargs)' which word-splits them.
set -a
. "$HOME/.aider/.env"
set +a

exec aider "$@"
SCRIPT
chmod +x "$LAUNCH_DIR/ai-code"

# Quick question mode
cat > "$LAUNCH_DIR/ai-ask" << 'SCRIPT'
#!/usr/bin/env bash
# Quick coding Q&A — no file editing
if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
  llama-chat-server &>/tmp/llama-chat.log &
  sleep 5
fi
set -a
. "$HOME/.aider/.env"
set +a
if [ -n "${1:-}" ]; then
  exec aider --no-auto-commits --message "$*"
else
  exec aider --no-auto-commits
fi
SCRIPT
chmod +x "$LAUNCH_DIR/ai-ask"

# --- Pipe mode using the chat completions API directly ---
# Quoted heredoc: everything is literal; $PROMPT/$INPUT expand when ai-pipe
# runs, and $p/$c are jq variables bound via --arg. Note that jq does NOT
# interpolate $p inside a "..." string literal, so the user message must be
# built by concatenation.
cat > "$LAUNCH_DIR/ai-pipe" << 'SCRIPT'
#!/usr/bin/env bash
# Pipe code through llama.cpp
# Usage: cat main.py | ai-pipe "add error handling"
PROMPT="${1:-Improve this code}"
INPUT=$(cat)
curl -s http://127.0.0.1:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg p "$PROMPT" --arg c "$INPUT" '{
    model: "qwen",
    messages: [
      {role: "system", content: "You are an expert programmer. Output only code, no explanations."},
      {role: "user", content: ($p + "\n\n```\n" + $c + "\n```")}
    ],
    stream: false
  }')" | jq -r '.choices[0].message.content'
SCRIPT
chmod +x "$LAUNCH_DIR/ai-pipe"

# -----------------------------------------------------------------------------
# 8. Shell integration
# -----------------------------------------------------------------------------
SHELL_RC=""
case "$SHELL" in
  */zsh) SHELL_RC="$HOME/.zshrc" ;;
  */bash) SHELL_RC="$HOME/.bashrc" ;;
  *) SHELL_RC="$HOME/.profile" ;;
esac

# -F: fixed-string match — '.' would otherwise be a regex wildcard.
if ! grep -qF '.local/bin' "$SHELL_RC" 2>/dev/null; then
  echo '' >> "$SHELL_RC"
  echo '# Local AI coding tools' >> "$SHELL_RC"
  echo 'export PATH="$HOME/.local/bin:$PATH"' >> "$SHELL_RC"
  log "Added ~/.local/bin to PATH in ${SHELL_RC}"
fi

# -----------------------------------------------------------------------------
# Done!
# -----------------------------------------------------------------------------
echo ""
echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
echo -e "${GREEN}${BOLD} ✅ Setup complete!${RESET}"
echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
echo ""
echo -e " ${BOLD}Models downloaded to:${RESET} ${MODELS_DIR}"
echo -e "   Chat:         ${CHAT_MODEL_FILE} (~20GB)"
echo -e "   Autocomplete: ${AUTOCOMPLETE_MODEL_FILE} (~1.2GB)"
echo ""
echo -e " ${BOLD}Commands available${RESET} (restart your shell first):"
echo ""
echo -e "   ${BOLD}llama-start${RESET}          Start both llama.cpp servers"
echo -e "   ${BOLD}llama-stop${RESET}           Stop all llama.cpp servers"
echo ""
echo -e "   ${BOLD}ai-code${RESET} [dir]        Full coding agent (auto-starts server)"
echo -e "                        cd into a project and run 'ai-code .'"
echo ""
echo -e "   ${BOLD}ai-ask${RESET} \"question\"    Quick coding Q&A, no file edits"
echo ""
echo -e "   ${BOLD}ai-pipe${RESET} \"prompt\"     Pipe code through the model"
echo -e "                        cat file.py | ai-pipe \"add types\""
echo ""
echo -e " ${BOLD}Config:${RESET}      ${AIDER_CONFIG_FILE}"
echo -e " ${BOLD}API env:${RESET}     ${AIDER_CONFIG_DIR}/.env"
echo -e " ${BOLD}Server logs:${RESET} /tmp/llama-chat.log, /tmp/llama-complete.log"
echo ""
echo -e " Run ${BOLD}source ${SHELL_RC}${RESET} or open a new terminal to get started."
echo ""