Script for easily configuring, using, switching and comparing local offline coding models.
#!/usr/bin/env bash
set -euo pipefail

# =============================================================================
# Local AI Coding Environment Setup for macOS Apple Silicon
# llama.cpp + Qwen 2.5 Coder 32B (chat) + Qwen 2.5 Coder 1.5B (autocomplete)
# + Aider (terminal coding agent) or OpenCode
# =============================================================================

BOLD="\033[1m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
RED="\033[0;31m"
RESET="\033[0m"

MODELS_DIR="$HOME/.local/share/llama-models"
CHAT_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/qwen2.5-coder-32b-instruct-q4_k_m.gguf"
CHAT_MODEL_FILE="qwen2.5-coder-32b-instruct-q4_k_m.gguf"
AUTOCOMPLETE_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
AUTOCOMPLETE_MODEL_FILE="qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"

CHAT_PORT=8080
AUTOCOMPLETE_PORT=8081

AIDER_CONFIG_DIR="$HOME/.aider"
AIDER_CONFIG_FILE="$AIDER_CONFIG_DIR/aider.conf.yml"

log()  { echo -e "${GREEN}${BOLD}[✓]${RESET} $1"; }
warn() { echo -e "${YELLOW}${BOLD}[!]${RESET} $1"; }
err()  { echo -e "${RED}${BOLD}[✗]${RESET} $1"; exit 1; }

# -----------------------------------------------------------------------------
# Pre-flight checks
# -----------------------------------------------------------------------------
echo -e "\n${BOLD}🔧 Local AI Coding Environment Installer (llama.cpp)${RESET}\n"

[[ "$(uname)" == "Darwin" ]] || err "This script is for macOS only."
[[ "$(uname -m)" == "arm64" ]] || warn "Not running on Apple Silicon — performance may vary."

MEM_GB=$(( $(sysctl -n hw.memsize) / 1073741824 ))
if (( MEM_GB < 32 )); then
  warn "You have ${MEM_GB}GB RAM. The 32B model needs ~20GB; you may experience swapping."
fi

# -----------------------------------------------------------------------------
# 1. Install Homebrew (if missing)
# -----------------------------------------------------------------------------
if ! command -v brew &>/dev/null; then
  log "Installing Homebrew..."
  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  eval "$(/opt/homebrew/bin/brew shellenv)"
else
  log "Homebrew already installed."
fi

# -----------------------------------------------------------------------------
# 2. Build / Install llama.cpp
# -----------------------------------------------------------------------------
if ! command -v llama-server &>/dev/null; then
  log "Installing llama.cpp via Homebrew..."
  brew install llama.cpp
else
  log "llama.cpp already installed."
fi

# Verify Metal support
if llama-server --help 2>&1 | grep -qi metal; then
  log "Metal (GPU) acceleration available."
else
  warn "Metal flag not detected — model will run on CPU only."
fi

# -----------------------------------------------------------------------------
# 3. Download Qwen GGUF models from HuggingFace
# -----------------------------------------------------------------------------
mkdir -p "$MODELS_DIR"

# Download a GGUF model into $MODELS_DIR unless it is already present.
# Arguments: $1 - download URL, $2 - destination filename
# -f makes curl fail on HTTP errors instead of saving an error page as the
# model; downloading to a ".part" file and renaming on success ensures an
# interrupted transfer is never mistaken for a complete model on a re-run.
download_model() {
  local url="$1" file="$2"
  local dest="$MODELS_DIR/$file"
  if [[ -f "$dest" ]]; then
    log "Model already downloaded: $file"
  else
    log "Downloading $file (this may take a while)..."
    curl -fL --progress-bar -o "$dest.part" "$url"
    mv "$dest.part" "$dest"
    log "Downloaded: $file"
  fi
}

download_model "$CHAT_MODEL_URL" "$CHAT_MODEL_FILE"
download_model "$AUTOCOMPLETE_MODEL_URL" "$AUTOCOMPLETE_MODEL_FILE"

# -----------------------------------------------------------------------------
# 4. Install Python & Aider
# -----------------------------------------------------------------------------
if ! command -v jq &>/dev/null; then
  log "Installing jq..."
  brew install jq
else
  log "jq already installed."
fi

if ! command -v python3 &>/dev/null; then
  log "Installing Python 3..."
  brew install python@3.12
fi

if ! command -v pipx &>/dev/null; then
  log "Installing pipx..."
  brew install pipx
  pipx ensurepath
fi

if ! command -v aider &>/dev/null; then
  log "Installing Aider..."
  pipx install aider-chat
else
  log "Aider already installed. Upgrading..."
  pipx upgrade aider-chat
fi

# -----------------------------------------------------------------------------
# 5. Create llama.cpp server launcher scripts
# -----------------------------------------------------------------------------
LAUNCH_DIR="$HOME/.local/bin"
mkdir -p "$LAUNCH_DIR"

# --- Chat server launcher ---
# Unquoted heredoc: $MODELS_DIR / $CHAT_MODEL_FILE / ${CHAT_PORT} expand now;
# \$-escaped expansions are evaluated when the generated script runs.
cat > "$LAUNCH_DIR/llama-chat-server" << SCRIPT
#!/usr/bin/env bash
# Start llama.cpp server with Qwen 2.5 Coder 32B for chat
# Exposed as OpenAI-compatible API on port ${CHAT_PORT}

MODEL="$MODELS_DIR/$CHAT_MODEL_FILE"

exec llama-server \\
  --model "\$MODEL" \\
  --port ${CHAT_PORT} \\
  --host 127.0.0.1 \\
  --ctx-size 16384 \\
  --n-gpu-layers 99 \\
  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
  --mlock \\
  "\$@"
SCRIPT
chmod +x "$LAUNCH_DIR/llama-chat-server"

# --- Autocomplete server launcher ---
cat > "$LAUNCH_DIR/llama-complete-server" << SCRIPT
#!/usr/bin/env bash
# Start llama.cpp server with Qwen 2.5 Coder 1.5B for autocomplete
# Exposed as OpenAI-compatible API on port ${AUTOCOMPLETE_PORT}

MODEL="$MODELS_DIR/$AUTOCOMPLETE_MODEL_FILE"

exec llama-server \\
  --model "\$MODEL" \\
  --port ${AUTOCOMPLETE_PORT} \\
  --host 127.0.0.1 \\
  --ctx-size 4096 \\
  --n-gpu-layers 99 \\
  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
  --mlock \\
  "\$@"
SCRIPT
chmod +x "$LAUNCH_DIR/llama-complete-server"

# --- Combined server manager ---
# Quoted delimiter: everything below is written literally (no expansion here).
cat > "$LAUNCH_DIR/llama-start" << 'SCRIPT'
#!/usr/bin/env bash
# Start both llama.cpp servers (chat + autocomplete)

BOLD="\033[1m"; GREEN="\033[0;32m"; RED="\033[0;31m"; RESET="\033[0m"
CHAT_PID="" COMPLETE_PID=""

cleanup() {
  echo -e "\n${RED}Shutting down servers...${RESET}"
  [ -n "$CHAT_PID" ] && kill "$CHAT_PID" 2>/dev/null
  [ -n "$COMPLETE_PID" ] && kill "$COMPLETE_PID" 2>/dev/null
  wait 2>/dev/null
  echo -e "${GREEN}Done.${RESET}"
  exit 0
}
trap cleanup SIGINT SIGTERM

echo -e "${BOLD}Starting llama.cpp servers...${RESET}\n"

echo -e "${GREEN}[1/2]${RESET} Chat model (32B) on :8080..."
llama-chat-server &>/tmp/llama-chat.log &
CHAT_PID=$!

echo -e "${GREEN}[2/2]${RESET} Autocomplete model (1.5B) on :8081..."
llama-complete-server &>/tmp/llama-complete.log &
COMPLETE_PID=$!

# Wait for servers to be ready
echo -ne "\nWaiting for servers..."
for i in $(seq 1 60); do
  CHAT_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8080/health 2>/dev/null || true)
  COMP_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8081/health 2>/dev/null || true)
  if [[ "$CHAT_OK" == "200" && "$COMP_OK" == "200" ]]; then
    echo -e " ${GREEN}ready!${RESET}"
    break
  fi
  echo -n "."
  sleep 2
done

echo ""
echo -e "${BOLD}Servers running:${RESET}"
echo -e "  Chat (32B):          http://127.0.0.1:8080"
echo -e "  Autocomplete (1.5B): http://127.0.0.1:8081"
echo -e "  Logs: /tmp/llama-chat.log, /tmp/llama-complete.log"
echo -e "\n  Press Ctrl+C to stop both servers.\n"

wait
SCRIPT
chmod +x "$LAUNCH_DIR/llama-start"

# --- Stop servers ---
cat > "$LAUNCH_DIR/llama-stop" << 'SCRIPT'
#!/usr/bin/env bash
# Stop all running llama-server processes
pkill -f "llama-server" 2>/dev/null && echo "Servers stopped." || echo "No servers running."
SCRIPT
chmod +x "$LAUNCH_DIR/llama-stop"

# -----------------------------------------------------------------------------
# 6. Configure Aider to use llama.cpp OpenAI-compatible API
# -----------------------------------------------------------------------------
mkdir -p "$AIDER_CONFIG_DIR"

cat > "$AIDER_CONFIG_FILE" << 'EOF'
# =============================================================================
# Aider Configuration — Qwen 2.5 Coder via llama.cpp
# =============================================================================

# Point Aider at llama.cpp's OpenAI-compatible endpoint
# The model name can be anything — llama.cpp ignores it and uses the loaded model
model: openai/qwen2.5-coder-32b

# Architect mode for better code planning
architect: true
editor-model: openai/qwen2.5-coder-32b

# Git integration
auto-commits: true
dirty-commits: true
attribute-author: false
attribute-committer: false

# UI preferences
pretty: true
stream: true
dark-mode: true

# Code style
code-theme: monokai
show-diffs: true

# Disable analytics
analytics-disable: true
EOF

# Environment file for API base URL
cat > "$AIDER_CONFIG_DIR/.env" << 'EOF'
# llama.cpp serves an OpenAI-compatible API — no real key needed
OPENAI_API_KEY=sk-not-needed
OPENAI_API_BASE=http://127.0.0.1:8080/v1
EOF

log "Aider config written to ${AIDER_CONFIG_FILE}"
log "Aider env written to ${AIDER_CONFIG_DIR}/.env"

# -----------------------------------------------------------------------------
# 7. Create main launcher: ai-code
# -----------------------------------------------------------------------------
cat > "$LAUNCH_DIR/ai-code" << 'SCRIPT'
#!/usr/bin/env bash
# Launch Aider with local Qwen 2.5 Coder 32B via llama.cpp
# Usage: ai-code [directory] [aider flags...]
#
# Starts llama.cpp servers automatically if not already running.

BOLD="\033[1m"; GREEN="\033[0;32m"; RESET="\033[0m"

# Check if chat server is running
if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
  echo -e "${BOLD}Starting llama.cpp chat server...${RESET}"
  llama-chat-server &>/tmp/llama-chat.log &
  echo -n "Waiting for model to load"
  for i in $(seq 1 120); do
    if curl -s http://127.0.0.1:8080/health &>/dev/null; then
      echo -e " ${GREEN}ready!${RESET}"
      break
    fi
    echo -n "."
    sleep 2
  done
fi

DIR="${1:-.}"
shift 2>/dev/null || true
cd "$DIR" || exit 1

# Initialize git repo if needed
if [ ! -d .git ]; then
  echo "Initializing git repo..."
  git init
  git add -A
  git commit -m "Initial commit (before AI edits)" --allow-empty
fi

# Load the API config. The .env file is plain KEY=value lines, so sourcing it
# under `set -a` exports each variable safely; the old `export $(grep | xargs)`
# pattern word-splits any value containing spaces.
set -a
# shellcheck disable=SC1090
source "$HOME/.aider/.env"
set +a

exec aider "$@"
SCRIPT
chmod +x "$LAUNCH_DIR/ai-code"

# Quick question mode
cat > "$LAUNCH_DIR/ai-ask" << 'SCRIPT'
#!/usr/bin/env bash
# Quick coding Q&A — no file editing
if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
  llama-chat-server &>/tmp/llama-chat.log &
  sleep 5
fi
# Source the env file instead of export $(grep|xargs): survives spaces in values.
set -a
# shellcheck disable=SC1090
source "$HOME/.aider/.env"
set +a
if [ -n "${1:-}" ]; then
  exec aider --no-auto-commits --message "$*"
else
  exec aider --no-auto-commits
fi
SCRIPT
chmod +x "$LAUNCH_DIR/ai-ask"

# Pipe mode using llama.cpp CLI directly.
# Quoted delimiter: this script needs nothing from the installer environment,
# so writing it literally avoids a whole class of \$-escaping mistakes.
cat > "$LAUNCH_DIR/ai-pipe" << 'SCRIPT'
#!/usr/bin/env bash
# Pipe code through llama.cpp
# Usage: cat main.py | ai-pipe "add error handling"

PROMPT="${1:-Improve this code}"
INPUT=$(cat)

# NB: jq does NOT expand $vars inside string literals, so the prompt and code
# must be concatenated explicitly — the previous "$p\n..." form sent the
# literal two characters "$p" to the model instead of the prompt.
curl -s http://127.0.0.1:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg p "$PROMPT" --arg c "$INPUT" '{
    model: "qwen",
    messages: [
      {role: "system", content: "You are an expert programmer. Output only code, no explanations."},
      {role: "user", content: ($p + "\n\n```\n" + $c + "\n```")}
    ],
    stream: false
  }')" | jq -r '.choices[0].message.content'
SCRIPT
chmod +x "$LAUNCH_DIR/ai-pipe"

# -----------------------------------------------------------------------------
# 8. Shell integration
# -----------------------------------------------------------------------------
SHELL_RC=""
case "$SHELL" in
  */zsh) SHELL_RC="$HOME/.zshrc" ;;
  */bash) SHELL_RC="$HOME/.bashrc" ;;
  *) SHELL_RC="$HOME/.profile" ;;
esac

# -F: match the path as a fixed string (the "." would otherwise be a regex wildcard).
if ! grep -qF '.local/bin' "$SHELL_RC" 2>/dev/null; then
  echo '' >> "$SHELL_RC"
  echo '# Local AI coding tools' >> "$SHELL_RC"
  echo 'export PATH="$HOME/.local/bin:$PATH"' >> "$SHELL_RC"
  log "Added ~/.local/bin to PATH in ${SHELL_RC}"
fi

# -----------------------------------------------------------------------------
# Done!
# -----------------------------------------------------------------------------
echo ""
echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
echo -e "${GREEN}${BOLD}  ✅ Setup complete!${RESET}"
echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
echo ""
echo -e "  ${BOLD}Models downloaded to:${RESET} ${MODELS_DIR}"
echo -e "    Chat:         ${CHAT_MODEL_FILE} (~20GB)"
echo -e "    Autocomplete: ${AUTOCOMPLETE_MODEL_FILE} (~1.2GB)"
echo ""
echo -e "  ${BOLD}Commands available${RESET} (restart your shell first):"
echo ""
echo -e "  ${BOLD}llama-start${RESET}          Start both llama.cpp servers"
echo -e "  ${BOLD}llama-stop${RESET}           Stop all llama.cpp servers"
echo ""
echo -e "  ${BOLD}ai-code${RESET} [dir]        Full coding agent (auto-starts server)"
echo -e "                       cd into a project and run 'ai-code .'"
echo ""
echo -e "  ${BOLD}ai-ask${RESET} \"question\"    Quick coding Q&A, no file edits"
echo ""
echo -e "  ${BOLD}ai-pipe${RESET} \"prompt\"     Pipe code through the model"
echo -e "                       cat file.py | ai-pipe \"add types\""
echo ""
echo -e "  ${BOLD}Config:${RESET}      ${AIDER_CONFIG_FILE}"
echo -e "  ${BOLD}API env:${RESET}     ${AIDER_CONFIG_DIR}/.env"
echo -e "  ${BOLD}Server logs:${RESET} /tmp/llama-chat.log, /tmp/llama-complete.log"
echo ""
echo -e "  Run ${BOLD}source ${SHELL_RC}${RESET} or open a new terminal to get started."
echo ""