#!/usr/bin/env bash
#
# Local AI Coding Environment Setup for macOS Apple Silicon.
#
# Installs and wires together:
#   - llama.cpp (via Homebrew) serving OpenAI-compatible endpoints
#   - Qwen 2.5 Coder 32B  (chat)         on port 8080
#   - Qwen 2.5 Coder 1.5B (autocomplete) on port 8081
#   - Aider (terminal coding agent) configured against the chat endpoint
#
# Generated helper commands (written to ~/.local/bin):
#   llama-start / llama-stop   manage both model servers
#   ai-code [dir]              full coding agent (auto-starts chat server)
#   ai-ask "question"          quick Q&A, no file edits
#   ai-pipe "prompt"           pipe code on stdin through the model
set -euo pipefail

# =============================================================================
# Local AI Coding Environment Setup for macOS Apple Silicon
# llama.cpp + Qwen 2.5 Coder 32B (chat) + Qwen 2.5 Coder 1.5B (autocomplete)
# + Aider (terminal coding agent) or OpenCode
# =============================================================================

# --- ANSI styling for log output ---------------------------------------------
BOLD="\033[1m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
RED="\033[0;31m"
RESET="\033[0m"

# --- Configuration -----------------------------------------------------------
MODELS_DIR="$HOME/.local/share/llama-models"
CHAT_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/qwen2.5-coder-32b-instruct-q4_k_m.gguf"
CHAT_MODEL_FILE="qwen2.5-coder-32b-instruct-q4_k_m.gguf"
AUTOCOMPLETE_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
AUTOCOMPLETE_MODEL_FILE="qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
CHAT_PORT=8080
AUTOCOMPLETE_PORT=8081
AIDER_CONFIG_DIR="$HOME/.aider"
AIDER_CONFIG_FILE="$AIDER_CONFIG_DIR/aider.conf.yml"

# log/warn print a status line; err prints a message and aborts the install.
log() { echo -e "${GREEN}${BOLD}[✓]${RESET} $1"; }
warn() { echo -e "${YELLOW}${BOLD}[!]${RESET} $1"; }
err() { echo -e "${RED}${BOLD}[✗]${RESET} $1"; exit 1; }

# -----------------------------------------------------------------------------
# Pre-flight checks
# -----------------------------------------------------------------------------
echo -e "\n${BOLD}🔧 Local AI Coding Environment Installer (llama.cpp)${RESET}\n"

[[ "$(uname)" == "Darwin" ]] || err "This script is for macOS only."
[[ "$(uname -m)" == "arm64" ]] || warn "Not running on Apple Silicon — performance may vary."

# 1073741824 = bytes per GiB
MEM_GB=$(( $(sysctl -n hw.memsize) / 1073741824 ))
if (( MEM_GB < 32 )); then
  warn "You have ${MEM_GB}GB RAM. The 32B model needs ~20GB; you may experience swapping."
fi

# -----------------------------------------------------------------------------
# 1. Install Homebrew (if missing)
# -----------------------------------------------------------------------------
if ! command -v brew &>/dev/null; then
  log "Installing Homebrew..."
  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  eval "$(/opt/homebrew/bin/brew shellenv)"
else
  log "Homebrew already installed."
fi

# -----------------------------------------------------------------------------
# 2. Build / Install llama.cpp
# -----------------------------------------------------------------------------
if ! command -v llama-server &>/dev/null; then
  log "Installing llama.cpp via Homebrew..."
  brew install llama.cpp
else
  log "llama.cpp already installed."
fi

# Verify Metal support (best-effort: greps the CLI help text for the flag)
if llama-server --help 2>&1 | grep -qi metal; then
  log "Metal (GPU) acceleration available."
else
  warn "Metal flag not detected — model will run on CPU only."
fi

# -----------------------------------------------------------------------------
# 3. Download Qwen GGUF models from HuggingFace
# -----------------------------------------------------------------------------
mkdir -p "$MODELS_DIR"

# download_model URL FILE — fetch FILE into $MODELS_DIR unless already present.
# curl -f makes HTTP errors fail the transfer instead of saving an HTML error
# page as the model file (which the existence check would then trust forever).
download_model() {
  local url="$1" file="$2"
  if [ -f "$MODELS_DIR/$file" ]; then
    log "Model already downloaded: $file"
  else
    log "Downloading $file (this may take a while)..."
    curl -fL --progress-bar -o "$MODELS_DIR/$file" "$url"
    log "Downloaded: $file"
  fi
}

download_model "$CHAT_MODEL_URL" "$CHAT_MODEL_FILE"
download_model "$AUTOCOMPLETE_MODEL_URL" "$AUTOCOMPLETE_MODEL_FILE"

# -----------------------------------------------------------------------------
# 4. Install Python & Aider
# -----------------------------------------------------------------------------
if ! command -v jq &>/dev/null; then
  log "Installing jq..."
  brew install jq
else
  log "jq already installed."
fi

if ! command -v python3 &>/dev/null; then
  log "Installing Python 3..."
  brew install python@3.12
fi

if ! command -v pipx &>/dev/null; then
  log "Installing pipx..."
  brew install pipx
  pipx ensurepath
fi

if ! command -v aider &>/dev/null; then
  log "Installing Aider..."
  pipx install aider-chat
else
  log "Aider already installed. Upgrading..."
  pipx upgrade aider-chat
fi

# -----------------------------------------------------------------------------
# 5. Create llama.cpp server launcher scripts
# -----------------------------------------------------------------------------
LAUNCH_DIR="$HOME/.local/bin"
mkdir -p "$LAUNCH_DIR"

# --- Chat server launcher ---
# Unquoted heredoc: model path and port are baked in at setup time;
# \$-escaped expansions are evaluated when the launcher itself runs.
cat > "$LAUNCH_DIR/llama-chat-server" << SCRIPT
#!/usr/bin/env bash
# Start llama.cpp server with Qwen 2.5 Coder 32B for chat
# Exposed as OpenAI-compatible API on port ${CHAT_PORT}
MODEL="$MODELS_DIR/$CHAT_MODEL_FILE"
exec llama-server \\
  --model "\$MODEL" \\
  --port ${CHAT_PORT} \\
  --host 127.0.0.1 \\
  --ctx-size 16384 \\
  --n-gpu-layers 99 \\
  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
  --mlock \\
  "\$@"
SCRIPT
chmod +x "$LAUNCH_DIR/llama-chat-server"

# --- Autocomplete server launcher ---
cat > "$LAUNCH_DIR/llama-complete-server" << SCRIPT
#!/usr/bin/env bash
# Start llama.cpp server with Qwen 2.5 Coder 1.5B for autocomplete
# Exposed as OpenAI-compatible API on port ${AUTOCOMPLETE_PORT}
MODEL="$MODELS_DIR/$AUTOCOMPLETE_MODEL_FILE"
exec llama-server \\
  --model "\$MODEL" \\
  --port ${AUTOCOMPLETE_PORT} \\
  --host 127.0.0.1 \\
  --ctx-size 4096 \\
  --n-gpu-layers 99 \\
  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
  --mlock \\
  "\$@"
SCRIPT
chmod +x "$LAUNCH_DIR/llama-complete-server"

# --- Combined server manager ---
cat > "$LAUNCH_DIR/llama-start" << 'SCRIPT'
#!/usr/bin/env bash
# Start both llama.cpp servers (chat + autocomplete)
BOLD="\033[1m"; GREEN="\033[0;32m"; RED="\033[0;31m"; RESET="\033[0m"

CHAT_PID=""
COMPLETE_PID=""

# Kill both children on Ctrl+C / TERM, then exit cleanly.
cleanup() {
  echo -e "\n${RED}Shutting down servers...${RESET}"
  [ -n "$CHAT_PID" ] && kill "$CHAT_PID" 2>/dev/null
  [ -n "$COMPLETE_PID" ] && kill "$COMPLETE_PID" 2>/dev/null
  wait 2>/dev/null
  echo -e "${GREEN}Done.${RESET}"
  exit 0
}
trap cleanup SIGINT SIGTERM

echo -e "${BOLD}Starting llama.cpp servers...${RESET}\n"
echo -e "${GREEN}[1/2]${RESET} Chat model (32B) on :8080..."
llama-chat-server &>/tmp/llama-chat.log &
CHAT_PID=$!
echo -e "${GREEN}[2/2]${RESET} Autocomplete model (1.5B) on :8081..."
llama-complete-server &>/tmp/llama-complete.log &
COMPLETE_PID=$!

# Poll /health on both ports; large models can take minutes to load.
echo -ne "\nWaiting for servers..."
READY=0
for i in $(seq 1 60); do
  CHAT_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8080/health 2>/dev/null || true)
  COMP_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8081/health 2>/dev/null || true)
  if [[ "$CHAT_OK" == "200" && "$COMP_OK" == "200" ]]; then
    READY=1
    echo -e " ${GREEN}ready!${RESET}"
    break
  fi
  echo -n "."
  sleep 2
done
# Don't claim success if the poll loop expired without both servers healthy.
if [[ "$READY" != "1" ]]; then
  echo -e " ${RED}timed out${RESET} — servers may still be loading; check the logs below."
fi

echo ""
echo -e "${BOLD}Servers running:${RESET}"
echo -e "  Chat (32B):          http://127.0.0.1:8080"
echo -e "  Autocomplete (1.5B): http://127.0.0.1:8081"
echo -e "  Logs: /tmp/llama-chat.log, /tmp/llama-complete.log"
echo -e "\n  Press Ctrl+C to stop both servers.\n"
wait
SCRIPT
chmod +x "$LAUNCH_DIR/llama-start"

# --- Stop servers ---
cat > "$LAUNCH_DIR/llama-stop" << 'SCRIPT'
#!/usr/bin/env bash
# Stop all running llama-server processes
pkill -f "llama-server" 2>/dev/null && echo "Servers stopped." || echo "No servers running."
SCRIPT
chmod +x "$LAUNCH_DIR/llama-stop"

# -----------------------------------------------------------------------------
# 6. Configure Aider to use llama.cpp OpenAI-compatible API
# -----------------------------------------------------------------------------
mkdir -p "$AIDER_CONFIG_DIR"

cat > "$AIDER_CONFIG_FILE" << 'EOF'
# =============================================================================
# Aider Configuration — Qwen 2.5 Coder via llama.cpp
# =============================================================================

# Point Aider at llama.cpp's OpenAI-compatible endpoint
# The model name can be anything — llama.cpp ignores it and uses the loaded model
model: openai/qwen2.5-coder-32b

# Architect mode for better code planning
architect: true
editor-model: openai/qwen2.5-coder-32b

# Git integration
auto-commits: true
dirty-commits: true
attribute-author: false
attribute-committer: false

# UI preferences
pretty: true
stream: true
dark-mode: true

# Code style
code-theme: monokai
show-diffs: true

# Disable analytics
analytics-disable: true
EOF

# Environment file for API base URL
cat > "$AIDER_CONFIG_DIR/.env" << 'EOF'
# llama.cpp serves an OpenAI-compatible API — no real key needed
OPENAI_API_KEY=sk-not-needed
OPENAI_API_BASE=http://127.0.0.1:8080/v1
EOF

log "Aider config written to ${AIDER_CONFIG_FILE}"
log "Aider env written to ${AIDER_CONFIG_DIR}/.env"

# -----------------------------------------------------------------------------
# 7. Create main launcher: ai-code
# -----------------------------------------------------------------------------
cat > "$LAUNCH_DIR/ai-code" << 'SCRIPT'
#!/usr/bin/env bash
# Launch Aider with local Qwen 2.5 Coder 32B via llama.cpp
# Usage: ai-code [directory] [aider flags...]
#
# Starts llama.cpp servers automatically if not already running.
BOLD="\033[1m"; GREEN="\033[0;32m"; RESET="\033[0m"

# Check if chat server is running
if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
  echo -e "${BOLD}Starting llama.cpp chat server...${RESET}"
  llama-chat-server &>/tmp/llama-chat.log &
  echo -n "Waiting for model to load"
  for i in $(seq 1 120); do
    if curl -s http://127.0.0.1:8080/health &>/dev/null; then
      echo -e " ${GREEN}ready!${RESET}"
      break
    fi
    echo -n "."
    sleep 2
  done
fi

DIR="${1:-.}"
shift 2>/dev/null || true
cd "$DIR" || exit 1

# Initialize git repo if needed
if [ ! -d .git ]; then
  echo "Initializing git repo..."
  git init
  git add -A
  git commit -m "Initial commit (before AI edits)" --allow-empty
fi

# Export API settings from the env file. 'set -a' auto-exports every sourced
# assignment and is safe for values containing spaces, unlike
# 'export $(grep ... | xargs)' which word-splits them.
set -a
. "$HOME/.aider/.env"
set +a

exec aider "$@"
SCRIPT
chmod +x "$LAUNCH_DIR/ai-code"

# Quick question mode
cat > "$LAUNCH_DIR/ai-ask" << 'SCRIPT'
#!/usr/bin/env bash
# Quick coding Q&A — no file editing
if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
  llama-chat-server &>/tmp/llama-chat.log &
  sleep 5
fi
set -a
. "$HOME/.aider/.env"
set +a
if [ -n "${1:-}" ]; then
  exec aider --no-auto-commits --message "$*"
else
  exec aider --no-auto-commits
fi
SCRIPT
chmod +x "$LAUNCH_DIR/ai-ask"

# --- Pipe mode using the chat completions API directly ---
# Quoted heredoc: everything is literal; $PROMPT/$INPUT expand when ai-pipe
# runs, and $p/$c are jq variables bound via --arg. Note that jq does NOT
# interpolate $p inside a "..." string literal, so the user message must be
# built by concatenation.
cat > "$LAUNCH_DIR/ai-pipe" << 'SCRIPT'
#!/usr/bin/env bash
# Pipe code through llama.cpp
# Usage: cat main.py | ai-pipe "add error handling"
PROMPT="${1:-Improve this code}"
INPUT=$(cat)
curl -s http://127.0.0.1:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg p "$PROMPT" --arg c "$INPUT" '{
    model: "qwen",
    messages: [
      {role: "system", content: "You are an expert programmer. Output only code, no explanations."},
      {role: "user", content: ($p + "\n\n```\n" + $c + "\n```")}
    ],
    stream: false
  }')" | jq -r '.choices[0].message.content'
SCRIPT
chmod +x "$LAUNCH_DIR/ai-pipe"

# -----------------------------------------------------------------------------
# 8. Shell integration
# -----------------------------------------------------------------------------
SHELL_RC=""
case "$SHELL" in
  */zsh) SHELL_RC="$HOME/.zshrc" ;;
  */bash) SHELL_RC="$HOME/.bashrc" ;;
  *) SHELL_RC="$HOME/.profile" ;;
esac

# -F: fixed-string match — '.' would otherwise be a regex wildcard.
if ! grep -qF '.local/bin' "$SHELL_RC" 2>/dev/null; then
  echo '' >> "$SHELL_RC"
  echo '# Local AI coding tools' >> "$SHELL_RC"
  echo 'export PATH="$HOME/.local/bin:$PATH"' >> "$SHELL_RC"
  log "Added ~/.local/bin to PATH in ${SHELL_RC}"
fi

# -----------------------------------------------------------------------------
# Done!
# -----------------------------------------------------------------------------
echo ""
echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
echo -e "${GREEN}${BOLD} ✅ Setup complete!${RESET}"
echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
echo ""
echo -e " ${BOLD}Models downloaded to:${RESET} ${MODELS_DIR}"
echo -e "   Chat:         ${CHAT_MODEL_FILE} (~20GB)"
echo -e "   Autocomplete: ${AUTOCOMPLETE_MODEL_FILE} (~1.2GB)"
echo ""
echo -e " ${BOLD}Commands available${RESET} (restart your shell first):"
echo ""
echo -e "   ${BOLD}llama-start${RESET}          Start both llama.cpp servers"
echo -e "   ${BOLD}llama-stop${RESET}           Stop all llama.cpp servers"
echo ""
echo -e "   ${BOLD}ai-code${RESET} [dir]        Full coding agent (auto-starts server)"
echo -e "                        cd into a project and run 'ai-code .'"
echo ""
echo -e "   ${BOLD}ai-ask${RESET} \"question\"    Quick coding Q&A, no file edits"
echo ""
echo -e "   ${BOLD}ai-pipe${RESET} \"prompt\"     Pipe code through the model"
echo -e "                        cat file.py | ai-pipe \"add types\""
echo ""
echo -e " ${BOLD}Config:${RESET}      ${AIDER_CONFIG_FILE}"
echo -e " ${BOLD}API env:${RESET}     ${AIDER_CONFIG_DIR}/.env"
echo -e " ${BOLD}Server logs:${RESET} /tmp/llama-chat.log, /tmp/llama-complete.log"
echo ""
echo -e " Run ${BOLD}source ${SHELL_RC}${RESET} or open a new terminal to get started."
echo ""