setup.sh at main · burrito.space/localcode

burrito.space / localcode
fork atom
Script for easily configuring, using, switching and comparing local offline coding models
fork atom
localcode / setup.sh
at main 406 lines 14 kB view raw
wrap content
burrito.tngl.sh Initial commit (before AI edits) 8d ago
155d66b8
  1#!/usr/bin/env bash
  2set -euo pipefail
  3
  4# =============================================================================
  5# Local AI Coding Environment Setup for macOS Apple Silicon
  6# llama.cpp + Qwen 2.5 Coder 32B (chat) + Qwen 2.5 Coder 1.5B (autocomplete)
  7# + Aider (terminal coding agent) or OpenCode
  8# =============================================================================
  9
 10BOLD="\033[1m"
 11GREEN="\033[0;32m"
 12YELLOW="\033[1;33m"
 13RED="\033[0;31m"
 14RESET="\033[0m"
 15
 16MODELS_DIR="$HOME/.local/share/llama-models"
 17CHAT_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/qwen2.5-coder-32b-instruct-q4_k_m.gguf"
 18CHAT_MODEL_FILE="qwen2.5-coder-32b-instruct-q4_k_m.gguf"
 19AUTOCOMPLETE_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
 20AUTOCOMPLETE_MODEL_FILE="qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
 21
 22CHAT_PORT=8080
 23AUTOCOMPLETE_PORT=8081
 24
 25AIDER_CONFIG_DIR="$HOME/.aider"
 26AIDER_CONFIG_FILE="$AIDER_CONFIG_DIR/aider.conf.yml"
 27
 28log()  { echo -e "${GREEN}${BOLD}[✓]${RESET} $1"; }
 29warn() { echo -e "${YELLOW}${BOLD}[!]${RESET} $1"; }
 30err()  { echo -e "${RED}${BOLD}[✗]${RESET} $1"; exit 1; }
 31
 32# -----------------------------------------------------------------------------
 33# Pre-flight checks
 34# -----------------------------------------------------------------------------
 35echo -e "\n${BOLD}🔧 Local AI Coding Environment Installer (llama.cpp)${RESET}\n"
 36
 37[[ "$(uname)" == "Darwin" ]] || err "This script is for macOS only."
 38[[ "$(uname -m)" == "arm64" ]] || warn "Not running on Apple Silicon — performance may vary."
 39
 40MEM_GB=$(( $(sysctl -n hw.memsize) / 1073741824 ))
 41if (( MEM_GB < 32 )); then
 42  warn "You have ${MEM_GB}GB RAM. The 32B model needs ~20GB; you may experience swapping."
 43fi
 44
 45# -----------------------------------------------------------------------------
 46# 1. Install Homebrew (if missing)
 47# -----------------------------------------------------------------------------
 48if ! command -v brew &>/dev/null; then
 49  log "Installing Homebrew..."
 50  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
 51  eval "$(/opt/homebrew/bin/brew shellenv)"
 52else
 53  log "Homebrew already installed."
 54fi
 55
 56# -----------------------------------------------------------------------------
 57# 2. Build / Install llama.cpp
 58# -----------------------------------------------------------------------------
 59if ! command -v llama-server &>/dev/null; then
 60  log "Installing llama.cpp via Homebrew..."
 61  brew install llama.cpp
 62else
 63  log "llama.cpp already installed."
 64fi
 65
 66# Verify Metal support
 67if llama-server --help 2>&1 | grep -qi metal; then
 68  log "Metal (GPU) acceleration available."
 69else
 70  warn "Metal flag not detected — model will run on CPU only."
 71fi
 72
 73# -----------------------------------------------------------------------------
 74# 3. Download Qwen GGUF models from HuggingFace
 75# -----------------------------------------------------------------------------
 76mkdir -p "$MODELS_DIR"
 77
 78download_model() {
 79  local url="$1" file="$2"
 80  if [ -f "$MODELS_DIR/$file" ]; then
 81    log "Model already downloaded: $file"
 82  else
 83    log "Downloading $file (this may take a while)..."
 84    curl -L --progress-bar -o "$MODELS_DIR/$file" "$url"
 85    log "Downloaded: $file"
 86  fi
 87}
 88
 89download_model "$CHAT_MODEL_URL" "$CHAT_MODEL_FILE"
 90download_model "$AUTOCOMPLETE_MODEL_URL" "$AUTOCOMPLETE_MODEL_FILE"
 91
 92# -----------------------------------------------------------------------------
 93# 4. Install Python & Aider
 94# -----------------------------------------------------------------------------
 95if ! command -v jq &>/dev/null; then
 96  log "Installing jq..."
 97  brew install jq
 98else
 99  log "jq already installed."
100fi
101
102if ! command -v python3 &>/dev/null; then
103  log "Installing Python 3..."
104  brew install python@3.12
105fi
106
107if ! command -v pipx &>/dev/null; then
108  log "Installing pipx..."
109  brew install pipx
110  pipx ensurepath
111fi
112
113if ! command -v aider &>/dev/null; then
114  log "Installing Aider..."
115  pipx install aider-chat
116else
117  log "Aider already installed. Upgrading..."
118  pipx upgrade aider-chat
119fi
120
121# -----------------------------------------------------------------------------
122# 5. Create llama.cpp server launcher scripts
123# -----------------------------------------------------------------------------
124LAUNCH_DIR="$HOME/.local/bin"
125mkdir -p "$LAUNCH_DIR"
126
127# --- Chat server launcher ---
128cat > "$LAUNCH_DIR/llama-chat-server" << SCRIPT
129#!/usr/bin/env bash
130# Start llama.cpp server with Qwen 2.5 Coder 32B for chat
131# Exposed as OpenAI-compatible API on port ${CHAT_PORT}
132
133MODEL="$MODELS_DIR/$CHAT_MODEL_FILE"
134
135exec llama-server \\
136  --model "\$MODEL" \\
137  --port ${CHAT_PORT} \\
138  --host 127.0.0.1 \\
139  --ctx-size 16384 \\
140  --n-gpu-layers 99 \\
141  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
142  --mlock \\
143  "\$@"
144SCRIPT
145chmod +x "$LAUNCH_DIR/llama-chat-server"
146
147# --- Autocomplete server launcher ---
148cat > "$LAUNCH_DIR/llama-complete-server" << SCRIPT
149#!/usr/bin/env bash
150# Start llama.cpp server with Qwen 2.5 Coder 1.5B for autocomplete
151# Exposed as OpenAI-compatible API on port ${AUTOCOMPLETE_PORT}
152
153MODEL="$MODELS_DIR/$AUTOCOMPLETE_MODEL_FILE"
154
155exec llama-server \\
156  --model "\$MODEL" \\
157  --port ${AUTOCOMPLETE_PORT} \\
158  --host 127.0.0.1 \\
159  --ctx-size 4096 \\
160  --n-gpu-layers 99 \\
161  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
162  --mlock \\
163  "\$@"
164SCRIPT
165chmod +x "$LAUNCH_DIR/llama-complete-server"
166
167# --- Combined server manager ---
168cat > "$LAUNCH_DIR/llama-start" << 'SCRIPT'
169#!/usr/bin/env bash
170# Start both llama.cpp servers (chat + autocomplete)
171
172BOLD="\033[1m"; GREEN="\033[0;32m"; RED="\033[0;31m"; RESET="\033[0m"
173CHAT_PID="" COMPLETE_PID=""
174
175cleanup() {
176  echo -e "\n${RED}Shutting down servers...${RESET}"
177  [ -n "$CHAT_PID" ] && kill "$CHAT_PID" 2>/dev/null
178  [ -n "$COMPLETE_PID" ] && kill "$COMPLETE_PID" 2>/dev/null
179  wait 2>/dev/null
180  echo -e "${GREEN}Done.${RESET}"
181  exit 0
182}
183trap cleanup SIGINT SIGTERM
184
185echo -e "${BOLD}Starting llama.cpp servers...${RESET}\n"
186
187echo -e "${GREEN}[1/2]${RESET} Chat model (32B) on :8080..."
188llama-chat-server &>/tmp/llama-chat.log &
189CHAT_PID=$!
190
191echo -e "${GREEN}[2/2]${RESET} Autocomplete model (1.5B) on :8081..."
192llama-complete-server &>/tmp/llama-complete.log &
193COMPLETE_PID=$!
194
195# Wait for servers to be ready
196echo -ne "\nWaiting for servers..."
197for i in $(seq 1 60); do
198  CHAT_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8080/health 2>/dev/null || true)
199  COMP_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8081/health 2>/dev/null || true)
200  if [[ "$CHAT_OK" == "200" && "$COMP_OK" == "200" ]]; then
201    echo -e " ${GREEN}ready!${RESET}"
202    break
203  fi
204  echo -n "."
205  sleep 2
206done
207
208echo ""
209echo -e "${BOLD}Servers running:${RESET}"
210echo -e "  Chat (32B):         http://127.0.0.1:8080"
211echo -e "  Autocomplete (1.5B): http://127.0.0.1:8081"
212echo -e "  Logs:               /tmp/llama-chat.log, /tmp/llama-complete.log"
213echo -e "\n  Press Ctrl+C to stop both servers.\n"
214
215wait
216SCRIPT
217chmod +x "$LAUNCH_DIR/llama-start"
218
219# --- Stop servers ---
220cat > "$LAUNCH_DIR/llama-stop" << 'SCRIPT'
221#!/usr/bin/env bash
222# Stop all running llama-server processes
223pkill -f "llama-server" 2>/dev/null && echo "Servers stopped." || echo "No servers running."
224SCRIPT
225chmod +x "$LAUNCH_DIR/llama-stop"
226
227# -----------------------------------------------------------------------------
228# 6. Configure Aider to use llama.cpp OpenAI-compatible API
229# -----------------------------------------------------------------------------
230mkdir -p "$AIDER_CONFIG_DIR"
231
232cat > "$AIDER_CONFIG_FILE" << 'EOF'
233# =============================================================================
234# Aider Configuration — Qwen 2.5 Coder via llama.cpp
235# =============================================================================
236
237# Point Aider at llama.cpp's OpenAI-compatible endpoint
238# The model name can be anything — llama.cpp ignores it and uses the loaded model
239model: openai/qwen2.5-coder-32b
240
241# Architect mode for better code planning
242architect: true
243editor-model: openai/qwen2.5-coder-32b
244
245# Git integration
246auto-commits: true
247dirty-commits: true
248attribute-author: false
249attribute-committer: false
250
251# UI preferences
252pretty: true
253stream: true
254dark-mode: true
255
256# Code style
257code-theme: monokai
258show-diffs: true
259
260# Disable analytics
261analytics-disable: true
262EOF
263
264# Environment file for API base URL
265cat > "$AIDER_CONFIG_DIR/.env" << 'EOF'
266# llama.cpp serves an OpenAI-compatible API — no real key needed
267OPENAI_API_KEY=sk-not-needed
268OPENAI_API_BASE=http://127.0.0.1:8080/v1
269EOF
270
271log "Aider config written to ${AIDER_CONFIG_FILE}"
272log "Aider env written to ${AIDER_CONFIG_DIR}/.env"
273
274# -----------------------------------------------------------------------------
275# 7. Create main launcher: ai-code
276# -----------------------------------------------------------------------------
277cat > "$LAUNCH_DIR/ai-code" << 'SCRIPT'
278#!/usr/bin/env bash
279# Launch Aider with local Qwen 2.5 Coder 32B via llama.cpp
280# Usage: ai-code [directory] [aider flags...]
281#
282# Starts llama.cpp servers automatically if not already running.
283
284BOLD="\033[1m"; GREEN="\033[0;32m"; RESET="\033[0m"
285
286# Check if chat server is running
287if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
288  echo -e "${BOLD}Starting llama.cpp chat server...${RESET}"
289  llama-chat-server &>/tmp/llama-chat.log &
290  echo -n "Waiting for model to load"
291  for i in $(seq 1 120); do
292    if curl -s http://127.0.0.1:8080/health &>/dev/null; then
293      echo -e " ${GREEN}ready!${RESET}"
294      break
295    fi
296    echo -n "."
297    sleep 2
298  done
299fi
300
301DIR="${1:-.}"
302shift 2>/dev/null || true
303cd "$DIR" || exit 1
304
305# Initialize git repo if needed
306if [ ! -d .git ]; then
307  echo "Initializing git repo..."
308  git init
309  git add -A
310  git commit -m "Initial commit (before AI edits)" --allow-empty
311fi
312
313# Source the env file for API config
314export $(grep -v '^#' "$HOME/.aider/.env" | xargs)
315
316exec aider "$@"
317SCRIPT
318chmod +x "$LAUNCH_DIR/ai-code"
319
320# Quick question mode
321cat > "$LAUNCH_DIR/ai-ask" << 'SCRIPT'
322#!/usr/bin/env bash
323# Quick coding Q&A — no file editing
324if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
325  llama-chat-server &>/tmp/llama-chat.log &
326  sleep 5
327fi
328export $(grep -v '^#' "$HOME/.aider/.env" | xargs)
329if [ -n "$1" ]; then
330  exec aider --no-auto-commits --message "$*"
331else
332  exec aider --no-auto-commits
333fi
334SCRIPT
335chmod +x "$LAUNCH_DIR/ai-ask"
336
337# Pipe mode using llama.cpp CLI directly
338cat > "$LAUNCH_DIR/ai-pipe" << SCRIPT
339#!/usr/bin/env bash
340# Pipe code through llama.cpp
341# Usage: cat main.py | ai-pipe "add error handling"
342
343PROMPT="\${1:-Improve this code}"
344INPUT=\$(cat)
345
346curl -s http://127.0.0.1:8080/v1/chat/completions \\
347  -H "Content-Type: application/json" \\
348  -d "\$(jq -n --arg p "\$PROMPT" --arg c "\$INPUT" '{
349    model: "qwen",
350    messages: [
351      {role: "system", content: "You are an expert programmer. Output only code, no explanations."},
352      {role: "user", content: ("\$p\n\n```\n" + \$c + "\n```")}
353    ],
354    stream: false
355  }')" | jq -r '.choices[0].message.content'
356SCRIPT
357chmod +x "$LAUNCH_DIR/ai-pipe"
358
359# -----------------------------------------------------------------------------
360# 8. Shell integration
361# -----------------------------------------------------------------------------
362SHELL_RC=""
363case "$SHELL" in
364  */zsh)  SHELL_RC="$HOME/.zshrc" ;;
365  */bash) SHELL_RC="$HOME/.bashrc" ;;
366  *)      SHELL_RC="$HOME/.profile" ;;
367esac
368
369if ! grep -q '.local/bin' "$SHELL_RC" 2>/dev/null; then
370  echo '' >> "$SHELL_RC"
371  echo '# Local AI coding tools' >> "$SHELL_RC"
372  echo 'export PATH="$HOME/.local/bin:$PATH"' >> "$SHELL_RC"
373  log "Added ~/.local/bin to PATH in ${SHELL_RC}"
374fi
375
376# -----------------------------------------------------------------------------
377# Done!
378# -----------------------------------------------------------------------------
379echo ""
380echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
381echo -e "${GREEN}${BOLD}  ✅ Setup complete!${RESET}"
382echo -e "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}"
383echo ""
384echo -e "  ${BOLD}Models downloaded to:${RESET} ${MODELS_DIR}"
385echo -e "    Chat:         ${CHAT_MODEL_FILE} (~20GB)"
386echo -e "    Autocomplete: ${AUTOCOMPLETE_MODEL_FILE} (~1.2GB)"
387echo ""
388echo -e "  ${BOLD}Commands available${RESET} (restart your shell first):"
389echo ""
390echo -e "    ${BOLD}llama-start${RESET}          Start both llama.cpp servers"
391echo -e "    ${BOLD}llama-stop${RESET}           Stop all llama.cpp servers"
392echo ""
393echo -e "    ${BOLD}ai-code${RESET} [dir]        Full coding agent (auto-starts server)"
394echo -e "                         cd into a project and run 'ai-code .'"
395echo ""
396echo -e "    ${BOLD}ai-ask${RESET} \"question\"    Quick coding Q&A, no file edits"
397echo ""
398echo -e "    ${BOLD}ai-pipe${RESET} \"prompt\"     Pipe code through the model"
399echo -e "                         cat file.py | ai-pipe \"add types\""
400echo ""
401echo -e "  ${BOLD}Config:${RESET}       ${AIDER_CONFIG_FILE}"
402echo -e "  ${BOLD}API env:${RESET}      ${AIDER_CONFIG_DIR}/.env"
403echo -e "  ${BOLD}Server logs:${RESET}  /tmp/llama-chat.log, /tmp/llama-complete.log"
404echo ""
405echo -e "  Run ${BOLD}source ${SHELL_RC}${RESET} or open a new terminal to get started."
406echo ""