# Script for easily configuring, using, switching and comparing local offline coding models
#!/usr/bin/env bash
set -euo pipefail

# =============================================================================
# Local AI Coding Environment Setup for macOS Apple Silicon
# llama.cpp + Qwen 2.5 Coder 32B (chat) + Qwen 2.5 Coder 1.5B (autocomplete)
# + Aider (terminal coding agent) or OpenCode
# =============================================================================

# ANSI colour codes for the log helpers (interpreted by echo -e / printf %b).
# All of the values below are constants — marked readonly so a later edit
# cannot silently clobber them.
readonly BOLD="\033[1m"
readonly GREEN="\033[0;32m"
readonly YELLOW="\033[1;33m"
readonly RED="\033[0;31m"
readonly RESET="\033[0m"

# Where the GGUF model files are stored, and where they come from.
readonly MODELS_DIR="$HOME/.local/share/llama-models"
readonly CHAT_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/qwen2.5-coder-32b-instruct-q4_k_m.gguf"
readonly CHAT_MODEL_FILE="qwen2.5-coder-32b-instruct-q4_k_m.gguf"
readonly AUTOCOMPLETE_MODEL_URL="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"
readonly AUTOCOMPLETE_MODEL_FILE="qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"

# Ports for the two OpenAI-compatible llama.cpp servers (chat / autocomplete).
readonly CHAT_PORT=8080
readonly AUTOCOMPLETE_PORT=8081

# Aider configuration locations.
readonly AIDER_CONFIG_DIR="$HOME/.aider"
readonly AIDER_CONFIG_FILE="$AIDER_CONFIG_DIR/aider.conf.yml"
27
# -----------------------------------------------------------------------------
# Logging helpers. Message is taken from all arguments ("$*"), so multi-word
# unquoted calls work too (the originals only printed "$1").
#   log  - green tick, informational, stdout
#   warn - yellow bang, non-fatal problem, stderr
#   err  - red cross, fatal: prints to stderr and exits 1
# Diagnostics go to stderr so command substitution of this script's output
# stays clean; echo -e interprets the colour escapes stored in the variables.
# -----------------------------------------------------------------------------
log() { echo -e "${GREEN}${BOLD}[✓]${RESET} $*"; }
warn() { echo -e "${YELLOW}${BOLD}[!]${RESET} $*" >&2; }
err() { echo -e "${RED}${BOLD}[✗]${RESET} $*" >&2; exit 1; }
31
# -----------------------------------------------------------------------------
# Pre-flight checks
# -----------------------------------------------------------------------------
echo -e "\n${BOLD}🔧 Local AI Coding Environment Installer (llama.cpp)${RESET}\n"

# Hard requirement: macOS. Apple Silicon is recommended, not enforced.
if [[ "$(uname)" != "Darwin" ]]; then
  err "This script is for macOS only."
fi
if [[ "$(uname -m)" != "arm64" ]]; then
  warn "Not running on Apple Silicon — performance may vary."
fi

# hw.memsize is in bytes; 1073741824 = 1 GiB. The 32B q4_k_m model needs
# roughly 20GB resident, so below 32GB total RAM swapping is likely.
MEM_GB=$(( $(sysctl -n hw.memsize) / 1073741824 ))
(( MEM_GB >= 32 )) || warn "You have ${MEM_GB}GB RAM. The 32B model needs ~20GB; you may experience swapping."
44
# -----------------------------------------------------------------------------
# 1. Install Homebrew (if missing)
# -----------------------------------------------------------------------------
if command -v brew &>/dev/null; then
  log "Homebrew already installed."
else
  log "Installing Homebrew..."
  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  # Put brew on PATH for the rest of this run (Apple Silicon install prefix).
  eval "$(/opt/homebrew/bin/brew shellenv)"
fi

# -----------------------------------------------------------------------------
# 2. Build / Install llama.cpp
# -----------------------------------------------------------------------------
if command -v llama-server &>/dev/null; then
  log "llama.cpp already installed."
else
  log "Installing llama.cpp via Homebrew..."
  brew install llama.cpp
fi

# Verify Metal support by scanning the server's help text for the flag.
if ! llama-server --help 2>&1 | grep -qi metal; then
  warn "Metal flag not detected — model will run on CPU only."
else
  log "Metal (GPU) acceleration available."
fi

# -----------------------------------------------------------------------------
# 3. Download Qwen GGUF models from HuggingFace
# -----------------------------------------------------------------------------
mkdir -p "$MODELS_DIR"
77
#######################################
# Download a GGUF model into $MODELS_DIR unless it is already present.
# Globals:   MODELS_DIR (read)
# Arguments: $1 - source URL, $2 - destination filename
# Outputs:   status via log; curl progress bar on stderr
#######################################
download_model() {
  local url="$1" file="$2"
  local dest="$MODELS_DIR/$file"
  if [ -f "$dest" ]; then
    log "Model already downloaded: $file"
    return 0
  fi
  log "Downloading $file (this may take a while)..."
  # --fail: an HTTP error (404, auth/consent page, ...) must not be saved as
  # a "model". Download to a .part file and rename only on success, so an
  # interrupted transfer is never mistaken for a complete model on re-run.
  curl -fL --progress-bar -o "$dest.part" "$url"
  mv -- "$dest.part" "$dest"
  log "Downloaded: $file"
}
88
download_model "$CHAT_MODEL_URL" "$CHAT_MODEL_FILE"
download_model "$AUTOCOMPLETE_MODEL_URL" "$AUTOCOMPLETE_MODEL_FILE"

# -----------------------------------------------------------------------------
# 4. Install Python & Aider
# -----------------------------------------------------------------------------
# jq is needed by the ai-pipe helper generated later.
if command -v jq &>/dev/null; then
  log "jq already installed."
else
  log "Installing jq..."
  brew install jq
fi

command -v python3 &>/dev/null || { log "Installing Python 3..."; brew install python@3.12; }

command -v pipx &>/dev/null || { log "Installing pipx..."; brew install pipx; pipx ensurepath; }

# Install Aider via pipx (isolated venv); upgrade in place if already there.
if command -v aider &>/dev/null; then
  log "Aider already installed. Upgrading..."
  pipx upgrade aider-chat
else
  log "Installing Aider..."
  pipx install aider-chat
fi
120
# -----------------------------------------------------------------------------
# 5. Create llama.cpp server launcher scripts
# -----------------------------------------------------------------------------
LAUNCH_DIR="$HOME/.local/bin"
mkdir -p "$LAUNCH_DIR"

#######################################
# Write one llama-server launcher script. The chat and autocomplete launchers
# were previously duplicated verbatim; they differ only in filename, model
# file, port, context size and description, so both are generated from this
# single template.
# Globals:   LAUNCH_DIR, MODELS_DIR (read)
# Arguments: $1 - launcher filename, $2 - model file, $3 - port,
#            $4 - context size, $5 - description for the header comment
#######################################
write_llama_launcher() {
  local name="$1" model_file="$2" port="$3" ctx_size="$4" desc="$5"
  # Unquoted delimiter: install-time values expand now; \$ and \\ preserve
  # runtime expansions for the generated script. Heredoc body stays at
  # column 0 because the plain SCRIPT delimiter does not strip indentation.
  cat > "$LAUNCH_DIR/$name" << SCRIPT
#!/usr/bin/env bash
# Start llama.cpp server with $desc
# Exposed as OpenAI-compatible API on port ${port}

MODEL="$MODELS_DIR/$model_file"

exec llama-server \\
  --model "\$MODEL" \\
  --port ${port} \\
  --host 127.0.0.1 \\
  --ctx-size ${ctx_size} \\
  --n-gpu-layers 99 \\
  --threads \$(sysctl -n hw.perflevel0.logicalcpu 2>/dev/null || echo 4) \\
  --mlock \\
  "\$@"
SCRIPT
  chmod +x "$LAUNCH_DIR/$name"
}

# --- Chat server launcher ---
write_llama_launcher llama-chat-server "$CHAT_MODEL_FILE" "$CHAT_PORT" 16384 \
  "Qwen 2.5 Coder 32B for chat"

# --- Autocomplete server launcher ---
write_llama_launcher llama-complete-server "$AUTOCOMPLETE_MODEL_FILE" "$AUTOCOMPLETE_PORT" 4096 \
  "Qwen 2.5 Coder 1.5B for autocomplete"
166
167# --- Combined server manager ---
168cat > "$LAUNCH_DIR/llama-start" << 'SCRIPT'
169#!/usr/bin/env bash
170# Start both llama.cpp servers (chat + autocomplete)
171
172BOLD="\033[1m"; GREEN="\033[0;32m"; RED="\033[0;31m"; RESET="\033[0m"
173CHAT_PID="" COMPLETE_PID=""
174
175cleanup() {
176 echo -e "\n${RED}Shutting down servers...${RESET}"
177 [ -n "$CHAT_PID" ] && kill "$CHAT_PID" 2>/dev/null
178 [ -n "$COMPLETE_PID" ] && kill "$COMPLETE_PID" 2>/dev/null
179 wait 2>/dev/null
180 echo -e "${GREEN}Done.${RESET}"
181 exit 0
182}
183trap cleanup SIGINT SIGTERM
184
185echo -e "${BOLD}Starting llama.cpp servers...${RESET}\n"
186
187echo -e "${GREEN}[1/2]${RESET} Chat model (32B) on :8080..."
188llama-chat-server &>/tmp/llama-chat.log &
189CHAT_PID=$!
190
191echo -e "${GREEN}[2/2]${RESET} Autocomplete model (1.5B) on :8081..."
192llama-complete-server &>/tmp/llama-complete.log &
193COMPLETE_PID=$!
194
195# Wait for servers to be ready
196echo -ne "\nWaiting for servers..."
197for i in $(seq 1 60); do
198 CHAT_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8080/health 2>/dev/null || true)
199 COMP_OK=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8081/health 2>/dev/null || true)
200 if [[ "$CHAT_OK" == "200" && "$COMP_OK" == "200" ]]; then
201 echo -e " ${GREEN}ready!${RESET}"
202 break
203 fi
204 echo -n "."
205 sleep 2
206done
207
208echo ""
209echo -e "${BOLD}Servers running:${RESET}"
210echo -e " Chat (32B): http://127.0.0.1:8080"
211echo -e " Autocomplete (1.5B): http://127.0.0.1:8081"
212echo -e " Logs: /tmp/llama-chat.log, /tmp/llama-complete.log"
213echo -e "\n Press Ctrl+C to stop both servers.\n"
214
215wait
216SCRIPT
217chmod +x "$LAUNCH_DIR/llama-start"
218
219# --- Stop servers ---
220cat > "$LAUNCH_DIR/llama-stop" << 'SCRIPT'
221#!/usr/bin/env bash
222# Stop all running llama-server processes
223pkill -f "llama-server" 2>/dev/null && echo "Servers stopped." || echo "No servers running."
224SCRIPT
225chmod +x "$LAUNCH_DIR/llama-stop"
226
227# -----------------------------------------------------------------------------
228# 6. Configure Aider to use llama.cpp OpenAI-compatible API
229# -----------------------------------------------------------------------------
230mkdir -p "$AIDER_CONFIG_DIR"
231
232cat > "$AIDER_CONFIG_FILE" << 'EOF'
233# =============================================================================
234# Aider Configuration — Qwen 2.5 Coder via llama.cpp
235# =============================================================================
236
237# Point Aider at llama.cpp's OpenAI-compatible endpoint
238# The model name can be anything — llama.cpp ignores it and uses the loaded model
239model: openai/qwen2.5-coder-32b
240
241# Architect mode for better code planning
242architect: true
243editor-model: openai/qwen2.5-coder-32b
244
245# Git integration
246auto-commits: true
247dirty-commits: true
248attribute-author: false
249attribute-committer: false
250
251# UI preferences
252pretty: true
253stream: true
254dark-mode: true
255
256# Code style
257code-theme: monokai
258show-diffs: true
259
260# Disable analytics
261analytics-disable: true
262EOF
263
264# Environment file for API base URL
265cat > "$AIDER_CONFIG_DIR/.env" << 'EOF'
266# llama.cpp serves an OpenAI-compatible API — no real key needed
267OPENAI_API_KEY=sk-not-needed
268OPENAI_API_BASE=http://127.0.0.1:8080/v1
269EOF
270
271log "Aider config written to ${AIDER_CONFIG_FILE}"
272log "Aider env written to ${AIDER_CONFIG_DIR}/.env"
273
274# -----------------------------------------------------------------------------
275# 7. Create main launcher: ai-code
276# -----------------------------------------------------------------------------
277cat > "$LAUNCH_DIR/ai-code" << 'SCRIPT'
278#!/usr/bin/env bash
279# Launch Aider with local Qwen 2.5 Coder 32B via llama.cpp
280# Usage: ai-code [directory] [aider flags...]
281#
282# Starts llama.cpp servers automatically if not already running.
283
284BOLD="\033[1m"; GREEN="\033[0;32m"; RESET="\033[0m"
285
286# Check if chat server is running
287if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
288 echo -e "${BOLD}Starting llama.cpp chat server...${RESET}"
289 llama-chat-server &>/tmp/llama-chat.log &
290 echo -n "Waiting for model to load"
291 for i in $(seq 1 120); do
292 if curl -s http://127.0.0.1:8080/health &>/dev/null; then
293 echo -e " ${GREEN}ready!${RESET}"
294 break
295 fi
296 echo -n "."
297 sleep 2
298 done
299fi
300
301DIR="${1:-.}"
302shift 2>/dev/null || true
303cd "$DIR" || exit 1
304
305# Initialize git repo if needed
306if [ ! -d .git ]; then
307 echo "Initializing git repo..."
308 git init
309 git add -A
310 git commit -m "Initial commit (before AI edits)" --allow-empty
311fi
312
313# Source the env file for API config
314export $(grep -v '^#' "$HOME/.aider/.env" | xargs)
315
316exec aider "$@"
317SCRIPT
318chmod +x "$LAUNCH_DIR/ai-code"
319
320# Quick question mode
321cat > "$LAUNCH_DIR/ai-ask" << 'SCRIPT'
322#!/usr/bin/env bash
323# Quick coding Q&A — no file editing
324if ! curl -s http://127.0.0.1:8080/health &>/dev/null; then
325 llama-chat-server &>/tmp/llama-chat.log &
326 sleep 5
327fi
328export $(grep -v '^#' "$HOME/.aider/.env" | xargs)
329if [ -n "$1" ]; then
330 exec aider --no-auto-commits --message "$*"
331else
332 exec aider --no-auto-commits
333fi
334SCRIPT
335chmod +x "$LAUNCH_DIR/ai-ask"
336
# Pipe mode using llama.cpp CLI directly
# Generated script: reads code on stdin, sends it with the prompt to the
# chat server's /v1/chat/completions endpoint, prints the model's reply.
#
# BUG FIX: the original jq program wrote the user message as "$p\n\n..." —
# inside a jq double-quoted string "$p" is literal text, not the --arg
# variable, so the user's prompt never reached the model. The message is now
# built by jq string concatenation ($p + ... + $c + ...). The raw backticks
# in this UNQUOTED heredoc also opened command substitution at install time;
# they are now escaped as \` so the generated file contains literal ```.
cat > "$LAUNCH_DIR/ai-pipe" << SCRIPT
#!/usr/bin/env bash
# Pipe code through llama.cpp
# Usage: cat main.py | ai-pipe "add error handling"

PROMPT="\${1:-Improve this code}"
INPUT=\$(cat)

curl -s http://127.0.0.1:8080/v1/chat/completions \\
  -H "Content-Type: application/json" \\
  -d "\$(jq -n --arg p "\$PROMPT" --arg c "\$INPUT" '{
    model: "qwen",
    messages: [
      {role: "system", content: "You are an expert programmer. Output only code, no explanations."},
      {role: "user", content: (\$p + "\n\n\`\`\`\n" + \$c + "\n\`\`\`")}
    ],
    stream: false
  }')" | jq -r '.choices[0].message.content'
SCRIPT
chmod +x "$LAUNCH_DIR/ai-pipe"
358
# -----------------------------------------------------------------------------
# 8. Shell integration
# -----------------------------------------------------------------------------
# Pick the rc file matching the user's login shell, then make sure
# ~/.local/bin (where the launchers were written) is on PATH.
rc_file="$HOME/.profile"
case "$SHELL" in
  */zsh) rc_file="$HOME/.zshrc" ;;
  */bash) rc_file="$HOME/.bashrc" ;;
esac
SHELL_RC="$rc_file"

# Only append once: skip if any '.local/bin' mention already exists.
if ! grep -q '.local/bin' "$SHELL_RC" 2>/dev/null; then
  {
    echo ''
    echo '# Local AI coding tools'
    echo 'export PATH="$HOME/.local/bin:$PATH"'
  } >> "$SHELL_RC"
  log "Added ~/.local/bin to PATH in ${SHELL_RC}"
fi
375
# -----------------------------------------------------------------------------
# Done!
# -----------------------------------------------------------------------------
# One printf call instead of 29 echo -e calls: %b interprets the \033 escape
# sequences stored in the colour variables, and each argument is emitted as
# its own newline-terminated line (empty strings produce blank lines).
printf '%b\n' \
  "" \
  "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}" \
  "${GREEN}${BOLD} ✅ Setup complete!${RESET}" \
  "${GREEN}${BOLD}═══════════════════════════════════════════════════${RESET}" \
  "" \
  " ${BOLD}Models downloaded to:${RESET} ${MODELS_DIR}" \
  " Chat: ${CHAT_MODEL_FILE} (~20GB)" \
  " Autocomplete: ${AUTOCOMPLETE_MODEL_FILE} (~1.2GB)" \
  "" \
  " ${BOLD}Commands available${RESET} (restart your shell first):" \
  "" \
  " ${BOLD}llama-start${RESET} Start both llama.cpp servers" \
  " ${BOLD}llama-stop${RESET} Stop all llama.cpp servers" \
  "" \
  " ${BOLD}ai-code${RESET} [dir] Full coding agent (auto-starts server)" \
  " cd into a project and run 'ai-code .'" \
  "" \
  " ${BOLD}ai-ask${RESET} \"question\" Quick coding Q&A, no file edits" \
  "" \
  " ${BOLD}ai-pipe${RESET} \"prompt\" Pipe code through the model" \
  " cat file.py | ai-pipe \"add types\"" \
  "" \
  " ${BOLD}Config:${RESET} ${AIDER_CONFIG_FILE}" \
  " ${BOLD}API env:${RESET} ${AIDER_CONFIG_DIR}/.env" \
  " ${BOLD}Server logs:${RESET} /tmp/llama-chat.log, /tmp/llama-complete.log" \
  "" \
  " Run ${BOLD}source ${SHELL_RC}${RESET} or open a new terminal to get started." \
  ""