#!/bin/bash
# Integration script for Docker Model Runner
# Adds model runner services to your existing Docker Compose stack

# Strict mode: abort on command failure (-e), on unset variables (-u),
# and when any stage of a pipeline fails (pipefail). The original
# `set -e` alone let pipeline/redirect failures go unnoticed.
set -euo pipefail

echo "=== Docker Model Runner Integration ==="
echo ""
# --- Merge model runner services into the user's compose stack ----------

# Template compose file shipped alongside this script; referenced many
# times below, so name it once.
readonly MODEL_RUNNER_COMPOSE="docker-compose.model-runner.yml"

# Fail fast with a clear message. Without this guard, a missing template
# surfaces later as a cryptic cp/yq/awk error — or the awk fallback
# silently appends nothing and still reports success.
if [[ ! -f "$MODEL_RUNNER_COMPOSE" ]]; then
    echo "❌ Required file $MODEL_RUNNER_COMPOSE not found" >&2
    exit 1
fi

# Check if docker-compose.yml exists
if [[ ! -f "docker-compose.yml" ]]; then
    echo "❌ No existing docker-compose.yml found"
    echo "Creating new docker-compose.yml with model runner services..."
    cp "$MODEL_RUNNER_COMPOSE" docker-compose.yml
else
    echo "✅ Found existing docker-compose.yml"
    echo ""

    # Create backup so a bad merge can be undone by hand.
    cp docker-compose.yml docker-compose.yml.backup
    echo "📦 Backup created: docker-compose.yml.backup"

    # Merge services
    echo ""
    echo "🔄 Merging model runner services..."

    # Prefer yq (v4) for a proper deep merge of the two YAML documents;
    # otherwise fall back to a best-effort textual append.
    if command -v yq &> /dev/null; then
        echo "Using yq to merge configurations..."
        yq eval-all '. as $item ireduce ({}; . * $item)' docker-compose.yml "$MODEL_RUNNER_COMPOSE" > docker-compose.tmp
        mv docker-compose.tmp docker-compose.yml
    else
        echo "Manual merge (yq not available)..."
        # NOTE(review): the awk extraction below assumes the template
        # orders its top-level sections services -> volumes -> networks,
        # and that the user's file ends with its own services block so
        # the appended entries land under `services:`.
        {
            echo ""
            echo "# Added by Docker Model Runner Integration"
            echo ""
        } >> docker-compose.yml

        # Copy the body of the template's services section (everything
        # between "services:" and "volumes:", exclusive of the keys).
        awk '/^services:/{flag=1; next} /^volumes:/{flag=0} flag' "$MODEL_RUNNER_COMPOSE" >> docker-compose.yml

        # Append volumes/networks sections only if the user's file does
        # not already declare them.
        if ! grep -q "^volumes:" docker-compose.yml; then
            echo "" >> docker-compose.yml
            awk '/^volumes:/{flag=1} /^networks:/{flag=0} flag' "$MODEL_RUNNER_COMPOSE" >> docker-compose.yml
        fi

        if ! grep -q "^networks:" docker-compose.yml; then
            echo "" >> docker-compose.yml
            awk '/^networks:/{flag=1} flag' "$MODEL_RUNNER_COMPOSE" >> docker-compose.yml
        fi
    fi

    echo "✅ Services merged successfully"
fi
# Create necessary directories (idempotent; -p tolerates existing dirs).
echo ""
echo "📁 Creating necessary directories..."
mkdir -p models config

# Copy environment file.
# Guard against a missing template: previously a bare `cp` here would
# abort the whole script under `set -e` with an unexplained error.
if [[ ! -f "model-runner.env" ]]; then
    echo "⚠️ model-runner.env not found; skipping environment file setup" >&2
elif [[ ! -f ".env" ]]; then
    # No .env yet: use the template as the project's env file.
    cp model-runner.env .env
    echo "📄 Created .env file from model-runner.env"
elif [[ ! -f ".env.model-runner" ]]; then
    # .env already exists: keep it and install a side-by-side copy.
    cp model-runner.env .env.model-runner
    echo "📄 Created .env.model-runner file"
fi
# Post-install summary: available services, common commands, config
# pointers, exposed ports, GPU tuning knobs, and next steps. A single
# quoted here-doc replaces the long run of echo statements; the quoted
# delimiter prevents any expansion, so the text is emitted verbatim.
cat <<'SUMMARY'

=== Integration Complete! ===

📋 Available services:
• docker-model-runner - Main model runner (port 11434)
• llama-cpp-server - Advanced llama.cpp server (port 8000)
• model-manager - Model management service

🚀 Usage Commands:

# Start all services
docker-compose up -d

# Start only model runner
docker-compose up -d docker-model-runner

# Start with llama.cpp server
docker-compose --profile llama-cpp up -d

# Start with management tools
docker-compose --profile management up -d

# View logs
docker-compose logs -f docker-model-runner

# Test API
curl http://localhost:11434/api/tags

# Pull a model
docker-compose exec docker-model-runner /app/model-runner pull ai/smollm2:135M-Q4_K_M

# Run a model
docker-compose exec docker-model-runner /app/model-runner run ai/smollm2:135M-Q4_K_M 'Hello!'

# Pull Hugging Face model
docker-compose exec docker-model-runner /app/model-runner pull hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF

🔧 Configuration:
• Edit model-runner.env for GPU and performance settings
• Models are stored in ./models directory
• Configuration files in ./config directory

📊 Exposed Ports:
• 11434 - Docker Model Runner API (Ollama-compatible)
• 8000 - Llama.cpp server API
• 9090 - Metrics endpoint

⚡ GPU Support:
• CUDA_VISIBLE_DEVICES=0 (first GPU)
• GPU_LAYERS=35 (layers to offload to GPU)
• THREADS=8 (CPU threads)
• BATCH_SIZE=512 (batch processing size)

🔗 Integration with your existing services:
• Use http://docker-model-runner:11434 for internal API calls
• Use http://localhost:11434 for external API calls
• Add 'depends_on: [docker-model-runner]' to your services

Next steps:
1. Review and edit configuration in model-runner.env
2. Run: docker-compose up -d docker-model-runner
3. Test: curl http://localhost:11434/api/tags
|