# gogo2/docker-compose.yml
version: '3.8'

services:
  # Working AMD GPU Model Runner - Using Docker Model Runner (not llama.cpp)
  model-runner:
    image: docker/model-runner:latest
    container_name: model-runner
    privileged: true
    user: "0:0"  # Run as root to fix permission issues
    ports:
      - "11434:11434"  # Main API port (Ollama-compatible)
      - "8083:8080"    # Alternative API port
    environment:
      - HSA_OVERRIDE_GFX_VERSION=11.0.0  # AMD GPU version override
      - GPU_LAYERS=35
      - THREADS=8
      - BATCH_SIZE=512
      - CONTEXT_SIZE=4096
      - DISPLAY=${DISPLAY}
      - USER=${USER}
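    # HSA_OVERRIDE_GFX_VERSION=11.0.0 makes ROCm treat the card as gfx1100
    # (RDNA3, e.g. RX 7900 series); adjust the value to match your GPU if
    # ROCm does not support it natively.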
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      - video
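    # /dev/kfd is the ROCm compute interface and /dev/dri holds the DRM render
    # nodes; the video group grants access to both (some distros also require
    # the render group).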
    volumes:
      - ./models:/models:rw
      - ./data:/data:rw
      - /home/${USER}:/home/${USER}:rslave
    working_dir: /models
    restart: unless-stopped
    command: >
      /app/model-runner serve
      --port 11434
      --host 0.0.0.0
      --gpu-layers 35
      --threads 8
      --batch-size 512
      --ctx-size 4096
      --parallel
      --cont-batching
      --log-level info
      --log-format json
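    # Note: the serve flags repeat the tuning values from the environment
    # block above (35 GPU layers, 8 threads, batch 512, context 4096); if the
    # image also reads those variables, keep the two places in sync.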
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    networks:
      - model-runner-network

volumes:
  model_runner_data:
    driver: local

networks:
  model-runner-network:
    driver: bridge
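
# Quick start (a sketch; assumes the Ollama-compatible API the port comment
# above describes):
#   docker compose up -d model-runner
#   curl -f http://localhost:11434/api/tags   # same endpoint the healthcheck probes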