version: '3.8'

services:
  # Working AMD GPU Model Runner - using Docker Model Runner (not llama.cpp)
  model-runner:
    image: docker/model-runner:latest
    container_name: model-runner
    privileged: true
    user: "0:0"  # Run as root to fix permission issues
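    # Note: privileged mode already exposes every host device, so the explicit
    # devices list below is redundant while it is set; keeping both makes the
    # requirement visible and lets privileged be dropped once permissions work.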
    ports:
      - "11434:11434"  # Main API port (Ollama-compatible)
      - "8083:8080"    # Alternative API port
    environment:
      - HSA_OVERRIDE_GFX_VERSION=11.0.0  # AMD GPU version override
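      # 11.0.0 targets RDNA 3 (gfx1100) cards such as the RX 7900 series;
      # RDNA 2 parts typically need 10.3.0 instead. Adjust to match your GPU,
      # or drop the override if ROCm supports the card natively.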
      - GPU_LAYERS=35
      - THREADS=8
      - BATCH_SIZE=512
      - CONTEXT_SIZE=4096
      - DISPLAY=${DISPLAY}
      - USER=${USER}
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
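      # /dev/kfd is the ROCm compute interface; /dev/dri exposes the GPU render
      # nodes. Both must be passed through for AMD GPU access in the container.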
    group_add:
      - video
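      # Assumption: on distros where /dev/dri/renderD* belongs to a "render"
      # group, that group (by name or GID) should be added here as well.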
    volumes:
      - ./models:/models:rw
      - ./data:/data:rw
      - /home/${USER}:/home/${USER}:rslave
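      # Assumption: the home bind plus the DISPLAY/USER variables above are for
      # X11/GUI passthrough. rslave propagation makes new host mounts under
      # /home/${USER} visible inside the container, but not the reverse.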
    working_dir: /models
    restart: unless-stopped
    command: >
      /app/model-runner serve
      --port 11434
      --host 0.0.0.0
      --gpu-layers 35
      --threads 8
      --batch-size 512
      --ctx-size 4096
      --parallel
      --cont-batching
      --log-level info
      --log-format json
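    # --parallel with --cont-batching enables concurrent request handling with
    # continuous batching, following llama.cpp-style server conventions (where
    # --parallel usually takes a slot count); verify against the image's --help.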
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    networks:
      - model-runner-network

volumes:
  model_runner_data:
    driver: local
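  # Note: model_runner_data is declared but never mounted by the service above;
  # attach it under the service's volumes (e.g. model_runner_data:/data) if
  # Docker-managed persistence is wanted instead of the bind mounts.
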
networks:
  model-runner-network:
    driver: bridge
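
# Usage sketch (assumes the Ollama-compatible API advertised on port 11434):
#   docker compose up -d model-runner
#   curl -f http://localhost:11434/api/tags   # same endpoint the healthcheck polls
#   docker compose logs -f model-runner       # JSON logs, per --log-format json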