---
# Docker Compose stack defining the vllm, open-webui, and vllm-monitor services.
# Compose file format version, kept as a quoted string so it is never parsed
# as a float.
# NOTE(review): recent Docker Compose releases document this top-level key as
# obsolete and only warn about it -- confirm before removing.
version: "3.8"

services:
  # Inference backend. Published on host port 8000 and reachable by other
  # containers under the service name `vllm`.
  vllm:
    container_name: vllm
    # NOTE(review): confirm this image name/tag exists in your registry; the
    # paths and port below assume its layout.
    image: vllm/vllm:latest
    restart: always
    volumes:
      # Named volume holding the container's /root/.vllm directory.
      - vllm-data:/root/.vllm
      # Host model directory exposed inside the container at /models.
      - /mnt/970_Containers/ollama_model:/models
      # Persist only conversation logs to a host folder (example relative path)
      - ./data/vllm_convos:/root/.vllm/conversations
    # Default HTTP port for the vllm server - adjust if your image uses a different port
    ports:
      - "8000:8000"
    environment:
      # Informational env for other services; change if your webui expects a different var
      - VLLM_API_URL=http://vllm:8000
    networks:
      # Stay on the project default network (shared with vllm-monitor) and also
      # join stack_bridge: open-webui is attached only to stack_bridge, so
      # without this entry it cannot resolve http://vllm:8000.
      - default
      - stack_bridge
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 2  # adjust to your GPU count
              capabilities: [gpu]

  # Web frontend. Container port 8080 is published on host port 9070.
  open-webui:
    container_name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    restart: always
    volumes:
      - open-webui-data:/app/backend/data
    ports:
      - "9070:8080"
    # Many web frontends accept a backend URL env; adjust the variable name if needed.
    # NOTE(review): Open WebUI documents OPENAI_API_BASE_URL for
    # OpenAI-compatible backends; VLLM_BASE_URL may be ignored -- confirm.
    environment:
      - VLLM_BASE_URL=http://vllm:8000
    networks:
      - stack_bridge

  # Sidecar that runs the host-provided monitor script with /bin/sh. The Docker
  # socket is mounted, presumably so the script can manage containers -- verify
  # against monitor.sh.
  vllm-monitor:
    image: docker:cli
    container_name: vllm-monitor
    restart: always
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      # Script is mounted read-only from the host.
      - /mnt/data/External/ollama_monitor/monitor.sh:/monitor.sh:ro
    entrypoint: ["/bin/sh", "/monitor.sh"]
    depends_on:
      - vllm

# Named volumes referenced by the services above. Both use default settings,
# written as explicit empty mappings rather than bare keys.
volumes:
  # Mounted at /root/.vllm in the vllm service.
  vllm-data: {}
  # Mounted at /app/backend/data in the open-webui service.
  open-webui-data: {}

# Networks referenced by the services above.
networks:
  # Declared external: Compose expects this network to exist already and does
  # not create it, so create it beforehand.
  stack_bridge:
    external: true

# NOTES:
# - This compose file is a starting point. It assumes the vllm image exposes an HTTP API on port 8000
#   and that conversation logs live under /root/.vllm/conversations inside the container.
# - If the real vllm image uses different paths or ports, update the paths/ports accordingly.
# - Keep the named volume `vllm-data` to hold other vllm internal state; the host mount
#   ./data/vllm_convos will persist conversations specifically on the host.