---
# Docker Compose stack: a vllm inference server, the Open WebUI frontend, and a
# monitor sidecar that runs a host-provided script with Docker socket access.
#
# NOTES:
# - This compose file is a starting point. It assumes the vllm image exposes an
#   HTTP API on port 8000 and that conversation logs live under
#   /root/.vllm/conversations inside the container.
# - If the real vllm image uses different paths or ports, update the
#   paths/ports accordingly.
# - Keep the named volume `vllm-data` to hold other vllm internal state; the
#   host mount ./data/vllm_convos persists conversations specifically on the
#   host.
# - The external network `stack_bridge` must already exist before `up`
#   (e.g. `docker network create stack_bridge`).
version: '3.8'

services:
  vllm:
    container_name: vllm
    # NOTE(review): the vLLM project documents its official image as
    # `vllm/vllm-openai`; confirm that `vllm/vllm` is the image you intend.
    image: vllm/vllm:latest
    restart: always
    volumes:
      - vllm-data:/root/.vllm
      - /mnt/970_Containers/ollama_model:/models
      # Persist only conversation logs to a host folder (example relative path)
      - ./data/vllm_convos:/root/.vllm/conversations
    # Default HTTP port for the vllm server - adjust if your image uses a
    # different port
    ports:
      - "8000:8000"
    environment:
      # Informational env for other services; change if your webui expects a
      # different var
      - VLLM_API_URL=http://vllm:8000
    # open-webui is attached only to `stack_bridge`, so vllm must join it too
    # or the hostname `vllm` will not resolve from the web UI. `default` is
    # kept so services on the project default network can still reach vllm.
    networks:
      - default
      - stack_bridge
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 2  # adjust to your GPU count
              capabilities: [gpu]

  open-webui:
    container_name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    restart: always
    volumes:
      - open-webui-data:/app/backend/data
    ports:
      - "9070:8080"
    # Many web frontends accept a backend URL env; adjust the variable name if
    # needed.
    # NOTE(review): Open WebUI documents `OPENAI_API_BASE_URL` for
    # OpenAI-compatible backends; confirm `VLLM_BASE_URL` is actually read.
    environment:
      - VLLM_BASE_URL=http://vllm:8000
    networks:
      - stack_bridge

  vllm-monitor:
    image: docker:cli
    container_name: vllm-monitor
    restart: always
    volumes:
      # Docker socket gives the monitor script control over host containers.
      - /var/run/docker.sock:/var/run/docker.sock
      - /mnt/data/External/ollama_monitor/monitor.sh:/monitor.sh:ro
    entrypoint: ["/bin/sh", "/monitor.sh"]
    depends_on:
      - vllm

volumes:
  vllm-data:
  open-webui-data:

networks:
  stack_bridge:
    external: true