# docker_stack/stack/vllm-webui.yml
---
# Compose file format version. Docker Compose v2 treats this key as obsolete
# (it is ignored with a warning); it is kept here for older docker-compose
# releases that still require it.
version: '3.8'

services:
  # vLLM inference server. Publishes its HTTP API on port 8000 and is addressed
  # by the other services in this stack as http://vllm:8000.
  vllm:
    container_name: vllm
    # NOTE(review): the image published by the vLLM project is normally
    # `vllm/vllm-openai` - confirm that `vllm/vllm` exists in your registry.
    image: vllm/vllm:latest
    restart: always
    volumes:
      # Named volume for vllm internal state.
      - vllm-data:/root/.vllm
      # Host model directory exposed to the container as /models.
      - /mnt/970_Containers/ollama_model:/models
      # Persist only conversation logs to a host folder (example relative path).
      # This bind mount is nested inside the vllm-data volume mounted above.
      - ./data/vllm_convos:/root/.vllm/conversations
    # Default HTTP port for the vllm server - adjust if your image uses a different port
    ports:
      - "8000:8000"
    environment:
      # Informational env for other services; change if your webui expects a different var
      - VLLM_API_URL=http://vllm:8000
    networks:
      # A service with no `networks` key joins only the implicit default
      # network, while open-webui joins only stack_bridge; without a shared
      # network the hostname `vllm` cannot be resolved from open-webui.
      # `default` is kept so services that declare no networks still reach vllm.
      - default
      - stack_bridge
    # NOTE(review): multi-GPU (tensor-parallel) vLLM runs usually need extra
    # shared memory (`ipc: host` or `shm_size`) - confirm for your workload.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 2 # adjust to your GPU count
              capabilities: [gpu]

  # Open WebUI front end, published on host port 9070 (container port 8080).
  open-webui:
    container_name: open-webui
    image: ghcr.io/open-webui/open-webui:main
    restart: always
    volumes:
      # Named volume mounted at the backend data directory.
      - open-webui-data:/app/backend/data
    ports:
      - "9070:8080"
    environment:
      # Open WebUI takes its OpenAI-compatible backend from OPENAI_API_BASE_URL.
      # Assumes the vllm service runs vLLM's OpenAI-compatible server, which
      # serves its routes under /v1 - TODO confirm against the image in use.
      - OPENAI_API_BASE_URL=http://vllm:8000/v1
      # Retained for backward compatibility with anything that reads it;
      # Open WebUI itself does not appear to use this variable.
      - VLLM_BASE_URL=http://vllm:8000
    networks:
      - stack_bridge

  # Sidecar based on the Docker CLI image. It runs the host-provided
  # monitor.sh under /bin/sh and is started after the vllm service.
  vllm-monitor:
    container_name: vllm-monitor
    image: docker:cli
    restart: always
    depends_on:
      - vllm
    entrypoint:
      - /bin/sh
      - /monitor.sh
    volumes:
      # Host Docker socket: gives this container access to the host's Docker
      # daemon, so treat monitor.sh as privileged code.
      - /var/run/docker.sock:/var/run/docker.sock
      # Monitor script, mounted read-only from the host.
      - /mnt/data/External/ollama_monitor/monitor.sh:/monitor.sh:ro

# Named volumes managed by Docker; both use the default driver and options.
volumes:
  # Mounted at /root/.vllm in the vllm service.
  vllm-data: {}
  # Mounted at /app/backend/data in the open-webui service.
  open-webui-data: {}

# Networks referenced by the services above.
networks:
  # Pre-existing network: Compose will not create it, so it must already exist
  # on the host (e.g. `docker network create stack_bridge`) before `up`.
  stack_bridge:
    name: stack_bridge
    external: true

# NOTES:
# - This compose file is a starting point. It assumes the vllm image exposes an HTTP API on port 8000
#   and that conversation logs live under /root/.vllm/conversations inside the container.
# - If the real vllm image uses different paths or ports, update the paths/ports accordingly.
# - Keep the named volume `vllm-data` to hold other vllm internal state; the host mount
#   ./data/vllm_convos will persist conversations specifically on the host.