# rag-ingestor/docker-compose.yml

# Local development only.
# Production deployment goes via Coolify using docker/Dockerfile alone;
# the compose file here is for booting up qdrant + ollama next to the
# ingestor on a developer machine.
services:
  # The ingestor application, built from this repository's docker/Dockerfile
  # with the directory containing this compose file as the build context.
  ingestor:
    build:
      dockerfile: docker/Dockerfile
      context: .
    # Start order only: the ingestor is created after the qdrant and ollama
    # containers have been started. service_started is the condition the
    # short list form implies; it does not wait for either dependency to be
    # ready to accept requests.
    depends_on:
      qdrant:
        condition: service_started
      ollama:
        condition: service_started
    # Container environment variables are read from .env next to this file.
    env_file:
      - .env
    # host:container; kept quoted so the mapping is always read as a string.
    ports:
      - "8000:8000"

  # Qdrant instance the ingestor depends on during local development.
  qdrant:
    # The tag defaults to "latest" (the previously hard-coded value). Set
    # QDRANT_TAG in the shell or in .env to pin a specific release, so that
    # a pull cannot silently change the server version underneath the data
    # persisted in the qdrant_data volume.
    image: "qdrant/qdrant:${QDRANT_TAG:-latest}"
    # host:container. With no host IP given, the port is published on every
    # host interface; prefix with "127.0.0.1:" if only local access is
    # wanted.
    ports:
      - "6333:6333"
    # Named volume so /qdrant/storage survives container re-creation.
    volumes:
      - qdrant_data:/qdrant/storage

  # Ollama instance the ingestor depends on during local development.
  ollama:
    # The tag defaults to "latest" (the previously hard-coded value). Set
    # OLLAMA_TAG in the shell or in .env to pin a specific release so a pull
    # cannot silently change the server version.
    image: "ollama/ollama:${OLLAMA_TAG:-latest}"
    # host:container. With no host IP given, the port is published on every
    # host interface; prefix with "127.0.0.1:" if only local access is
    # wanted.
    ports:
      - "11434:11434"
    # Named volume so /root/.ollama survives container re-creation.
    volumes:
      - ollama_data:/root/.ollama
    # Cap CPU so embedding peaks don't starve the host. Mirror this limit in
    # the production Coolify config.
    # NOTE(review): `cpus` is a CPU-time quota; it does not change how many
    # cores the container can see. If Ollama sizes its inference thread pool
    # from the visible core count, it may still start more threads than the
    # quota can run at once — confirm, and set the num_thread model option
    # if throttling shows up.
    cpus: "2.0"
    environment:
      # Maximum number of requests each loaded model serves in parallel.
      OLLAMA_NUM_PARALLEL: "1"

# Named volumes with default settings. They back the qdrant and ollama
# mounts above so their data outlives container re-creation.
volumes:
  qdrant_data: {}
  ollama_data: {}