app/mcp_server.py: FastMCP (mcp SDK), streamable-http auf /mcp, statischer Bearer-Token (constant-time ASGI-Middleware), Fail-Fast ohne RAG_MCP_TOKEN. Tools rag_search (mit semester/fach/typ-Filter) + get_file_chunks. Läuft aus demselben Image wie der Ingestor und reused den Embed-Pfad → Vektoren sind garantiert kompatibel zum Ingest (der offizielle qdrant-MCP-Server kann nur fastembed → Dimension-/Schema-Mismatch). app/qdrant_store.py: search_chunks (query_points + optionaler Payload-Filter) und get_chunks_by_path (scroll, nach chunk_index sortiert). app/bulk.py: Amplification-Guard — /bulk-import lehnt mit 409 ab solange ein vorheriger Bulk noch BackgroundTasks abarbeitet. docker-compose.coolify.yml: rag-mcp-Service (nicht public, externes metamcp-net statt Stack-Coupling) + Traefik-Rate-Limit-Middleware am ingestor. tests/conftest.py: Settings-env_file in Tests neutralisieren (Dev-.env darf die Suite nicht kontaminieren). 68 passed, ruff clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
51 lines
1.3 KiB
Python
51 lines
1.3 KiB
Python
import io
|
|
import pytest
|
|
import fitz # pymupdf
|
|
from docx import Document
|
|
|
|
from app.config import Settings
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _ignore_dotenv():
    """Disable ``Settings``' dotenv file for the duration of each test.

    Tests must be deterministic regardless of a developer ``.env`` in the
    repo root. The ``env_file`` entry of ``Settings.model_config`` is set to
    ``None`` so tests see only the environment they explicitly set (e.g. via
    ``monkeypatch``), and the previous value is put back afterwards.
    """
    original = Settings.model_config.get("env_file")
    Settings.model_config["env_file"] = None
    try:
        yield
    finally:
        # model_config lives on the class and is shared by every test, so it
        # must be restored however this generator is finalised.
        Settings.model_config["env_file"] = original
|
|
|
|
|
|
@pytest.fixture
def sample_pdf_bytes() -> bytes:
    """Return the bytes of an in-memory two-page PDF.

    Each page gets a single line of text inserted at position ``(72, 72)``.
    The document is closed even if building or serialising it fails.
    """
    page_texts = (
        "Seite eins enthaelt Lorem Ipsum.",
        "Seite zwei enthaelt mehr Text.",
    )
    doc = fitz.open()
    try:
        for text in page_texts:
            doc.new_page().insert_text((72, 72), text)
        return doc.tobytes()
    finally:
        # Release the document on the error path as well as on success.
        doc.close()
|
|
|
|
|
|
@pytest.fixture
def sample_docx_bytes() -> bytes:
    """Return the bytes of a DOCX document containing two paragraphs.

    The document is saved into an in-memory buffer rather than to disk.
    """
    document = Document()
    for paragraph_text in (
        "Erster Absatz.",
        "Zweiter Absatz mit mehr Inhalt.",
    ):
        document.add_paragraph(paragraph_text)

    buffer = io.BytesIO()
    document.save(buffer)
    return buffer.getvalue()
|
|
|
|
|
|
@pytest.fixture
def sample_md_bytes() -> bytes:
    """Return a small UTF-8 encoded Markdown document.

    The document consists of a level-one heading followed by two paragraphs.
    """
    markdown_text = "# Title\n\nFirst paragraph.\n\nSecond paragraph.\n"
    return markdown_text.encode("utf-8")
|
|
|
|
|
|
@pytest.fixture
def sample_xlsx_bytes() -> bytes:
    """Return a minimal stand-in payload for an XLSX file.

    This is a placeholder, not a real workbook: the extractor doesn't read
    content for xlsx, so a short dummy byte string is enough.
    """
    placeholder = b"PK\x03\x04dummy"
    return placeholder
|