Files
rag-ingestor/tests/test_bulk.py
Jean-Luc Makiola 9643011e64
All checks were successful
CI / ci (push) Successful in 49s
Release / release (push) Successful in 1m2s
feat: MCP-Server für RAG-Retrieval + Webhook-Härtung
app/mcp_server.py: FastMCP (mcp SDK), streamable-http auf /mcp, statischer
Bearer-Token (constant-time ASGI-Middleware), Fail-Fast ohne RAG_MCP_TOKEN.
Tools rag_search (mit semester/fach/typ-Filter) + get_file_chunks. Läuft aus
demselben Image wie der Ingestor und nutzt denselben Embed-Pfad, sodass die
Vektoren garantiert kompatibel zum Ingest sind (der offizielle
qdrant-MCP-Server kann nur fastembed, was zu einem Dimension-/Schema-Mismatch
führen würde).

app/qdrant_store.py: search_chunks (query_points + optionaler Payload-Filter)
und get_chunks_by_path (scroll, nach chunk_index sortiert).

app/bulk.py: Amplification-Guard — /bulk-import lehnt mit 409 ab, solange ein
vorheriger Bulk noch BackgroundTasks abarbeitet.

docker-compose.coolify.yml: rag-mcp-Service (nicht public, externes
metamcp-net statt Stack-Coupling) + Traefik-Rate-Limit-Middleware am ingestor.

tests/conftest.py: Settings-env_file in Tests neutralisieren (Dev-.env darf
die Suite nicht kontaminieren). 68 passed, ruff clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 22:08:37 +02:00

83 lines
2.8 KiB
Python

from unittest.mock import AsyncMock, call
from fastapi.testclient import TestClient
from app.webhook.models import EventType
def _make_app(monkeypatch):
    """Build the FastAPI app against a throwaway test configuration.

    Sets the environment variables listed below through ``monkeypatch``,
    clears the caches on ``get_settings`` and on the pipeline's
    ``_qdrant_client``, and swaps ``app.main._startup_ensure_collection``
    for an ``AsyncMock`` before importing and returning ``app.main.app``.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture; every change made here
            goes through it.

    Returns:
        The ``app`` object exported by ``app.main``.
    """
    test_env = {
        "NEXTCLOUD_WEBDAV_URL": "http://nc",
        "NEXTCLOUD_USER": "u",
        "NEXTCLOUD_APP_PASSWORD": "p",
        "OLLAMA_URL": "http://ollama",
        "OLLAMA_EMBED_MODEL": "m",
        "QDRANT_URL": "http://qdrant",
        "QDRANT_COLLECTION": "rag_test",
        "WEBHOOK_SECRET": "abc",
    }
    for variable, value in test_env.items():
        monkeypatch.setenv(variable, value)

    # Project imports stay local so they run only after the env is in place.
    from app.config import get_settings

    # Drop any cached settings; presumably so the values above are picked up.
    get_settings.cache_clear()

    import app.ingest.pipeline as pipeline_module

    # Likewise discard a cached Qdrant client from an earlier test.
    pipeline_module._qdrant_client.cache_clear()

    # Replace the startup hook with a mock so the real one never runs.
    monkeypatch.setattr("app.main._startup_ensure_collection", AsyncMock())

    from app.main import app as application

    return application
def test_bulk_import_lists_and_dispatches(monkeypatch):
    """A valid /bulk-import returns 202 and dispatches only the documents.

    The patched listing contains a PDF, a DOCX and a ``.rag-meta.json``
    sidecar. The endpoint must report two dispatched files and await
    ``process_file`` once per document with ``EventType.CREATED``.
    """
    application = _make_app(monkeypatch)

    folder_listing = [
        "Documents/THB/Studium/2.Semester/Databases/a.pdf",
        "Documents/THB/Studium/2.Semester/Databases/b.docx",
        # Sidecar file: expected to be skipped by the endpoint.
        "Documents/THB/Studium/2.Semester/Databases/.rag-meta.json",
    ]
    lister = AsyncMock(return_value=folder_listing)
    processor = AsyncMock()
    monkeypatch.setattr("app.bulk.list_files_recursive", lister)
    monkeypatch.setattr("app.bulk.process_file", processor)

    with TestClient(application) as client:
        response = client.post(
            "/bulk-import",
            headers={"X-Webhook-Secret": "abc"},
            json={"path": "Documents/THB/Studium/2.Semester/Databases"},
        )

    assert response.status_code == 202
    payload = response.json()
    # Only the .pdf and the .docx count; the JSON sidecar is not dispatched.
    assert payload["dispatched"] == 2

    expected_calls = [
        call("Documents/THB/Studium/2.Semester/Databases/a.pdf", EventType.CREATED),
        call("Documents/THB/Studium/2.Semester/Databases/b.docx", EventType.CREATED),
    ]
    processor.assert_has_calls(expected_calls)
    # assert_has_calls tolerates extra calls, so pin the exact await count too.
    assert processor.await_count == 2
def test_bulk_import_rejects_wrong_secret(monkeypatch):
    """A /bulk-import request with the wrong X-Webhook-Secret gets a 401."""
    application = _make_app(monkeypatch)
    # Deliberately wrong value for the secret header.
    wrong_secret_headers = {"X-Webhook-Secret": "nope"}

    with TestClient(application) as client:
        response = client.post(
            "/bulk-import",
            json={"path": "x"},
            headers=wrong_secret_headers,
        )

    assert response.status_code == 401
def test_bulk_import_rejects_while_in_progress(monkeypatch):
    """While a bulk import is in flight, a new /bulk-import gets a 409.

    ``app.bulk._inflight`` is forced to a non-zero value to simulate an
    unfinished bulk run. The request carries the correct secret, must be
    answered with 409, and the file-listing function must never be awaited.
    """
    application = _make_app(monkeypatch)

    # Simulate a still-running bulk import; the amplification guard must reject.
    monkeypatch.setattr("app.bulk._inflight", 4)
    lister = AsyncMock()
    monkeypatch.setattr("app.bulk.list_files_recursive", lister)

    with TestClient(application) as client:
        response = client.post(
            "/bulk-import",
            headers={"X-Webhook-Secret": "abc"},
            json={"path": "x"},
        )

    assert response.status_code == 409
    # The guard has to fire before any WebDAV listing is attempted.
    lister.assert_not_awaited()