Files
rag-ingestor/tests/test_qdrant_store.py
Jean-Luc Makiola 9643011e64
All checks were successful
CI / ci (push) Successful in 49s
Release / release (push) Successful in 1m2s
feat: MCP-Server für RAG-Retrieval + Webhook-Härtung
app/mcp_server.py: FastMCP (mcp SDK), streamable-http auf /mcp, statischer
Bearer-Token (constant-time ASGI-Middleware), Fail-Fast ohne RAG_MCP_TOKEN.
Tools rag_search (mit semester/fach/typ-Filter) + get_file_chunks. Läuft aus
demselben Image wie der Ingestor und reused den Embed-Pfad → Vektoren sind
garantiert kompatibel zum Ingest (der offizielle qdrant-MCP-Server kann nur
fastembed → Dimension-/Schema-Mismatch).

app/qdrant_store.py: search_chunks (query_points + optionaler Payload-Filter)
und get_chunks_by_path (scroll, nach chunk_index sortiert).

app/bulk.py: Amplification-Guard — /bulk-import lehnt mit 409 ab solange ein
vorheriger Bulk noch BackgroundTasks abarbeitet.

docker-compose.coolify.yml: rag-mcp-Service (nicht public, externes
metamcp-net statt Stack-Coupling) + Traefik-Rate-Limit-Middleware am ingestor.

tests/conftest.py: Settings-env_file in Tests neutralisieren (Dev-.env darf
die Suite nicht kontaminieren). 68 passed, ruff clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 22:08:37 +02:00

168 lines
5.4 KiB
Python

from unittest.mock import MagicMock
import pytest
from app.qdrant_store import (
ensure_collection,
upsert_chunks,
delete_by_path,
search_chunks,
get_chunks_by_path,
_payload_filter,
ChunkPoint,
)
def test_ensure_collection_creates_when_missing():
    """Verify ensure_collection creates a missing collection and its indexes.

    The client mock reports that the collection does not exist; the test then
    checks that ``create_collection`` is called exactly once with the given
    collection name and that ``create_payload_index`` is called three times.
    """
    fake_client = MagicMock()
    fake_client.collection_exists.return_value = False

    ensure_collection(fake_client, "rag_test", vector_size=1024)

    fake_client.create_collection.assert_called_once()
    # Only keyword arguments are inspected, so read them directly instead of
    # unpacking an unused positional-args tuple.
    kwargs = fake_client.create_collection.call_args.kwargs
    assert kwargs["collection_name"] == "rag_test"
    # Payload indexes get created
    assert fake_client.create_payload_index.call_count == 3
def test_ensure_collection_skips_when_exists_with_matching_dim():
    """Verify no collection is created when one exists with the same size.

    The mocked collection info reports a vector size of 1024, equal to the
    requested ``vector_size``, so ``create_collection`` must not be called.
    """
    existing_info = MagicMock()
    existing_info.config.params.vectors.size = 1024

    client = MagicMock()
    client.collection_exists.return_value = True
    client.get_collection.return_value = existing_info

    ensure_collection(client, "rag_test", vector_size=1024)

    client.create_collection.assert_not_called()
def test_ensure_collection_raises_on_dim_mismatch():
    """Verify a vector-size mismatch on an existing collection raises.

    The mocked collection info reports size 768 while 1024 is requested; the
    call is expected to raise ``RuntimeError`` whose message matches
    "dimension mismatch".
    """
    existing_info = MagicMock()
    existing_info.config.params.vectors.size = 768

    client = MagicMock()
    client.collection_exists.return_value = True
    client.get_collection.return_value = existing_info

    with pytest.raises(RuntimeError, match="dimension mismatch"):
        ensure_collection(client, "rag_test", vector_size=1024)
def test_upsert_chunks_calls_client_upsert():
    """Verify upsert_chunks issues one client.upsert carrying every chunk.

    Two ``ChunkPoint`` inputs are passed in; the test checks the collection
    name, that both vectors and payloads arrive unchanged and in order, and
    that each produced point has a distinct string id.
    """
    expected = [
        ([0.1] * 4, {"file_path": "a", "chunk_index": 0}),
        ([0.2] * 4, {"file_path": "a", "chunk_index": 1}),
    ]
    # Hand the function its own copies so the expectations above stay pristine
    # even if the implementation were to mutate its inputs.
    chunk_points = [
        ChunkPoint(vector=list(vector), payload=dict(payload))
        for vector, payload in expected
    ]
    client = MagicMock()

    upsert_chunks(client, "rag_test", chunk_points)

    client.upsert.assert_called_once()
    call_kwargs = client.upsert.call_args.kwargs
    assert call_kwargs["collection_name"] == "rag_test"

    sent = call_kwargs["points"]
    assert len(sent) == 2
    for position, (vector, payload) in enumerate(expected):
        assert sent[position].vector == vector
        assert sent[position].payload == payload

    # ids are distinct strings (presumably UUIDs; only type and uniqueness
    # are checked here)
    assert isinstance(sent[0].id, str)
    assert sent[0].id != sent[1].id
def test_delete_by_path_uses_filter():
    """Verify delete_by_path deletes via a selector filtering on file_path.

    Checks that ``client.delete`` is called once for the given collection and
    that the first ``must`` condition of the selector's filter has the key
    "file_path".
    """
    client = MagicMock()

    delete_by_path(client, "rag_test", "Documents/x.pdf")

    client.delete.assert_called_once()
    call_kwargs = client.delete.call_args.kwargs
    assert call_kwargs["collection_name"] == "rag_test"

    # Walk selector -> filter -> must -> first condition; it should target
    # the file_path payload field.
    first_condition = call_kwargs["points_selector"].filter.must[0]
    assert first_condition.key == "file_path"
def test_payload_filter_none_when_no_constraints():
    """Verify _payload_filter returns None when every constraint is None."""
    result = _payload_filter(None, None, None)
    assert result is None
def test_payload_filter_builds_only_given_conditions():
    """Verify _payload_filter emits conditions only for supplied constraints.

    ``fach`` is None and must be skipped; the remaining ``must`` conditions
    are expected in the order semester, typ with their matching values.
    """
    built = _payload_filter(semester="2.Semester", fach=None, typ="Vorlesungen")

    assert [condition.key for condition in built.must] == ["semester", "typ"]

    semester_condition = built.must[0]
    typ_condition = built.must[1]
    assert semester_condition.match.value == "2.Semester"
    assert typ_condition.match.value == "Vorlesungen"
def test_search_chunks_maps_payload_and_score():
    """Verify search_chunks maps hits to result dicts and forwards arguments.

    A single mocked hit carries the eight expected payload fields plus an
    extra "ignored" key. The result must contain exactly those eight fields
    plus the hit's score, and ``query_points`` must receive the collection
    name, the limit, and a filter whose first condition is on "fach".
    """
    result_fields = {
        "text": "chunk text",
        "file_path": "Documents/THB/2.Semester/Databases/a.pdf",
        "file_name": "a.pdf",
        "semester": "2.Semester",
        "fach": "Databases",
        "typ": "Vorlesungen",
        "page": 3,
        "chunk_index": 2,
    }

    hit = MagicMock()
    # The extra key must not leak into the mapped result.
    hit.payload = {**result_fields, "ignored": "not in result fields"}
    hit.score = 0.87

    query_response = MagicMock()
    query_response.points = [hit]
    client = MagicMock()
    client.query_points.return_value = query_response

    out = search_chunks(client, "rag_test", [0.1] * 4, limit=5, fach="Databases")

    call_kwargs = client.query_points.call_args.kwargs
    assert call_kwargs["collection_name"] == "rag_test"
    assert call_kwargs["limit"] == 5
    assert call_kwargs["query_filter"].must[0].key == "fach"
    assert out == [{**result_fields, "score": 0.87}]
def test_search_chunks_no_filter_passes_none():
    """Verify search_chunks passes query_filter=None without constraints."""
    empty_response = MagicMock()
    empty_response.points = []
    client = MagicMock()
    client.query_points.return_value = empty_response

    search_chunks(client, "rag_test", [0.1] * 4, limit=3)

    passed_kwargs = client.query_points.call_args.kwargs
    assert passed_kwargs["query_filter"] is None
def test_get_chunks_by_path_sorts_by_chunk_index():
    """Verify get_chunks_by_path returns rows ordered by chunk_index.

    The mocked ``scroll`` returns three points out of order; the rows must
    come back sorted by chunk_index. The scroll call must filter on
    "file_path" and request no vectors.
    """

    def make_point(chunk_index, page, text):
        """Build a mock point exposing only a payload dict."""
        point = MagicMock()
        point.payload = {"chunk_index": chunk_index, "page": page, "text": text}
        return point

    unordered = [make_point(2, 1, "c"), make_point(0, 1, "a"), make_point(1, 1, "b")]
    client = MagicMock()
    # scroll is mocked to return a (points, None) tuple; the second element
    # is presumably the next-page offset — confirm against the client API.
    client.scroll.return_value = (unordered, None)

    rows = get_chunks_by_path(client, "rag_test", "Documents/x.pdf")

    assert [row["chunk_index"] for row in rows] == [0, 1, 2]
    assert [row["text"] for row in rows] == ["a", "b", "c"]

    call_kwargs = client.scroll.call_args.kwargs
    assert call_kwargs["scroll_filter"].must[0].key == "file_path"
    assert call_kwargs["with_vectors"] is False