app/mcp_server.py: FastMCP (mcp SDK), streamable-http auf /mcp, statischer Bearer-Token (constant-time ASGI-Middleware), Fail-Fast ohne RAG_MCP_TOKEN. Tools rag_search (mit semester/fach/typ-Filter) + get_file_chunks. Läuft aus demselben Image wie der Ingestor und reused den Embed-Pfad → Vektoren sind garantiert kompatibel zum Ingest (der offizielle qdrant-MCP-Server kann nur fastembed → Dimension-/Schema-Mismatch). app/qdrant_store.py: search_chunks (query_points + optionaler Payload-Filter) und get_chunks_by_path (scroll, nach chunk_index sortiert). app/bulk.py: Amplification-Guard — /bulk-import lehnt mit 409 ab solange ein vorheriger Bulk noch BackgroundTasks abarbeitet. docker-compose.coolify.yml: rag-mcp-Service (nicht public, externes metamcp-net statt Stack-Coupling) + Traefik-Rate-Limit-Middleware am ingestor. tests/conftest.py: Settings-env_file in Tests neutralisieren (Dev-.env darf die Suite nicht kontaminieren). 68 passed, ruff clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
168 lines
5.4 KiB
Python
168 lines
5.4 KiB
Python
from unittest.mock import MagicMock
|
|
import pytest
|
|
|
|
from app.qdrant_store import (
|
|
ensure_collection,
|
|
upsert_chunks,
|
|
delete_by_path,
|
|
search_chunks,
|
|
get_chunks_by_path,
|
|
_payload_filter,
|
|
ChunkPoint,
|
|
)
|
|
|
|
|
|
def test_ensure_collection_creates_when_missing():
    """A missing collection is created once, together with its payload indexes.

    The fake client reports that the collection does not exist, so
    ``ensure_collection`` is expected to call ``create_collection`` exactly
    once with the requested name and to issue three ``create_payload_index``
    calls.
    """
    fake_client = MagicMock()
    fake_client.collection_exists.return_value = False

    ensure_collection(fake_client, "rag_test", vector_size=1024)

    fake_client.create_collection.assert_called_once()
    # Read only the keyword arguments; positional args are not inspected here.
    kwargs = fake_client.create_collection.call_args.kwargs
    assert kwargs["collection_name"] == "rag_test"
    # Payload indexes get created
    assert fake_client.create_payload_index.call_count == 3
|
|
|
|
|
|
def test_ensure_collection_skips_when_exists_with_matching_dim():
    """An existing collection with the requested vector size is not recreated."""
    # Collection description whose vector size equals the requested one.
    existing = MagicMock()
    existing.config.params.vectors.size = 1024

    client = MagicMock()
    client.collection_exists.return_value = True
    client.get_collection.return_value = existing

    ensure_collection(client, "rag_test", vector_size=1024)

    client.create_collection.assert_not_called()
|
|
|
|
|
|
def test_ensure_collection_raises_on_dim_mismatch():
    """A vector-size mismatch on an existing collection raises RuntimeError."""
    # The stored collection reports 768 dimensions while 1024 are requested.
    stored = MagicMock()
    stored.config.params.vectors.size = 768

    client = MagicMock()
    client.collection_exists.return_value = True
    client.get_collection.return_value = stored

    with pytest.raises(RuntimeError, match="dimension mismatch"):
        ensure_collection(client, "rag_test", vector_size=1024)
|
|
|
|
|
|
def test_upsert_chunks_calls_client_upsert():
    """upsert_chunks issues one client upsert carrying every chunk.

    Checks the target collection, that vectors and payloads arrive unchanged
    and in order, and that the generated point ids are distinct strings.
    """
    client = MagicMock()
    chunks = [
        ChunkPoint(vector=[0.1] * 4, payload={"file_path": "a", "chunk_index": 0}),
        ChunkPoint(vector=[0.2] * 4, payload={"file_path": "a", "chunk_index": 1}),
    ]

    upsert_chunks(client, "rag_test", chunks)

    client.upsert.assert_called_once()
    call_kwargs = client.upsert.call_args.kwargs
    assert call_kwargs["collection_name"] == "rag_test"

    sent = call_kwargs["points"]
    assert len(sent) == 2

    # Expected values are fresh literals so accidental in-place mutation of the
    # input payloads would still be detected.
    expected = [
        ([0.1] * 4, {"file_path": "a", "chunk_index": 0}),
        ([0.2] * 4, {"file_path": "a", "chunk_index": 1}),
    ]
    for position, (vector, payload) in enumerate(expected):
        assert sent[position].vector == vector
        assert sent[position].payload == payload

    # ids are UUID strings, distinct
    first_id = sent[0].id
    assert isinstance(first_id, str)
    assert first_id != sent[1].id
|
|
|
|
|
|
def test_delete_by_path_uses_filter():
    """delete_by_path issues one delete whose filter targets ``file_path``."""
    client = MagicMock()

    delete_by_path(client, "rag_test", "Documents/x.pdf")

    client.delete.assert_called_once()
    call_kwargs = client.delete.call_args.kwargs
    assert call_kwargs["collection_name"] == "rag_test"

    # Walk selector -> filter -> must -> first condition and check its key.
    first_condition = call_kwargs["points_selector"].filter.must[0]
    assert first_condition.key == "file_path"
|
|
|
|
|
|
def test_payload_filter_none_when_no_constraints():
    """With all three constraints set to None, no filter object is built."""
    built = _payload_filter(None, None, None)
    assert built is None
|
|
|
|
|
|
def test_payload_filter_builds_only_given_conditions():
    """Only non-None constraints become conditions, in semester/typ order."""
    built = _payload_filter(semester="2.Semester", fach=None, typ="Vorlesungen")

    # fach=None must not produce a condition.
    assert [condition.key for condition in built.must] == ["semester", "typ"]

    semester_condition = built.must[0]
    typ_condition = built.must[1]
    assert semester_condition.match.value == "2.Semester"
    assert typ_condition.match.value == "Vorlesungen"
|
|
|
|
|
|
def test_search_chunks_maps_payload_and_score():
    """search_chunks forwards its arguments and flattens hits into dicts.

    The query must receive the collection name, the limit and a filter on
    ``fach``. Each result carries the expected payload fields plus ``score``,
    while the extra ``ignored`` payload key is dropped.
    """
    # Payload fields expected to appear in the result.
    visible_fields = {
        "text": "chunk text",
        "file_path": "Documents/THB/2.Semester/Databases/a.pdf",
        "file_name": "a.pdf",
        "semester": "2.Semester",
        "fach": "Databases",
        "typ": "Vorlesungen",
        "page": 3,
        "chunk_index": 2,
    }
    hit = MagicMock(
        payload={**visible_fields, "ignored": "not in result fields"},
        score=0.87,
    )
    client = MagicMock()
    client.query_points.return_value = MagicMock(points=[hit])

    results = search_chunks(
        client,
        "rag_test",
        [0.1] * 4,
        limit=5,
        fach="Databases",
    )

    query_kwargs = client.query_points.call_args.kwargs
    assert query_kwargs["collection_name"] == "rag_test"
    assert query_kwargs["limit"] == 5
    assert query_kwargs["query_filter"].must[0].key == "fach"
    assert results == [{**visible_fields, "score": 0.87}]
|
|
|
|
|
|
def test_search_chunks_no_filter_passes_none():
    """Without semester/fach/typ arguments, ``query_filter`` is passed as None."""
    client = MagicMock()
    client.query_points.return_value = MagicMock(points=[])

    search_chunks(client, "rag_test", [0.1] * 4, limit=3)

    query_kwargs = client.query_points.call_args.kwargs
    assert query_kwargs["query_filter"] is None
|
|
|
|
|
|
def test_get_chunks_by_path_sorts_by_chunk_index():
    """get_chunks_by_path returns rows ordered by ``chunk_index``.

    The fake scroll yields points out of order. The result must be sorted,
    and the scroll call must filter on ``file_path`` without fetching vectors.
    """

    def make_point(chunk_index, page, text):
        """Build a fake scroll record exposing only a payload."""
        return MagicMock(
            payload={"chunk_index": chunk_index, "page": page, "text": text}
        )

    # Deliberately shuffled; the second tuple element is the scroll's
    # next-page value, None here.
    shuffled = [make_point(2, 1, "c"), make_point(0, 1, "a"), make_point(1, 1, "b")]
    client = MagicMock()
    client.scroll.return_value = (shuffled, None)

    rows = get_chunks_by_path(client, "rag_test", "Documents/x.pdf")

    assert [row["chunk_index"] for row in rows] == [0, 1, 2]
    assert [row["text"] for row in rows] == ["a", "b", "c"]

    scroll_kwargs = client.scroll.call_args.kwargs
    assert scroll_kwargs["scroll_filter"].must[0].key == "file_path"
    assert scroll_kwargs["with_vectors"] is False
|