"""Unit tests for the helpers in ``app.qdrant_store``.

Every test passes a ``MagicMock`` in place of the client object, so nothing
here talks to a real vector store; the assertions only inspect the calls the
helpers make and the values they return.
"""

from unittest.mock import MagicMock

import pytest

from app.qdrant_store import (
    ensure_collection,
    upsert_chunks,
    delete_by_path,
    search_chunks,
    get_chunks_by_path,
    _payload_filter,
    ChunkPoint,
)


def test_ensure_collection_creates_when_missing():
    """A missing collection is created once, along with three payload indexes."""
    fake_client = MagicMock()
    fake_client.collection_exists.return_value = False

    ensure_collection(fake_client, "rag_test", vector_size=1024)

    fake_client.create_collection.assert_called_once()
    # Read the keyword arguments directly, as the other tests in this module do.
    kwargs = fake_client.create_collection.call_args.kwargs
    assert kwargs["collection_name"] == "rag_test"
    # Payload indexes get created
    assert fake_client.create_payload_index.call_count == 3


def test_ensure_collection_skips_when_exists_with_matching_dim():
    """An existing collection with the requested vector size is left alone."""
    fake_client = MagicMock()
    fake_client.collection_exists.return_value = True
    info = MagicMock()
    info.config.params.vectors.size = 1024
    fake_client.get_collection.return_value = info

    ensure_collection(fake_client, "rag_test", vector_size=1024)

    fake_client.create_collection.assert_not_called()


def test_ensure_collection_raises_on_dim_mismatch():
    """An existing collection with a different vector size raises RuntimeError."""
    fake_client = MagicMock()
    fake_client.collection_exists.return_value = True
    info = MagicMock()
    info.config.params.vectors.size = 768
    fake_client.get_collection.return_value = info

    with pytest.raises(RuntimeError, match="dimension mismatch"):
        ensure_collection(fake_client, "rag_test", vector_size=1024)


def test_upsert_chunks_calls_client_upsert():
    """Each ChunkPoint becomes one upserted point with its vector and payload."""
    fake_client = MagicMock()
    points = [
        ChunkPoint(vector=[0.1] * 4, payload={"file_path": "a", "chunk_index": 0}),
        ChunkPoint(vector=[0.2] * 4, payload={"file_path": "a", "chunk_index": 1}),
    ]

    upsert_chunks(fake_client, "rag_test", points)

    fake_client.upsert.assert_called_once()
    kwargs = fake_client.upsert.call_args.kwargs
    assert kwargs["collection_name"] == "rag_test"
    produced = kwargs["points"]
    assert len(produced) == 2
    assert produced[0].vector == [0.1] * 4
    assert produced[0].payload == {"file_path": "a", "chunk_index": 0}
    assert produced[1].vector == [0.2] * 4
    assert produced[1].payload == {"file_path": "a", "chunk_index": 1}
    # ids are UUID strings, distinct -- check the type of every id, not just
    # the first one.
    assert all(isinstance(point.id, str) for point in produced)
    assert produced[0].id != produced[1].id


def test_delete_by_path_uses_filter():
    """Deletion is issued once with a selector whose filter targets file_path."""
    fake_client = MagicMock()

    delete_by_path(fake_client, "rag_test", "Documents/x.pdf")

    fake_client.delete.assert_called_once()
    kwargs = fake_client.delete.call_args.kwargs
    assert kwargs["collection_name"] == "rag_test"
    # The filter should target file_path
    selector = kwargs["points_selector"]
    # Inspect the FilterSelector → Filter → must → FieldCondition
    assert selector.filter.must[0].key == "file_path"


def test_payload_filter_none_when_no_constraints():
    """With no constraints at all, no filter object is built."""
    assert _payload_filter(None, None, None) is None


def test_payload_filter_builds_only_given_conditions():
    """Only the non-None constraints appear as conditions, in argument order."""
    flt = _payload_filter(semester="2.Semester", fach=None, typ="Vorlesungen")

    keys = [c.key for c in flt.must]
    assert keys == ["semester", "typ"]
    assert flt.must[0].match.value == "2.Semester"
    assert flt.must[1].match.value == "Vorlesungen"


def test_search_chunks_maps_payload_and_score():
    """Hits are returned as dicts of the known payload fields plus the score."""
    hit = MagicMock()
    hit.payload = {
        "text": "chunk text",
        "file_path": "Documents/THB/2.Semester/Databases/a.pdf",
        "file_name": "a.pdf",
        "semester": "2.Semester",
        "fach": "Databases",
        "typ": "Vorlesungen",
        "page": 3,
        "chunk_index": 2,
        # Extra payload keys must not leak into the result.
        "ignored": "not in result fields",
    }
    hit.score = 0.87
    response = MagicMock()
    response.points = [hit]
    fake_client = MagicMock()
    fake_client.query_points.return_value = response

    out = search_chunks(
        fake_client, "rag_test", [0.1] * 4, limit=5, fach="Databases"
    )

    kwargs = fake_client.query_points.call_args.kwargs
    assert kwargs["collection_name"] == "rag_test"
    assert kwargs["limit"] == 5
    assert kwargs["query_filter"].must[0].key == "fach"
    assert out == [
        {
            "text": "chunk text",
            "file_path": "Documents/THB/2.Semester/Databases/a.pdf",
            "file_name": "a.pdf",
            "semester": "2.Semester",
            "fach": "Databases",
            "typ": "Vorlesungen",
            "page": 3,
            "chunk_index": 2,
            "score": 0.87,
        }
    ]


def test_search_chunks_no_filter_passes_none():
    """Searching without constraints passes ``query_filter=None`` to the client."""
    response = MagicMock()
    response.points = []
    fake_client = MagicMock()
    fake_client.query_points.return_value = response

    search_chunks(fake_client, "rag_test", [0.1] * 4, limit=3)

    assert fake_client.query_points.call_args.kwargs["query_filter"] is None


def test_get_chunks_by_path_sorts_by_chunk_index():
    """Rows come back ordered by chunk_index, fetched by file_path, no vectors."""

    def pt(idx, page, text):
        # Minimal stand-in for a scrolled point: only ``payload`` is set.
        m = MagicMock()
        m.payload = {"chunk_index": idx, "page": page, "text": text}
        return m

    fake_client = MagicMock()
    # The mocked scroll returns one out-of-order page and None as the second
    # tuple element.
    fake_client.scroll.return_value = (
        [pt(2, 1, "c"), pt(0, 1, "a"), pt(1, 1, "b")],
        None,
    )

    rows = get_chunks_by_path(fake_client, "rag_test", "Documents/x.pdf")

    assert [r["chunk_index"] for r in rows] == [0, 1, 2]
    assert [r["text"] for r in rows] == ["a", "b", "c"]
    scroll_kwargs = fake_client.scroll.call_args.kwargs
    assert scroll_kwargs["scroll_filter"].must[0].key == "file_path"
    assert scroll_kwargs["with_vectors"] is False