chore: add docstrings and a tighter upsert test for the qdrant store

This commit is contained in:
2026-05-04 22:27:03 +02:00
parent 7ebb63501e
commit a861f4ec2b
2 changed files with 20 additions and 3 deletions

View File

@@ -13,7 +13,11 @@ class ChunkPoint:
def ensure_collection(client: QdrantClient, name: str, vector_size: int) -> None:
"""Create the collection if missing. Crash if it exists with wrong dim."""
"""Create the collection if missing. Crash if it exists with wrong dim.
Note: payload indexes are created only on initial collection creation;
they are not reconciled on subsequent runs.
"""
if not client.collection_exists(name):
client.create_collection(
collection_name=name,
@@ -38,6 +42,11 @@ def ensure_collection(client: QdrantClient, name: str, vector_size: int) -> None
def upsert_chunks(client: QdrantClient, name: str, chunks: list[ChunkPoint]) -> None:
"""Insert chunks with fresh UUID ids.
Caller is responsible for deduplication: call ``delete_by_path`` for the
file before re-ingesting, otherwise duplicates accumulate.
"""
points = [
qm.PointStruct(id=str(uuid.uuid4()), vector=c.vector, payload=c.payload)
for c in chunks

View File

@@ -1,4 +1,4 @@
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock
import pytest
from app.qdrant_store import (
@@ -57,7 +57,15 @@ def test_upsert_chunks_calls_client_upsert():
fake_client.upsert.assert_called_once()
kwargs = fake_client.upsert.call_args.kwargs
assert kwargs["collection_name"] == "rag_test"
assert len(kwargs["points"]) == 2
produced = kwargs["points"]
assert len(produced) == 2
assert produced[0].vector == [0.1] * 4
assert produced[0].payload == {"file_path": "a", "chunk_index": 0}
assert produced[1].vector == [0.2] * 4
assert produced[1].payload == {"file_path": "a", "chunk_index": 1}
# ids are UUID strings, distinct
assert isinstance(produced[0].id, str)
assert produced[0].id != produced[1].id
def test_delete_by_path_uses_filter():