chore: docstrings and tighter upsert test for qdrant store
This commit is contained in:
@@ -13,7 +13,11 @@ class ChunkPoint:
|
||||
|
||||
|
||||
def ensure_collection(client: QdrantClient, name: str, vector_size: int) -> None:
|
||||
"""Create the collection if missing. Crash if it exists with wrong dim."""
|
||||
"""Create the collection if missing. Crash if it exists with wrong dim.
|
||||
|
||||
Note: payload indexes are created only on initial collection creation;
|
||||
they are not reconciled on subsequent runs.
|
||||
"""
|
||||
if not client.collection_exists(name):
|
||||
client.create_collection(
|
||||
collection_name=name,
|
||||
@@ -38,6 +42,11 @@ def ensure_collection(client: QdrantClient, name: str, vector_size: int) -> None
|
||||
|
||||
|
||||
def upsert_chunks(client: QdrantClient, name: str, chunks: list[ChunkPoint]) -> None:
|
||||
"""Insert chunks with fresh UUID ids.
|
||||
|
||||
Caller is responsible for deduplication: call ``delete_by_path`` for the
|
||||
file before re-ingesting, otherwise duplicates accumulate.
|
||||
"""
|
||||
points = [
|
||||
qm.PointStruct(id=str(uuid.uuid4()), vector=c.vector, payload=c.payload)
|
||||
for c in chunks
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import MagicMock
|
||||
import pytest
|
||||
|
||||
from app.qdrant_store import (
|
||||
@@ -57,7 +57,15 @@ def test_upsert_chunks_calls_client_upsert():
|
||||
fake_client.upsert.assert_called_once()
|
||||
kwargs = fake_client.upsert.call_args.kwargs
|
||||
assert kwargs["collection_name"] == "rag_test"
|
||||
assert len(kwargs["points"]) == 2
|
||||
produced = kwargs["points"]
|
||||
assert len(produced) == 2
|
||||
assert produced[0].vector == [0.1] * 4
|
||||
assert produced[0].payload == {"file_path": "a", "chunk_index": 0}
|
||||
assert produced[1].vector == [0.2] * 4
|
||||
assert produced[1].payload == {"file_path": "a", "chunk_index": 1}
|
||||
# ids are UUID strings, distinct
|
||||
assert isinstance(produced[0].id, str)
|
||||
assert produced[0].id != produced[1].id
|
||||
|
||||
|
||||
def test_delete_by_path_uses_filter():
|
||||
|
||||
Reference in New Issue
Block a user