chore: docstrings and tighter upsert test for qdrant store
This commit is contained in:
@@ -13,7 +13,11 @@ class ChunkPoint:
|
||||
|
||||
|
||||
def ensure_collection(client: QdrantClient, name: str, vector_size: int) -> None:
|
||||
"""Create the collection if missing. Crash if it exists with wrong dim."""
|
||||
"""Create the collection if missing. Crash if it exists with wrong dim.
|
||||
|
||||
Note: payload indexes are created only on initial collection creation;
|
||||
they are not reconciled on subsequent runs.
|
||||
"""
|
||||
if not client.collection_exists(name):
|
||||
client.create_collection(
|
||||
collection_name=name,
|
||||
@@ -38,6 +42,11 @@ def ensure_collection(client: QdrantClient, name: str, vector_size: int) -> None
|
||||
|
||||
|
||||
def upsert_chunks(client: QdrantClient, name: str, chunks: list[ChunkPoint]) -> None:
|
||||
"""Insert chunks with fresh UUID ids.
|
||||
|
||||
Caller is responsible for deduplication: call ``delete_by_path`` for the
|
||||
file before re-ingesting, otherwise duplicates accumulate.
|
||||
"""
|
||||
points = [
|
||||
qm.PointStruct(id=str(uuid.uuid4()), vector=c.vector, payload=c.payload)
|
||||
for c in chunks
|
||||
|
||||
Reference in New Issue
Block a user