feat: pipeline-orchestrator fuer single-file ingest
This commit is contained in:
112
tests/test_pipeline.py
Normal file
112
tests/test_pipeline.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from app.webhook.models import EventType
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _populate_env(monkeypatch):
    """Give every test in this module a fully populated fake environment.

    Sets the environment variables listed below via ``monkeypatch`` and
    calls ``get_settings.cache_clear()`` both before and after the test.
    """
    fake_env = (
        ("NEXTCLOUD_WEBDAV_URL", "http://nc"),
        ("NEXTCLOUD_USER", "u"),
        ("NEXTCLOUD_APP_PASSWORD", "p"),
        ("OLLAMA_URL", "http://ollama"),
        ("OLLAMA_EMBED_MODEL", "m"),
        ("QDRANT_URL", "http://qdrant"),
        ("QDRANT_COLLECTION", "rag_test"),
        ("WEBHOOK_SECRET", "abc"),
    )
    for variable, value in fake_env:
        monkeypatch.setenv(variable, value)

    # Imported only once the environment is in place, matching the
    # original statement order.
    from app.config import get_settings

    get_settings.cache_clear()
    yield
    get_settings.cache_clear()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_process_deleted_event_calls_delete_only(monkeypatch):
    """A DELETED event calls the Qdrant delete once and never downloads."""
    from app.ingest.pipeline import process_file

    fake_qdrant = MagicMock()
    fake_download = AsyncMock()
    monkeypatch.setattr(
        "app.ingest.pipeline._qdrant_client", lambda: fake_qdrant
    )
    monkeypatch.setattr("app.ingest.pipeline.download_file", fake_download)

    removed_path = "Documents/THB/Studium/2.Semester/Databases/x.pdf"
    await process_file(file_path=removed_path, event_type=EventType.DELETED)

    fake_download.assert_not_called()
    fake_qdrant.delete.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_process_outside_root_skips(monkeypatch):
    """A CREATED event for ``Documents/Other/x.pdf`` touches nothing.

    Neither the download helper nor the Qdrant ``delete``/``upsert``
    methods may be called.
    """
    from app.ingest.pipeline import process_file

    fake_qdrant = MagicMock()
    fake_download = AsyncMock()
    monkeypatch.setattr(
        "app.ingest.pipeline._qdrant_client", lambda: fake_qdrant
    )
    monkeypatch.setattr("app.ingest.pipeline.download_file", fake_download)

    foreign_path = "Documents/Other/x.pdf"
    await process_file(file_path=foreign_path, event_type=EventType.CREATED)

    fake_download.assert_not_called()
    fake_qdrant.delete.assert_not_called()
    fake_qdrant.upsert.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_process_unsupported_extension_skips(monkeypatch):
    """A CREATED event for a ``.txt`` file results in no Qdrant upsert."""
    from app.ingest.pipeline import process_file

    fake_qdrant = MagicMock()
    fake_download = AsyncMock()
    monkeypatch.setattr(
        "app.ingest.pipeline._qdrant_client", lambda: fake_qdrant
    )
    monkeypatch.setattr("app.ingest.pipeline.download_file", fake_download)

    text_file = "Documents/THB/Studium/2.Semester/Databases/notes.txt"
    await process_file(file_path=text_file, event_type=EventType.CREATED)

    fake_qdrant.upsert.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_process_created_full_flow(monkeypatch, sample_pdf_bytes):
    """A CREATED event for a PDF under the root runs the whole pipeline.

    The download and embedding helpers are replaced with mocks. The test
    then checks that Qdrant ``delete`` and ``upsert`` are each called once,
    that ``delete`` happens before ``upsert``, and that the first upserted
    point carries the expected payload fields.
    """
    from app.ingest.pipeline import process_file

    qdrant = MagicMock()
    monkeypatch.setattr("app.ingest.pipeline._qdrant_client", lambda: qdrant)

    monkeypatch.setattr(
        "app.ingest.pipeline.download_file",
        AsyncMock(return_value=sample_pdf_bytes),
    )
    monkeypatch.setattr(
        "app.ingest.pipeline.embed_texts",
        AsyncMock(return_value=[[0.1] * 4, [0.2] * 4]),
    )

    await process_file(
        file_path="Documents/THB/Studium/2.Semester/Databases/Vorlesungen/x.pdf",
        event_type=EventType.CREATED,
    )

    # delete called first (idempotency), upsert called after
    qdrant.delete.assert_called_once()
    qdrant.upsert.assert_called_once()

    # Verify the order explicitly: the parent mock records calls to its
    # methods in call order, and each entry's first element is the name.
    method_names = [call[0] for call in qdrant.method_calls]
    assert method_names.index("delete") < method_names.index("upsert")

    upserted_points = qdrant.upsert.call_args.kwargs["points"]
    assert len(upserted_points) >= 1

    payload = upserted_points[0].payload
    assert payload["semester"] == "2.Semester"
    assert payload["fach"] == "Databases"
    assert payload["typ"] == "Vorlesungen"
    assert payload["file_type"] == "pdf"
    assert payload["chunk_index"] == 0
    assert "ingested_at" in payload
|
||||
Reference in New Issue
Block a user