Files
rag-ingestor/tests/test_pipeline.py

113 lines
3.4 KiB
Python

from unittest.mock import AsyncMock, MagicMock
import pytest
from app.webhook.models import EventType
@pytest.fixture(autouse=True)
def _populate_env(monkeypatch):
    """Give every test a fully populated environment and a fresh settings cache.

    Runs automatically for each test in this module. It sets placeholder
    values for the Nextcloud, Ollama, Qdrant and webhook variables, then
    clears the ``get_settings`` cache before and after the test so that
    settings built from these values do not leak between tests.
    """
    placeholder_env = {
        "NEXTCLOUD_WEBDAV_URL": "http://nc",
        "NEXTCLOUD_USER": "u",
        "NEXTCLOUD_APP_PASSWORD": "p",
        "OLLAMA_URL": "http://ollama",
        "OLLAMA_EMBED_MODEL": "m",
        "QDRANT_URL": "http://qdrant",
        "QDRANT_COLLECTION": "rag_test",
        "WEBHOOK_SECRET": "abc",
    }
    for var_name, var_value in placeholder_env.items():
        monkeypatch.setenv(var_name, var_value)

    # Imported only after the environment is in place, in case importing
    # app.config reads environment variables.
    from app.config import get_settings

    get_settings.cache_clear()
    yield
    # Drop anything cached during the test so the next test starts clean.
    get_settings.cache_clear()
@pytest.mark.asyncio
async def test_process_deleted_event_calls_delete_only(monkeypatch):
    """A DELETED event must issue exactly one Qdrant delete and nothing else.

    The Qdrant client factory and ``download_file`` are replaced with mocks
    so the test can observe which collaborators ``process_file`` touches.
    """
    from app.ingest.pipeline import process_file

    qdrant = MagicMock()
    monkeypatch.setattr("app.ingest.pipeline._qdrant_client", lambda: qdrant)
    download_mock = AsyncMock()
    monkeypatch.setattr("app.ingest.pipeline.download_file", download_mock)

    await process_file(
        file_path="Documents/THB/Studium/2.Semester/Databases/x.pdf",
        event_type=EventType.DELETED,
    )

    # A deleted file has nothing to fetch.
    download_mock.assert_not_called()
    qdrant.delete.assert_called_once()
    # "delete only": a deletion must never write points back to the store.
    qdrant.upsert.assert_not_called()
@pytest.mark.asyncio
async def test_process_outside_root_skips(monkeypatch):
    """A CREATED event for ``Documents/Other/x.pdf`` must be ignored entirely.

    Going by the test name, that path lies outside the ingest root. The test
    expects no download and no Qdrant delete or upsert.
    """
    from app.ingest.pipeline import process_file

    fake_qdrant = MagicMock()
    fake_download = AsyncMock()
    monkeypatch.setattr("app.ingest.pipeline._qdrant_client", lambda: fake_qdrant)
    monkeypatch.setattr("app.ingest.pipeline.download_file", fake_download)

    await process_file(
        file_path="Documents/Other/x.pdf",
        event_type=EventType.CREATED,
    )

    # None of the collaborators may have been touched.
    for untouched in (fake_download, fake_qdrant.delete, fake_qdrant.upsert):
        untouched.assert_not_called()
@pytest.mark.asyncio
async def test_process_unsupported_extension_skips(monkeypatch):
    """A CREATED event for a ``.txt`` file must not upsert anything into Qdrant."""
    from app.ingest.pipeline import process_file

    vector_store = MagicMock()
    patches = (
        ("app.ingest.pipeline._qdrant_client", lambda: vector_store),
        ("app.ingest.pipeline.download_file", AsyncMock()),
    )
    for dotted_name, stand_in in patches:
        monkeypatch.setattr(dotted_name, stand_in)

    await process_file(
        file_path="Documents/THB/Studium/2.Semester/Databases/notes.txt",
        event_type=EventType.CREATED,
    )

    vector_store.upsert.assert_not_called()
@pytest.mark.asyncio
async def test_process_created_full_flow(monkeypatch, sample_pdf_bytes):
    """A CREATED event for a supported PDF runs download, embed, delete, upsert.

    ``download_file`` is mocked to return ``sample_pdf_bytes`` (a fixture
    defined outside this module, presumably a small valid PDF) and
    ``embed_texts`` to return two 4-dimensional vectors. The test then checks
    that Qdrant sees a delete followed by an upsert, and that the first
    upserted point carries the metadata expected for the file path.
    """
    from app.ingest.pipeline import process_file

    qdrant = MagicMock()
    monkeypatch.setattr("app.ingest.pipeline._qdrant_client", lambda: qdrant)
    monkeypatch.setattr(
        "app.ingest.pipeline.download_file",
        AsyncMock(return_value=sample_pdf_bytes),
    )
    monkeypatch.setattr(
        "app.ingest.pipeline.embed_texts",
        AsyncMock(return_value=[[0.1] * 4, [0.2] * 4]),
    )

    await process_file(
        file_path="Documents/THB/Studium/2.Semester/Databases/Vorlesungen/x.pdf",
        event_type=EventType.CREATED,
    )

    # delete called first (idempotency), upsert called after
    qdrant.delete.assert_called_once()
    qdrant.upsert.assert_called_once()
    # Call counts alone do not prove ordering, so check the recorded sequence
    # of method calls on the client mock as well.
    called_methods = [recorded[0] for recorded in qdrant.method_calls]
    assert called_methods.index("delete") < called_methods.index("upsert")

    upserted_points = qdrant.upsert.call_args.kwargs["points"]
    assert len(upserted_points) >= 1

    # The payload values are expected to mirror the segments of the file path.
    payload = upserted_points[0].payload
    assert payload["semester"] == "2.Semester"
    assert payload["fach"] == "Databases"
    assert payload["typ"] == "Vorlesungen"
    assert payload["file_type"] == "pdf"
    assert payload["chunk_index"] == 0
    assert "ingested_at" in payload