Files
rag-ingestor/tests/test_bulk.py

65 lines
2.2 KiB
Python

from unittest.mock import AsyncMock, call
from fastapi.testclient import TestClient
from app.webhook.models import EventType
def _make_app(monkeypatch):
    """Return the FastAPI application configured for an isolated test run.

    Sets the environment variables listed below through ``monkeypatch``,
    clears the caches on ``get_settings`` and on the pipeline's
    ``_qdrant_client``, and swaps ``app.main._startup_ensure_collection``
    for an ``AsyncMock`` before importing and returning ``app.main.app``.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture of the calling test.

    Returns:
        The ``app`` object exported by ``app.main``.
    """
    test_env = {
        "NEXTCLOUD_WEBDAV_URL": "http://nc",
        "NEXTCLOUD_USER": "u",
        "NEXTCLOUD_APP_PASSWORD": "p",
        "OLLAMA_URL": "http://ollama",
        "OLLAMA_EMBED_MODEL": "m",
        "QDRANT_URL": "http://qdrant",
        "QDRANT_COLLECTION": "rag_test",
        "WEBHOOK_SECRET": "abc",
    }
    for variable, value in test_env.items():
        monkeypatch.setenv(variable, value)

    # Project imports stay inside the function so they run only after the
    # environment has been patched.
    from app.config import get_settings

    # Drop any cached result so the next call is recomputed
    # (presumably from the patched environment -- confirm in app.config).
    get_settings.cache_clear()

    import app.ingest.pipeline as pipeline_module

    pipeline_module._qdrant_client.cache_clear()

    # Replace the startup hook with a mock so the real one is not executed
    # when the TestClient starts the application.
    monkeypatch.setattr("app.main._startup_ensure_collection", AsyncMock())

    from app.main import app as fastapi_app

    return fastapi_app
def test_bulk_import_lists_and_dispatches(monkeypatch):
    """POST /bulk-import dispatches one ``process_file`` call per document.

    The mocked directory listing returns a PDF, a DOCX and a
    ``.rag-meta.json`` sidecar. The endpoint is expected to answer 202,
    report two dispatched files, and await ``process_file`` exactly twice
    with ``EventType.CREATED`` -- the sidecar must not be dispatched.
    """
    application = _make_app(monkeypatch)

    pdf_path = "Documents/THB/Studium/2.Semester/Databases/a.pdf"
    docx_path = "Documents/THB/Studium/2.Semester/Databases/b.docx"
    # Sidecar metadata file: listed by the fake lister but expected to be ignored.
    sidecar_path = "Documents/THB/Studium/2.Semester/Databases/.rag-meta.json"

    fake_lister = AsyncMock(return_value=[pdf_path, docx_path, sidecar_path])
    fake_process = AsyncMock()
    monkeypatch.setattr("app.bulk.list_files_recursive", fake_lister)
    monkeypatch.setattr("app.bulk.process_file", fake_process)

    payload = {"path": "Documents/THB/Studium/2.Semester/Databases"}
    auth_headers = {"X-Webhook-Secret": "abc"}
    with TestClient(application) as client:
        response = client.post("/bulk-import", json=payload, headers=auth_headers)

    assert response.status_code == 202

    # Only the .pdf and .docx count; the JSON sidecar is skipped.
    assert response.json()["dispatched"] == 2

    expected_calls = [
        call(pdf_path, EventType.CREATED),
        call(docx_path, EventType.CREATED),
    ]
    fake_process.assert_has_calls(expected_calls)
    assert fake_process.await_count == 2
def test_bulk_import_rejects_wrong_secret(monkeypatch):
    """POST /bulk-import with a non-matching X-Webhook-Secret answers 401.

    The app is configured with the secret ``abc``; the request sends
    ``nope`` instead.
    """
    application = _make_app(monkeypatch)

    wrong_secret_headers = {"X-Webhook-Secret": "nope"}
    with TestClient(application) as client:
        response = client.post(
            "/bulk-import",
            json={"path": "x"},
            headers=wrong_secret_headers,
        )

    assert response.status_code == 401