import io

import fitz  # pymupdf
import pytest
from docx import Document

from app.config import Settings


@pytest.fixture(autouse=True)
def _ignore_dotenv():
    """Disable ``.env`` loading on ``Settings`` for the duration of each test.

    Tests must be deterministic regardless of a developer ``.env`` in the
    repo root. ``Settings`` reads ``env_file='.env'``; this fixture
    neutralises it so tests see only the environment they explicitly set
    (e.g. via ``monkeypatch``). The previous value is put back on teardown.
    """
    original = Settings.model_config.get("env_file")
    Settings.model_config["env_file"] = None
    yield
    # Teardown: restore whatever was configured before the test ran.
    Settings.model_config["env_file"] = original


@pytest.fixture
def sample_pdf_bytes() -> bytes:
    """Return the serialized bytes of a two-page PDF with one text line per page."""
    doc = fitz.open()
    try:
        p1 = doc.new_page()
        p1.insert_text((72, 72), "Seite eins enthaelt Lorem Ipsum.")
        p2 = doc.new_page()
        p2.insert_text((72, 72), "Seite zwei enthaelt mehr Text.")
        return doc.tobytes()
    finally:
        # Close the document even if page creation or serialization fails.
        doc.close()


@pytest.fixture
def sample_docx_bytes() -> bytes:
    """Return the bytes of a DOCX document containing two paragraphs."""
    doc = Document()
    doc.add_paragraph("Erster Absatz.")
    doc.add_paragraph("Zweiter Absatz mit mehr Inhalt.")
    # Save into an in-memory buffer so no file is written to disk.
    buf = io.BytesIO()
    doc.save(buf)
    return buf.getvalue()


@pytest.fixture
def sample_md_bytes() -> bytes:
    """Return a UTF-8 encoded Markdown snippet: one heading and two paragraphs."""
    return "# Title\n\nFirst paragraph.\n\nSecond paragraph.\n".encode("utf-8")


@pytest.fixture
def sample_xlsx_bytes() -> bytes:
    """Return placeholder bytes standing in for an XLSX upload."""
    # Minimal placeholder; extractor doesn't read content for xlsx.
    # The payload is only the ZIP local-file-header magic plus filler,
    # not a valid workbook.
    return b"PK\x03\x04dummy"