"""MCP server exposing the THB-Studium RAG corpus.

Runs from the *same* image as the ingestor and reuses its embedding path
(`app.ingest.embedder`), so query vectors are produced by the exact model used
at ingest time — the only way Qdrant search returns meaningful hits.

Transport: streamable-http on ``/mcp``. A static bearer token gates every
request; the token is the second control layer behind network isolation (the
service is only reachable by MetaMCP over a dedicated bridge).
"""

import hmac
import logging
import sys
from functools import lru_cache

import uvicorn
from mcp.server.fastmcp import FastMCP
from qdrant_client import QdrantClient
from starlette.types import Receive, Scope, Send

from app.config import get_settings
from app.ingest.embedder import embed_texts
from app.logging_setup import setup_logging
from app.qdrant_store import get_chunks_by_path, search_chunks

logger = logging.getLogger(__name__)

mcp = FastMCP("rag-thb")


@lru_cache(maxsize=1)
def _qdrant() -> QdrantClient:
    """Return the Qdrant client, created once and reused via ``lru_cache``."""
    return QdrantClient(url=get_settings().qdrant_url)


# NOTE: the docstrings of the two tools below are intentionally kept in German.
# FastMCP publishes a tool function's docstring as the tool description that
# MCP clients see, so the text is runtime output rather than a plain comment.
#
# rag_search: embeds the query with the configured Ollama embedding model and
# runs a (optionally filtered) vector search against the configured collection.
@mcp.tool()
async def rag_search(
    query: str,
    limit: int = 5,
    semester: str | None = None,
    fach: str | None = None,
    typ: str | None = None,
) -> list[dict]:
    """Semantische Suche im THB-Studium-Wissen (Vorlesungen, Übungen, Notizen).

    Args:
        query: Natürlichsprachige Suchanfrage.
        limit: Maximale Trefferzahl (Default 5).
        semester: Optionaler Filter, z.B. "2.Semester".
        fach: Optionaler Filter, z.B. "Databases".
        typ: Optionaler Filter, z.B. "Vorlesungen" oder "Uebungen".

    Returns:
        Treffer mit text und Quell-Metadaten (file_path, semester, fach, typ,
        page, chunk_index) plus Similarity-score, absteigend sortiert.
    """
    settings = get_settings()
    # Same embedding helper as the ingestor, so the query vector lives in the
    # same space as the stored chunk vectors.
    vectors = await embed_texts([query], model=settings.ollama_embed_model)
    return search_chunks(
        _qdrant(),
        settings.qdrant_collection,
        vectors[0],
        limit=limit,
        semester=semester,
        fach=fach,
        typ=typ,
    )


# get_file_chunks: looks up every stored chunk for one exact file path in the
# configured collection (no embedding involved).
@mcp.tool()
async def get_file_chunks(file_path: str) -> list[dict]:
    """Alle Chunks eines Dokuments in Reihenfolge laden.

    Nützlich, um nach einem rag_search-Treffer das vollständige Dokument zu
    rekonstruieren.

    Args:
        file_path: Exakter Nextcloud-Pfad wie in rag_search-Treffern, z.B.
            "Documents/THB/2.Semester/Databases/Uebungen/01/Loesung.pdf".

    Returns:
        Chunks mit chunk_index, page und text, nach chunk_index sortiert.
    """
    settings = get_settings()
    return get_chunks_by_path(_qdrant(), settings.qdrant_collection, file_path)


class BearerAuthMiddleware:
    """Pure-ASGI gate: constant-time check of ``Authorization: Bearer TOKEN``.

    Non-HTTP scopes (lifespan, websocket) pass straight through so the
    StreamableHTTP session manager's lifespan still runs.

    The comparison is performed on raw bytes. ``hmac.compare_digest`` only
    accepts ASCII-only ``str`` arguments, and decoding the header can fail on
    invalid UTF-8; comparing bytes means a malformed or non-ASCII header is
    answered with 401 instead of raising inside the middleware.
    """

    def __init__(self, app, token: str) -> None:
        """Wrap *app* and precompute the expected header value for *token*."""
        self._app = app
        # Encoded once; UTF-8 keeps every header that matched under the
        # previous decode-then-compare logic matching here as well.
        self._expected = f"Bearer {token}".encode("utf-8")

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Forward authorized HTTP requests; answer all others with 401."""
        if scope["type"] != "http":
            await self._app(scope, receive, send)
            return

        # ASGI delivers headers as (name, value) byte-string pairs with
        # lower-cased names; for duplicated names the last value wins here.
        headers = dict(scope.get("headers") or [])
        provided = headers.get(b"authorization", b"")
        if not hmac.compare_digest(provided, self._expected):
            await send(
                {
                    "type": "http.response.start",
                    "status": 401,
                    "headers": [(b"content-type", b"text/plain")],
                }
            )
            await send({"type": "http.response.body", "body": b"unauthorized"})
            return

        await self._app(scope, receive, send)


def build_app():
    """Token-gated ASGI app, or exit if RAG_MCP_TOKEN is unset.

    Returns:
        The FastMCP streamable-http app wrapped in ``BearerAuthMiddleware``.

    Raises:
        SystemExit: With status 1 when the configured token is empty, so the
            server never runs without authentication.
    """
    settings = get_settings()
    if not settings.rag_mcp_token:
        logger.error(
            "refusing to start: RAG_MCP_TOKEN is empty",
            extra={"event": "mcp_startup_abort"},
        )
        sys.exit(1)

    mcp.settings.host = "0.0.0.0"
    mcp.settings.port = settings.rag_mcp_port
    return BearerAuthMiddleware(mcp.streamable_http_app(), settings.rag_mcp_token)
def main() -> None:
    """Process entry point: set up logging, build the app, and serve it.

    Logging is configured before ``build_app()`` is called so that anything
    it logs goes through the configured handlers. The app is then served by
    uvicorn on all interfaces at the configured port.
    """
    cfg = get_settings()
    setup_logging(cfg.log_level)

    asgi_app = build_app()
    listen_port = cfg.rag_mcp_port
    startup_fields = {"event": "mcp_startup", "port": listen_port}
    logger.info("mcp server starting", extra=startup_fields)

    uvicorn.run(asgi_app, host="0.0.0.0", port=listen_port)


if __name__ == "__main__":
    main()