feat: MCP-Server für RAG-Retrieval + Webhook-Härtung
app/mcp_server.py: FastMCP (mcp SDK), streamable-http auf /mcp, statischer Bearer-Token (constant-time ASGI-Middleware), Fail-Fast ohne RAG_MCP_TOKEN. Tools rag_search (mit semester/fach/typ-Filter) + get_file_chunks. Läuft aus demselben Image wie der Ingestor und reused den Embed-Pfad → Vektoren sind garantiert kompatibel zum Ingest (der offizielle qdrant-MCP-Server kann nur fastembed → Dimension-/Schema-Mismatch). app/qdrant_store.py: search_chunks (query_points + optionaler Payload-Filter) und get_chunks_by_path (scroll, nach chunk_index sortiert). app/bulk.py: Amplification-Guard — /bulk-import lehnt mit 409 ab solange ein vorheriger Bulk noch BackgroundTasks abarbeitet. docker-compose.coolify.yml: rag-mcp-Service (nicht public, externes metamcp-net statt Stack-Coupling) + Traefik-Rate-Limit-Middleware am ingestor. tests/conftest.py: Settings-env_file in Tests neutralisieren (Dev-.env darf die Suite nicht kontaminieren). 68 passed, ruff clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
27
app/bulk.py
27
app/bulk.py
@@ -24,10 +24,21 @@ router = APIRouter()
|
||||
BULK_CONCURRENCY = 4
|
||||
_bulk_semaphore = asyncio.Semaphore(BULK_CONCURRENCY)
|
||||
|
||||
# Amplification guard: one /bulk-import dispatches one BackgroundTask per
|
||||
# matching file with no upper bound. Repeated calls (a misfiring Nextcloud
|
||||
# flow, or an attacker holding the secret) would pile unbounded tasks. We
|
||||
# track outstanding dispatched work and reject a new bulk while any is
|
||||
# still draining — bulk runs are rare, so serialising them is acceptable.
|
||||
_inflight = 0
|
||||
|
||||
|
||||
async def _process_with_semaphore(file_path: str, event_type: EventType) -> None:
    """Process one bulk-dispatched file, then release its in-flight slot.

    ``bulk_import`` adds one to ``_inflight`` for every task it schedules;
    this coroutine gives that unit back exactly once, in ``finally``, so the
    counter drains even when ``process_file`` raises.

    Args:
        file_path: Path of the file to process, forwarded to ``process_file``.
        event_type: Event kind forwarded to ``process_file``.
    """
    global _inflight
    try:
        # The semaphore is sized by BULK_CONCURRENCY, capping how many files
        # are processed at the same time.
        async with _bulk_semaphore:
            await process_file(file_path, event_type)
    finally:
        _inflight -= 1
|
||||
|
||||
|
||||
class BulkRequest(BaseModel):
|
||||
@@ -83,9 +94,20 @@ async def bulk_import(
|
||||
background: BackgroundTasks,
|
||||
x_webhook_secret: str | None = Header(default=None),
|
||||
):
|
||||
global _inflight
|
||||
settings = get_settings()
|
||||
verify_secret(x_webhook_secret, settings.webhook_secret)
|
||||
|
||||
if _inflight > 0:
|
||||
logger.warning(
|
||||
"bulk rejected: import already in progress",
|
||||
extra={"event": "bulk_rejected", "path": body.path, "inflight": _inflight},
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail="a bulk import is already in progress",
|
||||
)
|
||||
|
||||
try:
|
||||
files = await list_files_recursive(
|
||||
settings.nextcloud_webdav_url,
|
||||
@@ -105,6 +127,7 @@ async def bulk_import(
|
||||
ext = PurePosixPath(f).suffix.lstrip(".").lower()
|
||||
if ext not in SUPPORTED_TYPES:
|
||||
continue
|
||||
_inflight += 1
|
||||
background.add_task(_process_with_semaphore, f, EventType.CREATED)
|
||||
dispatched += 1
|
||||
|
||||
|
||||
@@ -23,6 +23,12 @@ class Settings(BaseSettings):
|
||||
chunk_overlap_words: int = 50
|
||||
log_level: str = "INFO"
|
||||
|
||||
# MCP server (app.mcp_server). Optional so the ingestor — which shares
|
||||
# this Settings model — is unaffected. The MCP server itself refuses to
|
||||
# start when rag_mcp_token is empty.
|
||||
rag_mcp_token: str = ""
|
||||
rag_mcp_port: int = 9009
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_settings() -> Settings:
|
||||
|
||||
147
app/mcp_server.py
Normal file
147
app/mcp_server.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""MCP server exposing the THB-Studium RAG corpus.
|
||||
|
||||
Runs from the *same* image as the ingestor and reuses its embedding path
|
||||
(`app.ingest.embedder`), so query vectors are produced by the exact model
|
||||
used at ingest time — the only way Qdrant search returns meaningful hits.
|
||||
|
||||
Transport: streamable-http on ``/mcp``. A static bearer token gates every
|
||||
request; the token is the second control layer behind network isolation
|
||||
(the service is only reachable by MetaMCP over a dedicated bridge).
|
||||
"""
|
||||
|
||||
import hmac
|
||||
import logging
|
||||
import sys
|
||||
from functools import lru_cache
|
||||
|
||||
import uvicorn
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
from qdrant_client import QdrantClient
|
||||
from starlette.types import Receive, Scope, Send
|
||||
|
||||
from app.config import get_settings
|
||||
from app.ingest.embedder import embed_texts
|
||||
from app.logging_setup import setup_logging
|
||||
from app.qdrant_store import get_chunks_by_path, search_chunks
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
mcp = FastMCP("rag-thb")
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _qdrant() -> QdrantClient:
    """Return the Qdrant client for the configured URL.

    The function takes no arguments, so ``lru_cache(maxsize=1)`` builds the
    client on the first call and returns that same object afterwards.
    """
    qdrant_url = get_settings().qdrant_url
    return QdrantClient(url=qdrant_url)
|
||||
|
||||
|
||||
@mcp.tool()
async def rag_search(
    query: str,
    limit: int = 5,
    semester: str | None = None,
    fach: str | None = None,
    typ: str | None = None,
) -> list[dict]:
    """Semantische Suche im THB-Studium-Wissen (Vorlesungen, Übungen, Notizen).

    Args:
        query: Natürlichsprachige Suchanfrage.
        limit: Maximale Trefferzahl (Default 5).
        semester: Optionaler Filter, z.B. "2.Semester".
        fach: Optionaler Filter, z.B. "Databases".
        typ: Optionaler Filter, z.B. "Vorlesungen" oder "Uebungen".

    Returns:
        Treffer mit text und Quell-Metadaten (file_path, semester, fach,
        typ, page, chunk_index) plus Similarity-score, absteigend sortiert.
    """
    # NOTE(review): FastMCP appears to publish the docstring above as the tool
    # description shown to MCP clients, so it is treated as runtime text and
    # left in German -- confirm before translating.
    #
    # Maintainer summary: embed ``query`` with the configured embedding model,
    # then run a (optionally metadata-filtered) vector search and return the
    # hit dicts produced by ``search_chunks``.
    import asyncio  # local import: only this coroutine needs it

    # "At most `limit` hits" with a non-positive limit is the empty result;
    # answer it directly instead of paying for an embedding round trip.
    if limit < 1:
        return []

    settings = get_settings()
    vectors = await embed_texts([query], model=settings.ollama_embed_model)
    # ``search_chunks`` is a plain synchronous function driving the Qdrant
    # client; run it in a worker thread so the event loop keeps serving other
    # requests while the search is in flight.
    return await asyncio.to_thread(
        search_chunks,
        _qdrant(),
        settings.qdrant_collection,
        vectors[0],  # one input text was embedded, so the first vector is ours
        limit=limit,
        semester=semester,
        fach=fach,
        typ=typ,
    )
|
||||
|
||||
|
||||
@mcp.tool()
async def get_file_chunks(file_path: str) -> list[dict]:
    """Alle Chunks eines Dokuments in Reihenfolge laden.

    Nützlich, um nach einem rag_search-Treffer das vollständige Dokument
    zu rekonstruieren.

    Args:
        file_path: Exakter Nextcloud-Pfad wie in rag_search-Treffern, z.B.
            "Documents/THB/2.Semester/Databases/Uebungen/01/Loesung.pdf".

    Returns:
        Chunks mit chunk_index, page und text, nach chunk_index sortiert.
    """
    # NOTE(review): FastMCP appears to publish the docstring above as the tool
    # description shown to MCP clients, so it is treated as runtime text and
    # left in German -- confirm before translating.
    #
    # Maintainer summary: return every stored chunk whose ``file_path``
    # payload equals the argument, as produced by ``get_chunks_by_path``.
    import asyncio  # local import: only this coroutine needs it

    settings = get_settings()
    # ``get_chunks_by_path`` is synchronous; run it in a worker thread so the
    # event loop is not blocked while Qdrant is being scrolled.
    return await asyncio.to_thread(
        get_chunks_by_path, _qdrant(), settings.qdrant_collection, file_path
    )
|
||||
|
||||
|
||||
class BearerAuthMiddleware:
    """Pure-ASGI gate: constant-time check of ``Authorization: Bearer <token>``.

    Non-HTTP scopes (lifespan, websocket) pass straight through so the
    StreamableHTTP session manager's lifespan still runs.

    The comparison is done on raw bytes. Header values are looked up as bytes
    in the ASGI scope, and ``hmac.compare_digest`` only accepts ``str``
    arguments when they are pure ASCII; comparing bytes means a malformed or
    non-ASCII header is rejected with 401 instead of raising inside the
    middleware, and a token containing non-ASCII characters can still match.
    """

    def __init__(self, app, token: str) -> None:
        """Wrap ``app`` and precompute the expected header value.

        Args:
            app: The ASGI application to call once a request is authorised.
            token: The shared secret expected after the ``Bearer `` prefix.
        """
        self._app = app
        self._expected = f"Bearer {token}".encode()

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Forward authorised requests; answer everything else with 401."""
        if scope["type"] != "http":
            await self._app(scope, receive, send)
            return

        headers = dict(scope.get("headers") or [])
        # A missing header compares as the empty value and is rejected too.
        provided = headers.get(b"authorization", b"")
        if not hmac.compare_digest(provided, self._expected):
            await send(
                {
                    "type": "http.response.start",
                    "status": 401,
                    "headers": [(b"content-type", b"text/plain")],
                }
            )
            await send({"type": "http.response.body", "body": b"unauthorized"})
            return

        await self._app(scope, receive, send)
|
||||
|
||||
|
||||
def build_app():
    """Return the bearer-gated ASGI app; exit with status 1 if no token is set.

    When ``rag_mcp_token`` is empty the refusal is logged and the process is
    terminated via ``sys.exit(1)`` rather than serving without the token gate.
    """
    settings = get_settings()
    token = settings.rag_mcp_token
    if token:
        mcp.settings.host = "0.0.0.0"
        mcp.settings.port = settings.rag_mcp_port
        return BearerAuthMiddleware(mcp.streamable_http_app(), token)

    logger.error(
        "refusing to start: RAG_MCP_TOKEN is empty",
        extra={"event": "mcp_startup_abort"},
    )
    sys.exit(1)
|
||||
|
||||
|
||||
def main() -> None:
    """Configure logging, build the gated app and serve it with uvicorn."""
    settings = get_settings()
    setup_logging(settings.log_level)
    port = settings.rag_mcp_port

    # build_app() exits the process when no token is configured, so the
    # startup line below is only logged for a server that will actually run.
    app = build_app()
    logger.info("mcp server starting", extra={"event": "mcp_startup", "port": port})
    uvicorn.run(app, host="0.0.0.0", port=port)


if __name__ == "__main__":
    main()
|
||||
@@ -61,3 +61,84 @@ def delete_by_path(client: QdrantClient, name: str, file_path: str) -> None:
|
||||
)
|
||||
)
|
||||
client.delete(collection_name=name, points_selector=selector)
|
||||
|
||||
|
||||
# Payload keys copied, in this order, into every hit dict built by
# ``search_chunks``.
_RESULT_FIELDS = tuple(
    "text file_path file_name semester fach typ page chunk_index".split()
)
|
||||
|
||||
|
||||
def _payload_filter(
    semester: str | None, fach: str | None, typ: str | None
) -> qm.Filter | None:
    """Translate the optional metadata constraints into a Qdrant filter.

    Each argument that is truthy becomes an exact-match condition on the
    payload key of the same name; ``None`` and empty strings are ignored.

    Returns:
        A filter requiring all supplied constraints, or ``None`` when no
        constraint was supplied.
    """
    wanted = {"semester": semester, "fach": fach, "typ": typ}
    must = [
        qm.FieldCondition(key=field, match=qm.MatchValue(value=value))
        for field, value in wanted.items()
        if value
    ]
    if not must:
        return None
    return qm.Filter(must=must)
|
||||
|
||||
|
||||
def search_chunks(
    client: QdrantClient,
    name: str,
    vector: list[float],
    limit: int,
    semester: str | None = None,
    fach: str | None = None,
    typ: str | None = None,
) -> list[dict[str, Any]]:
    """Run a vector query against collection ``name``, optionally filtered.

    The query vector has to come from the same embedding model that was used
    at ingest time; otherwise the similarity scores carry no meaning.

    Args:
        client: Qdrant client used for the query.
        name: Collection to search.
        vector: Query embedding.
        limit: Maximum number of hits requested from Qdrant.
        semester: Optional exact-match constraint on the ``semester`` payload.
        fach: Optional exact-match constraint on the ``fach`` payload.
        typ: Optional exact-match constraint on the ``typ`` payload.

    Returns:
        One dict per hit holding every key of ``_RESULT_FIELDS`` (``None``
        where the payload lacks it) followed by the hit's ``score``.
    """

    def as_row(hit) -> dict[str, Any]:
        # A hit without payload still yields a full row, just with None values.
        payload = hit.payload or {}
        return {
            **{field: payload.get(field) for field in _RESULT_FIELDS},
            "score": hit.score,
        }

    hits = client.query_points(
        collection_name=name,
        query=vector,
        limit=limit,
        query_filter=_payload_filter(semester, fach, typ),
        with_payload=True,
    ).points
    return [as_row(hit) for hit in hits]
|
||||
|
||||
|
||||
def get_chunks_by_path(
    client: QdrantClient, name: str, file_path: str
) -> list[dict[str, Any]]:
    """Return every chunk of one document, ordered by ``chunk_index``.

    The collection is scrolled page by page until Qdrant reports no further
    offset, so documents larger than a single page are returned completely
    instead of being cut off after the first page.

    Args:
        client: Qdrant client used for the scroll requests.
        name: Collection to read from.
        file_path: Exact value of the ``file_path`` payload to match.

    Returns:
        Dicts with ``chunk_index``, ``page`` and ``text`` for each matching
        point that has a payload. Rows without a ``chunk_index`` sort as 0.
    """
    path_filter = qm.Filter(
        must=[qm.FieldCondition(key="file_path", match=qm.MatchValue(value=file_path))]
    )
    rows: list[dict[str, Any]] = []
    offset = None  # None asks Qdrant to start from the beginning
    while True:
        points, offset = client.scroll(
            collection_name=name,
            scroll_filter=path_filter,
            limit=10_000,
            offset=offset,
            with_payload=True,
            with_vectors=False,
        )
        rows.extend(
            {
                "chunk_index": p.payload.get("chunk_index"),
                "page": p.payload.get("page"),
                "text": p.payload.get("text"),
            }
            for p in points
            if p.payload is not None
        )
        # scroll() hands back the offset of the next page, or None at the end.
        if offset is None:
            break
    rows.sort(key=lambda r: r["chunk_index"] if r["chunk_index"] is not None else 0)
    return rows
|
||||
|
||||
Reference in New Issue
Block a user