48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
import re
|
|
from dataclasses import dataclass
|
|
from pathlib import PurePosixPath
|
|
|
|
|
|
# Matches a semester directory name: one or more digits followed by the
# literal ".Semester" (for example "3.Semester"). The pattern is anchored at
# both ends, so it only accepts a whole path component.
# NOTE(review): without re.ASCII, `\d` also accepts non-ASCII Unicode decimal
# digits — confirm whether that is intended.
SEMESTER_RE = re.compile(r"^\d+\.Semester$")
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class PathMetadata:
|
|
semester: str
|
|
fach: str
|
|
typ: str | None
|
|
|
|
|
|
def parse_path(file_path: str, ingest_root: str) -> PathMetadata | None:
    """Parse a Nextcloud file path into structured metadata.

    The expected layout is `<root>/<N>.Semester/<Fach>/[<typ>/...]/<file>`.
    Leading slashes on `file_path` and surrounding slashes on `ingest_root`
    are ignored, so `/a/b`, `a/b` and `a/b/` all name the same root.

    Caller must pass a file path. Directory paths (with or without trailing
    slash) produce undefined results — `PurePosixPath` strips trailing
    slashes, so a directory ending in `/foo/` is indistinguishable from a
    file `/foo`.

    Args:
        file_path: Path of the file, absolute or relative to the storage root.
        ingest_root: Directory below which the semester folders live. An
            empty string or `/` means the storage root itself, in which case
            every path is considered to be inside it.

    Returns:
        A `PathMetadata` with the semester, the subject (`fach`) and the first
        directory below the subject (`typ`, or None when the file sits
        directly in the subject directory). None when the path is outside
        the ingest root or does not match the expected pattern.
    """
    norm_path = file_path.lstrip("/")
    norm_root = ingest_root.strip("/")

    if norm_root:
        # Compare against "<root>/" so that "rootX/..." is not mistaken for a
        # path inside "root".
        prefix = norm_root + "/"
        if not norm_path.startswith(prefix):
            return None
        relative = norm_path[len(prefix):]
    else:
        # Root is the storage root ("" or "/"): every path lies inside it.
        # Testing for a "/" prefix here could never succeed because the
        # leading slashes were already stripped from norm_path.
        relative = norm_path

    # Repeated slashes after the root ("root//1.Semester/...") would leave a
    # leading "/" here; PurePosixPath would then treat the remainder as
    # absolute and report "/" as the first part, rejecting a valid path.
    parts = PurePosixPath(relative.lstrip("/")).parts

    # Layout: [semester, fach, *inner, filename] — minimum 3 parts
    if len(parts) < 3:
        return None

    semester, fach = parts[0], parts[1]

    if not SEMESTER_RE.fullmatch(semester):
        return None

    inner = parts[2:-1]  # everything between fach and filename
    typ = inner[0] if inner else None

    return PathMetadata(semester=semester, fach=fach, typ=typ)
|