refactor: klarere typ-extraktion, fullmatch, root-prefix-test

This commit is contained in:
2026-05-04 22:09:41 +02:00
parent 8d15f02187
commit 0224581587
2 changed files with 12 additions and 5 deletions

View File

@@ -18,6 +18,10 @@ def parse_path(file_path: str, ingest_root: str) -> PathMetadata | None:
Returns None when the path is outside the ingest root or does not match
the expected `<root>/<N>.Semester/<Fach>/[<typ>/...]/<file>` pattern.
Callers must pass a file path. Directory paths (with or without trailing
slash) are not detected — `PurePosixPath` strips trailing slashes, so a
directory given as `/foo/` is indistinguishable from a file `/foo`, and its
last segment is treated as the filename.
"""
norm_path = file_path.lstrip("/")
norm_root = ingest_root.strip("/")
@@ -28,17 +32,16 @@ def parse_path(file_path: str, ingest_root: str) -> PathMetadata | None:
relative = norm_path[len(norm_root) + 1:]
parts = PurePosixPath(relative).parts
# Need at least: semester / fach / file.ext → 3 parts
# Layout: [semester, fach, *inner, filename] — minimum 3 parts
if len(parts) < 3:
return None
semester, fach = parts[0], parts[1]
if not SEMESTER_RE.match(semester):
if not SEMESTER_RE.fullmatch(semester):
return None
# parts[-1] is the filename. Anything between fach and filename is "deeper".
# The first deeper segment becomes `typ`. None if file lives directly in fach.
typ = parts[2] if len(parts) > 3 else None
inner = parts[2:-1] # everything between fach and filename
typ = inner[0] if inner else None
return PathMetadata(semester=semester, fach=fach, typ=typ)