refactor: klarere typ-extraktion, fullmatch, root-prefix-test
This commit is contained in:
@@ -18,6 +18,10 @@ def parse_path(file_path: str, ingest_root: str) -> PathMetadata | None:
|
|||||||
|
|
||||||
Returns None when the path is outside the ingest root or does not match
|
Returns None when the path is outside the ingest root or does not match
|
||||||
the expected `<root>/<N>.Semester/<Fach>/[<typ>/...]/<file>` pattern.
|
the expected `<root>/<N>.Semester/<Fach>/[<typ>/...]/<file>` pattern.
|
||||||
|
|
||||||
|
Caller must pass a file path. Directory paths (with or without trailing
|
||||||
|
slash) produce undefined results — `PurePosixPath` strips trailing slashes,
|
||||||
|
so a directory ending in `/foo/` is indistinguishable from a file `/foo`.
|
||||||
"""
|
"""
|
||||||
norm_path = file_path.lstrip("/")
|
norm_path = file_path.lstrip("/")
|
||||||
norm_root = ingest_root.strip("/")
|
norm_root = ingest_root.strip("/")
|
||||||
@@ -28,17 +32,16 @@ def parse_path(file_path: str, ingest_root: str) -> PathMetadata | None:
|
|||||||
relative = norm_path[len(norm_root) + 1:]
|
relative = norm_path[len(norm_root) + 1:]
|
||||||
parts = PurePosixPath(relative).parts
|
parts = PurePosixPath(relative).parts
|
||||||
|
|
||||||
# Need at least: semester / fach / file.ext → 3 parts
|
# Layout: [semester, fach, *inner, filename] — minimum 3 parts
|
||||||
if len(parts) < 3:
|
if len(parts) < 3:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
semester, fach = parts[0], parts[1]
|
semester, fach = parts[0], parts[1]
|
||||||
|
|
||||||
if not SEMESTER_RE.match(semester):
|
if not SEMESTER_RE.fullmatch(semester):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# parts[-1] is the filename. Anything between fach and filename is "deeper".
|
inner = parts[2:-1] # everything between fach and filename
|
||||||
# The first deeper segment becomes `typ`. None if file lives directly in fach.
|
typ = inner[0] if inner else None
|
||||||
typ = parts[2] if len(parts) > 3 else None
|
|
||||||
|
|
||||||
return PathMetadata(semester=semester, fach=fach, typ=typ)
|
return PathMetadata(semester=semester, fach=fach, typ=typ)
|
||||||
|
|||||||
@@ -44,3 +44,7 @@ def test_parse_path_no_fach_returns_none():
|
|||||||
def test_parse_path_with_leading_slash_normalizes():
|
def test_parse_path_with_leading_slash_normalizes():
|
||||||
md = parse_path("/Documents/THB/Studium/2.Semester/Databases/x.pdf", ROOT)
|
md = parse_path("/Documents/THB/Studium/2.Semester/Databases/x.pdf", ROOT)
|
||||||
assert md == PathMetadata(semester="2.Semester", fach="Databases", typ=None)
|
assert md == PathMetadata(semester="2.Semester", fach="Databases", typ=None)
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_path_root_prefix_collision_returns_none():
|
||||||
|
assert parse_path("Documents/THB/StudiumExam/2.Semester/Foo/x.pdf", ROOT) is None
|
||||||
|
|||||||
Reference in New Issue
Block a user