From 9faa02ee9fbe4bd2f3d90c307c50e4cc1d258c05 Mon Sep 17 00:00:00 2001
From: Jean-Luc Makiola <makiolaj@gitea.jeanlucmakiola.de>
Date: Wed, 11 Feb 2026 14:24:33 +0100
Subject: [PATCH] Initial commit: CSV row filler for satellite metadata

---
 README.md   | 21 +++++++++++++++
 fill_csv.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 README.md
 create mode 100644 fill_csv.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c68c668
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# fill_csv.py
+
+Füllt fehlende ROW-Nummern in einer Satellitenmetadaten-CSV auf.
+
+## Nutzung
+
+```bash
+python fill_csv.py eingabe.csv
+```
+
+Die Ausgabe landet automatisch als `eingabe_filled.csv` im selben Ordner.
+
+Optional kann ein eigener Ausgabepfad angegeben werden:
+
+```bash
+python fill_csv.py eingabe.csv ausgabe.csv
+```
+
+## Was macht das Skript?
+
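+Für jede Kombination aus `PATH` und `DATE` ermittelt das Skript, welche ROW-Nummern vorhanden sind, und füllt die Lücken zwischen Minimum und Maximum mit Platzhalterzeilen auf. Die neuen Zeilen übernehmen die Grunddaten (MISSION, SENSOR, PATH, DATE usw.) aus der ersten Zeile der jeweiligen Gruppe, das LABEL wird als `PATH/ROW` neu gebildet, und alle Spalten ab Index 10 (nullbasiert gezählt, also ab der elften Spalte) bleiben leer.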
diff --git a/fill_csv.py b/fill_csv.py
new file mode 100644
index 0000000..7eec07d
--- /dev/null
+++ b/fill_csv.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""Fill missing ROW integers within each (PATH, DATE) group of a satellite imagery CSV."""
+
+import csv
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+
+def fill_csv(input_path: Path, output_path: Path) -> None:
+    """Write a copy of the CSV with every ROW gap per (PATH, DATE) group filled.
+
+    Columns are positional: 3 = PATH, 4 = ROW, 9 = DATE. Each missing ROW between a
+    group's min and max gets a placeholder row that copies columns 0-3, 5-7 and 9
+    from the group's first row, sets column 8 to "PATH/ROW" and leaves the rest empty.
+    Blank lines are skipped. Raises ValueError for an empty file, a row with fewer
+    than 10 columns, or a non-integer ROW value.
+    """
+    with open(input_path, newline="", encoding="utf-8") as f:
+        reader = csv.reader(f)
+        header = next(reader, None)
+        if header is None:
+            raise ValueError(f"{input_path}: file is empty, no header row")
+        # csv.reader yields [] for blank lines; they carry no data, so drop them.
+        rows = [row for row in reader if row]
+
+    groups: dict[tuple[str, str], list[list[str]]] = defaultdict(list)
+    for n, row in enumerate(rows, start=1):
+        if len(row) < 10:
+            raise ValueError(f"{input_path}: data row {n} has fewer than 10 columns")
+        groups[(row[3], row[9])].append(row)
+
+    blank_tail = [""] * max(len(header) - 10, 0)
+    synthetic_rows: list[list[str]] = []
+    for (path_val, date_val), group in groups.items():
+        present = {int(r[4]) for r in group}
+        template = group[0]
+        for r in range(min(present), max(present) + 1):
+            if r not in present:
+                synthetic_rows.append(
+                    template[:4] + [str(r)] + template[5:8]
+                    + [f"{path_val}/{r}", date_val] + blank_tail
+                )
+
+    all_rows = rows + synthetic_rows
+    all_rows.sort(key=lambda r: (r[3], r[9], int(r[4])))
+
+    with open(output_path, "w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
+        writer.writerow(header)
+        writer.writerows(all_rows)
+
+    print(f"Original rows : {len(rows)}")
+    print(f"Synthetic rows: {len(synthetic_rows)}")
+    print(f"Total rows    : {len(all_rows)}")
+    print(f"Output written: {output_path}")
+
+
+def main() -> None:
+    """Run fill_csv on argv[1]; the output is argv[2] or "<input stem>_filled"."""
+    args = sys.argv[1:]
+    if not args:
+        print(f"Usage: {sys.argv[0]} <input.csv> [output.csv]", file=sys.stderr)
+        sys.exit(1)
+
+    source = Path(args[0])
+    # Any arguments after the optional output path are ignored.
+    target = Path(args[1]) if len(args) > 1 else source.with_stem(source.stem + "_filled")
+
+    fill_csv(source, target)
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+    main()