#!/usr/bin/env python3
"""Fill missing ROW integers within each (PATH, DATE) group of a satellite imagery CSV."""

import csv
import sys
from collections import defaultdict
from pathlib import Path

# Zero-based column layout of a data row:
#   0-3: RECORD_TYP, MISSION, SENSOR, PATH
#   4:   ROW
#   5-7: SCENE, SUB, SHIFT
#   8:   LABEL
#   9:   DATE
#   10+: remaining columns (left blank in synthetic rows)
_COL_PATH = 3
_COL_ROW = 4
_COL_LABEL = 8
_COL_DATE = 9
_FIRST_BLANK_COL = 10


def _synthesize_missing_rows(rows: list[list[str]], num_cols: int) -> list[list[str]]:
    """Build one synthetic row for every ROW gap inside each (PATH, DATE) group.

    Rows are grouped by their PATH and DATE cells. Within a group, every
    integer between the smallest and largest ROW that is not already present
    gets a new row. The new row copies columns 0-3 and 5-7 from the first row
    of the group, sets LABEL to ``"<PATH>/<ROW>"``, keeps the group's DATE and
    leaves every column from index 10 up to ``num_cols`` empty.

    Args:
        rows: Data rows (header excluded); each needs at least 10 cells.
        num_cols: Number of columns in the header.

    Returns:
        The synthetic rows, in group-encounter order and ascending ROW.

    Raises:
        IndexError: If a row has fewer than 10 cells.
        ValueError: If a ROW cell is not an integer.
    """
    groups: dict[tuple[str, str], list[list[str]]] = defaultdict(list)
    for row in rows:
        groups[(row[_COL_PATH], row[_COL_DATE])].append(row)

    blank_tail = [""] * (num_cols - _FIRST_BLANK_COL)
    synthetic_rows: list[list[str]] = []
    for (path_val, date_val), group in groups.items():
        present = {int(r[_COL_ROW]) for r in group}
        missing = set(range(min(present), max(present) + 1)) - present
        # The first row of the group supplies the cells that are copied verbatim.
        template = group[0]
        for row_number in sorted(missing):
            synthetic_rows.append(
                template[:_COL_ROW]
                + [str(row_number)]
                + template[_COL_ROW + 1:_COL_LABEL]
                + [f"{path_val}/{row_number}"]
                + [date_val]
                + blank_tail
            )
    return synthetic_rows


def fill_csv(input_path: Path, output_path: Path) -> None:
    """Copy a CSV to ``output_path``, inserting rows for missing ROW numbers.

    The first line of the input is treated as the header and written back
    unchanged. Blank lines in the input are dropped. All data rows, original
    and synthetic, are written sorted by PATH and DATE (compared as strings)
    and then by ROW (compared as an integer). Every output field is quoted.
    A short summary is printed to stdout.

    Args:
        input_path: CSV file to read (UTF-8).
        output_path: CSV file to create or overwrite (UTF-8).

    Raises:
        ValueError: If the input file is empty, or a ROW cell is not an integer.
        IndexError: If a non-blank data row has fewer than 10 cells.
        OSError: If either file cannot be opened.
    """
    with open(input_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            raise ValueError(f"{input_path}: file is empty (no header row)")
        # csv.reader yields [] for blank lines (e.g. a trailing newline);
        # they carry no data and would break the column lookups below.
        rows = [row for row in reader if row]

    synthetic_rows = _synthesize_missing_rows(rows, len(header))

    all_rows = rows + synthetic_rows
    all_rows.sort(key=lambda r: (r[_COL_PATH], r[_COL_DATE], int(r[_COL_ROW])))

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        writer.writerows(all_rows)

    print(f"Original rows : {len(rows)}")
    print(f"Synthetic rows: {len(synthetic_rows)}")
    print(f"Total rows : {len(all_rows)}")
    print(f"Output written: {output_path}")


def main() -> None:
    """Command-line entry point: ``<input.csv> [output.csv]``.

    When no output path is given, the result is written next to the input
    with ``_filled`` appended to the file stem. Exits with status 1 and a
    usage message on stderr when the input path is missing.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <input.csv> [output.csv]", file=sys.stderr)
        sys.exit(1)

    input_path = Path(sys.argv[1])
    if len(sys.argv) >= 3:
        output_path = Path(sys.argv[2])
    else:
        output_path = input_path.with_stem(input_path.stem + "_filled")

    fill_csv(input_path, output_path)


if __name__ == "__main__":
    main()