Initial commit: CSV row filler for satellite metadata

This commit is contained in:
Jean-Luc Makiola
2026-02-11 14:24:33 +01:00
commit 9faa02ee9f
2 changed files with 95 additions and 0 deletions

74
fill_csv.py Normal file
View File

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""Fill missing ROW integers within each (PATH, DATE) group of a satellite imagery CSV."""
import csv
import sys
from collections import defaultdict
from pathlib import Path
def _build_missing_rows(group: list[list[str]], num_cols: int) -> list[list[str]]:
    """Return one synthetic row per ROW integer absent from the group's min..max span."""
    present = {int(r[4]) for r in group}
    # The first row of the group supplies every value copied into a synthetic row.
    template = group[0]
    synthetic: list[list[str]] = []
    for row_no in range(min(present), max(present) + 1):
        if row_no in present:
            continue
        synthetic.append(
            template[:4]                   # cols 0-3: RECORD_TYP, MISSION, SENSOR, PATH
            + [str(row_no)]                # col 4: ROW
            + template[5:8]                # cols 5-7: SCENE, SUB, SHIFT
            + [f"{template[3]}/{row_no}"]  # col 8: LABEL
            + [template[9]]                # col 9: DATE
            + [""] * (num_cols - 10)       # cols 10-end: blank
        )
    return synthetic


def fill_csv(input_path: Path, output_path: Path) -> None:
    """Copy a CSV, adding a row for every ROW integer missing within a (PATH, DATE) group.

    The first line of the input is treated as the header. Data rows are grouped
    by column 3 (PATH) and column 9 (DATE). For each group, every integer
    between the smallest and largest value of column 4 (ROW) that does not occur
    in the group gets a synthetic row: columns 0-3, 5-7 and 9 are copied from the
    group's first row, column 8 is set to "<PATH>/<ROW>", and all columns from
    10 onwards are left empty.

    The output contains the header followed by original and synthetic rows,
    sorted by (PATH, DATE, ROW); PATH and DATE compare as strings, ROW as an
    integer. Every field is quoted. A short summary is printed to stdout.

    Blank lines in the input are ignored and are not written to the output.

    Args:
        input_path: CSV file to read (UTF-8).
        output_path: CSV file to write (UTF-8); overwritten if it exists.

    Raises:
        StopIteration: if the input file has no header line.
        IndexError: if a non-blank data row has fewer than 10 columns.
        ValueError: if a ROW value is not an integer.
    """
    with open(input_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        header = next(reader)
        # csv.reader yields [] for a blank line; such rows carry no data and
        # would otherwise fail on the column lookups below.
        rows = [row for row in reader if row]

    num_cols = len(header)

    # Group data rows by (PATH, DATE), i.e. columns 3 and 9.
    groups: dict[tuple[str, str], list[list[str]]] = defaultdict(list)
    for row in rows:
        groups[(row[3], row[9])].append(row)

    synthetic_rows: list[list[str]] = []
    for group in groups.values():
        synthetic_rows.extend(_build_missing_rows(group, num_cols))

    all_rows = rows + synthetic_rows
    all_rows.sort(key=lambda r: (r[3], r[9], int(r[4])))

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        writer.writerows(all_rows)

    print(f"Original rows : {len(rows)}")
    print(f"Synthetic rows: {len(synthetic_rows)}")
    print(f"Total rows : {len(all_rows)}")
    print(f"Output written: {output_path}")
def main() -> None:
    """Run fill_csv using paths taken from the command line.

    The first argument is the input CSV. The optional second argument is the
    output CSV; when it is omitted, the output path is the input path with
    "_filled" appended to its stem. With no arguments, a usage line is printed
    to stderr and the process exits with status 1.
    """
    args = sys.argv[1:]
    if not args:
        print(f"Usage: {sys.argv[0]} <input.csv> [output.csv]", file=sys.stderr)
        sys.exit(1)

    input_path = Path(args[0])
    output_path = (
        Path(args[1])
        if len(args) > 1
        else input_path.with_stem(input_path.stem + "_filled")
    )
    fill_csv(input_path, output_path)


# Only run the command-line entry point when executed as a script.
if __name__ == "__main__":
    main()