Files
csv-fill/fill_csv.py
2026-02-11 14:24:33 +01:00

75 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Fill missing ROW integers within each (PATH, DATE) group of a satellite imagery CSV."""
import csv
import sys
from collections import defaultdict
from pathlib import Path
def fill_csv(input_path: Path, output_path: Path) -> None:
    """Fill gaps in the ROW sequence of every (PATH, DATE) group of a CSV.

    The input is read fully into memory and its first line is treated as the
    header. Data rows are grouped by column 3 (PATH) and column 9 (DATE).
    For each integer between a group's smallest and largest ROW (column 4)
    that no row in the group carries, a synthetic row is built from the
    group's first row: columns 0-3, 5-7 and 9 are copied, ROW is set to the
    missing number, LABEL (column 8) becomes ``"<PATH>/<ROW>"``, and every
    column from 10 up to the header width is left blank.

    Original and synthetic rows are then sorted by (PATH, DATE, ROW) -- PATH
    and DATE compared as strings, ROW as an integer -- and written to
    ``output_path`` with every field quoted. A short summary is printed to
    stdout.

    Blank lines in the input are dropped: ``csv.reader`` yields an empty list
    for them, which carries no data and would otherwise fail on the column
    lookups below.

    Args:
        input_path: CSV file to read (UTF-8, first line is the header).
        output_path: CSV file to write (UTF-8, all fields quoted).

    Raises:
        ValueError: If the input has no header line, or a ROW value is not
            an integer.
        IndexError: If a non-blank data row has fewer than 10 columns.
    """
    with open(input_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            raise ValueError(f"{input_path}: input CSV is empty (no header row)")
        # Skip blank lines (returned as [] by csv.reader) instead of crashing
        # with IndexError when indexing into them.
        rows = [row for row in reader if row]
    num_cols = len(header)

    # Group data rows by (PATH, DATE) -- columns 3 and 9.
    groups: dict[tuple[str, str], list[list[str]]] = defaultdict(list)
    for row in rows:
        groups[(row[3], row[9])].append(row)

    synthetic_rows: list[list[str]] = []
    for group in groups.values():
        row_numbers = {int(r[4]) for r in group}
        missing = set(range(min(row_numbers), max(row_numbers) + 1)) - row_numbers
        # The first row of the group supplies the values shared by its
        # synthetic rows.
        template = group[0]
        for r in sorted(missing):
            new_row = (
                template[:4]                # cols 0-3: RECORD_TYP, MISSION, SENSOR, PATH
                + [str(r)]                  # col 4: ROW
                + template[5:8]             # cols 5-7: SCENE, SUB, SHIFT
                + [f"{template[3]}/{r}"]    # col 8: LABEL
                + [template[9]]             # col 9: DATE
                + [""] * (num_cols - 10)    # cols 10-end: blank
            )
            synthetic_rows.append(new_row)

    all_rows = rows + synthetic_rows
    all_rows.sort(key=lambda r: (r[3], r[9], int(r[4])))

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        writer.writerows(all_rows)

    print(f"Original rows : {len(rows)}")
    print(f"Synthetic rows: {len(synthetic_rows)}")
    print(f"Total rows : {len(all_rows)}")
    print(f"Output written: {output_path}")
def main() -> None:
    """Command-line entry point: read ``sys.argv`` and run ``fill_csv``.

    The first argument is the input CSV. The optional second argument is the
    output CSV; when it is omitted the output is written next to the input
    with ``_filled`` appended to the file stem. With no arguments a usage
    line is printed to stderr and the process exits with status 1.
    """
    args = sys.argv[1:]
    if not args:
        print(f"Usage: {sys.argv[0]} <input.csv> [output.csv]", file=sys.stderr)
        sys.exit(1)

    source = Path(args[0])
    # Any arguments beyond the second are ignored.
    destination = (
        Path(args[1])
        if len(args) > 1
        else source.with_stem(source.stem + "_filled")
    )
    fill_csv(source, destination)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()