75 lines
2.4 KiB
Python
75 lines
2.4 KiB
Python
#!/usr/bin/env python3
|
||
"""Fill missing ROW integers within each (PATH, DATE) group of a satellite imagery CSV."""
|
||
|
||
import csv
|
||
import sys
|
||
from collections import defaultdict
|
||
from pathlib import Path
|
||
|
||
|
||
def _synthesize_missing_rows(
    rows: list[list[str]], num_cols: int
) -> list[list[str]]:
    """Build one synthetic row for every ROW integer missing inside a group.

    Rows are grouped by (PATH, DATE) — columns 3 and 9. Within each group,
    every integer strictly between the smallest and largest ROW (column 4)
    that is not already present gets a new row cloned from the group's first
    row.

    Args:
        rows: Data rows (header excluded); each must have at least 10 columns.
        num_cols: Number of columns in the header, used to pad the blank tail.

    Returns:
        The synthetic rows only, in group-encounter order and ascending ROW
        within each group. The input rows are not modified.

    Raises:
        ValueError: If a ROW value is not an integer literal.
        IndexError: If a row has fewer than 10 columns.
    """
    groups: dict[tuple[str, str], list[list[str]]] = defaultdict(list)
    for row in rows:
        groups[(row[3], row[9])].append(row)

    # Columns past DATE carry no data in a synthetic row; a header with ten
    # or fewer columns yields no padding at all.
    blank_tail = [""] * (num_cols - 10)

    synthetic_rows: list[list[str]] = []
    for group in groups.values():
        present = {int(r[4]) for r in group}
        # The first row seen for the group supplies every copied column.
        template = group[0]
        for row_number in range(min(present), max(present) + 1):
            if row_number in present:
                continue
            synthetic_rows.append(
                template[:4]                            # cols 0-3: RECORD_TYP, MISSION, SENSOR, PATH
                + [str(row_number)]                     # col 4: ROW
                + template[5:8]                         # cols 5-7: SCENE, SUB, SHIFT
                + [f"{template[3]}/{row_number}"]       # col 8: LABEL
                + [template[9]]                         # col 9: DATE
                + blank_tail                            # cols 10-end: blank
            )
    return synthetic_rows


def fill_csv(input_path: Path, output_path: Path) -> None:
    """Copy a CSV, inserting rows for ROW integers missing per (PATH, DATE).

    The first record of ``input_path`` is treated as the header. The output
    contains the header followed by the original and synthetic rows, sorted
    by PATH and DATE (both compared as strings) and then by ROW (compared as
    an integer). Every field is quoted on output. A four-line summary is
    printed to stdout.

    Args:
        input_path: CSV file to read (UTF-8).
        output_path: CSV file to write (UTF-8); overwritten if it exists.

    Raises:
        ValueError: If the input file is empty (no header row), or a ROW
            value is not an integer literal.
        IndexError: If a non-blank data row has fewer than 10 columns.
    """
    with open(input_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            # A bare StopIteration from next() would be a confusing failure.
            raise ValueError(f"{input_path}: input CSV is empty (no header row)")
        # csv.reader yields [] for blank lines (e.g. a trailing empty line);
        # they carry no data and would otherwise fail on the column lookups.
        rows = [row for row in reader if row]

    synthetic_rows = _synthesize_missing_rows(rows, len(header))

    all_rows = rows + synthetic_rows
    all_rows.sort(key=lambda r: (r[3], r[9], int(r[4])))

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow(header)
        writer.writerows(all_rows)

    print(f"Original rows : {len(rows)}")
    print(f"Synthetic rows: {len(synthetic_rows)}")
    print(f"Total rows : {len(all_rows)}")
    print(f"Output written: {output_path}")
|
||
|
||
|
||
def main() -> None:
    """Command-line entry point: ``<input.csv> [output.csv]``.

    With no arguments, prints a usage line to stderr and exits with status 1.
    When the output path is omitted, it defaults to the input path with
    ``_filled`` appended to its stem.
    """
    args = sys.argv[1:]
    if not args:
        print(f"Usage: {sys.argv[0]} <input.csv> [output.csv]", file=sys.stderr)
        sys.exit(1)

    input_path = Path(args[0])
    # Any arguments beyond the second are ignored.
    output_path = (
        Path(args[1])
        if len(args) > 1
        else input_path.with_stem(input_path.stem + "_filled")
    )

    fill_csv(input_path, output_path)
|
||
|
||
|
||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|