Initial commit: CSV row filler for satellite metadata
This commit is contained in:
74
fill_csv.py
Normal file
74
fill_csv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fill missing ROW integers within each (PATH, DATE) group of a satellite imagery CSV."""
|
||||
|
||||
import csv
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def fill_csv(input_path: Path, output_path: Path) -> None:
    """Write a copy of a CSV with gaps in each (PATH, DATE) ROW sequence filled.

    Data rows are grouped by column 3 (PATH) and column 9 (DATE). Within each
    group, every integer between the smallest and largest ROW (column 4) that
    is not already present gets a synthetic row:

    * columns 0-3 and 5-7 and column 9 are copied from the group's first row,
    * column 4 holds the missing ROW number,
    * column 8 (LABEL) is ``"<PATH>/<ROW>"``,
    * any columns from 10 up to the header width are left blank.

    The output contains the header followed by original and synthetic rows
    sorted by (PATH, DATE, ROW); PATH and DATE are compared as strings, ROW as
    an integer. Every output field is quoted. A four-line summary is printed
    to stdout.

    Blank lines in the input are dropped rather than treated as data rows, and
    an input file with no header at all produces an empty output file.

    Args:
        input_path: CSV file to read (UTF-8, first line is the header).
        output_path: Destination CSV file; overwritten if it exists.

    Raises:
        IndexError: A non-blank data row has fewer than 10 columns.
        ValueError: A ROW value (column 4) is not an integer.
    """
    with open(input_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        # A default keeps a bare StopIteration from escaping on an empty file.
        header = next(reader, None)
        # csv.reader yields [] for blank lines; they carry no data and would
        # otherwise fail the column lookups below.
        rows = [row for row in reader if row]

    num_cols = len(header) if header is not None else 0

    # Group data rows by (PATH, DATE) — columns 3 and 9
    groups: dict[tuple, list] = defaultdict(list)
    for row in rows:
        groups[(row[3], row[9])].append(row)

    synthetic_rows: list[list[str]] = []

    for group in groups.values():
        row_numbers = {int(r[4]) for r in group}
        missing = set(range(min(row_numbers), max(row_numbers) + 1)) - row_numbers

        # The first row seen for the group supplies the copied columns.
        template = group[0]
        for r in sorted(missing):
            new_row = (
                template[:4]                # cols 0–3: RECORD_TYP, MISSION, SENSOR, PATH
                + [str(r)]                  # col 4: ROW
                + template[5:8]             # cols 5–7: SCENE, SUB, SHIFT
                + [f"{template[3]}/{r}"]    # col 8: LABEL
                + [template[9]]             # col 9: DATE
                + [""] * (num_cols - 10)    # cols 10–end: blank
            )
            synthetic_rows.append(new_row)

    all_rows = rows + synthetic_rows
    all_rows.sort(key=lambda r: (r[3], r[9], int(r[4])))

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        if header is not None:
            writer.writerow(header)
        writer.writerows(all_rows)

    print(f"Original rows : {len(rows)}")
    print(f"Synthetic rows: {len(synthetic_rows)}")
    print(f"Total rows    : {len(all_rows)}")
    print(f"Output written: {output_path}")
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: fill the CSV named on the command line.

    The first argument is the input CSV path and the optional second argument
    is the output path. When no output path is given, the result is written
    alongside the input with "_filled" appended to the file stem. With no
    arguments at all, a usage message goes to stderr and the process exits
    with status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print(f"Usage: {sys.argv[0]} <input.csv> [output.csv]", file=sys.stderr)
        sys.exit(1)

    source = Path(cli_args[0])
    destination = (
        Path(cli_args[1])
        if len(cli_args) > 1
        else source.with_stem(source.stem + "_filled")
    )

    fill_csv(source, destination)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user