From 6509b335012d7cfad8fee2509e56b4c56e1be93c Mon Sep 17 00:00:00 2001
From: Jean-Luc Makiola
Date: Sun, 12 Apr 2026 20:09:17 +0200
Subject: [PATCH] feat(29-04): create backfill script for dominant colors

One-time migration script processes items, globalItems, and
threadCandidates to extract dominant colors via Sharp. Idempotent,
batched (10 concurrent), with progress logging.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 package.json                        |   3 +-
 scripts/backfill-dominant-colors.ts | 324 ++++++++++++++++++++++++++++
 2 files changed, 326 insertions(+), 1 deletion(-)
 create mode 100644 scripts/backfill-dominant-colors.ts

diff --git a/package.json b/package.json
index d1ed9db..ac20801 100644
--- a/package.json
+++ b/package.json
@@ -14,7 +14,8 @@
 		"test:e2e": "bunx playwright test",
 		"test:e2e:ui": "bunx playwright test --ui",
 		"lint": "bunx @biomejs/biome check .",
-		"db:seed:dev": "bun run src/db/dev-seed.ts"
+		"db:seed:dev": "bun run src/db/dev-seed.ts",
+		"backfill:colors": "bun run scripts/backfill-dominant-colors.ts"
 	},
 	"devDependencies": {
 		"@biomejs/biome": "^2.4.7",
diff --git a/scripts/backfill-dominant-colors.ts b/scripts/backfill-dominant-colors.ts
new file mode 100644
index 0000000..3ce1638
--- /dev/null
+++ b/scripts/backfill-dominant-colors.ts
@@ -0,0 +1,324 @@
+/**
+ * Backfill dominant colors for all existing images.
+ *
+ * Usage:
+ *   DATABASE_URL=postgres://... S3_ENDPOINT=... bun run scripts/backfill-dominant-colors.ts
+ *
+ * S3_ACCESS_KEY and S3_SECRET_KEY are also required; S3_REGION and S3_BUCKET
+ * fall back to "us-east-1" and "gearbox-images".
+ *
+ * Idempotent — skips records that already have dominantColor set.
+ * Processes in batches of 10 concurrent requests.
+ */
+
+import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";
+import { and, eq, isNotNull, isNull } from "drizzle-orm";
+import { drizzle } from "drizzle-orm/postgres-js";
+import postgres from "postgres";
+import sharp from "sharp";
+import * as schema from "../src/db/schema.ts";
+
+// ---------------------------------------------------------------------------
+// Setup
+// ---------------------------------------------------------------------------
+
+const DATABASE_URL = process.env.DATABASE_URL;
+if (!DATABASE_URL) {
+	console.error("DATABASE_URL environment variable is required");
+	process.exit(1);
+}
+
+// Fail fast instead of letting every S3 request fail on missing credentials.
+const S3_ACCESS_KEY = process.env.S3_ACCESS_KEY;
+const S3_SECRET_KEY = process.env.S3_SECRET_KEY;
+if (!S3_ACCESS_KEY || !S3_SECRET_KEY) {
+	console.error(
+		"S3_ACCESS_KEY and S3_SECRET_KEY environment variables are required",
+	);
+	process.exit(1);
+}
+
+const client = postgres(DATABASE_URL);
+const db = drizzle(client, { schema });
+
+const s3 = new S3Client({
+	endpoint: process.env.S3_ENDPOINT,
+	region: process.env.S3_REGION ?? "us-east-1",
+	credentials: {
+		accessKeyId: S3_ACCESS_KEY,
+		secretAccessKey: S3_SECRET_KEY,
+	},
+	forcePathStyle: true,
+});
+const bucket = process.env.S3_BUCKET ?? "gearbox-images";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Render a caught value as a short message for log output. */
+function describeError(err: unknown): string {
+	return err instanceof Error ? err.message : String(err);
+}
+
+/**
+ * Shrink an image to a single pixel and format that pixel as `#rrggbb`.
+ *
+ * @param buffer - Encoded image bytes to hand to Sharp.
+ * @returns The hex color, or `null` if Sharp rejects the buffer.
+ */
+async function extractColor(buffer: Buffer): Promise<string | null> {
+	try {
+		const { data, info } = await sharp(buffer)
+			.resize(1, 1)
+			.raw()
+			.toBuffer({ resolveWithObject: true });
+		// With fewer than three channels there are no separate green/blue
+		// samples, so repeat the first sample instead of indexing past the end.
+		const rgb =
+			info.channels >= 3
+				? [data[0], data[1], data[2]]
+				: [data[0], data[0], data[0]];
+		return `#${rgb.map((v) => v.toString(16).padStart(2, "0")).join("")}`;
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Download an object from the configured bucket.
+ *
+ * @param filename - Object key within the bucket.
+ * @returns The object bytes, or `null` if the request fails or has no body.
+ */
+async function fetchFromS3(filename: string): Promise<Buffer | null> {
+	try {
+		const response = await s3.send(
+			new GetObjectCommand({ Bucket: bucket, Key: filename }),
+		);
+		const bytes = await response.Body?.transformToByteArray();
+		return bytes ? Buffer.from(bytes) : null;
+	} catch (err) {
+		console.warn(`  S3 fetch failed for ${filename}: ${describeError(err)}`);
+		return null;
+	}
+}
+
+/**
+ * Download an image over HTTP, giving up after 10 seconds.
+ *
+ * @param url - Absolute URL of the image.
+ * @returns The response bytes, or `null` on a non-2xx status or any error.
+ */
+async function fetchFromUrl(url: string): Promise<Buffer | null> {
+	try {
+		const response = await fetch(url, {
+			signal: AbortSignal.timeout(10000),
+		});
+		if (!response.ok) {
+			console.warn(`  Fetch failed for ${url}: HTTP ${response.status}`);
+			return null;
+		}
+		return Buffer.from(await response.arrayBuffer());
+	} catch (err) {
+		console.warn(`  Fetch failed for ${url}: ${describeError(err)}`);
+		return null;
+	}
+}
+
+const BATCH_SIZE = 10;
+
+/**
+ * Extract and store a color for every record, BATCH_SIZE records at a time.
+ *
+ * A record counts as failed when its image cannot be fetched, its color
+ * cannot be extracted, or the update throws; one failure never stops the run.
+ *
+ * @param items - Records to process; each needs a numeric `id`.
+ * @param getBuffer - Loads the image bytes for a record, or `null`.
+ * @param updateFn - Persists the extracted color for the given record id.
+ * @param label - Prefix for progress and error log lines.
+ * @returns Totals of updated and failed records.
+ */
+async function processBatch<T extends { id: number }>(
+	items: T[],
+	getBuffer: (item: T) => Promise<Buffer | null>,
+	updateFn: (id: number, color: string) => Promise<void>,
+	label: string,
+) {
+	let processed = 0;
+	let updated = 0;
+	let failed = 0;
+
+	for (let i = 0; i < items.length; i += BATCH_SIZE) {
+		const batch = items.slice(i, i + BATCH_SIZE);
+		await Promise.all(
+			batch.map(async (item) => {
+				// Catch per record so a thrown error is counted and logged
+				// rather than vanishing into an unread settled result.
+				try {
+					const buffer = await getBuffer(item);
+					const color = buffer ? await extractColor(buffer) : null;
+					if (!color) {
+						failed++;
+						return;
+					}
+					await updateFn(item.id, color);
+					updated++;
+				} catch (err) {
+					failed++;
+					console.warn(
+						`  ${label}: id ${item.id} failed: ${describeError(err)}`,
+					);
+				}
+			}),
+		);
+		processed += batch.length;
+		console.log(
+			`  ${label}: ${processed}/${items.length} processed, ${updated} updated, ${failed} failed`,
+		);
+	}
+
+	return { updated, failed };
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+/** Run the backfill over items, global items, and thread candidates in turn. */
+async function main() {
+	console.log("=== Backfill Dominant Colors ===\n");
+
+	// Items with imageFilename but no dominantColor
+	const itemsToProcess = await db
+		.select({
+			id: schema.items.id,
+			imageFilename: schema.items.imageFilename,
+		})
+		.from(schema.items)
+		.where(
+			and(
+				isNotNull(schema.items.imageFilename),
+				isNull(schema.items.dominantColor),
+			),
+		);
+
+	console.log(`Items: ${itemsToProcess.length} need processing`);
+	if (itemsToProcess.length > 0) {
+		await processBatch(
+			itemsToProcess as { id: number; imageFilename: string }[],
+			(item) => fetchFromS3(item.imageFilename),
+			async (id, color) => {
+				await db
+					.update(schema.items)
+					.set({ dominantColor: color })
+					.where(eq(schema.items.id, id));
+			},
+			"Items",
+		);
+	}
+
+	// GlobalItems with imageSourceUrl (stored in S3)
+	// NOTE(review): these are fetched over HTTP, not through the S3 client —
+	// confirm imageSourceUrl always holds a directly reachable URL.
+	const globalWithSource = await db
+		.select({
+			id: schema.globalItems.id,
+			imageSourceUrl: schema.globalItems.imageSourceUrl,
+		})
+		.from(schema.globalItems)
+		.where(
+			and(
+				isNotNull(schema.globalItems.imageSourceUrl),
+				isNull(schema.globalItems.dominantColor),
+			),
+		);
+
+	console.log(
+		`\nGlobal Items (source URL): ${globalWithSource.length} need processing`,
+	);
+	if (globalWithSource.length > 0) {
+		await processBatch(
+			globalWithSource as { id: number; imageSourceUrl: string }[],
+			(item) => fetchFromUrl(item.imageSourceUrl),
+			async (id, color) => {
+				await db
+					.update(schema.globalItems)
+					.set({ dominantColor: color })
+					.where(eq(schema.globalItems.id, id));
+			},
+			"Global Items (source)",
+		);
+	}
+
+	// GlobalItems with imageUrl (direct URLs). Queried after the pass above,
+	// so rows that just received a color are not selected again.
+	const globalWithUrl = await db
+		.select({
+			id: schema.globalItems.id,
+			imageUrl: schema.globalItems.imageUrl,
+		})
+		.from(schema.globalItems)
+		.where(
+			and(
+				isNotNull(schema.globalItems.imageUrl),
+				isNull(schema.globalItems.dominantColor),
+			),
+		);
+
+	console.log(
+		`\nGlobal Items (image URL): ${globalWithUrl.length} need processing`,
+	);
+	if (globalWithUrl.length > 0) {
+		await processBatch(
+			globalWithUrl as { id: number; imageUrl: string }[],
+			(item) => fetchFromUrl(item.imageUrl),
+			async (id, color) => {
+				await db
+					.update(schema.globalItems)
+					.set({ dominantColor: color })
+					.where(eq(schema.globalItems.id, id));
+			},
+			"Global Items (URL)",
+		);
+	}
+
+	// Thread candidates
+	const candidatesToProcess = await db
+		.select({
+			id: schema.threadCandidates.id,
+			imageFilename: schema.threadCandidates.imageFilename,
+		})
+		.from(schema.threadCandidates)
+		.where(
+			and(
+				isNotNull(schema.threadCandidates.imageFilename),
+				isNull(schema.threadCandidates.dominantColor),
+			),
+		);
+
+	console.log(`\nCandidates: ${candidatesToProcess.length} need processing`);
+	if (candidatesToProcess.length > 0) {
+		await processBatch(
+			candidatesToProcess as { id: number; imageFilename: string }[],
+			(item) => fetchFromS3(item.imageFilename),
+			async (id, color) => {
+				await db
+					.update(schema.threadCandidates)
+					.set({ dominantColor: color })
+					.where(eq(schema.threadCandidates.id, id));
+			},
+			"Candidates",
+		);
+	}
+
+	console.log("\n=== Backfill Complete ===");
+	await client.end();
+	process.exit(0);
+}
+
+main().catch((err) => {
+	console.error("Backfill failed:", err);
+	process.exit(1);
+});