/**
 * Backfill dominant colors for all existing images.
 *
 * Usage:
 *   DATABASE_URL=postgres://... S3_ENDPOINT=... bun run scripts/backfill-dominant-colors.ts
 *
 * Environment variables read by this script:
 *   DATABASE_URL   (required) Postgres connection string.
 *   S3_ENDPOINT    S3 endpoint passed to the client.
 *   S3_REGION      Defaults to "us-east-1".
 *   S3_ACCESS_KEY  Used together with S3_SECRET_KEY as static credentials.
 *   S3_SECRET_KEY  When either key is unset, the SDK's default credential
 *                  provider chain is used instead.
 *   S3_BUCKET      Defaults to "gearbox-images".
 *
 * Idempotent — skips records that already have dominantColor set.
 * Processes in batches of 10 concurrent requests.
 */

import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";
import { and, eq, isNotNull, isNull } from "drizzle-orm";
import { drizzle } from "drizzle-orm/postgres-js";
import postgres from "postgres";
import sharp from "sharp";
import * as schema from "../src/db/schema.ts";

// ---------------------------------------------------------------------------
// Setup
// ---------------------------------------------------------------------------

const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
  console.error("DATABASE_URL environment variable is required");
  process.exit(1);
}

const client = postgres(DATABASE_URL);
const db = drizzle(client, { schema });

const s3AccessKey = process.env.S3_ACCESS_KEY;
const s3SecretKey = process.env.S3_SECRET_KEY;

const s3 = new S3Client({
  endpoint: process.env.S3_ENDPOINT,
  region: process.env.S3_REGION ?? "us-east-1",
  // Only pass static credentials when both halves are present; otherwise omit
  // the field so the SDK resolves credentials through its default chain
  // instead of receiving `undefined` keys.
  ...(s3AccessKey && s3SecretKey
    ? { credentials: { accessKeyId: s3AccessKey, secretAccessKey: s3SecretKey } }
    : {}),
  forcePathStyle: true,
});

const bucket = process.env.S3_BUCKET ?? "gearbox-images";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Formats one 0-255 channel value as a two-digit lowercase hex string. */
function toHexByte(value: number): string {
  return value.toString(16).padStart(2, "0");
}

/**
 * Reduces an image to a single pixel and returns it as a `#rrggbb` string.
 *
 * @param buffer Encoded image bytes.
 * @returns The hex color, or `null` when the image cannot be decoded.
 */
async function extractColor(buffer: Buffer): Promise<string | null> {
  try {
    const { data, info } = await sharp(buffer)
      .resize(1, 1)
      .raw()
      .toBuffer({ resolveWithObject: true });

    const [first, second, third] = data;
    if (first === undefined) return null;

    // Raw output has `info.channels` bytes per pixel. With fewer than three
    // channels (greyscale, optionally with alpha) there is no separate G/B
    // byte, so the single grey value is used for all three components.
    const hasRgb =
      info.channels >= 3 && second !== undefined && third !== undefined;
    const [red, green, blue] = hasRgb
      ? [first, second, third]
      : [first, first, first];

    return `#${toHexByte(red)}${toHexByte(green)}${toHexByte(blue)}`;
  } catch (err) {
    console.warn("Failed to extract color from image:", err);
    return null;
  }
}

/**
 * Downloads an object from the configured bucket.
 *
 * @param filename Object key inside the bucket.
 * @returns The object bytes, or `null` when the request fails or has no body.
 */
async function fetchFromS3(filename: string): Promise<Buffer | null> {
  try {
    const response = await s3.send(
      new GetObjectCommand({ Bucket: bucket, Key: filename }),
    );
    const bytes = await response.Body?.transformToByteArray();
    return bytes ? Buffer.from(bytes) : null;
  } catch (err) {
    console.warn(`Failed to fetch "${filename}" from S3:`, err);
    return null;
  }
}

/**
 * Downloads a resource over HTTP with a 10 second timeout.
 *
 * @param url Absolute URL of the image.
 * @returns The response bytes, or `null` on a non-2xx status, timeout or
 *          network error.
 */
async function fetchFromUrl(url: string): Promise<Buffer | null> {
  try {
    const response = await fetch(url, {
      signal: AbortSignal.timeout(10000),
    });
    if (!response.ok) {
      console.warn(`Failed to fetch ${url}: HTTP ${response.status}`);
      return null;
    }
    return Buffer.from(await response.arrayBuffer());
  } catch (err) {
    console.warn(`Failed to fetch ${url}:`, err);
    return null;
  }
}

/** Number of records handled concurrently per batch. */
const BATCH_SIZE = 10;

/**
 * Fetches, analyses and updates `items` in concurrent batches of `BATCH_SIZE`,
 * logging running totals after every batch.
 *
 * A record counts as failed when its image cannot be fetched, its color cannot
 * be extracted, or `updateFn` rejects. One failing record never aborts the
 * rest of the batch.
 *
 * @param items     Records to process; only `id` is read here.
 * @param getBuffer Loads the image bytes for a record, or `null` on failure.
 * @param updateFn  Persists the extracted color for the given record id.
 * @param label     Prefix for progress log lines.
 * @returns Totals of updated and failed records.
 */
async function processBatch<T extends { id: number }>(
  items: T[],
  getBuffer: (item: T) => Promise<Buffer | null>,
  updateFn: (id: number, color: string) => Promise<void>,
  label: string,
): Promise<{ updated: number; failed: number }> {
  let processed = 0;
  let updated = 0;
  let failed = 0;

  for (let i = 0; i < items.length; i += BATCH_SIZE) {
    const batch = items.slice(i, i + BATCH_SIZE);

    // Each task resolves to true when the record was updated and false when
    // the image could not be fetched or decoded. A rejection means `updateFn`
    // (or a callback) threw.
    const results = await Promise.allSettled(
      batch.map(async (item): Promise<boolean> => {
        const buffer = await getBuffer(item);
        if (!buffer) return false;

        const color = await extractColor(buffer);
        if (!color) return false;

        await updateFn(item.id, color);
        return true;
      }),
    );

    // Counters are tallied from the settled results so that rejected tasks
    // are included in `failed` rather than silently dropped.
    results.forEach((result, index) => {
      if (result.status === "fulfilled") {
        if (result.value) {
          updated++;
        } else {
          failed++;
        }
        return;
      }
      failed++;
      console.warn(
        ` ${label}: failed to process id ${batch[index]?.id}:`,
        result.reason,
      );
    });

    processed += batch.length;
    console.log(
      ` ${label}: ${processed}/${items.length} processed, ${updated} updated, ${failed} failed`,
    );
  }

  return { updated, failed };
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

/**
 * Runs the backfill over items, global items (both URL columns) and thread
 * candidates, then closes the database connection and exits with status 0.
 *
 * The casts on the query results narrow the image column from nullable to
 * `string`; this relies on the `isNotNull` filter in each query.
 */
async function main(): Promise<void> {
  try {
    console.log("=== Backfill Dominant Colors ===\n");

    // Items with imageFilename but no dominantColor
    const itemsToProcess = await db
      .select({
        id: schema.items.id,
        imageFilename: schema.items.imageFilename,
      })
      .from(schema.items)
      .where(
        and(
          isNotNull(schema.items.imageFilename),
          isNull(schema.items.dominantColor),
        ),
      );

    console.log(`Items: ${itemsToProcess.length} need processing`);
    if (itemsToProcess.length > 0) {
      await processBatch(
        itemsToProcess as { id: number; imageFilename: string }[],
        (item) => fetchFromS3(item.imageFilename),
        async (id, color) => {
          await db
            .update(schema.items)
            .set({ dominantColor: color })
            .where(eq(schema.items.id, id));
        },
        "Items",
      );
    }

    // GlobalItems with imageSourceUrl.
    // NOTE(review): the previous comment described these as "stored in S3",
    // but they are fetched over HTTP via fetchFromUrl — confirm which is
    // intended.
    const globalWithSource = await db
      .select({
        id: schema.globalItems.id,
        imageSourceUrl: schema.globalItems.imageSourceUrl,
      })
      .from(schema.globalItems)
      .where(
        and(
          isNotNull(schema.globalItems.imageSourceUrl),
          isNull(schema.globalItems.dominantColor),
        ),
      );

    console.log(
      `\nGlobal Items (source URL): ${globalWithSource.length} need processing`,
    );
    if (globalWithSource.length > 0) {
      await processBatch(
        globalWithSource as { id: number; imageSourceUrl: string }[],
        (item) => fetchFromUrl(item.imageSourceUrl),
        async (id, color) => {
          await db
            .update(schema.globalItems)
            .set({ dominantColor: color })
            .where(eq(schema.globalItems.id, id));
        },
        "Global Items (source)",
      );
    }

    // GlobalItems with imageUrl (direct URLs). Queried after the source-URL
    // pass, so rows that pass already colored are excluded by the
    // dominantColor IS NULL filter.
    const globalWithUrl = await db
      .select({
        id: schema.globalItems.id,
        imageUrl: schema.globalItems.imageUrl,
      })
      .from(schema.globalItems)
      .where(
        and(
          isNotNull(schema.globalItems.imageUrl),
          isNull(schema.globalItems.dominantColor),
        ),
      );

    console.log(
      `\nGlobal Items (image URL): ${globalWithUrl.length} need processing`,
    );
    if (globalWithUrl.length > 0) {
      await processBatch(
        globalWithUrl as { id: number; imageUrl: string }[],
        (item) => fetchFromUrl(item.imageUrl),
        async (id, color) => {
          await db
            .update(schema.globalItems)
            .set({ dominantColor: color })
            .where(eq(schema.globalItems.id, id));
        },
        "Global Items (URL)",
      );
    }

    // Thread candidates
    const candidatesToProcess = await db
      .select({
        id: schema.threadCandidates.id,
        imageFilename: schema.threadCandidates.imageFilename,
      })
      .from(schema.threadCandidates)
      .where(
        and(
          isNotNull(schema.threadCandidates.imageFilename),
          isNull(schema.threadCandidates.dominantColor),
        ),
      );

    console.log(`\nCandidates: ${candidatesToProcess.length} need processing`);
    if (candidatesToProcess.length > 0) {
      await processBatch(
        candidatesToProcess as { id: number; imageFilename: string }[],
        (item) => fetchFromS3(item.imageFilename),
        async (id, color) => {
          await db
            .update(schema.threadCandidates)
            .set({ dominantColor: color })
            .where(eq(schema.threadCandidates.id, id));
        },
        "Candidates",
      );
    }

    console.log("\n=== Backfill Complete ===");
  } finally {
    // Close the connection on failure as well as success.
    await client.end();
  }

  process.exit(0);
}

main().catch((err) => {
  console.error("Backfill failed:", err);
  process.exit(1);
});