feat(29-04): create backfill script for dominant colors

One-time migration script processes items, globalItems, and
threadCandidates to extract dominant colors via Sharp. Idempotent,
batched (10 concurrent), with progress logging.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-12 20:09:17 +02:00
parent 9817a80f32
commit 6509b33501
2 changed files with 258 additions and 1 deletions

View File

@@ -14,7 +14,8 @@
"test:e2e": "bunx playwright test",
"test:e2e:ui": "bunx playwright test --ui",
"lint": "bunx @biomejs/biome check .",
"db:seed:dev": "bun run src/db/dev-seed.ts"
"db:seed:dev": "bun run src/db/dev-seed.ts",
"backfill:colors": "bun run scripts/backfill-dominant-colors.ts"
},
"devDependencies": {
"@biomejs/biome": "^2.4.7",

View File

@@ -0,0 +1,256 @@
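/**
 * Backfill dominant colors for all existing images.
 *
 * Usage:
 *   DATABASE_URL=postgres://... S3_ENDPOINT=... bun run scripts/backfill-dominant-colors.ts
 *
 * Environment variables read by this script:
 *   DATABASE_URL                  (required) Postgres connection string.
 *   S3_ENDPOINT                   S3 endpoint URL.
 *   S3_REGION                     S3 region; defaults to "us-east-1".
 *   S3_ACCESS_KEY, S3_SECRET_KEY  S3 credentials.
 *   S3_BUCKET                     Bucket name; defaults to "gearbox-images".
 *
 * Idempotent — skips records that already have dominantColor set.
 * Processes images in batches of up to 10 concurrent requests.
 */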
import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";
import { and, eq, isNotNull, isNull } from "drizzle-orm";
import { drizzle } from "drizzle-orm/postgres-js";
import postgres from "postgres";
import sharp from "sharp";
import * as schema from "../src/db/schema.ts";
// ---------------------------------------------------------------------------
// Setup
// ---------------------------------------------------------------------------
// The database connection string is mandatory: nothing can be read or
// written without it, so abort before doing any work.
const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
  console.error("DATABASE_URL environment variable is required");
  process.exit(1);
}
const client = postgres(DATABASE_URL);
const db = drizzle(client, { schema });

// Read the S3 credentials without non-null assertions so a missing variable
// is visible here instead of surfacing later as an unexplained fetch failure.
const s3AccessKeyId = process.env.S3_ACCESS_KEY;
const s3SecretAccessKey = process.env.S3_SECRET_KEY;
if (!s3AccessKeyId || !s3SecretAccessKey) {
  // Warn rather than exit: a run that only touches URL-based images does not
  // need S3 at all.
  console.warn(
    "S3_ACCESS_KEY and/or S3_SECRET_KEY are not set; S3 downloads may fail",
  );
}

const s3 = new S3Client({
  endpoint: process.env.S3_ENDPOINT,
  region: process.env.S3_REGION ?? "us-east-1",
  // Only pass explicit credentials when both halves are present; otherwise
  // leave the option out so the SDK's own credential resolution applies
  // instead of receiving an object full of `undefined`.
  ...(s3AccessKeyId && s3SecretAccessKey
    ? {
        credentials: {
          accessKeyId: s3AccessKeyId,
          secretAccessKey: s3SecretAccessKey,
        },
      }
    : {}),
  forcePathStyle: true,
});
const bucket = process.env.S3_BUCKET ?? "gearbox-images";
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Computes a single representative colour for an encoded image.
 *
 * The image is resized to one pixel with Sharp and that pixel is formatted
 * as a lowercase `#rrggbb` string. Any alpha channel is ignored.
 *
 * @param buffer - Encoded image bytes.
 * @returns The hex colour, or `null` when Sharp cannot process the buffer.
 */
async function extractColor(buffer: Buffer): Promise<string | null> {
  try {
    const { data, info } = await sharp(buffer)
      .resize(1, 1)
      .raw()
      .toBuffer({ resolveWithObject: true });

    // With fewer than three channels the pixel is grey (optionally followed
    // by alpha), so there is no data[1]/data[2] colour to read; reuse the
    // grey value for all three components instead of failing the image.
    const isGrey = info.channels < 3;
    const red = data[0];
    const green = isGrey ? red : data[1];
    const blue = isGrey ? red : data[2];
    if (red === undefined || green === undefined || blue === undefined) {
      return null;
    }

    const toHex = (value: number): string =>
      value.toString(16).padStart(2, "0");
    return `#${toHex(red)}${toHex(green)}${toHex(blue)}`;
  } catch {
    // Undecodable input is an expected outcome for a backfill; the caller
    // counts a null result as a failure.
    return null;
  }
}
/**
 * Downloads one object from the configured bucket.
 *
 * @param filename - Object key inside the bucket.
 * @returns The object bytes, or `null` when the response has no body or the
 *   request fails. Failures are logged so a misconfigured endpoint, bucket or
 *   credential does not look like a run of anonymous "failed" counts.
 */
async function fetchFromS3(filename: string): Promise<Buffer | null> {
  try {
    const response = await s3.send(
      new GetObjectCommand({ Bucket: bucket, Key: filename }),
    );
    const bytes = await response.Body?.transformToByteArray();
    return bytes ? Buffer.from(bytes) : null;
  } catch (error: unknown) {
    // Best effort: report why the download failed, then let the caller
    // count it as a failure and carry on with the rest of the batch.
    console.warn(
      `  S3 fetch failed for "${filename}":`,
      error instanceof Error ? error.message : error,
    );
    return null;
  }
}
/**
 * Downloads an image over HTTP(S), giving up after 10 seconds.
 *
 * @param url - Absolute URL of the image.
 * @returns The response body, or `null` when the response status is not OK
 *   or the request throws (timeout, network error, malformed URL). Both
 *   cases are logged with the URL so failures can be diagnosed.
 */
async function fetchFromUrl(url: string): Promise<Buffer | null> {
  try {
    const response = await fetch(url, {
      signal: AbortSignal.timeout(10000),
    });
    if (!response.ok) {
      console.warn(`  HTTP ${response.status} while fetching ${url}`);
      return null;
    }
    return Buffer.from(await response.arrayBuffer());
  } catch (error: unknown) {
    // Best effort: report the reason and let the caller count a failure.
    console.warn(
      `  Failed to fetch ${url}:`,
      error instanceof Error ? error.message : error,
    );
    return null;
  }
}
/** Maximum number of images fetched and processed concurrently. */
const BATCH_SIZE = 10;

/**
 * Processes rows in consecutive batches of `BATCH_SIZE`, running the rows of
 * one batch concurrently and printing a progress line after each batch.
 *
 * For every row: load the image bytes, extract a colour, persist it. A row
 * counts as failed when no bytes or no colour could be obtained, or when any
 * step throws. Thrown errors are logged with the row id; they never abort
 * the run.
 *
 * @param items - Rows to process; each must expose a numeric `id`.
 * @param getBuffer - Loads the image bytes for a row, or `null` if unavailable.
 * @param updateFn - Persists the extracted colour for the given row id.
 * @param label - Prefix used in progress and error output.
 * @returns Totals of updated and failed rows; they always add up to
 *   `items.length`.
 */
async function processBatch<T extends { id: number }>(
  items: T[],
  getBuffer: (item: T) => Promise<Buffer | null>,
  updateFn: (id: number, color: string) => Promise<void>,
  label: string,
) {
  let processed = 0;
  let updated = 0;
  let failed = 0;
  for (let i = 0; i < items.length; i += BATCH_SIZE) {
    const batch = items.slice(i, i + BATCH_SIZE);
    // Each task resolves to true when the row was updated and false when the
    // image or colour was unavailable; a rejection means a step threw.
    const outcomes = await Promise.allSettled(
      batch.map(async (item): Promise<boolean> => {
        const buffer = await getBuffer(item);
        if (!buffer) return false;
        const color = await extractColor(buffer);
        if (!color) return false;
        await updateFn(item.id, color);
        return true;
      }),
    );
    // Tally from the settled results so rejected tasks (for example a failed
    // database write) are counted and reported instead of vanishing.
    outcomes.forEach((outcome, index) => {
      if (outcome.status === "fulfilled") {
        if (outcome.value) {
          updated++;
        } else {
          failed++;
        }
        return;
      }
      failed++;
      console.error(
        `  ${label}: id ${batch[index]?.id} failed:`,
        outcome.reason,
      );
    });
    processed += batch.length;
    console.log(
      `  ${label}: ${processed}/${items.length} processed, ${updated} updated, ${failed} failed`,
    );
  }
  return { updated, failed };
}
// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------
/**
 * Runs the whole backfill: four stages executed strictly in order, each one
 * selecting rows that have an image reference but no dominantColor yet and
 * handing them to processBatch. Closes the database client and exits with
 * status 0 once every stage has finished.
 */
async function main() {
  console.log("=== Backfill Dominant Colors ===\n");

  // Writers: store a computed colour on a single row of each table.
  const saveItemColor = async (id: number, color: string): Promise<void> => {
    await db
      .update(schema.items)
      .set({ dominantColor: color })
      .where(eq(schema.items.id, id));
  };
  const saveGlobalItemColor = async (
    id: number,
    color: string,
  ): Promise<void> => {
    await db
      .update(schema.globalItems)
      .set({ dominantColor: color })
      .where(eq(schema.globalItems.id, id));
  };
  const saveCandidateColor = async (
    id: number,
    color: string,
  ): Promise<void> => {
    await db
      .update(schema.threadCandidates)
      .set({ dominantColor: color })
      .where(eq(schema.threadCandidates.id, id));
  };

  // Stage 1: items whose image is read from S3 by imageFilename.
  const pendingItems = (await db
    .select({
      id: schema.items.id,
      imageFilename: schema.items.imageFilename,
    })
    .from(schema.items)
    .where(
      and(
        isNotNull(schema.items.imageFilename),
        isNull(schema.items.dominantColor),
      ),
    )) as { id: number; imageFilename: string }[];
  console.log(`Items: ${pendingItems.length} need processing`);
  if (pendingItems.length > 0) {
    await processBatch(
      pendingItems,
      (row) => fetchFromS3(row.imageFilename),
      saveItemColor,
      "Items",
    );
  }

  // Stage 2: global items, downloading the image from imageSourceUrl.
  const pendingGlobalBySource = (await db
    .select({
      id: schema.globalItems.id,
      imageSourceUrl: schema.globalItems.imageSourceUrl,
    })
    .from(schema.globalItems)
    .where(
      and(
        isNotNull(schema.globalItems.imageSourceUrl),
        isNull(schema.globalItems.dominantColor),
      ),
    )) as { id: number; imageSourceUrl: string }[];
  console.log(
    `\nGlobal Items (source URL): ${pendingGlobalBySource.length} need processing`,
  );
  if (pendingGlobalBySource.length > 0) {
    await processBatch(
      pendingGlobalBySource,
      (row) => fetchFromUrl(row.imageSourceUrl),
      saveGlobalItemColor,
      "Global Items (source)",
    );
  }

  // Stage 3: global items still without a colour, this time via imageUrl.
  // Queried only after stage 2 so rows it already updated are excluded.
  const pendingGlobalByUrl = (await db
    .select({
      id: schema.globalItems.id,
      imageUrl: schema.globalItems.imageUrl,
    })
    .from(schema.globalItems)
    .where(
      and(
        isNotNull(schema.globalItems.imageUrl),
        isNull(schema.globalItems.dominantColor),
      ),
    )) as { id: number; imageUrl: string }[];
  console.log(
    `\nGlobal Items (image URL): ${pendingGlobalByUrl.length} need processing`,
  );
  if (pendingGlobalByUrl.length > 0) {
    await processBatch(
      pendingGlobalByUrl,
      (row) => fetchFromUrl(row.imageUrl),
      saveGlobalItemColor,
      "Global Items (URL)",
    );
  }

  // Stage 4: thread candidates, read from S3 by imageFilename.
  const pendingCandidates = (await db
    .select({
      id: schema.threadCandidates.id,
      imageFilename: schema.threadCandidates.imageFilename,
    })
    .from(schema.threadCandidates)
    .where(
      and(
        isNotNull(schema.threadCandidates.imageFilename),
        isNull(schema.threadCandidates.dominantColor),
      ),
    )) as { id: number; imageFilename: string }[];
  console.log(`\nCandidates: ${pendingCandidates.length} need processing`);
  if (pendingCandidates.length > 0) {
    await processBatch(
      pendingCandidates,
      (row) => fetchFromS3(row.imageFilename),
      saveCandidateColor,
      "Candidates",
    );
  }

  console.log("\n=== Backfill Complete ===");
  await client.end();
  process.exit(0);
}
// Script entry point: start the backfill; any error that escapes main() is
// reported and turned into a non-zero exit status.
main().catch((error: unknown) => {
  console.error("Backfill failed:", error);
  process.exit(1);
});