feat(29-04): create backfill script for dominant colors
One-time migration script that processes items, globalItems, and threadCandidates to extract dominant colors via Sharp. It is idempotent and batched (10 concurrent), with progress logging. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,7 +14,8 @@
|
||||
"test:e2e": "bunx playwright test",
|
||||
"test:e2e:ui": "bunx playwright test --ui",
|
||||
"lint": "bunx @biomejs/biome check .",
|
||||
"db:seed:dev": "bun run src/db/dev-seed.ts"
|
||||
"db:seed:dev": "bun run src/db/dev-seed.ts",
|
||||
"backfill:colors": "bun run scripts/backfill-dominant-colors.ts"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "^2.4.7",
|
||||
|
||||
256
scripts/backfill-dominant-colors.ts
Normal file
256
scripts/backfill-dominant-colors.ts
Normal file
@@ -0,0 +1,256 @@
|
||||
/**
|
||||
* Backfill dominant colors for all existing images.
|
||||
*
|
||||
* Usage:
|
||||
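 *   DATABASE_URL=postgres://... S3_ENDPOINT=... S3_ACCESS_KEY=... S3_SECRET_KEY=... bun run scripts/backfill-dominant-colors.ts
 *   (S3_REGION and S3_BUCKET are optional; they default to "us-east-1" and "gearbox-images".)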
|
||||
*
|
||||
* Idempotent — skips records that already have dominantColor set.
|
||||
* Processes in batches of 10 concurrent requests.
|
||||
*/
|
||||
|
||||
import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";
|
||||
import { and, eq, isNotNull, isNull } from "drizzle-orm";
|
||||
import { drizzle } from "drizzle-orm/postgres-js";
|
||||
import postgres from "postgres";
|
||||
import sharp from "sharp";
|
||||
import * as schema from "../src/db/schema.ts";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Setup
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Fail fast on missing configuration: every later step needs the database,
// and the S3-backed passes cannot succeed without credentials.
const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
	console.error("DATABASE_URL environment variable is required");
	process.exit(1);
}

const S3_ACCESS_KEY = process.env.S3_ACCESS_KEY;
const S3_SECRET_KEY = process.env.S3_SECRET_KEY;
if (!S3_ACCESS_KEY || !S3_SECRET_KEY) {
	// Without this check the S3 client is built with undefined credentials and
	// each S3 fetch fails individually instead of the script stopping up front.
	console.error(
		"S3_ACCESS_KEY and S3_SECRET_KEY environment variables are required",
	);
	process.exit(1);
}

// Database handle shared by all queries and updates in this script.
const client = postgres(DATABASE_URL);
const db = drizzle(client, { schema });

// S3 client; endpoint and region come from the environment, with path-style
// addressing forced on.
const s3 = new S3Client({
	endpoint: process.env.S3_ENDPOINT,
	region: process.env.S3_REGION ?? "us-east-1",
	credentials: {
		accessKeyId: S3_ACCESS_KEY,
		secretAccessKey: S3_SECRET_KEY,
	},
	forcePathStyle: true,
});

// Bucket that image filenames are resolved against.
const bucket = process.env.S3_BUCKET ?? "gearbox-images";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Derives a single representative color for an image by shrinking it to one
 * pixel and reading that pixel's raw channel values.
 *
 * @param buffer - Encoded image bytes to decode with Sharp.
 * @returns A `#rrggbb` hex string, or `null` if the image cannot be processed.
 */
async function extractColor(buffer: Buffer): Promise<string | null> {
	try {
		const { data, info } = await sharp(buffer)
			.resize(1, 1)
			.raw()
			.toBuffer({ resolveWithObject: true });

		// Raw output may carry fewer than three channels (e.g. greyscale
		// sources); in that case the single sample stands in for R, G and B.
		const isGrey = info.channels < 3;
		const red = data[0];
		const green = isGrey ? data[0] : data[1];
		const blue = isGrey ? data[0] : data[2];
		if (red === undefined || green === undefined || blue === undefined) {
			return null;
		}

		const hex = [red, green, blue]
			.map((channel) => channel.toString(16).padStart(2, "0"))
			.join("");
		return `#${hex}`;
	} catch {
		// Undecodable input is reported as "no color"; the caller counts it
		// as a failure.
		return null;
	}
}
|
||||
|
||||
/**
 * Downloads one object from the configured bucket.
 *
 * @param filename - Object key within the bucket.
 * @returns The object's bytes, or `null` if the request fails or the response
 *   has no body. Failures are logged with the key so they can be diagnosed.
 */
async function fetchFromS3(filename: string): Promise<Buffer | null> {
	try {
		const response = await s3.send(
			new GetObjectCommand({ Bucket: bucket, Key: filename }),
		);
		const bytes = await response.Body?.transformToByteArray();
		if (!bytes) {
			console.warn(` S3 object "${filename}" returned no body`);
			return null;
		}
		return Buffer.from(bytes);
	} catch (error: unknown) {
		// Best-effort: one bad object must not abort the run, but the reason
		// (missing key, bad credentials, ...) is worth surfacing.
		const reason = error instanceof Error ? error.message : String(error);
		console.warn(` S3 fetch failed for "${filename}": ${reason}`);
		return null;
	}
}
|
||||
|
||||
/**
 * Downloads a resource over HTTP(S) with a 10 second timeout.
 *
 * @param url - Absolute URL to fetch.
 * @returns The response body as a Buffer, or `null` when the URL is invalid,
 *   the request fails or times out, or the status is not OK. Each failure is
 *   logged with the URL.
 */
async function fetchFromUrl(url: string): Promise<Buffer | null> {
	try {
		const response = await fetch(url, {
			signal: AbortSignal.timeout(10000),
		});
		if (!response.ok) {
			console.warn(` Fetch of ${url} returned HTTP ${response.status}`);
			return null;
		}
		return Buffer.from(await response.arrayBuffer());
	} catch (error: unknown) {
		// Best-effort: report the reason and let the caller count a failure.
		const reason = error instanceof Error ? error.message : String(error);
		console.warn(` Fetch of ${url} failed: ${reason}`);
		return null;
	}
}
|
||||
|
||||
/** Number of records fetched, analysed and updated concurrently. */
const BATCH_SIZE = 10;

/**
 * Runs the fetch → extract color → persist pipeline over `items`, working on
 * `BATCH_SIZE` records at a time and logging progress after every batch.
 *
 * A record counts as failed when its image cannot be fetched, no color can be
 * extracted, or any step throws (for example the database update). One failing
 * record never stops the rest of the batch.
 *
 * @param items - Records to process; only `id` is read here.
 * @param getBuffer - Loads the image bytes for a record, or `null` if unavailable.
 * @param updateFn - Persists the extracted color for the record with the given id.
 * @param label - Prefix used in progress and error log lines.
 * @returns Totals of records updated and records that failed.
 */
async function processBatch<T extends { id: number }>(
	items: T[],
	getBuffer: (item: T) => Promise<Buffer | null>,
	updateFn: (id: number, color: string) => Promise<void>,
	label: string,
) {
	let processed = 0;
	let updated = 0;
	let failed = 0;

	for (let i = 0; i < items.length; i += BATCH_SIZE) {
		const batch = items.slice(i, i + BATCH_SIZE);

		// Each task resolves to true when the record was updated and false
		// when it was skipped for lack of an image or a color.
		const outcomes = await Promise.allSettled(
			batch.map(async (item) => {
				const buffer = await getBuffer(item);
				if (!buffer) return false;

				const color = await extractColor(buffer);
				if (!color) return false;

				await updateFn(item.id, color);
				return true;
			}),
		);

		outcomes.forEach((outcome, index) => {
			if (outcome.status === "fulfilled") {
				if (outcome.value) {
					updated++;
				} else {
					failed++;
				}
				return;
			}
			// A rejection (e.g. the update threw) must be counted and
			// reported; otherwise the totals silently miss the record.
			failed++;
			console.error(
				` ${label}: record ${batch[index]?.id} failed:`,
				outcome.reason,
			);
		});

		processed += batch.length;
		console.log(
			` ${label}: ${processed}/${items.length} processed, ${updated} updated, ${failed} failed`,
		);
	}

	return { updated, failed };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Script entry point. Runs four backfill passes in sequence — items, global
 * items by source URL, global items by image URL, thread candidates — each
 * selecting only rows that have an image reference and a null dominantColor.
 * Closes the database connection and exits with status 0 when done.
 */
async function main() {
	console.log("=== Backfill Dominant Colors ===\n");

	const { items, globalItems, threadCandidates } = schema;

	// Both global-item passes write to the same table, so they share a writer.
	const saveGlobalItemColor = async (
		id: number,
		color: string,
	): Promise<void> => {
		await db
			.update(globalItems)
			.set({ dominantColor: color })
			.where(eq(globalItems.id, id));
	};

	// Pass 1: items whose image is referenced by filename and fetched via fetchFromS3.
	const pendingItems = await db
		.select({ id: items.id, imageFilename: items.imageFilename })
		.from(items)
		.where(and(isNotNull(items.imageFilename), isNull(items.dominantColor)));

	console.log(`Items: ${pendingItems.length} need processing`);
	if (pendingItems.length !== 0) {
		await processBatch(
			// The query filters out null filenames, hence the narrowed type.
			pendingItems as { id: number; imageFilename: string }[],
			(row) => fetchFromS3(row.imageFilename),
			async (id, color) => {
				await db
					.update(items)
					.set({ dominantColor: color })
					.where(eq(items.id, id));
			},
			"Items",
		);
	}

	// Pass 2: global items that have an imageSourceUrl.
	// NOTE(review): imageSourceUrl is fetched over HTTP here, not through the
	// S3 client — confirm that is the intended source.
	const pendingGlobalBySource = await db
		.select({
			id: globalItems.id,
			imageSourceUrl: globalItems.imageSourceUrl,
		})
		.from(globalItems)
		.where(
			and(
				isNotNull(globalItems.imageSourceUrl),
				isNull(globalItems.dominantColor),
			),
		);

	console.log(
		`\nGlobal Items (source URL): ${pendingGlobalBySource.length} need processing`,
	);
	if (pendingGlobalBySource.length !== 0) {
		await processBatch(
			pendingGlobalBySource as { id: number; imageSourceUrl: string }[],
			(row) => fetchFromUrl(row.imageSourceUrl),
			saveGlobalItemColor,
			"Global Items (source)",
		);
	}

	// Pass 3: global items that have an imageUrl. Queried after pass 2, so rows
	// that received a color there no longer match.
	const pendingGlobalByUrl = await db
		.select({ id: globalItems.id, imageUrl: globalItems.imageUrl })
		.from(globalItems)
		.where(
			and(isNotNull(globalItems.imageUrl), isNull(globalItems.dominantColor)),
		);

	console.log(
		`\nGlobal Items (image URL): ${pendingGlobalByUrl.length} need processing`,
	);
	if (pendingGlobalByUrl.length !== 0) {
		await processBatch(
			pendingGlobalByUrl as { id: number; imageUrl: string }[],
			(row) => fetchFromUrl(row.imageUrl),
			saveGlobalItemColor,
			"Global Items (URL)",
		);
	}

	// Pass 4: thread candidates, fetched by filename like items.
	const pendingCandidates = await db
		.select({
			id: threadCandidates.id,
			imageFilename: threadCandidates.imageFilename,
		})
		.from(threadCandidates)
		.where(
			and(
				isNotNull(threadCandidates.imageFilename),
				isNull(threadCandidates.dominantColor),
			),
		);

	console.log(`\nCandidates: ${pendingCandidates.length} need processing`);
	if (pendingCandidates.length !== 0) {
		await processBatch(
			pendingCandidates as { id: number; imageFilename: string }[],
			(row) => fetchFromS3(row.imageFilename),
			async (id, color) => {
				await db
					.update(threadCandidates)
					.set({ dominantColor: color })
					.where(eq(threadCandidates.id, id));
			},
			"Candidates",
		);
	}

	console.log("\n=== Backfill Complete ===");
	await client.end();
	process.exit(0);
}
|
||||
|
||||
/** Reports an error that escaped main() and exits with a non-zero status. */
function abortOnFailure(error: unknown): never {
	console.error("Backfill failed:", error);
	process.exit(1);
}

main().catch(abortOnFailure);
|
||||
Reference in New Issue
Block a user