One-time migration script processes items, globalItems, and threadCandidates to extract dominant colors via Sharp. Idempotent, batched (10 concurrent), with progress logging. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
257 lines
6.6 KiB
TypeScript
257 lines
6.6 KiB
TypeScript
/**
|
|
* Backfill dominant colors for all existing images.
|
|
*
|
|
* Usage:
|
|
* DATABASE_URL=postgres://... S3_ENDPOINT=... bun run scripts/backfill-dominant-colors.ts
|
|
*
|
|
* Idempotent — skips records that already have dominantColor set.
|
|
* Processes in batches of 10 concurrent requests.
|
|
*/
|
|
|
|
import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";
|
|
import { and, eq, isNotNull, isNull } from "drizzle-orm";
|
|
import { drizzle } from "drizzle-orm/postgres-js";
|
|
import postgres from "postgres";
|
|
import sharp from "sharp";
|
|
import * as schema from "../src/db/schema.ts";
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Setup
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Postgres connection string. Nothing can be backfilled without it, so the
// script aborts immediately when it is missing.
const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
  console.error("DATABASE_URL environment variable is required");
  process.exit(1);
}

// Raw postgres-js connection (closed at the end of main) and the drizzle
// wrapper bound to the project schema.
const client = postgres(DATABASE_URL);
const db = drizzle(client, { schema });

// Explicit static credentials are only handed to the SDK when BOTH halves are
// configured. Previously the values were force-asserted with `!`, so a missing
// variable produced `undefined` credentials and every S3 download failed.
// When they are absent we leave `credentials` out entirely so the SDK can
// resolve them itself, and say so up front.
const s3AccessKeyId = process.env.S3_ACCESS_KEY;
const s3SecretAccessKey = process.env.S3_SECRET_KEY;
const s3Credentials =
  s3AccessKeyId && s3SecretAccessKey
    ? { accessKeyId: s3AccessKeyId, secretAccessKey: s3SecretAccessKey }
    : undefined;
if (!s3Credentials) {
  console.warn(
    "S3_ACCESS_KEY / S3_SECRET_KEY are not both set; relying on the AWS SDK default credential resolution",
  );
}

// S3 client used to download stored images. Endpoint and region come from the
// environment; path-style addressing is always forced.
const s3 = new S3Client({
  endpoint: process.env.S3_ENDPOINT,
  region: process.env.S3_REGION ?? "us-east-1",
  ...(s3Credentials ? { credentials: s3Credentials } : {}),
  forcePathStyle: true,
});

// Bucket that image filenames are resolved against.
const bucket = process.env.S3_BUCKET ?? "gearbox-images";
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Derives a single representative color from an encoded image.
 *
 * The image is shrunk to one pixel with sharp and the first three bytes of the
 * raw output are formatted as a lower-case `#rrggbb` string.
 * NOTE(review): assumes those three bytes are the R, G and B channels —
 * confirm for inputs that are not plain RGB/RGBA.
 *
 * @param buffer Encoded image bytes.
 * @returns The hex color, or `null` when sharp rejects the input or fewer
 *          than three channel bytes are produced.
 */
async function extractColor(buffer: Buffer): Promise<string | null> {
  // Two-digit hex for one 0-255 channel value.
  const toHexPair = (channel: number): string =>
    channel.toString(16).padStart(2, "0");

  try {
    const pixel = await sharp(buffer)
      .resize(1, 1)
      .raw()
      .toBuffer({ resolveWithObject: true });

    // A missing channel is `undefined` here; formatting it throws and is
    // turned into `null` by the catch below.
    const [red, green, blue] = pixel.data;
    return `#${toHexPair(red)}${toHexPair(green)}${toHexPair(blue)}`;
  } catch {
    return null;
  }
}
|
|
|
|
/**
 * Downloads one object from the configured bucket.
 *
 * Best-effort: any failure (request error, missing body, stream error) is
 * reported as `null` rather than thrown.
 *
 * @param filename Object key inside the bucket.
 * @returns The object's bytes, or `null` when it could not be read.
 */
async function fetchFromS3(filename: string): Promise<Buffer | null> {
  try {
    const command = new GetObjectCommand({ Bucket: bucket, Key: filename });
    const { Body: body } = await s3.send(command);

    // No body on the response means there is nothing to decode.
    if (body == null) {
      return null;
    }

    const payload = await body.transformToByteArray();
    if (payload) {
      return Buffer.from(payload);
    }
    return null;
  } catch {
    return null;
  }
}
|
|
|
|
/**
 * Downloads the resource at `url`, giving up after ten seconds.
 *
 * Best-effort: a non-2xx response, a timeout, an unparsable URL or any other
 * fetch error all yield `null` instead of throwing.
 *
 * @param url Address of the image to download.
 * @returns The response body as a Buffer, or `null` on any failure.
 */
async function fetchFromUrl(url: string): Promise<Buffer | null> {
  try {
    // Abort the request if it has not completed within 10 000 ms.
    const deadline = AbortSignal.timeout(10000);
    const reply = await fetch(url, { signal: deadline });

    if (reply.ok) {
      const payload = await reply.arrayBuffer();
      return Buffer.from(payload);
    }
    return null;
  } catch {
    return null;
  }
}
|
|
|
|
/** Number of records handled concurrently in one batch. */
const BATCH_SIZE = 10;

/**
 * Runs the fetch → extract color → persist pipeline over `items`, at most
 * `BATCH_SIZE` records at a time, printing a progress line after each batch.
 *
 * A record counts as failed when its image cannot be fetched, when no color
 * can be extracted, or when any step throws (for example the database write).
 * Thrown errors are logged with the record id; they never abort the run.
 *
 * @param items     Records to process; only `id` is read here.
 * @param getBuffer Loads the image bytes for a record, or `null` if unavailable.
 * @param updateFn  Persists the extracted color for the record with the given id.
 * @param label     Prefix used in progress and error log lines.
 * @returns Totals of records updated and records that failed.
 */
async function processBatch<T extends { id: number }>(
  items: T[],
  getBuffer: (item: T) => Promise<Buffer | null>,
  updateFn: (id: number, color: string) => Promise<void>,
  label: string,
) {
  let processed = 0;
  let updated = 0;
  let failed = 0;

  for (let i = 0; i < items.length; i += BATCH_SIZE) {
    const batch = items.slice(i, i + BATCH_SIZE);

    // Each task resolves to true when the record was updated and false when
    // it was skipped because no image or no color was available.
    const results = await Promise.allSettled(
      batch.map(async (item): Promise<boolean> => {
        const buffer = await getBuffer(item);
        if (!buffer) {
          return false;
        }
        const color = await extractColor(buffer);
        if (!color) {
          return false;
        }
        await updateFn(item.id, color);
        return true;
      }),
    );

    // allSettled never rejects, so rejections must be inspected explicitly;
    // otherwise a failed write would be counted as neither updated nor failed.
    results.forEach((result, index) => {
      if (result.status === "fulfilled") {
        if (result.value) {
          updated++;
        } else {
          failed++;
        }
        return;
      }
      failed++;
      console.error(
        ` ${label}: record ${batch[index]?.id} failed:`,
        result.reason,
      );
    });

    processed += batch.length;
    console.log(
      ` ${label}: ${processed}/${items.length} processed, ${updated} updated, ${failed} failed`,
    );
  }

  return { updated, failed };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Main
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Backfills `dominantColor` for every record that has an image reference but
 * no color yet, in four passes:
 *
 *  1. items              — image loaded with fetchFromS3(imageFilename)
 *  2. globalItems        — image loaded with fetchFromUrl(imageSourceUrl)
 *  3. globalItems        — image loaded with fetchFromUrl(imageUrl)
 *  4. threadCandidates   — image loaded with fetchFromS3(imageFilename)
 *
 * Each pass queries its rows only after the previous pass has finished, so a
 * globalItems row colored in pass 2 no longer matches the pass 3 filter.
 * When all passes are done the database connection is closed and the process
 * exits with status 0.
 */
async function main() {
  const { items, globalItems, threadCandidates } = schema;

  // Both globalItems passes persist the color the same way.
  const saveGlobalItemColor = async (
    id: number,
    color: string,
  ): Promise<void> => {
    await db
      .update(globalItems)
      .set({ dominantColor: color })
      .where(eq(globalItems.id, id));
  };

  console.log("=== Backfill Dominant Colors ===\n");

  // Pass 1: items that have an image file but no color.
  const pendingItems = await db
    .select({ id: items.id, imageFilename: items.imageFilename })
    .from(items)
    .where(and(isNotNull(items.imageFilename), isNull(items.dominantColor)));

  console.log(`Items: ${pendingItems.length} need processing`);
  if (pendingItems.length !== 0) {
    await processBatch(
      // The isNotNull filter above guarantees the filename is present.
      pendingItems as { id: number; imageFilename: string }[],
      (row) => fetchFromS3(row.imageFilename),
      async (id, color) => {
        await db
          .update(items)
          .set({ dominantColor: color })
          .where(eq(items.id, id));
      },
      "Items",
    );
  }

  // Pass 2: global items that have an imageSourceUrl.
  // NOTE(review): the previous comment described these as "stored in S3", but
  // they are downloaded with fetchFromUrl — confirm which is intended.
  const pendingGlobalBySource = await db
    .select({
      id: globalItems.id,
      imageSourceUrl: globalItems.imageSourceUrl,
    })
    .from(globalItems)
    .where(
      and(
        isNotNull(globalItems.imageSourceUrl),
        isNull(globalItems.dominantColor),
      ),
    );

  console.log(
    `\nGlobal Items (source URL): ${pendingGlobalBySource.length} need processing`,
  );
  if (pendingGlobalBySource.length !== 0) {
    await processBatch(
      pendingGlobalBySource as { id: number; imageSourceUrl: string }[],
      (row) => fetchFromUrl(row.imageSourceUrl),
      saveGlobalItemColor,
      "Global Items (source)",
    );
  }

  // Pass 3: global items that still lack a color and have a direct imageUrl.
  const pendingGlobalByUrl = await db
    .select({ id: globalItems.id, imageUrl: globalItems.imageUrl })
    .from(globalItems)
    .where(
      and(isNotNull(globalItems.imageUrl), isNull(globalItems.dominantColor)),
    );

  console.log(
    `\nGlobal Items (image URL): ${pendingGlobalByUrl.length} need processing`,
  );
  if (pendingGlobalByUrl.length !== 0) {
    await processBatch(
      pendingGlobalByUrl as { id: number; imageUrl: string }[],
      (row) => fetchFromUrl(row.imageUrl),
      saveGlobalItemColor,
      "Global Items (URL)",
    );
  }

  // Pass 4: thread candidates that have an image file but no color.
  const pendingCandidates = await db
    .select({
      id: threadCandidates.id,
      imageFilename: threadCandidates.imageFilename,
    })
    .from(threadCandidates)
    .where(
      and(
        isNotNull(threadCandidates.imageFilename),
        isNull(threadCandidates.dominantColor),
      ),
    );

  console.log(`\nCandidates: ${pendingCandidates.length} need processing`);
  if (pendingCandidates.length !== 0) {
    await processBatch(
      pendingCandidates as { id: number; imageFilename: string }[],
      (row) => fetchFromS3(row.imageFilename),
      async (id, color) => {
        await db
          .update(threadCandidates)
          .set({ dominantColor: color })
          .where(eq(threadCandidates.id, id));
      },
      "Candidates",
    );
  }

  console.log("\n=== Backfill Complete ===");
  await client.end();
  process.exit(0);
}
|
|
|
|
// Script entry point: run the backfill; if it rejects, report the error and
// exit with a non-zero status.
main().catch((error: unknown) => {
  console.error("Backfill failed:", error);
  process.exit(1);
});
|