chore: archive v2.2 User Experience Polish milestone
Phases 28-31 archived to milestones/v2.2-phases/. Requirements and roadmap snapshots archived to milestones/. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,271 @@
|
||||
---
|
||||
phase: 29
|
||||
plan: 04
|
||||
type: backend
|
||||
wave: 2
|
||||
depends_on: [01]
|
||||
files_modified:
|
||||
- scripts/backfill-dominant-colors.ts
|
||||
autonomous: true
|
||||
requirements: []
|
||||
---
|
||||
|
||||
<objective>
|
||||
Create a one-time backfill script that processes all existing images in the database to extract and store their dominant color. Handles items, globalItems, and threadCandidates with imageFilename, plus globalItems with external imageUrl.
|
||||
</objective>
|
||||
|
||||
<tasks>
|
||||
|
||||
### Task 1: Create backfill script
|
||||
<task type="code">
|
||||
<read_first>
|
||||
- src/db/schema.ts
|
||||
- src/server/services/storage.service.ts
|
||||
- src/server/services/image.service.ts
|
||||
</read_first>
|
||||
<action>
|
||||
Create `scripts/backfill-dominant-colors.ts`:
|
||||
|
||||
```ts
|
||||
/**
|
||||
* Backfill dominant colors for all existing images.
|
||||
* Run with: bun run scripts/backfill-dominant-colors.ts
|
||||
*
|
||||
* Idempotent — skips records that already have dominantColor set.
|
||||
* Processes in batches of 10 concurrent requests.
|
||||
*/
|
||||
|
||||
import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";
|
||||
import { drizzle } from "drizzle-orm/postgres-js";
|
||||
import { isNull } from "drizzle-orm";
|
||||
import postgres from "postgres";
|
||||
import sharp from "sharp";
|
||||
import * as schema from "../src/db/schema";
|
||||
|
||||
/**
 * Reads a required environment variable.
 *
 * @param name Name of the variable in `process.env`.
 * @returns The variable's value.
 * @throws Error naming the variable when it is unset or empty, so a
 *   misconfigured run stops immediately with an actionable message.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) throw new Error(`${name} required`);
  return value;
}

// Database connection (postgres.js client wrapped by drizzle).
const DATABASE_URL = requireEnv("DATABASE_URL");

const client = postgres(DATABASE_URL);
const db = drizzle(client, { schema });

// S3 client. Credentials are validated here rather than asserted with `!`:
// fetchFromS3 deliberately swallows request errors, so a missing key would
// otherwise only show up as every image being counted as "failed".
const s3 = new S3Client({
  endpoint: process.env.S3_ENDPOINT,
  region: process.env.S3_REGION ?? "us-east-1",
  credentials: {
    accessKeyId: requireEnv("S3_ACCESS_KEY"),
    secretAccessKey: requireEnv("S3_SECRET_KEY"),
  },
  forcePathStyle: true,
});
const bucket = process.env.S3_BUCKET ?? "gearbox-images";
|
||||
|
||||
/**
 * Shrinks an image to a single pixel with sharp and returns that pixel as a
 * lowercase `#rrggbb` hex string.
 *
 * @param buffer Encoded image bytes.
 * @returns The hex colour, or `null` when sharp fails to process the image.
 */
async function extractColor(buffer: Buffer): Promise<string | null> {
  try {
    const { data, info } = await sharp(buffer)
      .resize(1, 1)
      .raw()
      .toBuffer({ resolveWithObject: true });

    // If sharp reports fewer than three channels (presumably greyscale raw
    // output — TODO confirm), data[1]/data[2] are not green/blue. Reuse the
    // first channel for all three instead of indexing past the pixel.
    const hasRgb = info.channels >= 3;
    const red = data[0];
    const green = hasRgb ? data[1] : red;
    const blue = hasRgb ? data[2] : red;

    const toHex = (channel: number): string => channel.toString(16).padStart(2, "0");
    return `#${toHex(red)}${toHex(green)}${toHex(blue)}`;
  } catch {
    // Best-effort: an undecodable image is reported to the caller as null.
    return null;
  }
}
|
||||
|
||||
/**
 * Downloads one object from the configured bucket.
 *
 * @param filename Object key within `bucket`.
 * @returns The object's bytes, or `null` when the request fails or the
 *   response carries no body. Failures are logged, never thrown.
 */
async function fetchFromS3(filename: string): Promise<Buffer | null> {
  try {
    const response = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: filename }));
    const bytes = await response.Body?.transformToByteArray();
    if (!bytes) {
      console.warn(`S3 object "${filename}" returned no body`);
      return null;
    }
    return Buffer.from(bytes);
  } catch (error: unknown) {
    // Best-effort: one unreadable object must not abort the run, but record
    // why it was skipped so failures can be diagnosed afterwards.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`Failed to fetch "${filename}" from S3: ${reason}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Downloads an image over HTTP(S), aborting after 10 seconds.
 *
 * @param url Absolute URL of the image.
 * @returns The response bytes, or `null` when the URL is invalid, the request
 *   fails or times out, or the server answers with a non-2xx status.
 *   Failures are logged, never thrown.
 */
async function fetchFromUrl(url: string): Promise<Buffer | null> {
  try {
    const response = await fetch(url, { signal: AbortSignal.timeout(10000) });
    if (!response.ok) {
      console.warn(`Failed to fetch ${url}: HTTP ${response.status}`);
      return null;
    }
    return Buffer.from(await response.arrayBuffer());
  } catch (error: unknown) {
    // Best-effort: one unreachable URL must not abort the run, but record
    // why it was skipped so failures can be diagnosed afterwards.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`Failed to fetch ${url}: ${reason}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Runs the fetch -> extract -> update pipeline over `items`, at most 10 rows
 * concurrently, and logs a running tally after each group of 10.
 *
 * A row counts as "updated" only when `updateFn` resolves. It counts as
 * "failed" when no buffer or colour could be obtained, or when any step
 * rejects (for example a database error inside `updateFn`).
 *
 * @param items Rows to process; only `id` is read here, the rest is for `getBuffer`.
 * @param getBuffer Loads the image bytes for a row, resolving `null` when unavailable.
 * @param updateFn Persists the extracted colour for the row with the given id.
 * @param label Prefix used in progress and warning lines.
 * @returns Final counts; `processed` always equals `updated + failed`.
 */
async function processBatch<T extends { id: number }>(
  items: T[],
  getBuffer: (item: T) => Promise<Buffer | null>,
  updateFn: (id: number, color: string) => Promise<void>,
  label: string,
): Promise<{ processed: number; updated: number; failed: number }> {
  const BATCH_SIZE = 10;
  let processed = 0;
  let updated = 0;
  let failed = 0;

  for (let i = 0; i < items.length; i += BATCH_SIZE) {
    const batch = items.slice(i, i + BATCH_SIZE);

    // Each task resolves true when the row was updated and false when it was
    // skipped; counters are derived from the settled results afterwards so
    // that rejected tasks are accounted for as well.
    const results = await Promise.allSettled(
      batch.map(async (item): Promise<boolean> => {
        const buffer = await getBuffer(item);
        if (!buffer) return false;
        const color = await extractColor(buffer);
        if (!color) return false;
        await updateFn(item.id, color);
        return true;
      }),
    );

    results.forEach((result, index) => {
      if (result.status === "fulfilled" && result.value) {
        updated++;
        return;
      }
      failed++;
      if (result.status === "rejected") {
        console.warn(` ${label}: id ${batch[index]?.id} failed:`, result.reason);
      }
    });

    processed += batch.length;
    console.log(` ${label}: ${processed}/${items.length} processed, ${updated} updated, ${failed} failed`);
  }

  return { processed, updated, failed };
}
|
||||
|
||||
/**
 * Backfills `dominantColor` in four passes, in this order:
 *   1. items             - image read from S3 by `imageFilename`
 *   2. globalItems       - image downloaded from `imageSourceUrl`
 *   3. globalItems       - image downloaded from `imageUrl`
 *   4. threadCandidates  - image read from S3 by `imageFilename`
 *
 * Every query selects only rows whose `dominantColor` is NULL, so re-running
 * the script skips rows that are already filled in, and pass 3 skips any
 * globalItems row that pass 2 has just updated.
 *
 * Exits the process with code 0 once all passes have finished.
 */
async function main() {
  console.log("=== Backfill Dominant Colors ===\n");

  // Load the query helpers once for the whole run instead of once per update.
  const { eq, and, isNotNull } = await import("drizzle-orm");

  // Items with imageFilename but no dominantColor
  const itemsToProcess = await db
    .select({ id: schema.items.id, imageFilename: schema.items.imageFilename })
    .from(schema.items)
    .where(and(isNotNull(schema.items.imageFilename), isNull(schema.items.dominantColor)));

  console.log(`Items: ${itemsToProcess.length} need processing`);
  await processBatch(
    // The WHERE clause excludes NULL imageFilename, so the narrowing cast holds.
    itemsToProcess as { id: number; imageFilename: string }[],
    (item) => fetchFromS3(item.imageFilename),
    async (id, color) => {
      await db.update(schema.items).set({ dominantColor: color }).where(eq(schema.items.id, id));
    },
    "Items",
  );

  // GlobalItems with imageSourceUrl (downloaded over HTTP from that URL)
  const globalWithFile = await db
    .select({ id: schema.globalItems.id, imageSourceUrl: schema.globalItems.imageSourceUrl })
    .from(schema.globalItems)
    .where(and(isNotNull(schema.globalItems.imageSourceUrl), isNull(schema.globalItems.dominantColor)));

  console.log(`\nGlobal Items (with source URL): ${globalWithFile.length} need processing`);
  await processBatch(
    // The WHERE clause excludes NULL imageSourceUrl, so the narrowing cast holds.
    globalWithFile as { id: number; imageSourceUrl: string }[],
    (item) => fetchFromUrl(item.imageSourceUrl),
    async (id, color) => {
      await db.update(schema.globalItems).set({ dominantColor: color }).where(eq(schema.globalItems.id, id));
    },
    "Global Items",
  );

  // GlobalItems with imageUrl (direct URLs); rows filled in by the previous
  // pass no longer match the dominantColor IS NULL filter.
  const globalWithUrl = await db
    .select({ id: schema.globalItems.id, imageUrl: schema.globalItems.imageUrl })
    .from(schema.globalItems)
    .where(and(isNotNull(schema.globalItems.imageUrl), isNull(schema.globalItems.dominantColor)));

  console.log(`\nGlobal Items (with image URL): ${globalWithUrl.length} need processing`);
  await processBatch(
    // The WHERE clause excludes NULL imageUrl, so the narrowing cast holds.
    globalWithUrl as { id: number; imageUrl: string }[],
    (item) => fetchFromUrl(item.imageUrl),
    async (id, color) => {
      await db.update(schema.globalItems).set({ dominantColor: color }).where(eq(schema.globalItems.id, id));
    },
    "Global Items (URL)",
  );

  // Thread candidates
  const candidatesToProcess = await db
    .select({ id: schema.threadCandidates.id, imageFilename: schema.threadCandidates.imageFilename })
    .from(schema.threadCandidates)
    .where(and(isNotNull(schema.threadCandidates.imageFilename), isNull(schema.threadCandidates.dominantColor)));

  console.log(`\nCandidates: ${candidatesToProcess.length} need processing`);
  await processBatch(
    // The WHERE clause excludes NULL imageFilename, so the narrowing cast holds.
    candidatesToProcess as { id: number; imageFilename: string }[],
    (item) => fetchFromS3(item.imageFilename),
    async (id, color) => {
      await db.update(schema.threadCandidates).set({ dominantColor: color }).where(eq(schema.threadCandidates.id, id));
    },
    "Candidates",
  );

  console.log("\n=== Backfill Complete ===");
  process.exit(0);
}
|
||||
|
||||
/**
 * Last-resort handler for the script: reports whatever escaped `main()` and
 * terminates the process with exit code 1.
 */
function reportFatal(cause: unknown): never {
  console.error("Backfill failed:", cause);
  process.exit(1);
}

// Script entry point.
main().catch(reportFatal);
|
||||
```
|
||||
|
||||
Note: The exact import patterns for drizzle-orm may need adjustment based on the project's existing database connection setup. Check `src/db/` for the actual connection pattern used and replicate it in the script.
|
||||
</action>
|
||||
<verify>
|
||||
<automated>test -f scripts/backfill-dominant-colors.ts && grep "extractColor" scripts/backfill-dominant-colors.ts && grep "processBatch" scripts/backfill-dominant-colors.ts && echo "PASS" || echo "FAIL"</automated>
|
||||
</verify>
|
||||
<acceptance_criteria>
|
||||
- `scripts/backfill-dominant-colors.ts` exists
|
||||
- Script queries items, globalItems, threadCandidates with images but no dominantColor
|
||||
- Processes in batches of 10 concurrent
|
||||
- Extracts dominant color via Sharp resize(1,1)
|
||||
- Updates database records with extracted color
|
||||
- Skips records that already have dominantColor (idempotent)
|
||||
- Logs progress: `Items: 45/123 processed, 42 updated, 3 failed`
|
||||
- Handles errors gracefully (skips failed images, logs them)
|
||||
- Exits with 0 on success, 1 on fatal error
|
||||
</acceptance_criteria>
|
||||
</task>
|
||||
|
||||
### Task 2: Add npm script for backfill
|
||||
<task type="code">
|
||||
<read_first>
|
||||
- package.json
|
||||
</read_first>
|
||||
<action>
|
||||
Add to `scripts` section in `package.json`:
|
||||
```json
|
||||
"backfill:colors": "bun run scripts/backfill-dominant-colors.ts"
|
||||
```
|
||||
</action>
|
||||
<verify>
|
||||
<automated>grep "backfill:colors" package.json && echo "PASS" || echo "FAIL"</automated>
|
||||
</verify>
|
||||
<acceptance_criteria>
|
||||
- package.json contains `"backfill:colors"` script
|
||||
- Script points to `scripts/backfill-dominant-colors.ts`
|
||||
</acceptance_criteria>
|
||||
</task>
|
||||
|
||||
</tasks>
|
||||
|
||||
<verification>
|
||||
1. `bun run lint` passes (script follows project conventions)
|
||||
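2. Script is syntactically valid and type-checks: `bunx tsc --noEmit`. Do not use `bun run scripts/backfill-dominant-colors.ts --help` as a syntax check — the script does not parse command-line arguments, so it would start the real backfill.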
|
||||
3. Script handles missing S3 credentials gracefully (error message, not crash)
|
||||
</verification>
|
||||
|
||||
<success_criteria>
|
||||
- Backfill script exists and processes all 3 tables
|
||||
- Script is idempotent (safe to re-run)
|
||||
- Batch processing limits concurrency to 10
|
||||
- Progress logging shows processing status
|
||||
- npm script shortcut available
|
||||
</success_criteria>
|
||||
|
||||
<threat_model>
|
||||
| Threat | Severity | Mitigation |
|
||||
|--------|----------|------------|
|
||||
| S3 credential exposure in script | Low | Uses env vars from process.env, no hardcoded credentials |
|
||||
| SSRF via globalItems imageUrl | Medium | Script only processes URLs already stored in the database (previously validated on ingestion); fetch has 10s timeout |
|
||||
| Database overload from bulk updates | Low | Batch size of 10 limits concurrent DB writes |
|
||||
</threat_model>
|
||||
|
||||
<must_haves>
|
||||
- [ ] Backfill script at scripts/backfill-dominant-colors.ts
|
||||
- [ ] Processes items, globalItems, threadCandidates
|
||||
- [ ] Idempotent (skips existing dominantColor)
|
||||
- [ ] Batch processing with concurrency limit
|
||||
- [ ] Progress logging
|
||||
- [ ] npm script shortcut
|
||||
</must_haves>
|
||||
Reference in New Issue
Block a user