feat: crawl-all batch runner — iterate active manufacturers by tier
This commit is contained in:
84
scripts/crawl-all.ts
Normal file
84
scripts/crawl-all.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Crawl all active manufacturers of a given tier.
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/crawl-all.ts --tier=1
|
||||
* bun run scripts/crawl-all.ts --tier=1 --dry-run
|
||||
*
|
||||
* Env vars (required unless a default is noted):
|
||||
* GEARBOX_URL — Base URL of the GearBox instance (default: http://localhost:3000)
|
||||
* GEARBOX_API_KEY — GearBox API key with write access
|
||||
* ANTHROPIC_API_KEY — Anthropic API key (passed through to crawl-manufacturer)
|
||||
*/
|
||||
|
||||
// Connection settings, read once at startup. GEARBOX_URL falls back to a local
// instance; an empty GEARBOX_API_KEY means the variable was not set.
const GEARBOX_URL = process.env.GEARBOX_URL ?? "http://localhost:3000";
const GEARBOX_API_KEY = process.env.GEARBOX_API_KEY ?? "";

/**
 * Turn `--key=value` / `--key` tokens into a key → value map.
 *
 * Tokens that do not start with `--` are ignored. A flag without `=` maps to
 * the string "true". Only the FIRST `=` separates key from value, so values
 * that themselves contain `=` (e.g. `--filter=a=b`) are kept intact.
 *
 * @param argv - Raw command-line tokens (without the runtime and script path).
 * @returns Map from flag name (without the leading dashes) to its string value.
 */
function parseFlags(argv: readonly string[]): Record<string, string> {
  return Object.fromEntries(
    argv
      .filter((token) => token.startsWith("--"))
      .map((token): [string, string] => {
        const flag = token.slice(2);
        const eq = flag.indexOf("=");
        return eq === -1 ? [flag, "true"] : [flag.slice(0, eq), flag.slice(eq + 1)];
      }),
  );
}

/**
 * Resolve the `--tier` flag to a number.
 *
 * @param raw - Raw flag value; `undefined` or an empty string selects the default tier 1.
 * @returns The numeric tier.
 * @throws Error when the value is not a finite number (e.g. `--tier=abc`, or a
 *   bare `--tier`, which parses as "true"). Without this check the tier would
 *   be NaN, match no manufacturer, and the run would "succeed" doing nothing.
 */
function parseTier(raw: string | undefined): number {
  if (raw === undefined || raw === "") return 1;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Invalid --tier value "${raw}": expected a number, e.g. --tier=1`);
  }
  return parsed;
}

const args = parseFlags(process.argv.slice(2));

// An invalid --tier throws here, aborting the script before any crawl starts.
const tier = parseTier(args["tier"]);
const dryRun = args["dry-run"] === "true";
|
||||
|
||||
/**
 * Fetch the manufacturer list from GearBox and keep the active entries of one tier.
 *
 * @param targetTier - Tier to keep; compared with strict equality against each entry's `tier`.
 * @returns Entries whose `active` flag is truthy and whose `tier` equals `targetTier`.
 * @throws Error when the request returns a non-OK status or the body is not a JSON array.
 */
async function listActiveManufacturers(targetTier: number) {
  // Drop trailing slashes so a GEARBOX_URL such as "http://host/" does not
  // produce a "//api/manufacturers" path.
  const endpoint = `${GEARBOX_URL.replace(/\/+$/, "")}/api/manufacturers`;
  const res = await fetch(endpoint);
  if (!res.ok) throw new Error(`Failed to list manufacturers: HTTP ${res.status}`);

  // Check the shape before filtering: an error object or other non-array body
  // would otherwise surface as an opaque "filter is not a function" TypeError.
  const payload: unknown = await res.json();
  if (!Array.isArray(payload)) {
    throw new Error("Failed to list manufacturers: response body is not a JSON array");
  }

  const all = payload as Array<{ slug: string; tier: number; active: boolean; name: string }>;
  return all.filter((m) => m.active && m.tier === targetTier);
}
|
||||
|
||||
/**
 * Run the batch: crawl every active manufacturer of the selected tier, one
 * after another, then print a per-manufacturer summary.
 *
 * Each crawl is a child `bun run scripts/crawl-manufacturer.ts` process that
 * shares this process's stdout, stderr and environment. A failing child is
 * recorded and the loop moves on to the next manufacturer. The process exits
 * with code 1 when GEARBOX_API_KEY is missing or when any crawl failed.
 */
async function main() {
  if (GEARBOX_API_KEY.length === 0) {
    console.error("GEARBOX_API_KEY env var is required");
    process.exit(1);
  }

  const targets = await listActiveManufacturers(tier);
  console.log(`Found ${targets.length} active tier-${tier} manufacturers\n`);

  type Outcome = { slug: string; status: "ok" | "error"; error?: string };
  const outcomes: Outcome[] = [];

  // Loop-invariant pieces: the separator line and the flags forwarded to each child.
  const divider = "─".repeat(50);
  const forwardedFlags = dryRun ? ["--dry-run"] : [];

  for (const { name, slug } of targets) {
    console.log(`\n${divider}`);
    console.log(`Crawling: ${name} (${slug})`);

    try {
      const command = [
        "bun",
        "run",
        "scripts/crawl-manufacturer.ts",
        `--manufacturer=${slug}`,
      ].concat(forwardedFlags);
      const child = Bun.spawn(command, {
        stdout: "inherit",
        stderr: "inherit",
        env: process.env,
      });

      // A non-zero exit code is treated the same way as a spawn failure.
      const code = await child.exited;
      if (code !== 0) throw new Error(`Exited with code ${code}`);

      outcomes.push({ slug, status: "ok" });
    } catch (err) {
      const message = (err as Error).message;
      console.error(` ERROR: ${message}`);
      outcomes.push({ slug, status: "error", error: message });
    }
  }

  console.log(`\n${"=".repeat(50)}`);
  console.log("Summary:");
  outcomes.forEach(({ slug, status, error }) => {
    const mark = status === "ok" ? "✓" : "✗";
    const detail = error ? ` — ${error}` : "";
    console.log(` ${mark} ${slug}${detail}`);
  });

  // Signal overall failure to the caller (e.g. CI) if any single crawl failed.
  const failureCount = outcomes.filter((o) => o.status === "error").length;
  if (failureCount > 0) {
    console.error(`\n${failureCount} manufacturer(s) failed`);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the run; any rejection that escapes main() is logged and mapped to exit code 1.
main().then(undefined, (reason) => {
  console.error(reason);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user