feat: crawl-all batch runner — iterate active manufacturers by tier
This commit is contained in:
84
scripts/crawl-all.ts
Normal file
84
scripts/crawl-all.ts
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
 * Crawl all active manufacturers of a given tier.
 *
 * Usage:
 *   bun run scripts/crawl-all.ts --tier=1
 *   bun run scripts/crawl-all.ts --tier=1 --dry-run
 *
 * Env vars (required unless a default is noted):
 *   GEARBOX_URL       — Base URL of the GearBox instance (default: http://localhost:3000)
 *   GEARBOX_API_KEY   — GearBox API key with write access
 *   ANTHROPIC_API_KEY — Anthropic API key (passed through to crawl-manufacturer)
 */
|
||||||
|
|
||||||
|
/** Base URL of the GearBox instance; falls back to a local server when GEARBOX_URL is unset. */
const GEARBOX_URL = process.env.GEARBOX_URL ?? "http://localhost:3000";

/** GearBox API key; an empty string when unset so that main() can report the missing variable. */
const GEARBOX_API_KEY = process.env.GEARBOX_API_KEY ?? "";

/**
 * Parse `--key=value` and bare `--flag` tokens into a string map.
 *
 * Tokens that do not start with `--` are ignored. A bare flag maps to the
 * string "true". Only the FIRST `=` separates key from value, so a value that
 * itself contains `=` (e.g. `--filter=a=b`) is kept intact. When a key is
 * repeated, the last occurrence wins.
 *
 * @param argv Command-line tokens, without the runtime and script path.
 * @returns Map from option name (without the leading dashes) to its raw string value.
 */
function parseCliArgs(argv: readonly string[]): Record<string, string> {
  const entries: Array<[string, string]> = [];
  for (const token of argv) {
    if (!token.startsWith("--")) continue;
    const option = token.slice(2);
    const eq = option.indexOf("=");
    if (eq === -1) {
      entries.push([option, "true"]);
    } else {
      entries.push([option.slice(0, eq), option.slice(eq + 1)]);
    }
  }
  return Object.fromEntries(entries);
}

/**
 * Convert the raw `--tier` value into a number.
 *
 * @param raw Raw option value; `undefined` or an empty string selects the default tier 1.
 * @returns The numeric tier.
 * @throws Error when the value is not a finite number (e.g. `--tier=abc`, or a bare
 *   `--tier`, which parses as "true"). Without this check the tier would be NaN,
 *   no manufacturer would ever match, and the batch would silently do nothing.
 */
function parseTier(raw: string | undefined): number {
  if (raw === undefined || raw === "") return 1;
  const value = Number(raw);
  if (!Number.isFinite(value)) {
    throw new Error(`Invalid --tier value "${raw}": expected a number, e.g. --tier=1`);
  }
  return value;
}

/** Parsed command-line options (everything after the script path). */
const args = parseCliArgs(process.argv.slice(2));

/** Manufacturer tier to crawl; defaults to 1. */
const tier = parseTier(args["tier"]);

/** True only when `--dry-run` (or `--dry-run=true`) was passed. */
const dryRun = args["dry-run"] === "true";
|
||||||
|
|
||||||
|
/** Fields of a manufacturer record that this script reads from the API response. */
interface ManufacturerRecord {
  slug: string;
  tier: number;
  active: boolean;
  name: string;
}

/**
 * Fetch the manufacturer list from `${GEARBOX_URL}/api/manufacturers` and keep
 * only the entries that are active and belong to the requested tier.
 *
 * @param targetTier Tier to select; compared to each record's `tier` with strict equality.
 * @returns The matching manufacturer records, in the order the API returned them.
 * @throws Error when the response status is not OK, or when the response body
 *   is not a JSON array.
 */
async function listActiveManufacturers(targetTier: number): Promise<ManufacturerRecord[]> {
  const res = await fetch(`${GEARBOX_URL}/api/manufacturers`);
  if (!res.ok) throw new Error(`Failed to list manufacturers: HTTP ${res.status}`);

  // Parse as unknown first: a non-array body (e.g. an error envelope) should
  // produce a clear message instead of an opaque "filter is not a function".
  const payload: unknown = await res.json();
  if (!Array.isArray(payload)) {
    throw new Error("Failed to list manufacturers: expected a JSON array in the response body");
  }

  // NOTE(review): the shape of each element is still trusted, not validated.
  const all = payload as ManufacturerRecord[];
  return all.filter((m) => m.active && m.tier === targetTier);
}
|
||||||
|
|
||||||
|
/**
 * Entry point: crawl every active manufacturer of the selected tier, one at a time.
 *
 * For each manufacturer a `scripts/crawl-manufacturer.ts` child process is
 * spawned with this process's environment and with stdout/stderr inherited.
 * A failed crawl (spawn error or non-zero exit code) is recorded and the loop
 * continues, so one bad manufacturer does not abort the batch. After the loop
 * a per-manufacturer summary is printed.
 *
 * Exits the process with code 1 when GEARBOX_API_KEY is not set, or when at
 * least one crawl failed.
 */
async function main(): Promise<void> {
  if (!GEARBOX_API_KEY) {
    console.error("GEARBOX_API_KEY env var is required");
    process.exit(1);
  }

  const manufacturers = await listActiveManufacturers(tier);
  console.log(`Found ${manufacturers.length} active tier-${tier} manufacturers\n`);

  const results: Array<{ slug: string; status: "ok" | "error"; error?: string }> = [];

  // Identical for every manufacturer, so build it once outside the loop.
  const extraArgs = dryRun ? ["--dry-run"] : [];

  for (const m of manufacturers) {
    console.log(`\n${"─".repeat(50)}`);
    console.log(`Crawling: ${m.name} (${m.slug})`);
    try {
      const proc = Bun.spawn(
        ["bun", "run", "scripts/crawl-manufacturer.ts", `--manufacturer=${m.slug}`, ...extraArgs],
        { stdout: "inherit", stderr: "inherit", env: process.env },
      );
      const exitCode = await proc.exited;
      if (exitCode !== 0) throw new Error(`Exited with code ${exitCode}`);
      results.push({ slug: m.slug, status: "ok" });
    } catch (err: unknown) {
      // Narrow instead of casting: a thrown non-Error value would otherwise
      // be reported as "undefined".
      const message = err instanceof Error ? err.message : String(err);
      console.error(` ERROR: ${message}`);
      results.push({ slug: m.slug, status: "error", error: message });
    }
  }

  console.log(`\n${"=".repeat(50)}`);
  console.log("Summary:");
  for (const r of results) {
    const icon = r.status === "ok" ? "✓" : "✗";
    console.log(` ${icon} ${r.slug}${r.error ? ` — ${r.error}` : ""}`);
  }

  const failed = results.filter((r) => r.status === "error");
  if (failed.length > 0) {
    console.error(`\n${failed.length} manufacturer(s) failed`);
    // Non-zero exit lets schedulers / CI detect a partially failed batch.
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
/**
 * Last-resort handler for a rejection escaping main(): print the rejection
 * value and terminate the process with exit code 1.
 */
function reportFatalAndExit(reason: unknown): void {
  console.error(reason);
  process.exit(1);
}

// Start the batch run; any unhandled failure is routed to the handler above.
main().catch(reportFatalAndExit);
|
||||||
Reference in New Issue
Block a user