#!/usr/bin/env bun
/**
 * Crawl all active manufacturers of a given tier.
 *
 * Usage:
 *   bun run scripts/crawl-all.ts --tier=1
 *   bun run scripts/crawl-all.ts --tier=1 --dry-run
 *
 * Env vars required:
 *   GEARBOX_URL       — Base URL of the GearBox instance (default: http://localhost:3000)
 *   GEARBOX_API_KEY   — GearBox API key with write access
 *   ANTHROPIC_API_KEY — Anthropic API key (passed through to crawl-manufacturer)
 *
 * Exits with status 1 when the API key is missing, --tier is not a number,
 * the manufacturer list cannot be fetched, or at least one crawl fails.
 *
 * Introduced by commit 44602d4
 * ("feat: crawl-all batch runner — iterate active manufacturers by tier").
 */

// Trailing slashes are stripped so that appending "/api/..." never yields "//api/...".
const GEARBOX_URL = (process.env.GEARBOX_URL ?? "http://localhost:3000").replace(/\/+$/, "");
const GEARBOX_API_KEY = process.env.GEARBOX_API_KEY ?? "";

/**
 * Parsed `--key=value` / `--flag` command-line options.
 * A bare `--flag` maps to the string "true"; arguments without a leading
 * `--` are ignored.
 */
const args: Record<string, string> = Object.fromEntries(
  process.argv
    .slice(2)
    .filter((a) => a.startsWith("--"))
    .map((a): [string, string] => {
      const option = a.slice(2);
      const eq = option.indexOf("=");
      // Split on the FIRST "=" only, so a value may itself contain "=".
      return eq === -1 ? [option, "true"] : [option.slice(0, eq), option.slice(eq + 1)];
    }),
);

const tierArg = args["tier"];
// Missing or empty --tier falls back to tier 1; anything else is validated in main().
const tier = tierArg ? Number(tierArg) : 1;
const dryRun = args["dry-run"] === "true";

/** Shape of one entry this script reads from `GET /api/manufacturers`. */
type Manufacturer = { slug: string; tier: number; active: boolean; name: string };

/** Outcome of crawling a single manufacturer. */
type CrawlResult = { slug: string; status: "ok" | "error"; error?: string };

/**
 * Fetch the manufacturer list and keep the active entries of one tier.
 *
 * NOTE(review): the request carries no credentials — GEARBOX_API_KEY is not
 * sent here. Confirm the list endpoint is meant to be readable without auth.
 *
 * @param targetTier Tier number an entry must match exactly.
 * @returns Entries whose `active` is truthy and whose `tier === targetTier`.
 * @throws Error when the response status is not OK or the body is not a JSON array.
 */
async function listActiveManufacturers(targetTier: number): Promise<Manufacturer[]> {
  const res = await fetch(`${GEARBOX_URL}/api/manufacturers`);
  if (!res.ok) throw new Error(`Failed to list manufacturers: HTTP ${res.status}`);

  const payload: unknown = await res.json();
  if (!Array.isArray(payload)) {
    throw new Error("Failed to list manufacturers: response body is not a JSON array");
  }
  // Element fields are trusted as-is; only the outer array shape is verified.
  return (payload as Manufacturer[]).filter((m) => m.active && m.tier === targetTier);
}

/**
 * Crawl every active manufacturer of the selected tier, one after another,
 * then print a per-manufacturer summary.
 *
 * Each crawl runs `scripts/crawl-manufacturer.ts` as a child process that
 * inherits this process's stdout, stderr and environment. A failing child does
 * not stop the batch; the process exits with status 1 at the end if any failed.
 */
async function main(): Promise<void> {
  if (!GEARBOX_API_KEY) {
    console.error("GEARBOX_API_KEY env var is required");
    process.exit(1);
  }
  // A non-numeric --tier (e.g. "--tier=abc" or a bare "--tier") parses to NaN,
  // which would silently match no manufacturer and report success.
  if (!Number.isFinite(tier)) {
    console.error(`Invalid --tier value "${tierArg}": expected a number`);
    process.exit(1);
  }

  const manufacturers = await listActiveManufacturers(tier);
  console.log(`Found ${manufacturers.length} active tier-${tier} manufacturers\n`);

  const extraArgs = dryRun ? ["--dry-run"] : [];
  const results: CrawlResult[] = [];

  for (const m of manufacturers) {
    console.log(`\n${"─".repeat(50)}`);
    console.log(`Crawling: ${m.name} (${m.slug})`);
    try {
      const proc = Bun.spawn(
        ["bun", "run", "scripts/crawl-manufacturer.ts", `--manufacturer=${m.slug}`, ...extraArgs],
        { stdout: "inherit", stderr: "inherit", env: process.env },
      );
      const exitCode = await proc.exited;
      if (exitCode !== 0) throw new Error(`Exited with code ${exitCode}`);
      results.push({ slug: m.slug, status: "ok" });
    } catch (err) {
      // Covers both a non-zero exit and a failure to spawn the child at all.
      const message = err instanceof Error ? err.message : String(err);
      console.error(` ERROR: ${message}`);
      results.push({ slug: m.slug, status: "error", error: message });
    }
  }

  console.log(`\n${"=".repeat(50)}`);
  console.log("Summary:");
  for (const r of results) {
    const icon = r.status === "ok" ? "✓" : "✗";
    console.log(` ${icon} ${r.slug}${r.error ? ` — ${r.error}` : ""}`);
  }

  const failed = results.filter((r) => r.status === "error");
  if (failed.length > 0) {
    console.error(`\n${failed.length} manufacturer(s) failed`);
    process.exit(1);
  }
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});