seed.ts raw
1 #!/usr/bin/env bun
2 /**
3 * seed.ts — Seed a Neo4j database with Nostr events from JSONL.
4 *
5 * Creates the same graph structure that ORLY uses:
6 * Event nodes ──AUTHORED_BY──> NostrUser
7 * Event nodes ──REFERENCES───> Event (e-tags)
8 * Event nodes ──MENTIONS─────> NostrUser (p-tags)
9 * Event nodes ──TAGGED_WITH──> Tag (t, d, r, etc.)
10 *
11 * Usage:
12 * bun run seed.ts # defaults: localhost, 500 events
13 * bun run seed.ts --uri bolt://host:7687 # custom URI
14 * bun run seed.ts --limit 2000 # seed more events
15 * bun run seed.ts --all # seed the entire dataset (~11k events)
16 * bun run seed.ts --clean # wipe DB first
17 */
18
19 import neo4j from "neo4j-driver";
20 import { parseArgs } from "util";
21 import { resolve } from "path";
22 import { homedir } from "os";
23
24 // ── Defaults ────────────────────────────────────────────────────
/** Location of the example event dump, relative to the user's home directory. */
const EXAMPLE_EVENTS_RELATIVE_PATH =
  "src/git.mleku.dev/mleku/nostr/encoders/event/examples/out.jsonl";

/** Absolute path of the JSONL file used as the default for the --data option. */
const DEFAULT_DATA = resolve(homedir(), EXAMPLE_EVENTS_RELATIVE_PATH);
29
30 // ── Schema (matches ORLY's pkg/neo4j/schema.go) ────────────────
/** Uniqueness of Event.id. */
const EVENT_ID_UNIQUE_CONSTRAINT =
  "CREATE CONSTRAINT event_id_unique IF NOT EXISTS FOR (e:Event) REQUIRE e.id IS UNIQUE";

/** Uniqueness of NostrUser.pubkey. */
const NOSTR_USER_PUBKEY_CONSTRAINT =
  "CREATE CONSTRAINT nostrUser_pubkey IF NOT EXISTS FOR (n:NostrUser) REQUIRE n.pubkey IS UNIQUE";

/** Constraint statements run (in this order) before any data is written. */
const CONSTRAINTS = [EVENT_ID_UNIQUE_CONSTRAINT, NOSTR_USER_PUBKEY_CONSTRAINT];
35
/**
 * Index statements keyed by index name. Insertion order is the order in
 * which the statements are executed.
 */
const INDEX_STATEMENTS: Record<string, string> = {
  event_kind: "CREATE INDEX event_kind IF NOT EXISTS FOR (e:Event) ON (e.kind)",
  event_created_at:
    "CREATE INDEX event_created_at IF NOT EXISTS FOR (e:Event) ON (e.created_at)",
  tag_type: "CREATE INDEX tag_type IF NOT EXISTS FOR (t:Tag) ON (t.type)",
  tag_value: "CREATE INDEX tag_value IF NOT EXISTS FOR (t:Tag) ON (t.value)",
  tag_type_value:
    "CREATE INDEX tag_type_value IF NOT EXISTS FOR (t:Tag) ON (t.type, t.value)",
  event_kind_created_at:
    "CREATE INDEX event_kind_created_at IF NOT EXISTS FOR (e:Event) ON (e.kind, e.created_at)",
};

/** Index statements applied together with CONSTRAINTS when the schema is set up. */
const INDEXES = Object.values(INDEX_STATEMENTS);
44
45 // ── Parse CLI args ──────────────────────────────────────────────
/**
 * Command-line options understood by the script. Numeric options are
 * declared as strings and converted after parsing.
 */
const CLI_OPTIONS = {
  uri: { type: "string", default: "bolt://localhost:7687" },
  user: { type: "string", default: "neo4j" },
  password: { type: "string", default: "nostr-demo-2024" },
  data: { type: "string", default: DEFAULT_DATA },
  limit: { type: "string", default: "500" },
  all: { type: "boolean", default: false },
  clean: { type: "boolean", default: false },
  help: { type: "boolean", default: false },
} as const;

/** Parsed option values, with defaults filled in for anything not supplied. */
const { values: args } = parseArgs({ options: CLI_OPTIONS });
58
/** Usage text printed for --help. */
const HELP_TEXT = `
seed.ts — Seed Neo4j with Nostr events

Options:
--uri <uri> Neo4j bolt URI (default: bolt://localhost:7687)
--user <user> Neo4j username (default: neo4j)
--password <pass> Neo4j password (default: nostr-demo-2024)
--data <path> Path to JSONL event data
--limit <n> Max events to seed (default: 500)
--all Seed ALL events (ignores --limit)
--clean Wipe the database before seeding
--help Show this help
`;

// --help short-circuits everything else: print usage and exit successfully.
if (args.help) {
  console.log(HELP_TEXT);
  process.exit(0);
}
75
// Maximum number of events to seed. --all lifts the cap entirely; otherwise
// --limit must be a plain non-negative integer. Without this check an invalid
// value parses to NaN and the loader silently returns zero events.
const rawLimit = (args.limit ?? "").trim();
if (!args.all && !/^\d+$/.test(rawLimit)) {
  console.error(
    `Error: --limit must be a non-negative integer, got "${args.limit}"`
  );
  process.exit(1);
}
const limit = args.all ? Infinity : Number.parseInt(rawLimit, 10);
77
78 // ── Load events from JSONL ──────────────────────────────────────
/** One event record as read from a line of the JSONL data file. */
interface NostrEvent {
  /** Event identifier; becomes the `id` of the Event node. */
  id: string;
  /** Author key; becomes the `pubkey` of the NostrUser the event is AUTHORED_BY. */
  pubkey: string;
  /** Signature string, stored on the Event node as-is. */
  sig: string;
  /** Event kind, stored as a Neo4j integer. */
  kind: number;
  /** Creation timestamp, stored as a Neo4j integer. */
  created_at: number;
  /** Event body text. */
  content: string;
  /**
   * Tag tuples: element 0 is the tag type and element 1 its value.
   * "e" tags link to other events, "p" tags to users, all others to Tag nodes.
   */
  tags: string[][];
}
88
/**
 * Checks that a parsed JSONL record carries the fields the seeding queries
 * cannot work without (id, pubkey, kind, created_at).
 */
function hasRequiredEventFields(value: unknown): boolean {
  if (typeof value !== "object" || value === null) return false;
  const record = value as Record<string, unknown>;
  return (
    typeof record.id === "string" &&
    typeof record.pubkey === "string" &&
    typeof record.kind === "number" &&
    typeof record.created_at === "number"
  );
}

/**
 * Reads up to `max` events from a JSONL file (one JSON object per line).
 *
 * Blank lines, including a trailing newline or an entirely empty file, are
 * skipped and do not count towards `max`. If the file does not exist an
 * error is printed and the process exits with status 1.
 *
 * @param path - Path of the JSONL file.
 * @param max - Maximum number of events to return; `Infinity` means no cap.
 *   A NaN or non-positive value yields an empty array.
 * @returns The parsed events in file order.
 * @throws Error naming the file and 1-based line number when a line is not
 *   valid JSON or lacks id, pubkey, kind or created_at.
 */
async function loadEvents(path: string, max: number): Promise<NostrEvent[]> {
  const file = Bun.file(path);
  if (!(await file.exists())) {
    console.error(`Error: Data file not found: ${path}`);
    process.exit(1);
  }

  const text = await file.text();
  const events: NostrEvent[] = [];

  for (const [index, rawLine] of text.split("\n").entries()) {
    // Written as a negated "<" so that a NaN max stops immediately.
    if (!(events.length < max)) break;

    const line = rawLine.trim();
    if (line === "") continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`${path}:${index + 1}: invalid JSON (${reason})`);
    }

    if (!hasRequiredEventFields(parsed)) {
      throw new Error(
        `${path}:${index + 1}: not a Nostr event (id, pubkey, kind and created_at are required)`
      );
    }
    // content, sig and tags are not checked here; main() substitutes defaults
    // for them when they are missing.
    events.push(parsed as NostrEvent);
  }

  return events;
}
104
105 // ── Main ────────────────────────────────────────────────────────
/** Rows for the three tag-derived relationship queries, grouped by kind. */
interface TagRelationshipRows {
  references: { srcId: string; refId: string }[];
  mentions: { srcId: string; pubkey: string }[];
  tagged: { srcId: string; type: string; value: string }[];
}

/**
 * Splits the tags of a batch of events into rows for REFERENCES ("e" tags),
 * MENTIONS ("p" tags) and TAGGED_WITH (every other tag type). Tags without a
 * type or with a missing/empty value are dropped.
 */
function collectTagRows(
  batch: readonly { id: string; tags?: string[][] }[]
): TagRelationshipRows {
  const rows: TagRelationshipRows = { references: [], mentions: [], tagged: [] };
  for (const ev of batch) {
    for (const tag of ev.tags ?? []) {
      const [tagType, tagValue] = tag;
      if (tagType === undefined || !tagValue) continue;

      if (tagType === "e") {
        rows.references.push({ srcId: ev.id, refId: tagValue });
      } else if (tagType === "p") {
        rows.mentions.push({ srcId: ev.id, pubkey: tagValue });
      } else {
        rows.tagged.push({ srcId: ev.id, type: tagType, value: tagValue });
      }
    }
  }
  return rows;
}

/**
 * Connects to Neo4j, optionally wipes the database, applies the schema,
 * loads events from the JSONL file and writes them to the graph in batches,
 * then prints node and relationship counts.
 *
 * Exits the process with status 1 when the server cannot be reached. The
 * session and driver are closed on every path.
 */
async function main() {
  console.log(`Connecting to Neo4j at ${args.uri} ...`);
  const driver = neo4j.driver(
    args.uri!,
    neo4j.auth.basic(args.user!, args.password!)
  );

  try {
    await driver.verifyConnectivity();
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    console.error(`Cannot connect: ${reason}`);
    console.error("Is Neo4j running? Try: docker ps | grep neo4j");
    // Release the driver before exiting; a failure to close must not mask
    // the connectivity error already reported.
    await driver.close().catch(() => undefined);
    process.exit(1);
  }
  console.log(" Connected.");

  const session = driver.session();

  // Runs a query of the form `... RETURN count(x) AS n` and returns the
  // count as text. A bare count always yields exactly one row, even when
  // nothing matches, so the summary cannot come back empty.
  const countOf = async (cypher: string): Promise<string> => {
    const result = await session.run(cypher);
    return String(result.records[0]?.get("n") ?? 0);
  };

  try {
    // Clean if requested
    if (args.clean) {
      console.log("Wiping database ...");
      await session.run("MATCH (n) DETACH DELETE n");
      console.log(" Done.");
    }

    // Apply schema
    console.log("Applying schema ...");
    for (const cypher of [...CONSTRAINTS, ...INDEXES]) {
      await session.run(cypher);
    }
    console.log(" Schema applied (constraints + indexes).");

    // Load events
    console.log(`Loading events from ${args.data} ...`);
    const events = await loadEvents(args.data!, limit);
    console.log(` Loaded ${events.length} events.`);

    // Seed in batches
    console.log("Seeding graph ...");
    const t0 = Date.now();
    const batchSize = 50;

    for (let i = 0; i < events.length; i += batchSize) {
      const batch = events.slice(i, i + batchSize);

      // Parameters are built outside the transaction function because the
      // driver may invoke that function more than once on retry.
      const batchData = batch.map((ev) => ({
        id: ev.id,
        kind: neo4j.int(ev.kind),
        created_at: neo4j.int(ev.created_at),
        content: ev.content || "",
        sig: ev.sig || "",
        pubkey: ev.pubkey,
        tags: JSON.stringify(ev.tags || []),
      }));
      const { references, mentions, tagged } = collectTagRows(batch);

      await session.executeWrite(async (tx) => {
        // Create Event + NostrUser nodes with AUTHORED_BY
        await tx.run(
          `UNWIND $events AS ev
           MERGE (e:Event {id: ev.id})
           SET e.kind = ev.kind,
               e.created_at = ev.created_at,
               e.content = ev.content,
               e.sig = ev.sig,
               e.pubkey = ev.pubkey,
               e.tags = ev.tags
           MERGE (a:NostrUser {pubkey: ev.pubkey})
           MERGE (e)-[:AUTHORED_BY]->(a)`,
          { events: batchData }
        );

        // Tag relationships: one UNWIND query per relationship type instead
        // of one round trip per tag.
        if (references.length > 0) {
          await tx.run(
            `UNWIND $rows AS row
             MATCH (src:Event {id: row.srcId})
             MERGE (tgt:Event {id: row.refId})
             MERGE (src)-[:REFERENCES]->(tgt)`,
            { rows: references }
          );
        }
        if (mentions.length > 0) {
          await tx.run(
            `UNWIND $rows AS row
             MATCH (src:Event {id: row.srcId})
             MERGE (mentioned:NostrUser {pubkey: row.pubkey})
             MERGE (src)-[:MENTIONS]->(mentioned)`,
            { rows: mentions }
          );
        }
        if (tagged.length > 0) {
          await tx.run(
            `UNWIND $rows AS row
             MATCH (src:Event {id: row.srcId})
             MERGE (t:Tag {type: row.type, value: row.value})
             MERGE (src)-[:TAGGED_WITH]->(t)`,
            { rows: tagged }
          );
        }
      });

      const done = Math.min(i + batchSize, events.length);
      const pct = Math.floor((done * 100) / events.length);
      process.stdout.write(`\r Progress: ${done}/${events.length} (${pct}%)`);
    }

    const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
    console.log(`\n Seeded ${events.length} events in ${elapsed}s`);

    // Print summary. Each count is queried on its own: chaining the MATCH
    // clauses in a single query returns no row at all as soon as one label
    // or relationship type has no instances.
    const eventCount = await countOf("MATCH (e:Event) RETURN count(e) AS n");
    const userCount = await countOf("MATCH (a:NostrUser) RETURN count(a) AS n");
    const tagCount = await countOf("MATCH (t:Tag) RETURN count(t) AS n");
    console.log(`\n Graph summary:`);
    console.log(` Events: ${eventCount}`);
    console.log(` Users: ${userCount}`);
    console.log(` Tags: ${tagCount}`);

    const authored = await countOf("MATCH ()-[r:AUTHORED_BY]->() RETURN count(r) AS n");
    const refs = await countOf("MATCH ()-[r:REFERENCES]->() RETURN count(r) AS n");
    const mentionCount = await countOf("MATCH ()-[r:MENTIONS]->() RETURN count(r) AS n");
    const taggedCount = await countOf("MATCH ()-[r:TAGGED_WITH]->() RETURN count(r) AS n");
    console.log(` AUTHORED_BY: ${authored}`);
    console.log(` REFERENCES: ${refs}`);
    console.log(` MENTIONS: ${mentionCount}`);
    console.log(` TAGGED_WITH: ${taggedCount}`);

    console.log("\nDone! The database is ready for Cypher queries.");
  } finally {
    // Close the driver even if closing the session fails.
    try {
      await session.close();
    } finally {
      await driver.close();
    }
  }
}
249
// Run the script. Any error escaping main() is reported and turned into a
// non-zero exit status instead of being left as a floating promise rejection.
main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});
251