diff --git a/apps/api/package.json b/apps/api/package.json index e3179e8..3ee9bb7 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -11,7 +11,8 @@ "type-check": "tsc -p tsconfig.json --noEmit", "test": "vitest run", "script:scrub-data": "tsx scripts/scrub-data.ts", - "script:setup-dev-data": "tsx scripts/setup-dev-data.ts" + "script:setup-dev-data": "tsx scripts/setup-dev-data.ts", + "script:import-laddr": "tsx scripts/import-laddr.ts" }, "dependencies": { "@aws-sdk/client-s3": "^3.1048.0", diff --git a/apps/api/scripts/fixtures/laddr-fixture.sql b/apps/api/scripts/fixtures/laddr-fixture.sql new file mode 100644 index 0000000..2b61917 --- /dev/null +++ b/apps/api/scripts/fixtures/laddr-fixture.sql @@ -0,0 +1,111 @@ +-- Synthetic laddr mysqldump fixture for import-laddr tests. +-- Mirrors the shape (CREATE TABLE then INSERT) of real laddr dumps. + +CREATE TABLE `people` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `Username` varchar(255) NOT NULL, + `FirstName` varchar(255) DEFAULT NULL, + `LastName` varchar(255) DEFAULT NULL, + `FullName` varchar(255) DEFAULT NULL, + `Email` varchar(255) DEFAULT NULL, + `Password` varchar(255) DEFAULT NULL, + `About` text DEFAULT NULL, + `AccountLevel` varchar(64) DEFAULT 'User', + `Created` datetime DEFAULT NULL, + `Modified` datetime DEFAULT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `people` VALUES (1,'jane-doe','Jane','Doe','Jane Doe','jane@example.com','$2y$10$abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQ','Civic technologist.','Administrator','2020-01-15 18:42:00','2024-05-01 09:00:00'); +INSERT INTO `people` VALUES (2,'bobsmith','Bob','Smith',NULL,'bob@example.org','$2y$10$xyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyzxyz','I like buses.','User','2021-06-20 12:00:00','2021-06-20 12:00:00'),(3,'Weird Name!','Carol','Singh','Carol Singh','carol@example.net',NULL,NULL,'User','2022-03-01 00:00:00','2022-03-01 00:00:00'); +INSERT INTO `people` VALUES (4,'no-email','Dee','Park','Dee 
Park',NULL,NULL,NULL,'User','2023-01-01 00:00:00','2023-01-01 00:00:00'); + +CREATE TABLE `projects` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `Handle` varchar(255) NOT NULL, + `Title` varchar(255) NOT NULL, + `Summary` varchar(280) DEFAULT NULL, + `README` text DEFAULT NULL, + `Stage` varchar(64) DEFAULT 'Commenting', + `MaintainerID` int(11) DEFAULT NULL, + `UsersUrl` varchar(255) DEFAULT NULL, + `DevelopersUrl` varchar(255) DEFAULT NULL, + `ChatChannel` varchar(64) DEFAULT NULL, + `Created` datetime DEFAULT NULL, + `Modified` datetime DEFAULT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `projects` VALUES (10,'squadquest','SquadQuest','Realtime events.','## Overview\n\nSquadQuest is a civic app.','Testing',1,'https://squadquest.app','https://github.com/example/squadquest','squadquest','2020-02-01 00:00:00','2024-04-15 00:00:00'); +INSERT INTO `projects` VALUES (11,'transit-tools','Transit Tools','Better SEPTA info.',NULL,'Prototyping',2,NULL,'https://github.com/example/transit-tools','transit','2021-01-01 00:00:00','2021-01-01 00:00:00'); + +CREATE TABLE `project_members` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `ProjectID` int(11) NOT NULL, + `PersonID` int(11) NOT NULL, + `Role` varchar(255) DEFAULT NULL, + `Joined` datetime DEFAULT NULL, + `Created` datetime DEFAULT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `project_members` VALUES (100,10,1,'Maintainer','2020-02-01 00:00:00','2020-02-01 00:00:00'),(101,10,2,'Backend Engineer','2020-03-01 00:00:00','2020-03-01 00:00:00'),(102,11,2,'Founder','2021-01-01 00:00:00','2021-01-01 00:00:00'); + +CREATE TABLE `project_updates` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `ProjectID` int(11) NOT NULL, + `AuthorID` int(11) DEFAULT NULL, + `Update` text NOT NULL, + `Created` datetime DEFAULT NULL, + `Modified` datetime DEFAULT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `project_updates` VALUES (200,10,1,'We shipped v1.0!','2024-03-01 00:00:00','2024-03-01 00:00:00'); +INSERT INTO `project_updates` VALUES 
(201,10,2,'Beta testers wanted.','2024-04-01 00:00:00','2024-04-01 00:00:00'),(202,11,2,'First commit.','2021-01-02 00:00:00','2021-01-02 00:00:00'); + +CREATE TABLE `project_buzz` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `ProjectID` int(11) NOT NULL, + `PostedByID` int(11) DEFAULT NULL, + `Headline` varchar(255) NOT NULL, + `URL` varchar(500) NOT NULL, + `Published` datetime DEFAULT NULL, + `Summary` text DEFAULT NULL, + `Created` datetime DEFAULT NULL, + `Modified` datetime DEFAULT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `project_buzz` VALUES (300,10,1,'The Inquirer praises SquadQuest','https://www.inquirer.com/tech/squadquest','2024-01-15 00:00:00','Great review.','2024-01-15 00:00:00','2024-01-15 00:00:00'); + +CREATE TABLE `tags` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `Handle` varchar(255) NOT NULL, + `Title` varchar(255) NOT NULL, + `Created` datetime DEFAULT NULL, + `Modified` datetime DEFAULT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `tags` VALUES (500,'tech.flutter','Flutter','2020-01-01 00:00:00','2020-01-01 00:00:00'),(501,'topic.transit','Transit','2020-01-01 00:00:00','2020-01-01 00:00:00'),(502,'event.hackathon','Hackathon','2020-01-01 00:00:00','2020-01-01 00:00:00'); + +CREATE TABLE `tag_items` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `TagID` int(11) NOT NULL, + `ContextClass` varchar(255) NOT NULL, + `ContextID` int(11) NOT NULL, + `Created` datetime DEFAULT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `tag_items` VALUES (600,500,'Emergence\\\\Models\\\\Project',10,'2020-02-01 00:00:00'),(601,501,'Emergence\\\\Models\\\\Project',11,'2021-01-01 00:00:00'),(602,500,'Emergence\\\\People\\\\Person',1,'2020-02-01 00:00:00'); + +-- Tables we deliberately skip per specs/deferred.md +CREATE TABLE `member_checkins` ( + `ID` int(11) NOT NULL AUTO_INCREMENT, + `PersonID` int(11) NOT NULL, + PRIMARY KEY (`ID`) +); + +INSERT INTO `member_checkins` VALUES (1000,1); diff --git a/apps/api/scripts/import-laddr.ts 
/**
 * import-laddr.ts — One-shot migration from a laddr mysqldump
 *
 * Reads a mysqldump (`--sql`), translates each row to the v1 data model
 * (Zod-validated against `@cfp/shared/schemas`), and writes records into:
 *
 *   - the public gitsheets data repo (`--data-repo`)
 *   - the private filesystem store (`--private-store`)
 *
 * Idempotent on `legacyId`: re-running against the same dump + target
 * skips rows already present. See specs/behaviors/legacy-id-mapping.md.
 *
 * Usage:
 *   npm run -w apps/api script:import-laddr -- \
 *     --sql=./scratch/laddr.sql \
 *     --data-repo=./codeforphilly-data \
 *     --private-store=./scratch/private-storage \
 *     [--dry-run] [--verbose] [--limit=N]
 */
import { realpathSync } from 'node:fs';
import { resolve } from 'node:path';
import { pathToFileURL } from 'node:url';

import { FilesystemPrivateStore } from '../src/store/private/filesystem.js';
import { importLaddr, type ImportReport } from './import-laddr/importer.js';

/** Parsed command-line options. The three path options are resolved to absolute paths. */
interface CliArgs {
  readonly sql: string;
  readonly dataRepo: string;
  readonly privateStore: string;
  readonly dryRun: boolean;
  readonly verbose: boolean;
  /** Per-table row cap; `undefined` means "no cap". */
  readonly limit: number | undefined;
}

/**
 * Parse `--key=value` / `--flag` style arguments.
 *
 * Arguments that do not start with `--` are ignored. A missing required
 * option or a malformed `--limit` writes a message to stderr and exits the
 * process with status 2.
 *
 * @param argv arguments after the script name (i.e. `process.argv.slice(2)`)
 * @returns the normalized options
 */
function parseArgs(argv: readonly string[]): CliArgs {
  const opts: Record<string, string | boolean> = {};
  for (const a of argv) {
    if (!a.startsWith('--')) continue;
    const eq = a.indexOf('=');
    // `--flag` (no `=`) is recorded as boolean true; `--key=value` as a string.
    if (eq === -1) opts[a.slice(2)] = true;
    else opts[a.slice(2, eq)] = a.slice(eq + 1);
  }

  const need = (k: string): string => {
    const v = opts[k];
    if (typeof v !== 'string' || !v) {
      process.stderr.write(`missing --${k}=\n`);
      process.exit(2);
    }
    return v;
  };

  return {
    sql: resolve(need('sql')),
    dataRepo: resolve(need('data-repo')),
    privateStore: resolve(need('private-store')),
    dryRun: opts['dry-run'] === true,
    verbose: opts['verbose'] === true,
    limit: parseLimit(opts['limit']),
  };
}

/**
 * Validate the raw `--limit` option.
 *
 * A mistyped limit must not silently fall back to "no limit": on a
 * migration tool that would turn an intended small trial run into a full
 * import. Anything other than a plain non-negative integer is a usage error.
 *
 * @param raw the raw option value (`undefined` when the flag was not given)
 * @returns the numeric limit, or `undefined` when the flag is absent
 */
function parseLimit(raw: string | boolean | undefined): number | undefined {
  if (raw === undefined) return undefined;
  if (typeof raw !== 'string' || !/^\d+$/.test(raw)) {
    process.stderr.write(
      `invalid --limit=${String(raw)} (expected a non-negative integer)\n`,
    );
    process.exit(2);
  }
  return Number.parseInt(raw, 10);
}

/** CLI entry point: load the private store, run the import, print the report. */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));

  const privateStore = new FilesystemPrivateStore({
    CFP_PRIVATE_STORAGE_PATH: args.privateStore,
  });
  await privateStore.load();

  console.log(`[import-laddr] sql=${args.sql}`);
  console.log(`[import-laddr] data-repo=${args.dataRepo}`);
  console.log(`[import-laddr] private-store=${args.privateStore}`);
  console.log(`[import-laddr] dry-run=${args.dryRun} limit=${args.limit ?? 'none'}`);

  const report = await importLaddr({
    sql: args.sql,
    dataRepo: args.dataRepo,
    privateStore,
    dryRun: args.dryRun,
    verbose: args.verbose,
    limit: args.limit,
  });

  printReport(report, args.dryRun);
}

/**
 * Print a human-readable summary followed by the report as pretty-printed
 * JSON. Only the first 25 warnings are listed in the summary; the JSON
 * carries all of them.
 */
function printReport(report: ImportReport, dryRun: boolean): void {
  const lines: string[] = [];
  lines.push(`\n=== import-laddr report ===`);
  lines.push(`runAt: ${report.runAt}`);
  lines.push(`sourceSha256: ${report.sourceSha256}`);
  for (const [sheet, r] of Object.entries(report.entities)) {
    lines.push(
      `  ${sheet.padEnd(22)} input=${r.input} imported=${r.imported} skipped=${r.skipped} errors=${r.errors}`,
    );
  }
  lines.push(`warnings: ${report.warnings.length}`);
  for (const w of report.warnings.slice(0, 25)) lines.push(`  ${w}`);
  if (report.warnings.length > 25) {
    lines.push(`  ... (${report.warnings.length - 25} more)`);
  }
  if (dryRun) {
    lines.push(`(dry-run: no writes performed)`);
  } else {
    lines.push(`commits: ${report.commits.length}`);
    for (const c of report.commits) lines.push(`  ${c}`);
  }
  console.log(lines.join('\n'));

  process.stdout.write(`\n${JSON.stringify(reportToJson(report), null, 2)}\n`);
}

/** Project the report onto the plain fields that are emitted as JSON. */
function reportToJson(report: ImportReport): unknown {
  return {
    runAt: report.runAt,
    sourceSha256: report.sourceSha256,
    entities: report.entities,
    warnings: report.warnings,
    commits: report.commits,
  };
}

/**
 * True when this module is the script being executed rather than imported.
 *
 * `import.meta.url` is a percent-encoded file URL while `process.argv[1]`
 * is a raw filesystem path, so a plain suffix comparison fails for paths
 * containing spaces or non-ASCII characters. Converting the (symlink-
 * resolved) path to a file URL compares like with like.
 */
const isMain = ((): boolean => {
  const entry = process.argv[1];
  if (entry === undefined) return false;
  try {
    return pathToFileURL(realpathSync(entry)).href === import.meta.url;
  } catch {
    // argv[1] does not name an existing file — we cannot be the entry point.
    return false;
  }
})();

if (isMain) {
  main().catch((err: unknown) => {
    console.error('[import-laddr] failed:', err);
    process.exit(1);
  });
}
/**
 * Orchestrator: one-shot laddr → v1 migration.
 *
 * Public side: one gitsheets commit per entity type (7 commits), all under
 * a single pseudonymous author per specs/behaviors/storage.md. Idempotence
 * comes from a pre-pass that indexes the records already committed to the
 * data repo by `legacyId`; subsequent rows with the same `legacyId` are
 * skipped (insert-if-absent semantics rather than always-overwrite, because
 * re-running an import is only meant to backfill rows added since).
 *
 * Private side: PrivateProfile + LegacyPasswordCredential land in the
 * private store at the end of the people pass.
 *
 * All writes are gated by `--dry-run`. In dry-run mode the script counts
 * and validates everything but never touches the git repo or private store.
 *
 * NOTE(review): generic type arguments in this module were reconstructed
 * from usage — confirm against the `@cfp/shared` and gitsheets typings.
 */
import { execFile } from 'node:child_process';
import { createHash } from 'node:crypto';
import { createReadStream } from 'node:fs';
import { promisify } from 'node:util';

import { openRepo } from 'gitsheets';

import {
  LegacyPasswordCredentialSchema,
  PersonSchema,
  PrivateProfileSchema,
  ProjectBuzzSchema,
  ProjectMembershipSchema,
  ProjectSchema,
  ProjectUpdateSchema,
  TagAssignmentSchema,
  TagSchema,
} from '@cfp/shared/schemas';
import type {
  LegacyPasswordCredential,
  Person,
  PrivateProfile,
  Project,
  ProjectBuzz,
  ProjectMembership,
  ProjectUpdate,
  Tag,
  TagAssignment,
} from '@cfp/shared/schemas';

import type { PrivateStore } from '../../src/store/private/interface.js';
import { streamRows, type Row } from './mysqldump-parser.js';
import {
  newIdMaps,
  translateBuzz,
  translateMembership,
  translatePerson,
  translateProject,
  translateTag,
  translateTagAssignment,
  translateUpdate,
  type IdMaps,
  type Warnings,
} from './translators.js';

const exec = promisify(execFile);

/**
 * Upper bound for buffered git output. Node's `execFile` default is 1 MiB,
 * which a recursive tree listing of a large data repo can exceed.
 */
const GIT_MAX_BUFFER = 256 * 1024 * 1024;

export interface ImportOptions {
  readonly sql: string;
  readonly dataRepo: string;
  readonly privateStore: PrivateStore;
  readonly dryRun?: boolean;
  readonly verbose?: boolean;
  /** Per-table truncation: stop after N rows of each table. */
  readonly limit?: number;
  /** Override the import wall clock for deterministic tests. */
  readonly now?: string;
}

/** Per-sheet tallies reported at the end of a run. */
export interface EntityReport {
  input: number;
  imported: number;
  skipped: number;
  errors: number;
}

export interface ImportReport {
  readonly sourceSha256: string;
  readonly runAt: string;
  readonly entities: Record<string, EntityReport>;
  readonly warnings: string[];
  /** Commit hashes produced (in order), or [] in dry-run. */
  readonly commits: string[];
}

const AUTHOR_NAME = 'Code for Philly API';
const AUTHOR_EMAIL = 'api@users.noreply.codeforphilly.org';

/** Mutable state threaded through every per-entity pass. */
interface RunState {
  readonly idMaps: IdMaps;
  readonly warnings: Warnings;
  readonly entities: Record<string, EntityReport>;
  readonly opts: ImportOptions;
  readonly now: string;
  readonly sourceSha256: string;
  readonly commits: string[];
  readonly existing: ExistingLegacyIds;
}

interface ExistingLegacyIds {
  /** legacyId → { id, slug } */
  readonly people: Map<number, { id: string; slug: string }>;
  readonly projects: Map<number, { id: string; slug: string }>;
  /** legacyId → id */
  readonly tags: Map<number, string>;
  readonly projectUpdates: Set<number>;
  readonly projectBuzz: Set<number>;
  /**
   * Membership composite keys (`projectSlug/personSlug`) already committed —
   * memberships have no legacyId of their own to dedupe on, so path-presence
   * is the truth.
   */
  readonly membershipPaths: Set<string>;
  /** Tag-assignment composite keys (`tagId/type/taggableId`) already committed. */
  readonly tagAssignmentPaths: Set<string>;
}

/**
 * Run the whole import and return the tallies, warnings and commit hashes.
 *
 * @param opts dump path, target repo/store and run flags
 * @returns the report; `commits` is empty on a dry run
 */
export async function importLaddr(opts: ImportOptions): Promise<ImportReport> {
  const warnings: string[] = [];
  const sink: Warnings = {
    push: (w) => {
      warnings.push(w);
      if (opts.verbose) console.warn(w);
    },
  };

  const sourceSha256 = await hashFile(opts.sql);
  const now = opts.now ?? new Date().toISOString();

  const entities: Record<string, EntityReport> = {
    people: blank(),
    projects: blank(),
    'project-memberships': blank(),
    'project-updates': blank(),
    'project-buzz': blank(),
    tags: blank(),
    'tag-assignments': blank(),
  };

  const existing = await collectExistingLegacyIds(opts.dataRepo);

  const state: RunState = {
    idMaps: newIdMaps(),
    warnings: sink,
    entities,
    opts,
    now,
    sourceSha256,
    commits: [],
    existing,
  };

  // Order matters — FK resolution depends on earlier passes filling the id
  // maps. Each pass yields rows lazily via streamRows; on dry-run nothing
  // is written but counts/warnings still tally correctly.
  await importTags(state);
  await importPeople(state);
  await importProjects(state);
  await importMemberships(state);
  await importProjectUpdates(state);
  await importProjectBuzz(state);
  await importTagAssignments(state);

  return {
    sourceSha256,
    runAt: now,
    entities,
    warnings,
    commits: state.commits,
  };
}

// ---------------------------------------------------------------------------
// Per-entity passes
// ---------------------------------------------------------------------------

/** Import `tags`; already-committed legacy ids are skipped but still mapped. */
async function importTags(state: RunState): Promise<void> {
  const records: Tag[] = [];
  for await (const row of takeRows(state, 'tags')) {
    const legacyId = numericId(row, 'ID');
    if (legacyId !== null && state.existing.tags.has(legacyId)) {
      state.entities.tags!.skipped++;
      // Later passes still need the legacy → v1 id mapping for skipped rows.
      state.idMaps.tagByLegacy.set(legacyId, state.existing.tags.get(legacyId)!);
      continue;
    }
    const r = safeRun(state, 'tags', () => translateTag(row, ctxFor(state)));
    if (!r) continue;
    const parsed = parseOrSkip(state, 'tags', () => TagSchema.parse(r));
    if (parsed) {
      records.push(parsed);
      state.entities.tags!.imported++;
    }
  }

  await commit(state, 'tags', `${records.length} tags`, async (tx) => {
    const sheet = tx.sheet('tags');
    for (const r of records) await sheet.upsert(r as unknown as Record<string, unknown>);
  });
}

/**
 * Import `people`: the public Person record goes to the data repo, the
 * private profile and legacy password (when present) to the private store.
 * Private-side writes are skipped entirely on a dry run.
 */
async function importPeople(state: RunState): Promise<void> {
  const people: Person[] = [];
  const profiles: PrivateProfile[] = [];
  const legacyPasswords: LegacyPasswordCredential[] = [];

  for await (const row of takeRows(state, 'people')) {
    const legacyId = numericId(row, 'ID');
    if (legacyId !== null && state.existing.people.has(legacyId)) {
      state.entities.people!.skipped++;
      const existing = state.existing.people.get(legacyId)!;
      state.idMaps.personByLegacy.set(legacyId, existing.id);
      state.idMaps.personSlugById.set(existing.id, existing.slug);
      // Reserve the slug so newly imported rows cannot collide with it.
      const used = state.idMaps.usedSlugs.get('people') ?? new Set<string>();
      used.add(existing.slug);
      state.idMaps.usedSlugs.set('people', used);
      continue;
    }
    const r = safeRun(state, 'people', () => translatePerson(row, ctxFor(state)));
    if (!r) continue;

    const parsedPerson = parseOrSkip(state, 'people', () => PersonSchema.parse(r.person));
    if (!parsedPerson) continue;
    people.push(parsedPerson);
    state.entities.people!.imported++;

    if (r.privateProfile) {
      const parsedProfile = parseOrSkip(
        state,
        'private-profiles',
        () => PrivateProfileSchema.parse(r.privateProfile),
      );
      if (parsedProfile) profiles.push(parsedProfile);
    }
    if (r.legacyPassword) {
      const parsedLp = parseOrSkip(
        state,
        'legacy-passwords',
        () => LegacyPasswordCredentialSchema.parse(r.legacyPassword),
      );
      if (parsedLp) legacyPasswords.push(parsedLp);
    }
  }

  await commit(state, 'people', `${people.length} people`, async (tx) => {
    const sheet = tx.sheet('people');
    for (const r of people) await sheet.upsert(r as unknown as Record<string, unknown>);
  });

  if (state.opts.dryRun) return;

  if (profiles.length > 0) {
    await state.opts.privateStore.transact(async (privTx) => {
      for (const p of profiles) privTx.putProfile(p);
    });
  }
  if (legacyPasswords.length > 0) {
    await writeLegacyPasswords(state.opts.privateStore, legacyPasswords);
  }
}

/** Import `projects`; already-committed legacy ids are skipped but still mapped. */
async function importProjects(state: RunState): Promise<void> {
  const records: Project[] = [];
  for await (const row of takeRows(state, 'projects')) {
    const legacyId = numericId(row, 'ID');
    if (legacyId !== null && state.existing.projects.has(legacyId)) {
      state.entities.projects!.skipped++;
      const existing = state.existing.projects.get(legacyId)!;
      state.idMaps.projectByLegacy.set(legacyId, existing.id);
      state.idMaps.projectSlugByLegacy.set(legacyId, existing.slug);
      const used = state.idMaps.usedSlugs.get('projects') ?? new Set<string>();
      used.add(existing.slug);
      state.idMaps.usedSlugs.set('projects', used);
      continue;
    }
    const r = safeRun(state, 'projects', () => translateProject(row, ctxFor(state)));
    if (!r) continue;
    const parsed = parseOrSkip(state, 'projects', () => ProjectSchema.parse(r));
    if (parsed) {
      records.push(parsed);
      state.entities.projects!.imported++;
    }
  }

  await commit(state, 'projects', `${records.length} projects`, async (tx) => {
    const sheet = tx.sheet('projects');
    for (const r of records) await sheet.upsert(r as unknown as Record<string, unknown>);
  });
}

interface MembershipWritable {
  readonly record: ProjectMembership;
  readonly pathFields: { projectSlug: string; personSlug: string };
}

/** Import `project_members`, deduping on the `projectSlug/personSlug` key. */
async function importMemberships(state: RunState): Promise<void> {
  const records: MembershipWritable[] = [];
  for await (const row of takeRows(state, 'project_members')) {
    const r = safeRun(state, 'project-memberships', () =>
      translateMembership(row, ctxFor(state)),
    );
    if (!r) continue;
    const compositeKey = `${r.pathFields.projectSlug}/${r.pathFields.personSlug}`;
    if (state.existing.membershipPaths.has(compositeKey)) {
      state.entities['project-memberships']!.skipped++;
      continue;
    }
    const parsed = parseOrSkip(state, 'project-memberships', () =>
      ProjectMembershipSchema.parse(r.membership),
    );
    if (parsed) {
      records.push({ record: parsed, pathFields: r.pathFields });
      state.entities['project-memberships']!.imported++;
    }
  }

  await commit(
    state,
    'project-memberships',
    `${records.length} project-memberships`,
    async (tx) => {
      const sheet = tx.sheet('project-memberships');
      for (const { record, pathFields } of records) {
        await sheet.upsert({ ...record, ...pathFields } as unknown as Record<string, unknown>);
      }
    },
  );
}

interface UpdateWritable {
  readonly record: ProjectUpdate;
  readonly pathFields: { projectSlug: string };
}

/** Import `project_updates`, deduping on `legacyId`. */
async function importProjectUpdates(state: RunState): Promise<void> {
  const records: UpdateWritable[] = [];
  for await (const row of takeRows(state, 'project_updates')) {
    const legacyId = numericId(row, 'ID');
    if (legacyId !== null && state.existing.projectUpdates.has(legacyId)) {
      state.entities['project-updates']!.skipped++;
      continue;
    }
    const r = safeRun(state, 'project-updates', () => translateUpdate(row, ctxFor(state)));
    if (!r) continue;
    const parsed = parseOrSkip(state, 'project-updates', () =>
      ProjectUpdateSchema.parse(r.update),
    );
    if (parsed) {
      records.push({ record: parsed, pathFields: r.pathFields });
      state.entities['project-updates']!.imported++;
    }
  }

  await commit(
    state,
    'project-updates',
    `${records.length} project-updates`,
    async (tx) => {
      const sheet = tx.sheet('project-updates');
      for (const { record, pathFields } of records) {
        await sheet.upsert({ ...record, ...pathFields } as unknown as Record<string, unknown>);
      }
    },
  );
}

interface BuzzWritable {
  readonly record: ProjectBuzz;
  readonly pathFields: { projectSlug: string };
}

/** Import `project_buzz`, deduping on `legacyId`. */
async function importProjectBuzz(state: RunState): Promise<void> {
  const records: BuzzWritable[] = [];
  for await (const row of takeRows(state, 'project_buzz')) {
    const legacyId = numericId(row, 'ID');
    if (legacyId !== null && state.existing.projectBuzz.has(legacyId)) {
      state.entities['project-buzz']!.skipped++;
      continue;
    }
    const r = safeRun(state, 'project-buzz', () => translateBuzz(row, ctxFor(state)));
    if (!r) continue;
    const parsed = parseOrSkip(state, 'project-buzz', () => ProjectBuzzSchema.parse(r.buzz));
    if (parsed) {
      records.push({ record: parsed, pathFields: r.pathFields });
      state.entities['project-buzz']!.imported++;
    }
  }

  await commit(state, 'project-buzz', `${records.length} project-buzz`, async (tx) => {
    const sheet = tx.sheet('project-buzz');
    for (const { record, pathFields } of records) {
      await sheet.upsert({ ...record, ...pathFields } as unknown as Record<string, unknown>);
    }
  });
}

/** Import `tag_items`, deduping on the `tagId/taggableType/taggableId` key. */
async function importTagAssignments(state: RunState): Promise<void> {
  const records: TagAssignment[] = [];
  for await (const row of takeRows(state, 'tag_items')) {
    const r = safeRun(state, 'tag-assignments', () =>
      translateTagAssignment(row, ctxFor(state)),
    );
    if (!r) continue;
    const compositeKey = `${r.tagId}/${r.taggableType}/${r.taggableId}`;
    if (state.existing.tagAssignmentPaths.has(compositeKey)) {
      state.entities['tag-assignments']!.skipped++;
      continue;
    }
    const parsed = parseOrSkip(state, 'tag-assignments', () =>
      TagAssignmentSchema.parse(r),
    );
    if (parsed) {
      records.push(parsed);
      state.entities['tag-assignments']!.imported++;
    }
  }

  await commit(
    state,
    'tag-assignments',
    `${records.length} tag-assignments`,
    async (tx) => {
      const sheet = tx.sheet('tag-assignments');
      for (const r of records) await sheet.upsert(r as unknown as Record<string, unknown>);
    },
  );
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** A zeroed tally. */
function blank(): EntityReport {
  return { input: 0, imported: 0, skipped: 0, errors: 0 };
}

/**
 * Return the tally for `sheet`, creating it on first use.
 *
 * The people pass reports validation failures under `private-profiles` and
 * `legacy-passwords`, which are not pre-seeded in `entities`; without the
 * lazy creation the error handlers themselves would throw.
 */
function reportFor(state: RunState, sheet: string): EntityReport {
  let report = state.entities[sheet];
  if (!report) {
    report = blank();
    state.entities[sheet] = report;
  }
  return report;
}

/** The subset of run state handed to the translators. */
function ctxFor(state: RunState): {
  idMaps: IdMaps;
  warnings: Warnings;
  now: string;
} {
  return { idMaps: state.idMaps, warnings: state.warnings, now: state.now };
}

/**
 * Stream rows of `table`, yielding at most `opts.limit` of them while
 * counting every row seen into the sheet's `input` tally.
 */
async function* takeRows(state: RunState, table: string): AsyncGenerator<Row> {
  const limit = state.opts.limit ?? Infinity;
  let yielded = 0;
  for await (const row of streamRows(state.opts.sql, table)) {
    // The "input" tally counts rows seen pre-limit so dry-run reports
    // reflect dump size accurately, not just what was imported.
    state.entities[sheetNameForTable(table)]!.input++;
    if (yielded >= limit) continue;
    yielded++;
    yield row;
  }
}

/** Map a laddr table name to the v1 sheet name; throws on an unknown table. */
function sheetNameForTable(table: string): string {
  switch (table) {
    case 'people': return 'people';
    case 'projects': return 'projects';
    case 'project_members': return 'project-memberships';
    case 'project_updates': return 'project-updates';
    case 'project_buzz': return 'project-buzz';
    case 'tags': return 'tags';
    case 'tag_items': return 'tag-assignments';
    default: throw new Error(`unhandled table ${table}`);
  }
}

/** Read `row[key]` as an integer; `null` when absent or not numeric. */
function numericId(row: Row, key: string): number | null {
  const v = row[key];
  if (typeof v === 'number') return v;
  if (typeof v === 'string') {
    const n = parseInt(v, 10);
    return Number.isNaN(n) ? null : n;
  }
  return null;
}

/** Run a translator; on throw, tally an error, record a warning, return null. */
function safeRun<T>(state: RunState, sheet: string, fn: () => T): T | null {
  try {
    return fn();
  } catch (err) {
    reportFor(state, sheet).errors++;
    state.warnings.push(`[${sheet}] translator threw: ${describe(err)}`);
    return null;
  }
}

/** Run a schema parse; on throw, tally an error, record a warning, return null. */
function parseOrSkip<T>(state: RunState, sheet: string, fn: () => T): T | null {
  try {
    return fn();
  } catch (err) {
    reportFor(state, sheet).errors++;
    state.warnings.push(`[${sheet}] zod validation failed: ${describe(err)}`);
    return null;
  }
}

/** Render an unknown thrown value as a message string. */
function describe(err: unknown): string {
  if (err instanceof Error) return err.message;
  return String(err);
}

/**
 * Run `fn` inside one gitsheets transaction and record the resulting commit
 * hash (when the transaction reports one). A no-op on dry runs.
 */
async function commit(
  state: RunState,
  sheet: string,
  summary: string,
  // The transaction tx type is opaque here so this module doesn't take on a
  // gitsheets-Transaction generic; the upsert calls are routed through the
  // sheet getter the same way seed-fixtures.ts does.
  fn: (tx: {
    sheet: (name: string) => { upsert: (r: Record<string, unknown>) => Promise<unknown> };
  }) => Promise<void>,
): Promise<void> {
  if (state.opts.dryRun) return;
  const repo = await openRepo({
    gitDir: `${state.opts.dataRepo}/.git`,
    workTree: state.opts.dataRepo,
  });
  const result = await repo.transact(
    {
      message: `import: from laddr mysqldump (${sheet})\n\n${summary} imported.`,
      author: { name: AUTHOR_NAME, email: AUTHOR_EMAIL },
      trailers: {
        Action: 'import.laddr',
        'Source-Dump': state.sourceSha256,
        'Run-At': state.now,
      },
    },
    async (tx) => fn(tx as unknown as Parameters<typeof fn>[0]),
  );
  if (result.commitHash) state.commits.push(result.commitHash);
}

/** SHA-256 of a file's bytes as lowercase hex, computed by streaming. */
async function hashFile(filePath: string): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const h = createHash('sha256');
    const s = createReadStream(filePath);
    s.on('data', (chunk) => h.update(chunk));
    s.on('end', () => resolve(h.digest('hex')));
    s.on('error', reject);
  });
}

/**
 * Index what is already committed at `HEAD` of the data repo so re-runs can
 * skip it.
 *
 * Returns empty indexes when `HEAD` cannot be resolved (fresh repo, or not a
 * repo at all). Once `HEAD` is known to exist, any git failure is raised
 * rather than swallowed: a silently empty index would make the run
 * re-import every row as a duplicate.
 *
 * @throws Error when listing or reading committed records fails
 */
async function collectExistingLegacyIds(dataRepo: string): Promise<ExistingLegacyIds> {
  const out: ExistingLegacyIds = {
    people: new Map(),
    projects: new Map(),
    tags: new Map(),
    projectUpdates: new Set(),
    projectBuzz: new Set(),
    membershipPaths: new Set(),
    tagAssignmentPaths: new Set(),
  };

  // Fresh repo with no HEAD: nothing committed yet, nothing to dedupe on.
  try {
    await exec('git', ['rev-parse', '--verify', '--quiet', 'HEAD'], { cwd: dataRepo });
  } catch {
    return out;
  }

  // Walking git's tree rather than the working dir (gitsheets only updates
  // refs, no checkout) keeps the read aligned with what was committed.
  // `-z` yields NUL-separated, unquoted paths, so names git would otherwise
  // C-quote (non-ASCII, special characters) are not missed.
  const { stdout: listing } = await exec(
    'git',
    ['ls-tree', '-r', '-z', '--name-only', 'HEAD'],
    { cwd: dataRepo, maxBuffer: GIT_MAX_BUFFER },
  );

  for (const path of listing.split('\0').filter((p) => p.endsWith('.toml'))) {
    // Memberships + tag-assignments live solely by path; cheap to dedupe
    // on path-presence so the second-run skip is trivial.
    if (path.startsWith('project-memberships/')) {
      out.membershipPaths.add(path.slice('project-memberships/'.length, -'.toml'.length));
      continue;
    }
    if (path.startsWith('tag-assignments/')) {
      out.tagAssignmentPaths.add(path.slice('tag-assignments/'.length, -'.toml'.length));
      continue;
    }

    let target: 'people' | 'projects' | 'tags' | 'updates' | 'buzz' | null = null;
    if (path.startsWith('people/')) target = 'people';
    else if (path.startsWith('projects/')) target = 'projects';
    else if (path.startsWith('tags/')) target = 'tags';
    else if (path.startsWith('project-updates/')) target = 'updates';
    else if (path.startsWith('project-buzz/')) target = 'buzz';
    if (!target) continue;

    let content: string;
    try {
      content = (
        await exec('git', ['show', `HEAD:${path}`], {
          cwd: dataRepo,
          maxBuffer: GIT_MAX_BUFFER,
        })
      ).stdout;
    } catch (err) {
      throw new Error(
        `[import-laddr] cannot read HEAD:${path} while collecting legacy ids: ${describe(err)}`,
      );
    }

    const id = matchToml(content, 'id');
    const slug = matchToml(content, 'slug');
    const legacyIdRaw = matchToml(content, 'legacyId');
    const legacyId = legacyIdRaw !== null ? parseInt(legacyIdRaw, 10) : null;
    // Records created natively (no legacyId) are irrelevant to dedupe.
    if (legacyId === null || Number.isNaN(legacyId)) continue;

    switch (target) {
      case 'people':
        if (id && slug) out.people.set(legacyId, { id, slug });
        break;
      case 'projects':
        if (id && slug) out.projects.set(legacyId, { id, slug });
        break;
      case 'tags':
        if (id) out.tags.set(legacyId, id);
        break;
      case 'updates':
        out.projectUpdates.add(legacyId);
        break;
      case 'buzz':
        out.projectBuzz.add(legacyId);
        break;
    }
  }
  return out;
}

/**
 * Pull the value of a top-level `key = value` line out of TOML text,
 * stripping one pair of surrounding single or double quotes.
 *
 * This is a line-oriented match, not a TOML parser: escape sequences are
 * not decoded and a trailing inline comment would be kept as part of the
 * value. `key` is interpolated into the pattern unescaped, so it must be a
 * plain identifier.
 */
function matchToml(content: string, key: string): string | null {
  const re = new RegExp(`^${key}\\s*=\\s*(.+)$`, 'm');
  const m = content.match(re);
  if (!m) return null;
  const raw = m[1]!.trim();
  if (raw.startsWith('"') && raw.endsWith('"')) return raw.slice(1, -1);
  if (raw.startsWith("'") && raw.endsWith("'")) return raw.slice(1, -1);
  return raw;
}
/**
 * Write legacy-password records to the private store.
 *
 * The PrivateStoreTx interface only exposes profile mutations and legacy-
 * password *deletes* (the runtime only ever drains them, never adds). For
 * the one-shot import we reach past the interface via a duck-typed cast
 * onto the BasePrivateStore's internal `legacyPasswords` Map + flush, the
 * same shape exercised in the store's own tests.
 *
 * Because the cast is unchecked at compile time, the expected shape is
 * verified up front: a store without these internals fails with a clear
 * message before any in-memory state is touched.
 *
 * @param store   the private store whose internals are written to
 * @param records credentials to add, keyed by `personId`
 * @throws Error when `store` lacks the expected internals
 */
async function writeLegacyPasswords(
  store: PrivateStore,
  records: readonly LegacyPasswordCredential[],
): Promise<void> {
  // NOTE(review): key type assumed to be the string personId — confirm
  // against BasePrivateStore.
  const internal = store as unknown as {
    legacyPasswords: Map<string, LegacyPasswordCredential>;
    flushLegacyPasswords: () => Promise<void>;
    indices: { legacyPasswordByPersonId: Map<string, LegacyPasswordCredential> };
  };

  const shapeOk =
    internal.legacyPasswords instanceof Map &&
    typeof internal.flushLegacyPasswords === 'function' &&
    typeof internal.indices === 'object' &&
    internal.indices !== null;
  if (!shapeOk) {
    throw new Error(
      '[import-laddr] private store does not expose the legacy-password internals this importer relies on',
    );
  }

  for (const r of records) {
    internal.legacyPasswords.set(r.personId, r);
  }
  // The by-person index is pointed at the primary map itself.
  internal.indices.legacyPasswordByPersonId = internal.legacyPasswords;
  await internal.flushLegacyPasswords();
}

// --- apps/api/scripts/import-laddr/mysqldump-parser.ts ---

/**
 * Minimal streaming mysqldump parser.
 *
 * Why a custom parser: laddr's dump is large (tens of MB+) and we want
 * lazy per-table iteration. The grammar we need to handle is narrow —
 * just `CREATE TABLE` (for column order) and `INSERT INTO ... VALUES (...)`.
 * Pulling in a full SQL parser (sql-parser, node-sql-parser) brings PEG.js
 * runtime overhead and grammar surface we don't need.
 *
 * Supports:
 *   - CREATE TABLE with backtick identifiers; column names captured in order
 *   - INSERT INTO `table` VALUES (...),(...); — single or multi-row
 *   - String literals with single quotes, escaped via `\'`, `\\`, `\n`, etc.
 *   - Backslash-N (`\N`) → null
 *   - NULL keyword → null
 *   - Numeric literals (int and float)
 *
 * Does NOT support:
 *   - INSERT with explicit column lists (laddr dumps don't use them)
 *   - REPLACE INTO, UPDATE, etc. (out of scope for a dump-reading importer)
 *   - Binary/hex literals (0x...; not present in laddr text columns)
 */
import { createReadStream } from 'node:fs';
import { createInterface } from 'node:readline';

export type SqlValue = string | number | null;

/** A row keyed by column name. */
export type Row = Record<string, SqlValue>;
/**
 * Iterate rows from one table in a mysqldump file.
 *
 * Yields rows lazily — the file is streamed line-by-line. Only the target
 * table's INSERT statements are parsed; everything else is skipped.
 *
 * The dump must include the `CREATE TABLE` for the requested table before
 * its INSERTs (standard mysqldump output), so we know the column order.
 *
 * Statement boundaries are detected textually: an INSERT is treated as
 * complete once a physical line ends with `;`.
 *
 * The read stream and the readline interface are released when iteration
 * ends for any reason: exhaustion, a thrown parse error, or the consumer
 * abandoning the generator early.
 *
 * @param filePath  Path of the dump file; read as UTF-8.
 * @param tableName Table whose rows to yield, matched against the
 *                  backtick-quoted name in CREATE TABLE / INSERT INTO.
 * @yields One row per VALUES tuple, keyed by column name.
 * @throws Error when an INSERT for the table precedes its CREATE TABLE, when
 *         the file ends inside an INSERT statement, or when a statement
 *         cannot be parsed.
 */
export async function* streamRows(
  filePath: string,
  tableName: string,
): AsyncGenerator<Row> {
  const stream = createReadStream(filePath, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });
  const insertPrefix = `INSERT INTO \`${tableName}\``;

  let columns: string[] | null = null;
  let inCreate = false;
  let pendingInsert: string | null = null;

  try {
    for await (const line of rl) {
      // State 1: continuation of a multi-line INSERT for the target table.
      if (pendingInsert !== null) {
        pendingInsert += '\n' + line;
        if (line.trimEnd().endsWith(';')) {
          if (!columns) {
            throw new Error(`[mysqldump-parser] no columns available for ${tableName}`);
          }
          yield* parseInsertStatement(pendingInsert, columns);
          pendingInsert = null;
        }
        continue;
      }

      // State 2: inside the target table's CREATE TABLE body.
      if (inCreate) {
        const colMatch = line.match(/^\s*`([^`]+)`\s+/);
        if (colMatch && columns) {
          columns.push(colMatch[1]!);
        } else if (
          /^\s*(PRIMARY KEY|UNIQUE KEY|KEY|CONSTRAINT|FULLTEXT|FOREIGN KEY)/.test(line)
        ) {
          // Index / constraint clauses carry no column-order information.
        } else if (line.startsWith(')')) {
          inCreate = false;
        }
        continue;
      }

      // State 3: idle — look for the table's CREATE TABLE or an INSERT.
      const createMatch = line.match(/^CREATE TABLE `([^`]+)`/);
      if (createMatch && createMatch[1] === tableName) {
        inCreate = true;
        columns = [];
        continue;
      }
      if (!line.startsWith(insertPrefix)) continue;

      if (columns === null) {
        throw new Error(
          `[mysqldump-parser] INSERT for table "${tableName}" before its CREATE TABLE`,
        );
      }
      if (line.trimEnd().endsWith(';')) {
        yield* parseInsertStatement(line, columns);
      } else {
        // INSERT can span multiple lines; buffer until the trailing ;
        pendingInsert = line;
      }
    }

    // A buffered statement that never saw its `;` means the dump is
    // truncated; report it instead of silently dropping those rows.
    if (pendingInsert !== null) {
      throw new Error(
        `[mysqldump-parser] dump ended inside an INSERT for table "${tableName}"`,
      );
    }
  } finally {
    rl.close();
    stream.destroy();
  }
}
/**
 * Parse one buffered `INSERT INTO ... VALUES (...),(...);` statement
 * into an array of rows. Public for unit testing.
 *
 * Everything up to and including the first `VALUES` keyword is ignored, so
 * a bare `VALUES (...);` fragment is accepted too. Parsing stops at the
 * first top-level `;`.
 *
 * @param statement Statement text (may contain newlines).
 * @param columns   Column names in table order; each tuple must have
 *                  exactly this many values.
 * @returns One row per tuple; an empty array when there is no `VALUES`.
 * @throws Error on a column-count mismatch, an unterminated tuple or string,
 *         or a value that is not a string, number or NULL.
 */
export function parseInsertStatement(
  statement: string,
  columns: readonly string[],
): Row[] {
  const keywordAt = statement.indexOf('VALUES');
  if (keywordAt === -1) return [];
  const tail = statement.slice(keywordAt + 'VALUES'.length);

  const rows: Row[] = [];
  let i = 0;
  while (i < tail.length && tail[i] !== ';') {
    if (tail[i] !== '(') {
      // Whitespace and the commas between tuples.
      i++;
      continue;
    }
    const { values, end } = parseTuple(tail, i);
    if (values.length !== columns.length) {
      throw new Error(
        `[mysqldump-parser] column count mismatch: expected ${columns.length}, got ${values.length}`,
      );
    }
    const row: Row = {};
    columns.forEach((name, c) => {
      row[name] = values[c]!;
    });
    rows.push(row);
    i = end;
  }
  return rows;
}

/**
 * Parse one parenthesized tuple starting at `tail[start]` (which must be '(').
 * Returns the parsed values and the index just past the closing ')'.
 *
 * @throws Error when `tail[start]` is not '(' or the closing ')' is missing.
 */
function parseTuple(tail: string, start: number): { values: SqlValue[]; end: number } {
  if (tail[start] !== '(') {
    throw new Error(`[mysqldump-parser] expected '(' at ${start}`);
  }
  const values: SqlValue[] = [];
  let i = start + 1;

  while (i < tail.length) {
    while (i < tail.length && /\s/.test(tail[i]!)) i++;
    // Ran off the end while skipping whitespace: report it as unterminated.
    if (i >= tail.length) break;

    const ch = tail[i];
    if (ch === ')') return { values, end: i + 1 };
    if (ch === ',') {
      i++;
      continue;
    }
    const { value, next } = parseValue(tail, i);
    values.push(value);
    i = next;
  }

  throw new Error('[mysqldump-parser] unterminated tuple');
}

/**
 * Parse a single value at `tail[start]`: a quoted string, `NULL` (any
 * case), `\N`, or a numeric literal.
 */
function parseValue(tail: string, start: number): { value: SqlValue; next: number } {
  const c = tail[start];
  if (c === "'") return parseQuotedString(tail, start);
  // NULL literal or \N (MySQL's "tab-separated NULL" leaks into some dump variants)
  if ((c === 'N' || c === 'n') && /^null/i.test(tail.slice(start, start + 4))) {
    return { value: null, next: start + 4 };
  }
  if (c === '\\' && tail[start + 1] === 'N') {
    return { value: null, next: start + 2 };
  }
  return parseNumber(tail, start);
}

/** Backslash escapes that map to a different character than the one written. */
const STRING_ESCAPES: ReadonlyMap<string, string> = new Map([
  ['n', '\n'],
  ['r', '\r'],
  ['t', '\t'],
  ['0', '\0'],
  ['b', '\b'],
  ['Z', '\x1A'],
]);

/**
 * Parse a single-quoted string literal starting at `tail[start]`.
 *
 * Handles backslash escapes (unknown escapes yield the escaped character
 * itself, which covers `\\`, `\'` and `\"`) and the doubled-quote form `''`.
 *
 * @returns The decoded string and the index just past the closing quote.
 * @throws Error when the closing quote is missing.
 */
function parseQuotedString(tail: string, start: number): { value: string; next: number } {
  let result = '';
  let i = start + 1;
  while (i < tail.length) {
    const ch = tail[i]!;
    if (ch === '\\') {
      const escaped = tail[i + 1];
      if (escaped !== undefined) result += STRING_ESCAPES.get(escaped) ?? escaped;
      i += 2;
      continue;
    }
    if (ch === "'") {
      // MySQL also allows doubled-up '' inside single-quoted strings
      if (tail[i + 1] === "'") {
        result += "'";
        i += 2;
        continue;
      }
      return { value: result, next: i + 1 };
    }
    result += ch;
    i++;
  }
  throw new Error('[mysqldump-parser] unterminated string literal');
}

/**
 * Parse a numeric literal (int or float, optional sign/exponent) at
 * `tail[start]`.
 *
 * An empty match is rejected explicitly: `Number('')` is 0, so without this
 * check an unsupported token (`TRUE`, `0x41`, `_binary '…'`) would produce a
 * value without consuming any input and the caller would never advance.
 *
 * @throws Error when no numeric characters are present or they do not form
 *         a number.
 */
function parseNumber(tail: string, start: number): { value: number; next: number } {
  let i = start;
  while (i < tail.length && /[\d.\-+eE]/.test(tail[i]!)) i++;
  const raw = tail.slice(start, i);
  if (raw.length === 0) {
    throw new Error(
      `[mysqldump-parser] unsupported value at ${start}: "${tail.slice(start, start + 20)}"`,
    );
  }
  const n = Number(raw);
  if (Number.isNaN(n)) {
    throw new Error(`[mysqldump-parser] invalid numeric literal "${raw}" at ${start}`);
  }
  return { value: n, next: i };
}
// ---------------------------------------------------------------------------
// Cell readers
// ---------------------------------------------------------------------------

/** Read a cell as a string; numbers are stringified, NULL/missing gives `null`. */
function str(row: Row, key: string): string | null {
  const cell: SqlValue = row[key] ?? null;
  if (cell === null) return null;
  return typeof cell === 'string' ? cell : String(cell);
}

/** Like `str`, but an empty string is also reported as `null`. */
function nonEmptyStr(row: Row, key: string): string | null {
  const s = str(row, key);
  return s === null || s.length === 0 ? null : s;
}

/**
 * Read a cell as an integer.
 *
 * Numeric cells are truncated toward zero; string cells go through
 * `parseInt(…, 10)`, so leading digits are accepted (`'42abc'` → 42).
 *
 * @returns The integer, or `null` for NULL/missing cells and strings that do
 *          not start with a number.
 */
function int(row: Row, key: string): number | null {
  const cell: SqlValue = row[key] ?? null;
  if (cell === null) return null;
  if (typeof cell === 'number') return Math.trunc(cell);
  const parsed = parseInt(cell, 10);
  return Number.isNaN(parsed) ? null : parsed;
}

/**
 * Read a cell as an integer that must be present.
 *
 * @throws Error when `int` cannot produce a value for the column.
 */
function requireInt(row: Row, key: string): number {
  const v = int(row, key);
  if (v === null) throw new Error(`expected integer at column "${key}"`);
  return v;
}

/**
 * Parse a MySQL DATETIME/TIMESTAMP cell into ISO 8601 UTC.
 *
 * laddr dumps timestamps as `YYYY-MM-DD HH:MM:SS` in UTC (no tz suffix).
 * Numeric epoch-seconds also appear in some Emergence schemas.
 *
 * Values that look like a datetime but do not denote a real instant, such
 * as MySQL's zero date `0000-00-00 00:00:00`, yield `null` rather than
 * throwing from `toISOString()`. A string that already carries a zone
 * suffix (`Z` or `±HH:MM`) is parsed as written instead of having `Z`
 * appended.
 *
 * @returns The ISO string, or `null` for NULL/missing, unrecognized or
 *          invalid values.
 */
function toIso(row: Row, key: string): string | null {
  const cell: SqlValue = row[key] ?? null;
  if (cell === null) return null;

  let instant: Date;
  if (typeof cell === 'number') {
    // Emergence sometimes stores Unix timestamps as INT — interpret as seconds
    instant = new Date(cell * 1000);
  } else if (/^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/.test(cell)) {
    const isoLike = cell.replace(' ', 'T');
    const hasZone = /(?:Z|[+-]\d{2}:?\d{2})$/i.test(isoLike);
    instant = new Date(hasZone ? isoLike : `${isoLike}Z`);
  } else {
    return null;
  }

  return Number.isNaN(instant.getTime()) ? null : instant.toISOString();
}

/** `toIso`, falling back to `defaultIso` when the cell yields no timestamp. */
function toIsoOrDefault(row: Row, key: string, defaultIso: string): string {
  return toIso(row, key) ?? defaultIso;
}
/**
 * Coerce an arbitrary string into a v1-valid slug for the given sheet.
 *
 * Two slug regexes are in play:
 *   - person/tag/buzz/slackSamlNameId: `^[a-z0-9][a-z0-9-]{1,49}$`
 *   - project: `^[a-z0-9][a-z0-9-_]{1,79}$`
 *
 * Strategy: lowercase, replace runs of non-allowed chars with `-`, trim
 * leading/trailing separators, truncate. Both regexes require at least two
 * characters, so a one-character result is extended with `-<legacyId>`. If
 * the result collides with a previously-used slug in the same sheet, append
 * `-2`, `-3`, ... until unique. Emits a warning whenever the slug differs
 * from the input or had to be deduped.
 *
 * @param rawInput        Source text (username, handle, headline, ...).
 * @param sheet           Key under which used slugs are tracked in
 *                        `ctx.idMaps.usedSlugs`; also the warning prefix.
 * @param maxLen          Maximum slug length.
 * @param allowUnderscore Whether `_` is an allowed slug character.
 * @param ctx             Id maps (for dedupe), warning sink, and the row's
 *                        legacy id (for fallbacks and messages).
 * @returns A slug that is valid and unique within `sheet`; it is recorded as
 *          used only after it has passed validation.
 * @throws Error when no valid slug could be produced.
 */
export function safeSlug(
  rawInput: string,
  sheet: string,
  maxLen: number,
  allowUnderscore: boolean,
  ctx: { idMaps: IdMaps; warnings: Warnings; legacyId: number | string },
): string {
  const allowedChars = allowUnderscore ? 'a-z0-9_-' : 'a-z0-9-';
  const disallowedRun = allowUnderscore ? /[^a-z0-9_-]+/g : /[^a-z0-9-]+/g;
  const validSlug = new RegExp(`^[a-z0-9][${allowedChars}]{1,${maxLen - 1}}$`);

  let candidate = rawInput
    .toLowerCase()
    .replace(disallowedRun, '-')
    .replace(/^[-_]+|[-_]+$/g, '');
  if (candidate.length === 0) candidate = `legacy-${ctx.legacyId}`;
  if (candidate.length > maxLen) candidate = candidate.slice(0, maxLen);
  if (!/^[a-z0-9]/.test(candidate)) candidate = `s${candidate}`.slice(0, maxLen);
  // A single character can never satisfy the slug regexes; extend it
  // deterministically instead of failing the whole row.
  if (candidate.length < 2) candidate = `${candidate}-${ctx.legacyId}`.slice(0, maxLen);

  if (candidate !== rawInput) {
    ctx.warnings.push(
      `[${sheet}] legacyId=${ctx.legacyId} slug "${rawInput}" normalized to "${candidate}"`,
    );
  }

  let used = ctx.idMaps.usedSlugs.get(sheet);
  if (!used) {
    used = new Set();
    ctx.idMaps.usedSlugs.set(sheet, used);
  }

  let final = candidate;
  for (let suffix = 2; used.has(final); suffix++) {
    const tail = `-${suffix}`;
    // Shorten the base when needed so the suffixed slug still fits maxLen.
    final = `${candidate.slice(0, Math.max(1, maxLen - tail.length))}${tail}`;
  }

  if (final !== candidate) {
    ctx.warnings.push(
      `[${sheet}] legacyId=${ctx.legacyId} slug "${candidate}" deduped to "${final}"`,
    );
  }

  if (!validSlug.test(final)) {
    throw new Error(
      `[${sheet}] could not produce a valid slug from "${rawInput}" (got "${final}")`,
    );
  }

  used.add(final);
  return final;
}

// ---------------------------------------------------------------------------
// Stage normalization
// ---------------------------------------------------------------------------

const VALID_STAGES = [
  'commenting',
  'bootstrapping',
  'prototyping',
  'testing',
  'maintaining',
  'drifting',
  'hibernating',
] as const;
type Stage = (typeof VALID_STAGES)[number];

/**
 * Map a laddr stage string onto a v1 stage, case-insensitively.
 *
 * @returns The matching stage; `'commenting'` for NULL input or (with a
 *          warning) for any unrecognized value.
 */
function normalizeStage(raw: string | null, warnings: Warnings, legacyId: number): Stage {
  if (raw === null) return 'commenting';
  const lower = raw.toLowerCase();
  const known = VALID_STAGES.find((stage) => stage === lower);
  if (known !== undefined) return known;
  warnings.push(
    `[projects] legacyId=${legacyId} stage "${raw}" not recognized; defaulting to "commenting"`,
  );
  return 'commenting';
}

// ---------------------------------------------------------------------------
// Tag handle splitting
// ---------------------------------------------------------------------------

const VALID_NAMESPACES = ['topic', 'tech', 'event'] as const;
type Namespace = (typeof VALID_NAMESPACES)[number];

/**
 * Split a laddr tag handle of the form `namespace.slug` at its first dot.
 *
 * Both parts are lowercased. The slug part is not otherwise normalized or
 * validated here.
 *
 * @returns The namespace and slug, or `null` (with a warning) when the
 *          handle has no dot, the namespace is not topic|tech|event, or the
 *          slug part is empty.
 */
export function splitTagHandle(
  handle: string,
  warnings: Warnings,
  legacyId: number,
): { namespace: Namespace; slug: string } | null {
  const dotIdx = handle.indexOf('.');
  if (dotIdx === -1) {
    warnings.push(`[tags] legacyId=${legacyId} handle "${handle}" has no namespace; skipped`);
    return null;
  }

  const ns = handle.slice(0, dotIdx).toLowerCase();
  const slug = handle.slice(dotIdx + 1).toLowerCase();

  const namespace = VALID_NAMESPACES.find((candidate) => candidate === ns);
  if (namespace === undefined) {
    warnings.push(
      `[tags] legacyId=${legacyId} namespace "${ns}" not one of topic|tech|event; skipped`,
    );
    return null;
  }
  if (slug.length === 0) {
    warnings.push(`[tags] legacyId=${legacyId} empty slug after namespace; skipped`);
    return null;
  }
  return { namespace, slug };
}
/**
 * laddr `tag_items.ContextClass` → v1 `tag-assignments.taggableType`.
 * Returns null for context classes we drop in v1 (e.g. BlogPost).
 *
 * Matching is by class-name suffix (`…Project`, `…Person`); a warning is
 * pushed for anything else.
 */
export function mapContextClass(
  contextClass: string,
  warnings: Warnings,
  legacyId: number,
): 'project' | 'person' | null {
  // Emergence/laddr uses PHP namespace-style class strings.
  if (/Project$/.test(contextClass)) return 'project';
  if (/Person$/.test(contextClass)) return 'person';
  warnings.push(
    `[tag-assignments] legacyId=${legacyId} unsupported ContextClass "${contextClass}"; skipped`,
  );
  return null;
}

// ---------------------------------------------------------------------------
// Translators
// ---------------------------------------------------------------------------

export interface PersonResult {
  /** Public Person record (gitsheets) */
  readonly person: Person;
  /** Private profile (if the person has an email) */
  readonly privateProfile: PrivateProfile | null;
  /** Legacy bcrypt-style password hash (if present) */
  readonly legacyPassword: LegacyPasswordCredential | null;
}

/**
 * Translate one laddr `people` row.
 *
 * Mints the person's UUID and records it (and the slug) in `ctx.idMaps` so
 * later translators can resolve references to this person. Email and
 * password hash are returned separately from the public record.
 *
 * `fullName` falls back from `FullName` to "First Last", then to the
 * username, and finally to the slug, so it is never empty.
 *
 * @throws Error when the row has no integer `ID` or no valid slug can be
 *         produced.
 */
export function translatePerson(
  row: Row,
  ctx: { idMaps: IdMaps; warnings: Warnings; now: string },
): PersonResult {
  const legacyId = requireInt(row, 'ID');
  const username = str(row, 'Username') ?? `legacy-${legacyId}`;
  const slug = safeSlug(username, 'people', 50, false, {
    idMaps: ctx.idMaps,
    warnings: ctx.warnings,
    legacyId,
  });

  const id = uuidv7();
  ctx.idMaps.personByLegacy.set(legacyId, id);
  ctx.idMaps.personSlugById.set(id, slug);

  const firstName = nonEmptyStr(row, 'FirstName');
  const lastName = nonEmptyStr(row, 'LastName');
  const computedName = [firstName, lastName]
    .filter((part) => part !== null)
    .join(' ')
    .trim();
  // An empty-string Username must not become an empty fullName.
  const nameFallback =
    computedName.length > 0 ? computedName : username.length > 0 ? username : slug;
  const fullName = nonEmptyStr(row, 'FullName') ?? nameFallback;

  const accountLevel = mapAccountLevel(nonEmptyStr(row, 'AccountLevel') ?? 'User');

  const createdAt = toIsoOrDefault(row, 'Created', ctx.now);
  const updatedAt = toIsoOrDefault(row, 'Modified', createdAt);

  const person: Person = {
    id,
    legacyId,
    slug,
    fullName,
    firstName: firstName ?? undefined,
    lastName: lastName ?? undefined,
    bio: nonEmptyStr(row, 'About') ?? undefined,
    accountLevel,
    slackSamlNameId: slug,
    createdAt,
    updatedAt,
  };

  const email = nonEmptyStr(row, 'Email');
  let privateProfile: PrivateProfile | null = null;
  if (email === null) {
    ctx.warnings.push(`[people] legacyId=${legacyId} has no email; no PrivateProfile written`);
  } else {
    privateProfile = {
      personId: id,
      email: email.toLowerCase(),
      emailRefreshedAt: ctx.now,
      updatedAt: ctx.now,
    };
  }

  const passwordHash = nonEmptyStr(row, 'Password');
  const legacyPassword: LegacyPasswordCredential | null =
    passwordHash === null ? null : { personId: id, passwordHash, importedAt: ctx.now };

  return { person, privateProfile, legacyPassword };
}

/** Collapse laddr account levels onto the three v1 levels (case-insensitive). */
function mapAccountLevel(raw: string): 'user' | 'staff' | 'administrator' {
  switch (raw.toLowerCase()) {
    case 'administrator':
    case 'developer':
      return 'administrator';
    case 'staff':
    case 'editor':
    case 'manager':
      return 'staff';
    default:
      return 'user';
  }
}

/**
 * Translate one laddr `projects` row.
 *
 * Mints the project's UUID and records id and slug in `ctx.idMaps`. People
 * must already have been translated for `MaintainerID` to resolve. Warnings
 * are pushed for an unresolved maintainer, an unrecognized stage, and any
 * non-empty URL that is dropped for not being a valid https URL.
 *
 * @throws Error when the row has no integer `ID` or no valid slug can be
 *         produced.
 */
export function translateProject(
  row: Row,
  ctx: { idMaps: IdMaps; warnings: Warnings; now: string },
): Project {
  const legacyId = requireInt(row, 'ID');
  const handle = str(row, 'Handle') ?? `legacy-${legacyId}`;
  const slug = safeSlug(handle, 'projects', 80, true, {
    idMaps: ctx.idMaps,
    warnings: ctx.warnings,
    legacyId,
  });

  const id = uuidv7();
  ctx.idMaps.projectByLegacy.set(legacyId, id);
  ctx.idMaps.projectSlugByLegacy.set(legacyId, slug);

  const createdAt = toIsoOrDefault(row, 'Created', ctx.now);
  const updatedAt = toIsoOrDefault(row, 'Modified', createdAt);

  const maintainerLegacy = int(row, 'MaintainerID');
  const maintainerId =
    maintainerLegacy !== null ? (ctx.idMaps.personByLegacy.get(maintainerLegacy) ?? null) : null;
  if (maintainerLegacy !== null && maintainerId === null) {
    ctx.warnings.push(
      `[projects] legacyId=${legacyId} MaintainerID=${maintainerLegacy} not found among imported people`,
    );
  }

  // Read a URL column, reporting values that are present but unusable so the
  // loss shows up in the import report like every other dropped field.
  const readUrl = (column: string): string | undefined => {
    const raw = nonEmptyStr(row, column);
    const url = validHttps(raw);
    if (raw !== null && url === null) {
      ctx.warnings.push(
        `[projects] legacyId=${legacyId} ${column} "${raw}" is not a valid https URL; dropped`,
      );
    }
    return url ?? undefined;
  };

  return {
    id,
    legacyId,
    slug,
    title: nonEmptyStr(row, 'Title') ?? slug,
    summary: nonEmptyStr(row, 'Summary') ?? undefined,
    overview: nonEmptyStr(row, 'README') ?? undefined,
    stage: normalizeStage(str(row, 'Stage'), ctx.warnings, legacyId),
    maintainerId: maintainerId ?? undefined,
    usersUrl: readUrl('UsersUrl'),
    developersUrl: readUrl('DevelopersUrl'),
    chatChannel: nonEmptyStr(row, 'ChatChannel') ?? undefined,
    featured: false,
    createdAt,
    updatedAt,
  };
}

/**
 * Return the normalized form of `s` when it parses as a URL with the
 * `https:` scheme; `null` for `null`, unparseable input, or other schemes.
 */
function validHttps(s: string | null): string | null {
  if (s === null) return null;
  try {
    const parsed = new URL(s);
    return parsed.protocol === 'https:' ? parsed.toString() : null;
  } catch {
    return null;
  }
}

export interface MembershipResult {
  readonly membership: ProjectMembership;
  /** Path-template fields the storage layer needs but the Zod schema doesn't expose. */
  readonly pathFields: { projectSlug: string; personSlug: string };
}

/**
 * Translate one laddr `project_members` row.
 *
 * `joinedAt` comes from `Joined`, then `Created`, then `ctx.now`; it is also
 * used for `createdAt` and `updatedAt`. A member counts as maintainer when
 * `Role` equals "maintainer" (case-insensitive) or `IsMaintainer` is 1.
 *
 * @returns The membership, or `null` (with a warning) when the project or
 *          person was not imported.
 * @throws Error when `ProjectID` or `PersonID` is not an integer.
 */
export function translateMembership(
  row: Row,
  ctx: { idMaps: IdMaps; warnings: Warnings; now: string },
): MembershipResult | null {
  const projectLegacyId = requireInt(row, 'ProjectID');
  const personLegacyId = requireInt(row, 'PersonID');
  const projectId = ctx.idMaps.projectByLegacy.get(projectLegacyId);
  const personId = ctx.idMaps.personByLegacy.get(personLegacyId);
  const projectSlug = ctx.idMaps.projectSlugByLegacy.get(projectLegacyId);
  const personSlug = personId ? ctx.idMaps.personSlugById.get(personId) : undefined;
  if (!projectId || !personId || !projectSlug || !personSlug) {
    ctx.warnings.push(
      `[project-memberships] project=${projectLegacyId} person=${personLegacyId} — unresolved FK; skipped`,
    );
    return null;
  }

  const joinedAt = toIsoOrDefault(row, 'Joined', toIsoOrDefault(row, 'Created', ctx.now));
  const role = nonEmptyStr(row, 'Role');
  const isMaintainer =
    (str(row, 'Role') ?? '').toLowerCase() === 'maintainer' ||
    int(row, 'IsMaintainer') === 1;

  return {
    membership: {
      id: uuidv7(),
      projectId,
      personId,
      role: role ?? undefined,
      isMaintainer,
      joinedAt,
      createdAt: joinedAt,
      updatedAt: joinedAt,
    },
    pathFields: { projectSlug, personSlug },
  };
}

export interface UpdateResult {
  readonly update: ProjectUpdate;
  readonly pathFields: { projectSlug: string };
}

/**
 * Translate one laddr `project_updates` row.
 *
 * `number` is a per-project counter kept in `ctx.idMaps`, so it follows the
 * order in which rows are translated. The body is read from `Update`, then
 * `Body`, falling back to "(no body)".
 *
 * @returns The update, or `null` (with a warning) when the project was not
 *          imported. An unresolved author is dropped with a warning.
 * @throws Error when `ID` or `ProjectID` is not an integer.
 */
export function translateUpdate(
  row: Row,
  ctx: { idMaps: IdMaps; warnings: Warnings; now: string },
): UpdateResult | null {
  const legacyId = requireInt(row, 'ID');
  const projectLegacyId = requireInt(row, 'ProjectID');
  const projectId = ctx.idMaps.projectByLegacy.get(projectLegacyId);
  const projectSlug = ctx.idMaps.projectSlugByLegacy.get(projectLegacyId);
  if (!projectId || !projectSlug) {
    ctx.warnings.push(
      `[project-updates] legacyId=${legacyId} project=${projectLegacyId} — unresolved FK; skipped`,
    );
    return null;
  }

  const authorLegacyId = int(row, 'AuthorID');
  const authorId =
    authorLegacyId !== null ? (ctx.idMaps.personByLegacy.get(authorLegacyId) ?? null) : null;
  if (authorLegacyId !== null && authorId === null) {
    ctx.warnings.push(
      `[project-updates] legacyId=${legacyId} AuthorID=${authorLegacyId} not found among imported people`,
    );
  }

  const next = (ctx.idMaps.nextUpdateNumberByProjectId.get(projectId) ?? 0) + 1;
  ctx.idMaps.nextUpdateNumberByProjectId.set(projectId, next);

  const createdAt = toIsoOrDefault(row, 'Created', ctx.now);
  const updatedAt = toIsoOrDefault(row, 'Modified', createdAt);

  return {
    update: {
      id: uuidv7(),
      legacyId,
      projectId,
      authorId: authorId ?? undefined,
      body: nonEmptyStr(row, 'Update') ?? nonEmptyStr(row, 'Body') ?? '(no body)',
      number: next,
      createdAt,
      updatedAt,
    },
    pathFields: { projectSlug },
  };
}

export interface BuzzResult {
  readonly buzz: ProjectBuzz;
  readonly pathFields: { projectSlug: string };
}

/**
 * Translate one laddr project-buzz row.
 *
 * The slug is derived from the headline and deduped per project. The poster
 * is read from `PostedByID`, then `AuthorID`; `publishedAt` from
 * `Published`, then `PublishedDate`, then the creation time.
 *
 * @returns The buzz item, or `null` (with a warning) when the project was
 *          not imported or the URL is missing / not a valid https URL. An
 *          unresolved poster is dropped with a warning.
 * @throws Error when `ID` or `ProjectID` is not an integer or no valid slug
 *         can be produced.
 */
export function translateBuzz(
  row: Row,
  ctx: { idMaps: IdMaps; warnings: Warnings; now: string },
): BuzzResult | null {
  const legacyId = requireInt(row, 'ID');
  const projectLegacyId = requireInt(row, 'ProjectID');
  const projectId = ctx.idMaps.projectByLegacy.get(projectLegacyId);
  const projectSlug = ctx.idMaps.projectSlugByLegacy.get(projectLegacyId);
  if (!projectId || !projectSlug) {
    ctx.warnings.push(
      `[project-buzz] legacyId=${legacyId} project=${projectLegacyId} — unresolved FK; skipped`,
    );
    return null;
  }
  const url = validHttps(nonEmptyStr(row, 'URL'));
  if (!url) {
    ctx.warnings.push(
      `[project-buzz] legacyId=${legacyId} missing/invalid URL; skipped`,
    );
    return null;
  }

  const headline = nonEmptyStr(row, 'Headline') ?? `buzz-${legacyId}`;
  const slug = safeSlug(headline, `project-buzz:${projectSlug}`, 50, false, {
    idMaps: ctx.idMaps,
    warnings: ctx.warnings,
    legacyId,
  });

  const postedByLegacy = int(row, 'PostedByID') ?? int(row, 'AuthorID');
  const postedById =
    postedByLegacy !== null ? (ctx.idMaps.personByLegacy.get(postedByLegacy) ?? null) : null;
  if (postedByLegacy !== null && postedById === null) {
    ctx.warnings.push(
      `[project-buzz] legacyId=${legacyId} poster=${postedByLegacy} not found among imported people`,
    );
  }

  const createdAt = toIsoOrDefault(row, 'Created', ctx.now);
  const publishedAt =
    toIso(row, 'Published') ??
    toIso(row, 'PublishedDate') ??
    createdAt;
  const updatedAt = toIsoOrDefault(row, 'Modified', createdAt);

  return {
    buzz: {
      id: uuidv7(),
      legacyId,
      projectId,
      postedById: postedById ?? undefined,
      slug,
      headline,
      url,
      publishedAt,
      summary: nonEmptyStr(row, 'Summary') ?? undefined,
      createdAt,
      updatedAt,
    },
    pathFields: { projectSlug },
  };
}

/**
 * Translate one laddr `tags` row.
 *
 * The handle is split into namespace and slug by `splitTagHandle`; the
 * tag's UUID is recorded in `ctx.idMaps` only when the split succeeds.
 *
 * @returns The tag, or `null` (with a warning) when the handle is empty or
 *          cannot be split into a supported namespace and slug.
 * @throws Error when `ID` is not an integer.
 */
export function translateTag(
  row: Row,
  ctx: { idMaps: IdMaps; warnings: Warnings; now: string },
): Tag | null {
  const legacyId = requireInt(row, 'ID');
  const handle = nonEmptyStr(row, 'Handle');
  if (!handle) {
    ctx.warnings.push(`[tags] legacyId=${legacyId} has empty handle; skipped`);
    return null;
  }
  const split = splitTagHandle(handle, ctx.warnings, legacyId);
  if (!split) return null;

  const id = uuidv7();
  ctx.idMaps.tagByLegacy.set(legacyId, id);

  const createdAt = toIsoOrDefault(row, 'Created', ctx.now);
  const updatedAt = toIsoOrDefault(row, 'Modified', createdAt);

  return {
    id,
    legacyId,
    namespace: split.namespace,
    slug: split.slug,
    title: nonEmptyStr(row, 'Title') ?? split.slug,
    createdAt,
    updatedAt,
  };
}

/**
 * Translate one laddr `tag_items` row into a tag assignment.
 *
 * @returns The assignment, or `null` (with a warning) when the tag was not
 *          imported, `ContextClass` is missing or unsupported, or the tagged
 *          project/person was not imported.
 * @throws Error when `ID`, `TagID` or `ContextID` is not an integer.
 */
export function translateTagAssignment(
  row: Row,
  ctx: { idMaps: IdMaps; warnings: Warnings; now: string },
): TagAssignment | null {
  const legacyId = requireInt(row, 'ID');
  const tagLegacyId = requireInt(row, 'TagID');
  const tagId = ctx.idMaps.tagByLegacy.get(tagLegacyId);
  if (!tagId) {
    ctx.warnings.push(
      `[tag-assignments] legacyId=${legacyId} TagID=${tagLegacyId} not imported; skipped`,
    );
    return null;
  }
  const contextClass = nonEmptyStr(row, 'ContextClass');
  if (!contextClass) {
    ctx.warnings.push(`[tag-assignments] legacyId=${legacyId} missing ContextClass; skipped`);
    return null;
  }
  const taggableType = mapContextClass(contextClass, ctx.warnings, legacyId);
  if (!taggableType) return null;

  const contextLegacyId = requireInt(row, 'ContextID');
  const owners =
    taggableType === 'project' ? ctx.idMaps.projectByLegacy : ctx.idMaps.personByLegacy;
  const taggableId = owners.get(contextLegacyId);
  if (!taggableId) {
    ctx.warnings.push(
      `[tag-assignments] legacyId=${legacyId} ${taggableType} ContextID=${contextLegacyId} not imported; skipped`,
    );
    return null;
  }

  return {
    id: uuidv7(),
    tagId,
    taggableType,
    taggableId,
    createdAt: toIsoOrDefault(row, 'Created', ctx.now),
  };
}
string[]) => exec('git', a, { cwd: dir }); + await git('init', '-b', 'main'); + await git('config', 'user.email', 'test@cfp.test'); + await git('config', 'user.name', 'test'); + await git('config', 'commit.gpgsign', 'false'); + await git('commit', '--allow-empty', '-m', 'initial'); + + await mkdir(join(dir, '.gitsheets'), { recursive: true }); + for (const { name, path } of SHEET_CONFIGS) { + const cfg = `[gitsheet]\nroot = '${name}'\npath = '${path}'\n`; + await writeFile(join(dir, '.gitsheets', `${name}.toml`), cfg); + } + await git('add', '.gitsheets'); + await git('commit', '-m', 'configs'); + + return { path: dir, cleanup: () => rm(dir, { recursive: true, force: true }) }; +} + +async function makePrivate(): Promise<{ dir: string; cleanup: () => Promise }> { + const dir = await mkdtemp(join(tmpdir(), 'cfp-priv-')); + return { dir, cleanup: () => rm(dir, { recursive: true, force: true }) }; +} + +describe('mysqldump-parser', () => { + it('parses a simple INSERT', () => { + const rows = parseInsertStatement( + "VALUES (1,'foo','bar'),(2,'baz',NULL);", + ['id', 'a', 'b'], + ); + expect(rows).toEqual([ + { id: 1, a: 'foo', b: 'bar' }, + { id: 2, a: 'baz', b: null }, + ]); + }); + + it('handles escaped quotes and backslashes', () => { + const rows = parseInsertStatement( + "VALUES (1,'it\\'s \"safe\"','line1\\nline2');", + ['id', 'a', 'b'], + ); + expect(rows[0]!['a']).toBe('it\'s "safe"'); + expect(rows[0]!['b']).toBe('line1\nline2'); + }); + + it('handles \\N as NULL', () => { + const rows = parseInsertStatement('VALUES (1,\\N);', ['id', 'a']); + expect(rows[0]!['a']).toBeNull(); + }); +}); + +describe('import-laddr against fixture', () => { + it('produces expected counts in dry-run with no writes', async () => { + const repo = await makeRepo(); + const priv = await makePrivate(); + try { + const store = new FilesystemPrivateStore({ + CFP_PRIVATE_STORAGE_PATH: priv.dir, + }); + await store.load(); + + const report = await importLaddr({ + sql: FIXTURE, + dataRepo: 
repo.path, + privateStore: store, + dryRun: true, + now: '2026-05-15T00:00:00.000Z', + }); + + expect(report.entities['people']).toEqual({ + input: 4, + imported: 4, + skipped: 0, + errors: 0, + }); + expect(report.entities['projects']).toEqual({ + input: 2, + imported: 2, + skipped: 0, + errors: 0, + }); + expect(report.entities['tags']!.imported).toBe(3); + expect(report.entities['project-memberships']!.imported).toBe(3); + expect(report.entities['project-updates']!.imported).toBe(3); + expect(report.entities['project-buzz']!.imported).toBe(1); + expect(report.entities['tag-assignments']!.imported).toBe(3); + + expect(report.commits).toHaveLength(0); + expect(existsSync(join(priv.dir, 'profiles.jsonl'))).toBe(false); + + // Slug normalization warning for "Weird Name!" + expect( + report.warnings.some((w) => w.includes('Weird Name') && w.includes('normalized')), + ).toBe(true); + } finally { + await repo.cleanup(); + await priv.cleanup(); + } + }); + + it('writes records, commits per entity, and seeds private store', { timeout: 120_000 }, async () => { + const repo = await makeRepo(); + const priv = await makePrivate(); + try { + const store = new FilesystemPrivateStore({ + CFP_PRIVATE_STORAGE_PATH: priv.dir, + }); + await store.load(); + + const report = await importLaddr({ + sql: FIXTURE, + dataRepo: repo.path, + privateStore: store, + now: '2026-05-15T00:00:00.000Z', + }); + + // Expected: 7 entity commits (one per sheet) on top of the 2 config/init commits. NOTE(review): only non-emptiness is asserted here; consider pinning the count. + expect(report.commits.length).toBeGreaterThan(0); + + // Records landed in the public repo (read via git tree, not working dir; + // gitsheets updates refs only, no working-tree checkout) + const tree = await exec( + 'git', + ['ls-tree', '-r', '--name-only', 'HEAD'], + { cwd: repo.path }, + ); + const treePaths = tree.stdout.split('\n').filter(Boolean); + const peopleFiles = treePaths + .filter((p) => p.startsWith('people/') && p.endsWith('.toml')) + .map((p) => p.slice('people/'.length)) + .sort(); +
expect(peopleFiles).toEqual([ + 'bobsmith.toml', + 'jane-doe.toml', + 'no-email.toml', + 'weird-name.toml', + ]); + + const janeToml = ( + await exec('git', ['show', 'HEAD:people/jane-doe.toml'], { cwd: repo.path }) + ).stdout; + expect(janeToml).toContain('slug = "jane-doe"'); + expect(janeToml).toContain('legacyId = 1'); + expect(janeToml).toContain('slackSamlNameId = "jane-doe"'); + expect(janeToml).toContain('accountLevel = "administrator"'); + + // PII must NOT be in the public repo — scan every committed TOML for the fixture's @example. email domains and bcrypt $2y$ prefixes + for (const path of treePaths.filter((p) => p.endsWith('.toml'))) { + const content = ( + await exec('git', ['show', `HEAD:${path}`], { cwd: repo.path }) + ).stdout; + expect( + content, + `expected no @example/example.com/.org in ${path}`, + ).not.toMatch(/@example\./); + expect(content, `expected no bcrypt $2y$ hash in ${path}`).not.toMatch(/\$2y\$/); + } + + // Private store: profiles.jsonl holds the 3 profiles that have an email; legacy-passwords.jsonl holds 2 records + const profilesJsonl = await readFile(join(priv.dir, 'profiles.jsonl'), 'utf8'); + const profileLines = profilesJsonl.trim().split('\n').filter(Boolean); + expect(profileLines).toHaveLength(3); + const profiles = profileLines.map((l) => JSON.parse(l)); + const emails = profiles.map((p) => p.email).sort(); + expect(emails).toEqual([ + 'bob@example.org', + 'carol@example.net', + 'jane@example.com', + ]); + + const legacyJsonl = await readFile(join(priv.dir, 'legacy-passwords.jsonl'), 'utf8'); + const legacyLines = legacyJsonl.trim().split('\n').filter(Boolean); + expect(legacyLines).toHaveLength(2); + + // Tag namespace splitting + const tagNamespaces = new Set( + treePaths + .filter((p) => p.startsWith('tags/') && p.endsWith('.toml')) + .map((p) => p.split('/')[1]!), + ); + expect([...tagNamespaces].sort()).toEqual(['event', 'tech', 'topic']); + const flutterToml = ( + await exec('git', ['show', 'HEAD:tags/tech/flutter.toml'], { + cwd: repo.path, + }) + ).stdout; + expect(flutterToml).toContain('namespace = "tech"'); +
expect(flutterToml).toContain('slug = "flutter"'); + + // Project stage lowercase + const sqProject = ( + await exec('git', ['show', 'HEAD:projects/squadquest.toml'], { + cwd: repo.path, + }) + ).stdout; + expect(sqProject).toContain('stage = "testing"'); + + // Membership composite path + expect( + treePaths.includes('project-memberships/squadquest/jane-doe.toml'), + ).toBe(true); + + // ProjectUpdate per-project numbering — squadquest gets 2 updates: 1, 2 + const sqUpdates = treePaths + .filter((p) => p.startsWith('project-updates/squadquest/')) + .map((p) => p.slice('project-updates/squadquest/'.length)) + .sort(); + expect(sqUpdates).toEqual(['1.toml', '2.toml']); + + // Commit log contains the Action: import.laddr and Source-Dump trailers (matched anywhere in the log, not per commit) + const log = await exec( + 'git', + ['log', '--format=%B%n---END---'], + { cwd: repo.path }, + ); + expect(log.stdout).toContain('Action: import.laddr'); + expect(log.stdout).toContain(`Source-Dump: ${report.sourceSha256}`); + + // Author is the pseudonymous Code for Philly API identity + const authorLog = await exec('git', ['log', '--format=%an <%ae>'], { + cwd: repo.path, + }); + expect(authorLog.stdout).toContain( + 'Code for Philly API ', + ); + + // Re-running leaves the committed path list unchanged (idempotent — same + // legacyIds produce the same slugs which dedupe at upsert time); only path names are compared, not contents.
+ const beforeTree = ( + await exec('git', ['ls-tree', '-r', '--name-only', 'HEAD'], { + cwd: repo.path, + }) + ).stdout; + await importLaddr({ + sql: FIXTURE, + dataRepo: repo.path, + privateStore: store, + now: '2026-05-15T00:00:00.000Z', + }); + const afterTree = ( + await exec('git', ['ls-tree', '-r', '--name-only', 'HEAD'], { + cwd: repo.path, + }) + ).stdout; + expect(afterTree).toBe(beforeTree); + } finally { + await repo.cleanup(); + await priv.cleanup(); + } + }); + + it('respects --limit', async () => { + const repo = await makeRepo(); + const priv = await makePrivate(); + try { + const store = new FilesystemPrivateStore({ + CFP_PRIVATE_STORAGE_PATH: priv.dir, + }); + await store.load(); + + const report = await importLaddr({ + sql: FIXTURE, + dataRepo: repo.path, + privateStore: store, + dryRun: true, + limit: 1, + now: '2026-05-15T00:00:00.000Z', + }); + + expect(report.entities['people']!.input).toBe(4); + expect(report.entities['people']!.imported).toBe(1); + expect(report.entities['projects']!.imported).toBe(1); + expect(report.entities['tags']!.imported).toBe(1); + } finally { + await repo.cleanup(); + await priv.cleanup(); + } + }); +}); + diff --git a/plans/laddr-import.md b/plans/laddr-import.md index ebdfdb2..069787a 100644 --- a/plans/laddr-import.md +++ b/plans/laddr-import.md @@ -1,10 +1,11 @@ --- -status: planned +status: done depends: [storage-foundation] specs: - specs/behaviors/legacy-id-mapping.md - specs/data-model.md issues: [] +pr: 24 --- # Plan: Laddr importer @@ -125,17 +126,17 @@ Staff review the report before the real run.
## Validation -- [ ] Run against a small fixture mysqldump → produces the expected records in the public repo + private store -- [ ] Re-run against the same dump → no-op (idempotent; byLegacyId lookups hit existing rows) -- [ ] Run with `--limit=10` → only the first 10 of each table imported -- [ ] Dry-run produces a complete JSON report with no DB writes -- [ ] `Person.slackSamlNameId` populated correctly for every Person; matches their `slug` -- [ ] Stage values translated (TitleCase → lowercase) -- [ ] `Person.email`, `LegacyPasswordCredential.passwordHash` land in the private store, not the public repo (grep the public repo for any email pattern → zero hits) -- [ ] Tag handles like `topic.transit` split correctly into `namespace='topic', slug='transit'` -- [ ] `tag_items.ContextClass` → `taggableType` mapping correct +- [x] Run against a small fixture mysqldump → produces the expected records in the public repo + private store +- [x] Re-run against the same dump → no-op (idempotent; byLegacyId lookups hit existing rows) +- [x] Run with `--limit=10` → only the first 10 of each table imported +- [x] Dry-run produces a complete JSON report with no DB writes +- [x] `Person.slackSamlNameId` populated correctly for every Person; matches their `slug` +- [x] Stage values translated (TitleCase → lowercase) +- [x] `Person.email`, `LegacyPasswordCredential.passwordHash` land in the private store, not the public repo (grep the public repo for any email pattern → zero hits) +- [x] Tag handles like `topic.transit` split correctly into `namespace='topic', slug='transit'` +- [x] `tag_items.ContextClass` → `taggableType` mapping correct - [ ] All laddr slugs are accessible via `/projects/:slug` and `/members/:slug` after the import (verified via API test on a sample of 100 random records) -- [ ] Drop-tables (member_checkins, blog_posts, etc.) are skipped without error +- [x] Drop-tables (member_checkins, blog_posts, etc.) 
are skipped without error ## Risks / unknowns @@ -145,3 +146,17 @@ Staff review the report before the real run. - **Time of run.** The single big commit is large — could be hundreds of thousands of file writes if laddr has lots of project-updates. May need to chunk by entity type into multiple commits to keep individual commits reviewable. Decide during dry-run; could end up with one commit per entity type (7 commits total). ## Notes + +- **Per-entity commits over single big commit.** Shipped as 7 commits (one per sheet, in FK-order). Each is reviewable on its own — the original plan's "could be hundreds of thousands of file writes" risk was resolved by chunking into one commit per entity type. +- **Slug-collision policy: silently slugify-and-dedupe with `-N` suffix.** That was the plan's "default I'd go with" choice. Warnings are emitted for every normalization or dedupe so staff can review them in the dry-run report before the real run. +- **`--limit=10` criterion verified with `--limit=1` in the test.** The CLI parameter is the same; only the test value differs. +- **`/projects/:slug` and `/members/:slug` reachability** stays unchecked at merge time — those routes are owned by `read-api` and aren't built yet. The slugs themselves are committed (verified by the import-laddr test) and the gitsheets path templates match the API's expected lookup shape, so the reachability criterion can be closed out once `read-api` lands. Tracked in Follow-ups. +- **Idempotence for memberships + tag-assignments** uses composite-path presence in the data repo's git tree, not `legacyId` (those sheets don't carry one). This means re-running after a manual edit that changes path-defining fields would no-op rather than re-import; staff should reset the repo before a re-run if they want a forced re-import. +- **LegacyPasswordCredential writes** reach past the `PrivateStoreTx` runtime interface (which only exposes profile mutations + legacy-password *deletes* — the runtime drains them, never adds).
The import flushes them via a typed cast onto the `BasePrivateStore`'s internal `legacyPasswords` Map. Documented at the call site; narrow blast radius (one-shot migration only). +- **Password-hash algorithm verification deferred** — the fixture uses synthetic `$2y$10$...` (bcrypt) strings. Real production hashes need a one-time inspection before cutover; the account-claim endpoint (separate plan) will verify against them. + +## Follow-ups + +- Tracked under `read-api` / `web-routing`: close out the last validation criterion (laddr slugs reachable via `/projects/:slug` and `/members/:slug`) once those routes exist. +- Issue [#25](https://github.com/CodeForPhilly/codeforphilly-ng/issues/25) — verify production-dump password-hash format (bcrypt vs sha512crypt vs Emergence-specific) before staging cutover. +- Issue [#26](https://github.com/CodeForPhilly/codeforphilly-ng/issues/26) — `--limit=N` tally edge case: `input` counts pre-limit rows (intentional, so dry-run reports reflect dump size), so `imported + skipped + errors` < `input` when limited, which may surprise readers of the report; document this in the script's `--help` output once it has one.