Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,495 changes: 10 additions & 1,485 deletions src/builder.js

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions src/builder/context.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
 * PipelineContext — shared mutable state threaded through all build stages.
 *
 * Each stage reads what it needs and writes what it produces.
 * This replaces the closure-captured locals in the old monolithic buildGraph().
 *
 * Fields are grouped by the stage that populates them; a field is undefined
 * until its producing stage has run, so stages must execute in pipeline order.
 */
export class PipelineContext {
  // ── Inputs (set during setup) ──────────────────────────────────────
  /** @type {string} Absolute root directory */
  rootDir;
  /** @type {import('better-sqlite3').Database} Open database handle */
  db;
  /** @type {string} Absolute path to the database file */
  dbPath;
  /** @type {object} From loadConfig() */
  config;
  /** @type {object} Original buildGraph opts */
  opts;
  /** @type {{ engine: string, dataflow: boolean, ast: boolean }} Parser engine options */
  engineOpts;
  /** @type {string} 'native' | 'wasm' */
  engineName;
  /** @type {string|null} Parser engine version, when known */
  engineVersion;
  /** @type {{ baseUrl: string|null, paths: object }} Path aliases from ts/jsconfig */
  aliases;
  /** @type {boolean} Whether incremental mode is enabled */
  incremental;
  /** @type {boolean} Force full rebuild (engine/schema mismatch) */
  forceFullRebuild = false;
  /** @type {number} Current schema version */
  schemaVersion;

  // ── File collection (set by collectFiles stage) ────────────────────
  /** @type {string[]} Absolute file paths */
  allFiles;
  /** @type {Set<string>} Absolute directory paths that contain source files */
  discoveredDirs;

  // ── Change detection (set by detectChanges stage) ──────────────────
  /** @type {boolean} True when every file is (re)parsed, not just changed ones */
  isFullBuild;
  /** @type {Array<{ file: string, relPath?: string, content?: string, hash?: string, stat?: object, _reverseDepOnly?: boolean }>} */
  parseChanges;
  /** @type {Array<{ relPath: string, hash: string, stat: object }>} Metadata-only self-heal updates */
  metadataUpdates;
  /** @type {string[]} Relative paths of deleted files */
  removed;
  /** @type {boolean} True when no changes detected — skip remaining stages */
  earlyExit = false;

  // ── Parsing (set by parseFiles stage) ──────────────────────────────
  /** @type {Map<string, object>} relPath → symbols from parseFilesAuto */
  allSymbols;
  /** @type {Map<string, object>} relPath → symbols (includes incrementally loaded) */
  fileSymbols;
  /** @type {Array<{ file: string, relPath?: string }>} Files to parse this build */
  filesToParse;

  // ── Import resolution (set by resolveImports stage) ────────────────
  /** @type {Map<string, string>|null} "absFile|source" → resolved path */
  batchResolved;
  /** @type {Map<string, Array>} relPath → re-export descriptors */
  reexportMap;
  /** @type {Set<string>} Files loaded only for barrel resolution (don't rebuild edges) */
  barrelOnlyFiles;

  // ── Node lookup (set by insertNodes / buildEdges stages) ───────────
  /** @type {Map<string, Array>} name → node rows */
  nodesByName;
  /** @type {Map<string, Array>} "name|file" → node rows */
  nodesByNameAndFile;

  // ── Misc state ─────────────────────────────────────────────────────
  /** @type {boolean} Whether embeddings table exists */
  hasEmbeddings = false;
  /** @type {Map<string, number>} relPath → line count */
  lineCountMap;

  // ── Phase timing ───────────────────────────────────────────────────
  // stageName → elapsed time; populated as each stage completes.
  timing = {};

  /** @type {number} performance.now() at build start */
  buildStart;
}
218 changes: 218 additions & 0 deletions src/builder/helpers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/**
* Builder helper functions — shared utilities used across pipeline stages.
*
* Extracted from the monolithic builder.js so stages can import individually.
*/
import { createHash } from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import { EXTENSIONS, IGNORE_DIRS } from '../constants.js';
import { purgeFilesData } from '../db.js';
import { warn } from '../logger.js';

/**
 * Global/built-in receiver names: method calls on these identifiers are
 * runtime built-ins, not project symbols, and should not produce edges.
 */
export const BUILTIN_RECEIVERS = new Set(
  (
    'console Math JSON Object Array String Number Boolean Date RegExp ' +
    'Map Set WeakMap WeakSet Promise Symbol Error TypeError RangeError ' +
    'Proxy Reflect Intl globalThis window document process Buffer require'
  ).split(' '),
);

/**
 * Recursively collect all source files under `dir`.
 *
 * Skips hidden directories, IGNORE_DIRS entries, and any names listed in
 * `config.ignoreDirs`. Symlink loops are detected via realpath and skipped
 * with a warning. When `directories` is a Set, it receives every directory
 * that directly contains at least one matching file.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [files] - Accumulator for matching file paths.
 * @param {object} [config] - May carry `ignoreDirs: string[]` extra ignores.
 * @param {Set<string>|null} [directories] - Directory tracker, or null to skip tracking.
 * @param {Set<string>} [_visited] - Internal: realpaths already walked.
 * @returns {string[]|{files: string[], directories: Set<string>}}
 */
export function collectFiles(
  dir,
  files = [],
  config = {},
  directories = null,
  _visited = new Set(),
) {
  const trackDirs = directories instanceof Set;
  // Build the extra-ignore set ONCE; the old code rebuilt it on every
  // recursive call (once per directory visited).
  const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
  walkDir(dir, files, directories, trackDirs, extraIgnore, _visited);
  return trackDirs ? { files, directories } : files;
}

/** Recursive worker for collectFiles — handles one directory level per call. */
function walkDir(dir, files, directories, trackDirs, extraIgnore, visited) {
  // Resolve symlinks before any other I/O so loops are caught early.
  let realDir;
  try {
    realDir = fs.realpathSync(dir);
  } catch {
    return; // unreadable/broken path — skip silently like the stat failure it is
  }
  if (visited.has(realDir)) {
    warn(`Symlink loop detected, skipping: ${dir}`);
    return;
  }
  visited.add(realDir);

  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (err) {
    warn(`Cannot read directory ${dir}: ${err.message}`);
    return;
  }

  let hasFiles = false;
  for (const entry of entries) {
    // Hidden directories are always skipped; hidden FILES may still match
    // (same behavior as before, minus the dead `name !== '.'` check —
    // readdir never yields '.').
    if (entry.isDirectory() && entry.name.startsWith('.')) continue;
    if (IGNORE_DIRS.has(entry.name)) continue;
    if (extraIgnore?.has(entry.name)) continue;

    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      walkDir(full, files, directories, trackDirs, extraIgnore, visited);
    } else if (EXTENSIONS.has(path.extname(entry.name))) {
      files.push(full);
      hasFiles = true;
    }
  }
  // Only directories that DIRECTLY contain a matching file are tracked.
  if (trackDirs && hasFiles) {
    directories.add(dir);
  }
}

/**
 * Load path aliases from tsconfig.json / jsconfig.json.
 *
 * JSONC-style input is tolerated: line comments, block comments, and
 * trailing commas are stripped before parsing. The first config file that
 * parses successfully wins (tsconfig.json takes precedence).
 *
 * @param {string} rootDir - Project root to look for config files in.
 * @returns {{ baseUrl: string|null, paths: Object<string, string[]> }}
 */
export function loadPathAliases(rootDir) {
  const result = { baseUrl: null, paths: {} };
  for (const fileName of ['tsconfig.json', 'jsconfig.json']) {
    const filePath = path.join(rootDir, fileName);
    if (!fs.existsSync(filePath)) continue;
    try {
      // Strip comments and trailing commas so JSONC configs parse as JSON.
      const stripped = fs
        .readFileSync(filePath, 'utf-8')
        .replace(/\/\/.*$/gm, '')
        .replace(/\/\*[\s\S]*?\*\//g, '')
        .replace(/,\s*([\]}])/g, '$1');
      const compilerOptions = JSON.parse(stripped).compilerOptions || {};
      if (compilerOptions.baseUrl) {
        result.baseUrl = path.resolve(rootDir, compilerOptions.baseUrl);
      }
      if (compilerOptions.paths) {
        // Targets resolve against baseUrl when present, rootDir otherwise.
        const base = result.baseUrl || rootDir;
        for (const [pattern, targets] of Object.entries(compilerOptions.paths)) {
          result.paths[pattern] = targets.map((target) => path.resolve(base, target));
        }
      }
      break;
    } catch (err) {
      warn(`Failed to parse ${fileName}: ${err.message}`);
    }
  }
  return result;
}

/**
 * Compute MD5 hash of file contents for incremental builds.
 *
 * MD5 is used as a fast change-detection fingerprint, not for security.
 *
 * @param {string|Buffer} content - File contents to fingerprint.
 * @returns {string} 32-char lowercase hex digest.
 */
export function fileHash(content) {
  const hasher = createHash('md5');
  hasher.update(content);
  return hasher.digest('hex');
}

/**
 * Stat a file, returning { mtimeMs, size } or null on error.
 *
 * Any stat failure (missing file, permission error, …) maps to null so
 * callers can treat "unknown" uniformly.
 *
 * @param {string} filePath - Path to stat.
 * @returns {{ mtimeMs: number, size: number }|null}
 */
export function fileStat(filePath) {
  try {
    const { mtimeMs, size } = fs.statSync(filePath);
    return { mtimeMs, size };
  } catch {
    return null;
  }
}

/**
 * Read a file with retry on transient errors (EBUSY/EACCES/EPERM).
 */
const TRANSIENT_CODES = new Set(['EBUSY', 'EACCES', 'EPERM']);
const RETRY_DELAY_MS = 50;

/**
 * Block the current thread for `ms` milliseconds WITHOUT spinning the CPU.
 * Atomics.wait on a value that never changes simply times out after `ms`;
 * Node permits blocking waits on the main thread.
 */
function sleepSync(ms) {
  Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
}

/**
 * Read a file as UTF-8, retrying briefly on transient errors.
 *
 * Non-transient errors (e.g. ENOENT) and errors remaining after all
 * retries are rethrown to the caller.
 *
 * @param {string} filePath - File to read.
 * @param {number} [retries=2] - Extra attempts after the first failure.
 * @returns {string} File contents.
 * @throws The last fs error when reading ultimately fails.
 */
export function readFileSafe(filePath, retries = 2) {
  for (let attempt = 0; ; attempt++) {
    try {
      return fs.readFileSync(filePath, 'utf-8');
    } catch (err) {
      if (attempt >= retries || !TRANSIENT_CODES.has(err.code)) {
        throw err;
      }
      // Previously a busy-wait loop (`while (Date.now() < end) {}`) that
      // burned a full core for the delay; a timed Atomics.wait blocks idle.
      sleepSync(RETRY_DELAY_MS);
    }
  }
}

/**
 * Purge all graph data for the specified files.
 *
 * Thin delegation to purgeFilesData so pipeline stages can depend on the
 * builder helpers module instead of importing the db layer directly.
 *
 * @param {import('better-sqlite3').Database} db - Open database handle.
 * @param {string[]} files - File paths whose graph data to delete
 *   (presumably relative paths, matching how nodes are stored — TODO
 *   confirm against purgeFilesData's contract).
 * @param {object} [options] - Forwarded verbatim to purgeFilesData.
 */
export function purgeFilesFromGraph(db, files, options = {}) {
  purgeFilesData(db, files, options);
}

/** Batch INSERT chunk size for multi-value INSERTs. */
export const BATCH_CHUNK = 200;

/**
 * Batch-insert node rows via multi-value INSERT statements.
 * Each row: [name, kind, file, line, end_line, parent_id]
 *
 * The full-size-chunk statement is prepared once and reused; the old code
 * recompiled the identical SQL for every chunk.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {Array<Array>} rows - 6-element tuples, one per node.
 */
export function batchInsertNodes(db, rows) {
  if (!rows.length) return;
  const ph = '(?,?,?,?,?,?)';
  const prefix = 'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id) VALUES ';
  let fullStmt = null; // lazily prepared statement for BATCH_CHUNK-sized chunks
  for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
    const chunk = rows.slice(i, i + BATCH_CHUNK);
    const vals = [];
    for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4], r[5]);
    let stmt;
    if (chunk.length === BATCH_CHUNK) {
      fullStmt ??= db.prepare(prefix + Array(BATCH_CHUNK).fill(ph).join(','));
      stmt = fullStmt;
    } else {
      // Only the final, partial chunk needs a one-off statement.
      stmt = db.prepare(prefix + Array(chunk.length).fill(ph).join(','));
    }
    stmt.run(...vals);
  }
}

/**
 * Batch-insert edge rows via multi-value INSERT statements.
 * Each row: [source_id, target_id, kind, confidence, dynamic]
 *
 * The full-size-chunk statement is prepared once and reused; the old code
 * recompiled the identical SQL for every chunk.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {Array<Array>} rows - 5-element tuples, one per edge.
 */
export function batchInsertEdges(db, rows) {
  if (!rows.length) return;
  const ph = '(?,?,?,?,?)';
  const prefix = 'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic) VALUES ';
  let fullStmt = null; // lazily prepared statement for BATCH_CHUNK-sized chunks
  for (let i = 0; i < rows.length; i += BATCH_CHUNK) {
    const chunk = rows.slice(i, i + BATCH_CHUNK);
    const vals = [];
    for (const r of chunk) vals.push(r[0], r[1], r[2], r[3], r[4]);
    let stmt;
    if (chunk.length === BATCH_CHUNK) {
      fullStmt ??= db.prepare(prefix + Array(BATCH_CHUNK).fill(ph).join(','));
      stmt = fullStmt;
    } else {
      // Only the final, partial chunk needs a one-off statement.
      stmt = db.prepare(prefix + Array(chunk.length).fill(ph).join(','));
    }
    stmt.run(...vals);
  }
}
Loading
Loading