Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 94 additions & 43 deletions packages/domscribe-manifest/src/id-stabilizer/id-stabilizer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ vi.mock('xxhash-wasm', () => ({
default: vi.fn(),
}));

vi.mock('@domscribe/core', () => ({
generateEntryId: vi.fn(),
}));

// Import mocked modules to access mock functions
import {
existsSync,
Expand All @@ -41,7 +37,6 @@ import {
unlinkSync,
} from 'fs';
import xxhashFactory, { XXHashAPI } from 'xxhash-wasm';
import { generateEntryId } from '@domscribe/core';

const mockMkdirSync = vi.mocked(mkdirSync);
const mockWriteFileSync = vi.mocked(writeFileSync);
Expand All @@ -50,10 +45,9 @@ const mockRenameSync = vi.mocked(renameSync);
const mockUnlinkSync = vi.mocked(unlinkSync);
const mockExistsSync = vi.mocked(existsSync);
const mockXxhashFactory = vi.mocked(xxhashFactory);
const mockGenerateElementId = vi.mocked(generateEntryId);

// Counter for mock ID generation
let idCounter = 0;
/**
 * Shape check for generated IDs: exactly 8 characters drawn from the digits
 * 0-9, uppercase letters except I and O, and lowercase letters except i and o.
 *
 * NOTE(review): this character class has 58 members and accepts lowercase 'l',
 * whereas the ALPHABET literal in id-stabilizer.ts omits 'l' (57 characters).
 * The pattern is therefore a superset of what the implementation can emit —
 * TODO confirm which of the two alphabets is the intended one.
 */
const ID_FORMAT = /^[0-9A-HJ-NP-Za-hj-np-z]{8}$/;

// Helper functions
function createFileIdentity(
Expand All @@ -72,8 +66,13 @@ function createPosition(
}

/**
 * Test helper: the file-hash string expected for `content`.
 *
 * Folds the string's UTF-16 code units into a bigint with a base-31
 * polynomial (Horner form: `hash = hash * 31 + codeUnit`), the same
 * arithmetic the `h64` mock in `beforeEach` performs, then formats the result
 * as lowercase hex left-padded with zeros to at least 16 characters — the
 * `h64(content).toString(16).padStart(16, '0')` shape used by computeFileHash.
 *
 * The value is not reduced to 64 bits, so long inputs yield more than 16 hex
 * digits; `padStart` never truncates.
 *
 * @param content - File content to hash.
 * @returns Hex digest string for `content`.
 */
function mockHash(content: string): string {
  let hash = 0n;
  for (let i = 0; i < content.length; i++) {
    hash = hash * 31n + BigInt(content.charCodeAt(i));
  }
  return hash.toString(16).padStart(16, '0');
}

function createSerializedCache(
Expand Down Expand Up @@ -101,21 +100,16 @@ describe('IDStabilizer', () => {
beforeEach(() => {
vi.resetAllMocks();

// Reset counter for ID generation
idCounter = 0;

// Setup mock implementations
mockGenerateElementId.mockImplementation(() => {
return `id${(idCounter++).toString().padStart(6, '0')}`;
});

// Mock xxhash factory to return a hasher with h64 method
// Mock xxhash factory to return a hasher with h64 that produces real bigints
mockXxhashFactory.mockResolvedValue({
...vi.mocked({} as XXHashAPI),
h64: vi.fn((content: string) => {
return {
toString: () => mockHash(content),
} as unknown as bigint;
// Produce a deterministic bigint from content for testing
let hash = 0n;
for (let i = 0; i < content.length; i++) {
hash = hash * 31n + BigInt(content.charCodeAt(i));
}
return hash;
}),
});

Expand Down Expand Up @@ -222,7 +216,7 @@ describe('IDStabilizer', () => {
const id = stabilizer.getStableId(fileIdentity, position);

// Assert
expect(id).toBe('id000000');
expect(id).toMatch(ID_FORMAT);
});
});

Expand All @@ -239,7 +233,7 @@ describe('IDStabilizer', () => {

// Assert
expect(id2).toBe(id1);
expect(id1).toBe('id000000');
expect(id1).toMatch(ID_FORMAT);
});

it('should track cache hit when returning cached ID', async () => {
Expand Down Expand Up @@ -294,8 +288,7 @@ describe('IDStabilizer', () => {
const id = stabilizer.getStableId(fileIdentity, position);

// Assert
expect(id).toBe('id000000');
expect(mockGenerateElementId).toHaveBeenCalledTimes(1);
expect(id).toMatch(ID_FORMAT);
});

it('should track cache miss for new file', async () => {
Expand Down Expand Up @@ -330,8 +323,8 @@ describe('IDStabilizer', () => {

// Assert
expect(id2).not.toBe(id1);
expect(id1).toBe('id000000');
expect(id2).toBe('id000001');
expect(id1).toMatch(ID_FORMAT);
expect(id2).toMatch(ID_FORMAT);
});

it('should track cache miss when file content changes', async () => {
Expand Down Expand Up @@ -488,8 +481,9 @@ describe('IDStabilizer', () => {
stabilizer.getStableId(fileIdentity, createPosition(4, 0));
stabilizer.getStableId(fileIdentity, createPosition(5, 0));

// Assert - hash should only be computed once due to per-file caching
expect(h64Spy).toHaveBeenCalledTimes(1);
// Assert - file hash computed once (cached), plus 5 h64 calls for deterministic ID generation
// Total: 1 file hash + 5 ID hashes = 6
expect(h64Spy).toHaveBeenCalledTimes(6);
});

it('should recompute hash when file content changes', async () => {
Expand All @@ -510,8 +504,8 @@ describe('IDStabilizer', () => {
createPosition(1, 0),
);

// Assert - hash should be computed twice (once per content change)
expect(h64Spy).toHaveBeenCalledTimes(2);
// Assert - 2 file hashes (content changed) + 2 ID hashes = 4
expect(h64Spy).toHaveBeenCalledTimes(4);
});

it('should recompute hash when switching to a different file', async () => {
Expand All @@ -532,8 +526,8 @@ describe('IDStabilizer', () => {
createPosition(1, 0),
);

// Assert - hash should be computed twice (once per file)
expect(h64Spy).toHaveBeenCalledTimes(2);
// Assert - 2 file hashes (different files) + 2 ID hashes = 4
expect(h64Spy).toHaveBeenCalledTimes(4);
});

it('should use cached hash when returning to previously processed file with same content', async () => {
Expand All @@ -551,9 +545,11 @@ describe('IDStabilizer', () => {
stabilizer.getStableId(file2, createPosition(1, 0));
stabilizer.getStableId(file1, createPosition(2, 0)); // Back to file1

// Assert - hash computed 3 times (cache only holds most recent file)
// This is expected behavior - we only cache the most recent file
expect(h64Spy).toHaveBeenCalledTimes(3);
// Assert - 3 file hashes (cache only holds most recent file) + 3 ID hashes = 6
// file1 pos(1,0): new file → file hash + ID hash
// file2 pos(1,0): new file → file hash + ID hash
// file1 pos(2,0): file hash recomputed (not cached) + new position → ID hash
expect(h64Spy).toHaveBeenCalledTimes(6);
});
});
});
Expand Down Expand Up @@ -853,7 +849,7 @@ describe('IDStabilizer', () => {
expect(stats.misses).toBe(0);
});

it('should generate new IDs after clearing cache', async () => {
it('should produce same deterministic ID after clearing cache', async () => {
// Arrange
await stabilizer.initialize();
const fileIdentity = createFileIdentity('/test.tsx', 'content');
Expand All @@ -864,8 +860,8 @@ describe('IDStabilizer', () => {
stabilizer.clearCache();
const id2 = stabilizer.getStableId(fileIdentity, position);

// Assert
expect(id2).not.toBe(id1);
// Assert - deterministic: same input → same output
expect(id2).toBe(id1);
});
});

Expand Down Expand Up @@ -990,7 +986,7 @@ describe('IDStabilizer', () => {
createFileIdentity('/test.tsx', 'content'),
createPosition(10, 10),
);
expect(id).toBe('id000000');
expect(id).toMatch(ID_FORMAT);
});

it('should start fresh after version mismatch', async () => {
Expand Down Expand Up @@ -1049,8 +1045,9 @@ describe('IDStabilizer', () => {
);

// Assert - should continue working
expect(id1).toBe('id000000');
expect(id2).toBe('id000001');
expect(id1).toMatch(ID_FORMAT);
expect(id2).toMatch(ID_FORMAT);
expect(id2).not.toBe(id1);
});
});

Expand Down Expand Up @@ -1095,4 +1092,58 @@ describe('IDStabilizer', () => {
);
});
});

// Suite covering the content-derived ID scheme: equal (file content, position)
// inputs must give equal IDs, and differing inputs must give differing IDs.
describe('Deterministic ID generation', () => {
  it('should produce identical IDs from independent instances for same input', async () => {
    // Arrange: two separately constructed stabilizers, initialized in order
    const first = new IDStabilizer('/workspace1');
    const second = new IDStabilizer('/workspace2');
    await first.initialize();
    await second.initialize();

    const sharedFile = createFileIdentity('/test.tsx', 'const x = 1;');
    const sharedPosition = createPosition(10, 10);

    // Act
    const idFromFirst = first.getStableId(sharedFile, sharedPosition);
    const idFromSecond = second.getStableId(sharedFile, sharedPosition);

    // Assert
    expect(idFromFirst).toBe(idFromSecond);
  });

  it('should produce different IDs for different positions', async () => {
    // Arrange
    await stabilizer.initialize();
    const sharedFile = createFileIdentity('/test.tsx', 'const x = 1;');

    // Act: same file, two distinct positions
    const idAtLine1 = stabilizer.getStableId(sharedFile, createPosition(1, 0));
    const idAtLine2 = stabilizer.getStableId(sharedFile, createPosition(2, 0));

    // Assert: both are well-formed and distinct
    for (const id of [idAtLine1, idAtLine2]) {
      expect(id).toMatch(ID_FORMAT);
    }
    expect(idAtLine1).not.toBe(idAtLine2);
  });

  it('should produce different IDs for same position in different file content', async () => {
    // Arrange
    await stabilizer.initialize();
    const sharedPosition = createPosition(10, 10);
    const idForContent = (content: string) =>
      stabilizer.getStableId(
        createFileIdentity('/test.tsx', content),
        sharedPosition,
      );

    // Act: same path and position, two different contents
    const idForV1 = idForContent('version 1');
    const idForV2 = idForContent('version 2');

    // Assert: both are well-formed and distinct
    for (const id of [idForV1, idForV2]) {
      expect(id).toMatch(ID_FORMAT);
    }
    expect(idForV1).not.toBe(idForV2);
  });
});
});
39 changes: 36 additions & 3 deletions packages/domscribe-manifest/src/id-stabilizer/id-stabilizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import {
import { existsSync } from 'fs';
import path, { join } from 'path';
import xxhash from 'xxhash-wasm';
import { generateEntryId, SourcePosition } from '@domscribe/core';
import { SourcePosition } from '@domscribe/core';
import {
SerializedIDCacheSchema,
type IDGenerator,
Expand All @@ -44,6 +44,12 @@ const CACHE_SCHEMA_VERSION = '1.0.0';
const DEFAULT_CACHE_FILE = 'id-cache.json';

export class IDStabilizer implements IDGenerator, IDCacheControl {
/**
 * Characters used to encode generated IDs.
 *
 * NOTE(review): previously described as matching core's id-generator.ts and as
 * "base58", but the literal below contains 57 characters: digits 0-9,
 * uppercase A-Z without I and O, and lowercase a-z without i, l and o.
 * TODO confirm against core's alphabet whether a character is missing here.
 */
private static readonly ALPHABET =
'0123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghjkmnpqrstuvwxyz';
/** Size of ALPHABET as a bigint; evaluates to 57n for the literal above. */
private static readonly ALPHABET_LEN = BigInt(IDStabilizer.ALPHABET.length);
/** Number of characters in every generated ID. */
private static readonly ID_LENGTH = 8;

/** Singleton instances keyed by normalized cache directory path */
private static instances = new Map<string, IDStabilizer>();

Expand Down Expand Up @@ -160,7 +166,7 @@ export class IDStabilizer implements IDGenerator, IDCacheControl {

// Case 1: File not in cache OR file hash changed (content changed)
if (!entry || entry.fileHash !== fileHash) {
const newId = generateEntryId();
const newId = this.generateDeterministicId(`${fileHash}:${positionKey}`);
const newEntry: IDCacheEntry = {
fileHash,
filePath,
Expand Down Expand Up @@ -197,7 +203,9 @@ export class IDStabilizer implements IDGenerator, IDCacheControl {
}

// Case 3: New position in existing file
const newId = generateEntryId();
const newId = this.generateDeterministicId(
`${entry.fileHash}:${positionKey}`,
);
entry.ids.set(positionKey, newId);
entry.timestamp = Date.now();
this.stats.misses++;
Expand Down Expand Up @@ -414,4 +422,29 @@ export class IDStabilizer implements IDGenerator, IDCacheControl {

return this.hasher.h64(fileContent).toString(16).padStart(16, '0');
}

/**
 * Turn a seed string into a fixed-length ID.
 *
 * The seed is hashed with the hasher's `h64`, and the resulting bigint is
 * encoded into the ID alphabet by `hashToAlphabetId`. The same seed always
 * yields the same ID for a given hasher.
 *
 * @param seed - Text to derive the ID from.
 * @returns The encoded ID string.
 * @throws Error when no hasher is set; the message directs callers to
 *   `initialize()`.
 */
private generateDeterministicId(seed: string): string {
  const hasher = this.hasher;
  if (!hasher) {
    throw new Error('Hasher not initialized. Call initialize() first.');
  }
  return IDStabilizer.hashToAlphabetId(hasher.h64(seed));
}

/**
 * Encode a bigint as a string of exactly `ID_LENGTH` characters from `ALPHABET`.
 *
 * The magnitude of `hash` is repeatedly divided by the alphabet size; each
 * remainder selects one character, so the least-significant digit comes
 * first. Any magnitude left after `ID_LENGTH` digits is discarded.
 *
 * @param hash - Value to encode; a negative value is replaced by its magnitude.
 * @returns The encoded ID.
 */
private static hashToAlphabetId(hash: bigint): string {
  const radix = IDStabilizer.ALPHABET_LEN;
  let remaining = hash >= 0n ? hash : -hash;
  const digits = Array.from({ length: IDStabilizer.ID_LENGTH }, () => {
    const symbol = IDStabilizer.ALPHABET[Number(remaining % radix)];
    remaining /= radix; // bigint division truncates toward zero
    return symbol;
  });
  return digits.join('');
}
}
Loading