vectors.test.ts 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
/**
 * Vector Embedding Tests
 *
 * Tests for vector embedding and semantic search functionality.
 * Note: Full embedding tests require the model to be downloaded,
 * which can take time on first run.
 */
  8. import { describe, it, expect, beforeEach, afterEach } from 'vitest';
  9. import * as fs from 'fs';
  10. import * as path from 'path';
  11. import * as os from 'os';
  12. import CodeGraph from '../src/index';
  13. import { TextEmbedder } from '../src/vectors/embedder';
  14. import { VectorSearchManager, createVectorSearch } from '../src/vectors/search';
  15. import { DatabaseConnection } from '../src/db';
  16. describe('Vector Embeddings', () => {
  // Covers the two helpers invoked directly on the TextEmbedder class:
  // createNodeText (node metadata -> labelled text) and cosineSimilarity.
  describe('TextEmbedder', () => {
    describe('createNodeText', () => {
      it('should create text representation from node', () => {
        const paymentNode = {
          name: 'processPayment',
          kind: 'function',
          qualifiedName: 'PaymentService.processPayment',
          signature: '(amount: number) => Promise<Receipt>',
          docstring: 'Process a payment and return a receipt.',
          filePath: 'src/services/payment.ts',
        };

        const rendered = TextEmbedder.createNodeText(paymentNode);

        // Every populated field must appear under its own label.
        const expectedFragments = [
          'function: processPayment',
          'path: PaymentService.processPayment',
          'file: src/services/payment.ts',
          'signature: (amount: number) => Promise<Receipt>',
          'documentation: Process a payment',
        ];
        for (const fragment of expectedFragments) {
          expect(rendered).toContain(fragment);
        }
      });

      it('should handle minimal node data', () => {
        const bareNode = {
          name: 'helper',
          kind: 'function',
          filePath: 'src/utils.ts',
        };

        const rendered = TextEmbedder.createNodeText(bareNode);

        // Labels for the supplied fields are present...
        for (const fragment of ['function: helper', 'file: src/utils.ts']) {
          expect(rendered).toContain(fragment);
        }
        // ...and labels for the omitted optional fields are not emitted.
        for (const absentLabel of ['signature:', 'documentation:']) {
          expect(rendered).not.toContain(absentLabel);
        }
      });
    });

    describe('cosineSimilarity', () => {
      it('should compute similarity between identical vectors', () => {
        const sample = Float32Array.from([0.1, 0.2, 0.3, 0.4, 0.5]);

        // The same array instance is deliberately passed on both sides.
        expect(TextEmbedder.cosineSimilarity(sample, sample)).toBeCloseTo(1.0, 5);
      });

      it('should compute similarity between orthogonal vectors', () => {
        const alongX = Float32Array.of(1, 0, 0);
        const alongY = Float32Array.of(0, 1, 0);

        expect(TextEmbedder.cosineSimilarity(alongX, alongY)).toBeCloseTo(0.0, 5);
      });

      it('should compute similarity between opposite vectors', () => {
        const forward = Float32Array.of(1, 0, 0);
        const backward = Float32Array.of(-1, 0, 0);

        expect(TextEmbedder.cosineSimilarity(forward, backward)).toBeCloseTo(-1.0, 5);
      });

      it('should throw for vectors of different dimensions', () => {
        const threeDim = Float32Array.of(1, 2, 3);
        const twoDim = Float32Array.of(1, 2);
        const compareMismatched = () => TextEmbedder.cosineSimilarity(threeDim, twoDim);

        expect(compareMismatched).toThrow('Embeddings must have the same dimension');
      });

      it('should handle zero vectors', () => {
        const allZero = Float32Array.of(0, 0, 0);
        const nonZero = Float32Array.of(1, 2, 3);

        // A zero-magnitude operand is expected to yield exactly 0.
        expect(TextEmbedder.cosineSimilarity(allZero, nonZero)).toBe(0);
      });
    });
  });
  /**
   * VectorSearchManager suite.
   *
   * Every test gets a fresh database file inside its own temporary directory
   * and a manager created for 3-dimensional vectors. Teardown always removes
   * the temporary directory, even when closing the database fails.
   */
  describe('VectorSearchManager', () => {
    const TEST_DIMENSION = 3; // Use small dimension for tests

    // Both handles are undefined until beforeEach succeeds, so teardown can
    // tell "never created" apart from "needs cleanup".
    let tempDir: string | undefined;
    let db: DatabaseConnection | undefined;
    let searchManager: VectorSearchManager;

    beforeEach(() => {
      tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-vector-test-'));
      const dbPath = path.join(tempDir, 'test.db');
      db = DatabaseConnection.initialize(dbPath);
      searchManager = createVectorSearch(db.getDb(), TEST_DIMENSION);
    });

    afterEach(() => {
      try {
        // db is still undefined when beforeEach failed before opening it;
        // an unguarded close() would then mask the original setup error.
        db?.close();
      } finally {
        db = undefined;
        if (tempDir !== undefined) {
          // `force: true` already tolerates a missing path, so no existsSync
          // pre-check is needed.
          fs.rmSync(tempDir, { recursive: true, force: true });
          tempDir = undefined;
        }
      }
    });

    it('should store and retrieve vectors', async () => {
      await searchManager.initialize();

      const embedding = new Float32Array([0.1, 0.2, 0.3]);
      searchManager.storeVector('node1', embedding, 'test-model');

      const retrieved = searchManager.getVector('node1');
      expect(retrieved).not.toBeNull();
      expect(retrieved?.length).toBe(TEST_DIMENSION);
      // Check every component so a truncated or reordered round-trip fails.
      embedding.forEach((expected, index) => {
        expect(retrieved?.[index]).toBeCloseTo(expected, 5);
      });
    });

    it('should return null for non-existent vectors', async () => {
      await searchManager.initialize();

      const retrieved = searchManager.getVector('non-existent');
      expect(retrieved).toBeNull();
    });

    it('should check if vector exists', async () => {
      await searchManager.initialize();

      const embedding = new Float32Array([0.1, 0.2, 0.3]);
      searchManager.storeVector('node1', embedding, 'test-model');

      expect(searchManager.hasVector('node1')).toBe(true);
      expect(searchManager.hasVector('node2')).toBe(false);
    });

    it('should delete vectors', async () => {
      await searchManager.initialize();

      const embedding = new Float32Array([0.1, 0.2, 0.3]);
      searchManager.storeVector('node1', embedding, 'test-model');
      expect(searchManager.hasVector('node1')).toBe(true);

      searchManager.deleteVector('node1');
      expect(searchManager.hasVector('node1')).toBe(false);
    });

    it('should count vectors', async () => {
      await searchManager.initialize();

      expect(searchManager.getVectorCount()).toBe(0);

      searchManager.storeVector('node1', new Float32Array([0.1, 0.2, 0.3]), 'test');
      searchManager.storeVector('node2', new Float32Array([0.4, 0.5, 0.6]), 'test');
      expect(searchManager.getVectorCount()).toBe(2);
    });

    it('should clear all vectors', async () => {
      await searchManager.initialize();

      searchManager.storeVector('node1', new Float32Array([0.1, 0.2, 0.3]), 'test');
      searchManager.storeVector('node2', new Float32Array([0.4, 0.5, 0.6]), 'test');
      expect(searchManager.getVectorCount()).toBe(2);

      searchManager.clear();
      expect(searchManager.getVectorCount()).toBe(0);
    });

    it('should perform brute-force similarity search', async () => {
      await searchManager.initialize();

      // Store some test vectors
      searchManager.storeVector('node1', new Float32Array([1, 0, 0]), 'test');
      searchManager.storeVector('node2', new Float32Array([0.9, 0.1, 0]), 'test');
      searchManager.storeVector('node3', new Float32Array([0, 1, 0]), 'test');

      // Search for similar to [1, 0, 0]
      const query = new Float32Array([1, 0, 0]);
      const results = searchManager.search(query, { limit: 3 });

      expect(results.length).toBe(3);
      expect(results[0].nodeId).toBe('node1'); // Most similar
      expect(results[0].score).toBeCloseTo(1.0, 5);
      expect(results[1].nodeId).toBe('node2'); // Second most similar
    });

    it('should respect minScore in search', async () => {
      await searchManager.initialize();

      searchManager.storeVector('node1', new Float32Array([1, 0, 0]), 'test');
      searchManager.storeVector('node2', new Float32Array([0, 1, 0]), 'test');

      const query = new Float32Array([1, 0, 0]);
      const results = searchManager.search(query, { limit: 10, minScore: 0.5 });

      // Only node1 should match with score >= 0.5
      expect(results.length).toBe(1);
      expect(results[0].nodeId).toBe('node1');
    });

    it('should store vectors in batch', async () => {
      await searchManager.initialize();

      // Use normalized 3-dimensional vectors
      const entries = [
        { nodeId: 'node1', embedding: new Float32Array([1.0, 0.0, 0.0]) },
        { nodeId: 'node2', embedding: new Float32Array([0.0, 1.0, 0.0]) },
        { nodeId: 'node3', embedding: new Float32Array([0.0, 0.0, 1.0]) },
      ];
      searchManager.storeVectorBatch(entries, 'test-model');

      expect(searchManager.getVectorCount()).toBe(3);
      expect(searchManager.hasVector('node1')).toBe(true);
      expect(searchManager.hasVector('node2')).toBe(true);
      expect(searchManager.hasVector('node3')).toBe(true);
    });

    it('should get indexed node IDs', async () => {
      await searchManager.initialize();

      searchManager.storeVector('node1', new Float32Array([0.1, 0.2, 0.3]), 'test');
      searchManager.storeVector('node2', new Float32Array([0.4, 0.5, 0.6]), 'test');

      const ids = searchManager.getIndexedNodeIds();
      expect(ids).toContain('node1');
      expect(ids).toContain('node2');
      expect(ids.length).toBe(2);
    });
  });
  /**
   * CodeGraph embedding integration suite.
   *
   * Each test creates a CodeGraph via `CodeGraph.initSync` over a temporary
   * directory holding one small TypeScript file, and checks how the embedding
   * APIs behave before embeddings have been initialized.
   */
  describe('CodeGraph Embedding Integration', () => {
    let testDir: string;
    let cg: CodeGraph;

    beforeEach(() => {
      testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-embed-integration-'));

      // Create a simple test file
      fs.writeFileSync(
        path.join(testDir, 'test.ts'),
        `
export function processData(input: string): string {
  return input.toUpperCase();
}
`
      );

      cg = CodeGraph.initSync(testDir, {
        config: {
          include: ['**/*.ts'],
          exclude: [],
        },
      });
    });

    afterEach(() => {
      try {
        // cg is unset when beforeEach failed before initSync returned.
        if (cg) {
          cg.destroy();
        }
      } finally {
        // Runs even if destroy() throws, so the temp directory never leaks.
        // existsSync also covers the case where testDir was never assigned.
        if (fs.existsSync(testDir)) {
          fs.rmSync(testDir, { recursive: true, force: true });
        }
      }
    });

    it('should report embeddings not initialized', () => {
      expect(cg.isEmbeddingsInitialized()).toBe(false);
    });

    it('should return null embedding stats when not initialized', () => {
      const stats = cg.getEmbeddingStats();
      expect(stats).toBeNull();
    });

    it('should throw when calling semanticSearch without initialization', async () => {
      await expect(cg.semanticSearch('test')).rejects.toThrow(/not initialized/i);
    });

    it('should throw when calling findSimilar without initialization', async () => {
      await expect(cg.findSimilar('test-id')).rejects.toThrow(/not initialized/i);
    });
  });
  233. });