// Source: haiany/codegraph — mirror of https://github.com/colbymchenry/codegraph.git
/**
 * Front-load hook project resolution (#964).
 *
 * The Claude `UserPromptSubmit` front-load hook must inject CodeGraph context
 * for the RIGHT project — including the monorepo case where the agent's cwd is
 * an un-indexed workspace root and the index lives in a sub-project. These tests
 * exercise `planFrontload` / `findIndexedSubprojectRoots` directly (the hook's
 * decision logic), since the end-to-end hook is validated by a live agent run,
 * not a unit test.
 */
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { planFrontload, findIndexedSubprojectRoots, isStructuralPrompt, hasStructuralKeyword, extractCodeTokens } from '../src/directory';

/**
 * Fixture helper: makes `dir` look like an indexed project by writing an empty
 * `.codegraph/codegraph.db` into it (the file `isInitialized` needs). Missing
 * parent directories are created on the way.
 *
 * @param dir - Directory that should become an indexed project root.
 * @returns The same `dir`, so the call can be used inline in a fixture setup.
 */
function mkIndexed(dir: string): string {
  const indexDir = path.join(dir, '.codegraph');
  fs.mkdirSync(indexDir, { recursive: true });
  // The database only has to exist; its contents are irrelevant for these fixtures.
  fs.writeFileSync(path.join(indexDir, 'codegraph.db'), '');
  return dir;
}
/**
 * Fixture helper: creates `dir` (and any missing parents) and drops a
 * workspace-root `package.json` into it, so the down-scan gate
 * (looksLikeProjectRoot) passes for that directory.
 *
 * @param dir - Directory that should look like a monorepo workspace root.
 * @returns The same `dir`, so the call can be used inline in a fixture setup.
 */
function mkWorkspaceRoot(dir: string): string {
  const manifestPath = path.join(dir, 'package.json');
  fs.mkdirSync(dir, { recursive: true });
  fs.writeFileSync(manifestPath, '{"private":true,"workspaces":["packages/*"]}');
  return dir;
}

/**
 * Decision tests for `planFrontload(cwd, prompt)` (#964).
 *
 * Each case lays out a throwaway directory tree under a fresh temp dir and
 * checks the plan fields exercised here: `exploreRoot` (the project root to
 * front-load, or `null`), `viaSubScan` (whether that root was found by
 * scanning below cwd) and `nudgeProjects` (indexed projects that are listed
 * for the agent rather than explored).
 */
describe('planFrontload — front-load hook project resolution (#964)', () => {
  let tmp: string;

  // Fresh fixture root per case. The path is realpath'ed up front — presumably so
  // expectations still compare equal when the OS temp dir is reached via a symlink.
  beforeEach(() => {
    tmp = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'cg-frontload-')));
  });
  afterEach(() => {
    fs.rmSync(tmp, { recursive: true, force: true });
  });

  it('cwd is itself indexed → front-load cwd (the common single-project case)', () => {
    mkIndexed(tmp);
    const plan = planFrontload(tmp, 'how does login work');
    expect(plan.exploreRoot).toBe(tmp);
    expect(plan.viaSubScan).toBe(false);
    expect(plan.nudgeProjects).toEqual([]);
  });

  it('a nested file under an indexed project resolves up to that project', () => {
    mkIndexed(tmp);
    const nested = path.join(tmp, 'src', 'deep');
    fs.mkdirSync(nested, { recursive: true });
    expect(planFrontload(nested, 'trace the flow').exploreRoot).toBe(tmp);
  });

  it('un-indexed workspace root with ONE indexed sub-project → front-load it (the #964 case)', () => {
    mkWorkspaceRoot(tmp);
    const api = mkIndexed(path.join(tmp, 'packages', 'api'));
    const plan = planFrontload(tmp, 'how does the request get handled');
    expect(plan.exploreRoot).toBe(api);
    expect(plan.viaSubScan).toBe(true);
    expect(plan.nudgeProjects).toEqual([]);
  });

  it('multiple indexed sub-projects, prompt names one by path → front-load it, nudge the rest', () => {
    mkWorkspaceRoot(tmp);
    const api = mkIndexed(path.join(tmp, 'packages', 'api'));
    const web = mkIndexed(path.join(tmp, 'packages', 'web'));
    const plan = planFrontload(tmp, 'in packages/api, how does the handler validate the token?');
    expect(plan.exploreRoot).toBe(api);
    expect(plan.viaSubScan).toBe(true);
    expect(plan.nudgeProjects).toEqual([web]);
  });

  it('multiple indexed sub-projects, prompt names one by package name → front-load it', () => {
    mkWorkspaceRoot(tmp);
    mkIndexed(path.join(tmp, 'packages', 'api'));
    const web = mkIndexed(path.join(tmp, 'packages', 'web'));
    const plan = planFrontload(tmp, 'how does the web frontend render the dashboard?');
    expect(plan.exploreRoot).toBe(web);
  });

  it('multiple indexed sub-projects, NO clear match → nudge the full list, do not guess', () => {
    mkWorkspaceRoot(tmp);
    const api = mkIndexed(path.join(tmp, 'packages', 'api'));
    const web = mkIndexed(path.join(tmp, 'packages', 'web'));
    const plan = planFrontload(tmp, 'how does authentication work end to end?');
    expect(plan.exploreRoot).toBeNull();
    expect(plan.viaSubScan).toBe(true);
    // Order-insensitive comparison on a COPY: `Array.prototype.sort` sorts in
    // place, so sorting `plan.nudgeProjects` directly would mutate the plan
    // under test.
    expect([...plan.nudgeProjects].sort()).toEqual([api, web].sort());
  });

  it('un-indexed dir that is NOT a workspace root → no-op (guards $HOME-style crawls)', () => {
    // Indexed project exists below, but cwd has no manifest, so the down-scan is skipped.
    mkIndexed(path.join(tmp, 'some', 'project'));
    const plan = planFrontload(tmp, 'how does it work');
    expect(plan.exploreRoot).toBeNull();
    expect(plan.nudgeProjects).toEqual([]);
  });

  it('nothing indexed anywhere → no-op', () => {
    // Workspace manifest plus a plain (un-indexed) sub-directory: nothing to front-load.
    mkWorkspaceRoot(tmp);
    fs.mkdirSync(path.join(tmp, 'packages', 'api'), { recursive: true });
    const plan = planFrontload(tmp, 'how does it work');
    expect(plan.exploreRoot).toBeNull();
    expect(plan.nudgeProjects).toEqual([]);
  });
});

/**
 * Tests for `findIndexedSubprojectRoots`, the downward scan that collects
 * indexed project roots below a directory. Each case builds its tree inside a
 * fresh temp dir that is deleted afterwards.
 */
describe('findIndexedSubprojectRoots', () => {
  let tmp: string;

  beforeEach(() => {
    const created = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-subscan-'));
    tmp = fs.realpathSync(created);
  });

  afterEach(() => {
    fs.rmSync(tmp, { recursive: true, force: true });
  });

  it('finds indexed projects a couple levels down and skips node_modules/.git', () => {
    const projects = [path.join('packages', 'api'), path.join('services', 'auth')];
    // Decoys that must NOT be scanned into.
    const decoys = [path.join('node_modules', 'dep'), path.join('.git', 'x')];
    for (const rel of [...projects, ...decoys]) {
      mkIndexed(path.join(tmp, rel));
    }

    const relativeHits = findIndexedSubprojectRoots(tmp).map((root) => path.relative(tmp, root));
    relativeHits.sort();
    expect(relativeHits).toEqual([...projects].sort());
  });

  it('does not descend INTO an indexed project (a project\'s sub-dirs are not separate projects)', () => {
    const outer = mkIndexed(path.join(tmp, 'packages', 'api'));
    // A second index nested under the already-indexed project.
    mkIndexed(path.join(outer, 'submodule'));

    const hits = findIndexedSubprojectRoots(tmp);
    expect(hits).toEqual([outer]);
  });

  it('respects the depth bound', () => {
    // Six levels below tmp, well past the maxDepth of 2 requested below.
    const tooDeep = path.join(tmp, 'a', 'b', 'c', 'd', 'e', 'deep');
    mkIndexed(tooDeep);

    const hits = findIndexedSubprojectRoots(tmp, { maxDepth: 2 });
    expect(hits).toEqual([]);
  });
});

/**
 * `hasStructuralKeyword` tests for #994: English keywords are bounded by word
 * edges, Chinese keywords match without `\b`, and a bare code token on its own
 * is not a keyword.
 */
describe('hasStructuralKeyword — keyword signal fires the hook directly (#994)', () => {
  /** Asserts that `prompt` is reported as containing a structural keyword. */
  const fires = (prompt: string): void => {
    expect(hasStructuralKeyword(prompt)).toBe(true);
  };
  /** Asserts that `prompt` is reported as containing no structural keyword. */
  const staysQuiet = (prompt: string): void => {
    expect(hasStructuralKeyword(prompt)).toBe(false);
  };

  it('English keywords match with word boundaries so "flow" ≠ "flower"', () => {
    fires('how does article publish work');
    fires('where is the token validated');
    fires('trace the request flow');
    fires('what calls parseToken');
    staysQuiet('water the flower'); // "flow" in "flower"
  });

  it('Chinese keywords match WITHOUT `\\b` — the #994 fix (were silently dropped)', () => {
    fires('介绍文章发布流程');        // introduce / flow
    fires('登录是如何实现的');        // how / implement
    fires('这个函数的调用链');        // call (chain)
    fires('支付模块依赖哪些服务');    // depend
    staysQuiet('修复这个拼写错误');   // "fix this typo"
  });

  it('a bare code-token is NOT a keyword — it needs graph verification', () => {
    staysQuiet('看看 get_user 这段逻辑');
    staysQuiet('I really love JavaScript');
  });
});

/**
 * `hasStructuralKeyword` multilingual coverage (#1126): structural prompts in
 * the covered languages must be detected, while colliding English/code words,
 * mid-word stem hits and plain "fix this typo" prose must not be.
 */
describe('hasStructuralKeyword — Latin-script languages, Cyrillic, JA/KO (#1126)', () => {
  /** Asserts that `prompt` is reported as containing a structural keyword. */
  const fires = (prompt: string): void => {
    expect(hasStructuralKeyword(prompt)).toBe(true);
  };
  /** Asserts that `prompt` is reported as containing no structural keyword. */
  const staysQuiet = (prompt: string): void => {
    expect(hasStructuralKeyword(prompt)).toBe(false);
  };

  it('French structural prompts fire — including the prompts from the report', () => {
    fires('comment marche la state machine des commandes ?');
    fires("explique l'architecture du module de stock");
    fires('qui appelle cette fonction de parsing ?');
    fires('de quoi dépend le module de paiement ?');
  });

  it('accented keyword edges match — ASCII `\\b` could never bound "où"', () => {
    fires('où est validé le token ?');
    fires("d'où vient cette valeur ?");
  });

  it('Spanish / Portuguese / German / Italian fire', () => {
    fires('¿cómo funciona la máquina de estados de pedidos?');
    fires('¿qué rompe este cambio?');
    fires('como funciona a máquina de estados dos pedidos?');
    fires('qual é a arquitetura do módulo de estoque?');
    fires('wie funktioniert die Zustandsmaschine für Bestellungen?');
    fires('wovon hängt das Zahlungsmodul ab?');
    fires('come funziona la macchina a stati degli ordini?');
    fires('spiegami la struttura del modulo ordini');
  });

  it('Russian / Japanese / Korean / traditional Chinese fire', () => {
    fires('как работает конечный автомат заказов?');
    fires('от чего зависит модуль оплаты?');
    fires('注文のステートマシンの仕組みを説明して');
    fires('この関数の呼び出しの流れは?');
    fires('주문 상태 머신은 어떻게 작동하나요?');
    fires('訂單狀態機的架構是怎麼實現的?');
  });

  it('English derived forms fire — "architecture"/"dependencies" failed the old exact-word list', () => {
    fires('explain the architecture of the stock module');
    fires('what are the dependencies of the parser?');
  });

  it('second-tier languages fire — VI/TR/ID/PL/UA/NL/CS/RO/HU/EL/SV/NO/FI/HI', () => {
    fires('state machine của đơn hàng hoạt động thế nào?');    // Vietnamese
    fires('sipariş durum makinesi nasıl çalışıyor?');           // Turkish
    fires('bu fonksiyonun bağımlılıkları neler?');              // Turkish (stem)
    fires('bagaimana cara kerja mesin status pesanan?');        // Indonesian
    fires('jak działa maszyna stanów zamówień?');               // Polish
    fires('co wywołuje tę funkcję?');                           // Polish (stem)
    fires('як працює кінцевий автомат замовлень?');             // Ukrainian
    fires('від чого залежить модуль оплати?');                  // Ukrainian (stem)
    fires('hoe werkt de state machine van bestellingen?');      // Dutch
    fires('jak funguje stavový automat objednávek?');           // Czech
    fires('cum funcționează mașina de stări a comenzilor?');    // Romanian
    fires('hogyan működik a rendelések állapotgépe?');          // Hungarian
    fires('πώς λειτουργεί η μηχανή καταστάσεων παραγγελιών;');  // Greek
    fires('hur fungerar orderns tillståndsmaskin?');            // Swedish
    fires('hvordan fungerer ordrenes tilstandsmaskin?');        // Norwegian/Danish
    fires('miten tilausten tilakone toimii?');                  // Finnish
    fires('ऑर्डर स्टेट मशीन कैसे काम करती है?');                  // Hindi
  });

  it('RTL scripts and Thai fire — proclitics/unsegmented text uses substring matching', () => {
    fires('كيف تعمل آلة حالات الطلبات؟');        // Arabic
    fires('وكيف يعتمد هذا على قاعدة البيانات؟'); // Arabic, proclitic و attached
    fires('ماشین وضعیت سفارش‌ها چگونه کار می‌کند؟'); // Farsi
    fires('איך עובדת מכונת המצבים של ההזמנות?');  // Hebrew
    fires('สถาปัตยกรรมของระบบทำงานอย่างไร');       // Thai
  });

  it('terms that collide with English or code words are deliberately excluded', () => {
    staysQuiet('pad the buffer with zeros');     // NL pad=path skipped
    staysQuiet('declare a var for the count');   // SV var=where skipped
    staysQuiet('refresh the token');             // CS tok=flow skipped
    staysQuiet('run the llama model locally');   // ES bare llama skipped
    staysQuiet('come back to this later');       // IT bare come skipped
  });

  it('stems match only at word start — no mid-word false positives', () => {
    staysQuiet('restructure this paragraph'); // "structur" mid-word
    staysQuiet('an independent module');      // "depend" mid-word
    staysQuiet('water the flower');           // unchanged guarantee
  });

  it('non-structural prose stays a no-op in every covered language', () => {
    staysQuiet('corrige cette faute de frappe');   // FR "fix this typo"
    staysQuiet('arregla este error tipográfico');  // ES
    staysQuiet('behebe diesen Tippfehler');        // DE
    staysQuiet('исправь эту опечатку');            // RU
    staysQuiet('このタイプミスを直して');            // JA
    staysQuiet('이 오타를 수정해줘');                // KO
    staysQuiet('sửa lỗi chính tả này');            // VI
    staysQuiet('bu yazım hatasını düzelt');        // TR
    staysQuiet('popraw tę literówkę');             // PL
    staysQuiet('صحح هذا الخطأ الإملائي');          // AR
  });
});

/**
 * `extractCodeTokens` tests: identifier-shaped words in a prompt are returned
 * as candidate symbols, while ordinary prose and doc/data filenames yield
 * nothing.
 */
describe('extractCodeTokens — candidate symbols the hook verifies against the graph', () => {
  it('pulls camelCase / PascalCase / snake_case / call / member tokens', () => {
    const camelCase = extractCodeTokens('prepareArticlePublish 的调用链');
    expect(camelCase).toContain('prepareArticlePublish');

    // snake_case
    const snakeCase = extractCodeTokens('看看 get_user 这段逻辑');
    expect(snakeCase).toContain('get_user');

    // call form
    const callForm = extractCodeTokens('render() 在哪触发');
    expect(callForm).toContain('render');

    // member access — both sides of the dot are expected, compared order-insensitively
    const memberAccess = extractCodeTokens('user.login 做了什么');
    memberAccess.sort();
    expect(memberAccess).toEqual(['login', 'user']);

    // PascalCase class kept
    const pascalCase = extractCodeTokens('看看 UserService');
    expect(pascalCase).toContain('UserService');
  });

  it('a tech brand is extracted as a CANDIDATE — the hook’s graph check is what rejects it', () => {
    // This is the #994 follow-up: "JavaScript" is identifier-shaped, so it surfaces
    // here as a candidate; the hook only fires if it's a real symbol in the index.
    const singleBrand = extractCodeTokens('I really love JavaScript');
    expect(singleBrand).toEqual(['JavaScript']);

    const twoBrands = extractCodeTokens('thoughts on GitHub vs GitLab');
    twoBrands.sort();
    expect(twoBrands).toEqual(['GitHub', 'GitLab']);
  });

  it('ordinary prose and doc/data filenames yield no tokens', () => {
    const none: string[] = [];
    expect(extractCodeTokens('fix typo in readme')).toEqual(none);
    expect(extractCodeTokens('fix the typo in README.md')).toEqual(none); // doc filename excluded
    expect(extractCodeTokens('bump the version in package.json')).toEqual(none);
    expect(extractCodeTokens('water the flower')).toEqual(none);
  });
});

/**
 * `isStructuralPrompt` tests: the cheap gate passes a prompt that carries
 * either a structural keyword or a code-shaped token, and rejects plain prose
 * and the empty string.
 */
describe('isStructuralPrompt — cheap candidate gate (keyword OR code-token)', () => {
  /** Asserts that `prompt` passes the gate. */
  const passesGate = (prompt: string): void => {
    expect(isStructuralPrompt(prompt)).toBe(true);
  };
  /** Asserts that `prompt` is rejected by the gate. */
  const isRejected = (prompt: string): void => {
    expect(isStructuralPrompt(prompt)).toBe(false);
  };

  it('fires on a keyword prompt in any language', () => {
    passesGate('how does article publish work');
    passesGate('介绍文章发布流程');
  });

  it('fires on a code-token prompt with no keyword', () => {
    passesGate('看看 get_user 这段逻辑');
    passesGate('where is prepareArticlePublish 定义');
    passesGate('user.login 做了什么');
  });

  it('a tech brand passes the CHEAP gate as a candidate — the hook then graph-verifies it', () => {
    // Layering, not a bug: isStructuralPrompt is shape-only, so a token-shaped brand
    // is a candidate here; the hook rejects it as a non-symbol (proven by the CLI e2e).
    passesGate('I really love JavaScript');
  });

  it('non-structural prose stays a no-op — in either language', () => {
    isRejected('fix typo in readme');
    isRejected('修复这个拼写错误');
    isRejected('water the flower');
    isRejected('');
  });
});