search-query-parser.test.ts 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  /**
   * Unit tests for the field-qualified query parser (`parseQuery`) and the
   * bounded edit distance (`boundedEditDistance`) — the two algorithms behind
   * `kind:`/`lang:`/`path:`/`name:` filtering and the fuzzy typo fallback.
   */
  6. import { describe, it, expect } from 'vitest';
  7. import { parseQuery, boundedEditDistance } from '../src/search/query-parser';
  /**
   * parseQuery cases.
   *
   * As pinned by the assertions below, the parser returns an object with
   * `text`, `kinds`, `languages`, `pathFilters` and `nameFilters`. Recognized
   * `field:value` tokens are lifted out of the query into the matching list;
   * anything that is not a recognized field with a valid value stays in `text`.
   */
  describe('parseQuery', () => {
    it('returns plain text for a query with no field prefixes', () => {
      const r = parseQuery('authenticate user');
      expect(r.text).toBe('authenticate user');
      expect(r.kinds).toEqual([]);
      expect(r.languages).toEqual([]);
      expect(r.pathFilters).toEqual([]);
      expect(r.nameFilters).toEqual([]);
    });

    it('extracts kind: filter and removes it from text', () => {
      const r = parseQuery('kind:function auth');
      expect(r.kinds).toEqual(['function']);
      expect(r.text).toBe('auth');
    });

    it('extracts lang: and language: as the same filter family', () => {
      // Both spellings of the prefix must land in the same `languages` list.
      const a = parseQuery('lang:typescript foo');
      const b = parseQuery('language:typescript foo');
      expect(a.languages).toEqual(['typescript']);
      expect(b.languages).toEqual(['typescript']);
    });

    it('handles multiple kind: filters as an OR set', () => {
      const r = parseQuery('kind:function kind:method auth');
      // Compare order-insensitively, but on a copy: Array.prototype.sort
      // reorders in place, and the assertion must not mutate the parser's
      // own result array.
      expect([...r.kinds].sort()).toEqual(['function', 'method']);
    });

    it('extracts path: and name: as substring filters (kept verbatim)', () => {
      const r = parseQuery('path:src/api name:Handler');
      // Values keep their original casing and separators.
      expect(r.pathFilters).toEqual(['src/api']);
      expect(r.nameFilters).toEqual(['Handler']);
    });

    it('preserves quoted spans as a single token (whitespace in path:)', () => {
      const r = parseQuery('path:"my dir/file" foo');
      // The surrounding quotes are stripped; the inner space is preserved.
      expect(r.pathFilters).toEqual(['my dir/file']);
      expect(r.text).toBe('foo');
    });

    it('passes URL-like tokens through to text (does not match http: as a field)', () => {
      const r = parseQuery('http://example.com');
      expect(r.text).toBe('http://example.com');
      expect(r.kinds).toEqual([]);
    });

    it('passes empty-value tokens through as text (kind: → "kind:")', () => {
      const r = parseQuery('kind: foo');
      expect(r.kinds).toEqual([]);
      // The trailing-colon token comes back as plain text. toContain (rather
      // than includes(...) === true) reports the actual text on failure.
      expect(r.text).toContain('kind:');
    });

    it('passes unknown field prefixes through as text (TODO: keeps the colon)', () => {
      const r = parseQuery('TODO: needs review');
      expect(r.text).toBe('TODO: needs review');
      expect(r.kinds).toEqual([]);
    });

    it('rejects unknown values for kind: (passes the whole token to text)', () => {
      const r = parseQuery('kind:invalid foo');
      // Invalid kind value falls back to text
      expect(r.kinds).toEqual([]);
      expect(r.text).toContain('kind:invalid');
    });

    it('handles all-filters-no-text query', () => {
      const r = parseQuery('kind:function lang:typescript');
      expect(r.kinds).toEqual(['function']);
      expect(r.languages).toEqual(['typescript']);
      // Nothing is left over once both filters are extracted.
      expect(r.text).toBe('');
    });

    it('survives empty input', () => {
      const r = parseQuery('');
      expect(r.text).toBe('');
      expect(r.kinds).toEqual([]);
    });

    it('survives a very long input (no allocation explosion)', () => {
      const huge = 'foo '.repeat(5000); // 20k chars
      const r = parseQuery(huge);
      // Smoke check only: the call returns and some text survives.
      expect(r.text.length).toBeGreaterThan(0);
    });
  });
  /**
   * boundedEditDistance cases.
   *
   * Called as boundedEditDistance(a, b, maxDist). As pinned by the assertions
   * below, it returns the edit distance when that is within maxDist, and
   * maxDist + 1 when the distance is over budget.
   */
  describe('boundedEditDistance', () => {
    it('returns 0 for identical strings', () => {
      expect(boundedEditDistance('user', 'user', 2)).toBe(0);
    });

    it('returns 1 for a single substitution', () => {
      expect(boundedEditDistance('user', 'usar', 2)).toBe(1);
    });

    it('returns 1 for a single insertion', () => {
      expect(boundedEditDistance('user', 'users', 2)).toBe(1);
    });

    it('returns 1 for a single deletion', () => {
      expect(boundedEditDistance('users', 'user', 2)).toBe(1);
    });

    it('returns 2 for two insertions', () => {
      // 'confg' -> 'config' (insert 'i') -> 'configX' (append 'X'):
      // two insertions, landing exactly on the budget of 2.
      // NOTE(review): a true adjacent transposition (e.g. 'form' vs 'from') is
      // not covered here; it would be 2 under plain Levenshtein — confirm the
      // implementation's edit model before adding that case.
      expect(boundedEditDistance('confg', 'configX', 2)).toBe(2);
    });

    it('returns maxDist+1 when distance clearly exceeds budget', () => {
      expect(boundedEditDistance('foo', 'completely-different', 2)).toBe(3);
    });

    it('respects length-difference shortcut', () => {
      // |len(a) - len(b)| > maxDist must immediately be over budget
      expect(boundedEditDistance('a', 'aaaaaaa', 2)).toBe(3);
    });

    it('handles empty inputs', () => {
      expect(boundedEditDistance('', '', 2)).toBe(0);
      expect(boundedEditDistance('a', '', 2)).toBe(1);
      // Three insertions: the true distance (3) coincides with maxDist + 1.
      expect(boundedEditDistance('', 'abc', 2)).toBe(3);
    });

    it('is case-sensitive — caller must lowercase if case-insensitive match wanted', () => {
      expect(boundedEditDistance('Foo', 'foo', 2)).toBe(1);
    });

    it('early-exits when row min exceeds budget (correctness, not just perf)', () => {
      // 'aaaaa' vs 'bbbbb': distance is 5, well over budget 2
      expect(boundedEditDistance('aaaaa', 'bbbbb', 2)).toBe(3);
    });
  });