// Unit tests for browse/src/sanitize.ts (#1440).
// Covers stripLoneSurrogates (raw UTF-16) and stripLoneSurrogateEscapes
// (\uXXXX escape text) used by the response chokepoints.

import { describe, expect, test } from 'bun:test';
import { stripLoneSurrogates, stripLoneSurrogateEscapes, sanitizeBody } from '../src/sanitize';

describe('stripLoneSurrogates', () => {
  test('replaces lone high surrogate with U+FFFD', () => {
    const lone = '\uD800x';
    const out = stripLoneSurrogates(lone);
    expect(out).toBe('�x');
  });

  test('replaces lone low surrogate with U+FFFD', () => {
    const lone = 'x\uDC00';
    expect(stripLoneSurrogates(lone)).toBe('x�');
  });

  test('leaves valid surrogate pairs (emoji) unchanged', () => {
    const smiley = '😀'; // U+1F600 = 😀
    expect(stripLoneSurrogates(smiley)).toBe(smiley);
  });

  test('empty string is unchanged', () => {
    expect(stripLoneSurrogates('')).toBe('');
  });

  test('mixed valid + lone surrogates', () => {
    const input = `a\uD800b😀c\uDC00d`;
    const out = stripLoneSurrogates(input);
    expect(out).toBe(`a�b😀c�d`);
  });

  test('clean text passes through unchanged', () => {
    const text = 'The quick brown fox jumps over 13 lazy dogs.';
    expect(stripLoneSurrogates(text)).toBe(text);
  });

  test('high surrogate immediately followed by high surrogate replaces both individually', () => {
    const input = '\uD800\uD801'; // two lone highs in a row, neither paired
    const out = stripLoneSurrogates(input);
    expect(out).toBe('��');
  });
});

describe('stripLoneSurrogateEscapes', () => {
  test('replaces lone high surrogate ESCAPE with \\uFFFD', () => {
    const json = '{"name":"\\uD800"}';
    expect(stripLoneSurrogateEscapes(json)).toBe('{"name":"\\uFFFD"}');
  });

  test('replaces lone low surrogate ESCAPE with \\uFFFD', () => {
    const json = '{"name":"\\uDC00"}';
    expect(stripLoneSurrogateEscapes(json)).toBe('{"name":"\\uFFFD"}');
  });

  test('leaves valid escape pair unchanged', () => {
    // 😀 = 😀 — must NOT be touched
    const json = '{"emoji":"\\uD83D\\uDE00"}';
    expect(stripLoneSurrogateEscapes(json)).toBe(json);
  });

  test('mixed escape pairs and lone escapes', () => {
    const json = '{"a":"\\uD800","b":"\\uD83D\\uDE00","c":"\\uDC00"}';
    expect(stripLoneSurrogateEscapes(json)).toBe('{"a":"\\uFFFD","b":"\\uD83D\\uDE00","c":"\\uFFFD"}');
  });

  test('clean JSON passes through unchanged', () => {
    const json = '{"results":[{"status":200,"command":"text"}]}';
    expect(stripLoneSurrogateEscapes(json)).toBe(json);
  });

  test('case-insensitive matching: \\uD8aa works like \\uD8AA', () => {
    expect(stripLoneSurrogateEscapes('\\uD8aa')).toBe('\\uFFFD');
  });
});

describe('sanitizeBody', () => {
  test('text/plain body: applies raw-surrogate strip only', () => {
    const input = `pre\uD800post`;
    expect(sanitizeBody(input, false)).toBe(`pre�post`);
  });

  test('JSON body: applies both raw and escape passes', () => {
    // Both raw and escape variants in the same body
    const input = `{"raw":"\uD800","esc":"\\uD800"}`;
    const out = sanitizeBody(input, true);
    expect(out).toBe(`{"raw":"�","esc":"\\uFFFD"}`);
  });

  test('clean text/plain body unchanged', () => {
    const text = 'Hello world\nLine 2';
    expect(sanitizeBody(text, false)).toBe(text);
  });

  test('clean JSON body unchanged', () => {
    const json = '{"ok":true}';
    expect(sanitizeBody(json, true)).toBe(json);
  });
});

describe('perf smoke', () => {
  test('1MB of clean text sanitizes in <500ms', () => {
    const big = 'A'.repeat(1024 * 1024);
    const start = performance.now();
    const out = stripLoneSurrogates(big);
    const elapsed = performance.now() - start;
    expect(out.length).toBe(big.length);
    expect(elapsed).toBeLessThan(500);
  });
});