diff --git a/scripts/ci/check-unicode-safety.js b/scripts/ci/check-unicode-safety.js index c4f1740c..96c9ba54 100644 --- a/scripts/ci/check-unicode-safety.js +++ b/scripts/ci/check-unicode-safety.js @@ -122,7 +122,23 @@ function isDangerousInvisibleCodePoint(codePoint) { // an attacker hides instructions inside ASCII-looking strings (PR // bodies, SKILL.md, frontmatter), the LLM consumes the tag bytes, // and the human reviewer sees nothing. - (codePoint >= 0xE0000 && codePoint <= 0xE007F) + (codePoint >= 0xE0000 && codePoint <= 0xE007F) || + // U+180E MONGOLIAN VOWEL SEPARATOR — formerly classified as a space + // separator, reclassified as a format control in Unicode 6.3; renders + // as zero-width and routinely abused for homograph / smuggling attacks. + codePoint === 0x180E || + // U+115F / U+1160 HANGUL CHOSEONG/JUNGSEONG FILLER — zero-width fillers + // used in Korean text shaping; abused as invisible characters. + codePoint === 0x115F || + codePoint === 0x1160 || + // U+2061–U+2064 invisible math operators (FUNCTION APPLICATION, + // INVISIBLE TIMES, INVISIBLE SEPARATOR, INVISIBLE PLUS). Zero-width + // and not used outside math typesetting; legitimate Markdown / source + // does not contain them. + (codePoint >= 0x2061 && codePoint <= 0x2064) || + // U+3164 HANGUL FILLER — invisible filler (typically rendered as a blank cell rather than zero-width) reportedly used in Discord + // / Twitter smuggling attacks; not used in legitimate Korean text. + codePoint === 0x3164 ); }