|
1 | 1 | /** |
2 | | - * @fileoverview Unit tests for regular expression utilities. |
| 2 | + * @fileoverview Unit tests for `escapeRegExp`. |
3 | 3 | * |
4 | | - * Tests regex helper functions: |
5 | | - * - escapeRegExp() escapes special characters for safe regex construction |
6 | | - * - Handles all regex metacharacters: \, |, {, }, [, ], (, ), *, +, ?, ., ^, $ |
7 | | - * - Prevents regex injection vulnerabilities |
8 | | - * - Used for dynamic pattern building from user input |
9 | | - * Used throughout Socket tools for safe regex pattern construction. |
| 4 | + * Tests align with the TC39 RegExp.escape spec: |
| 5 | + * https://tc39.es/ecma262/#sec-regexp.escape |
| 6 | + * |
| 7 | + * Assertions are BEHAVIOR-based (the escaped output produces a regex that |
| 8 | + * matches the original input exactly) plus targeted SPEC-SHAPE checks, |
| 9 | + * including the two invariants that matter for safe concatenation: |
| 10 | + * 1. Leading `[0-9A-Za-z]` is encoded as `\xHH` so it can't merge with |
| 11 | + * a preceding `\0..\9` / `\c` in a larger pattern. |
| 12 | + * 2. `/` is backslash-escaped so the result is safe inside a `/.../` |
| 13 | + * literal. |
| 14 | + * |
| 15 | + * We verify the same guarantees hold whether `escapeRegExp` is bound to |
| 16 | + * native `RegExp.escape` (Node 24+) or our hand-rolled fallback. |
10 | 17 | */ |
11 | 18 |
|
12 | 19 | import { describe, expect, it } from 'vitest' |
13 | 20 |
|
14 | 21 | import { escapeRegExp } from '@socketsecurity/lib/regexps' |
15 | 22 |
|
| 23 | +/** Asserts that the anchored pattern `^${escapeRegExp(input)}$` matches `input` in full. */ |
| 24 | +function expectLiteralRoundtrip(input: string): void { |
| 25 | + const re = new RegExp(`^${escapeRegExp(input)}$`) |
| 26 | + expect(re.test(input)).toBe(true) |
| 27 | +} |
| 28 | + |
16 | 29 | describe('regexps', () => { |
17 | 30 | describe('escapeRegExp', () => { |
18 | | - it('should escape backslash', () => { |
19 | | - expect(escapeRegExp('\\')).toBe('\\\\') |
20 | | - }) |
21 | | - |
22 | | - it('should escape pipe', () => { |
23 | | - expect(escapeRegExp('|')).toBe('\\|') |
24 | | - }) |
25 | | - |
26 | | - it('should escape curly braces', () => { |
27 | | - expect(escapeRegExp('{}')).toBe('\\{\\}') |
28 | | - expect(escapeRegExp('{')).toBe('\\{') |
29 | | - expect(escapeRegExp('}')).toBe('\\}') |
30 | | - }) |
31 | | - |
32 | | - it('should escape parentheses', () => { |
33 | | - expect(escapeRegExp('()')).toBe('\\(\\)') |
34 | | - expect(escapeRegExp('(')).toBe('\\(') |
35 | | - expect(escapeRegExp(')')).toBe('\\)') |
| 31 | + it('is a function (native or fallback)', () => { |
| 32 | + expect(typeof escapeRegExp).toBe('function') |
36 | 33 | }) |
37 | 34 |
|
38 | | - it('should escape square brackets', () => { |
39 | | - expect(escapeRegExp('[]')).toBe('\\[\\]') |
40 | | - expect(escapeRegExp('[')).toBe('\\[') |
41 | | - expect(escapeRegExp(']')).toBe('\\]') |
| 35 | + it('empty string returns empty string', () => { |
| 36 | + expect(escapeRegExp('')).toBe('') |
42 | 37 | }) |
43 | 38 |
|
44 | | - it('should escape caret', () => { |
45 | | - expect(escapeRegExp('^')).toBe('\\^') |
| 39 | + // Spec §22.2.5.1 step 3.a: leading `[0-9A-Za-z]` → `\xHH`. |
| 40 | + it('encodes leading ASCII letter/digit as \\xHH', () => { |
| 41 | + expect(escapeRegExp('a')).toBe('\\x61') |
| 42 | + expect(escapeRegExp('Z')).toBe('\\x5a') |
| 43 | + expect(escapeRegExp('0')).toBe('\\x30') |
| 44 | + expect(escapeRegExp('9')).toBe('\\x39') |
| 45 | + // Non-leading letters/digits are NOT hex-escaped. |
| 46 | + expect(escapeRegExp('abc').startsWith('\\x61')).toBe(true) |
| 47 | + expect(escapeRegExp('abc').endsWith('bc')).toBe(true) |
46 | 48 | }) |
47 | 49 |
|
48 | | - it('should escape dollar sign', () => { |
49 | | - expect(escapeRegExp('$')).toBe('\\$') |
| 50 | + // Spec §22.2.5.1.1 step 1: SyntaxCharacter + `/` → backslash prefix. |
| 51 | + it('backslash-prefixes SyntaxCharacter + /', () => { |
| 52 | + for (const ch of '^$\\.*+?()[]{}|/') { |
| 53 | + expect(escapeRegExp(ch)).toBe('\\' + ch) |
| 54 | + } |
50 | 55 | }) |
51 | 56 |
|
52 | | - it('should escape plus', () => { |
53 | | - expect(escapeRegExp('+')).toBe('\\+') |
| 57 | + // Spec §22.2.5.1.1 step 2: ControlEscape (Table 62). |
| 58 | + it('encodes control-escape characters as their escape forms', () => { |
| 59 | + expect(escapeRegExp('\t')).toBe('\\t') |
| 60 | + expect(escapeRegExp('\n')).toBe('\\n') |
| 61 | + expect(escapeRegExp('\v')).toBe('\\v') |
| 62 | + expect(escapeRegExp('\f')).toBe('\\f') |
| 63 | + expect(escapeRegExp('\r')).toBe('\\r') |
54 | 64 | }) |
55 | 65 |
|
56 | | - it('should escape asterisk', () => { |
57 | | - expect(escapeRegExp('*')).toBe('\\*') |
| 66 | + // Spec §22.2.5.1.1 step 4: otherPunctuators → \xHH (cp ≤ 0xFF). |
| 67 | + it('hex-escapes the otherPunctuators set', () => { |
| 68 | + for (const ch of ',-=<>#&!%:;@~\'`"') { |
| 69 | + const cp = ch.codePointAt(0)! |
| 70 | + expect(escapeRegExp(ch)).toBe('\\x' + cp.toString(16).padStart(2, '0')) |
| 71 | + } |
58 | 72 | }) |
59 | 73 |
|
60 | | - it('should escape question mark', () => { |
61 | | - expect(escapeRegExp('?')).toBe('\\?') |
| 74 | + // `-` must be escaped so that splicing the result into `[...]` yields literal characters, not a range. |
| 75 | + it('escaped `-` stays literal inside a character class', () => { |
| 76 | + const escaped = escapeRegExp('a-z') |
| 77 | + const re = new RegExp(`^[${escaped}]$`) |
| 78 | + expect(re.test('a')).toBe(true) |
| 79 | + expect(re.test('-')).toBe(true) |
| 80 | + expect(re.test('z')).toBe(true) |
| 81 | + // Letter between a and z must NOT match if `-` stayed literal. |
| 82 | + expect(re.test('m')).toBe(false) |
62 | 83 | }) |
63 | 84 |
|
64 | | - it('should escape dot', () => { |
65 | | - expect(escapeRegExp('.')).toBe('\\.') |
| 85 | + // Behavior-level roundtrip: any metacharacter-only string must match |
| 86 | + // itself literally after escape. |
| 87 | + it('every metacharacter round-trips as a literal match', () => { |
| 88 | + for (const ch of '\\|{}()[]^$+*?.-/') { |
| 89 | + expectLiteralRoundtrip(ch) |
| 90 | + } |
66 | 91 | }) |
67 | 92 |
|
68 | | - it('should escape multiple special characters', () => { |
69 | | - // biome-ignore lint/suspicious/noTemplateCurlyInString: Testing regex escape for curly braces |
70 | | - expect(escapeRegExp('.*+?^${}()|[]')).toBe( |
71 | | - '\\.\\*\\+\\?\\^\\$\\{\\}\\(\\)\\|\\[\\]', |
72 | | - ) |
| 93 | + it('paired metacharacters round-trip', () => { |
| 94 | + for (const pair of ['{}', '()', '[]', '{{', '}}']) { |
| 95 | + expectLiteralRoundtrip(pair) |
| 96 | + } |
73 | 97 | }) |
74 | 98 |
|
75 | | - it('should not escape regular characters', () => { |
76 | | - expect(escapeRegExp('abc123')).toBe('abc123') |
77 | | - expect(escapeRegExp('hello world')).toBe('hello world') |
| 99 | + it('every metacharacter in one string round-trips', () => { |
| 100 | + expectLiteralRoundtrip('.*+?^${}()|[]/\\-') |
78 | 101 | }) |
79 | 102 |
|
80 | | - it('should handle mixed strings', () => { |
81 | | - expect(escapeRegExp('hello.world')).toBe('hello\\.world') |
82 | | - expect(escapeRegExp('test(123)')).toBe('test\\(123\\)') |
83 | | - expect(escapeRegExp('price: $50+')).toBe('price: \\$50\\+') |
| 103 | + it('round-trips mixed plain + metacharacter strings', () => { |
| 104 | + for (const s of [ |
| 105 | + 'hello.world', |
| 106 | + 'test(123)', |
| 107 | + 'price: $50+', |
| 108 | + '*.{js,ts}', |
| 109 | + 'a{1,3}', |
| 110 | + ]) { |
| 111 | + expectLiteralRoundtrip(s) |
| 112 | + } |
84 | 113 | }) |
85 | 114 |
|
86 | | - it('should handle empty string', () => { |
87 | | - expect(escapeRegExp('')).toBe('') |
| 115 | + it('round-trips plain ASCII strings', () => { |
| 116 | + for (const s of ['abc123', 'hello world', 'foo', '123']) { |
| 117 | + expectLiteralRoundtrip(s) |
| 118 | + } |
88 | 119 | }) |
89 | 120 |
|
90 | | - it('should work in actual regex', () => { |
91 | | - const input = 'test.file' |
92 | | - const escaped = escapeRegExp(input) |
93 | | - const regex = new RegExp(escaped) |
94 | | - |
95 | | - expect(regex.test('test.file')).toBe(true) |
96 | | - expect(regex.test('testXfile')).toBe(false) |
| 121 | + // A sanity check that metacharacter meaning is neutralized, not just |
| 122 | + // that the input string matches itself (which a `.*` regex would |
| 123 | + // trivially satisfy). |
| 124 | + it('escaped `.` does not act as a wildcard', () => { |
| 125 | + const re = new RegExp(`^${escapeRegExp('test.file')}$`) |
| 126 | + expect(re.test('test.file')).toBe(true) |
| 127 | + expect(re.test('testXfile')).toBe(false) |
97 | 128 | }) |
98 | 129 |
|
99 | | - it('should escape complex file patterns', () => { |
100 | | - const pattern = '*.{js,ts}' |
101 | | - const escaped = escapeRegExp(pattern) |
102 | | - expect(escaped).toBe('\\*\\.\\{js,ts\\}') |
| 130 | + it('escaped quantifier does not quantify', () => { |
| 131 | + const re = new RegExp(`^${escapeRegExp('a{1,3}')}$`) |
| 132 | + expect(re.test('a{1,3}')).toBe(true) |
| 133 | + expect(re.test('aaa')).toBe(false) |
103 | 134 | }) |
104 | 135 |
|
105 | | - it('should escape regex quantifiers', () => { |
106 | | - expect(escapeRegExp('a{1,3}')).toBe('a\\{1,3\\}') |
107 | | - expect(escapeRegExp('a*')).toBe('a\\*') |
108 | | - expect(escapeRegExp('a+')).toBe('a\\+') |
109 | | - expect(escapeRegExp('a?')).toBe('a\\?') |
| 136 | + it('escaped `*` does not act as a wildcard in a glob-like input', () => { |
| 137 | + const re = new RegExp(`^${escapeRegExp('*.{js,ts}')}$`) |
| 138 | + expect(re.test('*.{js,ts}')).toBe(true) |
| 139 | + expect(re.test('foo.js')).toBe(false) |
110 | 140 | }) |
111 | 141 |
|
112 | | - it('should escape character classes (including the range hyphen)', () => { |
113 | | - // `-` is now escaped so splicing the result into a character class |
114 | | - // (e.g. `[${escapeRegExp('a-z')}]`) produces three literal chars |
115 | | - // rather than a range. |
116 | | - expect(escapeRegExp('[a-z]')).toBe('\\[a\\-z\\]') |
117 | | - expect(escapeRegExp('[^0-9]')).toBe('\\[\\^0\\-9\\]') |
| 142 | + it('round-trips unicode characters', () => { |
| 143 | + expectLiteralRoundtrip('hello世界') |
| 144 | + expectLiteralRoundtrip('test.世界') |
118 | 145 | }) |
119 | 146 |
|
120 | | - it('should handle unicode characters', () => { |
121 | | - expect(escapeRegExp('hello世界')).toBe('hello世界') |
122 | | - expect(escapeRegExp('test.世界')).toBe('test\\.世界') |
| 147 | + // Spec guarantees safe concatenation into any Pattern context. |
| 148 | + it('escaped output is safe to splice between arbitrary regex fragments', () => { |
| 149 | + const middle = escapeRegExp('1.2.3') |
| 150 | + const re = new RegExp(`^v${middle}-release$`) |
| 151 | + expect(re.test('v1.2.3-release')).toBe(true) |
| 152 | + expect(re.test('vX2X3-release')).toBe(false) |
123 | 153 | }) |
124 | 154 | }) |
125 | 155 | }) |
0 commit comments