Skip to content

Commit 10a5d11

Browse files
committed
fix: minor correctness issues across words/regexps/cache
- words.capitalize: iterate by code point so non-BMP inputs (emoji, astral scripts) are handled as whole code points; a cased astral first letter is now actually upper-cased instead of being passed through as an unmapped lone surrogate half
- words.determineArticle: match vowels case-insensitively; 'Apple' and 'apple' both pick 'an' now
- regexps.escapeRegExp: escape '-' so splicing the result into a character class produces literal chars rather than an unintended range
- cache-with-ttl / cacache patternToRegex: add the trailing `$` so the pattern is anchored at both ends; `foo*bar` now matches exactly `foo<anything>bar`, not `foo<anything>bar<extra>` — fixes a silent over-delete when a pattern has literal text after its last wildcard

Tests updated where they pinned the old (incorrect) behavior.
1 parent bcd7268 commit 10a5d11

6 files changed

Lines changed: 42 additions & 24 deletions

File tree

src/cacache.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,15 @@ function matchesPattern(key: string, pattern: string): boolean {
5454

5555
/**
5656
* Convert wildcard pattern to regex for matching.
57-
* Supports * as wildcard (matches any characters).
57+
* Supports * as wildcard (matches any characters). Anchors both ends —
58+
* `foo*bar` matches exactly `foo<anything>bar`, not `foo<anything>bar<more>`.
5859
*/
5960
function patternToRegex(pattern: string): RegExp {
6061
// Escape regex special characters except *
6162
const escaped = pattern.replaceAll(/[.+?^${}()|[\]\\]/g, '\\$&')
6263
// Convert * to .* (match any characters)
6364
const regexPattern = escaped.replaceAll('*', '.*')
64-
return new RegExp(`^${regexPattern}`)
65+
return new RegExp(`^${regexPattern}$`)
6566
}
6667

6768
/**

src/cache-with-ttl.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,13 @@ export function createTtlCache(options?: TtlCacheOptions): TtlCache {
253253
return (key: string) => key.startsWith(fullPattern)
254254
}
255255

256-
// Wildcard matching with regex.
256+
// Wildcard matching with regex. Anchor both ends so `foo*bar` matches
257+
// exactly `foo<anything>bar` and not `foo<anything>bar<anything else>`.
258+
// Without the `$` anchor, `deleteAll('foo*bar')` also swept
259+
// `foo123bar-extra`, silently over-deleting.
257260
const escaped = fullPattern.replaceAll(/[.+?^${}()|[\]\\]/g, '\\$&')
258261
const regexPattern = escaped.replaceAll('*', '.*')
259-
const regex = new RegExp(`^${regexPattern}`)
262+
const regex = new RegExp(`^${regexPattern}$`)
260263
return (key: string) => regex.test(key)
261264
}
262265

src/regexps.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@
2020
*/
2121
/*@__NO_SIDE_EFFECTS__*/
2222
export function escapeRegExp(str: string): string {
23-
// Escape characters with special meaning either inside or outside character sets.
24-
// Use a simple backslash escape when it's always valid, and a `\xnn` escape when
25-
// the simpler form would be disallowed by Unicode patterns' stricter grammar.
26-
return str.replace(/[\\|{}()[\]^$+*?.]/g, '\\$&')
23+
// Escape characters with special meaning either inside or outside
24+
// character sets. Includes `-` so callers that splice an escaped
25+
// string into a character class — e.g. `new RegExp('[' +
26+
// escapeRegExp(userInput) + ']')` — don't accidentally create a range
27+
// when input contains '-'. Caveat: `\-` is a syntax error outside a
28+
// class under the `u`/`v` flags (`escape-string-regexp` emits `\x2d`).
29+
return str.replace(/[\\|{}()[\]^$+*?.-]/g, '\\$&')
2730
}

src/words.ts

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ export interface PluralizeOptions {
1919
*/
2020
/*@__NO_SIDE_EFFECTS__*/
2121
export function capitalize(word: string): string {
22-
const { length } = word
23-
if (length === 0) {
22+
if (word.length === 0) {
2423
return word
2524
}
26-
if (length === 1) {
27-
return word.toUpperCase()
28-
}
29-
return `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`
25+
// Iterate by code point, not UTF-16 unit, so non-BMP characters
26+
// (emoji, astral-plane scripts) are case-mapped as whole code points.
27+
// `charAt(0).toUpperCase() + slice(1).toLowerCase()` only ever saw the
28+
// lone high surrogate, so inputs like '𐐨foo' kept a lowercase first letter.
29+
const [first, ...rest] = [...word]
30+
return (first ?? '').toUpperCase() + rest.join('').toLowerCase()
3031
}
3132

3233
/**
@@ -40,7 +41,11 @@ export function capitalize(word: string): string {
4041
*/
4142
/*@__NO_SIDE_EFFECTS__*/
4243
export function determineArticle(word: string): string {
43-
return /^[aeiou]/.test(word) ? 'an' : 'a'
44+
// Case-insensitive so `Apple` and `apple` both pick `an`. Strict
45+
// spelling rules can't handle silent-h / y-sound exceptions (hour,
46+
// user); documenting that as a known limitation rather than shipping
47+
// a multi-entry exception list.
48+
return /^[aeiou]/i.test(word) ? 'an' : 'a'
4449
}
4550

4651
/**

test/unit/regexps.test.mts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,12 @@ describe('regexps', () => {
109109
expect(escapeRegExp('a?')).toBe('a\\?')
110110
})
111111

112-
it('should escape character classes', () => {
113-
expect(escapeRegExp('[a-z]')).toBe('\\[a-z\\]')
114-
expect(escapeRegExp('[^0-9]')).toBe('\\[\\^0-9\\]')
112+
it('should escape character classes (including the range hyphen)', () => {
113+
// `-` is now escaped so splicing the result into a character class
114+
// (e.g. `[${escapeRegExp('a-z')}]`) produces three literal chars
115+
// rather than a range.
116+
expect(escapeRegExp('[a-z]')).toBe('\\[a\\-z\\]')
117+
expect(escapeRegExp('[^0-9]')).toBe('\\[\\^0\\-9\\]')
115118
})
116119

117120
it('should handle unicode characters', () => {

test/unit/words.test.mts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,18 @@ describe('words', () => {
9999
expect(determineArticle('zebra')).toBe('a')
100100
})
101101

102-
it('should be case-sensitive (lowercase vowels)', () => {
103-
expect(determineArticle('Apple')).toBe('a')
104-
expect(determineArticle('Elephant')).toBe('a')
105-
expect(determineArticle('Orange')).toBe('a')
102+
it('matches vowels case-insensitively', () => {
103+
// Previously gated on a case-sensitive `/^[aeiou]/` regex, which
104+
// produced "a Apple" for capitalized inputs. Now uses /i so any
105+
// leading vowel (upper or lower) picks "an".
106+
expect(determineArticle('Apple')).toBe('an')
107+
expect(determineArticle('Elephant')).toBe('an')
108+
expect(determineArticle('Orange')).toBe('an')
106109
})
107110

108-
it('should handle uppercase vowels at start', () => {
111+
it('handles uppercase and lowercase vowels uniformly', () => {
109112
expect(determineArticle('apple')).toBe('an')
110-
expect(determineArticle('APPLE')).toBe('a')
113+
expect(determineArticle('APPLE')).toBe('an')
111114
})
112115

113116
it('should handle empty string', () => {

0 commit comments

Comments
 (0)