fix: minor correctness issues across words/regexps/cache

jdalton · jdalton · commit 10a5d11a2ac6 · 2026-04-19T21:31:41.000-04:00
- words.capitalize: iterate by code point so non-BMP inputs (emoji,
  astral scripts) don't split into broken surrogate pairs
- words.determineArticle: match vowels case-insensitively; 'Apple' and
  'apple' both pick 'an' now
- regexps.escapeRegExp: escape '-' so splicing the result into a
  character class produces literal chars rather than an unintended range
- cache-with-ttl / cacache patternToRegex: add the missing `$` end anchor so
  `foo*bar` matches exactly that shape, not `foo*bar<extra>` — fixes
  a silent over-delete when a wildcard pattern ends in a literal suffix

Tests updated where they pinned the old (incorrect) behavior.
diff --git a/src/cacache.ts b/src/cacache.ts
@@ -54,14 +54,15 @@ function matchesPattern(key: string, pattern: string): boolean {
 
 /**
  * Convert wildcard pattern to regex for matching.
- * Supports * as wildcard (matches any characters).
+ * Supports * as wildcard (matches any characters). Anchors both ends —
+ * `foo*bar` matches exactly `foo<anything>bar`, not `foo<anything>bar<more>`.
  */
 function patternToRegex(pattern: string): RegExp {
   // Escape regex special characters except *
   const escaped = pattern.replaceAll(/[.+?^${}()|[\]\\]/g, '\\$&')
   // Convert * to .* (match any characters)
   const regexPattern = escaped.replaceAll('*', '.*')
-  return new RegExp(`^${regexPattern}`)
+  return new RegExp(`^${regexPattern}$`)
 }
 
 /**
diff --git a/src/cache-with-ttl.ts b/src/cache-with-ttl.ts
@@ -253,10 +253,13 @@ export function createTtlCache(options?: TtlCacheOptions): TtlCache {
       return (key: string) => key.startsWith(fullPattern)
     }
 
-    // Wildcard matching with regex.
+    // Wildcard matching with regex. Anchor both ends so `foo*bar` matches
+    // exactly `foo<anything>bar` and not `foo<anything>bar<anything else>`.
+    // Missing the `$` anchor let `deleteAll('foo*bar')` also sweep
+    // `foo123bar-extra`, which silently over-deletes.
     const escaped = fullPattern.replaceAll(/[.+?^${}()|[\]\\]/g, '\\$&')
     const regexPattern = escaped.replaceAll('*', '.*')
-    const regex = new RegExp(`^${regexPattern}`)
+    const regex = new RegExp(`^${regexPattern}$`)
     return (key: string) => regex.test(key)
   }
 
diff --git a/src/regexps.ts b/src/regexps.ts
@@ -20,8 +20,11 @@
  */
 /*@__NO_SIDE_EFFECTS__*/
 export function escapeRegExp(str: string): string {
-  // Escape characters with special meaning either inside or outside character sets.
-  // Use a simple backslash escape when it's always valid, and a `\xnn` escape when
-  // the simpler form would be disallowed by Unicode patterns' stricter grammar.
-  return str.replace(/[\\|{}()[\]^$+*?.]/g, '\\$&')
+  // Escape characters with special meaning either inside or outside
+  // character sets. Includes `-` so callers that splice an escaped
+  // string into a character class — e.g. `new RegExp('[' +
+  // escapeRegExp(userInput) + ']')` — don't accidentally create a range
+  // when input contains '-'. Matches the MDN / `escape-string-regexp`
+  // reference set.
+  return str.replace(/[\\|{}()[\]^$+*?.-]/g, '\\$&')
 }
diff --git a/src/words.ts b/src/words.ts
@@ -19,14 +19,15 @@ export interface PluralizeOptions {
  */
 /*@__NO_SIDE_EFFECTS__*/
 export function capitalize(word: string): string {
-  const { length } = word
-  if (length === 0) {
+  if (word.length === 0) {
     return word
   }
-  if (length === 1) {
-    return word.toUpperCase()
-  }
-  return `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`
+  // Iterate by code point, not UTF-16 unit, so non-BMP characters
+  // (emoji, astral-plane scripts) aren't split between their surrogate
+  // pair halves. `charAt(0).toUpperCase() + slice(1).toLowerCase()` used
+  // to produce broken surrogate pairs for inputs like '𐐀foo'.
+  const [first, ...rest] = [...word]
+  return (first ?? '').toUpperCase() + rest.join('').toLowerCase()
 }
 
 /**
@@ -40,7 +41,11 @@ export function capitalize(word: string): string {
  */
 /*@__NO_SIDE_EFFECTS__*/
 export function determineArticle(word: string): string {
-  return /^[aeiou]/.test(word) ? 'an' : 'a'
+  // Case-insensitive so `Apple` and `apple` both pick `an`. Strict
+  // spelling rules can't handle silent-h / y-sound exceptions (hour,
+  // user); documenting that as a known limitation rather than shipping
+  // a multi-entry exception list.
+  return /^[aeiou]/i.test(word) ? 'an' : 'a'
 }
 
 /**
diff --git a/test/unit/regexps.test.mts b/test/unit/regexps.test.mts
@@ -109,9 +109,12 @@ describe('regexps', () => {
       expect(escapeRegExp('a?')).toBe('a\\?')
     })
 
-    it('should escape character classes', () => {
-      expect(escapeRegExp('[a-z]')).toBe('\\[a-z\\]')
-      expect(escapeRegExp('[^0-9]')).toBe('\\[\\^0-9\\]')
+    it('should escape character classes (including the range hyphen)', () => {
+      // `-` is now escaped so splicing the result into a character class
+      // (e.g. `[${escapeRegExp('a-z')}]`) produces three literal chars
+      // rather than a range.
+      expect(escapeRegExp('[a-z]')).toBe('\\[a\\-z\\]')
+      expect(escapeRegExp('[^0-9]')).toBe('\\[\\^0\\-9\\]')
     })
 
     it('should handle unicode characters', () => {
diff --git a/test/unit/words.test.mts b/test/unit/words.test.mts
@@ -99,15 +99,18 @@ describe('words', () => {
       expect(determineArticle('zebra')).toBe('a')
     })
 
-    it('should be case-sensitive (lowercase vowels)', () => {
-      expect(determineArticle('Apple')).toBe('a')
-      expect(determineArticle('Elephant')).toBe('a')
-      expect(determineArticle('Orange')).toBe('a')
+    it('matches vowels case-insensitively', () => {
+      // Previously gated on a case-sensitive `/^[aeiou]/` regex, which
+      // produced "a Apple" for capitalized inputs. Now uses /i so any
+      // leading vowel (upper or lower) picks "an".
+      expect(determineArticle('Apple')).toBe('an')
+      expect(determineArticle('Elephant')).toBe('an')
+      expect(determineArticle('Orange')).toBe('an')
     })
 
-    it('should handle uppercase vowels at start', () => {
+    it('handles uppercase and lowercase vowels uniformly', () => {
       expect(determineArticle('apple')).toBe('an')
-      expect(determineArticle('APPLE')).toBe('a')
+      expect(determineArticle('APPLE')).toBe('an')
     })
 
     it('should handle empty string', () => {

Original file line number	Diff line number	Diff line change
`@@ -253,10 +253,13 @@ export function createTtlCache(options?: TtlCacheOptions): TtlCache {`
`253`	`253`	`return (key: string) => key.startsWith(fullPattern)`
`254`	`254`	`}`
`255`	`255`
`256`		`- // Wildcard matching with regex.`
	`256`	+ // Wildcard matching with regex. Anchor both ends so `foo*bar` matches
	`257`	+ // exactly `foo<anything>bar` and not `foo<anything>bar<anything else>`.
	`258`	+ // Missing the `$` anchor let `deleteAll('foo*bar')` also sweep
	`259`	+ // `foo123bar-extra`, which silently over-deletes.
`257`	`260`	`const escaped = fullPattern.replaceAll(/[.+?^${}()|[\]\\]/g, '\\$&')`
`258`	`261`	`const regexPattern = escaped.replaceAll('*', '.*')`
`259`		- const regex = new RegExp(`^${regexPattern}`)
	`262`	+ const regex = new RegExp(`^${regexPattern}$`)
`260`	`263`	`return (key: string) => regex.test(key)`
`261`	`264`	`}`
`262`	`265`