@@ -1234,11 +1234,10 @@ fn is_nullish_coalesce(expr: &OutputExpression<'_>) -> bool {
12341234/// Escape a string for JavaScript output.
12351235///
12361236/// Uses double quotes to match Angular's output style.
1237- /// Escapes `"`, `\`, `\n`, `\r`, `$` (when requested), ASCII control characters,
1238- /// and all non-ASCII characters (code point > 0x7E) as `\uNNNN` sequences.
1239- /// Characters above the BMP (U+10000+) are encoded as UTF-16 surrogate pairs
1240- /// (`\uXXXX\uXXXX`). This matches TypeScript's emitter behavior, which escapes
1241- /// non-ASCII characters in string literals.
1237+ /// Escapes `"`, `\`, `\n`, `\r`, and `$` (when requested); other ASCII control characters,
1238+ /// including DEL (0x7F), are written as `\uNNNN` sequences. Non-ASCII characters (code
1239+ /// point > 0x7F) are emitted as raw UTF-8 to match Angular's TypeScript emitter behavior
1240+ /// (see `escapeIdentifier` in `abstract_emitter.ts`), which only escapes `'`, `\`, `\n`, `\r`, and `$`.
12421241pub ( crate ) fn escape_string ( input : & str , escape_dollar : bool ) -> String {
12431242 let mut result = String :: with_capacity ( input. len ( ) + 2 ) ;
12441243 result. push ( '"' ) ;
@@ -1251,18 +1250,14 @@ pub(crate) fn escape_string(input: &str, escape_dollar: bool) -> String {
12511250 '$' if escape_dollar => result. push_str ( "\\ $" ) ,
12521251 // ASCII printable characters (0x20-0x7E) are emitted literally
12531252 c if ( ' ' ..='\x7E' ) . contains ( & c) => result. push ( c) ,
1254- // Everything else (ASCII control chars, non-ASCII) is escaped as \uNNNN.
1255- // Characters above the BMP are encoded as UTF-16 surrogate pairs.
1253+ // DEL (0x7F) is an ASCII control character and must be escaped
1254+ '\x7F' => push_unicode_escape ( & mut result, 0x7F ) ,
1255+ // Non-ASCII characters (> 0x7F) are emitted as raw UTF-8 to match
1256+ // Angular's TypeScript emitter, which does not escape them.
1257+ c if ( c as u32 ) > 0x7F => result. push ( c) ,
1258+ // ASCII control characters (0x00-0x1F) are escaped as \uNNNN.
12561259 c => {
1257- let code = c as u32 ;
1258- if code <= 0xFFFF {
1259- push_unicode_escape ( & mut result, code) ;
1260- } else {
1261- let hi = 0xD800 + ( ( code - 0x10000 ) >> 10 ) ;
1262- let lo = 0xDC00 + ( ( code - 0x10000 ) & 0x3FF ) ;
1263- push_unicode_escape ( & mut result, hi) ;
1264- push_unicode_escape ( & mut result, lo) ;
1265- }
1260+ push_unicode_escape ( & mut result, c as u32 ) ;
12661261 }
12671262 }
12681263 }
@@ -1514,35 +1509,35 @@ mod tests {
15141509
15151510 #[ test]
15161511 fn test_escape_string_unicode_literals ( ) {
1517- // Non-ASCII characters should be escaped as \uNNNN to match
1518- // TypeScript 's emitter behavior.
1512+ // Non-ASCII characters should be emitted as raw UTF-8 to match
1513+ // Angular 's TypeScript emitter behavior (escapeIdentifier in abstract_emitter.ts) .
15191514
1520- // × (multiplication sign U+00D7) -> \u00D7
1521- assert_eq ! ( escape_string( "\u{00D7} " , false ) , "\" \\ u00D7 \" " ) ;
1515+ // × (multiplication sign U+00D7) -> raw UTF-8
1516+ assert_eq ! ( escape_string( "\u{00D7} " , false ) , "\" \u{00D7} \" " ) ;
15221517
1523- // (non-breaking space U+00A0) -> \u00A0
1524- assert_eq ! ( escape_string( "\u{00A0} " , false ) , "\" \\ u00A0 \" " ) ;
1518+ // (non-breaking space U+00A0) -> raw UTF-8
1519+ assert_eq ! ( escape_string( "\u{00A0} " , false ) , "\" \u{00A0} \" " ) ;
15251520
15261521 // Mixed ASCII and non-ASCII
1527- assert_eq ! ( escape_string( "a\u{00D7} b" , false ) , "\" a\\ u00D7b \" " ) ;
1522+ assert_eq ! ( escape_string( "a\u{00D7} b" , false ) , "\" a\u{00D7} b \" " ) ;
15281523
15291524 // Multiple non-ASCII characters
1530- assert_eq ! ( escape_string( "\u{00D7} \u{00A0} " , false ) , "\" \\ u00D7 \\ u00A0 \" " ) ;
1525+ assert_eq ! ( escape_string( "\u{00D7} \u{00A0} " , false ) , "\" \u{00D7} \u{00A0} \" " ) ;
15311526
1532- // Characters outside BMP (emoji) -> surrogate pair
1533- assert_eq ! ( escape_string( "\u{1F600} " , false ) , "\" \\ uD83D \\ uDE00 \" " ) ;
1527+ // Characters outside BMP (emoji) -> raw UTF-8
1528+ assert_eq ! ( escape_string( "\u{1F600} " , false ) , "\" \u{1F600} \" " ) ;
15341529
1535- // Common HTML entities -> all escaped as \uNNNN
1536- assert_eq ! ( escape_string( "\u{00A9} " , false ) , "\" \\ u00A9 \" " ) ; // © ©
1537- assert_eq ! ( escape_string( "\u{00AE} " , false ) , "\" \\ u00AE \" " ) ; // ® ®
1538- assert_eq ! ( escape_string( "\u{2014} " , false ) , "\" \\ u2014 \" " ) ; // — —
1539- assert_eq ! ( escape_string( "\u{2013} " , false ) , "\" \\ u2013 \" " ) ; // – –
1530+ // Common HTML entities -> all emitted as raw UTF-8
1531+ assert_eq ! ( escape_string( "\u{00A9} " , false ) , "\" \u{00A9} \" " ) ; // © ©
1532+ assert_eq ! ( escape_string( "\u{00AE} " , false ) , "\" \u{00AE} \" " ) ; // ® ®
1533+ assert_eq ! ( escape_string( "\u{2014} " , false ) , "\" \u{2014} \" " ) ; // — —
1534+ assert_eq ! ( escape_string( "\u{2013} " , false ) , "\" \u{2013} \" " ) ; // – –
15401535
15411536 // Greek letter alpha
1542- assert_eq ! ( escape_string( "\u{03B1} " , false ) , "\" \\ u03B1 \" " ) ; // α
1537+ assert_eq ! ( escape_string( "\u{03B1} " , false ) , "\" \u{03B1} \" " ) ; // α
15431538
15441539 // Accented Latin letter
1545- assert_eq ! ( escape_string( "\u{00E9} " , false ) , "\" \\ u00E9 \" " ) ; // é
1540+ assert_eq ! ( escape_string( "\u{00E9} " , false ) , "\" \u{00E9} \" " ) ; // é
15461541 }
15471542
15481543 #[ test]
@@ -1561,34 +1556,33 @@ mod tests {
15611556 }
15621557
15631558 #[ test]
1564- fn test_escape_string_non_ascii_as_unicode_escapes ( ) {
1565- // Non-ASCII characters should be escaped as \uNNNN to match
1566- // TypeScript 's emitter behavior (which escapes non-ASCII in string literals ).
1559+ fn test_escape_string_non_ascii_as_raw_utf8 ( ) {
1560+ // Non-ASCII characters should be emitted as raw UTF-8 to match
1561+ // Angular 's TypeScript emitter behavior (escapeIdentifier in abstract_emitter.ts ).
15671562
15681563 // Non-breaking space U+00A0
1569- assert_eq ! ( escape_string( "\u{00A0} " , false ) , "\" \\ u00A0 \" " ) ;
1564+ assert_eq ! ( escape_string( "\u{00A0} " , false ) , "\" \u{00A0} \" " ) ;
15701565
15711566 // En dash U+2013
1572- assert_eq ! ( escape_string( "\u{2013} " , false ) , "\" \\ u2013 \" " ) ;
1567+ assert_eq ! ( escape_string( "\u{2013} " , false ) , "\" \u{2013} \" " ) ;
15731568
15741569 // Trademark U+2122
1575- assert_eq ! ( escape_string( "\u{2122} " , false ) , "\" \\ u2122 \" " ) ;
1570+ assert_eq ! ( escape_string( "\u{2122} " , false ) , "\" \u{2122} \" " ) ;
15761571
15771572 // Infinity U+221E
1578- assert_eq ! ( escape_string( "\u{221E} " , false ) , "\" \\ u221E \" " ) ;
1573+ assert_eq ! ( escape_string( "\u{221E} " , false ) , "\" \u{221E} \" " ) ;
15791574
15801575 // Mixed ASCII and non-ASCII
1581- assert_eq ! ( escape_string( "a\u{00D7} b" , false ) , "\" a\\ u00D7b \" " ) ;
1576+ assert_eq ! ( escape_string( "a\u{00D7} b" , false ) , "\" a\u{00D7} b \" " ) ;
15821577
15831578 // Multiple non-ASCII characters
1584- assert_eq ! ( escape_string( "\u{00D7} \u{00A0} " , false ) , "\" \\ u00D7 \\ u00A0 \" " ) ;
1579+ assert_eq ! ( escape_string( "\u{00D7} \u{00A0} " , false ) , "\" \u{00D7} \u{00A0} \" " ) ;
15851580
1586- // Characters above BMP should use surrogate pairs
1587- // U+1F600 (grinning face) = surrogate pair D83D DE00
1588- assert_eq ! ( escape_string( "\u{1F600} " , false ) , "\" \\ uD83D\\ uDE00\" " ) ;
1581+ // Characters above BMP (emoji) -> raw UTF-8
1582+ assert_eq ! ( escape_string( "\u{1F600} " , false ) , "\" \u{1F600} \" " ) ;
15891583
1590- // U+10000 (first supplementary char) = surrogate pair D800 DC00
1591- assert_eq ! ( escape_string( "\u{10000} " , false ) , "\" \\ uD800 \\ uDC00 \" " ) ;
1584+ // U+10000 (first supplementary char) -> raw UTF-8
1585+ assert_eq ! ( escape_string( "\u{10000} " , false ) , "\" \u{10000} \" " ) ;
15921586
15931587 // ASCII printable chars (0x20-0x7E) should remain literal
15941588 assert_eq ! ( escape_string( " ~" , false ) , "\" ~\" " ) ;
0 commit comments