From 29400d4fab3794d2cb7d65112a88af72282a3911 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 11 Jun 2025 18:26:02 -0700 Subject: [PATCH 1/3] fix: unicode backslash escaped single quote in char literal --- grammar.js | 4 ++++ test/corpus/literals.txt | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/grammar.js b/grammar.js index a97fd9f..138ccee 100644 --- a/grammar.js +++ b/grammar.js @@ -165,10 +165,14 @@ module.exports = grammar({ character_literal: _ => token(seq( '\'', + // this accepts multiple characters while java doesn't + // hence multiple characters (even unicode ones) are allowed repeat1(choice( /[^\\'\n]/, /\\./, /\\\n/, + /\\u+005[cC]./, + /\\u+005[cC]\n/, )), '\'', )), diff --git a/test/corpus/literals.txt b/test/corpus/literals.txt index fe49e80..5e42bcc 100644 --- a/test/corpus/literals.txt +++ b/test/corpus/literals.txt @@ -125,6 +125,8 @@ character literals '\uFFFF'; '\177'; '™'; +'\u005cn'; +'\u005c''; --- @@ -137,6 +139,8 @@ character literals (expression_statement (character_literal)) (expression_statement (character_literal)) (expression_statement (character_literal)) + (expression_statement (character_literal)) + (expression_statement (character_literal)) (expression_statement (character_literal))) =============== From 722c45e231547f1aa822babe27145f46b0cd1393 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 11 Jun 2025 21:36:01 -0700 Subject: [PATCH 2/3] fix: multiple u chars in unicode escapes within string literals --- grammar.js | 2 +- test/corpus/literals.txt | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/grammar.js b/grammar.js index 138ccee..8e9566c 100644 --- a/grammar.js +++ b/grammar.js @@ -222,7 +222,7 @@ module.exports = grammar({ /[^xu0-7]/, /[0-7]{1,3}/, /x[0-9a-fA-F]{2}/, - /u[0-9a-fA-F]{4}/, + /u+[0-9a-fA-F]{4}/, /u\{[0-9a-fA-F]+\}/, ))), diff --git a/test/corpus/literals.txt b/test/corpus/literals.txt index 5e42bcc..c6ecbbc 100644 --- a/test/corpus/literals.txt +++ b/test/corpus/literals.txt @@ -151,6 +151,7 @@ string literals "\""; "This is a string"; "'"; +"\uuu0041"; --- @@ -165,7 +166,10 @@ string literals (string_fragment))) (expression_statement (string_literal - (string_fragment)))) + (string_fragment))) + (expression_statement + (string_literal + (escape_sequence)))) =============== text block From a6b5342f579e329a07df09ef70f09df7b1736e7d Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Wed, 11 Jun 2025 21:44:46 -0700 Subject: [PATCH 3/3] fix: unicode backslash escaped double quote in string literal --- grammar.js | 2 +- test/corpus/literals.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/grammar.js b/grammar.js index 8e9566c..0426af2 100644 --- a/grammar.js +++ b/grammar.js @@ -217,7 +217,7 @@ module.exports = grammar({ prec(1, $.escape_sequence), ), escape_sequence: _ => token.immediate(seq( - '\\', + choice('\\', /\\u+005[cC]/), choice( /[^xu0-7]/, /[0-7]{1,3}/, diff --git a/test/corpus/literals.txt b/test/corpus/literals.txt index c6ecbbc..18940a0 100644 --- a/test/corpus/literals.txt +++ b/test/corpus/literals.txt @@ -152,6 +152,7 @@ string literals "This is a string"; "'"; "\uuu0041"; +"\u005c""; --- @@ -167,6 +168,9 @@ string literals (expression_statement (string_literal (string_fragment))) + (expression_statement + (string_literal + (escape_sequence))) (expression_statement (string_literal (escape_sequence))))