From 84d8275073c2a91e69ab8d58f366f356d72d874e Mon Sep 17 00:00:00 2001
From: Daniel Demmel <hello@danieldemmel.me>
Date: Mon, 29 Jun 2026 22:44:53 +0100
Subject: [PATCH 1/2] Support Unicode identifiers in Handlebars expressions

Handlebars allows variable, helper, partial and block names in any
language, but the grammar's identifier character classes were limited to
ASCII (a-zA-Z0-9), so non-ASCII names lost highlighting.

Replace the ASCII ranges with Oniguruma Unicode property classes
\p{L} (any letter) and \p{N} (any number) in the Handlebars-specific
rules: block_helper, end_block, partial_and_var, attribute name/value,
extends (!<) and else_token. HTML-structural rules (tag names, entities,
generic attributes) keep their ASCII ranges per the HTML spec.

This supersedes PR #90, which only added Cyrillic to a subset of rules
(and missed the closing-tag rule); the maintainer's review on that PR
asked for full-language support instead.

Adds test/unicode.test.js covering variable names in Cyrillic, CJK,
Arabic and Latin-with-diacritics, plus Cyrillic names in blocks, block
parameters, partials, hashes and else-if.

Closes #90.
---
 grammars/Handlebars.json           | 14 +++---
 grammars/Handlebars.sublime-syntax | 14 +++---
 grammars/Handlebars.tmLanguage     | 14 +++---
 test/unicode.test.js               | 69 ++++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 21 deletions(-)
 create mode 100644 test/unicode.test.js

diff --git a/grammars/Handlebars.json b/grammars/Handlebars.json
index 2a5ff3c..e405437 100644
--- a/grammars/Handlebars.json
+++ b/grammars/Handlebars.json
@@ -327,7 +327,7 @@
             ]
         },
         "end_block": {
-            "begin": "(\\{\\{)(~?/)([a-zA-Z0-9/_\\.-]+)\\s*",
+            "begin": "(\\{\\{)(~?/)([\\p{L}\\p{N}/_\\.-]+)\\s*",
             "end": "(~?\\}\\})",
             "name": "meta.function.block.end.handlebars",
             "endCaptures": {
@@ -451,7 +451,7 @@
             ]
         },
         "block_helper": {
-            "begin": "(\\{\\{)(~?\\#)([-a-zA-Z0-9_\\./>]+)\\s?(@?[-a-zA-Z0-9_\\./]+)*\\s?(@?[-a-zA-Z0-9_\\./]+)*\\s?(@?[-a-zA-Z0-9_\\./]+)*",
+            "begin": "(\\{\\{)(~?\\#)([-\\p{L}\\p{N}_\\./>]+)\\s?(@?[-\\p{L}\\p{N}_\\./]+)*\\s?(@?[-\\p{L}\\p{N}_\\./]+)*\\s?(@?[-\\p{L}\\p{N}_\\./]+)*",
             "end": "(~?\\}\\})",
             "name": "meta.function.block.start.handlebars",
             "endCaptures": {
@@ -548,7 +548,7 @@
             "match": "\\\\\""
         },
         "partial_and_var": {
-            "begin": "(\\{\\{~?\\{*(>|!<)*)\\s*(@?[-a-zA-Z0-9$_\\./]+)*",
+            "begin": "(\\{\\{~?\\{*(>|!<)*)\\s*(@?[-\\p{L}\\p{N}$_\\./]+)*",
             "end": "(~?\\}\\}\\}*)",
             "name": "meta.function.inline.other.handlebars",
             "beginCaptures": {
@@ -574,7 +574,7 @@
             ]
         },
         "handlebars_attribute_name": {
-            "begin": "\\b([-a-zA-Z0-9_\\.]+)\\b=",
+            "begin": "\\b([-\\p{L}\\p{N}_\\.]+)\\b=",
             "captures": {
                 "1": {
                     "name": "variable.parameter.handlebars"
@@ -584,7 +584,7 @@
             "name": "entity.other.attribute-name.handlebars"
         },
         "handlebars_attribute_value": {
-            "begin": "([-a-zA-Z0-9_\\./]+)\\b",
+            "begin": "([-\\p{L}\\p{N}_\\./]+)\\b",
             "captures": {
                 "1": {
                     "name": "variable.parameter.handlebars"
@@ -612,7 +612,7 @@
             "patterns": [
                 {
                     "end": "(\\}\\})",
-                    "begin": "(\\{\\{!<)\\s([-a-zA-Z0-9_\\./]+)",
+                    "begin": "(\\{\\{!<)\\s([-\\p{L}\\p{N}_\\./]+)",
                     "beginCaptures": {
                         "1": {
                             "name": "support.function.handlebars"
@@ -631,7 +631,7 @@
             ]
         },
         "else_token": {
-            "begin": "(\\{\\{)(~?else)(@?\\s(if)\\s([-a-zA-Z0-9_\\.\\(\\s\\)/]+))?",
+            "begin": "(\\{\\{)(~?else)(@?\\s(if)\\s([-\\p{L}\\p{N}_\\.\\(\\s\\)/]+))?",
             "end": "(~?\\}\\}\\}*)",
             "name": "meta.function.inline.else.handlebars",
             "beginCaptures": {
diff --git a/grammars/Handlebars.sublime-syntax b/grammars/Handlebars.sublime-syntax
index ba2d35d..54e55a3 100644
--- a/grammars/Handlebars.sublime-syntax
+++ b/grammars/Handlebars.sublime-syntax
@@ -51,7 +51,7 @@ contexts:
         - match: '--'
           scope: invalid.illegal.bad-comments-or-CDATA.html
   block_helper:
-    - match: '(\{\{)(~?\#)([-a-zA-Z0-9_\./>]+)\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)*'
+    - match: '(\{\{)(~?\#)([-\p{L}\p{N}_\./>]+)\s?(@?[-\p{L}\p{N}_\./]+)*\s?(@?[-\p{L}\p{N}_\./]+)*\s?(@?[-\p{L}\p{N}_\./]+)*'
       captures:
         1: support.constant.handlebars
         2: support.constant.handlebars keyword.control
@@ -89,7 +89,7 @@ contexts:
         - match: '--'
           scope: invalid.illegal.bad-comments-or-CDATA.html
   else_token:
-    - match: '(\{\{)(~?else)(@?\s(if)\s([-a-zA-Z0-9_\.\(\s\)/]+))?'
+    - match: '(\{\{)(~?else)(@?\s(if)\s([-\p{L}\p{N}_\.\(\s\)/]+))?'
       captures:
         1: support.constant.handlebars
         2: support.constant.handlebars keyword.control
@@ -102,7 +102,7 @@ contexts:
             1: support.constant.handlebars
           pop: true
   end_block:
-    - match: '(\{\{)(~?/)([a-zA-Z0-9/_\.-]+)\s*'
+    - match: '(\{\{)(~?/)([\p{L}\p{N}/_\.-]+)\s*'
       captures:
         1: support.constant.handlebars
         2: support.constant.handlebars keyword.control
@@ -128,7 +128,7 @@ contexts:
     - match: \\'
       scope: constant.character.escape.js
   extends:
-    - match: '(\{\{!<)\s([-a-zA-Z0-9_\./]+)'
+    - match: '(\{\{!<)\s([-\p{L}\p{N}_\./]+)'
       captures:
         1: support.function.handlebars
         2: support.class.handlebars
@@ -142,7 +142,7 @@ contexts:
     - include: handlebars_attribute_name
     - include: handlebars_attribute_value
   handlebars_attribute_name:
-    - match: '\b([-a-zA-Z0-9_\.]+)\b='
+    - match: '\b([-\p{L}\p{N}_\.]+)\b='
       captures:
         1: variable.parameter.handlebars
       push:
@@ -152,7 +152,7 @@ contexts:
             1: variable.parameter.handlebars
           pop: true
   handlebars_attribute_value:
-    - match: '([-a-zA-Z0-9_\./]+)\b'
+    - match: '([-\p{L}\p{N}_\./]+)\b'
       captures:
         1: variable.parameter.handlebars
       push:
@@ -368,7 +368,7 @@ contexts:
             - include: html_tags
             - include: scope:text.html.basic
   partial_and_var:
-    - match: '(\{\{~?\{*(>|!<)*)\s*(@?[-a-zA-Z0-9$_\./]+)*'
+    - match: '(\{\{~?\{*(>|!<)*)\s*(@?[-\p{L}\p{N}$_\./]+)*'
       captures:
         1: support.constant.handlebars
         3: variable.parameter.handlebars
diff --git a/grammars/Handlebars.tmLanguage b/grammars/Handlebars.tmLanguage
index 9d0c1f5..8f0a7ab 100644
--- a/grammars/Handlebars.tmLanguage
+++ b/grammars/Handlebars.tmLanguage
@@ -124,7 +124,7 @@
 		<key>block_helper</key>
 		<dict>
 			<key>begin</key>
-			<string>(\{\{)(~?\#)([-a-zA-Z0-9_\./&gt;]+)\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)*</string>
+			<string>(\{\{)(~?\#)([-\p{L}\p{N}_\./&gt;]+)\s?(@?[-\p{L}\p{N}_\./]+)*\s?(@?[-\p{L}\p{N}_\./]+)*\s?(@?[-\p{L}\p{N}_\./]+)*</string>
 			<key>beginCaptures</key>
 			<dict>
 				<key>1</key>
@@ -242,7 +242,7 @@
 		<key>else_token</key>
 		<dict>
 			<key>begin</key>
-			<string>(\{\{)(~?else)(@?\s(if)\s([-a-zA-Z0-9_\.\(\s\)/]+))?</string>
+			<string>(\{\{)(~?else)(@?\s(if)\s([-\p{L}\p{N}_\.\(\s\)/]+))?</string>
 			<key>beginCaptures</key>
 			<dict>
 				<key>1</key>
@@ -282,7 +282,7 @@
 		<key>end_block</key>
 		<dict>
 			<key>begin</key>
-			<string>(\{\{)(~?/)([a-zA-Z0-9/_\.-]+)\s*</string>
+			<string>(\{\{)(~?/)([\p{L}\p{N}/_\.-]+)\s*</string>
 			<key>beginCaptures</key>
 			<dict>
 				<key>1</key>
@@ -367,7 +367,7 @@
 			<array>
 				<dict>
 					<key>begin</key>
-					<string>(\{\{!&lt;)\s([-a-zA-Z0-9_\./]+)</string>
+					<string>(\{\{!&lt;)\s([-\p{L}\p{N}_\./]+)</string>
 					<key>beginCaptures</key>
 					<dict>
 						<key>1</key>
@@ -413,7 +413,7 @@
 		<key>handlebars_attribute_name</key>
 		<dict>
 			<key>begin</key>
-			<string>\b([-a-zA-Z0-9_\.]+)\b=</string>
+			<string>\b([-\p{L}\p{N}_\.]+)\b=</string>
 			<key>captures</key>
 			<dict>
 				<key>1</key>
@@ -430,7 +430,7 @@
 		<key>handlebars_attribute_value</key>
 		<dict>
 			<key>begin</key>
-			<string>([-a-zA-Z0-9_\./]+)\b</string>
+			<string>([-\p{L}\p{N}_\./]+)\b</string>
 			<key>captures</key>
 			<dict>
 				<key>1</key>
@@ -1029,7 +1029,7 @@
 		<key>partial_and_var</key>
 		<dict>
 			<key>begin</key>
-			<string>(\{\{~?\{*(&gt;|!&lt;)*)\s*(@?[-a-zA-Z0-9$_\./]+)*</string>
+			<string>(\{\{~?\{*(&gt;|!&lt;)*)\s*(@?[-\p{L}\p{N}$_\./]+)*</string>
 			<key>beginCaptures</key>
 			<dict>
 				<key>1</key>
diff --git a/test/unicode.test.js b/test/unicode.test.js
new file mode 100644
index 0000000..82eaba1
--- /dev/null
+++ b/test/unicode.test.js
@@ -0,0 +1,69 @@
+'use strict';
+
+// Coverage for non-ASCII (Unicode) identifiers in Handlebars expressions.
+// Handlebars allows variable, helper, partial and block names in any language,
+// so the grammar's identifier character classes use Oniguruma's `\p{L}` (any
+// letter) and `\p{N}` (any number) rather than a hardcoded `a-zA-Z0-9` range.
+// This supersedes PR #90, which only added Cyrillic. The sample strings below
+// span Cyrillic, CJK, Arabic and Latin-with-diacritics so a regression in any
+// single script is caught.
+
+const { test } = require('node:test');
+const assert = require('node:assert/strict');
+const { scopesOf } = require('./helpers/grammar');
+
+// Asserts the named token carries `scope` somewhere in its stack.
+async function assertScope(source, text, scope) {
+  const scopes = await scopesOf(source, text);
+  assert.ok(
+    scopes.some((s) => s === scope || s.split(' ').includes(scope)),
+    `token ${JSON.stringify(text)} in ${JSON.stringify(source)}\n` +
+      `  expected scope ${JSON.stringify(scope)}\n  got ${JSON.stringify(scopes)}`
+  );
+}
+
+test('Cyrillic variable name is a variable token', async () => {
+  await assertScope('{{Москва}}', 'Москва', 'variable.parameter.handlebars');
+});
+
+test('CJK variable name is a variable token', async () => {
+  await assertScope('{{北京市}}', '北京市', 'variable.parameter.handlebars');
+});
+
+test('Arabic variable name is a variable token', async () => {
+  await assertScope('{{إسرائيل}}', 'إسرائيل', 'variable.parameter.handlebars');
+});
+
+test('Latin-with-diacritics variable name is a variable token', async () => {
+  await assertScope('{{Düsseldorf}}', 'Düsseldorf', 'variable.parameter.handlebars');
+});
+
+test('block helper with a non-ASCII name highlights open and close', async () => {
+  const src = '{{#список}}{{/список}}';
+  await assertScope(src, 'список', 'meta.function.block.start.handlebars');
+  // The closing tag must accept the same non-ASCII name: the `/` only appears in
+  // the close, and its scope confirms the end_block rule matched the Cyrillic name.
+  await assertScope(src, '/', 'meta.function.block.end.handlebars');
+});
+
+test('block helper parameters may be non-ASCII', async () => {
+  await assertScope('{{#each города}}', 'города', 'variable.parameter.handlebars');
+});
+
+test('partial with a non-ASCII name', async () => {
+  await assertScope('{{> меню}}', 'меню', 'variable.parameter.handlebars');
+});
+
+test('else if with a non-ASCII condition is consumed by the else rule', async () => {
+  // The grammar tokenizes the condition with a leading space and (by a
+  // pre-existing quirk) does not give it variable scope; what matters here is
+  // that the non-ASCII name is matched by the else_token rule rather than
+  // spilling out as plain text.
+  await assertScope('{{else if активен}}', ' активен', 'meta.function.inline.else.handlebars');
+});
+
+test('non-ASCII hash key and value', async () => {
+  const src = '{{foo имя=значение}}';
+  await assertScope(src, 'имя', 'entity.other.attribute-name.handlebars');
+  await assertScope(src, 'значение', 'entity.other.attribute-value.handlebars');
+});

From 9920a4d895eef70397993d2dabf472bbb1b2c78e Mon Sep 17 00:00:00 2001
From: Daniel Demmel <hello@danieldemmel.me>
Date: Mon, 29 Jun 2026 23:13:12 +0100
Subject: [PATCH 2/2] Add Unicode regression test for the extends ({{!< ...}})
 rule

The extends rule was widened to \p{L}\p{N} alongside the other
identifier rules but had no Unicode coverage; only the ASCII case in
embedding.test.js guarded it. Add a test with a non-ASCII template name
so the widened rule stays protected.
---
 test/unicode.test.js | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/unicode.test.js b/test/unicode.test.js
index 82eaba1..b7c0969 100644
--- a/test/unicode.test.js
+++ b/test/unicode.test.js
@@ -62,6 +62,12 @@ test('else if with a non-ASCII condition is consumed by the else rule', async ()
   await assertScope('{{else if активен}}', ' активен', 'meta.function.inline.else.handlebars');
 });
 
+test('extends preprocessor with a non-ASCII template name', async () => {
+  // Guards the widened `extends` rule ({{!< ...}}); mirrors the ASCII case in
+  // embedding.test.js so the Unicode name keeps its support.class scope.
+  await assertScope('{{!< макет}}', 'макет', 'support.class.handlebars');
+});
+
 test('non-ASCII hash key and value', async () => {
   const src = '{{foo имя=значение}}';
   await assertScope(src, 'имя', 'entity.other.attribute-name.handlebars');