From 84d8275073c2a91e69ab8d58f366f356d72d874e Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 29 Jun 2026 22:44:53 +0100 Subject: [PATCH 1/2] Support Unicode identifiers in Handlebars expressions Handlebars allows variable, helper, partial and block names in any language, but the grammar's identifier character classes were limited to ASCII (a-zA-Z0-9), so non-Latin names lost highlighting. Replace the ASCII ranges with Oniguruma Unicode property classes \p{L} (any letter), \p{M} (any combining mark) and \p{N} (any number) in the Handlebars-specific rules: block_helper, end_block, partial_and_var, attribute name/value, layout (!<) and else_token. \p{M} is needed because many scripts (Devanagari, Thai, vocalised Arabic/Hebrew, NFD-decomposed Latin) spell words with combining marks that \p{L} alone does not match, which would cut such names off mid-identifier. HTML-structural rules (tag names, entities, generic attributes) keep their ASCII ranges per the HTML spec. This supersedes PR #90, which only added Cyrillic to a subset of rules (and missed the closing-tag rule); the maintainer's review on that PR asked for full-language support instead. Closes #90. Adds test/unicode.test.js covering Cyrillic, CJK, Arabic and Latin-with-diacritics across variables, blocks, partials, hashes and else-if. 
--- grammars/Handlebars.json | 14 +++--- grammars/Handlebars.sublime-syntax | 14 +++--- grammars/Handlebars.tmLanguage | 14 +++--- test/unicode.test.js | 69 ++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 21 deletions(-) create mode 100644 test/unicode.test.js diff --git a/grammars/Handlebars.json b/grammars/Handlebars.json index 2a5ff3c..e405437 100644 --- a/grammars/Handlebars.json +++ b/grammars/Handlebars.json @@ -327,7 +327,7 @@ ] }, "end_block": { - "begin": "(\\{\\{)(~?/)([a-zA-Z0-9/_\\.-]+)\\s*", + "begin": "(\\{\\{)(~?/)([\\p{L}\\p{M}\\p{N}/_\\.-]+)\\s*", "end": "(~?\\}\\})", "name": "meta.function.block.end.handlebars", "endCaptures": { @@ -451,7 +451,7 @@ ] }, "block_helper": { - "begin": "(\\{\\{)(~?\\#)([-a-zA-Z0-9_\\./>]+)\\s?(@?[-a-zA-Z0-9_\\./]+)*\\s?(@?[-a-zA-Z0-9_\\./]+)*\\s?(@?[-a-zA-Z0-9_\\./]+)*", + "begin": "(\\{\\{)(~?\\#)([-\\p{L}\\p{M}\\p{N}_\\./>]+)\\s?(@?[-\\p{L}\\p{M}\\p{N}_\\./]+)*\\s?(@?[-\\p{L}\\p{M}\\p{N}_\\./]+)*\\s?(@?[-\\p{L}\\p{M}\\p{N}_\\./]+)*", "end": "(~?\\}\\})", "name": "meta.function.block.start.handlebars", "endCaptures": { @@ -548,7 +548,7 @@ "match": "\\\\\"" }, "partial_and_var": { - "begin": "(\\{\\{~?\\{*(>|!<)*)\\s*(@?[-a-zA-Z0-9$_\\./]+)*", + "begin": "(\\{\\{~?\\{*(>|!<)*)\\s*(@?[-\\p{L}\\p{M}\\p{N}$_\\./]+)*", "end": "(~?\\}\\}\\}*)", "name": "meta.function.inline.other.handlebars", "beginCaptures": { @@ -574,7 +574,7 @@ ] }, "handlebars_attribute_name": { - "begin": "\\b([-a-zA-Z0-9_\\.]+)\\b=", + "begin": "\\b([-\\p{L}\\p{M}\\p{N}_\\.]+)\\b=", "captures": { "1": { "name": "variable.parameter.handlebars" @@ -584,7 +584,7 @@ "name": "entity.other.attribute-name.handlebars" }, "handlebars_attribute_value": { - "begin": "([-a-zA-Z0-9_\\./]+)\\b", + "begin": "([-\\p{L}\\p{M}\\p{N}_\\./]+)\\b", "captures": { "1": { "name": "variable.parameter.handlebars" @@ -612,7 +612,7 @@ "patterns": [ { "end": "(\\}\\})", - "begin": "(\\{\\{!<)\\s([-a-zA-Z0-9_\\./]+)", + "begin": "(\\{\\{!<)\\s([-\\p{L}\\p{M}\\p{N}_\\./]+)", "beginCaptures": { "1": { "name": 
"support.function.handlebars" @@ -631,7 +631,7 @@ ] }, "else_token": { - "begin": "(\\{\\{)(~?else)(@?\\s(if)\\s([-a-zA-Z0-9_\\.\\(\\s\\)/]+))?", + "begin": "(\\{\\{)(~?else)(@?\\s(if)\\s([-\\p{L}\\p{M}\\p{N}_\\.\\(\\s\\)/]+))?", "end": "(~?\\}\\}\\}*)", "name": "meta.function.inline.else.handlebars", "beginCaptures": { diff --git a/grammars/Handlebars.sublime-syntax b/grammars/Handlebars.sublime-syntax index ba2d35d..54e55a3 100644 --- a/grammars/Handlebars.sublime-syntax +++ b/grammars/Handlebars.sublime-syntax @@ -51,7 +51,7 @@ contexts: - match: '--' scope: invalid.illegal.bad-comments-or-CDATA.html block_helper: - - match: '(\{\{)(~?\#)([-a-zA-Z0-9_\./>]+)\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)*' + - match: '(\{\{)(~?\#)([-\p{L}\p{M}\p{N}_\./>]+)\s?(@?[-\p{L}\p{M}\p{N}_\./]+)*\s?(@?[-\p{L}\p{M}\p{N}_\./]+)*\s?(@?[-\p{L}\p{M}\p{N}_\./]+)*' captures: 1: support.constant.handlebars 2: support.constant.handlebars keyword.control @@ -89,7 +89,7 @@ contexts: - match: '--' scope: invalid.illegal.bad-comments-or-CDATA.html else_token: - - match: '(\{\{)(~?else)(@?\s(if)\s([-a-zA-Z0-9_\.\(\s\)/]+))?' + - match: '(\{\{)(~?else)(@?\s(if)\s([-\p{L}\p{M}\p{N}_\.\(\s\)/]+))?' 
captures: 1: support.constant.handlebars 2: support.constant.handlebars keyword.control @@ -102,7 +102,7 @@ contexts: 1: support.constant.handlebars pop: true end_block: - - match: '(\{\{)(~?/)([a-zA-Z0-9/_\.-]+)\s*' + - match: '(\{\{)(~?/)([\p{L}\p{M}\p{N}/_\.-]+)\s*' captures: 1: support.constant.handlebars 2: support.constant.handlebars keyword.control @@ -128,7 +128,7 @@ contexts: - match: \\' scope: constant.character.escape.js extends: - - match: '(\{\{!<)\s([-a-zA-Z0-9_\./]+)' + - match: '(\{\{!<)\s([-\p{L}\p{M}\p{N}_\./]+)' captures: 1: support.function.handlebars 2: support.class.handlebars @@ -142,7 +142,7 @@ contexts: - include: handlebars_attribute_name - include: handlebars_attribute_value handlebars_attribute_name: - - match: '\b([-a-zA-Z0-9_\.]+)\b=' + - match: '\b([-\p{L}\p{M}\p{N}_\.]+)\b=' captures: 1: variable.parameter.handlebars push: @@ -152,7 +152,7 @@ contexts: 1: variable.parameter.handlebars pop: true handlebars_attribute_value: - - match: '([-a-zA-Z0-9_\./]+)\b' + - match: '([-\p{L}\p{M}\p{N}_\./]+)\b' captures: 1: variable.parameter.handlebars push: @@ -368,7 +368,7 @@ contexts: - include: html_tags - include: scope:text.html.basic partial_and_var: - - match: '(\{\{~?\{*(>|!<)*)\s*(@?[-a-zA-Z0-9$_\./]+)*' + - match: '(\{\{~?\{*(>|!<)*)\s*(@?[-\p{L}\p{M}\p{N}$_\./]+)*' captures: 1: support.constant.handlebars 3: variable.parameter.handlebars diff --git a/grammars/Handlebars.tmLanguage b/grammars/Handlebars.tmLanguage index 9d0c1f5..8f0a7ab 100644 --- a/grammars/Handlebars.tmLanguage +++ b/grammars/Handlebars.tmLanguage @@ -124,7 +124,7 @@ block_helper begin - (\{\{)(~?\#)([-a-zA-Z0-9_\./>]+)\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)*\s?(@?[-a-zA-Z0-9_\./]+)* + (\{\{)(~?\#)([-\p{L}\p{M}\p{N}_\./>]+)\s?(@?[-\p{L}\p{M}\p{N}_\./]+)*\s?(@?[-\p{L}\p{M}\p{N}_\./]+)*\s?(@?[-\p{L}\p{M}\p{N}_\./]+)* beginCaptures 1 @@ -242,7 +242,7 @@ else_token begin - (\{\{)(~?else)(@?\s(if)\s([-a-zA-Z0-9_\.\(\s\)/]+))? + (\{\{)(~?else)(@?\s(if)\s([-\p{L}\p{M}\p{N}_\.\(\s\)/]+))? 
beginCaptures 1 @@ -282,7 +282,7 @@ end_block begin - (\{\{)(~?/)([a-zA-Z0-9/_\.-]+)\s* + (\{\{)(~?/)([\p{L}\p{M}\p{N}/_\.-]+)\s* beginCaptures 1 @@ -367,7 +367,7 @@ begin - (\{\{!<)\s([-a-zA-Z0-9_\./]+) + (\{\{!<)\s([-\p{L}\p{M}\p{N}_\./]+) beginCaptures 1 @@ -413,7 +413,7 @@ handlebars_attribute_name begin - \b([-a-zA-Z0-9_\.]+)\b= + \b([-\p{L}\p{M}\p{N}_\.]+)\b= captures 1 @@ -430,7 +430,7 @@ handlebars_attribute_value begin - ([-a-zA-Z0-9_\./]+)\b + ([-\p{L}\p{M}\p{N}_\./]+)\b captures 1 @@ -1029,7 +1029,7 @@ partial_and_var begin - (\{\{~?\{*(>|!<)*)\s*(@?[-a-zA-Z0-9$_\./]+)* + (\{\{~?\{*(>|!<)*)\s*(@?[-\p{L}\p{M}\p{N}$_\./]+)* beginCaptures 1 diff --git a/test/unicode.test.js b/test/unicode.test.js new file mode 100644 index 0000000..82eaba1 --- /dev/null +++ b/test/unicode.test.js @@ -0,0 +1,69 @@ +'use strict'; + +// Coverage for non-ASCII (Unicode) identifiers in Handlebars expressions. +// Handlebars allows variable, helper, partial and block names in any language, +// so the identifier character classes use Oniguruma's `\p{L}` (any letter), +// `\p{M}` (combining marks) and `\p{N}` (any number), not `a-zA-Z0-9`. +// This supersedes PR #90, which only added Cyrillic. The sample strings below +// span Cyrillic, CJK, Arabic and Latin-with-diacritics so a regression in any +// single script is caught. + +const { test } = require('node:test'); +const assert = require('node:assert/strict'); +const { scopesOf } = require('./helpers/grammar'); + +// Asserts the named token carries `scope` somewhere in its stack. 
+async function assertScope(source, text, scope) { + const scopes = await scopesOf(source, text); + assert.ok( + scopes.some((s) => s === scope || s.split(' ').includes(scope)), + `token ${JSON.stringify(text)} in ${JSON.stringify(source)}\n` + + ` expected scope ${JSON.stringify(scope)}\n got ${JSON.stringify(scopes)}` + ); +} + +test('Cyrillic variable name is a variable token', async () => { + await assertScope('{{Москва}}', 'Москва', 'variable.parameter.handlebars'); +}); + +test('CJK variable name is a variable token', async () => { + await assertScope('{{北京市}}', '北京市', 'variable.parameter.handlebars'); +}); + +test('Arabic variable name is a variable token', async () => { + await assertScope('{{إسرائيل}}', 'إسرائيل', 'variable.parameter.handlebars'); +}); + +test('Latin-with-diacritics variable name is a variable token', async () => { + await assertScope('{{Düsseldorf}}', 'Düsseldorf', 'variable.parameter.handlebars'); +}); + +test('block helper with a non-ASCII name highlights open and close', async () => { + const src = '{{#список}}{{/список}}'; + await assertScope(src, 'список', 'meta.function.block.start.handlebars'); + // The closing tag must accept the same non-ASCII name: the `/` only appears in + // the close, and its scope confirms the end_block rule matched the Cyrillic name. 
+ await assertScope(src, '/', 'meta.function.block.end.handlebars'); +}); + +test('block helper parameters may be non-ASCII', async () => { + await assertScope('{{#each города}}', 'города', 'variable.parameter.handlebars'); +}); + +test('partial with a non-ASCII name', async () => { + await assertScope('{{> меню}}', 'меню', 'variable.parameter.handlebars'); +}); + +test('else if with a non-ASCII condition is consumed by the else rule', async () => { + // The grammar tokenizes the condition with a leading space and (by a + // pre-existing quirk) does not give it variable scope; what matters here is + // that the non-ASCII name is matched by the else_token rule rather than + // spilling out as plain text. + await assertScope('{{else if активен}}', ' активен', 'meta.function.inline.else.handlebars'); +}); + +test('non-ASCII hash key and value', async () => { + const src = '{{foo имя=значение}}'; + await assertScope(src, 'имя', 'entity.other.attribute-name.handlebars'); + await assertScope(src, 'значение', 'entity.other.attribute-value.handlebars'); +}); From 9920a4d895eef70397993d2dabf472bbb1b2c78e Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 29 Jun 2026 23:13:12 +0100 Subject: [PATCH 2/2] Add Unicode regression test for the extends ({{!< ...}}) rule The extends rule was widened to \p{L}\p{N} alongside the other identifier rules but had no Unicode coverage; only the ASCII case in embedding.test.js guarded it. Add a test with a non-ASCII template name so the widened rule stays protected. 
--- test/unicode.test.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unicode.test.js b/test/unicode.test.js index 82eaba1..b7c0969 100644 --- a/test/unicode.test.js +++ b/test/unicode.test.js @@ -62,6 +62,12 @@ test('else if with a non-ASCII condition is consumed by the else rule', async () await assertScope('{{else if активен}}', ' активен', 'meta.function.inline.else.handlebars'); }); +test('extends preprocessor with a non-ASCII template name', async () => { + // Guards the widened `extends` rule ({{!< ...}}); mirrors the ASCII case in + // embedding.test.js so the Unicode name keeps its support.class scope. + await assertScope('{{!< макет}}', 'макет', 'support.class.handlebars'); +}); + test('non-ASCII hash key and value', async () => { const src = '{{foo имя=значение}}'; await assertScope(src, 'имя', 'entity.other.attribute-name.handlebars');