[python mode] Highlight expressions nested inside format strings

Bo Peng · marijnh · commit 30ae1edf7c2b · 2018-04-15T13:27:00.000+02:00
diff --git a/mode/python/index.html b/mode/python/index.html
@@ -126,6 +126,15 @@ <h2>Python mode</h2>
     def __init__(self, mixin = 'Hello'):
         self.mixin = mixin
 
+# Python 3.6 f-strings (https://www.python.org/dev/peps/pep-0498/)
+f'My name is {name}, my age next year is {age+1}, my anniversary is {anniversary:%A, %B %d, %Y}.'
+f'He said his name is {name!r}.'
+f"""He said his name is {name!r}."""
+f'{"quoted string"}'
+f'{{ {4*10} }}'
+f'This is an error }'
+f'This is ok }}'
+fr'x={4*10}\n'
 </textarea></div>
 
 
diff --git a/mode/python/python.js b/mode/python/python.js
@@ -62,7 +62,7 @@
       var identifiers = parserConf.identifiers|| /^[_A-Za-z\u00A1-\uFFFF][_A-Za-z0-9\u00A1-\uFFFF]*/;
       myKeywords = myKeywords.concat(["nonlocal", "False", "True", "None", "async", "await"]);
       myBuiltins = myBuiltins.concat(["ascii", "bytes", "exec", "print"]);
-      var stringPrefixes = new RegExp("^(([rbuf]|(br))?('{3}|\"{3}|['\"]))", "i");
+      var stringPrefixes = new RegExp("^(([rbuf]|(br)|(fr))?('{3}|\"{3}|['\"]))", "i");
     } else {
       var identifiers = parserConf.identifiers|| /^[_A-Za-z][_A-Za-z0-9]*/;
       myKeywords = myKeywords.concat(["exec", "print"]);
@@ -142,8 +142,18 @@
 
       // Handle Strings
       if (stream.match(stringPrefixes)) {
-        state.tokenize = tokenStringFactory(stream.current());
-        return state.tokenize(stream, state);
+        var isFmtString = stream.current().toLowerCase().indexOf('f') !== -1;
+        if (!isFmtString || state.fstr_state !== null) {
+          // If this is a nested format string (e.g. f' {   f"{10*10}" + "a" }' ),
+          // we do not highlight its nested expression; the nested format
+          // string is treated as a regular string.
+          state.tokenize = tokenStringFactory(stream.current());
+          return state.tokenize(stream, state);
+        } else {
+          // top-level format string: use the tokenizer that also handles {expression} parts
+          state.tokenize = formatStringFactory(stream.current());
+          return state.tokenize(stream, state);
+        }
       }
 
       for (var i = 0; i < operators.length; i++)
@@ -174,6 +184,76 @@
       return ERRORCLASS;
     }
 
+    function formatStringFactory(delimiter) { // builds the tokenizer for an f-string opened by `delimiter` (prefix letters + quotes)
+      while ("rubf".indexOf(delimiter.charAt(0).toLowerCase()) >= 0) // strip leading r/u/b/f prefix letters (case-insensitive)
+        delimiter = delimiter.substr(1);
+
+      var singleline = delimiter.length == 1; // a one-character delimiter means a single-quoted (single-line) string
+      var OUTCLASS = "string";
+
+      function tokenString(stream, state) {
+        if (state.fstr_state) {
+          // inside an f-string {expression}
+          if (stream.match(delimiter)) {
+            // expression ends prematurely, but this is very common while editing
+            // Could show error to remind users to close brace here
+            state.fstr_state = null;
+            return OUTCLASS;
+          } else if (stream.match('{')) {
+            // the opening brace, if it was not already eaten by the branch below
+            return "punctuation";
+          } else if (stream.match('}')) {
+            // closing brace: return to the regular inside-string state
+            state.fstr_state = null;
+            return "punctuation";
+          } else {
+            // use tokenBaseInner to parse the expression; NOTE(review): it receives fstr_state (created as {}), not the mode state - confirm that is intended
+            return tokenBaseInner(stream, state.fstr_state);
+          }
+        }
+        while (!stream.eol()) {
+          stream.eatWhile(/[^'"\{\}\\]/); // consume plain text up to the next quote, brace, or backslash
+          if (stream.eat("\\")) {
+            stream.next(); // skip the character following the backslash
+            if (singleline && stream.eol())
+              return OUTCLASS;
+          } else if (stream.match(delimiter)) {
+            state.tokenize = tokenBase; // closing delimiter: hand control back to the base tokenizer
+            return OUTCLASS;
+          } else if (stream.match('{{')) {
+            // '{{' is consumed as string text and does not open an expression
+            return OUTCLASS;
+          } else if (stream.match('{', false)) { // second argument false: presumably a non-consuming peek (the '{' is eaten explicitly below)
+            // switch to nested (expression) mode
+            state.fstr_state = {};
+            if (stream.current()) {
+              return OUTCLASS; // emit the string text consumed so far; the '{' is left for the next call
+            } else {
+              // need to return something, so eat the starting {
+              stream.next();
+              return "punctuation";
+            }
+          } else if (stream.match('}}')) {
+            return OUTCLASS; // '}}' is consumed as string text
+          } else if (stream.match('}')) {
+            // single } in f-string is an error
+            return ERRORCLASS;
+          } else {
+            stream.eat(/['"]/); // a quote character that did not match the delimiter: part of the string
+          }
+        }
+        if (singleline) { // end of line reached without a closing delimiter
+          if (parserConf.singleLineStringErrors)
+            return ERRORCLASS;
+          else
+            state.tokenize = tokenBase;
+        }
+        return OUTCLASS;
+      }
+      tokenString.isString = true;
+      return tokenString;
+    }
+
     function tokenStringFactory(delimiter) {
       while ("rubf".indexOf(delimiter.charAt(0).toLowerCase()) >= 0)
         delimiter = delimiter.substr(1);
@@ -278,6 +358,7 @@
         return {
           tokenize: tokenBase,
           scopes: [{offset: basecolumn || 0, type: "py", align: null}],
+          fstr_state: null,
           indent: basecolumn || 0,
           lastToken: null,
           lambda: false,
diff --git a/mode/python/test.js b/mode/python/test.js
@@ -30,6 +30,8 @@
     MT("before_equal_sign_" + c, "[variable a] [operator " + c + "=] [variable b]");
   }
 
-  MT("fValidStringPrefix", "[string f'this is a {formatted} string']");
+  MT("fValidStringPrefix", "[string f'this is a]{[variable formatted]}[string string']");
+  MT("fValidExpressioninFString", "[string f'expression ]{[number 100][operator *][number 5]}[string string']");
+  MT("fInvalidFString", "[error f'this is wrong}]");
   MT("uValidStringPrefix", "[string u'this is an unicode string']");
 })();

Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,8 @@`
`30`	`30`	`MT("before_equal_sign_" + c, "[variable a] [operator " + c + "=] [variable b]");`
`31`	`31`	`}`
`32`	`32`
`33`		`- MT("fValidStringPrefix", "[string f'this is a {formatted} string']");`
	`33`	`+ MT("fValidStringPrefix", "[string f'this is a]{[variable formatted]}[string string']");`
	`34`	`+ MT("fValidExpressioninFString", "[string f'expression ]{[number 100][operator *][number 5]}[string string']");`
	`35`	`+ MT("fInvalidFString", "[error f'this is wrong}]");`
`34`	`36`	`MT("uValidStringPrefix", "[string u'this is an unicode string']");`
`35`	`37`	`})();`