From 10e5ccff940e09a8502d350950d5e09e4f2def26 Mon Sep 17 00:00:00 2001
From: Thomas Kowalski
Date: Thu, 30 Apr 2026 10:53:52 +0000
Subject: [PATCH 1/4] fix: avoid MemoryError in tokenize

---
 Lib/test/test_tokenize.py | 10 ++++++++++
 Parser/lexer/lexer.c      | 23 ++++++++++++++++-------
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index ab53a20cff5539..8c91e271f4d5e8 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3188,6 +3188,16 @@ def get_tokens(string):
             with self.subTest(case=case):
                 self.assertRaises(tokenize.TokenError, get_tokens, case)
 
+    def test_tstring_multiline_bang_underflow(self):
+        # gh-149183: t-string with '!' across two lines used to raise
+        # MemoryError because last_expr_end > last_expr_size produced a
+        # negative length that was cast to a huge size_t.
+        self.assertRaises(
+            tokenize.TokenError,
+            list,
+            tokenize.tokenize(BytesIO(b't"{!\n!x').readline),
+        )
+
     @support.skip_wasi_stack_overflow()
     def test_max_indent(self):
         MAXINDENT = 100
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 7f25afec302c22..748df54338f6a3 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -121,12 +121,22 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     }
     PyObject *res = NULL;
 
+    Py_ssize_t expr_len = tok_mode->last_expr_size - tok_mode->last_expr_end;
+    if (expr_len < 0) {
+        /* last_expr_end > last_expr_size: happens when '{' and the closing
+           delimiter span different source lines, causing the strlen-based
+           size tracking to underflow. Treat as a tokenizer error rather
+           than passing a negative length (cast to huge size_t) to malloc or
+           PyUnicode_DecodeUTF8. */
+        return -1;
+    }
+
     // Look for a # character outside of string literals
     int hash_detected = 0;
     int in_string = 0;
     char quote_char = 0;
 
-    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
+    for (Py_ssize_t i = 0; i < expr_len; i++) {
         char ch = tok_mode->last_expr_buffer[i];
 
         // Skip escaped characters
@@ -163,7 +173,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     // If we found a # character in the expression, we need to handle comments
     if (hash_detected) {
         // Allocate buffer for processed result
-        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
+        char *result = (char *)PyMem_Malloc((expr_len + 1) * sizeof(char));
         if (!result) {
             return -1;
         }
@@ -174,7 +184,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
         quote_char = 0;             // Current string quote char
 
         // Process each character
-        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+        while (i < expr_len) {
             char ch = tok_mode->last_expr_buffer[i];
 
             // Handle string quotes
@@ -190,11 +200,10 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
             }
             // Skip comments
            else if (ch == '#' && !in_string) {
-                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
-                       tok_mode->last_expr_buffer[i] != '\n') {
+                while (i < expr_len && tok_mode->last_expr_buffer[i] != '\n') {
                     i++;
                 }
-                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+                if (i < expr_len) {
                     result[j++] = '\n';
                 }
             }
@@ -211,7 +220,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     } else {
         res = PyUnicode_DecodeUTF8(
             tok_mode->last_expr_buffer,
-            tok_mode->last_expr_size - tok_mode->last_expr_end,
+            expr_len,
             NULL
         );
     }
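For reference, the regression test added above corresponds to the following standalone reproduction. It is a sketch based solely on the test case: the b't"{!\n!x' input splits a t-string expression across two source lines, which triggered the MemoryError on unpatched builds and raises tokenize.TokenError once this fix is applied.

# Reproduction of gh-149183 (mirrors the new test case).
import tokenize
from io import BytesIO

source = b't"{!\n!x'  # the '{' and the '!' delimiter span two source lines

try:
    list(tokenize.tokenize(BytesIO(source).readline))
except tokenize.TokenError as exc:
    # Expected with the fix; unpatched builds raised MemoryError instead.
    print("TokenError:", exc)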
From 1ca203e9eecfb1f2c4ba93aaebd221a999ac4fff Mon Sep 17 00:00:00 2001
From: Thomas Kowalski
Date: Mon, 11 May 2026 10:47:43 +0200
Subject: [PATCH 2/4] misc: add news entry

---
 .../2026-05-11-10-46-00.gh-issue-149183.aB3xQr.rst | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-11-10-46-00.gh-issue-149183.aB3xQr.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-11-10-46-00.gh-issue-149183.aB3xQr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-11-10-46-00.gh-issue-149183.aB3xQr.rst
new file mode 100644
index 00000000000000..14bb6ca4080bc3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-11-10-46-00.gh-issue-149183.aB3xQr.rst
@@ -0,0 +1,2 @@
+Fix :exc:`MemoryError` in the t-string tokenizer when the opening ``{``
+and closing delimiter span different source lines.

From 876683f4f79102885e7bd8e976baf17d512681c9 Mon Sep 17 00:00:00 2001
From: Thomas Kowalski
Date: Mon, 11 May 2026 10:47:48 +0200
Subject: [PATCH 3/4] review: move check before declaration

---
 Parser/lexer/lexer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 748df54338f6a3..bf987cb85cf44e 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -119,7 +119,6 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
         return 0;
     }
-    PyObject *res = NULL;
 
     Py_ssize_t expr_len = tok_mode->last_expr_size - tok_mode->last_expr_end;
     if (expr_len < 0) {
@@ -131,6 +130,8 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
         return -1;
     }
 
+    PyObject *res = NULL;
+
     // Look for a # character outside of string literals
     int hash_detected = 0;
     int in_string = 0;

From 9203a8ec158cdf6cf1c51e2cfbee9aecede11ec9 Mon Sep 17 00:00:00 2001
From: Thomas Kowalski
Date: Mon, 11 May 2026 13:33:20 +0200
Subject: [PATCH 4/4] review: update Lib/test/test_tokenize.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
---
 Lib/test/test_tokenize.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 8c91e271f4d5e8..4ed94b649fc0f2 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3192,11 +3192,8 @@ def test_tstring_multiline_bang_underflow(self):
         # gh-149183: t-string with '!' across two lines used to raise
         # MemoryError because last_expr_end > last_expr_size produced a
         # negative length that was cast to a huge size_t.
-        self.assertRaises(
-            tokenize.TokenError,
-            list,
-            tokenize.tokenize(BytesIO(b't"{!\n!x').readline),
-        )
+        readline = BytesIO(b't"{!\n!x').readline
+        self.assertRaises(tokenize.TokenError, list, tokenize.tokenize(readline))
 
     @support.skip_wasi_stack_overflow()
     def test_max_indent(self):
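A closing note on the failure mode the fix describes: PyMem_Malloc takes an unsigned size_t, so a negative Py_ssize_t length wraps around to an enormous allocation request, which CPython reports as MemoryError. A small illustration with hypothetical values follows (the real last_expr_size/last_expr_end depend on the input; ctypes is used here only to show the unsigned wraparound):

# Illustration only: how a negative length turns into a huge size_t.
import ctypes

last_expr_size = 3   # hypothetical strlen-based size of the buffered line
last_expr_end = 5    # hypothetical end offset, computed past that line
expr_len = last_expr_size - last_expr_end   # -2

print(ctypes.c_size_t(expr_len).value)  # 18446744073709551614 on 64-bit builds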