|
| 1 | +From c4eda3e58680528147a4cc7e2b3c9044f795c9c9 Mon Sep 17 00:00:00 2001 |
| 2 | +From: zhangskz <sandyzhang@google.com> |
| 3 | +Date: Thu, 29 Jan 2026 14:31:08 -0500 |
| 4 | +Subject: [PATCH] Fix Any recursion depth bypass in Python |
| 5 | + json_format.ParseDict (#25239) (#25586) |
| 6 | + |
| 7 | +This fixes a security vulnerability where nested google.protobuf.Any messages could bypass the max_recursion_depth limit, potentially leading to denial of service via stack overflow. |
| 8 | + |
| 9 | +The root cause was that _ConvertAnyMessage() was calling itself recursively via methodcaller() for nested well-known types, bypassing the recursion depth tracking in ConvertMessage(). |
| 10 | + |
| 11 | +The fix routes well-known type parsing through ConvertMessage() to ensure proper recursion depth accounting for all message types including nested Any. |
| 12 | + |
| 13 | +Fixes #25070 |
| 14 | +Closes #25239 |
| 15 | + |
| 16 | +COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/25239 from aviralgarg05:fix-any-recursion-depth-bypass 3cbbcbea142593d3afd2ceba2db14b05660f62f4 |
| 17 | +PiperOrigin-RevId: 862740421 |
| 18 | + |
| 19 | +Co-authored-by: Aviral Garg <gargaviral99@gmail.com> |
| 20 | + |
| 21 | +Signed-off-by: Azure Linux Security Servicing Account <azurelinux-security@microsoft.com> |
| 22 | +Upstream-reference: AI Backport of https://github.com/protocolbuffers/protobuf/commit/c4eda3e58680528147a4cc7e2b3c9044f795c9c9.patch |
| 23 | +--- |
| 24 | + .../protobuf/internal/json_format_test.py | 100 ++++++++++++++++++ |
| 25 | + .../python/google/protobuf/json_format.py | 38 +++++-- |
| 26 | + 2 files changed, 131 insertions(+), 7 deletions(-) |
| 27 | + |
| 28 | +diff --git a/third_party/protobuf/python/google/protobuf/internal/json_format_test.py b/third_party/protobuf/python/google/protobuf/internal/json_format_test.py |
| 29 | +index 68aa21c4..69246a91 100755 |
| 30 | +--- a/third_party/protobuf/python/google/protobuf/internal/json_format_test.py |
| 31 | ++++ b/third_party/protobuf/python/google/protobuf/internal/json_format_test.py |
| 32 | +@@ -1244,6 +1244,106 @@ class JsonFormatTest(JsonFormatBase): |
| 33 | + 'uint32Value': 4, 'stringValue': 'bla'}, |
| 34 | + indent=2, sort_keys=True)) |
| 35 | + |
| 36 | ++ def testAnyRecursionDepthEnforcement(self): |
| 37 | ++ """Test that nested Any messages respect max_recursion_depth limit.""" |
| 38 | ++ # Test that deeply nested Any messages raise ParseError instead of |
| 39 | ++ # bypassing the recursion limit. This prevents DoS via nested Any. |
| 40 | ++ message = any_pb2.Any() |
| 41 | ++ |
| 42 | ++ # Create nested Any structure that should exceed depth limit |
| 43 | ++ # With max_recursion_depth=5, we can nest 4 Any messages |
| 44 | ++ # (depth 1 = outer Any, depth 2-4 = nested Anys, depth 5 = final value) |
| 45 | ++ nested_any = { |
| 46 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 47 | ++ 'value': { |
| 48 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 49 | ++ 'value': { |
| 50 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 51 | ++ 'value': { |
| 52 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 53 | ++ 'value': { |
| 54 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 55 | ++ 'value': {}, |
| 56 | ++ }, |
| 57 | ++ }, |
| 58 | ++ }, |
| 59 | ++ }, |
| 60 | ++ } |
| 61 | ++ |
| 62 | ++ # Should raise ParseError due to exceeding max depth, not RecursionError |
| 63 | ++ self.assertRaisesRegexp( |
| 64 | ++ json_format.ParseError, |
| 65 | ++ 'Message too deep. Max recursion depth is 5', |
| 66 | ++ json_format.ParseDict, |
| 67 | ++ nested_any, |
| 68 | ++ message, |
| 69 | ++ max_recursion_depth=5, |
| 70 | ++ ) |
| 71 | ++ |
| 72 | ++ # Verify that Any messages within the limit can be parsed successfully |
| 73 | ++ # With max_recursion_depth=5, we can nest up to 4 Any messages |
| 74 | ++ shallow_any = { |
| 75 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 76 | ++ 'value': { |
| 77 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 78 | ++ 'value': { |
| 79 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 80 | ++ 'value': { |
| 81 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 82 | ++ 'value': {}, |
| 83 | ++ }, |
| 84 | ++ }, |
| 85 | ++ }, |
| 86 | ++ } |
| 87 | ++ json_format.ParseDict(shallow_any, message, max_recursion_depth=5) |
| 88 | ++ |
| 89 | ++ def testAnyRecursionDepthBoundary(self): |
| 90 | ++ """Test recursion depth boundary behavior (exclusive upper limit).""" |
| 91 | ++ message = any_pb2.Any() |
| 92 | ++ |
| 93 | ++ # Create nested Any at depth exactly 4 (should succeed with max_recursion_depth=5) |
| 94 | ++ depth_4_any = { |
| 95 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 96 | ++ 'value': { |
| 97 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 98 | ++ 'value': { |
| 99 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 100 | ++ 'value': { |
| 101 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 102 | ++ 'value': {}, |
| 103 | ++ }, |
| 104 | ++ }, |
| 105 | ++ }, |
| 106 | ++ } |
| 107 | ++ # This should succeed: depth 4 < max_recursion_depth 5 |
| 108 | ++ json_format.ParseDict(depth_4_any, message, max_recursion_depth=5) |
| 109 | ++ |
| 110 | ++ # Create nested Any at depth exactly 5 (should fail with max_recursion_depth=5) |
| 111 | ++ depth_5_any = { |
| 112 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 113 | ++ 'value': { |
| 114 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 115 | ++ 'value': { |
| 116 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 117 | ++ 'value': { |
| 118 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 119 | ++ 'value': { |
| 120 | ++ '@type': 'type.googleapis.com/google.protobuf.Any', |
| 121 | ++ 'value': {}, |
| 122 | ++ }, |
| 123 | ++ }, |
| 124 | ++ }, |
| 125 | ++ }, |
| 126 | ++ } |
| 127 | ++ # This should fail: depth 5 == max_recursion_depth 5 (exclusive limit) |
| 128 | ++ self.assertRaisesRegexp( |
| 129 | ++ json_format.ParseError, |
| 130 | ++ 'Message too deep. Max recursion depth is 5', |
| 131 | ++ json_format.ParseDict, |
| 132 | ++ depth_5_any, |
| 133 | ++ message, |
| 134 | ++ max_recursion_depth=5, |
| 135 | ++ ) |
| 136 | + |
| 137 | + if __name__ == '__main__': |
| 138 | + unittest.main() |
| 139 | +diff --git a/third_party/protobuf/python/google/protobuf/json_format.py b/third_party/protobuf/python/google/protobuf/json_format.py |
| 140 | +index 4d76d021..4147e9e1 100644 |
| 141 | +--- a/third_party/protobuf/python/google/protobuf/json_format.py |
| 142 | ++++ b/third_party/protobuf/python/google/protobuf/json_format.py |
| 143 | +@@ -408,7 +408,7 @@ def _CreateMessageFromTypeUrl(type_url, descriptor_pool): |
| 144 | + return message_class() |
| 145 | + |
| 146 | + |
| 147 | +-def Parse(text, message, ignore_unknown_fields=False, descriptor_pool=None): |
| 148 | ++def Parse(text, message, ignore_unknown_fields=False, descriptor_pool=None, max_recursion_depth=100): |
| 149 | + """Parses a JSON representation of a protocol message into a message. |
| 150 | + |
| 151 | + Args: |
| 152 | +@@ -417,6 +417,9 @@ def Parse(text, message, ignore_unknown_fields=False, descriptor_pool=None): |
| 153 | + ignore_unknown_fields: If True, do not raise errors for unknown fields. |
| 154 | + descriptor_pool: A Descriptor Pool for resolving types. If None use the |
| 155 | + default. |
| 156 | ++ max_recursion_depth: max recursion depth of JSON message to be deserialized. |
| 157 | ++ JSON messages over this depth will fail to be deserialized. Default value |
| 158 | ++ is 100. |
| 159 | + |
| 160 | + Returns: |
| 161 | + The same message passed as argument. |
| 162 | +@@ -429,13 +432,14 @@ def Parse(text, message, ignore_unknown_fields=False, descriptor_pool=None): |
| 163 | + js = json.loads(text, object_pairs_hook=_DuplicateChecker) |
| 164 | + except ValueError as e: |
| 165 | + raise ParseError('Failed to load JSON: {0}.'.format(str(e))) |
| 166 | +- return ParseDict(js, message, ignore_unknown_fields, descriptor_pool) |
| 167 | ++ return ParseDict(js, message, ignore_unknown_fields, descriptor_pool, max_recursion_depth) |
| 168 | + |
| 169 | + |
| 170 | + def ParseDict(js_dict, |
| 171 | + message, |
| 172 | + ignore_unknown_fields=False, |
| 173 | +- descriptor_pool=None): |
| 174 | ++ descriptor_pool=None, |
| 175 | ++ max_recursion_depth=100): |
| 176 | + """Parses a JSON dictionary representation into a message. |
| 177 | + |
| 178 | + Args: |
| 179 | +@@ -444,11 +448,14 @@ def ParseDict(js_dict, |
| 180 | + ignore_unknown_fields: If True, do not raise errors for unknown fields. |
| 181 | + descriptor_pool: A Descriptor Pool for resolving types. If None use the |
| 182 | + default. |
| 183 | ++ max_recursion_depth: max recursion depth of JSON message to be deserialized. |
| 184 | ++ JSON messages over this depth will fail to be deserialized. Default value |
| 185 | ++ is 100. |
| 186 | + |
| 187 | + Returns: |
| 188 | + The same message passed as argument. |
| 189 | + """ |
| 190 | +- parser = _Parser(ignore_unknown_fields, descriptor_pool) |
| 191 | ++ parser = _Parser(ignore_unknown_fields, descriptor_pool, max_recursion_depth) |
| 192 | + parser.ConvertMessage(js_dict, message) |
| 193 | + return message |
| 194 | + |
| 195 | +@@ -459,9 +466,11 @@ _INT_OR_FLOAT = six.integer_types + (float,) |
| 196 | + class _Parser(object): |
| 197 | + """JSON format parser for protocol message.""" |
| 198 | + |
| 199 | +- def __init__(self, ignore_unknown_fields, descriptor_pool): |
| 200 | ++ def __init__(self, ignore_unknown_fields, descriptor_pool, max_recursion_depth): |
| 201 | + self.ignore_unknown_fields = ignore_unknown_fields |
| 202 | + self.descriptor_pool = descriptor_pool |
| 203 | ++ self.max_recursion_depth = max_recursion_depth |
| 204 | ++ self.recursion_depth = 0 |
| 205 | + |
| 206 | + def ConvertMessage(self, value, message): |
| 207 | + """Convert a JSON object into a message. |
| 208 | +@@ -473,6 +482,17 @@ class _Parser(object): |
| 209 | + Raises: |
| 210 | + ParseError: In case of convert problems. |
| 211 | + """ |
| 212 | ++ # Increment recursion depth at message entry. The check runs after the |
| 213 | ++ # increment, so max_recursion_depth nested ConvertMessage calls are allowed |
| 214 | ++ # and the next one raises ParseError. For example, with max_recursion_depth=5 |
| 215 | ++ # four nested Any wrappers parse (their payload is level 5); five do not. |
| 216 | ++ self.recursion_depth += 1 |
| 217 | ++ if self.recursion_depth > self.max_recursion_depth: |
| 218 | ++ raise ParseError( |
| 219 | ++ 'Message too deep. Max recursion depth is {0}'.format( |
| 220 | ++ self.max_recursion_depth |
| 221 | ++ ) |
| 222 | ++ ) |
| 223 | + message_descriptor = message.DESCRIPTOR |
| 224 | + full_name = message_descriptor.full_name |
| 225 | + if _IsWrapperMessage(message_descriptor): |
| 226 | +@@ -481,6 +501,7 @@ class _Parser(object): |
| 227 | + methodcaller(_WKTJSONMETHODS[full_name][1], value, message)(self) |
| 228 | + else: |
| 229 | + self._ConvertFieldValuePair(value, message) |
| 230 | ++ self.recursion_depth -= 1 |
| 231 | + |
| 232 | + def _ConvertFieldValuePair(self, js, message): |
| 233 | + """Convert field value pairs into regular message. |
| 234 | +@@ -612,8 +633,11 @@ class _Parser(object): |
| 235 | + if _IsWrapperMessage(message_descriptor): |
| 236 | + self._ConvertWrapperMessage(value['value'], sub_message) |
| 237 | + elif full_name in _WKTJSONMETHODS: |
| 238 | +- methodcaller( |
| 239 | +- _WKTJSONMETHODS[full_name][1], value['value'], sub_message)(self) |
| 240 | ++ # For well-known types (including nested Any), use ConvertMessage |
| 241 | ++ # to ensure recursion depth is properly tracked |
| 242 | ++ self.ConvertMessage( |
| 243 | ++ value['value'], sub_message |
| 244 | ++ ) |
| 245 | + else: |
| 246 | + del value['@type'] |
| 247 | + self._ConvertFieldValuePair(value, sub_message) |
| 248 | +-- |
| 249 | +2.45.4 |
| 250 | + |
0 commit comments