From 50dc1fb792734e03ab2a0c2ff61430cb78bc905f Mon Sep 17 00:00:00 2001 From: a2811057970 Date: Fri, 3 Jul 2026 11:53:37 +0100 Subject: [PATCH] Restrict pickle deserialization to safe types (CVE-2025-69872) BREAKING CHANGE: Pickle deserialization now only permits safe built-in types (builtins, collections, datetime, decimal, fractions, uuid). Arbitrary objects can no longer be deserialized from cache, preventing code execution via crafted pickle payloads. Users caching custom types should migrate to JSONDisk or a custom Disk subclass. There is no opt-out mechanism by design. - Add SafeUnpickler with allowlist-based find_class override - Add UnpicklingError (inherits pickle.UnpicklingError) for downstream compatibility with libraries catching pickle.PickleError - Support pickle protocols 0-5 via __builtin__, copy_reg, and _codecs allowlist entries - Use frozenset values in SAFE_PICKLE_CLASSES to prevent runtime bypass - Bump version to 6.0.0 (breaking change per semver) This takes a different approach to PR #361 (HMAC envelope). The HMAC approach still allows arbitrary deserialization once the signature is verified, meaning an attacker with read+write access to the cache directory can read the auto-generated key file and forge valid payloads. The allowlist approach blocks dangerous types regardless of filesystem access. Fixes: CVE-2025-69872 Closes: #357, #360, #362 --- CHANGES.rst | 52 ++++++ diskcache/__init__.py | 8 +- diskcache/core.py | 149 ++++++++++++++++- tests/test_safe_pickle.py | 331 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 535 insertions(+), 5 deletions(-) create mode 100644 CHANGES.rst create mode 100644 tests/test_safe_pickle.py diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..934cd04 --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,52 @@ +Changes +======= + +6.0.0 (NEXT) +------------- + +**Breaking Changes** + +* Pickle deserialization is now restricted to safe built-in types only. 
+ This mitigates CVE-2025-69872, which allowed arbitrary code execution + when an attacker with write access to the cache directory injected a + crafted pickle payload. + + The following types are permitted during deserialization: + + - Python builtins: ``int``, ``float``, ``str``, ``bytes``, ``bytearray``, + ``list``, ``dict``, ``tuple``, ``set``, ``frozenset``, ``complex``, + ``range``, ``slice``, ``object``, ``bool``, ``None`` + - ``collections``: ``OrderedDict``, ``defaultdict``, ``deque`` + - ``datetime``: ``date``, ``datetime``, ``time``, ``timedelta``, + ``timezone`` + - ``decimal.Decimal`` + - ``fractions.Fraction`` + - ``uuid.UUID`` + + All other types will raise ``UnpicklingError`` on read. + +* There is no opt-out mechanism. Users who need to cache custom types have + two migration paths: + + 1. Use ``JSONDisk`` for JSON-serializable data:: + + cache = Cache('/tmp/my-cache', disk=JSONDisk) + + 2. Subclass ``Disk`` and override ``put()``/``store()`` and + ``get()``/``fetch()`` with a serialization strategy appropriate for your data. + +**New Features** + +* Added ``SafeUnpickler`` class for restricted pickle deserialization. +* Added ``UnpicklingError`` exception raised when a disallowed type is + encountered during deserialization. + +**Internal** + +* ``SAFE_PICKLE_CLASSES`` uses ``frozenset`` values so each module's + allowlist cannot be mutated in place (the outer mapping is a plain dict). + +5.6.3 (2023-08-31) +------------------- + +* Previous release (see git history for details). 
diff --git a/diskcache/__init__.py b/diskcache/__init__.py index 7757d66..a7e451d 100644 --- a/diskcache/__init__.py +++ b/diskcache/__init__.py @@ -14,8 +14,10 @@ Disk, EmptyDirWarning, JSONDisk, + SafeUnpickler, Timeout, UnknownFileWarning, + UnpicklingError, ) from .fanout import FanoutCache from .persistent import Deque, Index @@ -44,9 +46,11 @@ 'JSONDisk', 'Lock', 'RLock', + 'SafeUnpickler', 'Timeout', 'UNKNOWN', 'UnknownFileWarning', + 'UnpicklingError', 'barrier', 'memoize_stampede', 'throttle', @@ -61,8 +65,8 @@ pass __title__ = 'diskcache' -__version__ = '5.6.3' -__build__ = 0x050603 +__version__ = '6.0.0' +__build__ = 0x060000 __author__ = 'Grant Jenks' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2016-2023 Grant Jenks' diff --git a/diskcache/core.py b/diskcache/core.py index 7a3d23b..6138f88 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -100,6 +100,149 @@ def __repr__(self): } +class UnpicklingError(pickle.UnpicklingError): + """Error raised when unpickling encounters a disallowed type.""" + + +# Safe modules and classes that are allowed during deserialization. +# These are standard Python types that cannot execute arbitrary code +# during unpickling. This structure is immutable to prevent runtime +# modification as a security bypass. +SAFE_PICKLE_CLASSES = { + 'builtins': frozenset( + { + 'True', + 'False', + 'None', + 'bytes', + 'bytearray', + 'complex', + 'dict', + 'float', + 'frozenset', + 'int', + 'list', + 'object', + 'range', + 'set', + 'slice', + 'str', + 'tuple', + } + ), + # Python 2 module name used by pickle protocols 0 and 1. 
+ '__builtin__': frozenset( + { + 'True', + 'False', + 'None', + 'bytes', + 'bytearray', + 'complex', + 'dict', + 'float', + 'frozenset', + 'int', + 'list', + 'long', + 'object', + 'range', + 'set', + 'slice', + 'str', + 'tuple', + 'unicode', + 'xrange', + } + ), + 'collections': frozenset( + { + 'OrderedDict', + 'defaultdict', + 'deque', + } + ), + # Used by pickle protocols 0 and 1 for object reconstruction. + 'copy_reg': frozenset( + { + '_reconstructor', + } + ), + 'copyreg': frozenset( + { + '_reconstructor', + } + ), + 'datetime': frozenset( + { + 'date', + 'datetime', + 'time', + 'timedelta', + 'timezone', + } + ), + 'decimal': frozenset( + { + 'Decimal', + } + ), + 'fractions': frozenset( + { + 'Fraction', + } + ), + 'uuid': frozenset( + { + 'UUID', + } + ), + '_codecs': frozenset( + { + 'encode', + } + ), +} + + +class SafeUnpickler(pickle.Unpickler): + """Restricted unpickler that only allows safe built-in types. + + This prevents arbitrary code execution via crafted pickle payloads. + Only types listed in SAFE_PICKLE_CLASSES are permitted. + + """ + + def find_class(self, module, name): + """Only allow safe classes to be unpickled. + + :param str module: module name + :param str name: class/function name + :raises UnpicklingError: if the class is not in the allowlist + + """ + allowed = SAFE_PICKLE_CLASSES.get(module, frozenset()) + if name in allowed: + return super().find_class(module, name) + raise UnpicklingError( + 'Unpickling of {}.{} is not allowed. ' + 'Only safe built-in types can be deserialized. ' + 'Use JSONDisk or a custom Disk subclass for other types.'.format( + module, name + ) + ) + + +def safe_pickle_load(file_obj): + """Load a pickle from a file object using the restricted unpickler. 
+ + :param file_obj: file-like object to read from + :return: deserialized Python object + + """ + return SafeUnpickler(file_obj).load() + + class Disk: """Cache key and value serialization for SQLite database and files.""" @@ -174,7 +317,7 @@ def get(self, key, raw): if raw: return bytes(key) if type(key) is sqlite3.Binary else key else: - return pickle.load(io.BytesIO(key)) + return safe_pickle_load(io.BytesIO(key)) def store(self, value, read, key=UNKNOWN): """Convert `value` to fields size, mode, filename, and value for Cache @@ -279,9 +422,9 @@ def fetch(self, mode, filename, value, read): elif mode == MODE_PICKLE: if value is None: with open(op.join(self._directory, filename), 'rb') as reader: - return pickle.load(reader) + return safe_pickle_load(reader) else: - return pickle.load(io.BytesIO(value)) + return safe_pickle_load(io.BytesIO(value)) def filename(self, key=UNKNOWN, value=UNKNOWN): """Return filename and full-path tuple for file storage. diff --git a/tests/test_safe_pickle.py b/tests/test_safe_pickle.py new file mode 100644 index 0000000..49e4c36 --- /dev/null +++ b/tests/test_safe_pickle.py @@ -0,0 +1,331 @@ +"""Test diskcache safe pickle deserialization (CVE-2025-69872 fix).""" + +import inspect +import io +import os +import pickle +import shutil +import subprocess +import tempfile +from collections import OrderedDict, deque +from datetime import datetime, timedelta, timezone +from decimal import Decimal +from fractions import Fraction +from uuid import UUID + +import pytest + +import diskcache as dc +from diskcache.core import MODE_PICKLE, UnpicklingError, safe_pickle_load + + +@pytest.fixture +def cache(): + with dc.Cache() as cache: + yield cache + shutil.rmtree(cache.directory, ignore_errors=True) + + +# --- SafeUnpickler Tests --- + + +class TestSafeUnpickler: + """Test the SafeUnpickler restricts deserialization correctly.""" + + def test_allows_basic_types(self): + """Safe types should deserialize without error.""" + safe_values = [ + 42, + 
3.14, + 'hello', + b'bytes', + True, + False, + None, + [1, 2, 3], + {'key': 'value'}, + (1, 2, 3), + {1, 2, 3}, + frozenset([1, 2, 3]), + ] + for value in safe_values: + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_collections(self): + """Standard collection types should deserialize.""" + values = [ + OrderedDict([('a', 1), ('b', 2)]), + deque([1, 2, 3]), + ] + for value in values: + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_datetime(self): + """Datetime types should deserialize.""" + values = [ + datetime(2025, 1, 1, 12, 0, 0), + timedelta(days=1, hours=2), + timezone.utc, + ] + for value in values: + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_decimal(self): + """Decimal should deserialize.""" + value = Decimal('3.14159') + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_fraction(self): + """Fraction should deserialize.""" + value = Fraction(1, 3) + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_uuid(self): + """UUID should deserialize.""" + value = UUID('12345678-1234-5678-1234-567812345678') + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_blocks_os_system(self): + """os.system should be blocked - classic RCE vector.""" + data = pickle.dumps(os.system) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_eval(self): + """eval should be blocked.""" + data = pickle.dumps(eval) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_exec(self): + """exec should be blocked.""" + data = pickle.dumps(exec) + with 
pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_subprocess(self): + """subprocess.Popen should be blocked.""" + data = pickle.dumps(subprocess.Popen) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_pickle_reduce_exploit(self): + """Crafted __reduce__ payloads should be blocked.""" + + class Exploit: + def __reduce__(self): + return (os.system, ('echo pwned',)) + + data = pickle.dumps(Exploit()) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_arbitrary_class(self): + """Custom classes should be blocked.""" + data = pickle.dumps(tempfile.NamedTemporaryFile) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_error_message_includes_class_info(self): + """Error message should indicate what was blocked.""" + data = pickle.dumps(os.system) + with pytest.raises(UnpicklingError) as exc_info: + safe_pickle_load(io.BytesIO(data)) + assert 'JSONDisk' in str(exc_info.value) + + def test_nested_safe_types(self): + """Nested structures of safe types should work.""" + value = { + 'list': [1, 2.0, 'three'], + 'tuple': (4, 5, 6), + 'nested': {'a': [True, False, None]}, + 'ordered': OrderedDict([('x', datetime(2025, 1, 1))]), + } + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + +# --- Cache Integration Tests --- + + +class TestCacheDefaultSafe: + """Test that Cache uses safe deserialization unconditionally.""" + + def test_safe_values_work(self, cache): + """Standard safe types should round-trip through cache.""" + test_data = { + 'int': 42, + 'float': 3.14, + 'str': 'hello world', + 'bytes': b'binary data', + 'list': [1, 2, 3], + 'dict': {'nested': True}, + 'tuple': (1, 'two', 3.0), + 'none': None, + 'bool': True, + } + for key, value in test_data.items(): + cache[key] = 
value + + for key, value in test_data.items(): + assert cache[key] == value + + def test_safe_complex_types(self, cache): + """Allowed complex types should work.""" + cache['decimal'] = Decimal('3.14') + cache['uuid'] = UUID('12345678-1234-5678-1234-567812345678') + cache['datetime'] = datetime(2025, 6, 15, 10, 30) + cache['ordered'] = OrderedDict([('a', 1), ('b', 2)]) + + assert cache['decimal'] == Decimal('3.14') + assert cache['uuid'] == UUID('12345678-1234-5678-1234-567812345678') + assert cache['datetime'] == datetime(2025, 6, 15, 10, 30) + assert cache['ordered'] == OrderedDict([('a', 1), ('b', 2)]) + + def test_blocks_malicious_payload(self, cache): + """Injecting a malicious pickle into cache should fail on read.""" + + class Exploit: + def __reduce__(self): + return (os.system, ('echo pwned',)) + + malicious_data = pickle.dumps(Exploit()) + + # Manually insert malicious data as if attacker had write access + sql = cache._sql + sql( + 'INSERT INTO Cache (key, raw, store_time, expire_time,' + ' access_time, access_count, tag, mode, filename, value)' + ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( + 'malicious', + True, + 0, + None, + 0, + 0, + None, + MODE_PICKLE, + None, + malicious_data, + ), + ) + + with pytest.raises(UnpicklingError): + cache['malicious'] + + +class TestNoEscapeHatch: + """Confirm there is no way to bypass safe deserialization.""" + + def test_no_allow_pickle_parameter(self): + """Disk.__init__ should not accept allow_pickle.""" + sig = inspect.signature(dc.Disk.__init__) + assert 'allow_pickle' not in sig.parameters + + def test_no_disk_allow_pickle_setting(self): + """DEFAULT_SETTINGS should not contain disk_allow_pickle.""" + assert 'disk_allow_pickle' not in dc.DEFAULT_SETTINGS + + def test_unknown_disk_setting_rejected(self): + """Passing disk_allow_pickle to Cache should raise TypeError.""" + with pytest.raises(TypeError): + dc.Cache(disk_allow_pickle=True) + + def test_safe_unpickling_always_active(self, cache): + """Even after 
explicit attempts, deserialization stays restricted.""" + # Try to bypass by setting attribute directly on disk + cache.disk.allow_pickle = True # This attribute doesn't exist/matter + + class Exploit: + def __reduce__(self): + return (os.system, ('echo pwned',)) + + malicious_data = pickle.dumps(Exploit()) + + sql = cache._sql + sql( + 'INSERT INTO Cache (key, raw, store_time, expire_time,' + ' access_time, access_count, tag, mode, filename, value)' + ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( + 'bypass_attempt', + True, + 0, + None, + 0, + 0, + None, + MODE_PICKLE, + None, + malicious_data, + ), + ) + + with pytest.raises(UnpicklingError): + cache['bypass_attempt'] + + +# --- FanoutCache Integration --- + + +class TestFanoutCacheSafe: + """Test FanoutCache uses safe deserialization.""" + + def test_default_safe(self): + """FanoutCache should use safe mode.""" + with dc.FanoutCache() as cache: + cache['key'] = [1, 2, 3] + assert cache['key'] == [1, 2, 3] + shutil.rmtree(cache.directory, ignore_errors=True) + + +# --- Deque and Index Integration --- + + +class TestPersistentSafe: + """Test Deque and Index use safe deserialization.""" + + def test_deque_safe(self): + """Deque should work with safe types.""" + deq = dc.Deque([1, 2, 3]) + assert list(deq) == [1, 2, 3] + shutil.rmtree(deq.directory, ignore_errors=True) + + def test_index_safe(self): + """Index should work with safe types.""" + index = dc.Index({'a': 1, 'b': 2}) + assert index['a'] == 1 + shutil.rmtree(index.directory, ignore_errors=True) + + +# --- Key Serialization Tests --- + + +class TestKeySerialization: + """Test that non-raw keys (tuple keys) are deserialized safely.""" + + def test_tuple_key_safe(self, cache): + """Tuple keys use pickle serialization and should work safely.""" + key = (1, 'two', 3.0) + cache[key] = 'value' + assert cache[key] == 'value' + + def test_complex_key_safe(self, cache): + """Complex safe keys should work.""" + key = (None, 0, 'abc') + cache[key] = 'value' + assert 
cache[key] == 'value'