diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..934cd04 --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,52 @@ +Changes +======= + +6.0.0 (NEXT) +------------- + +**Breaking Changes** + +* Pickle deserialization is now restricted to safe built-in types only. + This mitigates CVE-2025-69872, which allowed arbitrary code execution + when an attacker with write access to the cache directory injected a + crafted pickle payload. + + The following types are permitted during deserialization: + + - Python builtins: ``int``, ``float``, ``str``, ``bytes``, ``bytearray``, + ``list``, ``dict``, ``tuple``, ``set``, ``frozenset``, ``complex``, + ``range``, ``slice``, ``object``, ``bool``, ``None`` + - ``collections``: ``OrderedDict``, ``defaultdict``, ``deque`` + - ``datetime``: ``date``, ``datetime``, ``time``, ``timedelta``, + ``timezone`` + - ``decimal.Decimal`` + - ``fractions.Fraction`` + - ``uuid.UUID`` + + All other types will raise ``UnpicklingError`` on read. + +* There is no opt-out mechanism. Users who need to cache custom types have + two migration paths: + + 1. Use ``JSONDisk`` for JSON-serializable data:: + + cache = Cache('/tmp/my-cache', disk=JSONDisk) + + 2. Subclass ``Disk`` and override ``get()`` and ``fetch()`` with a custom + serialization strategy appropriate for your data. + +**New Features** + +* Added ``SafeUnpickler`` class for restricted pickle deserialization. +* Added ``UnpicklingError`` exception raised when a disallowed type is + encountered during deserialization. + +**Internal** + +* ``SAFE_PICKLE_CLASSES`` stores each per-module allowlist as a + ``frozenset``; the enclosing ``dict`` itself remains mutable. + +5.6.3 (2023-08-31) +------------------- + +* Previous release (see git history for details). 
diff --git a/diskcache/__init__.py b/diskcache/__init__.py index 7757d66..a7e451d 100644 --- a/diskcache/__init__.py +++ b/diskcache/__init__.py @@ -14,8 +14,10 @@ Disk, EmptyDirWarning, JSONDisk, + SafeUnpickler, Timeout, UnknownFileWarning, + UnpicklingError, ) from .fanout import FanoutCache from .persistent import Deque, Index @@ -44,9 +46,11 @@ 'JSONDisk', 'Lock', 'RLock', + 'SafeUnpickler', 'Timeout', 'UNKNOWN', 'UnknownFileWarning', + 'UnpicklingError', 'barrier', 'memoize_stampede', 'throttle', @@ -61,8 +65,8 @@ pass __title__ = 'diskcache' -__version__ = '5.6.3' -__build__ = 0x050603 +__version__ = '6.0.0' +__build__ = 0x060000 __author__ = 'Grant Jenks' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2016-2023 Grant Jenks' diff --git a/diskcache/core.py b/diskcache/core.py index 7a3d23b..6138f88 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -100,6 +100,149 @@ def __repr__(self): } +class UnpicklingError(pickle.UnpicklingError): + """Error raised when unpickling encounters a disallowed type.""" + + +# Safe modules and classes that are allowed during deserialization. +# These are standard Python types that cannot execute arbitrary code +# during unpickling. Each allowlist is a frozenset, but the outer dict +# is mutable; treat it as read-only and never add entries at runtime. +SAFE_PICKLE_CLASSES = { + 'builtins': frozenset( + { + 'True', + 'False', + 'None', + 'bytes', + 'bytearray', + 'complex', + 'dict', + 'float', + 'frozenset', + 'int', + 'list', + 'object', + 'range', + 'set', + 'slice', + 'str', + 'tuple', + } + ), + # Python 2 module name used by pickle protocols 0 and 1. 
+ '__builtin__': frozenset( + { + 'True', + 'False', + 'None', + 'bytes', + 'bytearray', + 'complex', + 'dict', + 'float', + 'frozenset', + 'int', + 'list', + 'long', + 'object', + 'range', + 'set', + 'slice', + 'str', + 'tuple', + 'unicode', + 'xrange', + } + ), + 'collections': frozenset( + { + 'OrderedDict', + 'defaultdict', + 'deque', + } + ), + # Used by pickle protocols 0 and 1 for object reconstruction. + 'copy_reg': frozenset( + { + '_reconstructor', + } + ), + 'copyreg': frozenset( + { + '_reconstructor', + } + ), + 'datetime': frozenset( + { + 'date', + 'datetime', + 'time', + 'timedelta', + 'timezone', + } + ), + 'decimal': frozenset( + { + 'Decimal', + } + ), + 'fractions': frozenset( + { + 'Fraction', + } + ), + 'uuid': frozenset( + { + 'UUID', + } + ), + '_codecs': frozenset( + { + 'encode', + } + ), +} + + +class SafeUnpickler(pickle.Unpickler): + """Unpickler that resolves only allowlisted (module, name) globals. + + Intended to stop crafted pickle payloads from importing arbitrary + callables; names missing from SAFE_PICKLE_CLASSES are rejected. + + """ + + def find_class(self, module, name): + """Resolve a global only if SAFE_PICKLE_CLASSES allowlists it. + + :param str module: module name recorded in the pickle stream + :param str name: class/function name to look up in that module + :raises UnpicklingError: if name is not allowlisted for module + + """ + allowed = SAFE_PICKLE_CLASSES.get(module, frozenset()) + if name in allowed: + return super().find_class(module, name) + raise UnpicklingError( + 'Unpickling of {}.{} is not allowed. ' + 'Only safe built-in types can be deserialized. ' + 'Use JSONDisk or a custom Disk subclass for other types.'.format( + module, name + ) + ) + + +def safe_pickle_load(file_obj): + """Load a pickle from a file object using the restricted unpickler. 
+ + :param file_obj: file-like object to read from + :return: deserialized Python object + + """ + return SafeUnpickler(file_obj).load() + + class Disk: """Cache key and value serialization for SQLite database and files.""" @@ -174,7 +317,7 @@ def get(self, key, raw): if raw: return bytes(key) if type(key) is sqlite3.Binary else key else: - return pickle.load(io.BytesIO(key)) + return safe_pickle_load(io.BytesIO(key)) def store(self, value, read, key=UNKNOWN): """Convert `value` to fields size, mode, filename, and value for Cache @@ -279,9 +422,9 @@ def fetch(self, mode, filename, value, read): elif mode == MODE_PICKLE: if value is None: with open(op.join(self._directory, filename), 'rb') as reader: - return pickle.load(reader) + return safe_pickle_load(reader) else: - return pickle.load(io.BytesIO(value)) + return safe_pickle_load(io.BytesIO(value)) def filename(self, key=UNKNOWN, value=UNKNOWN): """Return filename and full-path tuple for file storage. diff --git a/tests/test_safe_pickle.py b/tests/test_safe_pickle.py new file mode 100644 index 0000000..49e4c36 --- /dev/null +++ b/tests/test_safe_pickle.py @@ -0,0 +1,331 @@ +"""Test diskcache safe pickle deserialization (CVE-2025-69872 fix).""" + +import inspect +import io +import os +import pickle +import shutil +import subprocess +import tempfile +from collections import OrderedDict, deque +from datetime import datetime, timedelta, timezone +from decimal import Decimal +from fractions import Fraction +from uuid import UUID + +import pytest + +import diskcache as dc +from diskcache.core import MODE_PICKLE, UnpicklingError, safe_pickle_load + + +@pytest.fixture +def cache(): + with dc.Cache() as cache: + yield cache + shutil.rmtree(cache.directory, ignore_errors=True) + + +# --- SafeUnpickler Tests --- + + +class TestSafeUnpickler: + """Test the SafeUnpickler restricts deserialization correctly.""" + + def test_allows_basic_types(self): + """Safe types should deserialize without error.""" + safe_values = [ + 42, + 
3.14, + 'hello', + b'bytes', + True, + False, + None, + [1, 2, 3], + {'key': 'value'}, + (1, 2, 3), + {1, 2, 3}, + frozenset([1, 2, 3]), + ] + for value in safe_values: + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_collections(self): + """Standard collection types should deserialize.""" + values = [ + OrderedDict([('a', 1), ('b', 2)]), + deque([1, 2, 3]), + ] + for value in values: + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_datetime(self): + """Datetime types should deserialize.""" + values = [ + datetime(2025, 1, 1, 12, 0, 0), + timedelta(days=1, hours=2), + timezone.utc, + ] + for value in values: + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_decimal(self): + """Decimal should deserialize.""" + value = Decimal('3.14159') + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_fraction(self): + """Fraction should deserialize.""" + value = Fraction(1, 3) + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_allows_uuid(self): + """UUID should deserialize.""" + value = UUID('12345678-1234-5678-1234-567812345678') + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + def test_blocks_os_system(self): + """os.system should be blocked - classic RCE vector.""" + data = pickle.dumps(os.system) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_eval(self): + """eval should be blocked.""" + data = pickle.dumps(eval) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_exec(self): + """exec should be blocked.""" + data = pickle.dumps(exec) + with 
pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_subprocess(self): + """subprocess.Popen should be blocked.""" + data = pickle.dumps(subprocess.Popen) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_pickle_reduce_exploit(self): + """Crafted __reduce__ payloads should be blocked.""" + + class Exploit: + def __reduce__(self): + return (os.system, ('echo pwned',)) + + data = pickle.dumps(Exploit()) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_blocks_arbitrary_class(self): + """Non-allowlisted globals (tempfile.NamedTemporaryFile) are blocked.""" + data = pickle.dumps(tempfile.NamedTemporaryFile) + with pytest.raises(UnpicklingError, match='not allowed'): + safe_pickle_load(io.BytesIO(data)) + + def test_error_message_includes_class_info(self): + """Error message should point users to the JSONDisk alternative.""" + data = pickle.dumps(os.system) + with pytest.raises(UnpicklingError) as exc_info: + safe_pickle_load(io.BytesIO(data)) + assert 'JSONDisk' in str(exc_info.value) + + def test_nested_safe_types(self): + """Nested structures of safe types should work.""" + value = { + 'list': [1, 2.0, 'three'], + 'tuple': (4, 5, 6), + 'nested': {'a': [True, False, None]}, + 'ordered': OrderedDict([('x', datetime(2025, 1, 1))]), + } + data = pickle.dumps(value) + result = safe_pickle_load(io.BytesIO(data)) + assert result == value + + +# --- Cache Integration Tests --- + + +class TestCacheDefaultSafe: + """Test that Cache uses safe deserialization unconditionally.""" + + def test_safe_values_work(self, cache): + """Standard safe types should round-trip through cache.""" + test_data = { + 'int': 42, + 'float': 3.14, + 'str': 'hello world', + 'bytes': b'binary data', + 'list': [1, 2, 3], + 'dict': {'nested': True}, + 'tuple': (1, 'two', 3.0), + 'none': None, + 'bool': True, + } + for key, value in test_data.items(): + cache[key] = 
value + + for key, value in test_data.items(): + assert cache[key] == value + + def test_safe_complex_types(self, cache): + """Allowed complex types should work.""" + cache['decimal'] = Decimal('3.14') + cache['uuid'] = UUID('12345678-1234-5678-1234-567812345678') + cache['datetime'] = datetime(2025, 6, 15, 10, 30) + cache['ordered'] = OrderedDict([('a', 1), ('b', 2)]) + + assert cache['decimal'] == Decimal('3.14') + assert cache['uuid'] == UUID('12345678-1234-5678-1234-567812345678') + assert cache['datetime'] == datetime(2025, 6, 15, 10, 30) + assert cache['ordered'] == OrderedDict([('a', 1), ('b', 2)]) + + def test_blocks_malicious_payload(self, cache): + """Injecting a malicious pickle into cache should fail on read.""" + + class Exploit: + def __reduce__(self): + return (os.system, ('echo pwned',)) + + malicious_data = pickle.dumps(Exploit()) + + # Manually insert malicious data as if attacker had write access + sql = cache._sql + sql( + 'INSERT INTO Cache (key, raw, store_time, expire_time,' + ' access_time, access_count, tag, mode, filename, value)' + ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( + 'malicious', + True, + 0, + None, + 0, + 0, + None, + MODE_PICKLE, + None, + malicious_data, + ), + ) + + with pytest.raises(UnpicklingError): + cache['malicious'] + + +class TestNoEscapeHatch: + """Confirm there is no way to bypass safe deserialization.""" + + def test_no_allow_pickle_parameter(self): + """Disk.__init__ should not accept allow_pickle.""" + sig = inspect.signature(dc.Disk.__init__) + assert 'allow_pickle' not in sig.parameters + + def test_no_disk_allow_pickle_setting(self): + """DEFAULT_SETTINGS should not contain disk_allow_pickle.""" + assert 'disk_allow_pickle' not in dc.DEFAULT_SETTINGS + + def test_unknown_disk_setting_rejected(self): + """Passing disk_allow_pickle to Cache should raise TypeError.""" + with pytest.raises(TypeError): + dc.Cache(disk_allow_pickle=True) + + def test_safe_unpickling_always_active(self, cache): + """Even after 
explicit attempts, deserialization stays restricted.""" + # Try to bypass by setting attribute directly on disk + cache.disk.allow_pickle = True # This attribute doesn't exist/matter + + class Exploit: + def __reduce__(self): + return (os.system, ('echo pwned',)) + + malicious_data = pickle.dumps(Exploit()) + + sql = cache._sql + sql( + 'INSERT INTO Cache (key, raw, store_time, expire_time,' + ' access_time, access_count, tag, mode, filename, value)' + ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( + 'bypass_attempt', + True, + 0, + None, + 0, + 0, + None, + MODE_PICKLE, + None, + malicious_data, + ), + ) + + with pytest.raises(UnpicklingError): + cache['bypass_attempt'] + + +# --- FanoutCache Integration --- + + +class TestFanoutCacheSafe: + """Test FanoutCache uses safe deserialization.""" + + def test_default_safe(self): + """FanoutCache should use safe mode.""" + with dc.FanoutCache() as cache: + cache['key'] = [1, 2, 3] + assert cache['key'] == [1, 2, 3] + shutil.rmtree(cache.directory, ignore_errors=True) + + +# --- Deque and Index Integration --- + + +class TestPersistentSafe: + """Test Deque and Index use safe deserialization.""" + + def test_deque_safe(self): + """Deque should work with safe types.""" + deq = dc.Deque([1, 2, 3]) + assert list(deq) == [1, 2, 3] + shutil.rmtree(deq.directory, ignore_errors=True) + + def test_index_safe(self): + """Index should work with safe types.""" + index = dc.Index({'a': 1, 'b': 2}) + assert index['a'] == 1 + shutil.rmtree(index.directory, ignore_errors=True) + + +# --- Key Serialization Tests --- + + +class TestKeySerialization: + """Test that non-raw keys (tuple keys) are deserialized safely.""" + + def test_tuple_key_safe(self, cache): + """Tuple keys use pickle serialization and should work safely.""" + key = (1, 'two', 3.0) + cache[key] = 'value' + assert cache[key] == 'value' + + def test_complex_key_safe(self, cache): + """Complex safe keys should work.""" + key = (None, 0, 'abc') + cache[key] = 'value' + assert 
cache[key] == 'value'