|
| 1 | +From 557b7c38f2797aca0059deb3fafbfea550a093d2 Mon Sep 17 00:00:00 2001 |
| 2 | +From: AllSpark <allspark@microsoft.com> |
| 3 | +Date: Fri, 16 Jan 2026 17:42:25 +0000 |
| 4 | +Subject: [PATCH] Fix DoS via malicious HDF5 dataset metadata in
| 5 | + H5Entry.__getitem__; add MAX_BYTES limit; harden dataset shape/dtype
| 6 | + validation
| 7 | + |
| 8 | +Signed-off-by: Azure Linux Security Servicing Account <azurelinux-security@microsoft.com> |
| 9 | +Upstream-reference: AI Backport of https://github.com/keras-team/keras/commit/7360d4f0d764fbb1fa9c6408fe53da41974dd4f6.patch |
| 10 | +--- |
| 11 | + keras/src/saving/saving_lib.py | 59 ++++++++++++++++++++++++++++++++-- |
| 12 | + 1 file changed, 57 insertions(+), 2 deletions(-) |
| 13 | + |
| 14 | +diff --git a/keras/src/saving/saving_lib.py b/keras/src/saving/saving_lib.py |
| 15 | +index 1668489..0bcce01 100644 |
| 16 | +--- a/keras/src/saving/saving_lib.py |
| 17 | ++++ b/keras/src/saving/saving_lib.py |
| 18 | +@@ -24,6 +24,10 @@ try: |
| 19 | + except ImportError: |
| 20 | + h5py = None |
| 21 | + |
| 22 | ++ |
| 23 | ++# Maximum allowed HDF5 dataset size in bytes (4 GiB) |
| 24 | ++MAX_BYTES = 1 << 32 # 4 GiB |
| 25 | ++ |
| 26 | + _CONFIG_FILENAME = "config.json" |
| 27 | + _METADATA_FILENAME = "metadata.json" |
| 28 | + _VARS_FNAME = "model.weights" # Will become e.g. "model.weights.h5" |
| 29 | +@@ -696,9 +700,60 @@ class H5Entry: |
| 30 | + |
| 31 | + def __getitem__(self, name): |
| 32 | + value = self.group[name] |
| 33 | ++ |
| 34 | ++ # ------------------------------------------------------ |
| 35 | ++ # Load HDF5 dataset values with safety checks
| 36 | ++ # ------------------------------------------------------ |
| 37 | ++ |
| 38 | ++ # Skip any objects that are not proper datasets |
| 39 | ++ if not hasattr(value, "shape") or not hasattr(value, "dtype"): |
| 40 | ++ # Fallback: attempt read if possible, else return as-is |
| 41 | ++ try: |
| 42 | ++ return value[()] |
| 43 | ++ except Exception: |
| 44 | ++ return value |
| 45 | ++ |
| 46 | ++ shape = value.shape |
| 47 | ++ dtype = value.dtype |
| 48 | ++ |
| 49 | ++ # ------------------------------------------------------ |
| 50 | ++ # Validate SHAPE (avoid malformed / malicious metadata) |
| 51 | ++ # ------------------------------------------------------ |
| 52 | ++ |
| 53 | ++ # No negative dimensions |
| 54 | ++ if any(dim < 0 for dim in shape): |
| 55 | ++ raise ValueError( |
| 56 | ++ "Malformed HDF5 dataset shape encountered in .keras file; " |
| 57 | ++ "negative dimension detected." |
| 58 | ++ ) |
| 59 | ++ |
| 60 | ++ # Prevent absurdly high-rank tensors |
| 61 | ++ if len(shape) > 64: |
| 62 | ++ raise ValueError( |
| 63 | ++ "Malformed HDF5 dataset shape encountered in .keras file; " |
| 64 | ++ "tensor rank exceeds safety limit." |
| 65 | ++ ) |
| 66 | ++ |
| 67 | ++ # Product via Python ints; plain np.prod can overflow int64
| 68 | ++ num_elems = int(np.prod(shape, dtype=object))
| 69 | ++ |
| 70 | ++ # ------------------------------------------------------ |
| 71 | ++ # Validate TOTAL memory size |
| 72 | ++ # ------------------------------------------------------ |
| 73 | ++ size_bytes = num_elems * dtype.itemsize |
| 74 | ++ if size_bytes > MAX_BYTES: |
| 75 | ++ raise ValueError( |
| 76 | ++ f"HDF5 dataset too large to load safely " |
| 77 | ++ f"({size_bytes} bytes; limit is {MAX_BYTES})." |
| 78 | ++ ) |
| 79 | ++ |
| 80 | ++ # ------------------------------------------------------ |
| 81 | ++ # SAFE — load dataset (guaranteed ≤ 4 GiB) |
| 82 | ++ # ------------------------------------------------------ |
| 83 | ++ arr = value[()] |
| 84 | + if "dtype" in value.attrs and value.attrs["dtype"] == "bfloat16": |
| 85 | +- value = np.array(value, dtype=ml_dtypes.bfloat16) |
| 86 | +- return value |
| 87 | ++ arr = np.array(arr, dtype=ml_dtypes.bfloat16) |
| 88 | ++ return arr |
| 89 | + |
| 90 | + |
| 91 | + class NpzIOStore: |
| 92 | +-- |
| 93 | +2.45.4 |
| 94 | + |
0 commit comments