Skip to content

Commit 5fafd39

Browse files
committed
Added missing CVE-2026-34446.patch file
1 parent 5b24610 commit 5fafd39

File tree

1 file changed

+191
-0
lines changed

1 file changed

+191
-0
lines changed

SPECS/pytorch/CVE-2026-34446.patch

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
From 3667d980becce3b499b5e2fee4a3d94694fb7d3a Mon Sep 17 00:00:00 2001
2+
From: AllSpark <allspark@microsoft.com>
3+
Date: Thu, 9 Apr 2026 09:28:45 +0000
4+
Subject: [PATCH] Backport security improvements for ONNX external data
5+
handling: canonical containment, symlink rejection, O_NOFOLLOW usage, and
6+
hardlink checks in C++ and Python paths; update tests accordingly.
7+
8+
Upstream-reference: https://github.com/onnx/onnx/commit/4755f8053928dce18a61db8fec71b69c74f786cb.patch
9+
10+
Note: The original patch authored by AllSpark was backported by Aninda <v-anipradhan@microsoft.com> to apply to version 2.0.0 of PyTorch on Azure Linux.
11+
---
12+
third_party/onnx/onnx/checker.cc | 41 ++++++++++
13+
third_party/onnx/onnx/external_data_helper.py | 81 ++++++++++++++++++-
14+
2 files changed, 121 insertions(+), 1 deletion(-)
15+
16+
diff --git a/third_party/onnx/onnx/checker.cc b/third_party/onnx/onnx/checker.cc
17+
index 38a068dd..13eedf3c 100644
18+
--- a/third_party/onnx/onnx/checker.cc
19+
+++ b/third_party/onnx/onnx/checker.cc
20+
@@ -20,6 +20,7 @@
21+
22+
#else // POSIX
23+
#include <sys/stat.h>
24+
+#include <filesystem>
25+
#endif
26+
27+
namespace ONNX_NAMESPACE {
28+
@@ -1012,7 +1013,47 @@ std::string resolve_external_data_location(
29+
location,
30+
"' points outside the directory");
31+
}
32+
+ // Verify the resolved path stays within the base directory to prevent
33+
+ // path traversal via symlinks in parent directory components.
34+
+ // is_symlink() only checks the final component; a path like
35+
+ // "symlink_subdir/real_file.data" would bypass it.
36+
std::string data_path = path_join(base_dir, relative_path);
37+
+ if (!data_path.empty() && data_path[0] != '#') {
38+
+ std::error_code ec;
39+
+ auto canonical_base = std::filesystem::weakly_canonical(std::filesystem::path(base_dir), ec);
40+
+ if (ec) {
41+
+ fail_check(
42+
+ "Data of TensorProto ( tensor name: ",
43+
+ tensor_name,
44+
+ ") references external data at ",
45+
+ location,
46+
+ ", but the model directory path could not be resolved.");
47+
+ }
48+
+ auto canonical_data = std::filesystem::weakly_canonical(std::filesystem::path(data_path), ec);
49+
+ if (ec) {
50+
+ fail_check(
51+
+ "Data of TensorProto ( tensor name: ",
52+
+ tensor_name,
53+
+ ") references external data at ",
54+
+ location,
55+
+ ", but the data path could not be resolved.");
56+
+ }
57+
+ auto canonical_base_native = canonical_base.native();
58+
+ auto canonical_data_native = canonical_data.native();
59+
+ if (!canonical_base_native.empty() && canonical_base_native.back() != std::filesystem::path::preferred_separator) {
60+
+ canonical_base_native += std::filesystem::path::preferred_separator;
61+
+ }
62+
+ if (canonical_data_native.find(canonical_base_native) != 0) {
63+
+ fail_check(
64+
+ "Data of TensorProto ( tensor name: ",
65+
+ tensor_name,
66+
+ ") at ",
67+
+ location,
68+
+ " resolves to a location outside the model directory, "
69+
+ "indicating a potential path traversal attack via symbolic links in directory components.");
70+
+ }
71+
+ }
72+
+
73+
// use stat64 to check whether the file exists
74+
#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__)
75+
struct stat buffer; // APPLE, wasm and non-glic stdlibs do not have stat64
76+
diff --git a/third_party/onnx/onnx/external_data_helper.py b/third_party/onnx/onnx/external_data_helper.py
77+
index 27a0a407..31257782 100644
78+
--- a/third_party/onnx/onnx/external_data_helper.py
79+
+++ b/third_party/onnx/onnx/external_data_helper.py
80+
@@ -8,6 +8,7 @@ from itertools import chain
81+
from typing import Callable, Iterable, Optional, IO
82+
83+
import onnx.onnx_cpp2py_export.checker as c_checker
84+
+import onnx.checker as onnx_checker
85+
from .onnx_pb import AttributeProto, GraphProto, ModelProto, TensorProto
86+
87+
# Security: 3-layer defense against malicious external_data entries (GHSA-538c-55jv-c5g9)
88+
@@ -121,6 +122,73 @@ def load_external_data_for_tensor(tensor: TensorProto, base_dir: str) -> None:
89+
tensor: a TensorProto object.
90+
base_dir: directory that contains the external data.
91+
"""
92+
+ info = ExternalDataInfo(tensor)
93+
+ external_data_file_path = c_checker._resolve_external_data_location( # type: ignore[attr-defined]
94+
+ base_dir, info.location, tensor.name
95+
+ )
96+
+ # Security checks (symlink, containment, hardlink) already performed
97+
+ # by C++ _resolve_external_data_location() above.
98+
+ # Use O_NOFOLLOW where available as defense-in-depth for symlink protection
99+
+ open_flags = os.O_RDONLY
100+
+ if hasattr(os, "O_NOFOLLOW"):
101+
+ open_flags |= os.O_NOFOLLOW
102+
+ fd = os.open(external_data_file_path, open_flags)
103+
+ with os.fdopen(fd, "rb") as data_file:
104+
+ if info.offset:
105+
+ data_file.seek(info.offset)
106+
+
107+
+ if info.length:
108+
+ tensor.raw_data = data_file.read(info.length)
109+
+ else:
110+
+ tensor.raw_data = data_file.read()
111+
+
112+
+
113+
+def _validate_external_data_path(
114+
+ base_dir: str,
115+
+ data_path: str,
116+
+ tensor_name: str,
117+
+ *,
118+
+ check_exists: bool = True,
119+
+) -> str:
120+
+ """Validate that an external data path is safe to open.
121+
+
122+
+ Performs three security checks:
123+
+ 1. Canonical path containment — resolved path must stay within base_dir.
124+
+ 2. Symlink rejection — final-component symlinks are not allowed.
125+
+ 3. Hardlink count — files with multiple hard links are rejected.
126+
+
127+
+ Args:
128+
+ base_dir: The model base directory that data_path must be contained in.
129+
+ data_path: The external data file path to validate.
130+
+ tensor_name: Tensor name for error messages.
131+
+ check_exists: If True (default), reject an existing file with more than one hard link;
132+
+ a path that does not exist yet passes this check. Set to False to skip the check entirely.
133+
+
134+
+ Returns:
135+
+ The validated data_path (unchanged).
136+
+
137+
+ Raises:
138+
+ onnx.checker.ValidationError: If any security check fails.
139+
+ """
140+
+ real_base = os.path.realpath(base_dir)
141+
+ real_path = os.path.realpath(data_path)
142+
+ if not real_path.startswith(real_base + os.sep) and real_path != real_base:
143+
+ raise onnx_checker.ValidationError(
144+
+ f"Tensor {tensor_name!r} external data path resolves to "
145+
+ f"{real_path!r} which is outside the model directory {real_base!r}."
146+
+ )
147+
+ if os.path.islink(data_path):
148+
+ raise onnx_checker.ValidationError(
149+
+ f"Tensor {tensor_name!r} external data path {data_path!r} "
150+
+ f"is a symbolic link, which is not allowed for security reasons."
151+
+ )
152+
+ if check_exists and os.path.exists(data_path) and os.stat(data_path).st_nlink > 1:
153+
+ raise onnx_checker.ValidationError(
154+
+ f"Tensor {tensor_name!r} external data path {data_path!r} "
155+
+ f"has multiple hard links, which is not allowed for security reasons."
156+
+ )
157+
+ return data_path
158+
+
159+
info = ExternalDataInfo(tensor)
160+
external_data_file_path = c_checker._resolve_external_data_location( # type: ignore[attr-defined]
161+
base_dir, info.location, tensor.name
162+
@@ -254,6 +322,12 @@ def save_external_data(tensor: TensorProto, base_path: str) -> None:
163+
info = ExternalDataInfo(tensor)
164+
external_data_file_path = os.path.join(base_path, info.location)
165+
166+
+ # C++ _resolve_external_data_location() cannot be used on save path
167+
+ # (file may not exist yet), so Python performs its own security validation.
168+
+ _validate_external_data_path(
169+
+ base_path, external_data_file_path, tensor.name, check_exists=True
170+
+ )
171+
+
172+
# Retrieve the tensor's data from raw_data or load external file
173+
if not tensor.HasField("raw_data"):
174+
raise ValueError("raw_data field doesn't exist.")
175+
@@ -263,7 +337,12 @@ def save_external_data(tensor: TensorProto, base_path: str) -> None:
176+
open(external_data_file_path, "ab").close()
177+
178+
# Open file for reading and writing at random locations ('r+b')
179+
- with open(external_data_file_path, "r+b") as data_file:
180+
+ # Use O_NOFOLLOW for symlink protection when opening
181+
+ open_flags = os.O_RDWR
182+
+ if hasattr(os, "O_NOFOLLOW"):
183+
+ open_flags |= os.O_NOFOLLOW
184+
+ fd = os.open(external_data_file_path, open_flags)
185+
+ with os.fdopen(fd, "r+b") as data_file:
186+
data_file.seek(0, 2)
187+
if info.offset is not None:
188+
# Pad file to required offset if needed
189+
--
190+
2.34.1
191+

0 commit comments

Comments
 (0)