Skip to content

Commit 88b36c4

Browse files
Copilothvitved
andauthored
Add shutil.unpack_archive and subprocess tar extraction as TarSlip sources and sinks
- Add test cases for shutil.unpack_archive and subprocess.run(["tar", ...]) to tarslip.py - Add ShutilUnpackArchiveSource/Sink for shutil.unpack_archive calls with non-literal filenames - Add SubprocessTarExtractionSource/Sink for subprocess calls invoking tar with extraction flags - Update TarSlip.expected with expected test output for new cases Agent-Logs-Url: https://github.com/github/codeql/sessions/f31a3622-9b18-415f-85f1-62ec14a8319f Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
1 parent c1ecca3 commit 88b36c4

File tree

3 files changed

+88
-0
lines changed

3 files changed

+88
-0
lines changed

python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,75 @@ module TarSlip {
132132
}
133133
}
134134

135+
/**
136+
* A call to `shutil.unpack_archive`, considered as a flow source.
137+
*
138+
* The archive filename is not hardcoded, so it may come from user input.
139+
*/
140+
class ShutilUnpackArchiveSource extends Source {
141+
ShutilUnpackArchiveSource() {
142+
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
143+
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
144+
}
145+
}
146+
147+
/**
148+
* A call to `shutil.unpack_archive`, considered as a flow sink.
149+
*
150+
* The archive filename is not hardcoded, so it may come from user input.
151+
*/
152+
class ShutilUnpackArchiveSink extends Sink {
153+
ShutilUnpackArchiveSink() {
154+
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
155+
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
156+
}
157+
}
158+
159+
/**
160+
* Holds if `call` is a subprocess call that invokes `tar` for archive extraction
161+
* with at least one non-literal argument (the archive filename).
162+
*
163+
* Detects patterns like `subprocess.run(["tar", "-xf", untrusted_filename])`.
164+
*/
165+
private predicate isSubprocessTarExtraction(DataFlow::CallCfgNode call) {
166+
exists(SequenceNode cmdList |
167+
call =
168+
API::moduleImport("subprocess")
169+
.getMember(["run", "call", "check_call", "check_output", "Popen"])
170+
.getACall() and
171+
cmdList = call.getArg(0).asCfgNode() and
172+
// Command must be "tar" (possibly with a full path like "/usr/bin/tar")
173+
cmdList.getElement(0).getNode().(StringLiteral).getText().matches("%tar") and
174+
// At least one extraction-related flag must be present
175+
exists(string flag |
176+
flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and
177+
(flag.matches("%-x%") or flag = "--extract")
178+
) and
179+
// At least one non-literal argument (the archive filename)
180+
exists(int i |
181+
i > 0 and
182+
exists(cmdList.getElement(i)) and
183+
not cmdList.getElement(i).getNode() instanceof StringLiteral
184+
)
185+
)
186+
}
187+
188+
/**
189+
* A call to `subprocess` functions that invokes `tar` for archive extraction,
190+
* considered as a flow source.
191+
*/
192+
class SubprocessTarExtractionSource extends Source {
193+
SubprocessTarExtractionSource() { isSubprocessTarExtraction(this) }
194+
}
195+
196+
/**
197+
* A call to `subprocess` functions that invokes `tar` for archive extraction,
198+
* considered as a flow sink.
199+
*/
200+
class SubprocessTarExtractionSink extends Sink {
201+
SubprocessTarExtractionSink() { isSubprocessTarExtraction(this) }
202+
}
203+
135204
/**
136205
* Holds if `g` clears taint for `tarInfo`.
137206
*

python/ql/test/query-tests/Security/CWE-022-TarSlip/TarSlip.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ nodes
5353
| tarslip.py:112:1:112:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
5454
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
5555
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
56+
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
57+
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
58+
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
5659
subpaths
5760
#select
5861
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
@@ -64,3 +67,6 @@ subpaths
6467
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
6568
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
6669
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |
70+
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | potentially untrusted source |
71+
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | potentially untrusted source |
72+
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | potentially untrusted source |

python/ql/test/query-tests/Security/CWE-022-TarSlip/tarslip.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,16 @@ def safemembers(members):
114114

115115
tar = tarfile.open(unsafe_filename_tar)
116116
tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter
117+
118+
import shutil
119+
import subprocess
120+
121+
# shutil.unpack_archive
122+
shutil.unpack_archive(unsafe_filename_tar, "out") # unsafe
123+
shutil.unpack_archive("safe.tar", "out") # safe
124+
125+
# subprocess tar extraction
126+
subprocess.run(["tar", "-xf", unsafe_filename_tar]) # unsafe
127+
subprocess.check_call(["tar", "-xf", unsafe_filename_tar]) # unsafe
128+
subprocess.run(["tar", "-xf", "safe.tar"]) # safe
129+
subprocess.run(["echo", unsafe_filename_tar]) # safe - not a tar extraction

0 commit comments

Comments
 (0)