Skip to content

Commit d6fa745

Browse files
committed
Add TarSlip Improv query
1 parent caaee26 commit d6fa745

38 files changed

Lines changed: 672 additions & 0 deletions
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Extracting files from a malicious tarball without validating that the destination file path
8+
is within the destination directory can cause files outside the destination directory to be
9+
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
10+
archive path names.</p>
11+
12+
<p>Tarball contain archive entries representing each file in the archive. These entries
13+
include a file path for the entry, but these file paths are not restricted and may contain
14+
unexpected special elements such as the directory traversal element (<code>..</code>). If these
15+
file paths are used to determine an output file to write the contents of the archive item to, then
16+
the file may be written to an unexpected location. This can result in sensitive information being
17+
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
18+
files.</p>
19+
20+
<p>For example, if a tarball contains a file entry <code>../sneaky-file</code>, and the tarball
21+
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
22+
in an output file path of <code>/tmp/tmp123/../sneaky-file</code>, which would cause the file to be
23+
written to <code>/tmp/</code>.</p>
24+
25+
</overview>
26+
<recommendation>
27+
28+
<p>Ensure that output paths constructed from tarball entries are validated
29+
to prevent writing files to unexpected locations.</p>
30+
31+
<p>The recommended way of writing an output file from a tarball entry is to call <code>extract()</code> or <code>extractall()</code>.
32+
</p>
33+
34+
</recommendation>
35+
36+
<example>
37+
<p>
38+
In this example an archive is extracted without validating file paths.
39+
</p>
40+
41+
<sample src="examples/TarSlip_1.py" />
42+
43+
<p>To fix this vulnerability, we need to call the function <code>extractall()</code>.
44+
</p>
45+
46+
<sample src="examples/NoHIT_TarSlip_1.py" />
47+
48+
</example>
49+
<references>
50+
<li>
51+
Snyk:
52+
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
53+
</li>
54+
55+
<li>
56+
Tarfile documentation
57+
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">extractall() warning</a>
58+
</li>
59+
</references>
60+
</qhelp>
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/**
2+
* @name Arbitrary file write during tarfile extraction
3+
* @description Extracting files from a malicious tar archive without validating that the
4+
* destination file path is within the destination directory can cause files outside
5+
* the destination directory to be overwritten.
6+
* @kind path-problem
7+
* @id py/tarslip
8+
* @problem.severity error
9+
* @security-severity 7.5
10+
* @precision high
11+
* @tags security
12+
* external/cwe/cwe-022
13+
*/
14+
15+
import python
16+
import semmle.python.dataflow.new.DataFlow
17+
import semmle.python.dataflow.new.TaintTracking
18+
import DataFlow::PathGraph
19+
import semmle.python.ApiGraphs
20+
import semmle.python.dataflow.new.internal.Attributes
21+
import semmle.python.dataflow.new.BarrierGuards
22+
import semmle.python.dataflow.new.RemoteFlowSources
23+
24+
/**
25+
* Handle those three cases of Tarfile opens:
26+
* - `tarfile.open()`
27+
* - `tarfile.TarFile()`
28+
* - `MKtarfile.Tarfile.open()`
29+
*/
30+
API::Node tarfileOpen() {
31+
result in [
32+
API::moduleImport("tarfile").getMember(["open", "TarFile"]),
33+
API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
34+
]
35+
}
36+
37+
/**
38+
* Handle the previous three cases, plus the use of `closing` in the previous cases
39+
*/
40+
class AllTarfileOpens extends API::CallNode {
41+
AllTarfileOpens() {
42+
this = tarfileOpen().getACall()
43+
or
44+
exists(API::Node closing, Node arg |
45+
closing = API::moduleImport("contextlib").getMember("closing") and
46+
this = closing.getACall() and
47+
arg = this.getArg(0) and
48+
arg = tarfileOpen().getACall()
49+
)
50+
}
51+
}
52+
/**
53+
* A taint-tracking configuration for detecting more "TarSlip" vulnerabilities.
54+
*/
55+
class Configuration extends TaintTracking::Configuration {
56+
Configuration() { this = "TarSlip" }
57+
58+
override predicate isSource(DataFlow::Node source) { source instanceof AllTarfileOpens }
59+
60+
override predicate isSink(DataFlow::Node sink) {
61+
// A sink capturing method calls to `extractall` without `members` argument.
62+
// For a call to `file.extractall` without `members` argument, `file` is considered a sink.
63+
exists(MethodCallNode call , AllTarfileOpens atfo|
64+
call = atfo.getReturn().getMember("extractall").getACall() and
65+
not exists(Node arg | arg = call.getArgByName("members")) and
66+
sink = call.getObject()
67+
)
68+
or
69+
// A sink capturing method calls to `extractall` with `members` argument.
70+
// For a call to `file.extractall` with `members` argument, `file` is considered a sink if not
71+
// a the `members` argument contains a NameConstant as None, a List or call to the method `getmembers`.
72+
// Otherwise, the argument of `members` is considered a sink.
73+
exists(MethodCallNode call, Node arg, AllTarfileOpens atfo|
74+
call = atfo.getReturn().getMember("extractall").getACall() and
75+
arg = call.getArgByName("members") and
76+
if
77+
arg.asCfgNode() instanceof NameConstantNode or
78+
arg.asCfgNode() instanceof ListNode
79+
then sink = call.getObject()
80+
else
81+
if arg.(MethodCallNode).getMethodName() = "getmembers"
82+
then sink = arg.(MethodCallNode).getObject()
83+
else sink = call.getArgByName("members")
84+
)
85+
or
86+
// An argument to `extract` is considered a sink.
87+
exists(AllTarfileOpens atfo | sink = atfo.getReturn().getMember("extract").getACall().getArg(0))
88+
or
89+
//An argument to `_extract_member` is considered a sink.
90+
exists(MethodCallNode call, AllTarfileOpens atfo |
91+
call = atfo.getReturn().getMember("_extract_member").getACall() and
92+
call.getArg(1).(AttrRead).accesses(sink, "name")
93+
)
94+
}
95+
96+
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
97+
exists(AttrRead attr, MethodCallNode call |
98+
attr.accesses(nodeFrom, "getmembers") and
99+
nodeFrom = call.getObject() and
100+
nodeTo = call
101+
)
102+
}
103+
}
104+
105+
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
106+
where config.hasFlowPath(source, sink)
107+
select sink, source, sink, "Extraction of tarfile from $@ to a potentially untrusted source $@.",
108+
source.getNode(), source.getNode().toString(), sink.getNode(), sink.getNode().toString()
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import sys
2+
import tarfile
3+
4+
def managed_members_archive_handler(filename):
5+
tar = tarfile.open(filename)
6+
result = []
7+
for member in tar:
8+
if ".." in member.name:
9+
raise ValueError("Path in member name !!!")
10+
result.append(member)
11+
path = sys.argv[2]
12+
#print("files are extracted to: ", path)
13+
tar.extractall(path=path, members=result)
14+
tar.close()
15+
16+
if __name__ == "__main__":
17+
if len(sys.argv) > 1:
18+
filename = sys.argv[1]
19+
managed_members_archive_handler(filename)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import sys
2+
import tarfile
3+
import tempfile
4+
5+
def managed_members_archive_handler(filename):
6+
tar = tarfile.open(filename)
7+
tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar))
8+
tar.close()
9+
10+
11+
def members_filter(tarfile):
12+
result = []
13+
for member in tarfile:
14+
if '../' in member.name:
15+
print('Member name container directory traversal sequence')
16+
continue
17+
elif member.issym() or member.islnk():
18+
print('Symlink to external resource')
19+
continue
20+
result.append(member)
21+
return result
22+
23+
24+
if __name__ == "__main__":
25+
if len(sys.argv) > 1:
26+
filename = sys.argv[1]
27+
managed_members_archive_handler(filename)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import tarfile
2+
import sys
3+
4+
with tarfile.open(sys.argv[1]) as tar:
5+
for entry in tar:
6+
if ".." in entry.name:
7+
raise ValueError("Illegal tar archive entry")
8+
tar.extract(entry, "/tmp/unpack/")
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import tarfile
2+
import sys
3+
import os
4+
5+
def _validate_archive_name(name, target):
6+
if not os.path.abspath(os.path.join(target, name)).startswith(target + os.path.sep):
7+
raise ValueError(f"Provided language pack contains invalid name {name}")
8+
9+
with tarfile.open(sys.argv[1]) as tar:
10+
target = "/tmp/unpack"
11+
for entry in tar:
12+
_validate_archive_name(entry.name, target)
13+
tar.extract(entry, target)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# https://github.com/PyCQA/bandit
2+
3+
import sys
4+
import tarfile
5+
import tempfile
6+
7+
def managed_members_archive_handler(filename):
8+
tar = tarfile.open(filename)
9+
tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar))
10+
tar.close()
11+
12+
13+
def members_filter(tarfile):
14+
result = []
15+
for member in tarfile.getmembers():
16+
if '../' in member.name:
17+
print('Member name container directory traversal sequence')
18+
continue
19+
elif (member.issym() or member.islnk()) and ('../' in member.linkname):
20+
print('Symlink to external resource')
21+
continue
22+
result.append(member)
23+
return result
24+
25+
26+
if __name__ == "__main__":
27+
if len(sys.argv) > 1:
28+
filename = sys.argv[1]
29+
managed_members_archive_handler(filename)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# https://github.com/OctoPrint/OctoPrint/
2+
3+
import sys
4+
import tarfile
5+
import os
6+
7+
def _validate_tar_info(info, target):
8+
_validate_archive_name(info.name, target)
9+
if not (info.isfile() or info.isdir()):
10+
raise ValueError("Provided language pack contains invalid file type")
11+
12+
def _validate_archive_name(name, target):
13+
if not os.path.abspath(os.path.join(target, name)).startswith(target + os.path.sep):
14+
raise ValueError(f"Provided language pack contains invalid name {name}")
15+
16+
target = "/tmp/unpack"
17+
18+
with tarfile.open(sys.argv[1], "r") as tar:
19+
20+
# sanity check
21+
for info in tar.getmembers():
22+
_validate_tar_info(info, target)
23+
24+
# unpack everything
25+
tar.extractall(target)
26+
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# https://github.com/PyCQA/bandit
2+
3+
import sys
4+
import tarfile
5+
import tempfile
6+
7+
def managed_members_archive_handler(filename):
8+
tar = tarfile.open(filename)
9+
tar.extractall(path=tempfile.mkdtemp(), members=members_filter(tar))
10+
tar.close()
11+
12+
13+
def members_filter(tarfile):
14+
result = []
15+
for member in tarfile.getmembers():
16+
if '../' in member.name:
17+
print('Member name container directory traversal sequence')
18+
continue
19+
elif member.issym() or member.islnk():
20+
print('Symlink to external resource')
21+
continue
22+
result.append(member)
23+
return result
24+
25+
26+
if __name__ == "__main__":
27+
if len(sys.argv) > 1:
28+
filename = sys.argv[1]
29+
managed_members_archive_handler(filename)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# https://github.com/PyCQA/bandit
2+
3+
import sys
4+
import tarfile
5+
import tempfile
6+
7+
def managed_members_archive_handler(filename):
8+
tar = tarfile.open(filename)
9+
tarf = tar.getmembers()
10+
for f in tarf:
11+
if not f.issym():
12+
tar.extractall(path=tempfile.mkdtemp(), members=[f])
13+
tar.close()
14+
15+
if __name__ == "__main__":
16+
if len(sys.argv) > 1:
17+
filename = sys.argv[1]
18+
managed_members_archive_handler(filename)

0 commit comments

Comments
 (0)