Skip to content

Commit 97ddab0

Browse files
committed
Added support for new URIValidator in AntiSSRF library. Updated test caes to use postprocessing results. Currently results for partial ssrf still need work, it is flagging cases where the URL is fully controlled, but is sanitized. I'm not sure if this should be flagged yet.
1 parent 27e1981 commit 97ddab0

10 files changed

Lines changed: 1062 additions & 414 deletions

python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,4 +176,49 @@ module ServerSideRequestForgery {
176176
strNode = [call.getArg(0), call.getArgByName("string")]
177177
)
178178
}
179+
180+
/** A validation that a string does not contain certain characters, considered as a sanitizer. */
181+
private class UriValidator extends FullUrlControlSanitizer {
182+
UriValidator() { this = DataFlow::BarrierGuard<uri_validator/3>::getABarrierNode() }
183+
}
184+
185+
import semmle.python.dataflow.new.internal.DataFlowPublic
186+
187+
private predicate uri_validator(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
188+
exists(DataFlow::CallCfgNode call, Node n, string funcs |
189+
funcs in ["in_domain", "in_azure_keyvault_domain", "in_azure_storage_domain"]
190+
|
191+
call = API::moduleImport("AntiSSRF").getMember("URIValidator").getMember(funcs).getACall() and
192+
call.getArg(0).asCfgNode() = node and
193+
n.getALocalSource() = call and
194+
(
195+
// validator used in a comparison
196+
exists(CompareNode cn, Cmpop op | cn = g |
197+
(
198+
// validator == true or validator == false or validator is True or validator is False
199+
(op instanceof Eq or op instanceof Is) and
200+
exists(ControlFlowNode l, boolean bool |
201+
l.getNode().(BooleanLiteral).booleanValue() = bool and
202+
bool in [true, false] and
203+
branch = bool and
204+
cn.operands(n.asCfgNode(), op, l)
205+
)
206+
or
207+
// validator != false or validator != true or validator is not True or validator is not False
208+
(op instanceof NotEq or op instanceof IsNot) and
209+
exists(ControlFlowNode l, boolean bool |
210+
l.getNode().(BooleanLiteral).booleanValue() = bool and
211+
bool in [true, false] and
212+
branch = bool.booleanNot() and
213+
cn.operands(n.asCfgNode(), op, l)
214+
)
215+
)
216+
)
217+
or
218+
// validator call directly (e.g., if URIValidator.in_domain(...) )
219+
g = call.asCfgNode() and
220+
branch = true
221+
)
222+
)
223+
}
179224
}

python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/FullServerSideRequestForgery.expected

Lines changed: 234 additions & 130 deletions
Large diffs are not rendered by default.
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
Security/CWE-918/FullServerSideRequestForgery.ql
1+
query: Security/CWE-918/FullServerSideRequestForgery.ql
2+
postprocess: utils/test/InlineExpectationsTestQuery.ql

python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/PartialServerSideRequestForgery.expected

Lines changed: 323 additions & 215 deletions
Large diffs are not rendered by default.
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
Security/CWE-918/PartialServerSideRequestForgery.ql
1+
query: Security/CWE-918/PartialServerSideRequestForgery.ql
2+
postprocess: utils/test/InlineExpectationsTestQuery.ql

python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py

Lines changed: 49 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,30 @@
1-
from flask import request
1+
from flask import request # $ Source
22

3-
import requests
3+
import requests
44
import re
55

66
def full_ssrf():
77
user_input = request.args['untrusted_input']
88
query_val = request.args['query_val']
99

10-
requests.get(user_input) # NOT OK -- user has full control
10+
# NOT OK -- user has full control
11+
requests.get(user_input) # $ Alert[py/full-ssrf]
1112

1213
url = "https://" + user_input
13-
requests.get(url) # NOT OK -- user has full control
14+
# NOT OK -- user has full control
15+
requests.get(url) # $ Alert[py/full-ssrf]
1416

1517
# although the path `/foo` is added here, this can be circumvented such that the
1618
# final URL is `https://evil.com/#/foo" -- since the fragment (#) is not sent to the
1719
# server.
1820
url = "https://" + user_input + "/foo"
19-
requests.get(url) # NOT OK -- user has full control
21+
# NOT OK -- user has full control
22+
requests.get(url) # $ Alert[py/full-ssrf]
2023

2124
# this might seem like a dummy test, but it serves to check how our sanitizers work.
2225
url = "https://" + user_input + "/foo?key=" + query_val
23-
requests.get(url) # NOT OK -- user has full control
26+
# NOT OK -- user has full control
27+
requests.get(url) # $ Alert[py/full-ssrf]
2428

2529
# taint-steps are added as `fromNode -> toNode`, but when adding a sanitizer it's
2630
# currently only possible to so on either `fromNode` or `toNode` (either all edges in
@@ -39,72 +43,87 @@ def full_ssrf_format():
3943

4044
# using .format
4145
url = "https://{}".format(user_input)
42-
requests.get(url) # NOT OK -- user has full control
46+
# NOT OK -- user has full control
47+
requests.get(url) # $ Alert[py/full-ssrf]
4348

4449
url = "https://{}/foo".format(user_input)
45-
requests.get(url) # NOT OK -- user has full control
50+
# NOT OK -- user has full control
51+
requests.get(url) # $ Alert[py/full-ssrf]
4652

4753
url = "https://{}/foo?key={}".format(user_input, query_val)
48-
requests.get(url) # NOT OK -- user has full control
54+
# NOT OK -- user has full control
55+
requests.get(url) # $ Alert[py/full-ssrf]
4956

5057
url = "https://{x}".format(x=user_input)
51-
requests.get(url) # NOT OK -- user has full control
58+
# NOT OK -- user has full control
59+
requests.get(url) # $ Alert[py/full-ssrf]
5260

5361
url = "https://{1}".format(0, user_input)
54-
requests.get(url) # NOT OK -- user has full control
62+
# NOT OK -- user has full control
63+
requests.get(url) # $ Alert[py/full-ssrf]
5564

5665
def full_ssrf_percent_format():
5766
user_input = request.args['untrusted_input']
5867
query_val = request.args['query_val']
5968

6069
# using %-formatting
6170
url = "https://%s" % user_input
62-
requests.get(url) # NOT OK -- user has full control
71+
# NOT OK -- user has full control
72+
requests.get(url) # $ Alert[py/full-ssrf]
6373

6474
url = "https://%s/foo" % user_input
65-
requests.get(url) # NOT OK -- user has full control
75+
# NOT OK -- user has full control
76+
requests.get(url) # $ Alert[py/full-ssrf]
6677

6778
url = "https://%s/foo/key=%s" % (user_input, query_val)
68-
requests.get(url) # NOT OK -- user has full control
79+
# NOT OK -- user has full and partial control
80+
requests.get(url) # $ Alert[py/partial-ssrf] $ MISSING: Alert[py/full-ssrf]
6981

7082
def full_ssrf_f_strings():
7183
user_input = request.args['untrusted_input']
7284
query_val = request.args['query_val']
7385

7486
# using f-strings
7587
url = f"https://{user_input}"
76-
requests.get(url) # NOT OK -- user has full control
88+
# NOT OK -- user has full control
89+
requests.get(url) # $ Alert[py/full-ssrf]
7790

7891
url = f"https://{user_input}/foo"
79-
requests.get(url) # NOT OK -- user has full control
92+
# NOT OK -- user has full control
93+
requests.get(url) # $ Alert[py/full-ssrf]
8094

8195
url = f"https://{user_input}/foo?key={query_val}"
82-
requests.get(url) # NOT OK -- user has full control
96+
# NOT OK -- user has full control
97+
requests.get(url) # $ Alert[py/full-ssrf]
8398

8499

85100
def partial_ssrf_1():
86101
user_input = request.args['untrusted_input']
87102

88103
url = "https://example.com/foo?" + user_input
89-
requests.get(url) # NOT OK -- user controls query parameters
104+
# NOT OK -- user controls query parameters
105+
requests.get(url) # $ Alert[py/partial-ssrf]
90106

91107
def partial_ssrf_2():
92108
user_input = request.args['untrusted_input']
93109

94110
url = "https://example.com/" + user_input
95-
requests.get(url) # NOT OK -- user controls path
111+
# NOT OK -- user controls path
112+
requests.get(url) # $ Alert[py/partial-ssrf]
96113

97114
def partial_ssrf_3():
98115
user_input = request.args['untrusted_input']
99116

100117
url = "https://example.com/" + user_input
101-
requests.get(url) # NOT OK -- user controls path
118+
# NOT OK -- user controls path
119+
requests.get(url) # $ Alert[py/partial-ssrf]
102120

103121
def partial_ssrf_4():
104122
user_input = request.args['untrusted_input']
105123

106124
url = "https://example.com/foo#{}".format(user_input)
107-
requests.get(url) # NOT OK -- user contollred fragment
125+
# NOT OK -- user contollred fragment
126+
requests.get(url) # $ Alert[py/partial-ssrf]
108127

109128
def partial_ssrf_5():
110129
user_input = request.args['untrusted_input']
@@ -113,20 +132,22 @@ def partial_ssrf_5():
113132
# controlled
114133

115134
url = "https://example.com/foo#%s" % user_input
116-
requests.get(url) # NOT OK -- user contollred fragment
135+
# NOT OK -- user contollred fragment
136+
requests.get(url) # $ Alert[py/partial-ssrf]
117137

118138
def partial_ssrf_6():
119139
user_input = request.args['untrusted_input']
120140

121141
url = f"https://example.com/foo#{user_input}"
122-
requests.get(url) # NOT OK -- user only controlled fragment
142+
# NOT OK -- user only controlled fragment
143+
requests.get(url) # $ Alert[py/partial-ssrf]
123144

124145
def partial_ssrf_7():
125146
user_input = request.args['untrusted_input']
126147

127148
if user_input.isalnum():
128149
url = f"https://example.com/foo#{user_input}"
129-
requests.get(url) # OK - user input can only contain alphanumerical characters
150+
requests.get(url) # OK - user input can only contain alphanumerical characters
130151

131152
if user_input.isalpha():
132153
url = f"https://example.com/foo#{user_input}"
@@ -154,7 +175,8 @@ def partial_ssrf_7():
154175

155176
if re.fullmatch(r'.*[a-zA-Z0-9]+.*', user_input):
156177
url = f"https://example.com/foo#{user_input}"
157-
requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary characters
178+
# NOT OK, but NOT FOUND - user input can contain arbitrary characters
179+
requests.get(url) # $ MISSING: Alert[py/partial-ssrf]
158180

159181

160182
if re.match(r'^[a-zA-Z0-9]+$', user_input):
@@ -163,7 +185,8 @@ def partial_ssrf_7():
163185

164186
if re.match(r'[a-zA-Z0-9]+', user_input):
165187
url = f"https://example.com/foo#{user_input}"
166-
requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix.
188+
# NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix.
189+
requests.get(url) # $ MISSING: Alert[py/partial-ssrf]
167190

168191
reg = re.compile(r'^[a-zA-Z0-9]+$')
169192

0 commit comments

Comments
 (0)