Skip to content

Commit d4a24df

Browse files
author
Alvaro Muñoz
committed
Refactor FlowSteps
1 parent 6a99845 commit d4a24df

4 files changed

Lines changed: 161 additions & 274 deletions

File tree

ql/lib/codeql/actions/dataflow/FlowSteps.qll

Lines changed: 55 additions & 271 deletions
Original file line numberDiff line numberDiff line change
@@ -3,120 +3,8 @@
33
*/
44

55
private import actions
6-
private import codeql.util.Unit
76
private import codeql.actions.DataFlow
87
private import codeql.actions.dataflow.FlowSources
9-
private import codeql.actions.dataflow.ExternalFlow
10-
private import codeql.actions.security.ArtifactPoisoningQuery
11-
private import codeql.actions.security.OutputClobberingQuery
12-
private import codeql.actions.security.UntrustedCheckoutQuery
13-
14-
/**
15-
* A unit class for adding additional taint steps.
16-
*
17-
* Extend this class to add additional taint steps that should apply to all
18-
* taint configurations.
19-
*/
20-
class AdditionalTaintStep extends Unit {
21-
/**
22-
* Holds if the step from `node1` to `node2` should be considered a taint
23-
* step for all configurations.
24-
*/
25-
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
26-
}
27-
28-
/**
29-
* Holds if and environment variable is used, directly or indirectly, in a Run's step expression.
30-
* Where the expression is a string captured from the Run's script.
31-
*/
32-
bindingset[var_name, expr]
33-
predicate envToRunExpr(string var_name, Run run, string expr) {
34-
// e.g. echo "FOO=$BODY" >> $GITHUB_ENV
35-
// e.g. echo "FOO=${BODY}" >> $GITHUB_ENV
36-
expr.matches("%$" + ["", "{", "ENV{"] + var_name + "%")
37-
or
38-
// e.g. echo "FOO=$(echo $BODY)" >> $GITHUB_ENV
39-
expr.matches("$(echo %") and expr.indexOf(var_name) > 0
40-
or
41-
// e.g.
42-
// FOO=$(echo $BODY)
43-
// echo "FOO=$FOO" >> $GITHUB_ENV
44-
exists(string line, string var2_name, string var2_value | run.getScript().splitAt("\n") = line |
45-
var2_name = line.regexpCapture("([a-zA-Z0-9\\-_]+)=(.*)", 1) and
46-
var2_value = line.regexpCapture("([a-zA-Z0-9\\-_]+)=(.*)", 2) and
47-
var2_value.matches("%$" + ["", "{", "ENV{"] + var_name + "%") and
48-
(
49-
expr.matches("%$" + ["", "{", "ENV{"] + var2_name + "%")
50-
or
51-
expr.matches("$(echo %") and expr.indexOf(var2_name) > 0
52-
)
53-
)
54-
}
55-
56-
/**
57-
* Holds if an environment variable is used, directly or indirectly, as an argument to a dangerous command
58-
* in a Run step.
59-
* Where the command is a string captured from the Run's script.
60-
*/
61-
bindingset[var_name]
62-
predicate envToArgInjSink(string var_name, Run run, string command) {
63-
exists(string argument, string line, string regexp, int command_group, int argument_group |
64-
run.getScript().splitAt("\n") = line and
65-
argumentInjectionSinksDataModel(regexp, command_group, argument_group) and
66-
argument = line.regexpCapture(regexp, argument_group) and
67-
command = line.regexpCapture(regexp, command_group) and
68-
envToRunExpr(var_name, run, argument) and
69-
exists(run.getInScopeEnvVarExpr(var_name))
70-
)
71-
}
72-
73-
/**
74-
* Holds if an env var is passed to a Run step and this Run step, writes its value to a special workflow file.
75-
* - file is the name of the special workflow file: GITHUB_ENV, GITHUB_OUTPUT, GITHUB_PATH
76-
* - var_name is the name of the env var
77-
* - run is the Run step
78-
* - key is the name assigned in the special workflow file.
79-
* e.g. FOO for `echo "FOO=$BODY" >> $GITHUB_ENV`
80-
* e.g. FOO for `echo "FOO=$(echo $BODY)" >> $GITHUB_OUTPUT`
81-
* e.g. path (special name) for `echo "$BODY" >> $GITHUB_PATH`
82-
*/
83-
bindingset[var_name]
84-
predicate envToSpecialFile(string file, string var_name, Run run, string key) {
85-
exists(string value |
86-
(
87-
file = "GITHUB_ENV" and
88-
run.getAWriteToGitHubEnv(key, value)
89-
or
90-
file = "GITHUB_OUTPUT" and
91-
run.getAWriteToGitHubOutput(key, value)
92-
or
93-
file = "GITHUB_PATH" and
94-
run.getAWriteToGitHubPath(value) and
95-
key = "path"
96-
) and
97-
envToRunExpr(var_name, run, value)
98-
)
99-
}
100-
101-
/**
102-
* Holds if a Run step declares an environment variable, uses it in its script to set another env var.
103-
* e.g.
104-
* env:
105-
* BODY: ${{ github.event.comment.body }}
106-
* run: |
107-
* echo "foo=$(echo $BODY)" >> $GITHUB_ENV
108-
*/
109-
predicate envToRunStep(DataFlow::Node pred, DataFlow::Node succ) {
110-
exists(Run run, string var_name |
111-
run.getInScopeEnvVarExpr(var_name) = pred.asExpr() and
112-
succ.asExpr() = run.getScriptScalar() and
113-
(
114-
envToSpecialFile(["GITHUB_ENV", "GITHUB_OUTPUT", "GITHUB_PATH"], var_name, run, _) or
115-
envToArgInjSink(var_name, run, _) or
116-
exists(OutputClobberingSink n | n.asExpr() = run.getScriptScalar())
117-
)
118-
)
119-
}
1208

1219
/**
12210
* Holds if a Run step declares an environment variable, uses it in its script and sets an output in its script.
@@ -132,201 +20,97 @@ predicate envToRunStep(DataFlow::Node pred, DataFlow::Node succ) {
13220
* echo "::set-output name=step-output::$BODY"
13321
*/
13422
predicate envToOutputStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
135-
exists(Run run, string var_name, string key |
136-
run.getInScopeEnvVarExpr(var_name) = pred.asExpr() and
23+
exists(Run run, string var, string field |
24+
run.getInScopeEnvVarExpr(var) = pred.asExpr() and
13725
succ.asExpr() = run and
138-
envToSpecialFile("GITHUB_OUTPUT", var_name, run, key) and
139-
c = any(DataFlow::FieldContent ct | ct.getName() = key)
26+
Bash::envReachingGitHubFileWrite(run, var, "GITHUB_OUTPUT", field) and
27+
c = any(DataFlow::FieldContent ct | ct.getName() = field)
14028
)
14129
}
14230

14331
predicate envToEnvStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
144-
exists(Run run, string var_name, string key, string value |
145-
run.getAWriteToGitHubEnv(key, value) and
146-
c = any(DataFlow::FieldContent ct | ct.getName() = key) and
147-
pred.asExpr() = run.getInScopeEnvVarExpr(var_name) and
32+
exists(
33+
Run run, string var, string field //string key, string value |
34+
|
35+
run.getInScopeEnvVarExpr(var) = pred.asExpr() and
14836
// we store the taint on the enclosing job since the may not exist an implicit env attribute
14937
succ.asExpr() = run.getEnclosingJob() and
150-
Bash::isBashParameterExpansion(value, var_name, _, _)
38+
Bash::envReachingGitHubFileWrite(run, var, "GITHUB_ENV", field) and
39+
c = any(DataFlow::FieldContent ct | ct.getName() = field) //and
15140
)
15241
}
15342

154-
predicate controlledCWD(Step artifact) {
155-
artifact instanceof UntrustedArtifactDownloadStep or
156-
// This shoould be:
157-
// artifact instanceof PRHeadCheckoutStep
158-
// but PRHeadCheckoutStep uses Taint Tracking anc causes a non-Monolitic Recursion error
159-
// so we list all the subclasses of PRHeadCheckoutStep here and use actions/checkout as a workaround
160-
// instead of using ActionsMutableRefCheckout and ActionsSHACheckout
161-
artifact.(Uses).getCallee() = "actions/checkout" or
162-
artifact instanceof GitMutableRefCheckout or
163-
artifact instanceof GitSHACheckout or
164-
artifact instanceof GhMutableRefCheckout or
165-
artifact instanceof GhSHACheckout
166-
}
167-
16843
/**
169-
* A downloaded artifact that gets assigned to a Run step output.
170-
* - uses: actions/download-artifact@v2
171-
* - run: echo "::set-output name=id::$(<pr-id.txt)"
44+
* A command whose output gets assigned to an environment variable or step output.
17245
* - run: |
173-
* foo=$(<pr-id.txt)"
174-
* echo "::set-output name=id::$foo
46+
* echo "foo=$(cmd)" >> "$GITHUB_OUTPUT"
47+
* - run: |
48+
* foo=$(<cmd)"
49+
* echo "bar=${foo}" >> "$GITHUB_OUTPUT"
17550
*/
176-
predicate artifactToOutputStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
177-
exists(Run run, Step artifact, string key, string value |
178-
controlledCWD(artifact) and
179-
(
180-
// A file is read and its content is assigned to an env var
181-
// - run: |
182-
// foo=$(<pr-id.txt)"
183-
// echo "::set-output name=id::$foo
184-
exists(string var_name, string file_read |
185-
run.getAnAssignment(var_name, file_read) and
186-
Bash::outputsPartialFileContent(run, file_read) and
187-
envToRunExpr(var_name, run, value) and
188-
run.getAWriteToGitHubOutput(key, value)
189-
)
190-
or
191-
// A file is read and its content is assigned to an output
192-
// - run: echo "::set-output name=id::$(<pr-id.txt)"
193-
run.getAWriteToGitHubOutput(key, value) and
194-
Bash::outputsPartialFileContent(run, value)
195-
) and
51+
predicate commandToOutputStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
52+
exists(CommandSource source, Run run, string key, string cmd |
53+
source.getCommand() = cmd and
54+
Bash::cmdReachingGitHubFileWrite(run, cmd, "GITHUB_OUTPUT", key) and
19655
c = any(DataFlow::FieldContent ct | ct.getName() = key) and
197-
artifact.getAFollowingStep() = run and
19856
pred.asExpr() = run.getScriptScalar() and
19957
succ.asExpr() = run
20058
)
20159
}
20260

20361
/**
204-
* A downloaded artifact that gets assigned to an environment variable.
205-
* - run: echo "foo=$(<pr-id.txt)" >> "$GITHUB_ENV"
62+
* A command whose output gets assigned to an environment variable or step output.
20663
* - run: |
207-
* foo=$(<pr-id.txt)"
64+
* echo "foo=$(cmd)" >> "$GITHUB_ENV"
65+
* - run: |
66+
* foo=$(<cmd)"
20867
* echo "bar=${foo}" >> "$GITHUB_ENV"
20968
*/
210-
predicate artifactToEnvStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
211-
exists(Run run, string key, string value, Step artifact |
212-
controlledCWD(artifact) and
213-
(
214-
// A file is read and its content is assigned to an env var
215-
// - run: |
216-
// foo=$(<pr-id.txt)"
217-
// echo "bar=${foo}" >> "$GITHUB_ENV"
218-
exists(string var_name, string file_read |
219-
run.getAnAssignment(var_name, file_read) and
220-
Bash::outputsPartialFileContent(run, file_read) and
221-
envToRunExpr(var_name, run, value) and
222-
run.getAWriteToGitHubEnv(key, value)
223-
)
224-
or
225-
// A file is read and its content is assigned to an output
226-
// - run: echo "foo=$(<pr-id.txt)" >> "$GITHUB_ENV"
227-
run.getAWriteToGitHubEnv(key, value) and
228-
Bash::outputsPartialFileContent(run, value)
229-
) and
69+
predicate commandToEnvStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
70+
exists(CommandSource source, Run run, string key, string cmd |
71+
source.getCommand() = cmd and
72+
Bash::cmdReachingGitHubFileWrite(run, cmd, "GITHUB_ENV", key) and
23073
c = any(DataFlow::FieldContent ct | ct.getName() = key) and
231-
artifact.getAFollowingStep() = run and
23274
pred.asExpr() = run.getScriptScalar() and
23375
// we store the taint on the enclosing job since there may not be an implicit env attribute
23476
succ.asExpr() = run.getEnclosingJob()
23577
)
23678
}
23779

23880
/**
239-
* A download artifact step followed by a step that may use downloaded artifacts.
240-
*/
241-
predicate artifactDownloadToRunStep(DataFlow::Node pred, DataFlow::Node succ) {
242-
exists(Step artifact, Run run |
243-
controlledCWD(artifact) and
244-
pred.asExpr() = artifact and
245-
succ.asExpr() = run.getScriptScalar() and
246-
artifact.getAFollowingStep() = run
247-
)
248-
}
249-
250-
//
251-
/**
252-
* A download artifact step followed by a uses step .
253-
*/
254-
predicate artifactDownloadToUsesStep(DataFlow::Node pred, DataFlow::Node succ) {
255-
exists(Step artifact, Uses uses |
256-
controlledCWD(artifact) and
257-
pred.asExpr() = artifact and
258-
succ.asExpr() = uses and
259-
artifact.getAFollowingStep() = uses
260-
)
261-
}
262-
263-
/**
264-
* A read of the _files field of the dorny/paths-filter action.
265-
*/
266-
predicate dornyPathsFilterTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
267-
exists(StepsExpression o |
268-
pred instanceof DornyPathsFilterSource and
269-
o.getStepId() = pred.asExpr().(UsesStep).getId() and
270-
o.getFieldName().matches("%_files") and
271-
succ.asExpr() = o
272-
)
273-
}
274-
275-
/**
276-
* A read of user-controlled field of the tj-actions/changed-files action.
277-
*/
278-
predicate tjActionsChangedFilesTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
279-
exists(StepsExpression o |
280-
pred instanceof TJActionsChangedFilesSource and
281-
o.getTarget() = pred.asExpr() and
282-
o.getStepId() = pred.asExpr().(UsesStep).getId() and
283-
o.getFieldName() =
284-
[
285-
"added_files", "copied_files", "deleted_files", "modified_files", "renamed_files",
286-
"all_old_new_renamed_files", "type_changed_files", "unmerged_files", "unknown_files",
287-
"all_changed_and_modified_files", "all_changed_files", "other_changed_files",
288-
"all_modified_files", "other_modified_files", "other_deleted_files", "modified_keys",
289-
"changed_keys"
290-
] and
291-
succ.asExpr() = o
292-
)
293-
}
294-
295-
/**
296-
* A read of user-controlled field of the tj-actions/verify-changed-files action.
81+
* A downloaded artifact that gets assigned to a Run step output.
82+
* - uses: actions/download-artifact@v2
83+
* - run: echo "::set-output name=id::$(<pr-id.txt)"
84+
* - run: |
85+
* foo=$(<pr-id.txt)"
86+
* echo "::set-output name=id::$foo
29787
*/
298-
predicate tjActionsVerifyChangedFilesTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
299-
exists(StepsExpression o |
300-
pred instanceof TJActionsVerifyChangedFilesSource and
301-
o.getTarget() = pred.asExpr() and
302-
o.getStepId() = pred.asExpr().(UsesStep).getId() and
303-
o.getFieldName() = "changed_files" and
304-
succ.asExpr() = o
88+
predicate fileToOutputStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
89+
exists(FileSource source, Run run, string key, string cmd |
90+
source.asExpr().(Step).getAFollowingStep() = run and
91+
Bash::cmdReachingGitHubFileWrite(run, cmd, "GITHUB_OUTPUT", key) and
92+
Bash::outputsPartialFileContent(run, cmd) and
93+
c = any(DataFlow::FieldContent ct | ct.getName() = key) and
94+
pred.asExpr() = run.getScriptScalar() and
95+
succ.asExpr() = run
30596
)
30697
}
30798

30899
/**
309-
* A read of user-controlled field of the xt0rted/slash-command-action action.
100+
* A downloaded artifact that gets assigned to an environment variable.
101+
* - run: echo "foo=$(<pr-id.txt)" >> "$GITHUB_ENV"
102+
* - run: |
103+
* foo=$(<pr-id.txt)"
104+
* echo "bar=${foo}" >> "$GITHUB_ENV"
310105
*/
311-
predicate xt0rtedSlashCommandActionTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
312-
exists(StepsExpression o |
313-
pred instanceof Xt0rtedSlashCommandSource and
314-
o.getTarget() = pred.asExpr() and
315-
o.getStepId() = pred.asExpr().(UsesStep).getId() and
316-
o.getFieldName() = "command-arguments" and
317-
succ.asExpr() = o
106+
predicate fileToEnvStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) {
107+
exists(FileSource source, Run run, string key, string cmd |
108+
source.asExpr().(Step).getAFollowingStep() = run and
109+
Bash::cmdReachingGitHubFileWrite(run, cmd, "GITHUB_ENV", key) and
110+
Bash::outputsPartialFileContent(run, cmd) and
111+
c = any(DataFlow::FieldContent ct | ct.getName() = key) and
112+
pred.asExpr() = run.getScriptScalar() and
113+
// we store the taint on the enclosing job since there may not be an implicit env attribute
114+
succ.asExpr() = run.getEnclosingJob()
318115
)
319116
}
320-
321-
class TaintSteps extends AdditionalTaintStep {
322-
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
323-
envToRunStep(node1, node2) or
324-
artifactDownloadToRunStep(node1, node2) or
325-
artifactDownloadToUsesStep(node1, node2) or
326-
// 3rd party actions
327-
dornyPathsFilterTaintStep(node1, node2) or
328-
tjActionsChangedFilesTaintStep(node1, node2) or
329-
tjActionsVerifyChangedFilesTaintStep(node1, node2) or
330-
xt0rtedSlashCommandActionTaintStep(node1, node2)
331-
}
332-
}

0 commit comments

Comments
 (0)