33 */
44
55private import actions
6- private import codeql.util.Unit
76private import codeql.actions.DataFlow
87private import codeql.actions.dataflow.FlowSources
9- private import codeql.actions.dataflow.ExternalFlow
10- private import codeql.actions.security.ArtifactPoisoningQuery
11- private import codeql.actions.security.OutputClobberingQuery
12- private import codeql.actions.security.UntrustedCheckoutQuery
13-
14- /**
15- * A unit class for adding additional taint steps.
16- *
17- * Extend this class to add additional taint steps that should apply to all
18- * taint configurations.
19- */
20- class AdditionalTaintStep extends Unit {
21- /**
22- * Holds if the step from `node1` to `node2` should be considered a taint
23- * step for all configurations.
24- */
25- abstract predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) ;
26- }
27-
28- /**
29- * Holds if and environment variable is used, directly or indirectly, in a Run's step expression.
30- * Where the expression is a string captured from the Run's script.
31- */
32- bindingset [ var_name, expr]
33- predicate envToRunExpr ( string var_name , Run run , string expr ) {
34- // e.g. echo "FOO=$BODY" >> $GITHUB_ENV
35- // e.g. echo "FOO=${BODY}" >> $GITHUB_ENV
36- expr .matches ( "%$" + [ "" , "{" , "ENV{" ] + var_name + "%" )
37- or
38- // e.g. echo "FOO=$(echo $BODY)" >> $GITHUB_ENV
39- expr .matches ( "$(echo %" ) and expr .indexOf ( var_name ) > 0
40- or
41- // e.g.
42- // FOO=$(echo $BODY)
43- // echo "FOO=$FOO" >> $GITHUB_ENV
44- exists ( string line , string var2_name , string var2_value | run .getScript ( ) .splitAt ( "\n" ) = line |
45- var2_name = line .regexpCapture ( "([a-zA-Z0-9\\-_]+)=(.*)" , 1 ) and
46- var2_value = line .regexpCapture ( "([a-zA-Z0-9\\-_]+)=(.*)" , 2 ) and
47- var2_value .matches ( "%$" + [ "" , "{" , "ENV{" ] + var_name + "%" ) and
48- (
49- expr .matches ( "%$" + [ "" , "{" , "ENV{" ] + var2_name + "%" )
50- or
51- expr .matches ( "$(echo %" ) and expr .indexOf ( var2_name ) > 0
52- )
53- )
54- }
55-
56- /**
57- * Holds if an environment variable is used, directly or indirectly, as an argument to a dangerous command
58- * in a Run step.
59- * Where the command is a string captured from the Run's script.
60- */
61- bindingset [ var_name]
62- predicate envToArgInjSink ( string var_name , Run run , string command ) {
63- exists ( string argument , string line , string regexp , int command_group , int argument_group |
64- run .getScript ( ) .splitAt ( "\n" ) = line and
65- argumentInjectionSinksDataModel ( regexp , command_group , argument_group ) and
66- argument = line .regexpCapture ( regexp , argument_group ) and
67- command = line .regexpCapture ( regexp , command_group ) and
68- envToRunExpr ( var_name , run , argument ) and
69- exists ( run .getInScopeEnvVarExpr ( var_name ) )
70- )
71- }
72-
73- /**
74- * Holds if an env var is passed to a Run step and this Run step, writes its value to a special workflow file.
75- * - file is the name of the special workflow file: GITHUB_ENV, GITHUB_OUTPUT, GITHUB_PATH
76- * - var_name is the name of the env var
77- * - run is the Run step
78- * - key is the name assigned in the special workflow file.
79- * e.g. FOO for `echo "FOO=$BODY" >> $GITHUB_ENV`
80- * e.g. FOO for `echo "FOO=$(echo $BODY)" >> $GITHUB_OUTPUT`
81- * e.g. path (special name) for `echo "$BODY" >> $GITHUB_PATH`
82- */
83- bindingset [ var_name]
84- predicate envToSpecialFile ( string file , string var_name , Run run , string key ) {
85- exists ( string value |
86- (
87- file = "GITHUB_ENV" and
88- run .getAWriteToGitHubEnv ( key , value )
89- or
90- file = "GITHUB_OUTPUT" and
91- run .getAWriteToGitHubOutput ( key , value )
92- or
93- file = "GITHUB_PATH" and
94- run .getAWriteToGitHubPath ( value ) and
95- key = "path"
96- ) and
97- envToRunExpr ( var_name , run , value )
98- )
99- }
100-
101- /**
102- * Holds if a Run step declares an environment variable, uses it in its script to set another env var.
103- * e.g.
104- * env:
105- * BODY: ${{ github.event.comment.body }}
106- * run: |
107- * echo "foo=$(echo $BODY)" >> $GITHUB_ENV
108- */
109- predicate envToRunStep ( DataFlow:: Node pred , DataFlow:: Node succ ) {
110- exists ( Run run , string var_name |
111- run .getInScopeEnvVarExpr ( var_name ) = pred .asExpr ( ) and
112- succ .asExpr ( ) = run .getScriptScalar ( ) and
113- (
114- envToSpecialFile ( [ "GITHUB_ENV" , "GITHUB_OUTPUT" , "GITHUB_PATH" ] , var_name , run , _) or
115- envToArgInjSink ( var_name , run , _) or
116- exists ( OutputClobberingSink n | n .asExpr ( ) = run .getScriptScalar ( ) )
117- )
118- )
119- }
1208
1219/**
12210 * Holds if a Run step declares an environment variable, uses it in its script and sets an output in its script.
@@ -132,201 +20,97 @@ predicate envToRunStep(DataFlow::Node pred, DataFlow::Node succ) {
13220 * echo "::set-output name=step-output::$BODY"
13321 */
13422predicate envToOutputStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
135- exists ( Run run , string var_name , string key |
136- run .getInScopeEnvVarExpr ( var_name ) = pred .asExpr ( ) and
23+ exists ( Run run , string var , string field |
24+ run .getInScopeEnvVarExpr ( var ) = pred .asExpr ( ) and
13725 succ .asExpr ( ) = run and
138- envToSpecialFile ( "GITHUB_OUTPUT" , var_name , run , key ) and
139- c = any ( DataFlow:: FieldContent ct | ct .getName ( ) = key )
26+ Bash :: envReachingGitHubFileWrite ( run , var , "GITHUB_OUTPUT" , field ) and
27+ c = any ( DataFlow:: FieldContent ct | ct .getName ( ) = field )
14028 )
14129}
14230
14331predicate envToEnvStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
144- exists ( Run run , string var_name , string key , string value |
145- run . getAWriteToGitHubEnv ( key , value ) and
146- c = any ( DataFlow :: FieldContent ct | ct . getName ( ) = key ) and
147- pred . asExpr ( ) = run . getInScopeEnvVarExpr ( var_name ) and
32+ exists (
33+ Run run , string var , string field //string key, string value |
34+ |
35+ run . getInScopeEnvVarExpr ( var ) = pred . asExpr ( ) and
14836 // we store the taint on the enclosing job since the may not exist an implicit env attribute
14937 succ .asExpr ( ) = run .getEnclosingJob ( ) and
150- Bash:: isBashParameterExpansion ( value , var_name , _, _)
38+ Bash:: envReachingGitHubFileWrite ( run , var , "GITHUB_ENV" , field ) and
39+ c = any ( DataFlow:: FieldContent ct | ct .getName ( ) = field ) //and
15140 )
15241}
15342
154- predicate controlledCWD ( Step artifact ) {
155- artifact instanceof UntrustedArtifactDownloadStep or
156- // This shoould be:
157- // artifact instanceof PRHeadCheckoutStep
158- // but PRHeadCheckoutStep uses Taint Tracking anc causes a non-Monolitic Recursion error
159- // so we list all the subclasses of PRHeadCheckoutStep here and use actions/checkout as a workaround
160- // instead of using ActionsMutableRefCheckout and ActionsSHACheckout
161- artifact .( Uses ) .getCallee ( ) = "actions/checkout" or
162- artifact instanceof GitMutableRefCheckout or
163- artifact instanceof GitSHACheckout or
164- artifact instanceof GhMutableRefCheckout or
165- artifact instanceof GhSHACheckout
166- }
167-
16843/**
169- * A downloaded artifact that gets assigned to a Run step output.
170- * - uses: actions/download-artifact@v2
171- * - run: echo "::set-output name=id::$(<pr-id.txt)"
44+ * A command whose output gets assigned to an environment variable or step output.
17245 * - run: |
173- * foo=$(<pr-id.txt)"
174- * echo "::set-output name=id::$foo
46+ * echo "foo=$(cmd)" >> "$GITHUB_OUTPUT"
47+ * - run: |
48+ * foo=$(<cmd)"
49+ * echo "bar=${foo}" >> "$GITHUB_OUTPUT"
17550 */
176- predicate artifactToOutputStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
177- exists ( Run run , Step artifact , string key , string value |
178- controlledCWD ( artifact ) and
179- (
180- // A file is read and its content is assigned to an env var
181- // - run: |
182- // foo=$(<pr-id.txt)"
183- // echo "::set-output name=id::$foo
184- exists ( string var_name , string file_read |
185- run .getAnAssignment ( var_name , file_read ) and
186- Bash:: outputsPartialFileContent ( run , file_read ) and
187- envToRunExpr ( var_name , run , value ) and
188- run .getAWriteToGitHubOutput ( key , value )
189- )
190- or
191- // A file is read and its content is assigned to an output
192- // - run: echo "::set-output name=id::$(<pr-id.txt)"
193- run .getAWriteToGitHubOutput ( key , value ) and
194- Bash:: outputsPartialFileContent ( run , value )
195- ) and
51+ predicate commandToOutputStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
52+ exists ( CommandSource source , Run run , string key , string cmd |
53+ source .getCommand ( ) = cmd and
54+ Bash:: cmdReachingGitHubFileWrite ( run , cmd , "GITHUB_OUTPUT" , key ) and
19655 c = any ( DataFlow:: FieldContent ct | ct .getName ( ) = key ) and
197- artifact .getAFollowingStep ( ) = run and
19856 pred .asExpr ( ) = run .getScriptScalar ( ) and
19957 succ .asExpr ( ) = run
20058 )
20159}
20260
20361/**
204- * A downloaded artifact that gets assigned to an environment variable.
205- * - run: echo "foo=$(<pr-id.txt)" >> "$GITHUB_ENV"
62+ * A command whose output gets assigned to an environment variable or step output.
20663 * - run: |
207- * foo=$(<pr-id.txt)"
64+ * echo "foo=$(cmd)" >> "$GITHUB_ENV"
65+ * - run: |
66+ * foo=$(<cmd)"
20867 * echo "bar=${foo}" >> "$GITHUB_ENV"
20968 */
210- predicate artifactToEnvStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
211- exists ( Run run , string key , string value , Step artifact |
212- controlledCWD ( artifact ) and
213- (
214- // A file is read and its content is assigned to an env var
215- // - run: |
216- // foo=$(<pr-id.txt)"
217- // echo "bar=${foo}" >> "$GITHUB_ENV"
218- exists ( string var_name , string file_read |
219- run .getAnAssignment ( var_name , file_read ) and
220- Bash:: outputsPartialFileContent ( run , file_read ) and
221- envToRunExpr ( var_name , run , value ) and
222- run .getAWriteToGitHubEnv ( key , value )
223- )
224- or
225- // A file is read and its content is assigned to an output
226- // - run: echo "foo=$(<pr-id.txt)" >> "$GITHUB_ENV"
227- run .getAWriteToGitHubEnv ( key , value ) and
228- Bash:: outputsPartialFileContent ( run , value )
229- ) and
69+ predicate commandToEnvStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
70+ exists ( CommandSource source , Run run , string key , string cmd |
71+ source .getCommand ( ) = cmd and
72+ Bash:: cmdReachingGitHubFileWrite ( run , cmd , "GITHUB_ENV" , key ) and
23073 c = any ( DataFlow:: FieldContent ct | ct .getName ( ) = key ) and
231- artifact .getAFollowingStep ( ) = run and
23274 pred .asExpr ( ) = run .getScriptScalar ( ) and
23375 // we store the taint on the enclosing job since there may not be an implicit env attribute
23476 succ .asExpr ( ) = run .getEnclosingJob ( )
23577 )
23678}
23779
23880/**
239- * A download artifact step followed by a step that may use downloaded artifacts.
240- */
241- predicate artifactDownloadToRunStep ( DataFlow:: Node pred , DataFlow:: Node succ ) {
242- exists ( Step artifact , Run run |
243- controlledCWD ( artifact ) and
244- pred .asExpr ( ) = artifact and
245- succ .asExpr ( ) = run .getScriptScalar ( ) and
246- artifact .getAFollowingStep ( ) = run
247- )
248- }
249-
250- //
251- /**
252- * A download artifact step followed by a uses step .
253- */
254- predicate artifactDownloadToUsesStep ( DataFlow:: Node pred , DataFlow:: Node succ ) {
255- exists ( Step artifact , Uses uses |
256- controlledCWD ( artifact ) and
257- pred .asExpr ( ) = artifact and
258- succ .asExpr ( ) = uses and
259- artifact .getAFollowingStep ( ) = uses
260- )
261- }
262-
263- /**
264- * A read of the _files field of the dorny/paths-filter action.
265- */
266- predicate dornyPathsFilterTaintStep ( DataFlow:: Node pred , DataFlow:: Node succ ) {
267- exists ( StepsExpression o |
268- pred instanceof DornyPathsFilterSource and
269- o .getStepId ( ) = pred .asExpr ( ) .( UsesStep ) .getId ( ) and
270- o .getFieldName ( ) .matches ( "%_files" ) and
271- succ .asExpr ( ) = o
272- )
273- }
274-
275- /**
276- * A read of user-controlled field of the tj-actions/changed-files action.
277- */
278- predicate tjActionsChangedFilesTaintStep ( DataFlow:: Node pred , DataFlow:: Node succ ) {
279- exists ( StepsExpression o |
280- pred instanceof TJActionsChangedFilesSource and
281- o .getTarget ( ) = pred .asExpr ( ) and
282- o .getStepId ( ) = pred .asExpr ( ) .( UsesStep ) .getId ( ) and
283- o .getFieldName ( ) =
284- [
285- "added_files" , "copied_files" , "deleted_files" , "modified_files" , "renamed_files" ,
286- "all_old_new_renamed_files" , "type_changed_files" , "unmerged_files" , "unknown_files" ,
287- "all_changed_and_modified_files" , "all_changed_files" , "other_changed_files" ,
288- "all_modified_files" , "other_modified_files" , "other_deleted_files" , "modified_keys" ,
289- "changed_keys"
290- ] and
291- succ .asExpr ( ) = o
292- )
293- }
294-
295- /**
296- * A read of user-controlled field of the tj-actions/verify-changed-files action.
81+ * A downloaded artifact that gets assigned to a Run step output.
82+ * - uses: actions/download-artifact@v2
83+ * - run: echo "::set-output name=id::$(<pr-id.txt)"
84+ * - run: |
85+ * foo=$(<pr-id.txt)"
86+ * echo "::set-output name=id::$foo
29787 */
298- predicate tjActionsVerifyChangedFilesTaintStep ( DataFlow:: Node pred , DataFlow:: Node succ ) {
299- exists ( StepsExpression o |
300- pred instanceof TJActionsVerifyChangedFilesSource and
301- o .getTarget ( ) = pred .asExpr ( ) and
302- o .getStepId ( ) = pred .asExpr ( ) .( UsesStep ) .getId ( ) and
303- o .getFieldName ( ) = "changed_files" and
304- succ .asExpr ( ) = o
88+ predicate fileToOutputStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
89+ exists ( FileSource source , Run run , string key , string cmd |
90+ source .asExpr ( ) .( Step ) .getAFollowingStep ( ) = run and
91+ Bash:: cmdReachingGitHubFileWrite ( run , cmd , "GITHUB_OUTPUT" , key ) and
92+ Bash:: outputsPartialFileContent ( run , cmd ) and
93+ c = any ( DataFlow:: FieldContent ct | ct .getName ( ) = key ) and
94+ pred .asExpr ( ) = run .getScriptScalar ( ) and
95+ succ .asExpr ( ) = run
30596 )
30697}
30798
30899/**
309- * A read of user-controlled field of the xt0rted/slash-command-action action.
100+ * A downloaded artifact that gets assigned to an environment variable.
101+ * - run: echo "foo=$(<pr-id.txt)" >> "$GITHUB_ENV"
102+ * - run: |
103+ * foo=$(<pr-id.txt)"
104+ * echo "bar=${foo}" >> "$GITHUB_ENV"
310105 */
311- predicate xt0rtedSlashCommandActionTaintStep ( DataFlow:: Node pred , DataFlow:: Node succ ) {
312- exists ( StepsExpression o |
313- pred instanceof Xt0rtedSlashCommandSource and
314- o .getTarget ( ) = pred .asExpr ( ) and
315- o .getStepId ( ) = pred .asExpr ( ) .( UsesStep ) .getId ( ) and
316- o .getFieldName ( ) = "command-arguments" and
317- succ .asExpr ( ) = o
106+ predicate fileToEnvStoreStep ( DataFlow:: Node pred , DataFlow:: Node succ , DataFlow:: ContentSet c ) {
107+ exists ( FileSource source , Run run , string key , string cmd |
108+ source .asExpr ( ) .( Step ) .getAFollowingStep ( ) = run and
109+ Bash:: cmdReachingGitHubFileWrite ( run , cmd , "GITHUB_ENV" , key ) and
110+ Bash:: outputsPartialFileContent ( run , cmd ) and
111+ c = any ( DataFlow:: FieldContent ct | ct .getName ( ) = key ) and
112+ pred .asExpr ( ) = run .getScriptScalar ( ) and
113+ // we store the taint on the enclosing job since there may not be an implicit env attribute
114+ succ .asExpr ( ) = run .getEnclosingJob ( )
318115 )
319116}
320-
321- class TaintSteps extends AdditionalTaintStep {
322- override predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
323- envToRunStep ( node1 , node2 ) or
324- artifactDownloadToRunStep ( node1 , node2 ) or
325- artifactDownloadToUsesStep ( node1 , node2 ) or
326- // 3rd party actions
327- dornyPathsFilterTaintStep ( node1 , node2 ) or
328- tjActionsChangedFilesTaintStep ( node1 , node2 ) or
329- tjActionsVerifyChangedFilesTaintStep ( node1 , node2 ) or
330- xt0rtedSlashCommandActionTaintStep ( node1 , node2 )
331- }
332- }
0 commit comments