Skip to content

Commit 2727bf5

Browse files
author
Alvaro Muñoz
committed
Add improved Bash script parser
1 parent 4b74ade commit 2727bf5

3 files changed

Lines changed: 104 additions & 6 deletions

File tree

ql/lib/codeql/actions/Ast.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,10 @@ class Run extends Step instanceof RunImpl {
293293
Expression getAnScriptExpr() { result = super.getAnScriptExpr() }
294294

295295
string getWorkingDirectory() { result = super.getWorkingDirectory() }
296+
297+
/** Gets a command of this `run` step; delegates to `super.getACommand()`. */
string getACommand() { result = super.getACommand() }
298+
299+
/** Holds if `super.getAnAssignment(name, value)` holds, i.e. this step reports an assignment of `value` to `name`. */
predicate getAnAssignment(string name, string value) { super.getAnAssignment(name, value) }
296300
}
297301

298302
abstract class SimpleReferenceExpression extends AstNode instanceof SimpleReferenceExpressionImpl {

ql/lib/codeql/actions/Helper.qll

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ predicate isBashParameterExpansion(string expr, string parameter, string operato
5454
)
5555
}
5656

57-
// TODO, the followinr test fails
5857
bindingset[raw_content]
5958
predicate extractVariableAndValue(string raw_content, string key, string value) {
6059
exists(string regexp, string content | content = trimQuotes(raw_content) |
@@ -246,18 +245,14 @@ predicate inNonPrivilegedContext(AstNode node) {
246245
not node.getEnclosingJob().isPrivilegedExternallyTriggerable(_)
247246
}
248247

249-
string partialFileContentRegexp() {
250-
result = ["cat\\s+", "jq\\s+", "yq\\s+", "tail\\s+", "head\\s+", "ls\\s+"]
251-
}
252-
253248
bindingset[snippet]
254249
predicate outputsPartialFileContent(string snippet) {
255250
// e.g.
256251
// echo FOO=`yq '.foo' foo.yml` >> $GITHUB_ENV
257252
// echo "FOO=$(<foo.txt)" >> $GITHUB_ENV
258253
// yq '.foo' foo.yml >> $GITHUB_PATH
259254
// cat foo.txt >> $GITHUB_PATH
260-
snippet.regexpMatch(["(\\$\\(|`)<.*", ".*(\\b|^|\\s+)" + partialFileContentRegexp() + ".*"])
255+
Bash::getACommand(snippet).indexOf(["<", Bash::partialFileContentCommand() + " "]) = 0
261256
}
262257

263258
string defaultBranchNames() {
@@ -310,3 +305,96 @@ string normalizePath(string path) {
310305
*/
311306
bindingset[subpath, path]
312307
predicate isSubpath(string subpath, string path) { subpath.substring(0, path.length()) = path }
308+
309+
/**
 * Helpers that pick individual commands and `name=value` assignments out of
 * Bash script text using string and regular-expression operations only.
 */
module Bash {
310+
/** Gets the statement separator `;`. */
string stmtSeparator() { result = ";" }
311+
312+
/** Gets a conditional command separator: `&&` or `||`. */
string commandSeparator() { result = ["&&", "||"] }
313+
314+
/** Gets the pipe separator `|`. */
string pipeSeparator() { result = "|" }
315+
316+
/** Gets any separator on which a line is split into commands: a statement, command or pipe separator. */
string splitSeparators() {
317+
result = stmtSeparator() or result = commandSeparator() or result = pipeSeparator()
318+
}
319+
320+
/** Gets one of the listed redirection operator tokens. */
string redirectionSeparator() { result = [">", ">>", "2>", "2>>", ">&", "2>&", "<", "<<<"] }
321+
322+
/** Gets one of the command names `cat`, `jq`, `yq`, `tail` or `head`. */
string partialFileContentCommand() { result = ["cat", "jq", "yq", "tail", "head"] }
323+
324+
/**
 * Gets a command found in `script`.
 *
 * The script is first joined across backslash line continuations and split
 * into lines. For every line, three kinds of text are considered: the inside
 * of each `$(...)` substitution, the inside of each backtick substitution, and
 * the line itself with every substitution replaced by the literal `SUBCOMMAND`.
 * Each of those texts is split on `splitSeparators()`, trimmed, and returned
 * unless it is empty, is one of the listed shell keywords/braces, or still
 * contains a separator.
 *
 * The `$(...)` pattern accepts at most two levels of nested parentheses inside
 * the substitution.
 */
bindingset[script]
325+
string getACommand(string script) {
326+
exists(string stmt_, string stmt, string subline2, string cmd |
327+
// drop "backslash, optional whitespace, newline" sequences, then take each line
stmt_ = script.regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n") and
328+
stmt =
329+
[
330+
// $() command substitution
331+
stmt_
332+
.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", _, _)
333+
.regexpReplaceAll("^\\$\\(", "")
334+
.regexpReplaceAll("\\)$", ""),
335+
// `...` command substitution
336+
stmt_
337+
.regexpFind("\\`[^\\`]+\\`", _, _)
338+
.regexpReplaceAll("^\\`", "")
339+
.regexpReplaceAll("\\`$", ""),
340+
// original line with every substitution replaced by a placeholder
341+
stmt_
342+
.regexpReplaceAll("\\`[^\\`]+\\`", "SUBCOMMAND")
343+
.regexpReplaceAll("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", "SUBCOMMAND")
344+
] and
345+
// We should replace separators inside quoted arguments with placeholders to avoid splitting them
346+
// eg: ls | grep -E "*.(tar.gz|zip)$"
347+
//subline2 = subline.regexpReplaceAll("\"([^\"]+)\"", "$0").regexpReplaceAll("'([^']+)'", "$0") and
348+
// NOTE(review): each alternative below rewrites only the quoted string selected by `i`,
// and the double-quote and single-quote cases are separate disjuncts, so a line with
// several quoted strings does not appear to get all of them protected at once - confirm.
(
349+
stmt.regexpMatch(".*\"([^\"]+)\".*") and
350+
exists(int i |
351+
subline2 =
352+
stmt.replaceAll(stmt.regexpFind("\"([^\"]+)\"", _, i),
353+
stmt.regexpFind("\"([^\"]+)\"", _, i)
354+
.replaceAll("|", "::PIPE::")
355+
.replaceAll(";", "::SEMICOLON::")
356+
.replaceAll("&&", "::AND::")
357+
.replaceAll("||", "::OR::"))
358+
)
359+
or
360+
stmt.regexpMatch(".*'([^']+)'.*") and
361+
exists(int i |
362+
subline2 =
363+
stmt.replaceAll(stmt.regexpFind("'([^']+)'", _, i),
364+
stmt.regexpFind("'([^']+)'", _, i)
365+
.replaceAll("|", "::PIPE::")
366+
.replaceAll(";", "::SEMICOLON::")
367+
.replaceAll("&&", "::AND::")
368+
.replaceAll("||", "::OR::"))
369+
)
370+
or
371+
// no quoted string at all: use the text unchanged
not stmt.regexpMatch(".*'([^']+)'.*") and
372+
not stmt.regexpMatch(".*\"([^\"]+)\".*") and
373+
subline2 = stmt
374+
) and
375+
cmd = subline2.splitAt(splitSeparators()).trim() and
376+
// when splitting the line with a separator that is not found, the result is the original line which may contain other separators
377+
// we only want the split parts that do not contain any of the separators
378+
not cmd.indexOf(splitSeparators()) > -1 and
379+
// discard empty parts and bare shell keywords/braces
not cmd =
380+
[
381+
"", "for", "in", "do", "done", "if", "then", "else", "elif", "fi", "while", "until",
382+
"case", "esac", "{", "}"
383+
] and
384+
// restore the separators that were hidden inside quoted strings
result =
385+
cmd.replaceAll("::PIPE::", "|")
386+
.replaceAll("::SEMICOLON::", ";")
387+
.replaceAll("::AND::", "&&")
388+
.replaceAll("::OR::", "||")
389+
)
390+
}
391+
392+
/**
 * Holds if some line of `script` has the form `name=value`.
 *
 * Lines are obtained after removing backslash line continuations and trimming.
 * `name` is the leading run of letters, digits, `-` and `_` before the first
 * `=`, and `value` is everything after that `=`.
 */
bindingset[script]
393+
predicate getAnAssignment(string script, string name, string value) {
394+
exists(string stmt |
395+
stmt = script.regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n").trim() and
396+
name = stmt.regexpCapture("^([a-zA-Z0-9\\-_]+)=.*", 1) and
397+
value = stmt.regexpCapture("^[a-zA-Z0-9\\-_]+=(.*)", 1)
398+
)
399+
}
400+
}

ql/lib/codeql/actions/ast/internal/Ast.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,12 @@ class RunImpl extends StepImpl {
13191319

13201320
string getScript() { result = script.getValue().regexpReplaceAll("\\\\\\s*\n", "") }
13211321

1322+
/** Gets a command that `Bash::getACommand` extracts from this step's script (as returned by `getScript()`). */
string getACommand() { result = Bash::getACommand(this.getScript()) }
1323+
1324+
/** Holds if `Bash::getAnAssignment` finds an assignment of `value` to `name` in this step's script (as returned by `getScript()`). */
predicate getAnAssignment(string name, string value) {
1325+
Bash::getAnAssignment(this.getScript(), name, value)
1326+
}
1327+
13221328
ScalarValueImpl getScriptScalar() { result = TScalarValueNode(script) }
13231329

13241330
ExpressionImpl getAnScriptExpr() { result.getParentNode().getNode() = script }

0 commit comments

Comments
 (0)