|
| 1 | +/** |
| 2 | + * Provides default sources, sinks and sanitizers for reasoning about unsafe |
| 3 | + * deserialization, as well as extension points for adding your own. |
| 4 | + */ |
| 5 | + |
| 6 | +private import codeql.ruby.AST |
| 7 | +private import codeql.ruby.ApiGraphs |
| 8 | +private import codeql.ruby.CFG |
| 9 | +private import codeql.ruby.DataFlow |
| 10 | +private import codeql.ruby.dataflow.RemoteFlowSources |
| 11 | +private import codeql.ruby.frameworks.ActiveJob |
| 12 | +private import codeql.ruby.frameworks.core.Module |
| 13 | +private import codeql.ruby.frameworks.core.Kernel |
| 14 | +private import Yaml |
| 15 | + |
| 16 | +module UnsafeYamlDeserialization { |
/**
 * Flow states used to distinguish whether we are using a YAML parse node or
 * a YAML load node.
 */
module FlowState {
  private newtype TState =
    TParse() or
    TLoad()

  /**
   * A flow state used to distinguish whether we have a middle node that uses
   * `YAML.load*` or `YAML.parse*`.
   */
  class State extends TState {
    /** Gets a string representation of this state; delegates to `getStringRepresentation`. */
    string toString() { result = this.getStringRepresentation() }

    /** Gets the canonical string representation of this state: `"parse"` or `"load"`. */
    string getStringRepresentation() {
      result = "load" and this = TLoad()
      or
      result = "parse" and this = TParse()
    }
  }

  /** A flow state used for `YAML.parse*` methods. */
  class Parse extends State, TParse { }

  /** A flow state used for `YAML.load*` methods. */
  class Load extends State, TLoad { }
}
| 50 | + |
/**
 * A data flow source for unsafe deserialization vulnerabilities.
 *
 * This class is abstract: extend it to contribute additional sources.
 */
abstract class Source extends DataFlow::Node {
  /** Gets a string that describes the source; `"user-provided value"` unless overridden. */
  string describe() { result = "user-provided value" }
}
| 58 | + |
/**
 * A data flow sink for unsafe deserialization vulnerabilities.
 *
 * This class is abstract: extend it to contribute additional sinks.
 */
abstract class Sink extends DataFlow::Node { }
| 63 | + |
/**
 * A sanitizer for unsafe deserialization vulnerabilities.
 *
 * This class is abstract: extend it to contribute additional sanitizers.
 */
abstract class Sanitizer extends DataFlow::Node { }
| 68 | + |
/**
 * A source of remote user input, considered as a flow source for unsafe
 * deserialization. Every `RemoteFlowSource` is a member of this class.
 */
class RemoteFlowSourceAsSource extends Source instanceof RemoteFlowSource { }
| 71 | + |
/**
 * A read of data from standard input (`STDIN`, `ARGF`, `$stdin`, or the
 * `Kernel` methods `gets`/`readline`/`readlines`) or an access to `ARGV`,
 * considered as a flow source for unsafe deserialization.
 */
class StdInSource extends UnsafeYamlDeserialization::Source {
  // `true` for the stdin-style reads and `false` for `ARGV`; selects the
  // message returned by `describe`.
  boolean stdin;

  StdInSource() {
    stdin = true and
    (
      // `gets`/`read` called on the `STDIN` or `ARGF` constants.
      this = API::getTopLevelMember(["STDIN", "ARGF"]).getAMethodCall(["gets", "read"])
      or
      // > $stdin == STDIN
      // => true
      // but `$stdin` is a global variable rather than a constant, and
      // `API::getTopLevelMember` only gets constants, so it is matched separately.
      exists(DataFlow::Node stdinRead |
        stdinRead.asExpr().getExpr().(GlobalVariableReadAccess).getVariable().getName() = "$stdin" and
        this = stdinRead.getALocalSource().getAMethodCall(["gets", "read"])
      )
      or
      // `Kernel` method calls named `gets`, `readline` or `readlines`.
      this.(Kernel::KernelMethodCall).getMethodName() = ["gets", "readline", "readlines"]
    )
    or
    // ARGV.
    // NOTE(review): `ARGV` is a constant in Ruby, not a `$`-prefixed global;
    // confirm that `GlobalVariableReadAccess` actually matches it.
    stdin = false and
    this.asExpr().getExpr().(GlobalVariableReadAccess).getVariable().getName() = "ARGV"
  }

  /** Gets `"value from stdin"` for the stdin-style reads and `"value from ARGV"` otherwise. */
  override string describe() {
    stdin = true and result = "value from stdin"
    or
    stdin = false and result = "value from ARGV"
  }
}
| 101 | + |
/**
 * An argument in a call to `YAML.load`, `YAML.unsafe_load`,
 * `YAML.unsafe_load_file` or `YAML.load_stream`, considered a sink for
 * unsafe deserialization. The `YAML` module is an alias of `Psych` in
 * recent versions of Ruby.
 */
class YamlLoadArgument extends Sink {
  YamlLoadArgument() {
    // Positional form: the first argument of any of the loading methods.
    // Note: `load` is safe in psych/yaml >= 4.0.0.
    this =
      yamlLibrary()
          .getAMethodCall(["load", "unsafe_load_file", "unsafe_load", "load_stream"])
          .getArgument(0)
    or
    // Keyword form: `yaml:` for `unsafe_load`/`load_stream`, and
    // `filename:` for `unsafe_load_file`.
    exists(string method, string keyword |
      method = ["unsafe_load", "load_stream"] and keyword = "yaml"
      or
      method = "unsafe_load_file" and keyword = "filename"
    |
      this = yamlLibrary().getAMethodCall(method).getKeywordArgument(keyword)
    )
  }
}
| 122 | + |
/**
 * A sink for unsafe deserialization arising from `YAML.parse`,
 * `YAML.parse_stream` or `YAML.parse_file`.
 *
 * The sink node is the value returned by a `to_ruby` or `transform` call on
 * a node that `yamlParseNode` (defined outside this module) yields for one
 * of those parse methods.
 */
class YamlParseArgument extends Sink {
  YamlParseArgument() {
    exists(string parseMethod, string convertMethod |
      parseMethod = ["parse", "parse_stream", "parse_file"] and
      convertMethod = ["to_ruby", "transform"]
    |
      this =
        yamlParseNode(yamlLibrary().getMethod(parseMethod))
            .getMethod(convertMethod)
            .getReturn()
            .asSource()
    )
  }
}
| 136 | +} |
0 commit comments