@@ -34,4 +34,121 @@ private module Pandas {
3434
3535 override string getFormat ( ) { result = "pickle" }
3636 }
37+
/**
 * Provides security related models for `pandas.DataFrame`.
 * See https://pandas.pydata.org/docs/reference/frame.html
 */
module DataFrame {
  /**
   * A `pandas.DataFrame` object.
   *
   * Extend this class to model new APIs.
   * See https://pandas.pydata.org/docs/reference/frame.html
   */
  abstract class DataFrame extends API::Node {
    override string toString() { result = this.(API::Node).toString() }
  }

  /**
   * A `pandas.DataFrame` instantiation, that is, the result of calling `pandas.DataFrame(...)`.
   * See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
   */
  class DataFrameConstructor extends DataFrame {
    DataFrameConstructor() {
      this = API::moduleImport("pandas").getMember("DataFrame").getReturn()
    }
  }

  /**
   * The result of a `pandas.read_*` function that returns a `pandas.DataFrame`.
   * See https://pandas.pydata.org/docs/reference/io.html
   */
  class InputRead extends DataFrame {
    InputRead() {
      this =
        API::moduleImport("pandas")
            .getMember([
                "read_csv", "read_fwf", "read_pickle", "read_table", "read_clipboard",
                "read_excel", "read_xml", "read_parquet", "read_orc", "read_spss",
                "read_sql_table", "read_sql_query", "read_sql", "read_gbq", "read_stata"
              ])
            .getReturn()
      or
      // The result of `read_html` is subscripted to obtain the individual frames.
      this = API::moduleImport("pandas").getMember("read_html").getReturn().getASubscript()
      or
      exists(API::CallNode readSasCall |
        readSasCall = API::moduleImport("pandas").getMember("read_sas").getACall() and
        // Bind the result to the very call whose arguments are inspected below, so that
        // one call using the defaults does not turn the result of every other
        // `read_sas` call into a `DataFrame`.
        this = readSasCall.getReturn() and
        // `read_sas` returns a DataFrame if iterator=False and chunksize=None, which is
        // also the case when both are left at their default values. A missing
        // `iterator` argument is already covered by the negation below.
        not readSasCall.getParameter(5, "iterator").asSink().asExpr().(BooleanLiteral) instanceof
          True and
        not exists(
          readSasCall.getParameter(4, "chunksize").asSink().asExpr().(IntegerLiteral).getN()
        )
      )
    }
  }

  /**
   * The result of a `pandas.DataFrame.*` method or property that is modeled as
   * yielding a `pandas.DataFrame` object.
   * See https://pandas.pydata.org/docs/reference/frame.html
   */
  class DataFrameMethods extends DataFrame {
    DataFrameMethods() {
      this =
        any(DataFrame df)
            .getMember([
                "copy", "from_records", "from_dict", "from_spmatrix", "assign", "select_dtypes",
                "set_flags", "astype", "infer_objects", "head", "xs", "get", "isin", "where",
                "mask", "query", "add", "mul", "truediv", "mod", "pow", "dot", "radd", "rsub",
                "rdiv", "rfloordiv", "rtruediv", "rpow", "lt", "gt", "le", "ne", "agg", "combine",
                "apply", "aggregate", "transform", "all", "any", "clip", "corr", "cov", "cummax",
                "cummin", "cumprod", "describe", "mode", "pct_change", "quantile", "rank",
                "round", "sem", "add_prefix", "add_suffix", "at_time", "between_time", "drop",
                "drop_duplicates", "filter", "first", "idxmin", "last", "reindex",
                "reindex_like", "reset_index", "sample", "set_axis", "tail", "take", "truncate",
                "bfill", "dropna", "ffill", "fillna", "interpolate", "isna", "isnull", "notna",
                "notnull", "pad", "replace", "droplevel", "pivot", "pivot_table",
                "reorder_levels", "sort_values", "sort_index", "nlargest", "nsmallest",
                "swaplevel", "stack", "unstack", "melt", "explode", "squeeze", "transpose",
                "compare", "join", "shift", "asof", "merge", "tz_convert", "to_period", "asfreq",
                "to_dense", "tz_localize", "box", "__dataframe__"
              ])
            .getReturn()
      or
      // `DataFrame.T` is a property rather than a method, so the transposed frame is
      // the attribute access itself and not the result of a call.
      this = any(DataFrame df).getMember("T")
    }
  }
}
131+
/**
 * A call to the `query` or `eval` method of a `pandas.DataFrame`, whose
 * expression argument is treated as code to be executed.
 * See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
 * and https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.eval.html
 */
class CodeExecutionCall extends CodeExecution::Range, API::CallNode {
  CodeExecutionCall() {
    exists(DataFrame::DataFrame frame, string methodName |
      methodName = ["query", "eval"] and
      this = frame.getMember(methodName).getACall()
    )
  }

  /** Gets the expression argument: first positional argument, or the keyword `expr`. */
  override DataFlow::Node getCode() {
    exists(API::Node exprArg |
      exprArg = this.getParameter(0, "expr") and
      result = exprArg.asSink()
    )
  }
}
144+
/**
 * A call to the module-level function `pandas.eval`, whose expression
 * argument is treated as code to be executed.
 * See https://pandas.pydata.org/docs/reference/api/pandas.eval.html
 */
class PandasEval extends CodeExecution::Range, API::CallNode {
  PandasEval() {
    exists(API::Node evalFunction |
      evalFunction = API::moduleImport("pandas").getMember("eval") and
      this = evalFunction.getACall()
    )
  }

  /** Gets the expression argument: first positional argument, or the keyword `expr`. */
  override DataFlow::Node getCode() {
    exists(API::Node exprArg |
      exprArg = this.getParameter(0, "expr") and
      result = exprArg.asSink()
    )
  }
}
37154}
0 commit comments