@@ -2,89 +2,176 @@ private import python
22private import semmle.python.dataflow.new.DataFlow
33private import experimental.semmle.python.Concepts
44private import semmle.python.ApiGraphs
5+ private import semmle.python.dataflow.new.TaintTracking
56
/**
 * Provides models for sending email through the standard-library `smtplib` module,
 * with message bodies and headers built using the `email.mime` classes.
 */
module SmtpLib {
  /** Gets a reference to `smtplib.SMTP_SSL` */
  private API::Node smtpConnectionInstance() {
    result = API::moduleImport("smtplib").getMember("SMTP_SSL")
  }

  /** Gets a reference to `email.mime.multipart.MIMEMultipart` */
  private API::Node smtpMimeMultipartInstance() {
    result =
      API::moduleImport("email").getMember("mime").getMember("multipart").getMember("MIMEMultipart")
  }

  /** Gets a reference to `email.mime.text.MIMEText` */
  private API::Node smtpMimeTextInstance() {
    result = API::moduleImport("email").getMember("mime").getMember("text").getMember("MIMEText")
  }

  /**
   * Gets a call to `MIMEText` whose subtype argument (2nd positional argument or
   * `_subtype` keyword) is the string literal `mimetype`.
   */
  private DataFlow::CallCfgNode mimeText(string mimetype) {
    result = smtpMimeTextInstance().getACall() and
    [result.getArg(1), result.getArgByName("_subtype")].asExpr().(Str_).getS() = mimetype
  }

  /**
   * Gets a local source of the message given as 3rd argument to `sendCall`.
   *
   * Both shapes are covered: the argument itself (`sendmail(a, b, message)`) and the
   * object of a method call used as the argument (`sendmail(a, b, message.as_string())`).
   */
  private DataFlow::Node getSmtpMessageSource(DataFlow::CallCfgNode sendCall) {
    result =
      [sendCall.getArg(2), sendCall.getArg(2).(DataFlow::MethodCallNode).getObject()]
          .getALocalSource()
  }

  /**
   * Gets flow from `MIMEText()` to `MIMEMultipart(_subparts=(part1, part2))`'s `_subparts`
   * argument. Used because of the impossibility to get local source nodes from `_subparts`'
   * `(List|Tuple)` elements.
   */
  private class SMTPMessageConfig extends TaintTracking::Configuration {
    SMTPMessageConfig() { this = "SMTPMessageConfig" }

    override predicate isSource(DataFlow::Node source) { source = mimeText(_) }

    override predicate isSink(DataFlow::Node sink) {
      sink = smtpMimeMultipartInstance().getACall().getArgByName("_subparts")
    }
  }

  /**
   * Using a `MIMEText` call, gets the content argument whose type argument equals `mimetype`.
   * This call flows into `MIMEMultipart`'s `_subparts` argument or `.attach()` method call,
   * and both local source nodes correlate to `smtp`'s `sendmail` call 3rd argument's local source.
   *
   * Given the following example with `getSmtpMessage(any(SmtpLibSendMail s), "html")`:
   *
   * ```py
   * part1 = MIMEText(text, "plain")
   * part2 = MIMEText(html, "html")
   * message = MIMEMultipart(_subparts=(part1, part2))
   * server.sendmail(sender_email, receiver_email, message.as_string())
   * ```
   *
   * * `source` would be `MIMEText(html, "html")`.
   * * The multipart call would be `MIMEMultipart(_subparts=(part1, part2))`.
   * * Then `message`'s local source node is correlated to that multipart call.
   * * Then the flow from `source` to `_subparts` is checked.
   *
   * Given the following example with `getSmtpMessage(any(SmtpLibSendMail s), "html")`:
   *
   * ```py
   * part1 = MIMEText(text, "plain")
   * part2 = MIMEText(html, "html")
   * message = MIMEMultipart("alternative")
   * message.attach(part1)
   * message.attach(part2)
   * server.sendmail(sender_email, receiver_email, message.as_string())
   * ```
   *
   * * `source` would be `MIMEText(html, "html")`.
   * * The attach call would be `message.attach(part2)`.
   * * Then the attach call's object (`message`) local source is correlated to
   *   `server.sendmail`'s 3rd argument local source (`MIMEMultipart("alternative")`).
   * * Then the flow from `source` to the attach call's 1st argument is checked.
   *
   * In both cases the result is the text argument of `source` (`html` above), whether it
   * is passed positionally or as the `_text` keyword.
   */
  bindingset[mimetype]
  private DataFlow::Node getSmtpMessage(DataFlow::CallCfgNode sendCall, string mimetype) {
    exists(DataFlow::CallCfgNode source |
      source = mimeText(mimetype) and
      (
        // via _subparts
        exists(DataFlow::CallCfgNode multipart |
          multipart = smtpMimeMultipartInstance().getACall() and
          multipart = getSmtpMessageSource(sendCall) and
          any(SMTPMessageConfig c).hasFlow(source, multipart.getArgByName("_subparts"))
        )
        or
        // via .attach()
        exists(DataFlow::MethodCallNode attach |
          attach = smtpMimeMultipartInstance().getReturn().getMember("attach").getACall() and
          attach.getObject().getALocalSource() = getSmtpMessageSource(sendCall) and
          source.flowsTo(attach.getArg(0))
        )
      ) and
      result = [source.getArg(0), source.getArgByName("_text")]
    )
  }

  /**
   * Gets a message subscript write by correlating subscript's object local source with
   * `smtp`'s `sendmail` call 3rd argument's local source.
   *
   * Given the following example with `getSMTPSubscriptByIndex(any(SmtpLibSendMail s), "Subject")`:
   *
   * ```py
   * message = MIMEMultipart("alternative")
   * message["Subject"] = "multipart test"
   * server.sendmail(sender_email, receiver_email, message.as_string())
   * ```
   *
   * * `def` would be `message["Subject"]` (`DefinitionNode`)
   * * `sub` would be `message["Subject"]` (`Subscript`)
   * * `result` would be `"multipart test"`
   */
  private DataFlow::Node getSMTPSubscriptByIndex(DataFlow::CallCfgNode sendCall, string index) {
    exists(DefinitionNode def, Subscript sub |
      sub = def.getNode() and
      DataFlow::exprNode(sub.getObject()).getALocalSource() = getSmtpMessageSource(sendCall) and
      // Exact match on the header name used as subscript index.
      sub.getIndex().(Str_).getS() = index and
      result.asCfgNode() = def.getValue()
    )
  }

  /**
   * Gets a reference to `smtplib.SMTP_SSL().sendmail()`.
   *
   * Given the following example:
   *
   * ```py
   * part1 = MIMEText(text, "plain")
   * part2 = MIMEText(html, "html")
   *
   * message = MIMEMultipart(_subparts=(part1, part2))
   * message["Subject"] = "multipart test"
   * message["From"] = sender_email
   * message["To"] = receiver_email
   *
   * server.login(sender_email, "SERVER_PASSWORD")
   * server.sendmail(sender_email, receiver_email, message.as_string())
   * ```
   *
   * * `this` would be `server.sendmail(sender_email, receiver_email, message.as_string())`.
   * * `getPlainTextBody()`'s result would be `text`.
   * * `getHtmlBody()`'s result would be `html`.
   * * `getTo()`'s result would be `receiver_email`.
   * * `getFrom()`'s result would be `sender_email`.
   * * `getSubject()`'s result would be `"multipart test"`.
   */
  private class SmtpLibSendMail extends DataFlow::CallCfgNode, EmailSender::Range {
    SmtpLibSendMail() {
      this = smtpConnectionInstance().getReturn().getMember("sendmail").getACall()
    }

    override DataFlow::Node getPlainTextBody() { result = getSmtpMessage(this, "plain") }

    override DataFlow::Node getHtmlBody() { result = getSmtpMessage(this, "html") }

    override DataFlow::Node getTo() {
      // 2nd positional argument of `sendmail`, or the value written to `message["To"]`.
      result in [this.getArg(1), getSMTPSubscriptByIndex(this, "To")]
    }

    override DataFlow::Node getFrom() {
      // 1st positional argument of `sendmail`, or the value written to `message["From"]`.
      result in [this.getArg(0), getSMTPSubscriptByIndex(this, "From")]
    }

    override DataFlow::Node getSubject() {
      // NOTE(review): `getArg(2)` is the whole message argument of `sendmail`, not a
      // dedicated subject; presumably included because the raw message carries the
      // headers - confirm this is intended.
      result in [this.getArg(2), getSMTPSubscriptByIndex(this, "Subject")]
    }
  }
}
53-
54- // MIMEMultipart has two ways it can add tainted data:
55- // MIMEMultipart(_subparts=(part1, part2))
56- // or
57- // message = MIMEMultipart("alternative")
58- // message.attach(part1)
59- //
60- //
61- // select SmtpLib::smtpMimeTextHTMLInstance()
62- // select API::moduleImport("email.mime.multipart")
63- // .getMember("MIMEMultipart")
64- // .getACall()
65- // .getArgByName("_subparts")
66- //
67- // from DataFlow::Node arg1
68- // where
69- // arg1 =
70- // API::moduleImport("email.mime.multipart")
71- // .getMember("MIMEMultipart")
72- // .getReturn()
73- // .getMember("attach")
74- // .getACall()
75- // .getArg(0)
76- //
77- // select SmtpLib::smtpMimeTextHTMLInstance() //.getReturn()
78- //
79- //.getArg(1) //.getAUse()
80- //
81- // Work on the smtpMimeTextHTMLInstance function
82- from DataFlow:: CallCfgNode result1
83- where
84- exists ( API:: Node mimeTextInstance , DataFlow:: CallCfgNode callNode |
85- mimeTextInstance = SmtpLib:: smtpMimeTextInstance ( ) .getReturn ( ) and
86- callNode = mimeTextInstance .getACall ( ) and
87- callNode .getArg ( 1 ) .asExpr ( ) .( Unicode ) .getText ( ) = "html" and
88- result1 = callNode
89- )
90- select result1
0 commit comments