@@ -12,6 +12,9 @@ private import semmle.python.dataflow.new.internal.ImportStar
1212//
1313// This matches behavior in C#.
1414private import semmle.python.Frameworks
15+ // part of the implementation for this module has been spread over multiple files to
16+ // make it more digestible.
17+ import MatchUnpacking
1518
1619/** Gets the callable in which node `n` occurs, as reported by `n.getEnclosingCallable()`. */
1720DataFlowCallable nodeGetEnclosingCallable ( Node n ) { result = n .getEnclosingCallable ( ) }
@@ -1659,320 +1662,6 @@ module IterableUnpacking {
16591662
16601663import IterableUnpacking
16611664
1662- /**
1663- * There are a number of patterns available for the match statement.
1664- * Each one transfers data and content differently to its parts.
1665- *
1666- * Furthermore, given a successful match, we can infer some data about
1667- * the subject. Consider the example:
1668- * ```python
1669- * match choice:
1670- * case 'Y':
1671- * ...body
1672- * ```
1673- * Inside `body`, we know that `choice` has the value `'Y'`.
1674- *
1675- * A similar thing happens with the "as pattern". Consider the example:
1676- * ```python
1677- * match choice:
1678- * case ('y'|'Y') as c:
1679- * ...body
1680- * ```
1681- * By the binding rules, there is data flow from `choice` to `c`. But we
1682- * can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
1683- *
1684- * We will treat such inferences separately as guards. First we will model the data flow
1685- * stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
1686- * top-level subject of the match, but rather the part recursively matched by the current pattern.
1687- * For instance, in the example:
1688- * ```python
1689- * match command:
1690- * case ('quit' as c) | ('go', ('up'|'down') as c):
1691- * ...body
1692- * ```
1693- * `command` is the subject of the first as-pattern, while the second component of `command`
1694- * is the subject of the second as-pattern. As such, 'subject' refers to the pattern under evaluation.
1695- *
1696- * - as pattern: subject flows to alias as well as to the interior pattern
1697- * - or pattern: subject flows to each alternative
1698- * - literal pattern: flow from the literal to the pattern, to add information
1699- * - capture pattern: subject flows to the variable
1700- * - wildcard pattern: no flow
1701- * - value pattern: flow from the value to the pattern, to add information
1702- * - sequence pattern: each element reads from subject at the associated index
1703- * - star pattern: subject flows to the variable, possibly via a conversion
1704- * - mapping pattern: each value reads from subject at the associated key
1705- * - double star pattern: subject flows to the variable, possibly via a conversion
1706- * - key-value pattern: the value reads from the subject at the key (see mapping pattern)
1707- * - class pattern: all keywords read the appropriate attribute from the subject
1708- * - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
1709- *
1710- * Inside the class pattern, we also find positional arguments. They are converted to
1711- * keyword arguments using the `__match_args__` attribute on the class. We do not
1712- * currently model this.
1713- */
1714- module MatchUnpacking {
1715- /**
1716- * Holds when there is flow from the subject `nodeFrom` to the (top-level) pattern `nodeTo` of a `match` statement.
1717- *
1718- * The subject of a match flows to each top-level pattern
1719- * (a pattern directly under a `case` statement).
1720- *
1721- * We could consider a model closer to use-use-flow, where the subject
1722- * only flows to the first top-level pattern and from there to the
1723- * following ones.
1724- */
1725- predicate matchSubjectFlowStep ( Node nodeFrom , Node nodeTo ) {
1726- exists ( MatchStmt match , Expr subject , Pattern target |
1727- subject = match .getSubject ( ) and
1728- target = match .getCase ( _) .( Case ) .getPattern ( )
1729- |
1730- nodeFrom .asExpr ( ) = subject and
1731- nodeTo .asCfgNode ( ) .getNode ( ) = target
1732- )
1733- }
1734-
1735- /**
1736- * as pattern: subject flows to the interior pattern and from there to the alias
1737- * syntax (toplevel): `case pattern as alias:`
1738- */
1739- predicate matchAsFlowStep ( Node nodeFrom , Node nodeTo ) {
1740- exists ( MatchAsPattern subject , Name alias | alias = subject .getAlias ( ) |
1741- // We make the subject flow to the alias via the interior pattern.
1742- // That way, information can propagate from the interior pattern to the alias.
1743- //
1744- // the subject flows to the interior pattern
1745- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1746- nodeTo .asCfgNode ( ) .getNode ( ) = subject .getPattern ( )
1747- or
1748- // the interior pattern flows to the alias
1749- nodeFrom .asCfgNode ( ) .getNode ( ) = subject .getPattern ( ) and
1750- nodeTo .asVar ( ) .getDefinition ( ) .( PatternAliasDefinition ) .getDefiningNode ( ) .getNode ( ) = alias
1751- )
1752- }
1753-
1754- /**
1755- * or pattern: subject flows to each alternative
1756- * syntax (toplevel): `case alt1 | alt2:`
1757- */
1758- predicate matchOrFlowStep ( Node nodeFrom , Node nodeTo ) {
1759- exists ( MatchOrPattern subject , Pattern pattern | pattern = subject .getAPattern ( ) |
1760- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1761- nodeTo .asCfgNode ( ) .getNode ( ) = pattern
1762- )
1763- }
1764-
1765- /**
1766- * literal pattern: flow from the literal to the pattern, to add information
1767- * syntax (toplevel): `case literal:`
1768- */
1769- predicate matchLiteralFlowStep ( Node nodeFrom , Node nodeTo ) {
1770- exists ( MatchLiteralPattern pattern , Expr literal | literal = pattern .getLiteral ( ) |
1771- nodeFrom .asExpr ( ) = literal and
1772- nodeTo .asCfgNode ( ) .getNode ( ) = pattern
1773- )
1774- }
1775-
1776- /**
1777- * capture pattern: subject flows to the variable
1778- * syntax (toplevel): `case var:`
1779- */
1780- predicate matchCaptureFlowStep ( Node nodeFrom , Node nodeTo ) {
1781- exists ( MatchCapturePattern capture , Name var | capture .getVariable ( ) = var |
1782- nodeFrom .asCfgNode ( ) .getNode ( ) = capture and
1783- nodeTo .asVar ( ) .getDefinition ( ) .( PatternCaptureDefinition ) .getDefiningNode ( ) .getNode ( ) = var
1784- )
1785- }
1786-
1787- /**
1788- * value pattern: flow from the value to the pattern, to add information
1789- * syntax (toplevel): `case Dotted.value:`
1790- */
1791- predicate matchValueFlowStep ( Node nodeFrom , Node nodeTo ) {
1792- exists ( MatchValuePattern pattern , Expr value | value = pattern .getValue ( ) |
1793- nodeFrom .asExpr ( ) = value and
1794- nodeTo .asCfgNode ( ) .getNode ( ) = pattern
1795- )
1796- }
1797-
1798- /**
1799- * sequence pattern: each element reads from subject at the associated index
1800- * syntax (toplevel): `case [a, b]:`
1801- */
1802- predicate matchSequenceReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1803- exists ( MatchSequencePattern subject , int index , Pattern element |
1804- element = subject .getPattern ( index )
1805- |
1806- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1807- nodeTo .asCfgNode ( ) .getNode ( ) = element and
1808- (
1809- // tuple content at the position of the element
1810- c .( TupleElementContent ) .getIndex ( ) = index
1811- or
1812- // list content (no index constraint)
1813- c instanceof ListElementContent
1814- // set content is excluded from sequence patterns,
1815- // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1816- )
1817- )
1818- }
1819-
1820- /**
1821- * star pattern: subject flows to the variable, possibly via a conversion
1822- * syntax (toplevel): `case *var:`
1823- *
1824- * We decompose this flow into a read step and a store step. The read step
1825- * reads both tuple and list content, the store step only stores list content.
1826- * This way, we convert all content to list content.
1827- *
1828- * This is the read step.
1829- */
1830- predicate matchStarReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1831- exists ( MatchSequencePattern subject , int index , MatchStarPattern star |
1832- star = subject .getPattern ( index )
1833- |
1834- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1835- nodeTo = TStarPatternElementNode ( star ) and
1836- (
1837- // tuple content at or after the position of the star pattern
1838- c .( TupleElementContent ) .getIndex ( ) >= index
1839- or
1840- // list content
1841- c instanceof ListElementContent
1842- // set content is excluded from sequence patterns,
1843- // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
1844- )
1845- )
1846- }
1847-
1848- /**
1849- * star pattern: subject flows to the variable, possibly via a conversion
1850- * syntax (toplevel): `case *var:`
1851- *
1852- * We decompose this flow into a read step and a store step. The read step
1853- * reads both tuple and list content, the store step only stores list content.
1854- * This way, we convert all content to list content.
1855- *
1856- * This is the store step.
1857- */
1858- predicate matchStarStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1859- exists ( MatchStarPattern star |
1860- nodeFrom = TStarPatternElementNode ( star ) and
1861- nodeTo .asCfgNode ( ) .getNode ( ) = star .getTarget ( ) and
1862- c instanceof ListElementContent
1863- )
1864- }
1865-
1866- /**
1867- * mapping pattern: each value reads from subject at the associated (string literal) key
1868- * syntax (toplevel): `case {"color": c, "height": x}:`
1869- */
1870- predicate matchMappingReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1871- exists (
1872- MatchMappingPattern subject , MatchKeyValuePattern keyValue , MatchLiteralPattern key ,
1873- Pattern value
1874- |
1875- keyValue = subject .getAMapping ( ) and
1876- key = keyValue .getKey ( ) and
1877- value = keyValue .getValue ( )
1878- |
1879- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1880- nodeTo .asCfgNode ( ) .getNode ( ) = value and
1881- c .( DictionaryElementContent ) .getKey ( ) = key .getLiteral ( ) .( StrConst ) .getText ( )
1882- )
1883- }
1884-
1885- /**
1886- * double star pattern: subject flows to the variable, possibly via a conversion
1887- * syntax (toplevel): `case {**var}:`
1888- *
1889- * Dictionary content flows to the double star, but all mentioned keys in the
1890- * mapping pattern should be cleared.
1891- */
1892- predicate matchMappingFlowStep ( Node nodeFrom , Node nodeTo ) {
1893- exists ( MatchMappingPattern subject , MatchDoubleStarPattern dstar |
1894- dstar = subject .getAMapping ( )
1895- |
1896- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1897- nodeTo .asCfgNode ( ) .getNode ( ) = dstar .getTarget ( )
1898- )
1899- }
1900-
1901- /**
1902- * String literal keys that are mentioned in a mapping pattern will not be available
1903- * to a double star pattern in the same mapping pattern.
1904- */
1905- predicate matchMappingClearStep ( Node n , Content c ) {
1906- exists (
1907- MatchMappingPattern subject , MatchKeyValuePattern keyValue , MatchLiteralPattern key ,
1908- MatchDoubleStarPattern dstar
1909- |
1910- keyValue = subject .getAMapping ( ) and
1911- key = keyValue .getKey ( ) and
1912- dstar = subject .getAMapping ( )
1913- |
1914- n .asCfgNode ( ) .getNode ( ) = dstar .getTarget ( ) and
1915- c .( DictionaryElementContent ) .getKey ( ) = key .getLiteral ( ) .( StrConst ) .getText ( )
1916- )
1917- }
1918-
1919- /**
1920- * class pattern: all keywords read the appropriate attribute from the subject
1921- * syntax (toplevel): `case ClassName(attr = val):`
1922- */
1923- predicate matchClassReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1924- exists ( MatchClassPattern subject , MatchKeywordPattern keyword , Name attr , Pattern value |
1925- keyword = subject .getKeyword ( _) and
1926- attr = keyword .getAttribute ( ) and
1927- value = keyword .getValue ( )
1928- |
1929- nodeFrom .asCfgNode ( ) .getNode ( ) = subject and
1930- nodeTo .asCfgNode ( ) .getNode ( ) = value and
1931- c .( AttributeContent ) .getAttribute ( ) = attr .getId ( )
1932- )
1933- }
1934-
1935- /** All flow steps associated with match. */
1936- predicate matchFlowStep ( Node nodeFrom , Node nodeTo ) {
1937- matchSubjectFlowStep ( nodeFrom , nodeTo )
1938- or
1939- matchAsFlowStep ( nodeFrom , nodeTo )
1940- or
1941- matchOrFlowStep ( nodeFrom , nodeTo )
1942- or
1943- matchLiteralFlowStep ( nodeFrom , nodeTo )
1944- or
1945- matchCaptureFlowStep ( nodeFrom , nodeTo )
1946- or
1947- matchValueFlowStep ( nodeFrom , nodeTo )
1948- or
1949- matchMappingFlowStep ( nodeFrom , nodeTo )
1950- }
1951-
1952- /** All read steps associated with match. */
1953- predicate matchReadStep ( Node nodeFrom , Content c , Node nodeTo ) {
1954- matchClassReadStep ( nodeFrom , c , nodeTo )
1955- or
1956- matchSequenceReadStep ( nodeFrom , c , nodeTo )
1957- or
1958- matchMappingReadStep ( nodeFrom , c , nodeTo )
1959- or
1960- matchStarReadStep ( nodeFrom , c , nodeTo )
1961- }
1962-
1963- /** All store steps associated with match. */
1964- predicate matchStoreStep ( Node nodeFrom , Content c , Node nodeTo ) {
1965- matchStarStoreStep ( nodeFrom , c , nodeTo )
1966- }
1967-
1968- /**
1969- * All clear steps associated with match.
1970- */
1971- predicate matchClearStep ( Node n , Content c ) { matchMappingClearStep ( n , c ) }
1972- }
1973-
1974- import MatchUnpacking
1975-
19761665/** Data flows from a sequence to a call to `pop` on the sequence. */
19771666predicate popReadStep ( CfgNode nodeFrom , Content c , CfgNode nodeTo ) {
19781667 // set.pop or list.pop
0 commit comments