Skip to content
This repository was archived by the owner on Mar 29, 2023. It is now read-only.

Commit 13dd135

Browse files
saschahofmann and tswast
authored
ci: Format black (#68)
* run black
* add black ci step
* put isort in black compatibility mode
* fix flake8
* add lint deps
* fix pin
* add missing types packages

Co-authored-by: saschahofmann <sascha.hofmann@gyana.co.uk>
Co-authored-by: Tim Swast <swast@google.com>
1 parent 0ba15c3 commit 13dd135

24 files changed

+756
-897
lines changed

.flake8

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[flake8]
2+
ignore = E203, E266, E501, W503
3+
exclude =
4+
# Standard linting exemptions.
5+
**/.nox/**
6+
__pycache__,
7+
.git,
8+
*.pyc,
9+
conf.py

.github/workflows/main.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,18 @@ jobs:
3333
run: mypy --ignore-missing-imports .
3434
if: always()
3535

36+
- name: black
37+
run: black --check .
38+
if: always()
39+
3640
# TODO: don't ignore pydocstyle
3741
# https://github.com/ibis-project/ibis-bigquery/issues/13
3842
# - name: pydocstyle
3943
# run: pydocstyle --match-dir='^(?!tests|.*udf).*' .
4044
# if: always()
4145

4246
- name: isort
43-
run: isort --check-only .
47+
run: isort --profile black --check-only .
4448
if: always()
4549

4650
unit-tests:

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pydata-google-auth
1010

1111
# dev
12+
- black=19.10b0 # Same as ibis
1213
- pytest
1314
- pytest-cov
1415
- pytest-mock

ibis_bigquery/__init__.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
from pydata_google_auth import cache
99

1010
from . import version as ibis_bigquery_version
11-
from .client import (BigQueryClient, BigQueryDatabase, BigQueryQuery,
12-
BigQueryTable)
11+
from .client import BigQueryClient, BigQueryDatabase, BigQueryQuery, BigQueryTable
1312
from .compiler import BigQueryExprTranslator, BigQueryQueryBuilder
1413

1514
try:
@@ -31,15 +30,13 @@
3130
"https://www.googleapis.com/auth/cloud-platform",
3231
"https://www.googleapis.com/auth/drive",
3332
]
34-
CLIENT_ID = (
35-
"546535678771-gvffde27nd83kfl6qbrnletqvkdmsese.apps.googleusercontent.com"
36-
)
33+
CLIENT_ID = "546535678771-gvffde27nd83kfl6qbrnletqvkdmsese.apps.googleusercontent.com"
3734
CLIENT_SECRET = "iU5ohAF2qcqrujegE3hQ1cPt"
3835

3936

4037
class Backend(BaseBackend):
41-
name = 'bigquery'
42-
kind = 'sql'
38+
name = "bigquery"
39+
kind = "sql"
4340
builder = BigQueryQueryBuilder
4441
translator = BigQueryExprTranslator
4542
query_class = BigQueryQuery
@@ -51,7 +48,7 @@ class Backend(BaseBackend):
5148
# https://github.com/ibis-project/ibis-bigquery/issues/30
5249
supports_divide_by_zero = True
5350
supports_floating_modulus = False
54-
returned_timestamp_unit = 'us'
51+
returned_timestamp_unit = "us"
5552

5653
def connect(
5754
self,

ibis_bigquery/backcompat.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# 1.2
1616
from ibis.common import TranslationError
1717

18-
__all__ = ('BaseBackend',)
18+
__all__ = ("BaseBackend",)
1919

2020

2121
class BaseBackend(abc.ABC):
@@ -71,18 +71,18 @@ def dialect(self):
7171
# want to create subclasses for each of the kinds
7272
# (e.g. `BaseSQLAlchemyBackend`)
7373
# TODO check if the below dialects can be merged into a single one
74-
if self.kind == 'sqlalchemy':
74+
if self.kind == "sqlalchemy":
7575
from ibis.backends.base_sqlalchemy.alchemy import AlchemyDialect
7676

7777
dialect_class = AlchemyDialect
78-
elif self.kind in ('sql', 'pandas'):
78+
elif self.kind in ("sql", "pandas"):
7979
try:
8080
from ibis.backends.base_sqlalchemy.compiler import Dialect
8181
except ImportError:
8282
from ibis.sql.compiler import Dialect
8383

8484
dialect_class = Dialect
85-
elif self.kind == 'spark':
85+
elif self.kind == "spark":
8686
from ibis.backends.base_sql.compiler import BaseDialect
8787

8888
dialect_class = BaseDialect

ibis_bigquery/client.py

Lines changed: 44 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,32 @@
2828
from . import compiler as comp
2929
from .datatypes import ibis_type_to_bigquery_type
3030

31-
NATIVE_PARTITION_COL = '_PARTITIONTIME'
31+
NATIVE_PARTITION_COL = "_PARTITIONTIME"
3232

3333

3434
_DTYPE_TO_IBIS_TYPE = {
35-
'INT64': dt.int64,
36-
'FLOAT64': dt.double,
37-
'BOOL': dt.boolean,
38-
'STRING': dt.string,
39-
'DATE': dt.date,
35+
"INT64": dt.int64,
36+
"FLOAT64": dt.double,
37+
"BOOL": dt.boolean,
38+
"STRING": dt.string,
39+
"DATE": dt.date,
4040
# FIXME: enforce no tz info
41-
'DATETIME': dt.timestamp,
42-
'TIME': dt.time,
43-
'TIMESTAMP': dt.timestamp,
44-
'BYTES': dt.binary,
45-
'NUMERIC': dt.Decimal(38, 9),
41+
"DATETIME": dt.timestamp,
42+
"TIME": dt.time,
43+
"TIMESTAMP": dt.timestamp,
44+
"BYTES": dt.binary,
45+
"NUMERIC": dt.Decimal(38, 9),
4646
}
4747

4848

4949
_LEGACY_TO_STANDARD = {
50-
'INTEGER': 'INT64',
51-
'FLOAT': 'FLOAT64',
52-
'BOOLEAN': 'BOOL',
50+
"INTEGER": "INT64",
51+
"FLOAT": "FLOAT64",
52+
"BOOLEAN": "BOOL",
5353
}
5454

5555

56-
_USER_AGENT_DEFAULT_TEMPLATE = 'ibis/{}'
56+
_USER_AGENT_DEFAULT_TEMPLATE = "ibis/{}"
5757

5858

5959
def _create_client_info(application_name):
@@ -70,16 +70,16 @@ def _create_client_info(application_name):
7070
def bigquery_field_to_ibis_dtype(field):
7171
"""Convert BigQuery `field` to an ibis type."""
7272
typ = field.field_type
73-
if typ == 'RECORD':
73+
if typ == "RECORD":
7474
fields = field.fields
75-
assert fields, 'RECORD fields are empty'
75+
assert fields, "RECORD fields are empty"
7676
names = [el.name for el in fields]
7777
ibis_types = list(map(dt.dtype, fields))
7878
ibis_type = dt.Struct(names, ibis_types)
7979
else:
8080
ibis_type = _LEGACY_TO_STANDARD.get(typ, typ)
8181
ibis_type = _DTYPE_TO_IBIS_TYPE.get(ibis_type, ibis_type)
82-
if field.mode == 'REPEATED':
82+
if field.mode == "REPEATED":
8383
ibis_type = dt.Array(ibis_type)
8484
return ibis_type
8585

@@ -88,11 +88,11 @@ def bigquery_field_to_ibis_dtype(field):
8888
def bigquery_schema(table):
8989
"""Infer the schema of a BigQuery `table` object."""
9090
fields = OrderedDict((el.name, dt.dtype(el)) for el in table.schema)
91-
partition_info = table._properties.get('timePartitioning', None)
91+
partition_info = table._properties.get("timePartitioning", None)
9292

9393
# We have a partitioned table
9494
if partition_info is not None:
95-
partition_field = partition_info.get('field', NATIVE_PARTITION_COL)
95+
partition_field = partition_info.get("field", NATIVE_PARTITION_COL)
9696

9797
# Only add a new column if it's not already a column in the schema
9898
fields.setdefault(partition_field, dt.timestamp)
@@ -176,13 +176,9 @@ def __init__(self, client, ddl, query_parameters=None):
176176
super().__init__(client, ddl)
177177

178178
# self.expr comes from the parent class
179-
query_parameter_names = dict(
180-
lin.traverse(_find_scalar_parameter, self.expr)
181-
)
179+
query_parameter_names = dict(lin.traverse(_find_scalar_parameter, self.expr))
182180
self.query_parameters = [
183-
bigquery_param(
184-
param.to_expr().name(query_parameter_names[param]), value
185-
)
181+
bigquery_param(param.to_expr().name(query_parameter_names[param]), value)
186182
for param, value in (query_parameters or {}).items()
187183
]
188184

@@ -194,9 +190,7 @@ def _fetch(self, cursor):
194190
def execute(self):
195191
# synchronous by default
196192
with self.client._execute(
197-
self.compiled_sql,
198-
results=True,
199-
query_parameters=self.query_parameters,
193+
self.compiled_sql, results=True, query_parameters=self.query_parameters,
200194
) as cur:
201195
result = self._fetch(cur)
202196

@@ -207,7 +201,7 @@ class BigQueryDatabase(Database):
207201
"""A BigQuery dataset."""
208202

209203

210-
bigquery_param = Dispatcher('bigquery_param')
204+
bigquery_param = Dispatcher("bigquery_param")
211205

212206

213207
@bigquery_param.register(ir.StructScalar, OrderedDict)
@@ -229,51 +223,45 @@ def bq_param_array(param, value):
229223
else:
230224
if isinstance(param_type.value_type, dt.Struct):
231225
query_value = [
232-
bigquery_param(param[i].name('element_{:d}'.format(i)), struct)
226+
bigquery_param(param[i].name("element_{:d}".format(i)), struct)
233227
for i, struct in enumerate(value)
234228
]
235-
bigquery_type = 'STRUCT'
229+
bigquery_type = "STRUCT"
236230
elif isinstance(param_type.value_type, dt.Array):
237-
raise TypeError('ARRAY<ARRAY<T>> is not supported in BigQuery')
231+
raise TypeError("ARRAY<ARRAY<T>> is not supported in BigQuery")
238232
else:
239233
query_value = value
240-
result = bq.ArrayQueryParameter(
241-
param.get_name(), bigquery_type, query_value
242-
)
234+
result = bq.ArrayQueryParameter(param.get_name(), bigquery_type, query_value)
243235
return result
244236

245237

246-
@bigquery_param.register(
247-
ir.TimestampScalar, (str, datetime.datetime, datetime.date)
248-
)
238+
@bigquery_param.register(ir.TimestampScalar, (str, datetime.datetime, datetime.date))
249239
def bq_param_timestamp(param, value):
250240
assert isinstance(param.type(), dt.Timestamp), str(param.type())
251241

252242
# TODO(phillipc): Not sure if this is the correct way to do this.
253-
timestamp_value = pd.Timestamp(value, tz='UTC').to_pydatetime()
254-
return bq.ScalarQueryParameter(
255-
param.get_name(), 'TIMESTAMP', timestamp_value
256-
)
243+
timestamp_value = pd.Timestamp(value, tz="UTC").to_pydatetime()
244+
return bq.ScalarQueryParameter(param.get_name(), "TIMESTAMP", timestamp_value)
257245

258246

259247
@bigquery_param.register(ir.StringScalar, str)
260248
def bq_param_string(param, value):
261-
return bq.ScalarQueryParameter(param.get_name(), 'STRING', value)
249+
return bq.ScalarQueryParameter(param.get_name(), "STRING", value)
262250

263251

264252
@bigquery_param.register(ir.IntegerScalar, int)
265253
def bq_param_integer(param, value):
266-
return bq.ScalarQueryParameter(param.get_name(), 'INT64', value)
254+
return bq.ScalarQueryParameter(param.get_name(), "INT64", value)
267255

268256

269257
@bigquery_param.register(ir.FloatingScalar, float)
270258
def bq_param_double(param, value):
271-
return bq.ScalarQueryParameter(param.get_name(), 'FLOAT64', value)
259+
return bq.ScalarQueryParameter(param.get_name(), "FLOAT64", value)
272260

273261

274262
@bigquery_param.register(ir.BooleanScalar, bool)
275263
def bq_param_boolean(param, value):
276-
return bq.ScalarQueryParameter(param.get_name(), 'BOOL', value)
264+
return bq.ScalarQueryParameter(param.get_name(), "BOOL", value)
277265

278266

279267
@bigquery_param.register(ir.DateScalar, str)
@@ -288,7 +276,7 @@ def bq_param_date_datetime(param, value):
288276

289277
@bigquery_param.register(ir.DateScalar, datetime.date)
290278
def bq_param_date(param, value):
291-
return bq.ScalarQueryParameter(param.get_name(), 'DATE', value)
279+
return bq.ScalarQueryParameter(param.get_name(), "DATE", value)
292280

293281

294282
class BigQueryTable(ops.DatabaseTable):
@@ -297,15 +285,15 @@ class BigQueryTable(ops.DatabaseTable):
297285

298286
def rename_partitioned_column(table_expr, bq_table, partition_col):
299287
"""Rename native partition column to user-defined name."""
300-
partition_info = bq_table._properties.get('timePartitioning', None)
288+
partition_info = bq_table._properties.get("timePartitioning", None)
301289

302290
# If we don't have any partition information, the table isn't partitioned
303291
if partition_info is None:
304292
return table_expr
305293

306294
# If we have a partition, but no "field" field in the table properties,
307295
# then use NATIVE_PARTITION_COL as the default
308-
partition_field = partition_info.get('field', NATIVE_PARTITION_COL)
296+
partition_field = partition_info.get("field", NATIVE_PARTITION_COL)
309297

310298
# The partition field must be in table_expr columns
311299
assert partition_field in table_expr.columns
@@ -421,7 +409,7 @@ def _parse_project_and_dataset(self, dataset):
421409
raise ValueError("Unable to determine BigQuery dataset.")
422410
project, _, dataset = parse_project_and_dataset(
423411
self.billing_project,
424-
dataset or '{}.{}'.format(self.data_project, self.dataset),
412+
dataset or "{}.{}".format(self.data_project, self.dataset),
425413
)
426414
return project, dataset
427415

@@ -435,7 +423,7 @@ def dataset_id(self):
435423

436424
def table(self, name, database=None):
437425
t = super().table(name, database=database)
438-
project, dataset, name = t.op().name.split('.')
426+
project, dataset, name = t.op().name.split(".")
439427
dataset_ref = self.client.dataset(dataset, project=project)
440428
table_ref = dataset_ref.table(name)
441429
bq_table = self.client.get_table(table_ref)
@@ -453,7 +441,7 @@ def _fully_qualified_name(self, name, database):
453441
return "{}.{}.{}".format(project, dataset, name)
454442

455443
def _get_table_schema(self, qualified_name):
456-
dataset, table = qualified_name.rsplit('.', 1)
444+
dataset, table = qualified_name.rsplit(".", 1)
457445
assert dataset is not None, "dataset is None"
458446
return self.get_schema(table, database=dataset)
459447

@@ -511,9 +499,8 @@ def exists_database(self, name):
511499

512500
def list_databases(self, like=None):
513501
results = [
514-
dataset.dataset_id for dataset in self.client.list_datasets(
515-
project=self.data_project
516-
)
502+
dataset.dataset_id
503+
for dataset in self.client.list_datasets(project=self.data_project)
517504
]
518505
if like:
519506
results = [
@@ -538,9 +525,7 @@ def exists_table(self, name, database=None):
538525
def list_tables(self, like=None, database=None):
539526
project, dataset = self._parse_project_and_dataset(database)
540527
dataset_ref = bq.DatasetReference(project, dataset)
541-
result = [
542-
table.table_id for table in self.client.list_tables(dataset_ref)
543-
]
528+
result = [table.table_id for table in self.client.list_tables(dataset_ref)]
544529
if like:
545530
result = [
546531
table_name

0 commit comments

Comments
 (0)