Skip to content
This repository was archived by the owner on Mar 29, 2023. It is now read-only.

Commit 13dd135

Browse files
saschahofmann and tswast
authored
ci: Format black (#68)
* run black
* add black ci step
* put isort in black compatibility mode
* fix flake8
* add lint deps
* fix pin
* add missing types packages

Co-authored-by: saschahofmann <sascha.hofmann@gyana.co.uk>
Co-authored-by: Tim Swast <swast@google.com>
1 parent 0ba15c3 commit 13dd135

24 files changed

+756
-897
lines changed

.flake8

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[flake8]
2+
ignore = E203, E266, E501, W503
3+
exclude =
4+
# Standard linting exemptions.
5+
**/.nox/**
6+
__pycache__,
7+
.git,
8+
*.pyc,
9+
conf.py

.github/workflows/main.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,18 @@ jobs:
3333
run: mypy --ignore-missing-imports .
3434
if: always()
3535

36+
- name: black
37+
run: black --check .
38+
if: always()
39+
3640
# TODO: don't ignore pydocstyle
3741
# https://github.com/ibis-project/ibis-bigquery/issues/13
3842
# - name: pydocstyle
3943
# run: pydocstyle --match-dir='^(?!tests|.*udf).*' .
4044
# if: always()
4145

4246
- name: isort
43-
run: isort --check-only .
47+
run: isort --profile black --check-only .
4448
if: always()
4549

4650
unit-tests:

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pydata-google-auth
1010

1111
# dev
12+
- black=19.10b0 # Same as ibis
1213
- pytest
1314
- pytest-cov
1415
- pytest-mock

ibis_bigquery/__init__.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
from pydata_google_auth import cache
99

1010
from . import version as ibis_bigquery_version
11-
from .client import (BigQueryClient, BigQueryDatabase, BigQueryQuery,
12-
BigQueryTable)
11+
from .client import BigQueryClient, BigQueryDatabase, BigQueryQuery, BigQueryTable
1312
from .compiler import BigQueryExprTranslator, BigQueryQueryBuilder
1413

1514
try:
@@ -31,15 +30,13 @@
3130
"https://www.googleapis.com/auth/cloud-platform",
3231
"https://www.googleapis.com/auth/drive",
3332
]
34-
CLIENT_ID = (
35-
"546535678771-gvffde27nd83kfl6qbrnletqvkdmsese.apps.googleusercontent.com"
36-
)
33+
CLIENT_ID = "546535678771-gvffde27nd83kfl6qbrnletqvkdmsese.apps.googleusercontent.com"
3734
CLIENT_SECRET = "iU5ohAF2qcqrujegE3hQ1cPt"
3835

3936

4037
class Backend(BaseBackend):
41-
name = 'bigquery'
42-
kind = 'sql'
38+
name = "bigquery"
39+
kind = "sql"
4340
builder = BigQueryQueryBuilder
4441
translator = BigQueryExprTranslator
4542
query_class = BigQueryQuery
@@ -51,7 +48,7 @@ class Backend(BaseBackend):
5148
# https://github.com/ibis-project/ibis-bigquery/issues/30
5249
supports_divide_by_zero = True
5350
supports_floating_modulus = False
54-
returned_timestamp_unit = 'us'
51+
returned_timestamp_unit = "us"
5552

5653
def connect(
5754
self,

ibis_bigquery/backcompat.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# 1.2
1616
from ibis.common import TranslationError
1717

18-
__all__ = ('BaseBackend',)
18+
__all__ = ("BaseBackend",)
1919

2020

2121
class BaseBackend(abc.ABC):
@@ -71,18 +71,18 @@ def dialect(self):
7171
# want to create subclasses for each of the kinds
7272
# (e.g. `BaseSQLAlchemyBackend`)
7373
# TODO check if the below dialects can be merged into a single one
74-
if self.kind == 'sqlalchemy':
74+
if self.kind == "sqlalchemy":
7575
from ibis.backends.base_sqlalchemy.alchemy import AlchemyDialect
7676

7777
dialect_class = AlchemyDialect
78-
elif self.kind in ('sql', 'pandas'):
78+
elif self.kind in ("sql", "pandas"):
7979
try:
8080
from ibis.backends.base_sqlalchemy.compiler import Dialect
8181
except ImportError:
8282
from ibis.sql.compiler import Dialect
8383

8484
dialect_class = Dialect
85-
elif self.kind == 'spark':
85+
elif self.kind == "spark":
8686
from ibis.backends.base_sql.compiler import BaseDialect
8787

8888
dialect_class = BaseDialect

ibis_bigquery/client.py

Lines changed: 44 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,32 @@
2828
from . import compiler as comp
2929
from .datatypes import ibis_type_to_bigquery_type
3030

31-
NATIVE_PARTITION_COL = '_PARTITIONTIME'
31+
NATIVE_PARTITION_COL = "_PARTITIONTIME"
3232

3333

3434
_DTYPE_TO_IBIS_TYPE = {
35-
'INT64': dt.int64,
36-
'FLOAT64': dt.double,
37-
'BOOL': dt.boolean,
38-
'STRING': dt.string,
39-
'DATE': dt.date,
35+
"INT64": dt.int64,
36+
"FLOAT64": dt.double,
37+
"BOOL": dt.boolean,
38+
"STRING": dt.string,
39+
"DATE": dt.date,
4040
# FIXME: enforce no tz info
41-
'DATETIME': dt.timestamp,
42-
'TIME': dt.time,
43-
'TIMESTAMP': dt.timestamp,
44-
'BYTES': dt.binary,
45-
'NUMERIC': dt.Decimal(38, 9),
41+
"DATETIME": dt.timestamp,
42+
"TIME": dt.time,
43+
"TIMESTAMP": dt.timestamp,
44+
"BYTES": dt.binary,
45+
"NUMERIC": dt.Decimal(38, 9),
4646
}
4747

4848

4949
_LEGACY_TO_STANDARD = {
50-
'INTEGER': 'INT64',
51-
'FLOAT': 'FLOAT64',
52-
'BOOLEAN': 'BOOL',
50+
"INTEGER": "INT64",
51+
"FLOAT": "FLOAT64",
52+
"BOOLEAN": "BOOL",
5353
}
5454

5555

56-
_USER_AGENT_DEFAULT_TEMPLATE = 'ibis/{}'
56+
_USER_AGENT_DEFAULT_TEMPLATE = "ibis/{}"
5757

5858

5959
def _create_client_info(application_name):
@@ -70,16 +70,16 @@ def _create_client_info(application_name):
7070
def bigquery_field_to_ibis_dtype(field):
7171
"""Convert BigQuery `field` to an ibis type."""
7272
typ = field.field_type
73-
if typ == 'RECORD':
73+
if typ == "RECORD":
7474
fields = field.fields
75-
assert fields, 'RECORD fields are empty'
75+
assert fields, "RECORD fields are empty"
7676
names = [el.name for el in fields]
7777
ibis_types = list(map(dt.dtype, fields))
7878
ibis_type = dt.Struct(names, ibis_types)
7979
else:
8080
ibis_type = _LEGACY_TO_STANDARD.get(typ, typ)
8181
ibis_type = _DTYPE_TO_IBIS_TYPE.get(ibis_type, ibis_type)
82-
if field.mode == 'REPEATED':
82+
if field.mode == "REPEATED":
8383
ibis_type = dt.Array(ibis_type)
8484
return ibis_type
8585

@@ -88,11 +88,11 @@ def bigquery_field_to_ibis_dtype(field):
8888
def bigquery_schema(table):
8989
"""Infer the schema of a BigQuery `table` object."""
9090
fields = OrderedDict((el.name, dt.dtype(el)) for el in table.schema)
91-
partition_info = table._properties.get('timePartitioning', None)
91+
partition_info = table._properties.get("timePartitioning", None)
9292

9393
# We have a partitioned table
9494
if partition_info is not None:
95-
partition_field = partition_info.get('field', NATIVE_PARTITION_COL)
95+
partition_field = partition_info.get("field", NATIVE_PARTITION_COL)
9696

9797
# Only add a new column if it's not already a column in the schema
9898
fields.setdefault(partition_field, dt.timestamp)
@@ -176,13 +176,9 @@ def __init__(self, client, ddl, query_parameters=None):
176176
super().__init__(client, ddl)
177177

178178
# self.expr comes from the parent class
179-
query_parameter_names = dict(
180-
lin.traverse(_find_scalar_parameter, self.expr)
181-
)
179+
query_parameter_names = dict(lin.traverse(_find_scalar_parameter, self.expr))
182180
self.query_parameters = [
183-
bigquery_param(
184-
param.to_expr().name(query_parameter_names[param]), value
185-
)
181+
bigquery_param(param.to_expr().name(query_parameter_names[param]), value)
186182
for param, value in (query_parameters or {}).items()
187183
]
188184

@@ -194,9 +190,7 @@ def _fetch(self, cursor):
194190
def execute(self):
195191
# synchronous by default
196192
with self.client._execute(
197-
self.compiled_sql,
198-
results=True,
199-
query_parameters=self.query_parameters,
193+
self.compiled_sql, results=True, query_parameters=self.query_parameters,
200194
) as cur:
201195
result = self._fetch(cur)
202196

@@ -207,7 +201,7 @@ class BigQueryDatabase(Database):
207201
"""A BigQuery dataset."""
208202

209203

210-
bigquery_param = Dispatcher('bigquery_param')
204+
bigquery_param = Dispatcher("bigquery_param")
211205

212206

213207
@bigquery_param.register(ir.StructScalar, OrderedDict)
@@ -229,51 +223,45 @@ def bq_param_array(param, value):
229223
else:
230224
if isinstance(param_type.value_type, dt.Struct):
231225
query_value = [
232-
bigquery_param(param[i].name('element_{:d}'.format(i)), struct)
226+
bigquery_param(param[i].name("element_{:d}".format(i)), struct)
233227
for i, struct in enumerate(value)
234228
]
235-
bigquery_type = 'STRUCT'
229+
bigquery_type = "STRUCT"
236230
elif isinstance(param_type.value_type, dt.Array):
237-
raise TypeError('ARRAY<ARRAY<T>> is not supported in BigQuery')
231+
raise TypeError("ARRAY<ARRAY<T>> is not supported in BigQuery")
238232
else:
239233
query_value = value
240-
result = bq.ArrayQueryParameter(
241-
param.get_name(), bigquery_type, query_value
242-
)
234+
result = bq.ArrayQueryParameter(param.get_name(), bigquery_type, query_value)
243235
return result
244236

245237

246-
@bigquery_param.register(
247-
ir.TimestampScalar, (str, datetime.datetime, datetime.date)
248-
)
238+
@bigquery_param.register(ir.TimestampScalar, (str, datetime.datetime, datetime.date))
249239
def bq_param_timestamp(param, value):
250240
assert isinstance(param.type(), dt.Timestamp), str(param.type())
251241

252242
# TODO(phillipc): Not sure if this is the correct way to do this.
253-
timestamp_value = pd.Timestamp(value, tz='UTC').to_pydatetime()
254-
return bq.ScalarQueryParameter(
255-
param.get_name(), 'TIMESTAMP', timestamp_value
256-
)
243+
timestamp_value = pd.Timestamp(value, tz="UTC").to_pydatetime()
244+
return bq.ScalarQueryParameter(param.get_name(), "TIMESTAMP", timestamp_value)
257245

258246

259247
@bigquery_param.register(ir.StringScalar, str)
260248
def bq_param_string(param, value):
261-
return bq.ScalarQueryParameter(param.get_name(), 'STRING', value)
249+
return bq.ScalarQueryParameter(param.get_name(), "STRING", value)
262250

263251

264252
@bigquery_param.register(ir.IntegerScalar, int)
265253
def bq_param_integer(param, value):
266-
return bq.ScalarQueryParameter(param.get_name(), 'INT64', value)
254+
return bq.ScalarQueryParameter(param.get_name(), "INT64", value)
267255

268256

269257
@bigquery_param.register(ir.FloatingScalar, float)
270258
def bq_param_double(param, value):
271-
return bq.ScalarQueryParameter(param.get_name(), 'FLOAT64', value)
259+
return bq.ScalarQueryParameter(param.get_name(), "FLOAT64", value)
272260

273261

274262
@bigquery_param.register(ir.BooleanScalar, bool)
275263
def bq_param_boolean(param, value):
276-
return bq.ScalarQueryParameter(param.get_name(), 'BOOL', value)
264+
return bq.ScalarQueryParameter(param.get_name(), "BOOL", value)
277265

278266

279267
@bigquery_param.register(ir.DateScalar, str)
@@ -288,7 +276,7 @@ def bq_param_date_datetime(param, value):
288276

289277
@bigquery_param.register(ir.DateScalar, datetime.date)
290278
def bq_param_date(param, value):
291-
return bq.ScalarQueryParameter(param.get_name(), 'DATE', value)
279+
return bq.ScalarQueryParameter(param.get_name(), "DATE", value)
292280

293281

294282
class BigQueryTable(ops.DatabaseTable):
@@ -297,15 +285,15 @@ class BigQueryTable(ops.DatabaseTable):
297285

298286
def rename_partitioned_column(table_expr, bq_table, partition_col):
299287
"""Rename native partition column to user-defined name."""
300-
partition_info = bq_table._properties.get('timePartitioning', None)
288+
partition_info = bq_table._properties.get("timePartitioning", None)
301289

302290
# If we don't have any partition information, the table isn't partitioned
303291
if partition_info is None:
304292
return table_expr
305293

306294
# If we have a partition, but no "field" field in the table properties,
307295
# then use NATIVE_PARTITION_COL as the default
308-
partition_field = partition_info.get('field', NATIVE_PARTITION_COL)
296+
partition_field = partition_info.get("field", NATIVE_PARTITION_COL)
309297

310298
# The partition field must be in table_expr columns
311299
assert partition_field in table_expr.columns
@@ -421,7 +409,7 @@ def _parse_project_and_dataset(self, dataset):
421409
raise ValueError("Unable to determine BigQuery dataset.")
422410
project, _, dataset = parse_project_and_dataset(
423411
self.billing_project,
424-
dataset or '{}.{}'.format(self.data_project, self.dataset),
412+
dataset or "{}.{}".format(self.data_project, self.dataset),
425413
)
426414
return project, dataset
427415

@@ -435,7 +423,7 @@ def dataset_id(self):
435423

436424
def table(self, name, database=None):
437425
t = super().table(name, database=database)
438-
project, dataset, name = t.op().name.split('.')
426+
project, dataset, name = t.op().name.split(".")
439427
dataset_ref = self.client.dataset(dataset, project=project)
440428
table_ref = dataset_ref.table(name)
441429
bq_table = self.client.get_table(table_ref)
@@ -453,7 +441,7 @@ def _fully_qualified_name(self, name, database):
453441
return "{}.{}.{}".format(project, dataset, name)
454442

455443
def _get_table_schema(self, qualified_name):
456-
dataset, table = qualified_name.rsplit('.', 1)
444+
dataset, table = qualified_name.rsplit(".", 1)
457445
assert dataset is not None, "dataset is None"
458446
return self.get_schema(table, database=dataset)
459447

@@ -511,9 +499,8 @@ def exists_database(self, name):
511499

512500
def list_databases(self, like=None):
513501
results = [
514-
dataset.dataset_id for dataset in self.client.list_datasets(
515-
project=self.data_project
516-
)
502+
dataset.dataset_id
503+
for dataset in self.client.list_datasets(project=self.data_project)
517504
]
518505
if like:
519506
results = [
@@ -538,9 +525,7 @@ def exists_table(self, name, database=None):
538525
def list_tables(self, like=None, database=None):
539526
project, dataset = self._parse_project_and_dataset(database)
540527
dataset_ref = bq.DatasetReference(project, dataset)
541-
result = [
542-
table.table_id for table in self.client.list_tables(dataset_ref)
543-
]
528+
result = [table.table_id for table in self.client.list_tables(dataset_ref)]
544529
if like:
545530
result = [
546531
table_name

0 commit comments

Comments
 (0)