Skip to content
This repository was archived by the owner on Mar 29, 2023. It is now read-only.

Commit 7348bf2

Browse files
authored
feat: add ibis_bigquery.connect and ibis_bigquery.compile functions (#37)
* feat: add `ibis_bigquery.connect` and `ibis_bigquery.compile` functions. This should make it easier to use the `ibis_bigquery` module directly. Also, fix the tests to use the methods from this module. * fix lint errors
1 parent 58d624a commit 7348bf2

File tree

8 files changed

+142
-49
lines changed

8 files changed

+142
-49
lines changed

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Using this library directly:
5151
import ibis
5252
import ibis_bigquery
5353
54-
conn = ibis_bigquery.Backend().connect(
54+
conn = ibis_bigquery.connect(
5555
project_id=YOUR_PROJECT_ID,
5656
dataset_id='bigquery-public-data.stackoverflow'
5757
)

docs/bigquery.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.. currentmodule:: ibis.bigquery.api
1+
.. currentmodule:: ibis_bigquery
22

33
.. _backends.bigquery:
44

@@ -40,8 +40,8 @@ project.
4040
will still be billed for any and all queries**.
4141

4242
If you want to query data that lives in a different project than the billing
43-
project you can use the :meth:`ibis.bigquery.client.BigQueryClient.database`
44-
method of :class:`ibis.bigquery.client.BigQueryClient` objects:
43+
project you can use the :meth:`ibis_bigquery.client.BigQueryClient.database`
44+
method of :class:`ibis_bigquery.client.BigQueryClient` objects:
4545

4646
.. code-block:: python
4747

ibis_bigquery/__init__.py

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import google.auth.credentials
66
import google.cloud.bigquery # noqa: F401, fail early if bigquery is missing
7-
import ibis.config
87
import pydata_google_auth
98
from ibis.backends.base import BaseBackend
109
from pydata_google_auth import cache
@@ -52,6 +51,7 @@ def connect(
5251
auth_local_webserver: bool = False,
5352
auth_external_data: bool = False,
5453
auth_cache: str = "default",
54+
partition_column: Optional[str] = "PARTITIONTIME",
5555
) -> BigQueryClient:
5656
"""Create a BigQueryClient for use with Ibis.
5757
@@ -91,6 +91,9 @@ def connect(
9191
Authenticates and does **not** cache credentials.
9292
9393
Defaults to ``'default'``.
94+
partition_column : str
95+
Identifier to use instead of default ``_PARTITIONTIME`` partition
96+
column. Defaults to ``'PARTITIONTIME'``.
9497
9598
Returns
9699
-------
@@ -136,7 +139,96 @@ def connect(
136139
dataset_id=dataset_id,
137140
credentials=credentials,
138141
application_name=application_name,
142+
partition_column=partition_column,
139143
)
140144

141-
def register_options(self):
142-
ibis.config.register_option('partition_col', 'PARTITIONTIME')
145+
146+
def compile(expr, params=None):
147+
"""Compile an expression for BigQuery.
148+
Returns
149+
-------
150+
compiled : str
151+
See Also
152+
--------
153+
ibis.expr.types.Expr.compile
154+
"""
155+
backend = Backend()
156+
return backend.compile(expr, params=params)
157+
158+
159+
def connect(
160+
project_id: Optional[str] = None,
161+
dataset_id: Optional[str] = None,
162+
credentials: Optional[google.auth.credentials.Credentials] = None,
163+
application_name: Optional[str] = None,
164+
auth_local_webserver: bool = False,
165+
auth_external_data: bool = False,
166+
auth_cache: str = "default",
167+
partition_column: Optional[str] = "PARTITIONTIME",
168+
) -> BigQueryClient:
169+
"""Create a BigQueryClient for use with Ibis.
170+
171+
Parameters
172+
----------
173+
project_id : str
174+
A BigQuery project id.
175+
dataset_id : str
176+
A dataset id that lives inside of the project indicated by
177+
`project_id`.
178+
credentials : google.auth.credentials.Credentials
179+
application_name : str
180+
A string identifying your application to Google API endpoints.
181+
auth_local_webserver : bool
182+
Use a local webserver for the user authentication. Binds a
183+
webserver to an open port on localhost between 8080 and 8089,
184+
inclusive, to receive authentication token. If not set, defaults
185+
to False, which requests a token via the console.
186+
auth_external_data : bool
187+
Authenticate using additional scopes required to `query external
188+
data sources
189+
<https://cloud.google.com/bigquery/external-data-sources>`_,
190+
such as Google Sheets, files in Google Cloud Storage, or files in
191+
Google Drive. If not set, defaults to False, which requests the
192+
default BigQuery scopes.
193+
auth_cache : str
194+
Selects the behavior of the credentials cache.
195+
196+
``'default'``
197+
Reads credentials from disk if available, otherwise
198+
authenticates and caches credentials to disk.
199+
200+
``'reauth'``
201+
Authenticates and caches credentials to disk.
202+
203+
``'none'``
204+
Authenticates and does **not** cache credentials.
205+
206+
Defaults to ``'default'``.
207+
partition_column : str
208+
Identifier to use instead of default ``_PARTITIONTIME`` partition
209+
column. Defaults to ``'PARTITIONTIME'``.
210+
211+
Returns
212+
-------
213+
BigQueryClient
214+
215+
"""
216+
backend = Backend()
217+
return backend.connect(
218+
project_id=project_id,
219+
dataset_id=dataset_id,
220+
credentials=credentials,
221+
application_name=application_name,
222+
auth_local_webserver=auth_local_webserver,
223+
auth_external_data=auth_external_data,
224+
auth_cache=auth_cache,
225+
partition_column=partition_column,
226+
)
227+
228+
229+
__all__ = [
230+
"__version__",
231+
"Backend",
232+
"compile",
233+
"connect",
234+
]

ibis_bigquery/client.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,8 @@ class BigQueryTable(ops.DatabaseTable):
290290
pass
291291

292292

293-
def rename_partitioned_column(table_expr, bq_table):
293+
def rename_partitioned_column(table_expr, bq_table, partition_col):
294+
"""Rename native partition column to user-defined name."""
294295
partition_info = bq_table._properties.get('timePartitioning', None)
295296

296297
# If we don't have any partition information, the table isn't partitioned
@@ -304,14 +305,11 @@ def rename_partitioned_column(table_expr, bq_table):
304305
# The partition field must be in table_expr columns
305306
assert partition_field in table_expr.columns
306307

307-
# User configured partition column name default
308-
col = ibis.options.bigquery.partition_col
309-
310308
# No renaming if the config option is set to None or the partition field
311309
# is not _PARTITIONTIME
312-
if col is None or partition_field != NATIVE_PARTITION_COL:
310+
if partition_col is None or partition_field != NATIVE_PARTITION_COL:
313311
return table_expr
314-
return table_expr.relabel({NATIVE_PARTITION_COL: col})
312+
return table_expr.relabel({NATIVE_PARTITION_COL: partition_col})
315313

316314

317315
def parse_project_and_dataset(
@@ -379,6 +377,7 @@ def __init__(
379377
dataset_id=None,
380378
credentials=None,
381379
application_name=None,
380+
partition_column=None,
382381
):
383382
"""Construct a BigQueryClient.
384383
@@ -391,6 +390,9 @@ def __init__(
391390
credentials : google.auth.credentials.Credentials
392391
application_name : str
393392
A string identifying your application to Google API endpoints.
393+
partition_column : str
394+
Identifier to use instead of default ``_PARTITIONTIME`` partition
395+
column.
394396
395397
"""
396398
self.query_class = backend.query_class
@@ -407,6 +409,7 @@ def __init__(
407409
credentials=credentials,
408410
client_info=_create_client_info(application_name),
409411
)
412+
self.partition_column = partition_column
410413

411414
def _parse_project_and_dataset(self, dataset):
412415
if not dataset and not self.dataset:
@@ -431,7 +434,7 @@ def table(self, name, database=None):
431434
dataset_ref = self.client.dataset(dataset, project=project)
432435
table_ref = dataset_ref.table(name)
433436
bq_table = self.client.get_table(table_ref)
434-
return rename_partitioned_column(t, bq_table)
437+
return rename_partitioned_column(t, bq_table, self.partition_column)
435438

436439
def _build_ast(self, expr, context):
437440
result = comp.build_ast(expr, context)

ibis_bigquery/udf/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import ibis.expr.datatypes as dt
88
import ibis.expr.rules as rlz
99
import ibis.udf.validate as v
10-
from ibis.compat import PY38 # noqa: F401
1110
from ibis.expr.signature import Argument as Arg
1211

1312
from ..compiler import BigQueryUDFNode, compiles
@@ -73,7 +72,7 @@ def udf(input_type, output_type, strict=True, libraries=None):
7372
--------
7473
>>> if PY38:
7574
... import pytest; pytest.skip("Issue #2085")
76-
>>> from ibis.bigquery import udf
75+
>>> from ibis_bigquery import udf
7776
>>> import ibis.expr.datatypes as dt
7877
>>> @udf(input_type=[dt.double], output_type=dt.double)
7978
... def add_one(x):

tests/system/test_client.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import pytz
1313
from google.api_core import exceptions
1414

15+
import ibis_bigquery
1516
from ibis_bigquery.client import bigquery_param
1617

1718
pytestmark = pytest.mark.bigquery
@@ -70,7 +71,7 @@ def test_compile_toplevel():
7071

7172
# it works!
7273
expr = t.foo.sum()
73-
result = ibis.bigquery.compile(expr)
74+
result = ibis_bigquery.compile(expr)
7475
# FIXME: remove quotes because bigquery can't use anything that needs
7576
# quoting?
7677
expected = """\
@@ -192,16 +193,16 @@ def test_cast_string_to_date(alltypes, df, type):
192193

193194

194195
def test_has_partitions(alltypes, parted_alltypes, client):
195-
col = ibis.options.bigquery.partition_col
196+
col = client.partition_column
196197
assert col not in alltypes.columns
197198
assert col in parted_alltypes.columns
198199

199200

200-
def test_different_partition_col_name(client):
201+
def test_different_partition_col_name(monkeypatch, client):
201202
col = 'FOO_BAR'
202-
with ibis.config.option_context('bigquery.partition_col', col):
203-
alltypes = client.table('functional_alltypes')
204-
parted_alltypes = client.table('functional_alltypes_parted')
203+
monkeypatch.setattr(client, 'partition_column', col)
204+
alltypes = client.table('functional_alltypes')
205+
parted_alltypes = client.table('functional_alltypes_parted')
205206
assert col not in alltypes.columns
206207
assert col in parted_alltypes.columns
207208

tests/system/test_connect.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,9 @@
1010

1111
pytestmark = pytest.mark.bigquery
1212

13-
bq_backend = ibis_bigquery.Backend()
14-
1513

1614
def test_repeated_project_name(project_id, credentials):
17-
con = bq_backend.connect(
15+
con = ibis_bigquery.connect(
1816
project_id=project_id,
1917
dataset_id='{}.testing'.format(project_id),
2018
credentials=credentials,
@@ -29,12 +27,12 @@ def mock_credentials(*args, **kwargs):
2927
return creds, 'default-project-id'
3028

3129
monkeypatch.setattr(pydata_google_auth, 'default', mock_credentials)
32-
con = bq_backend.connect(project_id='explicit-project-id',)
30+
con = ibis_bigquery.connect(project_id='explicit-project-id',)
3331
assert con.billing_project == 'explicit-project-id'
3432

3533

3634
def test_without_dataset(project_id, credentials):
37-
con = bq_backend.connect(
35+
con = ibis_bigquery.connect(
3836
project_id=project_id, dataset_id=None, credentials=credentials,
3937
)
4038
with pytest.raises(ValueError, match="Unable to determine BigQuery"):
@@ -46,7 +44,7 @@ def test_application_name_sets_user_agent(
4644
):
4745
mock_client = mock.create_autospec(bq.Client)
4846
monkeypatch.setattr(bq, 'Client', mock_client)
49-
bq_backend.connect(
47+
ibis_bigquery.connect(
5048
project_id=project_id,
5149
dataset_id='bigquery-public-data.stackoverflow',
5250
application_name='my-great-app/0.7.0',
@@ -67,7 +65,7 @@ def mock_default(*args, **kwargs):
6765

6866
monkeypatch.setattr(pydata_google_auth, "default", mock_default)
6967

70-
bq_backend.connect(
68+
ibis_bigquery.connect(
7169
project_id=project_id, dataset_id='bigquery-public-data.stackoverflow',
7270
)
7371

@@ -93,7 +91,7 @@ def mock_default(*args, **kwargs):
9391

9492
monkeypatch.setattr(pydata_google_auth, "default", mock_default)
9593

96-
bq_backend.connect(
94+
ibis_bigquery.connect(
9795
project_id=project_id,
9896
dataset_id='bigquery-public-data.stackoverflow',
9997
auth_local_webserver=True,
@@ -114,7 +112,7 @@ def mock_default(*args, **kwargs):
114112

115113
monkeypatch.setattr(pydata_google_auth, "default", mock_default)
116114

117-
bq_backend.connect(
115+
ibis_bigquery.connect(
118116
project_id=project_id,
119117
dataset_id='bigquery-public-data.stackoverflow',
120118
auth_external_data=True,
@@ -136,7 +134,7 @@ def mock_default(*args, **kwargs):
136134

137135
monkeypatch.setattr(pydata_google_auth, "default", mock_default)
138136

139-
bq_backend.connect(
137+
ibis_bigquery.connect(
140138
project_id=project_id,
141139
dataset_id="bigquery-public-data.stackoverflow",
142140
auth_cache="reauth",
@@ -159,7 +157,7 @@ def mock_default(*args, **kwargs):
159157

160158
monkeypatch.setattr(pydata_google_auth, "default", mock_default)
161159

162-
bq_backend.connect(
160+
ibis_bigquery.connect(
163161
project_id=project_id,
164162
dataset_id="bigquery-public-data.stackoverflow",
165163
auth_cache="none",
@@ -173,7 +171,7 @@ def mock_default(*args, **kwargs):
173171

174172
def test_auth_cache_unknown(project_id):
175173
with pytest.raises(ValueError, match="unexpected value for auth_cache"):
176-
bq_backend.connect(
174+
ibis_bigquery.connect(
177175
project_id=project_id,
178176
dataset_id="bigquery-public-data.stackoverflow",
179177
auth_cache="not_a_real_cache",

0 commit comments

Comments
 (0)