Skip to content
This repository was archived by the owner on Mar 29, 2023. It is now read-only.

Commit b86e585

Browse files
tswast
and authored
bug: substr fails to compile (#94) (#95)
* bug: substr fails to compile (#94) * manually run Main workflow * fix lint Co-authored-by: Tim Swast <swast@google.com>
1 parent 27e1446 commit b86e585

File tree

6 files changed

+128
-121
lines changed

6 files changed

+128
-121
lines changed

ibis_bigquery/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def _string_substring(translator, expr):
291291
raise ValueError("Length parameter should not be a negative value.")
292292

293293
base_substring = operation_registry[ops.Substring]
294-
base_substring(translator, expr)
294+
return base_substring(translator, expr)
295295

296296

297297
def _array_literal_format(expr):

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
"pyarrow >=1.0.0,<5.0.0dev",
4848
"pydata-google-auth",
4949
# Workaround for ibis 1.x incompatibility with SQLAlchemy 1.4
50-
# AttributeError: module 'sqlalchemy' has no attribute 'Binary'
50+
# AttributeError: module 'sqlalchemy' has no attribute 'Binary'
5151
"sqlalchemy <1.4.0dev",
5252
],
5353
classifiers=[

tests/system/conftest.py

Lines changed: 110 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,49 @@
44
import tempfile
55
import urllib.request
66

7+
import google.auth
8+
import google.auth.exceptions
79
import ibis # noqa: F401
810
import pytest
911
from google.api_core.exceptions import NotFound
1012
from google.cloud import bigquery
11-
import google.auth
12-
import google.auth.exceptions
1313

1414
import ibis_bigquery
1515

1616
DEFAULT_PROJECT_ID = "ibis-gbq"
1717
PROJECT_ID_ENV_VAR = "GOOGLE_BIGQUERY_PROJECT_ID"
1818
DATASET_ID = "ibis_gbq_testing"
19-
TESTING_DATA_URI = (
20-
'https://raw.githubusercontent.com/ibis-project/testing-data/master')
19+
TESTING_DATA_URI = "https://raw.githubusercontent.com/ibis-project/testing-data/master"
2120

2221
bq = ibis_bigquery.Backend()
2322

2423

2524
def pytest_addoption(parser):
26-
parser.addoption('--save-dataset', action='store_true', default=False,
27-
help='saves all test data in the testing dataset')
28-
parser.addoption('--no-refresh-dataset', action='store_true', default=False,
29-
help='do not refresh the test data in the testing dataset')
25+
parser.addoption(
26+
"--save-dataset",
27+
action="store_true",
28+
default=False,
29+
help="saves all test data in the testing dataset",
30+
)
31+
parser.addoption(
32+
"--no-refresh-dataset",
33+
action="store_true",
34+
default=False,
35+
help="do not refresh the test data in the testing dataset",
36+
)
3037

3138

3239
@pytest.fixture(scope="session")
3340
def dataset_id() -> str:
34-
return DATASET_ID
41+
return DATASET_ID
42+
3543

3644
@pytest.fixture(scope="session")
3745
def default_credentials():
3846
try:
39-
credentials, project_id = google.auth.default(scopes=ibis_bigquery.EXTERNAL_DATA_SCOPES)
47+
credentials, project_id = google.auth.default(
48+
scopes=ibis_bigquery.EXTERNAL_DATA_SCOPES
49+
)
4050
except google.auth.excecptions.DefaultCredentialsError as exc:
4151
pytest.skip(f"Could not get GCP credentials: {exc}")
4252

@@ -115,13 +125,13 @@ def public(project_id, credentials):
115125
# Native BigQuery client fixtures
116126
# required to dynamically create the testing dataset,
117127
# the tables, and to populate data into the tables.
118-
@pytest.fixture(scope='session')
128+
@pytest.fixture(scope="session")
119129
def bqclient(client):
120130
return client.client
121131

122132

123133
# Create testing dataset.
124-
@pytest.fixture(autouse=True, scope='session')
134+
@pytest.fixture(autouse=True, scope="session")
125135
def testing_dataset(bqclient, request, dataset_id):
126136
dataset_ref = bigquery.DatasetReference(bqclient.project, dataset_id)
127137
try:
@@ -130,88 +140,81 @@ def testing_dataset(bqclient, request, dataset_id):
130140
pass
131141
yield dataset_ref
132142
if not request.config.getoption("--save-dataset"):
133-
bqclient.delete_dataset(
134-
dataset_ref, delete_contents=True, not_found_ok=True
135-
)
143+
bqclient.delete_dataset(dataset_ref, delete_contents=True, not_found_ok=True)
136144

137145

138-
@pytest.fixture(scope='session')
146+
@pytest.fixture(scope="session")
139147
def functional_alltypes_table(testing_dataset):
140-
return bigquery.TableReference(testing_dataset, 'functional_alltypes')
148+
return bigquery.TableReference(testing_dataset, "functional_alltypes")
141149

142150

143-
@pytest.fixture(autouse=True, scope='session')
151+
@pytest.fixture(autouse=True, scope="session")
144152
def create_functional_alltypes_table(bqclient, functional_alltypes_table):
145153
table = bigquery.Table(functional_alltypes_table)
146154
table.schema = [
147-
bigquery.SchemaField('index', 'INTEGER'),
148-
bigquery.SchemaField('Unnamed_0', 'INTEGER'),
149-
bigquery.SchemaField('id', 'INTEGER'),
150-
bigquery.SchemaField('bool_col', 'BOOLEAN'),
151-
bigquery.SchemaField('tinyint_col', 'INTEGER'),
152-
bigquery.SchemaField('smallint_col', 'INTEGER'),
153-
bigquery.SchemaField('int_col', 'INTEGER'),
154-
bigquery.SchemaField('bigint_col', 'INTEGER'),
155-
bigquery.SchemaField('float_col', 'FLOAT'),
156-
bigquery.SchemaField('double_col', 'FLOAT'),
157-
bigquery.SchemaField('date_string_col', 'STRING'),
158-
bigquery.SchemaField('string_col', 'STRING'),
159-
bigquery.SchemaField('timestamp_col', 'TIMESTAMP'),
160-
bigquery.SchemaField('year', 'INTEGER'),
161-
bigquery.SchemaField('month', 'INTEGER'),
155+
bigquery.SchemaField("index", "INTEGER"),
156+
bigquery.SchemaField("Unnamed_0", "INTEGER"),
157+
bigquery.SchemaField("id", "INTEGER"),
158+
bigquery.SchemaField("bool_col", "BOOLEAN"),
159+
bigquery.SchemaField("tinyint_col", "INTEGER"),
160+
bigquery.SchemaField("smallint_col", "INTEGER"),
161+
bigquery.SchemaField("int_col", "INTEGER"),
162+
bigquery.SchemaField("bigint_col", "INTEGER"),
163+
bigquery.SchemaField("float_col", "FLOAT"),
164+
bigquery.SchemaField("double_col", "FLOAT"),
165+
bigquery.SchemaField("date_string_col", "STRING"),
166+
bigquery.SchemaField("string_col", "STRING"),
167+
bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
168+
bigquery.SchemaField("year", "INTEGER"),
169+
bigquery.SchemaField("month", "INTEGER"),
162170
]
163171
bqclient.create_table(table, exists_ok=True)
164172
return table
165173

166174

167-
@pytest.fixture(autouse=True, scope='session')
175+
@pytest.fixture(autouse=True, scope="session")
168176
def load_functional_alltypes_data(request, bqclient, create_functional_alltypes_table):
169177
if request.config.getoption("--no-refresh-dataset"):
170178
return
171179

172180
table = create_functional_alltypes_table
173181
load_config = bigquery.LoadJobConfig()
174182
load_config.skip_leading_rows = 1 # skip the header row.
175-
load_config.write_disposition = 'WRITE_TRUNCATE'
176-
filepath = download_file(
177-
'{}/functional_alltypes.csv'.format(TESTING_DATA_URI))
178-
with open(filepath.name, 'rb') as csvfile:
183+
load_config.write_disposition = "WRITE_TRUNCATE"
184+
filepath = download_file("{}/functional_alltypes.csv".format(TESTING_DATA_URI))
185+
with open(filepath.name, "rb") as csvfile:
179186
job = bqclient.load_table_from_file(
180-
csvfile,
181-
table,
182-
job_config=load_config,
187+
csvfile, table, job_config=load_config,
183188
).result()
184189
if job.error_result:
185-
print('error')
190+
print("error")
186191

187192

188193
# Ingestion time partitioned table.
189-
@pytest.fixture(scope='session')
194+
@pytest.fixture(scope="session")
190195
def functional_alltypes_parted_table(testing_dataset):
191-
return bigquery.TableReference(
192-
testing_dataset, 'functional_alltypes_parted')
196+
return bigquery.TableReference(testing_dataset, "functional_alltypes_parted")
193197

194198

195-
@pytest.fixture(scope='session')
196-
def create_functional_alltypes_parted_table(
197-
bqclient, functional_alltypes_parted_table):
199+
@pytest.fixture(scope="session")
200+
def create_functional_alltypes_parted_table(bqclient, functional_alltypes_parted_table):
198201
table = bigquery.Table(functional_alltypes_parted_table)
199202
table.schema = [
200-
bigquery.SchemaField('index', 'INTEGER'),
201-
bigquery.SchemaField('Unnamed_0', 'INTEGER'),
202-
bigquery.SchemaField('id', 'INTEGER'),
203-
bigquery.SchemaField('bool_col', 'BOOLEAN'),
204-
bigquery.SchemaField('tinyint_col', 'INTEGER'),
205-
bigquery.SchemaField('smallint_col', 'INTEGER'),
206-
bigquery.SchemaField('int_col', 'INTEGER'),
207-
bigquery.SchemaField('bigint_col', 'INTEGER'),
208-
bigquery.SchemaField('float_col', 'FLOAT'),
209-
bigquery.SchemaField('double_col', 'FLOAT'),
210-
bigquery.SchemaField('date_string_col', 'STRING'),
211-
bigquery.SchemaField('string_col', 'STRING'),
212-
bigquery.SchemaField('timestamp_col', 'TIMESTAMP'),
213-
bigquery.SchemaField('year', 'INTEGER'),
214-
bigquery.SchemaField('month', 'INTEGER'),
203+
bigquery.SchemaField("index", "INTEGER"),
204+
bigquery.SchemaField("Unnamed_0", "INTEGER"),
205+
bigquery.SchemaField("id", "INTEGER"),
206+
bigquery.SchemaField("bool_col", "BOOLEAN"),
207+
bigquery.SchemaField("tinyint_col", "INTEGER"),
208+
bigquery.SchemaField("smallint_col", "INTEGER"),
209+
bigquery.SchemaField("int_col", "INTEGER"),
210+
bigquery.SchemaField("bigint_col", "INTEGER"),
211+
bigquery.SchemaField("float_col", "FLOAT"),
212+
bigquery.SchemaField("double_col", "FLOAT"),
213+
bigquery.SchemaField("date_string_col", "STRING"),
214+
bigquery.SchemaField("string_col", "STRING"),
215+
bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
216+
bigquery.SchemaField("year", "INTEGER"),
217+
bigquery.SchemaField("month", "INTEGER"),
215218
]
216219
table.time_partitioning = bigquery.TimePartitioning(
217220
type_=bigquery.TimePartitioningType.DAY
@@ -221,128 +224,120 @@ def create_functional_alltypes_parted_table(
221224
return table
222225

223226

224-
@pytest.fixture(autouse=True, scope='session')
227+
@pytest.fixture(autouse=True, scope="session")
225228
def load_functional_alltypes_parted_data(
226-
request, bqclient, create_functional_alltypes_parted_table):
229+
request, bqclient, create_functional_alltypes_parted_table
230+
):
227231
if request.config.getoption("--no-refresh-dataset"):
228232
return
229233

230234
table = create_functional_alltypes_parted_table
231235
load_config = bigquery.LoadJobConfig()
232-
load_config.write_disposition = 'WRITE_TRUNCATE'
236+
load_config.write_disposition = "WRITE_TRUNCATE"
233237
load_config.skip_leading_rows = 1 # skip the header row.
234-
filepath = download_file(
235-
'{}/functional_alltypes.csv'.format(TESTING_DATA_URI))
236-
with open(filepath.name, 'rb') as csvfile:
238+
filepath = download_file("{}/functional_alltypes.csv".format(TESTING_DATA_URI))
239+
with open(filepath.name, "rb") as csvfile:
237240
job = bqclient.load_table_from_file(
238-
csvfile,
239-
table,
240-
job_config=load_config,
241+
csvfile, table, job_config=load_config,
241242
).result()
242243
if job.error_result:
243-
print('error')
244+
print("error")
244245

245246

246247
# Create a table with complex data types (nested and repeated).
247-
@pytest.fixture(scope='session')
248+
@pytest.fixture(scope="session")
248249
def struct_bq_table(testing_dataset):
249-
return bigquery.TableReference(testing_dataset, 'struct_table')
250+
return bigquery.TableReference(testing_dataset, "struct_table")
250251

251252

252-
@pytest.fixture(autouse=True, scope='session')
253+
@pytest.fixture(autouse=True, scope="session")
253254
def load_struct_table_data(request, bqclient, struct_bq_table):
254255
if request.config.getoption("--no-refresh-dataset"):
255256
return
256257

257258
load_config = bigquery.LoadJobConfig()
258-
load_config.write_disposition = 'WRITE_TRUNCATE'
259-
load_config.source_format = 'AVRO'
260-
filepath = download_file(
261-
'{}/struct_table.avro'.format(TESTING_DATA_URI))
262-
with open(filepath.name, 'rb') as avrofile:
259+
load_config.write_disposition = "WRITE_TRUNCATE"
260+
load_config.source_format = "AVRO"
261+
filepath = download_file("{}/struct_table.avro".format(TESTING_DATA_URI))
262+
with open(filepath.name, "rb") as avrofile:
263263
job = bqclient.load_table_from_file(
264-
avrofile,
265-
struct_bq_table,
266-
job_config=load_config,
264+
avrofile, struct_bq_table, job_config=load_config,
267265
).result()
268266
if job.error_result:
269-
print('error')
267+
print("error")
270268

271269

272270
# Create empty date-partitioned table.
273-
@pytest.fixture(scope='session')
271+
@pytest.fixture(scope="session")
274272
def date_table(testing_dataset):
275-
return bigquery.TableReference(testing_dataset, 'date_column_parted')
273+
return bigquery.TableReference(testing_dataset, "date_column_parted")
276274

277275

278-
@pytest.fixture(autouse=True, scope='session')
276+
@pytest.fixture(autouse=True, scope="session")
279277
def create_date_table(bqclient, date_table):
280278
table = bigquery.Table(date_table)
281279
table.schema = [
282-
bigquery.SchemaField('my_date_parted_col', 'DATE'),
283-
bigquery.SchemaField('string_col', 'STRING'),
284-
bigquery.SchemaField('int_col', 'INTEGER'),
280+
bigquery.SchemaField("my_date_parted_col", "DATE"),
281+
bigquery.SchemaField("string_col", "STRING"),
282+
bigquery.SchemaField("int_col", "INTEGER"),
285283
]
286-
table.time_partitioning = bigquery.TimePartitioning(
287-
field='my_date_parted_col'
288-
)
284+
table.time_partitioning = bigquery.TimePartitioning(field="my_date_parted_col")
289285
bqclient.create_table(table, exists_ok=True)
290286
return table
291287

292288

293289
# Create empty timestamp-partitioned tables.
294-
@pytest.fixture(scope='session')
290+
@pytest.fixture(scope="session")
295291
def timestamp_table(testing_dataset):
296-
return bigquery.TableReference(testing_dataset, 'timestamp_column_parted')
292+
return bigquery.TableReference(testing_dataset, "timestamp_column_parted")
297293

298294

299-
@pytest.fixture(autouse=True, scope='session')
295+
@pytest.fixture(autouse=True, scope="session")
300296
def create_timestamp_table(bqclient, timestamp_table):
301297
table = bigquery.Table(timestamp_table)
302298
table.schema = [
303-
bigquery.SchemaField('my_timestamp_parted_col', 'DATE'),
304-
bigquery.SchemaField('string_col', 'STRING'),
305-
bigquery.SchemaField('int_col', 'INTEGER'),
299+
bigquery.SchemaField("my_timestamp_parted_col", "DATE"),
300+
bigquery.SchemaField("string_col", "STRING"),
301+
bigquery.SchemaField("int_col", "INTEGER"),
306302
]
307-
table.time_partitioning = bigquery.TimePartitioning(
308-
field='my_timestamp_parted_col'
309-
)
303+
table.time_partitioning = bigquery.TimePartitioning(field="my_timestamp_parted_col")
310304
bqclient.create_table(table, exists_ok=True)
311305

312306

313307
# Create a table with a numeric column
314-
@pytest.fixture(scope='session')
308+
@pytest.fixture(scope="session")
315309
def numeric_bq_table(testing_dataset):
316-
return bigquery.TableReference(testing_dataset, 'numeric_table')
310+
return bigquery.TableReference(testing_dataset, "numeric_table")
317311

318312

319-
@pytest.fixture(scope='session')
313+
@pytest.fixture(scope="session")
320314
def create_numeric_table(bqclient, numeric_bq_table):
321315
table = bigquery.Table(numeric_bq_table)
322316
table.schema = [
323-
bigquery.SchemaField('string_col', 'STRING'),
324-
bigquery.SchemaField('numeric_col', 'NUMERIC'),
317+
bigquery.SchemaField("string_col", "STRING"),
318+
bigquery.SchemaField("numeric_col", "NUMERIC"),
325319
]
326320
bqclient.create_table(table, exists_ok=True)
327321
return table
328322

329323

330-
@pytest.fixture(autouse=True, scope='session')
324+
@pytest.fixture(autouse=True, scope="session")
331325
def load_numeric_data(request, bqclient, create_numeric_table):
332326
if request.config.getoption("--no-refresh-dataset"):
333327
return
334328

335329
load_config = bigquery.LoadJobConfig()
336-
load_config.write_disposition = 'WRITE_TRUNCATE'
337-
load_config.source_format = 'NEWLINE_DELIMITED_JSON'
338-
data = u'''{"string_col": "1st value", "numeric_col": 0.999999999}\n\
339-
{"string_col": "2nd value", "numeric_col": 0.000000002}'''
330+
load_config.write_disposition = "WRITE_TRUNCATE"
331+
load_config.source_format = "NEWLINE_DELIMITED_JSON"
332+
data = """{"string_col": "1st value", "numeric_col": 0.999999999}\n\
333+
{"string_col": "2nd value", "numeric_col": 0.000000002}"""
340334
jsonfile = io.StringIO(data)
341335
table = create_numeric_table
342336
job = bqclient.load_table_from_file(
343-
jsonfile, table, job_config=load_config).result()
337+
jsonfile, table, job_config=load_config
338+
).result()
344339
if job.error_result:
345-
print('error')
340+
print("error")
346341

347342

348343
def download_file(url):

0 commit comments

Comments (0)