44import tempfile
55import urllib .request
66
7+ import google .auth
8+ import google .auth .exceptions
79import ibis # noqa: F401
810import pytest
911from google .api_core .exceptions import NotFound
1012from google .cloud import bigquery
11- import google .auth
12- import google .auth .exceptions
1313
1414import ibis_bigquery
1515
# Fallback GCP project when PROJECT_ID_ENV_VAR is not set in the environment.
DEFAULT_PROJECT_ID = "ibis-gbq"
# Environment variable that selects the BigQuery project for the test run.
PROJECT_ID_ENV_VAR = "GOOGLE_BIGQUERY_PROJECT_ID"
# Dataset that receives every table created by this test session.
DATASET_ID = "ibis_gbq_testing"
# Base URI of the ibis testing-data repository the fixtures download from.
TESTING_DATA_URI = "https://raw.githubusercontent.com/ibis-project/testing-data/master"

# Module-level backend instance shared by the fixtures below.
bq = ibis_bigquery.Backend()
2322
2423
def pytest_addoption(parser):
    """Register the BigQuery dataset-management command-line flags.

    Both flags are plain booleans defaulting to ``False``:
    ``--save-dataset`` keeps the testing dataset after the run and
    ``--no-refresh-dataset`` skips reloading its table data.
    """
    flags = [
        ("--save-dataset", "saves all test data in the testing dataset"),
        ("--no-refresh-dataset", "do not refresh the test data in the testing dataset"),
    ]
    for flag, description in flags:
        parser.addoption(flag, action="store_true", default=False, help=description)
3037
3138
@pytest.fixture(scope="session")
def dataset_id() -> str:
    """Name of the BigQuery dataset that holds all testing tables."""
    return DATASET_ID
3543
@pytest.fixture(scope="session")
def default_credentials():
    # Obtain application-default credentials with the scopes ibis_bigquery
    # needs for external data sources.
    try:
        credentials, project_id = google.auth.default(
            scopes=ibis_bigquery.EXTERNAL_DATA_SCOPES
        )
    # BUG FIX: the original said `google.auth.excecptions` (typo), so a
    # missing-credentials failure would raise AttributeError while matching
    # the except clause, instead of skipping the test session.
    except google.auth.exceptions.DefaultCredentialsError as exc:
        pytest.skip(f"Could not get GCP credentials: {exc}")
4252
@@ -115,13 +125,13 @@ def public(project_id, credentials):
# Native BigQuery client fixtures
# required to dynamically create the testing dataset,
# the tables, and to populate data into the tables.
@pytest.fixture(scope="session")
def bqclient(client):
    """Raw ``google.cloud.bigquery`` client wrapped by the ibis client."""
    return client.client
121131
122132
123133# Create testing dataset.
124- @pytest .fixture (autouse = True , scope = ' session' )
134+ @pytest .fixture (autouse = True , scope = " session" )
125135def testing_dataset (bqclient , request , dataset_id ):
126136 dataset_ref = bigquery .DatasetReference (bqclient .project , dataset_id )
127137 try :
@@ -130,88 +140,81 @@ def testing_dataset(bqclient, request, dataset_id):
130140 pass
131141 yield dataset_ref
132142 if not request .config .getoption ("--save-dataset" ):
133- bqclient .delete_dataset (
134- dataset_ref , delete_contents = True , not_found_ok = True
135- )
143+ bqclient .delete_dataset (dataset_ref , delete_contents = True , not_found_ok = True )
136144
137145
@pytest.fixture(scope="session")
def functional_alltypes_table(testing_dataset):
    """Reference to the ``functional_alltypes`` table in the testing dataset."""
    return bigquery.TableReference(testing_dataset, "functional_alltypes")
141149
142150
@pytest.fixture(autouse=True, scope="session")
def create_functional_alltypes_table(bqclient, functional_alltypes_table):
    """Create the ``functional_alltypes`` table (if absent) and return it."""
    columns = [
        ("index", "INTEGER"),
        ("Unnamed_0", "INTEGER"),
        ("id", "INTEGER"),
        ("bool_col", "BOOLEAN"),
        ("tinyint_col", "INTEGER"),
        ("smallint_col", "INTEGER"),
        ("int_col", "INTEGER"),
        ("bigint_col", "INTEGER"),
        ("float_col", "FLOAT"),
        ("double_col", "FLOAT"),
        ("date_string_col", "STRING"),
        ("string_col", "STRING"),
        ("timestamp_col", "TIMESTAMP"),
        ("year", "INTEGER"),
        ("month", "INTEGER"),
    ]
    table = bigquery.Table(functional_alltypes_table)
    table.schema = [bigquery.SchemaField(name, dtype) for name, dtype in columns]
    bqclient.create_table(table, exists_ok=True)
    return table
165173
166174
@pytest.fixture(autouse=True, scope="session")
def load_functional_alltypes_data(request, bqclient, create_functional_alltypes_table):
    """Load ``functional_alltypes`` from the testing-data CSV.

    Skipped entirely when ``--no-refresh-dataset`` was passed.
    """
    if request.config.getoption("--no-refresh-dataset"):
        return

    config = bigquery.LoadJobConfig()
    config.skip_leading_rows = 1  # the CSV starts with a header row
    config.write_disposition = "WRITE_TRUNCATE"
    filepath = download_file("{}/functional_alltypes.csv".format(TESTING_DATA_URI))
    with open(filepath.name, "rb") as csvfile:
        job = bqclient.load_table_from_file(
            csvfile, create_functional_alltypes_table, job_config=config
        ).result()
    if job.error_result:
        print("error")
186191
187192
# Ingestion time partitioned table.
@pytest.fixture(scope="session")
def functional_alltypes_parted_table(testing_dataset):
    """Reference to the ingestion-time-partitioned ``functional_alltypes`` table."""
    return bigquery.TableReference(testing_dataset, "functional_alltypes_parted")
193197
194198
195- @pytest .fixture (scope = 'session' )
196- def create_functional_alltypes_parted_table (
197- bqclient , functional_alltypes_parted_table ):
199+ @pytest .fixture (scope = "session" )
200+ def create_functional_alltypes_parted_table (bqclient , functional_alltypes_parted_table ):
198201 table = bigquery .Table (functional_alltypes_parted_table )
199202 table .schema = [
200- bigquery .SchemaField (' index' , ' INTEGER' ),
201- bigquery .SchemaField (' Unnamed_0' , ' INTEGER' ),
202- bigquery .SchemaField ('id' , ' INTEGER' ),
203- bigquery .SchemaField (' bool_col' , ' BOOLEAN' ),
204- bigquery .SchemaField (' tinyint_col' , ' INTEGER' ),
205- bigquery .SchemaField (' smallint_col' , ' INTEGER' ),
206- bigquery .SchemaField (' int_col' , ' INTEGER' ),
207- bigquery .SchemaField (' bigint_col' , ' INTEGER' ),
208- bigquery .SchemaField (' float_col' , ' FLOAT' ),
209- bigquery .SchemaField (' double_col' , ' FLOAT' ),
210- bigquery .SchemaField (' date_string_col' , ' STRING' ),
211- bigquery .SchemaField (' string_col' , ' STRING' ),
212- bigquery .SchemaField (' timestamp_col' , ' TIMESTAMP' ),
213- bigquery .SchemaField (' year' , ' INTEGER' ),
214- bigquery .SchemaField (' month' , ' INTEGER' ),
203+ bigquery .SchemaField (" index" , " INTEGER" ),
204+ bigquery .SchemaField (" Unnamed_0" , " INTEGER" ),
205+ bigquery .SchemaField ("id" , " INTEGER" ),
206+ bigquery .SchemaField (" bool_col" , " BOOLEAN" ),
207+ bigquery .SchemaField (" tinyint_col" , " INTEGER" ),
208+ bigquery .SchemaField (" smallint_col" , " INTEGER" ),
209+ bigquery .SchemaField (" int_col" , " INTEGER" ),
210+ bigquery .SchemaField (" bigint_col" , " INTEGER" ),
211+ bigquery .SchemaField (" float_col" , " FLOAT" ),
212+ bigquery .SchemaField (" double_col" , " FLOAT" ),
213+ bigquery .SchemaField (" date_string_col" , " STRING" ),
214+ bigquery .SchemaField (" string_col" , " STRING" ),
215+ bigquery .SchemaField (" timestamp_col" , " TIMESTAMP" ),
216+ bigquery .SchemaField (" year" , " INTEGER" ),
217+ bigquery .SchemaField (" month" , " INTEGER" ),
215218 ]
216219 table .time_partitioning = bigquery .TimePartitioning (
217220 type_ = bigquery .TimePartitioningType .DAY
@@ -221,128 +224,120 @@ def create_functional_alltypes_parted_table(
221224 return table
222225
223226
@pytest.fixture(autouse=True, scope="session")
def load_functional_alltypes_parted_data(
    request, bqclient, create_functional_alltypes_parted_table
):
    """Load the partitioned ``functional_alltypes`` table from the CSV.

    Skipped entirely when ``--no-refresh-dataset`` was passed.
    """
    if request.config.getoption("--no-refresh-dataset"):
        return

    config = bigquery.LoadJobConfig()
    config.write_disposition = "WRITE_TRUNCATE"
    config.skip_leading_rows = 1  # the CSV starts with a header row
    filepath = download_file("{}/functional_alltypes.csv".format(TESTING_DATA_URI))
    with open(filepath.name, "rb") as csvfile:
        job = bqclient.load_table_from_file(
            csvfile, create_functional_alltypes_parted_table, job_config=config
        ).result()
    if job.error_result:
        print("error")
244245
245246
# Create a table with complex data types (nested and repeated).
@pytest.fixture(scope="session")
def struct_bq_table(testing_dataset):
    """Reference to the ``struct_table`` with nested/repeated columns."""
    return bigquery.TableReference(testing_dataset, "struct_table")
250251
251252
@pytest.fixture(autouse=True, scope="session")
def load_struct_table_data(request, bqclient, struct_bq_table):
    """Load ``struct_table`` from the testing-data Avro file.

    Skipped entirely when ``--no-refresh-dataset`` was passed.
    """
    if request.config.getoption("--no-refresh-dataset"):
        return

    config = bigquery.LoadJobConfig()
    config.write_disposition = "WRITE_TRUNCATE"
    config.source_format = "AVRO"  # schema travels inside the Avro file
    filepath = download_file("{}/struct_table.avro".format(TESTING_DATA_URI))
    with open(filepath.name, "rb") as avrofile:
        job = bqclient.load_table_from_file(
            avrofile, struct_bq_table, job_config=config
        ).result()
    if job.error_result:
        print("error")
270268
271269
# Create empty date-partitioned table.
@pytest.fixture(scope="session")
def date_table(testing_dataset):
    """Reference to the DATE-column-partitioned table."""
    return bigquery.TableReference(testing_dataset, "date_column_parted")
276274
277275
@pytest.fixture(autouse=True, scope="session")
def create_date_table(bqclient, date_table):
    """Create the empty DATE-partitioned table (if absent) and return it."""
    table = bigquery.Table(date_table)
    table.schema = [
        bigquery.SchemaField(name, dtype)
        for name, dtype in [
            ("my_date_parted_col", "DATE"),
            ("string_col", "STRING"),
            ("int_col", "INTEGER"),
        ]
    ]
    table.time_partitioning = bigquery.TimePartitioning(field="my_date_parted_col")
    bqclient.create_table(table, exists_ok=True)
    return table
291287
292288
# Create empty timestamp-partitioned tables.
@pytest.fixture(scope="session")
def timestamp_table(testing_dataset):
    """Reference to the timestamp-column-partitioned table."""
    return bigquery.TableReference(testing_dataset, "timestamp_column_parted")
297293
298294
@pytest.fixture(autouse=True, scope="session")
def create_timestamp_table(bqclient, timestamp_table):
    """Create the empty column-partitioned timestamp table and return it.

    Returns the created ``bigquery.Table`` so dependent fixtures can use it
    directly, consistent with ``create_date_table`` (the original implicitly
    returned ``None``).
    """
    table = bigquery.Table(timestamp_table)
    table.schema = [
        # NOTE(review): the partition column is *named* timestamp but is
        # declared with type DATE in the original -- preserved as-is;
        # confirm this is intentional.
        bigquery.SchemaField("my_timestamp_parted_col", "DATE"),
        bigquery.SchemaField("string_col", "STRING"),
        bigquery.SchemaField("int_col", "INTEGER"),
    ]
    table.time_partitioning = bigquery.TimePartitioning(
        field="my_timestamp_parted_col"
    )
    bqclient.create_table(table, exists_ok=True)
    # Consistency fix: return the table like create_date_table does.
    return table
311305
312306
# Create a table with a numeric column
@pytest.fixture(scope="session")
def numeric_bq_table(testing_dataset):
    """Reference to the ``numeric_table`` with a NUMERIC column."""
    return bigquery.TableReference(testing_dataset, "numeric_table")
317311
318312
@pytest.fixture(scope="session")
def create_numeric_table(bqclient, numeric_bq_table):
    """Create ``numeric_table`` (if absent) and return it."""
    table = bigquery.Table(numeric_bq_table)
    table.schema = [
        bigquery.SchemaField(name, dtype)
        for name, dtype in [("string_col", "STRING"), ("numeric_col", "NUMERIC")]
    ]
    bqclient.create_table(table, exists_ok=True)
    return table
328322
329323
@pytest.fixture(autouse=True, scope="session")
def load_numeric_data(request, bqclient, create_numeric_table):
    """Load ``numeric_table`` from inline newline-delimited JSON.

    Skipped entirely when ``--no-refresh-dataset`` was passed.
    """
    if request.config.getoption("--no-refresh-dataset"):
        return

    config = bigquery.LoadJobConfig()
    config.write_disposition = "WRITE_TRUNCATE"
    config.source_format = "NEWLINE_DELIMITED_JSON"
    rows = (
        '{"string_col": "1st value", "numeric_col": 0.999999999}\n'
        '{"string_col": "2nd value", "numeric_col": 0.000000002}'
    )
    jsonfile = io.StringIO(rows)
    job = bqclient.load_table_from_file(
        jsonfile, create_numeric_table, job_config=config
    ).result()
    if job.error_result:
        print("error")
346341
347342
348343def download_file (url ):
0 commit comments