|
1 | | -import datetime |
2 | | - |
3 | 1 | import ibis |
4 | 2 | import ibis.expr.datatypes as dt |
5 | | -import ibis.expr.operations as ops |
6 | | -import pandas as pd |
7 | 3 | import pytest |
8 | | -from ibis.expr.types import TableExpr |
9 | | - |
10 | | -import ibis_bigquery as bq |
11 | 4 |
|
12 | 5 | pytestmark = pytest.mark.bigquery |
13 | 6 |
|
@@ -76,153 +69,6 @@ def test_to_timestamp(alltypes, timezone, project_id): |
76 | 69 | assert result == expected |
77 | 70 |
|
78 | 71 |
|
79 | | -@pytest.mark.parametrize( |
80 | | - ('case', 'expected', 'dtype'), |
81 | | - [ |
82 | | - (datetime.date(2017, 1, 1), "DATE '2017-01-01'", dt.date), |
83 | | - (pd.Timestamp('2017-01-01'), "DATE '2017-01-01'", dt.date,), |
84 | | - ('2017-01-01', "DATE '2017-01-01'", dt.date), |
85 | | - ( |
86 | | - datetime.datetime(2017, 1, 1, 4, 55, 59), |
87 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
88 | | - dt.timestamp, |
89 | | - ), |
90 | | - ( |
91 | | - '2017-01-01 04:55:59', |
92 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
93 | | - dt.timestamp, |
94 | | - ), |
95 | | - ( |
96 | | - pd.Timestamp('2017-01-01 04:55:59'), |
97 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
98 | | - dt.timestamp, |
99 | | - ), |
100 | | - ], |
101 | | -) |
102 | | -def test_literal_date(case, expected, dtype): |
103 | | - expr = ibis.literal(case, type=dtype).year() |
104 | | - result = ibis.bigquery.compile(expr) |
105 | | - assert result == f"SELECT EXTRACT(year from {expected}) AS `tmp`" |
106 | | - |
107 | | - |
108 | | -@pytest.mark.parametrize( |
109 | | - ('case', 'expected', 'dtype', 'strftime_func'), |
110 | | - [ |
111 | | - ( |
112 | | - datetime.date(2017, 1, 1), |
113 | | - "DATE '2017-01-01'", |
114 | | - dt.date, |
115 | | - 'FORMAT_DATE', |
116 | | - ), |
117 | | - ( |
118 | | - pd.Timestamp('2017-01-01'), |
119 | | - "DATE '2017-01-01'", |
120 | | - dt.date, |
121 | | - 'FORMAT_DATE', |
122 | | - ), |
123 | | - ('2017-01-01', "DATE '2017-01-01'", dt.date, 'FORMAT_DATE',), |
124 | | - ( |
125 | | - datetime.datetime(2017, 1, 1, 4, 55, 59), |
126 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
127 | | - dt.timestamp, |
128 | | - 'FORMAT_TIMESTAMP', |
129 | | - ), |
130 | | - ( |
131 | | - '2017-01-01 04:55:59', |
132 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
133 | | - dt.timestamp, |
134 | | - 'FORMAT_TIMESTAMP', |
135 | | - ), |
136 | | - ( |
137 | | - pd.Timestamp('2017-01-01 04:55:59'), |
138 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
139 | | - dt.timestamp, |
140 | | - 'FORMAT_TIMESTAMP', |
141 | | - ), |
142 | | - ], |
143 | | -) |
144 | | -def test_day_of_week(case, expected, dtype, strftime_func): |
145 | | - date_var = ibis.literal(case, type=dtype) |
146 | | - expr_index = date_var.day_of_week.index() |
147 | | - result = ibis.bigquery.compile(expr_index) |
148 | | - assert ( |
149 | | - result |
150 | | - == f"SELECT MOD(EXTRACT(DAYOFWEEK FROM {expected}) + 5, 7) AS `tmp`" |
151 | | - ) |
152 | | - |
153 | | - expr_name = date_var.day_of_week.full_name() |
154 | | - result = ibis.bigquery.compile(expr_name) |
155 | | - if strftime_func == 'FORMAT_TIMESTAMP': |
156 | | - assert ( |
157 | | - result |
158 | | - == f"SELECT {strftime_func}('%A', {expected}, 'UTC') AS `tmp`" |
159 | | - ) |
160 | | - else: |
161 | | - assert result == f"SELECT {strftime_func}('%A', {expected}) AS `tmp`" |
162 | | - |
163 | | - |
164 | | -@pytest.mark.parametrize( |
165 | | - ("case", "expected", "dtype"), |
166 | | - [ |
167 | | - ("test of hash", "'test of hash'", dt.string,), |
168 | | - (b"test of hash", "FROM_BASE64('dGVzdCBvZiBoYXNo')", dt.binary,), |
169 | | - ], |
170 | | -) |
171 | | -def test_hash(case, expected, dtype): |
172 | | - string_var = ibis.literal(case, type=dtype) |
173 | | - expr = string_var.hash(how="farm_fingerprint") |
174 | | - result = ibis.bigquery.compile(expr) |
175 | | - assert result == f"SELECT farm_fingerprint({expected}) AS `tmp`" |
176 | | - |
177 | | - |
178 | | -@pytest.mark.parametrize( |
179 | | - ("case", "expected", "how", "dtype"), |
180 | | - [ |
181 | | - ("test", "md5('test')", "md5", dt.string,), |
182 | | - (b"test", "md5(FROM_BASE64('dGVzdA=='))", "md5", dt.binary,), |
183 | | - ("test", "sha1('test')", "sha1", dt.string,), |
184 | | - (b"test", "sha1(FROM_BASE64('dGVzdA=='))", "sha1", dt.binary,), |
185 | | - ("test", "sha256('test')", "sha256", dt.string,), |
186 | | - (b"test", "sha256(FROM_BASE64('dGVzdA=='))", "sha256", dt.binary,), |
187 | | - ("test", "sha512('test')", "sha512", dt.string,), |
188 | | - (b"test", "sha512(FROM_BASE64('dGVzdA=='))", "sha512", dt.binary,), |
189 | | - ], |
190 | | -) |
191 | | -def test_hashbytes(case, expected, how, dtype): |
192 | | - var = ibis.literal(case, type=dtype) |
193 | | - expr = var.hashbytes(how=how) |
194 | | - result = ibis.bigquery.compile(expr) |
195 | | - assert result == f"SELECT {expected} AS `tmp`" |
196 | | - |
197 | | - |
198 | | -@pytest.mark.parametrize( |
199 | | - ('case', 'expected', 'dtype'), |
200 | | - [ |
201 | | - ( |
202 | | - datetime.datetime(2017, 1, 1, 4, 55, 59), |
203 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
204 | | - dt.timestamp, |
205 | | - ), |
206 | | - ( |
207 | | - '2017-01-01 04:55:59', |
208 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
209 | | - dt.timestamp, |
210 | | - ), |
211 | | - ( |
212 | | - pd.Timestamp('2017-01-01 04:55:59'), |
213 | | - "TIMESTAMP '2017-01-01 04:55:59'", |
214 | | - dt.timestamp, |
215 | | - ), |
216 | | - (datetime.time(4, 55, 59), "TIME '04:55:59'", dt.time), |
217 | | - ('04:55:59', "TIME '04:55:59'", dt.time), |
218 | | - ], |
219 | | -) |
220 | | -def test_literal_timestamp_or_time(case, expected, dtype): |
221 | | - expr = ibis.literal(case, type=dtype).hour() |
222 | | - result = ibis.bigquery.compile(expr) |
223 | | - assert result == f"SELECT EXTRACT(hour from {expected}) AS `tmp`" |
224 | | - |
225 | | - |
226 | 72 | def test_window_function(alltypes, project_id): |
227 | 73 | t = alltypes |
228 | 74 | w1 = ibis.window( |
@@ -364,44 +210,6 @@ def test_union_cte( |
364 | 210 | assert result == expected |
365 | 211 |
|
366 | 212 |
|
367 | | -def test_projection_fusion_only_peeks_at_immediate_parent(): |
368 | | - schema = [ |
369 | | - ('file_date', 'timestamp'), |
370 | | - ('PARTITIONTIME', 'date'), |
371 | | - ('val', 'int64'), |
372 | | - ] |
373 | | - table = ibis.table(schema, name='unbound_table') |
374 | | - table = table[table.PARTITIONTIME < ibis.date('2017-01-01')] |
375 | | - table = table.mutate(file_date=table.file_date.cast('date')) |
376 | | - table = table[table.file_date < ibis.date('2017-01-01')] |
377 | | - table = table.mutate(XYZ=table.val * 2) |
378 | | - expr = table.join(table.view())[table] |
379 | | - result = ibis.bigquery.compile(expr) |
380 | | - expected = """\ |
381 | | -WITH t0 AS ( |
382 | | - SELECT * |
383 | | - FROM unbound_table |
384 | | - WHERE `PARTITIONTIME` < DATE '2017-01-01' |
385 | | -), |
386 | | -t1 AS ( |
387 | | - SELECT CAST(`file_date` AS DATE) AS `file_date`, `PARTITIONTIME`, `val` |
388 | | - FROM t0 |
389 | | -), |
390 | | -t2 AS ( |
391 | | - SELECT t1.* |
392 | | - FROM t1 |
393 | | - WHERE t1.`file_date` < DATE '2017-01-01' |
394 | | -), |
395 | | -t3 AS ( |
396 | | - SELECT *, `val` * 2 AS `XYZ` |
397 | | - FROM t2 |
398 | | -) |
399 | | -SELECT t3.* |
400 | | -FROM t3 |
401 | | - CROSS JOIN t3 t4""" |
402 | | - assert result == expected |
403 | | - |
404 | | - |
405 | 213 | def test_bool_reducers(alltypes, project_id): |
406 | 214 | b = alltypes.bool_col |
407 | 215 | expr = b.mean() |
@@ -546,130 +354,3 @@ def test_cov(alltypes, project_id): |
546 | 354 |
|
547 | 355 | with pytest.raises(ValueError): |
548 | 356 | d.cov(d, how='error') |
549 | | - |
550 | | - |
551 | | -@pytest.mark.parametrize( |
552 | | - ('unit', 'expected_unit', 'expected_func'), |
553 | | - [ |
554 | | - ('Y', 'YEAR', 'TIMESTAMP'), |
555 | | - ('Q', 'QUARTER', 'TIMESTAMP'), |
556 | | - ('M', 'MONTH', 'TIMESTAMP'), |
557 | | - ('W', 'WEEK', 'TIMESTAMP'), |
558 | | - ('D', 'DAY', 'TIMESTAMP'), |
559 | | - ('h', 'HOUR', 'TIMESTAMP'), |
560 | | - ('m', 'MINUTE', 'TIMESTAMP'), |
561 | | - ('s', 'SECOND', 'TIMESTAMP'), |
562 | | - ('ms', 'MILLISECOND', 'TIMESTAMP'), |
563 | | - ('us', 'MICROSECOND', 'TIMESTAMP'), |
564 | | - ('Y', 'YEAR', 'DATE'), |
565 | | - ('Q', 'QUARTER', 'DATE'), |
566 | | - ('M', 'MONTH', 'DATE'), |
567 | | - ('W', 'WEEK', 'DATE'), |
568 | | - ('D', 'DAY', 'DATE'), |
569 | | - ('h', 'HOUR', 'TIME'), |
570 | | - ('m', 'MINUTE', 'TIME'), |
571 | | - ('s', 'SECOND', 'TIME'), |
572 | | - ('ms', 'MILLISECOND', 'TIME'), |
573 | | - ('us', 'MICROSECOND', 'TIME'), |
574 | | - ], |
575 | | -) |
576 | | -def test_temporal_truncate(unit, expected_unit, expected_func): |
577 | | - t = ibis.table([('a', getattr(dt, expected_func.lower()))], name='t') |
578 | | - expr = t.a.truncate(unit) |
579 | | - result = ibis.bigquery.compile(expr) |
580 | | - expected = f"""\ |
581 | | -SELECT {expected_func}_TRUNC(`a`, {expected_unit}) AS `tmp` |
582 | | -FROM t""" |
583 | | - assert result == expected |
584 | | - |
585 | | - |
586 | | -@pytest.mark.parametrize('kind', ['date', 'time']) |
587 | | -def test_extract_temporal_from_timestamp(kind): |
588 | | - t = ibis.table([('ts', dt.timestamp)], name='t') |
589 | | - expr = getattr(t.ts, kind)() |
590 | | - result = ibis.bigquery.compile(expr) |
591 | | - expected = f"""\ |
592 | | -SELECT {kind.upper()}(`ts`) AS `tmp` |
593 | | -FROM t""" |
594 | | - assert result == expected |
595 | | - |
596 | | - |
597 | | -def test_now(): |
598 | | - expr = ibis.now() |
599 | | - result = ibis.bigquery.compile(expr) |
600 | | - expected = 'SELECT CURRENT_TIMESTAMP() AS `tmp`' |
601 | | - assert result == expected |
602 | | - |
603 | | - |
604 | | -def test_binary(): |
605 | | - t = ibis.table([('value', 'double')], name='t') |
606 | | - expr = t["value"].cast(dt.binary).name("value_hash") |
607 | | - result = ibis.bigquery.compile(expr) |
608 | | - expected = """\ |
609 | | -SELECT CAST(`value` AS BYTES) AS `tmp` |
610 | | -FROM t""" |
611 | | - assert result == expected |
612 | | - |
613 | | - |
614 | | -def test_bucket(): |
615 | | - t = ibis.table([('value', 'double')], name='t') |
616 | | - buckets = [0, 1, 3] |
617 | | - expr = t.value.bucket(buckets).name('foo') |
618 | | - result = ibis.bigquery.compile(expr) |
619 | | - expected = """\ |
620 | | -SELECT |
621 | | - CASE |
622 | | - WHEN (`value` >= 0) AND (`value` < 1) THEN 0 |
623 | | - WHEN (`value` >= 1) AND (`value` <= 3) THEN 1 |
624 | | - ELSE CAST(NULL AS INT64) |
625 | | - END AS `tmp` |
626 | | -FROM t""" |
627 | | - assert result == expected |
628 | | - |
629 | | - |
630 | | -@pytest.mark.parametrize( |
631 | | - ('kind', 'begin', 'end', 'expected'), |
632 | | - [ |
633 | | - ('preceding', None, 1, 'UNBOUNDED PRECEDING AND 1 PRECEDING'), |
634 | | - ('following', 1, None, '1 FOLLOWING AND UNBOUNDED FOLLOWING'), |
635 | | - ], |
636 | | -) |
637 | | -def test_window_unbounded(kind, begin, end, expected): |
638 | | - t = ibis.table([('a', 'int64')], name='t') |
639 | | - kwargs = {kind: (begin, end)} |
640 | | - expr = t.a.sum().over(ibis.window(**kwargs)) |
641 | | - result = ibis.bigquery.compile(expr) |
642 | | - assert ( |
643 | | - result |
644 | | - == f"""\ |
645 | | -SELECT sum(`a`) OVER (ROWS BETWEEN {expected}) AS `tmp` |
646 | | -FROM t""" |
647 | | - ) |
648 | | - |
649 | | - |
650 | | -def test_large_compile(): |
651 | | - """ |
652 | | - Tests that compiling a large expression tree finishes |
653 | | - within a reasonable amount of time |
654 | | - """ |
655 | | - num_columns = 20 |
656 | | - num_joins = 7 |
657 | | - |
658 | | - class MockBigQueryClient(bq.BigQueryClient): |
659 | | - def __init__(self): |
660 | | - pass |
661 | | - |
662 | | - names = [f"col_{i}" for i in range(num_columns)] |
663 | | - schema = ibis.Schema(names, ['string'] * num_columns) |
664 | | - ibis_client = MockBigQueryClient() |
665 | | - table = TableExpr( |
666 | | - ops.SQLQueryResult("select * from t", schema, ibis_client) |
667 | | - ) |
668 | | - for _ in range(num_joins): |
669 | | - table = table.mutate(dummy=ibis.literal("")) |
670 | | - table = table.left_join(table, ["dummy"])[[table]] |
671 | | - |
672 | | - start = datetime.datetime.now() |
673 | | - table.compile() |
674 | | - delta = datetime.datetime.now() - start |
675 | | - assert delta.total_seconds() < 10 |
0 commit comments