Skip to content

Commit 441bb94

Browse files
authored
Merge branch 'main' into shuowei-anywidget-index-testcase
2 parents bce8e07 + 64995d6 commit 441bb94

File tree

255 files changed

+1544
-1426
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

255 files changed

+1544
-1426
lines changed

bigframes/core/compile/sqlglot/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
from __future__ import annotations
1515

16-
from bigframes.core.compile.sqlglot.compiler import SQLGlotCompiler
16+
from bigframes.core.compile.sqlglot.compiler import compile_sql
1717
import bigframes.core.compile.sqlglot.expressions.ai_ops # noqa: F401
1818
import bigframes.core.compile.sqlglot.expressions.array_ops # noqa: F401
1919
import bigframes.core.compile.sqlglot.expressions.blob_ops # noqa: F401
@@ -29,4 +29,4 @@
2929
import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401
3030
import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401
3131

32-
__all__ = ["SQLGlotCompiler"]
32+
__all__ = ["compile_sql"]

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 310 additions & 335 deletions
Large diffs are not rendered by default.

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,8 @@ def from_table(
134134
this=sge.to_identifier(col_name, quoted=cls.quoted),
135135
alias=sge.to_identifier(alias_name, quoted=cls.quoted),
136136
)
137+
if col_name != alias_name
138+
else sge.to_identifier(col_name, quoted=cls.quoted)
137139
for col_name, alias_name in zip(col_names, alias_names)
138140
]
139141
table_expr = sge.Table(
@@ -227,6 +229,8 @@ def select(
227229
this=expr,
228230
alias=sge.to_identifier(id, quoted=self.quoted),
229231
)
232+
if expr.alias_or_name != id
233+
else expr
230234
for id, expr in selected_cols
231235
]
232236

bigframes/core/rewrite/select_pullup.py

Lines changed: 38 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,10 @@
1313
# limitations under the License.
1414

1515
import dataclasses
16+
import functools
1617
from typing import cast
1718

18-
from bigframes.core import expression, nodes
19+
from bigframes.core import expression, identifiers, nodes
1920

2021

2122
def defer_selection(
@@ -26,12 +27,19 @@ def defer_selection(
2627
2728
In many cases, these nodes will be merged or eliminated entirely, simplifying the overall tree.
2829
"""
29-
return nodes.bottom_up(root, pull_up_select)
30+
return nodes.bottom_up(
31+
root, functools.partial(pull_up_select, prefer_source_names=True)
32+
)
3033

3134

32-
def pull_up_select(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
35+
def pull_up_select(
36+
node: nodes.BigFrameNode, prefer_source_names: bool
37+
) -> nodes.BigFrameNode:
3338
if isinstance(node, nodes.LeafNode):
34-
return node
39+
if prefer_source_names and isinstance(node, nodes.ReadTableNode):
40+
return pull_up_source_ids(node)
41+
else:
42+
return node
3543
if isinstance(node, nodes.JoinNode):
3644
return pull_up_selects_under_join(node)
3745
if isinstance(node, nodes.ConcatNode):
@@ -42,6 +50,32 @@ def pull_up_select(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
4250
return node
4351

4452

53+
def pull_up_source_ids(node: nodes.ReadTableNode) -> nodes.BigFrameNode:
54+
if all(id.sql == source_id for id, source_id in node.scan_list.items):
55+
return node
56+
else:
57+
source_ids = sorted(
58+
set(scan_item.source_id for scan_item in node.scan_list.items)
59+
)
60+
new_scan_list = nodes.ScanList.from_items(
61+
[
62+
nodes.ScanItem(identifiers.ColumnId(source_id), source_id)
63+
for source_id in source_ids
64+
]
65+
)
66+
new_source = dataclasses.replace(node, scan_list=new_scan_list)
67+
new_selection = nodes.SelectionNode(
68+
new_source,
69+
tuple(
70+
nodes.AliasedRef(
71+
expression.DerefOp(identifiers.ColumnId(source_id)), id
72+
)
73+
for id, source_id in node.scan_list.items
74+
),
75+
)
76+
return new_selection
77+
78+
4579
def pull_up_select_unary(node: nodes.UnaryNode) -> nodes.BigFrameNode:
4680
child = node.child
4781
if not isinstance(child, nodes.SelectionNode):

bigframes/session/_io/bigquery/read_gbq_table.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -402,6 +402,7 @@ def get_index_cols(
402402
| bigframes.enums.DefaultIndexKind,
403403
*,
404404
rename_to_schema: Optional[Dict[str, str]] = None,
405+
default_index_type: bigframes.enums.DefaultIndexKind = bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
405406
) -> List[str]:
406407
"""
407408
If we can get a total ordering from the table, such as via primary key
@@ -471,7 +472,11 @@ def get_index_cols(
471472
# find index_cols to use. This is to avoid unexpected performance and
472473
# resource utilization because of the default sequential index. See
473474
# internal issue 335727141.
474-
if _is_table_clustered_or_partitioned(table) and not primary_keys:
475+
if (
476+
_is_table_clustered_or_partitioned(table)
477+
and not primary_keys
478+
and default_index_type == bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
479+
):
475480
msg = bfe.format_message(
476481
f"Table '{str(table.reference)}' is clustered and/or "
477482
"partitioned, but BigQuery DataFrames was not able to find a "

bigframes/session/direct_gbq_execution.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,7 @@ def __init__(
4040
):
4141
self.bqclient = bqclient
4242
self._compile_fn = (
43-
compile.compile_sql
44-
if compiler == "ibis"
45-
else sqlglot.SQLGlotCompiler()._compile_sql
43+
compile.compile_sql if compiler == "ibis" else sqlglot.compile_sql
4644
)
4745
self._publisher = publisher
4846

bigframes/session/loader.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -696,6 +696,7 @@ def read_gbq_table(
696696
table=table,
697697
index_col=index_col,
698698
rename_to_schema=rename_to_schema,
699+
default_index_type=self._default_index_type,
699700
)
700701
_check_index_col_param(
701702
index_cols,

bigframes/testing/compiler_session.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,15 @@
1616
import typing
1717

1818
import bigframes.core
19-
import bigframes.core.compile.sqlglot as sqlglot
19+
import bigframes.core.compile as compile
2020
import bigframes.session.executor
2121

2222

2323
@dataclasses.dataclass
2424
class SQLCompilerExecutor(bigframes.session.executor.Executor):
2525
"""Executor for SQL compilation using sqlglot."""
2626

27-
compiler = sqlglot
27+
compiler = compile.sqlglot
2828

2929
def to_sql(
3030
self,
@@ -38,9 +38,9 @@ def to_sql(
3838

3939
# Compared with BigQueryCachingExecutor, SQLCompilerExecutor skips
4040
# caching the subtree.
41-
return self.compiler.SQLGlotCompiler().compile(
42-
array_value.node, ordered=ordered
43-
)
41+
return self.compiler.compile_sql(
42+
compile.CompileRequest(array_value.node, sort_rows=ordered)
43+
).sql
4444

4545
def execute(
4646
self,

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`,
4-
`float64_col` AS `bfcol_1`
3+
`float64_col`,
4+
`int64_col`
55
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
66
), `bfcte_1` AS (
77
SELECT
8-
CORR(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
8+
CORR(`int64_col`, `float64_col`) AS `bfcol_2`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`,
4-
`float64_col` AS `bfcol_1`
3+
`float64_col`,
4+
`int64_col`
55
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
66
), `bfcte_1` AS (
77
SELECT
8-
COVAR_SAMP(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
8+
COVAR_SAMP(`int64_col`, `float64_col`) AS `bfcol_2`
99
FROM `bfcte_0`
1010
)
1111
SELECT

0 commit comments

Comments (0)