
Commit 6308bbb

Merge remote-tracking branch 'github/main' into client_rechunk
2 parents: ce1aa67 + 5c125c9

44 files changed (+3035, -661 lines)


.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -20,6 +20,7 @@ repos:
     hooks:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
+        exclude: "^tests/unit/core/compile/sqlglot/snapshots"
     -   id: check-yaml
 -   repo: https://github.com/pycqa/isort
     rev: 5.12.0
```

CHANGELOG.md

Lines changed: 26 additions & 0 deletions
```diff
@@ -4,6 +4,32 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [2.3.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.2.0...v2.3.0) (2025-05-06)
+
+
+### Features
+
+* Add dry_run parameter to `read_gbq()`, `read_gbq_table()` and `read_gbq_query()` ([#1674](https://github.com/googleapis/python-bigquery-dataframes/issues/1674)) ([4c5dee5](https://github.com/googleapis/python-bigquery-dataframes/commit/4c5dee5e6f4b30deb01e258670aa21dbf3ac9aa5))
+
+
+### Bug Fixes
+
+* Guarantee guid thread safety across threads ([#1684](https://github.com/googleapis/python-bigquery-dataframes/issues/1684)) ([cb0267d](https://github.com/googleapis/python-bigquery-dataframes/commit/cb0267deea227ea85f20d6dbef8c29cf03526d7a))
+* Support large lists of lists in bpd.Series() constructor ([#1662](https://github.com/googleapis/python-bigquery-dataframes/issues/1662)) ([0f4024c](https://github.com/googleapis/python-bigquery-dataframes/commit/0f4024c84508c17657a9104ef1f8718094827ada))
+* Use value equality to check types for unix epoch functions and timestamp diff ([#1690](https://github.com/googleapis/python-bigquery-dataframes/issues/1690)) ([81e8fb8](https://github.com/googleapis/python-bigquery-dataframes/commit/81e8fb8627f1d35423dbbdcc99d02ab0ad362d11))
+
+
+### Performance Improvements
+
+* `to_datetime()` now avoids caching inputs unless data is inspected to infer format ([#1667](https://github.com/googleapis/python-bigquery-dataframes/issues/1667)) ([dd08857](https://github.com/googleapis/python-bigquery-dataframes/commit/dd08857f65140cbe5c524050d2d538949897c3cc))
+
+
+### Documentation
+
+* Add a visualization notebook to BigFrame samples ([#1675](https://github.com/googleapis/python-bigquery-dataframes/issues/1675)) ([ee062bf](https://github.com/googleapis/python-bigquery-dataframes/commit/ee062bfc29c27949205ca21d6c1dcd6125300e5e))
+* Fix spacing of k-means code snippet ([#1687](https://github.com/googleapis/python-bigquery-dataframes/issues/1687)) ([99f45dd](https://github.com/googleapis/python-bigquery-dataframes/commit/99f45dd14bd9632d209389a5fef009f18c57adbf))
+* Update snippet for `Create a k-means` model tutorial ([#1664](https://github.com/googleapis/python-bigquery-dataframes/issues/1664)) ([761c364](https://github.com/googleapis/python-bigquery-dataframes/commit/761c364f4df045b9e9d8d3d5fee91d9a87b772db))
+
 ## [2.2.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.1.0...v2.2.0) (2025-04-30)
 
 
```
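The headline `dry_run` feature is easiest to see in use. A minimal sketch, assuming an authenticated BigQuery session; the public table name is illustrative, and the stat keys shown follow the dry-run refactor in `bigframes/core/blocks.py` below:

```python
import bigframes.pandas as bpd

# With dry_run=True, BigQuery validates the query and estimates its cost
# without executing it. Instead of a DataFrame, read_gbq returns a pandas
# Series of job statistics.
stats = bpd.read_gbq(
    "SELECT name, number FROM `bigquery-public-data.usa_names.usa_1910_2013`",
    dry_run=True,
)
print(stats["totalBytesProcessed"])  # estimated bytes the query would scan
print(stats["columnDtypes"])         # inferred BigFrames dtype per column
```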

bigframes/core/blocks.py

Lines changed: 28 additions & 68 deletions
```diff
@@ -22,25 +22,14 @@
 from __future__ import annotations
 
 import ast
-import copy
 import dataclasses
 import datetime
 import functools
 import itertools
 import random
 import textwrap
 import typing
-from typing import (
-    Any,
-    Iterable,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Sequence,
-    Tuple,
-    Union,
-)
+from typing import Iterable, List, Literal, Mapping, Optional, Sequence, Tuple, Union
 import warnings
 
 import bigframes_vendored.constants as constants
```
```diff
@@ -69,6 +58,8 @@
 import bigframes.exceptions as bfe
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
+from bigframes.session import dry_runs
+from bigframes.session import executor as executors
 
 # Type constraint for wherever column labels are used
 Label = typing.Hashable
```
```diff
@@ -821,59 +812,18 @@ def _compute_dry_run(
         if sampling.enable_downsampling:
             raise NotImplementedError("Dry run with sampling is not supported")
 
-        index: List[Any] = []
-        values: List[Any] = []
-
-        index.append("columnCount")
-        values.append(len(self.value_columns))
-        index.append("columnDtypes")
-        values.append(
-            {
-                col: self.expr.get_column_type(self.resolve_label_exact_or_error(col))
-                for col in self.column_labels
-            }
-        )
-
-        index.append("indexLevel")
-        values.append(self.index.nlevels)
-        index.append("indexDtypes")
-        values.append(self.index.dtypes)
-
         expr = self._apply_value_keys_to_expr(value_keys=value_keys)
         query_job = self.session._executor.dry_run(expr, ordered)
-        job_api_repr = copy.deepcopy(query_job._properties)
-
-        job_ref = job_api_repr["jobReference"]
-        for key, val in job_ref.items():
-            index.append(key)
-            values.append(val)
-
-        index.append("jobType")
-        values.append(job_api_repr["configuration"]["jobType"])
-
-        query_config = job_api_repr["configuration"]["query"]
-        for key in ("destinationTable", "useLegacySql"):
-            index.append(key)
-            values.append(query_config.get(key))
-
-        query_stats = job_api_repr["statistics"]["query"]
-        for key in (
-            "referencedTables",
-            "totalBytesProcessed",
-            "cacheHit",
-            "statementType",
-        ):
-            index.append(key)
-            values.append(query_stats.get(key))
 
-        index.append("creationTime")
-        values.append(
-            pd.Timestamp(
-                job_api_repr["statistics"]["creationTime"], unit="ms", tz="UTC"
-            )
-        )
+        column_dtypes = {
+            col: self.expr.get_column_type(self.resolve_label_exact_or_error(col))
+            for col in self.column_labels
+        }
 
-        return pd.Series(values, index=index), query_job
+        dry_run_stats = dry_runs.get_query_stats_with_dtypes(
+            query_job, column_dtypes, self.index.dtypes
+        )
+        return dry_run_stats, query_job
 
     def _apply_value_keys_to_expr(self, value_keys: Optional[Iterable[str]] = None):
         expr = self._expr
```
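The hunk above collapses roughly fifty lines of inline Series assembly into a single call to `dry_runs.get_query_stats_with_dtypes`. The helper itself is added elsewhere in this commit and is not shown in this excerpt; a sketch of what it plausibly returns, reconstructed from the deleted code:

```python
import pandas as pd

def get_query_stats_with_dtypes(query_job, column_dtypes, index_dtypes) -> pd.Series:
    # Schema-derived stats, previously built inline in Block._compute_dry_run.
    stats = {
        "columnCount": len(column_dtypes),
        "columnDtypes": column_dtypes,
        "indexLevel": len(index_dtypes),
        "indexDtypes": index_dtypes,
    }
    # Job-derived stats, read from the dry-run QueryJob's API representation.
    props = query_job._properties
    stats.update(props["jobReference"])  # projectId, jobId, location
    stats["jobType"] = props["configuration"]["jobType"]
    query_config = props["configuration"]["query"]
    for key in ("destinationTable", "useLegacySql"):
        stats[key] = query_config.get(key)
    query_stats = props["statistics"]["query"]
    for key in ("referencedTables", "totalBytesProcessed", "cacheHit", "statementType"):
        stats[key] = query_stats.get(key)
    stats["creationTime"] = pd.Timestamp(
        props["statistics"]["creationTime"], unit="ms", tz="UTC"
    )
    return pd.Series(stats)
```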
```diff
@@ -1560,12 +1510,19 @@ def retrieve_repr_request_results(
         """
 
         # head caches full underlying expression, so row_count will be free after
-        head_result = self.session._executor.head(self.expr, max_results)
+        executor = self.session._executor
+        executor.cached(
+            array_value=self.expr,
+            config=executors.CacheConfig(optimize_for="head", if_cached="reuse-strict"),
+        )
+        head_result = self.session._executor.execute(
+            self.expr.slice(start=None, stop=max_results, step=None)
+        )
         row_count = self.session._executor.execute(self.expr.row_count()).to_py_scalar()
 
-        df = head_result.to_pandas()
-        self._copy_index_to_pandas(df)
-        return df, row_count, head_result.query_job
+        head_df = head_result.to_pandas()
+        self._copy_index_to_pandas(head_df)
+        return head_df, row_count, head_result.query_job
 
     def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]:
         expr, result_id = self._expr.promote_offsets()
```
```diff
@@ -2535,9 +2492,12 @@ def cached(self, *, force: bool = False, session_aware: bool = False) -> None:
         # use a heuristic for whether something needs to be cached
         self.session._executor.cached(
             self.expr,
-            force=force,
-            use_session=session_aware,
-            cluster_cols=self.index_columns,
+            config=executors.CacheConfig(
+                optimize_for="auto"
+                if session_aware
+                else executors.HierarchicalKey(tuple(self.index_columns)),
+                if_cached="replace" if force else "reuse-any",
+            ),
         )
 
     def _is_monotonic(
```
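Both caching call sites in this file (the repr path above and `Block.cached`) now pass a structured `CacheConfig` instead of loose keyword flags. The real definition lives in `bigframes/session/executor.py` and is not part of this excerpt; a minimal sketch of the shape the call sites assume, with field names and literals inferred from this diff:

```python
import dataclasses
from typing import Literal, Tuple, Union

@dataclasses.dataclass(frozen=True)
class HierarchicalKey:
    """Cluster the cached result by an explicit column hierarchy."""
    columns: Tuple[str, ...]

@dataclasses.dataclass(frozen=True)
class CacheConfig:
    # "auto": session-aware heuristic (old use_session=True);
    # "head": optimize for reading the first rows (the repr path above);
    # HierarchicalKey(...): cluster by given columns (old cluster_cols=...).
    optimize_for: Union[Literal["auto", "head"], HierarchicalKey] = "auto"
    # "replace" corresponds to the old force=True, "reuse-any" to force=False;
    # "reuse-strict" presumably reuses only an exactly matching cache entry.
    if_cached: Literal["reuse-strict", "reuse-any", "replace"] = "reuse-any"
```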

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 57 additions & 54 deletions
```diff
@@ -15,24 +15,28 @@
 
 import dataclasses
 import functools
-import itertools
 import typing
 
 from google.cloud import bigquery
 import pyarrow as pa
 import sqlglot.expressions as sge
 
-from bigframes.core import expression, identifiers, nodes, rewrite
+from bigframes.core import expression, guid, identifiers, nodes, rewrite
 from bigframes.core.compile import configs
 import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
 import bigframes.core.compile.sqlglot.sqlglot_ir as ir
 import bigframes.core.ordering as bf_ordering
 
 
-@dataclasses.dataclass(frozen=True)
 class SQLGlotCompiler:
     """Compiles BigFrame nodes into SQL using SQLGlot."""
 
+    uid_gen: guid.SequentialUIDGenerator
+    """Generator for unique identifiers."""
+
+    def __init__(self):
+        self.uid_gen = guid.SequentialUIDGenerator()
+
     def compile(
         self,
         node: nodes.BigFrameNode,
```
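`SequentialUIDGenerator` replaces the module-level `itertools.count` generator removed further down, giving each compiler instance its own deterministic ID stream (`bfcol_0`, `bfcol_1`, ...). Its implementation is not part of this diff; a sketch of the assumed behavior, with locking per the guid thread-safety fix (#1684) noted in the changelog:

```python
import itertools
import threading
from typing import Dict, Generator, Iterator

class SequentialUIDGenerator:
    """Yields "{prefix}0", "{prefix}1", ... with an independent counter per
    prefix; a lock keeps concurrent callers from receiving duplicate IDs."""

    def __init__(self) -> None:
        self._counters: Dict[str, Iterator[int]] = {}
        self._lock = threading.Lock()

    def get_uid_stream(self, prefix: str) -> Generator[str, None, None]:
        while True:
            with self._lock:
                counter = self._counters.setdefault(prefix, itertools.count())
                next_id = next(counter)
            yield f"{prefix}{next_id}"
```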
```diff
@@ -82,7 +86,7 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
             result_node = typing.cast(
                 nodes.ResultNode, rewrite.column_pruning(result_node)
             )
-            result_node = _remap_variables(result_node)
+            result_node = self._remap_variables(result_node)
             sql = self._compile_result_node(result_node)
             return configs.CompileResult(
                 sql, result_node.schema.to_bigquery(), result_node.order_by
```
```diff
@@ -92,7 +96,7 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
         result_node = dataclasses.replace(result_node, order_by=None)
         result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
 
-        result_node = _remap_variables(result_node)
+        result_node = self._remap_variables(result_node)
         sql = self._compile_result_node(result_node)
         # Return the ordering iff no extra columns are needed to define the row order
         if ordering is not None:
```
```diff
@@ -106,63 +110,62 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
             sql, result_node.schema.to_bigquery(), output_order
         )
 
+    def _remap_variables(self, node: nodes.ResultNode) -> nodes.ResultNode:
+        """Remaps `ColumnId`s in the BFET of a `ResultNode` to produce deterministic UIDs."""
+
+        result_node, _ = rewrite.remap_variables(
+            node, map(identifiers.ColumnId, self.uid_gen.get_uid_stream("bfcol_"))
+        )
+        return typing.cast(nodes.ResultNode, result_node)
+
     def _compile_result_node(self, root: nodes.ResultNode) -> str:
-        sqlglot_ir = compile_node(root.child)
+        sqlglot_ir = self.compile_node(root.child)
         # TODO: add order_by, limit, and selections to sqlglot_expr
         return sqlglot_ir.sql
 
+    @functools.lru_cache(maxsize=5000)
+    def compile_node(self, node: nodes.BigFrameNode) -> ir.SQLGlotIR:
+        """Compiles node into CompileArrayValue. Caches result."""
+        return node.reduce_up(
+            lambda node, children: self._compile_node(node, *children)
+        )
 
-def _replace_unsupported_ops(node: nodes.BigFrameNode):
-    node = nodes.bottom_up(node, rewrite.rewrite_slice)
-    node = nodes.bottom_up(node, rewrite.rewrite_timedelta_expressions)
-    node = nodes.bottom_up(node, rewrite.rewrite_range_rolling)
-    return node
-
-
-def _remap_variables(node: nodes.ResultNode) -> nodes.ResultNode:
-    """Remaps `ColumnId`s in the BFET of a `ResultNode` to produce deterministic UIDs."""
-
-    def anonymous_column_ids() -> typing.Generator[identifiers.ColumnId, None, None]:
-        for i in itertools.count():
-            yield identifiers.ColumnId(name=f"bfcol_{i}")
-
-    result_node, _ = rewrite.remap_variables(node, anonymous_column_ids())
-    return typing.cast(nodes.ResultNode, result_node)
-
-
-@functools.lru_cache(maxsize=5000)
-def compile_node(node: nodes.BigFrameNode) -> ir.SQLGlotIR:
-    """Compiles node into CompileArrayValue. Caches result."""
-    return node.reduce_up(lambda node, children: _compile_node(node, *children))
-
-
-@functools.singledispatch
-def _compile_node(
-    node: nodes.BigFrameNode, *compiled_children: ir.SQLGlotIR
-) -> ir.SQLGlotIR:
-    """Defines transformation but isn't cached, always use compile_node instead"""
-    raise ValueError(f"Can't compile unrecognized node: {node}")
+    @functools.singledispatchmethod
+    def _compile_node(
+        self, node: nodes.BigFrameNode, *compiled_children: ir.SQLGlotIR
+    ) -> ir.SQLGlotIR:
+        """Defines transformation but isn't cached, always use compile_node instead"""
+        raise ValueError(f"Can't compile unrecognized node: {node}")
+
+    @_compile_node.register
+    def compile_readlocal(self, node: nodes.ReadLocalNode, *args) -> ir.SQLGlotIR:
+        pa_table = node.local_data_source.data
+        pa_table = pa_table.select([item.source_id for item in node.scan_list.items])
+        pa_table = pa_table.rename_columns(
+            [item.id.sql for item in node.scan_list.items]
+        )
 
+        offsets = node.offsets_col.sql if node.offsets_col else None
+        if offsets:
+            pa_table = pa_table.append_column(
+                offsets, pa.array(range(pa_table.num_rows), type=pa.int64())
+            )
 
-@_compile_node.register
-def compile_readlocal(node: nodes.ReadLocalNode, *args) -> ir.SQLGlotIR:
-    pa_table = node.local_data_source.data
-    pa_table = pa_table.select([item.source_id for item in node.scan_list.items])
-    pa_table = pa_table.rename_columns([item.id.sql for item in node.scan_list.items])
+        return ir.SQLGlotIR.from_pyarrow(pa_table, node.schema, uid_gen=self.uid_gen)
 
-    offsets = node.offsets_col.sql if node.offsets_col else None
-    if offsets:
-        pa_table = pa_table.append_column(
-            offsets, pa.array(range(pa_table.num_rows), type=pa.int64())
+    @_compile_node.register
+    def compile_selection(
+        self, node: nodes.SelectionNode, child: ir.SQLGlotIR
+    ) -> ir.SQLGlotIR:
+        selected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
+            (id.sql, scalar_compiler.compile_scalar_expression(expr))
+            for expr, id in node.input_output_pairs
        )
+        return child.select(selected_cols)
 
-    return ir.SQLGlotIR.from_pyarrow(pa_table, node.schema)
 
-
-@_compile_node.register
-def compile_selection(node: nodes.SelectionNode, child: ir.SQLGlotIR) -> ir.SQLGlotIR:
-    select_cols: typing.Dict[str, sge.Expression] = {
-        id.name: scalar_compiler.compile_scalar_expression(expr)
-        for expr, id in node.input_output_pairs
-    }
-    return child.select(select_cols)
+def _replace_unsupported_ops(node: nodes.BigFrameNode):
+    node = nodes.bottom_up(node, rewrite.rewrite_slice)
+    node = nodes.bottom_up(node, rewrite.rewrite_timedelta_expressions)
+    node = nodes.bottom_up(node, rewrite.rewrite_range_rolling)
+    return node
```
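The structural change here is that the compiler's dispatch table moves from module-level `functools.singledispatch` functions to `functools.singledispatchmethod` methods, so handlers can reach per-instance state such as `uid_gen`. A self-contained sketch of the pattern, with toy node types standing in for the `BigFrameNode` subclasses:

```python
import functools

class TinyCompiler:
    @functools.singledispatchmethod
    def _compile_node(self, node, *compiled_children):
        # Fallback, as in the diff: unrecognized node types are an error.
        raise ValueError(f"Can't compile unrecognized node: {node!r}")

    # register() dispatches on the annotated type of the first non-self argument.
    @_compile_node.register
    def _compile_int(self, node: int, *compiled_children):
        return f"INT({node})"

    @_compile_node.register
    def _compile_str(self, node: str, *compiled_children):
        return f"STR({node!r})"

compiler = TinyCompiler()
print(compiler._compile_node(42))    # INT(42)
print(compiler._compile_node("ab"))  # STR('ab')
```

One subtlety worth noting: `@functools.lru_cache` on the `compile_node` method keys the cache on `(self, node)`, so cached compilations are per compiler instance and live as long as the instance is referenced.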
