
docs: deprecate bpd.options.bigquery.allow_large_results in favor of bpd.options.compute.allow_large_results #1597


Merged
merged 15 commits on May 12, 2025
18 changes: 18 additions & 0 deletions bigframes/_config/__init__.py
@@ -150,6 +150,24 @@ def is_bigquery_thread_local(self) -> bool:
"""
return self._local.bigquery_options is not None

@property
def _allow_large_results(self) -> bool:
Collaborator:
Even though this is private, please add a docstring explaining the purpose.

Collaborator (Author):
Updated

"""The effective 'allow_large_results' setting.

This value is `self.compute.allow_large_results` if set (not `None`),
otherwise it defaults to `self.bigquery.allow_large_results`.

Returns:
bool:
Whether large query results are permitted.
- `True`: The BigQuery result size limit (e.g., 10 GB) is removed.
- `False`: Results are restricted to this limit (potentially faster).
BigQuery will raise an error if this limit is exceeded.
"""
if self.compute.allow_large_results is None:
return self.bigquery.allow_large_results
return self.compute.allow_large_results


options = Options()
"""Global options for default session."""
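For context, a minimal sketch of the resolution order this private property implements (assuming bigframes.pandas is imported as bpd, as elsewhere in this PR):

    import bigframes.pandas as bpd

    # The compute option ships unset (None), so the legacy bigquery option still decides.
    assert bpd.options.compute.allow_large_results is None
    effective = bpd.options._allow_large_results  # == bpd.options.bigquery.allow_large_results

    # Once the compute option is set, it takes precedence over the legacy option.
    bpd.options.compute.allow_large_results = False
    assert bpd.options._allow_large_results is False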
29 changes: 20 additions & 9 deletions bigframes/_config/bigquery_options.py
@@ -19,10 +19,8 @@
from typing import Literal, Optional
import warnings

import google.api_core.exceptions
import google.auth.credentials

import bigframes.constants
import bigframes.enums
import bigframes.exceptions as bfe

@@ -239,21 +237,34 @@ def skip_bq_connection_check(self, value: bool):
@property
def allow_large_results(self) -> bool:
"""
Sets the flag to allow or disallow query results larger than 10 GB.
DEPRECATED: Checks the legacy global setting for allowing large results.
Use ``bpd.options.compute.allow_large_results`` instead.

The default setting for this flag is True, which allows queries to return results
exceeding 10 GB by creating an explicit destination table. If set to False, it
restricts the result size to 10 GB, and BigQuery will raise an error if this limit
is exceeded.
Warning: Accessing ``bpd.options.bigquery.allow_large_results`` is deprecated
and this property will be removed in a future version. The configuration for
handling large results has moved.

Returns:
bool: True if large results are allowed with an explicit destination table,
False if results are limited to 10 GB and errors are raised when exceeded.
bool: The value of the deprecated setting.
"""
return self._allow_large_results

@allow_large_results.setter
def allow_large_results(self, value: bool):
warnings.warn(
"Setting `bpd.options.bigquery.allow_large_results` is deprecated, "
"and will be removed in the future. "
"Please use `bpd.options.compute.allow_large_results = <value>` instead. "
"The `bpd.options.bigquery.allow_large_results` option is ignored if "
"`bpd.options.compute.allow_large_results` is set.",
FutureWarning,
stacklevel=2,
)
if self._session_started and self._allow_large_results != value:
raise ValueError(
SESSION_STARTED_MESSAGE.format(attribute="allow_large_results")
)

self._allow_large_results = value

@property
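A short sketch of what the deprecated setter now does before a session has started (the FutureWarning text is the one added in this diff):

    import warnings

    import bigframes.pandas as bpd

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        bpd.options.bigquery.allow_large_results = True  # legacy spelling, still works
    assert any(issubclass(w.category, FutureWarning) for w in caught)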
10 changes: 9 additions & 1 deletion bigframes/_config/compute_options.py
@@ -86,6 +86,12 @@ class ComputeOptions:
ai_ops_threshold_autofail (bool):
Guards against unexpected processing of large amount of rows by semantic operators.
When set to True, the operation automatically fails without asking for user inputs.

allow_large_results (bool):
Specifies whether query results can exceed 10 GB. Defaults to False. Setting this
to False (the default) restricts results to 10 GB for potentially faster execution;
BigQuery will raise an error if this limit is exceeded. Setting to True removes
this result size limit.
"""

maximum_bytes_billed: Optional[int] = None
@@ -97,7 +103,9 @@ class ComputeOptions:
semantic_ops_threshold_autofail = False

ai_ops_confirmation_threshold: Optional[int] = 0
ai_ops_threshold_autofail = False
ai_ops_threshold_autofail: bool = False

allow_large_results: Optional[bool] = None

def assign_extra_query_labels(self, **kwargs: Any) -> None:
"""
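Since the new field is a plain dataclass attribute with a None default, the replacement spelling is direct assignment (a sketch):

    import bigframes.pandas as bpd

    bpd.options.compute.allow_large_results = True   # lift the 10 GB result cap
    bpd.options.compute.allow_large_results = False  # keep the cap; potentially faster
    bpd.options.compute.allow_large_results = None   # defer to the legacy bigquery option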
13 changes: 9 additions & 4 deletions bigframes/session/bq_caching_executor.py
@@ -61,7 +61,7 @@ def with_require_table(self, value: bool) -> OutputSpec:

def _get_default_output_spec() -> OutputSpec:
return OutputSpec(
require_bq_table=bigframes.options.bigquery.allow_large_results, cluster_cols=()
require_bq_table=bigframes.options._allow_large_results, cluster_cols=()
)


@@ -157,9 +157,6 @@ def execute(
ordered: bool = True,
use_explicit_destination: Optional[bool] = None,
) -> executor.ExecuteResult:
if use_explicit_destination is None:
use_explicit_destination = bigframes.options.bigquery.allow_large_results

if bigframes.options.compute.enable_multi_query_execution:
self._simplify_with_caching(array_value)

@@ -553,6 +550,14 @@ def _execute_plan(
else:
size_bytes = None

if size_bytes is not None and size_bytes >= MAX_SMALL_RESULT_BYTES:
msg = bfe.format_message(
"The query result size has exceeded 10 GB. In BigFrames 2.0 and "
"later, you might need to manually set `allow_large_results=True` in "
"the IO method or adjust the BigFrames option: "
"`bigframes.options.compute.allow_large_results=True`."
)
warnings.warn(msg, FutureWarning)
# Runs strict validations to ensure internal type predictions and ibis are completely in sync
# Do not execute these validations outside of testing suite.
if "PYTEST_CURRENT_TEST" in os.environ:
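The new warning names two remedies; a sketch of both (the per-call allow_large_results parameter on IO methods such as to_pandas is assumed from the warning text, not shown in this diff):

    import bigframes.pandas as bpd

    df = bpd.read_gbq("bigquery-public-data.samples.wikipedia")

    # Remedy 1: opt in globally via the new compute option.
    bpd.options.compute.allow_large_results = True

    # Remedy 2: opt in for a single call via the IO method.
    result = df.to_pandas(allow_large_results=True)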
12 changes: 4 additions & 8 deletions tests/system/conftest.py
@@ -142,7 +142,7 @@ def resourcemanager_client(

@pytest.fixture(scope="session")
def session() -> Generator[bigframes.Session, None, None]:
context = bigframes.BigQueryOptions(location="US", allow_large_results=False)
context = bigframes.BigQueryOptions(location="US")
session = bigframes.Session(context=context)
yield session
session.close() # close generated session at cleanup time
@@ -158,19 +158,15 @@ def session_load() -> Generator[bigframes.Session, None, None]:

@pytest.fixture(scope="session", params=["strict", "partial"])
def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
context = bigframes.BigQueryOptions(
location="US", ordering_mode=request.param, allow_large_results=False
)
context = bigframes.BigQueryOptions(location="US", ordering_mode=request.param)
session = bigframes.Session(context=context)
yield session
session.close() # close generated session at cleanup time


@pytest.fixture(scope="session")
def unordered_session() -> Generator[bigframes.Session, None, None]:
context = bigframes.BigQueryOptions(
location="US", ordering_mode="partial", allow_large_results=False
)
context = bigframes.BigQueryOptions(location="US", ordering_mode="partial")
session = bigframes.Session(context=context)
yield session
session.close() # close generated session at cleanup time
@@ -1419,7 +1415,7 @@ def floats_product_bf(session, floats_product_pd):

@pytest.fixture(scope="session", autouse=True)
def use_fast_query_path():
with bpd.option_context("bigquery.allow_large_results", False):
with bpd.option_context("compute.allow_large_results", False):
yield


2 changes: 1 addition & 1 deletion tests/system/large/test_dataframe_io.py
@@ -18,7 +18,7 @@
import bigframes

WIKIPEDIA_TABLE = "bigquery-public-data.samples.wikipedia"
LARGE_TABLE_OPTION = "bigquery.allow_large_results"
LARGE_TABLE_OPTION = "compute.allow_large_results"


def test_to_pandas_batches_raise_when_large_result_not_allowed(session):
2 changes: 1 addition & 1 deletion tests/system/small/test_dataframe.py
@@ -5081,7 +5081,7 @@ def test_df_bool_interpretation_error(scalars_df_index):

def test_query_job_setters(scalars_df_default_index: dataframe.DataFrame):
# if allow_large_results=False, might not create query job
with bigframes.option_context("bigquery.allow_large_results", True):
with bigframes.option_context("compute.allow_large_results", True):
job_ids = set()
repr(scalars_df_default_index)
assert scalars_df_default_index.query_job is not None
4 changes: 2 additions & 2 deletions tests/system/small/test_dataframe_io.py
@@ -254,7 +254,7 @@ def test_to_pandas_array_struct_correct_result(session):
def test_to_pandas_override_global_option(scalars_df_index):
# Direct call to_pandas uses global default setting (allow_large_results=True),
# table has 'bqdf' prefix.
with bigframes.option_context("bigquery.allow_large_results", True):
with bigframes.option_context("compute.allow_large_results", True):

scalars_df_index.to_pandas()
table_id = scalars_df_index._query_job.destination.table_id
@@ -324,7 +324,7 @@ def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index):

def test_to_arrow_override_global_option(scalars_df_index):
# Direct call to_arrow uses global default setting (allow_large_results=True),
with bigframes.option_context("bigquery.allow_large_results", True):
with bigframes.option_context("compute.allow_large_results", True):

scalars_df_index.to_arrow()
table_id = scalars_df_index._query_job.destination.table_id
4 changes: 2 additions & 2 deletions tests/system/small/test_index_io.py
@@ -15,7 +15,7 @@


def test_to_pandas_override_global_option(scalars_df_index):
with bigframes.option_context("bigquery.allow_large_results", True):
with bigframes.option_context("compute.allow_large_results", True):

bf_index = scalars_df_index.index

@@ -39,7 +39,7 @@ def test_to_pandas_dry_run(scalars_df_index):


def test_to_numpy_override_global_option(scalars_df_index):
with bigframes.option_context("bigquery.allow_large_results", True):
with bigframes.option_context("compute.allow_large_results", True):

bf_index = scalars_df_index.index

2 changes: 1 addition & 1 deletion tests/system/small/test_progress_bar.py
@@ -64,7 +64,7 @@ def test_progress_bar_scalar_allow_large_results(
capsys.readouterr() # clear output

with bf.option_context(
"display.progress_bar", "terminal", "bigquery.allow_large_results", "True"
"display.progress_bar", "terminal", "compute.allow_large_results", "True"
):
penguins_df_default_index["body_mass_g"].head(10).mean()

2 changes: 1 addition & 1 deletion tests/system/small/test_series.py
@@ -3960,7 +3960,7 @@ def test_series_bool_interpretation_error(scalars_df_index):

def test_query_job_setters(scalars_dfs):
# if allow_large_results=False, might not create query job
with bigframes.option_context("bigquery.allow_large_results", True):
with bigframes.option_context("compute.allow_large_results", True):
job_ids = set()
df, _ = scalars_dfs
series = df["int64_col"]
2 changes: 1 addition & 1 deletion tests/system/small/test_series_io.py
@@ -19,7 +19,7 @@


def test_to_pandas_override_global_option(scalars_df_index):
with bigframes.option_context("bigquery.allow_large_results", True):
with bigframes.option_context("compute.allow_large_results", True):

bf_series = scalars_df_index["int64_col"]

22 changes: 22 additions & 0 deletions tests/unit/_config/test_compute_options.py
@@ -0,0 +1,22 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bigframes._config as config


def test_default_options():
options = config.compute_options.ComputeOptions()

assert options.allow_large_results is None
assert config.options._allow_large_results is False
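
For reference, the pattern the updated fixtures and tests rely on, scoping the new option to a block (a sketch; bpd is bigframes.pandas, and option_context is the same helper used throughout the diff):

    import bigframes.pandas as bpd

    with bpd.option_context("compute.allow_large_results", True):
        # Inside the block, results may exceed 10 GB and use an explicit destination table.
        bpd.read_gbq("bigquery-public-data.samples.wikipedia").head(5).to_pandas()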