Skip to content

fix: make invalid location warning case-insensitive #1044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 31 additions & 22 deletions bigframes/_config/bigquery_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,26 +36,36 @@
UNKNOWN_LOCATION_MESSAGE = "The location '{location}' is set to an unknown value. Did you mean '{possibility}'?"


def _validate_location(value: Optional[str]):
    """Warn when ``value`` is not a known BigQuery location.

    ``None`` and any member of ``bigframes.constants.ALL_BIGQUERY_LOCATIONS``
    pass silently. Any other value triggers an ``UnknownLocationWarning``
    that names the known location with the smallest Levenshtein distance
    to it. Nothing is returned.
    """
    if value is None:
        return

    known_locations = bigframes.constants.ALL_BIGQUERY_LOCATIONS
    if value in known_locations:
        return

    location = str(value)

    def _distance(candidate):
        return jellyfish.levenshtein_distance(location, candidate)

    possibility = min(known_locations, key=_distance)
    message = UNKNOWN_LOCATION_MESSAGE.format(
        location=location, possibility=possibility
    )

    # There are many layers before we get to (possibly) the user's code:
    # -> bpd.options.bigquery.location = "us-central-1"
    # -> location.setter
    # -> _validate_location
    warnings.warn(
        message,
        category=bigframes.exceptions.UnknownLocationWarning,
        stacklevel=3,
    )
def _get_validated_location(value: Optional[str]) -> Optional[str]:

if value is None or value in bigframes.constants.ALL_BIGQUERY_LOCATIONS:
return value

location = str(value)

location_lowercase = location.lower()
if location_lowercase in bigframes.constants.BIGQUERY_REGIONS:
return location_lowercase

location_uppercase = location.upper()
if location_uppercase in bigframes.constants.BIGQUERY_MULTIREGIONS:
return location_uppercase

possibility = min(
bigframes.constants.ALL_BIGQUERY_LOCATIONS,
key=lambda item: jellyfish.levenshtein_distance(location, item),
)
warnings.warn(
UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility),
# There are many layers before we get to (possibly) the user's code:
# -> bpd.options.bigquery.location = "us-central-1"
# -> location.setter
# -> _get_validated_location
stacklevel=3,
category=bigframes.exceptions.UnknownLocationWarning,
)

return value


def _validate_ordering_mode(value: str) -> bigframes.enums.OrderingMode:
Expand Down Expand Up @@ -135,8 +145,7 @@ def location(self) -> Optional[str]:
def location(self, value: Optional[str]):
if self._session_started and self._location != value:
raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="location"))
_validate_location(value)
self._location = value
self._location = _get_validated_location(value)

@property
def project(self) -> Optional[str]:
Expand Down
9 changes: 6 additions & 3 deletions bigframes/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@
DEFAULT_EXPIRATION = datetime.timedelta(days=7)

# https://cloud.google.com/bigquery/docs/locations
ALL_BIGQUERY_LOCATIONS = frozenset(
BIGQUERY_REGIONS = frozenset(
{
# regions
"us-east5",
"us-south1",
"us-central1",
Expand Down Expand Up @@ -68,11 +67,15 @@
"me-central1",
"me-west1",
"africa-south1",
# multi-regions
}
)
BIGQUERY_MULTIREGIONS = frozenset(
{
"US",
"EU",
}
)
ALL_BIGQUERY_LOCATIONS = frozenset(BIGQUERY_REGIONS.union(BIGQUERY_MULTIREGIONS))

# https://cloud.google.com/storage/docs/regional-endpoints
REP_ENABLED_BIGQUERY_LOCATIONS = frozenset(
Expand Down
39 changes: 32 additions & 7 deletions tests/system/large/test_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import bigframes.session.clients


def _assert_bq_execution_location(session: bigframes.Session):
def _assert_bq_execution_location(
session: bigframes.Session, expected_location: typing.Optional[str] = None
):
df = session.read_gbq(
"""
SELECT "aaa" as name, 111 as number
Expand All @@ -33,10 +35,10 @@ def _assert_bq_execution_location(session: bigframes.Session):
"""
)

assert (
typing.cast(bigquery.QueryJob, df.query_job).location
== session.bqclient.location
)
if expected_location is None:
expected_location = session._location

assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location

result = (
df[["name", "number"]]
Expand All @@ -47,8 +49,7 @@ def _assert_bq_execution_location(session: bigframes.Session):
)

assert (
typing.cast(bigquery.QueryJob, result.query_job).location
== session.bqclient.location
typing.cast(bigquery.QueryJob, result.query_job).location == expected_location
)


Expand Down Expand Up @@ -87,6 +88,30 @@ def test_bq_location(bigquery_location):
_assert_bq_execution_location(session)


@pytest.mark.parametrize(
    ("set_location", "resolved_location"),
    # Sort the set to avoid nondeterminism.
    [
        (canonical.capitalize(), canonical)
        for canonical in sorted(bigframes.constants.ALL_BIGQUERY_LOCATIONS)
    ],
)
def test_bq_location_non_canonical(set_location, resolved_location):
    """Check that a non-canonically cased location still runs in the right place.

    Each known location is passed in its ``str.capitalize()`` form; query
    jobs are then expected to report the canonical spelling.
    """
    options = bigframes.BigQueryOptions(location=set_location)
    session = bigframes.Session(context=options)

    assert session.bqclient.location == set_location

    # by default global endpoint is used
    api_base_url = session.bqclient._connection.API_BASE_URL
    assert api_base_url == "https://bigquery.googleapis.com"

    # assert that bigframes session honors the location
    _assert_bq_execution_location(session, resolved_location)


@pytest.mark.parametrize(
"bigquery_location",
# Sort the set to avoid nondeterminism.
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/_config/test_bigquery_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ def test_setter_if_session_started_but_setting_the_same_value(attribute):
[
(None,),
("us-central1",),
("us-Central1",),
("US-CENTRAL1",),
("US",),
("us",),
],
)
def test_location_set_to_valid_no_warning(valid_location):
Expand Down