Skip to content

fix: prevent KeyError in bpd.concat with empty DF and struct/array types DF #1568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bigframes/core/compile/ibis_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,8 @@ def literal_to_ibis_scalar(
# Ibis has a bug when casting nulltype to geospatial, so we perform an intermediate cast first
geotype = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True)
return bigframes_vendored.ibis.literal(None, geotype)
ibis_dtype = BIGFRAMES_TO_IBIS[force_dtype] if force_dtype else None

ibis_dtype = bigframes_dtype_to_ibis_dtype(force_dtype) if force_dtype else None

if pd.api.types.is_list_like(literal):
if validate:
Expand Down
3 changes: 2 additions & 1 deletion tests/system/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,14 +465,15 @@ def nested_structs_df(


@pytest.fixture(scope="session")
def nested_structs_pandas_df() -> pd.DataFrame:
def nested_structs_pandas_df(nested_structs_pandas_type: pd.ArrowDtype) -> pd.DataFrame:
"""pd.DataFrame loaded from the ``nested_structs.jsonl`` test data.

The frame is indexed by the "id" column, and its "person" column is cast
to ``nested_structs_pandas_type`` before the frame is returned.
"""

# The data file is JSON Lines (one record per line), hence ``lines=True``.
df = pd.read_json(
DATA_DIR / "nested_structs.jsonl",
lines=True,
)
df = df.set_index("id")
# Cast "person" to the Arrow dtype supplied by the fixture argument.
df["person"] = df["person"].astype(nested_structs_pandas_type)
return df


Expand Down
10 changes: 10 additions & 0 deletions tests/system/small/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ def test_concat_dataframe(scalars_dfs, ordered):
assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)


def test_concat_dataframe_w_struct_cols(nested_structs_df, nested_structs_pandas_df):
    """Avoid regressions for internal issue 407107482

    Concatenating an empty DataFrame with a DataFrame that has struct
    columns must succeed and match the equivalent pandas concat.
    """
    # BigQuery DataFrames side: the empty frame shares the fixture's session.
    session = nested_structs_df._block.session
    bf_frames = (bpd.DataFrame(session=session), nested_structs_df)
    actual = bpd.concat(bf_frames, ignore_index=True).to_pandas()

    # pandas side: same operation, then align the index dtype with the
    # nullable "Int64" dtype before comparing.
    pd_frames = (pd.DataFrame(), nested_structs_pandas_df)
    expected = pd.concat(pd_frames, ignore_index=True)
    expected.index = expected.index.astype("Int64")

    pd.testing.assert_frame_equal(actual, expected)


def test_concat_series(scalars_dfs):
scalars_df, scalars_pandas_df = scalars_dfs
bf_result = bpd.concat(
Expand Down
4 changes: 2 additions & 2 deletions tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4381,13 +4381,13 @@ def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col


def test_series_struct_get_field_by_attribute(
nested_structs_df, nested_structs_pandas_df, nested_structs_pandas_type
nested_structs_df, nested_structs_pandas_df
):
if Version(pd.__version__) < Version("2.2.0"):
pytest.skip("struct accessor is not supported before pandas 2.2")

bf_series = nested_structs_df["person"]
df_series = nested_structs_pandas_df["person"].astype(nested_structs_pandas_type)
df_series = nested_structs_pandas_df["person"]

pd.testing.assert_series_equal(
bf_series.address.city.to_pandas(),
Expand Down