Skip to content

fix: Fix bug selecting column repeatedly #1858

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions bigframes/core/array_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,12 +330,27 @@ def create_constant(

return self.project_to_id(ex.const(value, dtype))

def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue:
def select_columns(
self, column_ids: typing.Sequence[str], allow_renames: bool = False
) -> ArrayValue:
# This basically just drops and reorders columns - logically a no-op except as a final step
selections = (
bigframes.core.nodes.AliasedRef.identity(ids.ColumnId(col_id))
for col_id in column_ids
)
selections = []
seen = set()

for id in column_ids:
if id not in seen:
ref = nodes.AliasedRef.identity(ids.ColumnId(id))
elif allow_renames:
ref = nodes.AliasedRef(
ex.deref(id), ids.ColumnId(bigframes.core.guid.generate_guid())
)
else:
raise ValueError(
"Must set allow_renames=True to select columns repeatedly"
)
selections.append(ref)
seen.add(id)

return ArrayValue(
nodes.SelectionNode(
child=self.node,
Expand Down
5 changes: 4 additions & 1 deletion bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,10 @@ def select_column(self, id: str) -> Block:
return self.select_columns([id])

def select_columns(self, ids: typing.Sequence[str]) -> Block:
    """Return a new Block holding the index columns plus the given value columns.

    Args:
        ids: Column ids to keep, in the desired output order. The same id
            may appear more than once.

    Returns:
        A Block built from the selected expression, this block's index
        columns, the labels looked up for ``ids``, and the existing index
        names.
    """
    # The same column id may legitimately show up several times (either
    # repeated in ``ids`` or also present among the index columns), so the
    # underlying selection must be allowed to rename the repeats; without
    # allow_renames=True the expression-level select rejects duplicates.
    expr = self._expr.select_columns(
        [*self.index_columns, *ids], allow_renames=True
    )
    col_labels = self._get_labels_for_columns(ids)
    return Block(expr, self.index_columns, col_labels, self.index.names)

Expand Down
9 changes: 9 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3408,6 +3408,15 @@ def test__dir__with_rename(scalars_dfs):
assert "drop" in results


def test_loc_select_columns_w_repeats(scalars_df_index, scalars_pandas_df_index):
    """Selecting a column list that repeats a name must match the pandas result."""
    # Deliberately lists "int64_col" twice to exercise repeated selection.
    repeated_cols = ("int64_col", "int64_col", "int64_too")

    actual = scalars_df_index[list(repeated_cols)].to_pandas()
    expected = scalars_pandas_df_index[list(repeated_cols)]

    pd.testing.assert_frame_equal(actual, expected)


@pytest.mark.parametrize(
("start", "stop", "step"),
[
Expand Down