googleapis · sycai · Feb 6, 2025 · Feb 6, 2025 · Feb 6, 2025 · Feb 6, 2025
@@ -276,6 +276,26 @@ def label_to_col_id(self) -> typing.Mapping[Label, typing.Sequence[str]]:
             mapping[label] = (*mapping.get(label, ()), id)
         return mapping
 
+    def resolve_label_exact(self, label: Label) -> Optional[str]:
+        """Returns the column id matching the label if there is exactly
+        one such column. If there are multiple columns with the same label,
+        raises a ValueError. If there is no such column, returns None."""
+        matches = self.label_to_col_id.get(label, [])  # ids of all columns with this label
+        if len(matches) > 1:  # NOTE(review): message below says "id" but shows the label
+            raise ValueError(
+                f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
+            )
+        return matches[0] if len(matches) != 0 else None
+
+    def resolve_label_exact_or_error(self, label: Label) -> str:
+        """Returns the column id matching the label if there is exactly
+        one such column. If there are multiple columns with the same label,
+        raises a ValueError. If there is no such column, raises a ValueError too."""
+        col_id = self.resolve_label_exact(label)  # raises if the label is ambiguous
+        if col_id is None:
+            raise ValueError(f"Label {label} not found. {constants.FEEDBACK_LINK}")
+        return col_id
+
     @functools.cached_property
     def col_id_to_index_name(self) -> typing.Mapping[str, Label]:
         """Get column label for value columns, or index name for index columns"""

@@ -180,7 +180,10 @@ def __init__(
                 )
                 block = block.set_index([r_mapping[idx_col] for idx_col in idx_cols])
             if columns:
-                block = block.select_columns(list(columns))  # type:ignore
+                column_ids = [
+                    block.resolve_label_exact_or_error(label) for label in list(columns)
+                ]
+                block = block.select_columns(column_ids)  # type:ignore
             if dtype:
                 bf_dtype = bigframes.dtypes.bigframes_type(dtype)
                 block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
@@ -238,15 +241,7 @@ def _find_indices(
         return [self._block.value_columns.index(col_id) for col_id in col_ids]
 
     def _resolve_label_exact(self, label) -> Optional[str]:
-        """Returns the column id matching the label if there is exactly
-        one such column. If there are multiple columns with the same name,
-        raises an error. If there is no such column, returns None."""
-        matches = self._block.label_to_col_id.get(label, [])
-        if len(matches) > 1:
-            raise ValueError(
-                f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
-            )
-        return matches[0] if len(matches) != 0 else None
+        return self._block.resolve_label_exact(label)  # delegate to the block
 
     def _sql_names(
         self,

@@ -44,8 +44,15 @@
 def test_df_construct_copy(scalars_dfs):
     columns = ["int64_col", "string_col", "float64_col"]
     scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = dataframe.DataFrame(scalars_df, columns=columns).to_pandas()
-    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
+    # Overwrite a column on both frames so the label -> col_id mapping is non-trivial
+    bf_df = scalars_df.copy()
+    bf_df["int64_col"] = bf_df["int64_col"] / 2
+    pd_df = scalars_pandas_df.copy()
+    pd_df["int64_col"] = pd_df["int64_col"] / 2
+
+    bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas()
+
+    pd_result = pd.DataFrame(pd_df, columns=columns)
     pandas.testing.assert_frame_equal(bf_result, pd_result)