googleapis
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py
Lines changed: 88 additions & 7 deletions
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
Lines changed: 0 additions & 10 deletions
@@ -1174,7 +1174,7 @@ def _read_csv_w_bigquery_engine(
             index_col=index_col,
             columns=columns,
             names=names,
-            is_index_in_columns=True,
+            index_col_in_columns=True,
         )
 
         if dtype is not None:
 
@@ -97,8 +97,30 @@ def _to_index_cols(
 
 
 def _check_column_duplicates(
-    index_cols: Iterable[str], columns: Iterable[str], is_index_in_columns: bool
+    index_cols: Iterable[str], columns: Iterable[str], index_col_in_columns: bool
 ) -> Iterable[str]:
+    """Validates and processes index and data columns for duplicates and overlap.
+
+    This function performs two main tasks:
+    1.  Ensures there are no duplicate column names within the `index_cols` list
+        or within the `columns` list.
+    2.  Based on the `index_col_in_columns` flag, it validates the relationship
+        between `index_cols` and `columns`.
+
+    Args:
+        index_cols (Iterable[str]):
+            An iterable of column names designated as the index.
+        columns (Iterable[str]):
+            An iterable of column names designated as the data columns.
+        index_col_in_columns (bool):
+            A flag indicating how to handle overlap between `index_cols` and
+            `columns`.
+            - If `False`, the two lists must be disjoint (contain no common
+              elements). An error is raised if any overlap is found.
+            - If `True`, `index_cols` is expected to be a subset of
+              `columns`. An error is raised if an index column is not found
+              in the `columns` list.
+    """
     index_cols_list = list(index_cols) if index_cols is not None else []
     columns_list = list(columns) if columns is not None else []
     set_index = set(index_cols_list)
@@ -119,7 +141,7 @@ def _check_column_duplicates(
             "All column names specified in 'columns' must be unique."
         )
 
-    if is_index_in_columns:
+    if index_col_in_columns:
         if not set_index.issubset(set_columns):
             raise ValueError(
                 f"The specified index column(s) were not found: {set_index - set_columns}. "
@@ -405,7 +427,7 @@ def read_gbq_table(  # type: ignore[overload-overlap]
         dry_run: Literal[False] = ...,
         force_total_order: Optional[bool] = ...,
         n_rows: Optional[int] = None,
-        is_index_in_columns: bool = False,
+        index_col_in_columns: bool = False,
     ) -> dataframe.DataFrame:
         ...
 
@@ -428,7 +450,7 @@ def read_gbq_table(
         dry_run: Literal[True] = ...,
         force_total_order: Optional[bool] = ...,
         n_rows: Optional[int] = None,
-        is_index_in_columns: bool = False,
+        index_col_in_columns: bool = False,
     ) -> pandas.Series:
         ...
 
@@ -450,8 +472,67 @@ def read_gbq_table(
         dry_run: bool = False,
         force_total_order: Optional[bool] = None,
         n_rows: Optional[int] = None,
-        is_index_in_columns: bool = False,
+        index_col_in_columns: bool = False,
     ) -> dataframe.DataFrame | pandas.Series:
+        """Read a BigQuery table into a BigQuery DataFrames DataFrame.
+
+        This method allows you to create a DataFrame from a BigQuery table.
+        You can specify the columns to load, an index column, and apply
+        filters.
+
+        Args:
+            table_id (str):
+                The identifier of the BigQuery table to read.
+            index_col (Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind, optional):
+                The column(s) to use as the index for the DataFrame. This can be
+                a single column name or a list of column names. If not provided,
+                a default index will be used based on the session's
+                ``default_index_type``.
+            columns (Iterable[str], optional):
+                The columns to read from the table. If not specified, all
+                columns will be read.
+            names (Optional[Iterable[str]], optional):
+                A list of column names to use for the resulting DataFrame. This
+                is useful if you want to rename the columns as you read the
+                data.
+            max_results (Optional[int], optional):
+                The maximum number of rows to retrieve from the table. If not
+                specified, all rows will be loaded.
+            use_cache (bool, optional):
+                Whether to use cached results for the query. Defaults to True.
+                Setting this to False will force a re-execution of the query.
+            filters (third_party_pandas_gbq.FiltersType, optional):
+                A list of filters to apply to the data. Filters are specified
+                as a list of tuples, where each tuple contains a column name,
+                an operator (e.g., '==', '!='), and a value.
+            enable_snapshot (bool, optional):
+                If True, a snapshot of the table is used to ensure that the
+                DataFrame is deterministic, even if the underlying table
+                changes. Defaults to True.
+            dry_run (bool, optional):
+                If True, the function will not actually execute the query but
+                will instead return statistics about the table. Defaults to False.
+            force_total_order (Optional[bool], optional):
+                If True, a total ordering is enforced on the DataFrame, which
+                can be useful for operations that require a stable row order.
+                If None, the session's default behavior is used.
+            n_rows (Optional[int], optional):
+                The number of rows to consider for type inference and other
+                metadata operations. This does not limit the number of rows
+                in the final DataFrame.
+            index_col_in_columns (bool, optional):
+                Specifies if the ``index_col`` is also present in the ``columns``
+                list. Defaults to ``False``.
+
+                * If ``False``, ``index_col`` and ``columns`` must specify
+                  distinct sets of columns. An error will be raised if any
+                  column is found in both.
+                * If ``True``, the column(s) in ``index_col`` are expected to
+                  also be present in the ``columns`` list. This is useful
+                  when the index is selected from the data columns (e.g., in a
+                  ``read_csv`` scenario). The column will be used as the
+                  DataFrame's index and removed from the list of value columns.
+        """
         import bigframes._tools.strings
         import bigframes.dataframe as dataframe
 
@@ -534,7 +615,7 @@ def read_gbq_table(
             names=names,
         )
         columns = list(
-            _check_column_duplicates(index_cols, columns, is_index_in_columns)
+            _check_column_duplicates(index_cols, columns, index_col_in_columns)
         )
 
         for key in index_cols:
@@ -818,7 +899,7 @@ def read_gbq_query(
 
         index_cols = _to_index_cols(index_col)
         columns = _check_column_duplicates(
-            index_cols, columns, is_index_in_columns=False
+            index_cols, columns, index_col_in_columns=False
         )
 
         filters_copy1, filters_copy2 = itertools.tee(filters)
 
@@ -1320,10 +1320,6 @@ def test_read_csv_for_names_less_than_columns(session, df_and_gcs_csv_for_two_co
     assert bf_df.shape == pd_df.shape
     assert bf_df.columns.tolist() == pd_df.columns.tolist()
 
-    # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs
-    # (b/280889935) or guarantee row ordering.
-    bf_df = bf_df.sort_index()
-
     # Pandas's index name is None, while BigFrames's index name is "rowindex".
     pd_df.index.name = "rowindex"
     pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas())
@@ -1527,9 +1523,6 @@ def test_read_csv_w_usecols_and_indexcol(session, df_and_local_csv):
     assert bf_df.shape == pd_df.shape
     assert bf_df.columns.tolist() == pd_df.columns.tolist()
 
-    # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs
-    # (b/280889935) or guarantee row ordering.
-    bf_df = bf_df.sort_index()
     pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas())
 
 
@@ -1585,9 +1578,6 @@ def test_read_csv_local_w_encoding(session, penguins_pandas_df_default_index):
         bf_df = session.read_csv(
             path, engine="bigquery", index_col="rowindex", encoding="ISO-8859-1"
         )
-        # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs
-        # (b/280889935) or guarantee row ordering.
-        bf_df = bf_df.sort_index()
         pd.testing.assert_frame_equal(
             bf_df.to_pandas(), penguins_pandas_df_default_index
         )
Original file line number	Diff line number	Diff line change
`@@ -1174,7 +1174,7 @@ def _read_csv_w_bigquery_engine(`
`1174`	`1174`	`index_col=index_col,`
`1175`	`1175`	`columns=columns,`
`1176`	`1176`	`names=names,`
`1177`		`- is_index_in_columns=True,`
	`1177`	`+ index_col_in_columns=True,`
`1178`	`1178`	`)`
`1179`	`1179`
`1180`	`1180`	`if dtype is not None:`