@@ -97,8 +97,30 @@ def _to_index_cols(
97
97
98
98
99
99
def _check_column_duplicates (
100
- index_cols : Iterable [str ], columns : Iterable [str ], is_index_in_columns : bool
100
+ index_cols : Iterable [str ], columns : Iterable [str ], index_col_in_columns : bool
101
101
) -> Iterable [str ]:
102
+ """Validates and processes index and data columns for duplicates and overlap.
103
+
104
+ This function performs two main tasks:
105
+ 1. Ensures there are no duplicate column names within the `index_cols` list
106
+ or within the `columns` list.
107
+ 2. Based on the `index_col_in_columns` flag, it validates the relationship
108
+ between `index_cols` and `columns`.
109
+
110
+ Args:
111
+ index_cols (Iterable[str]):
112
+ An iterable of column names designated as the index.
113
+ columns (Iterable[str]):
114
+ An iterable of column names designated as the data columns.
115
+ index_col_in_columns (bool):
116
+ A flag indicating how to handle overlap between `index_cols` and
117
+ `columns`.
118
+ - If `False`, the two lists must be disjoint (contain no common
119
+ elements). An error is raised if any overlap is found.
120
+ - If `True`, `index_cols` is expected to be a subset of
121
+ `columns`. An error is raised if an index column is not found
122
+ in the `columns` list.
123
+ """
102
124
index_cols_list = list (index_cols ) if index_cols is not None else []
103
125
columns_list = list (columns ) if columns is not None else []
104
126
set_index = set (index_cols_list )
@@ -119,7 +141,7 @@ def _check_column_duplicates(
119
141
"All column names specified in 'columns' must be unique."
120
142
)
121
143
122
- if is_index_in_columns :
144
+ if index_col_in_columns :
123
145
if not set_index .issubset (set_columns ):
124
146
raise ValueError (
125
147
f"The specified index column(s) were not found: { set_index - set_columns } . "
@@ -405,7 +427,7 @@ def read_gbq_table( # type: ignore[overload-overlap]
405
427
dry_run : Literal [False ] = ...,
406
428
force_total_order : Optional [bool ] = ...,
407
429
n_rows : Optional [int ] = None ,
408
- is_index_in_columns : bool = False ,
430
+ index_col_in_columns : bool = False ,
409
431
) -> dataframe .DataFrame :
410
432
...
411
433
@@ -428,7 +450,7 @@ def read_gbq_table(
428
450
dry_run : Literal [True ] = ...,
429
451
force_total_order : Optional [bool ] = ...,
430
452
n_rows : Optional [int ] = None ,
431
- is_index_in_columns : bool = False ,
453
+ index_col_in_columns : bool = False ,
432
454
) -> pandas .Series :
433
455
...
434
456
@@ -450,8 +472,67 @@ def read_gbq_table(
450
472
dry_run : bool = False ,
451
473
force_total_order : Optional [bool ] = None ,
452
474
n_rows : Optional [int ] = None ,
453
- is_index_in_columns : bool = False ,
475
+ index_col_in_columns : bool = False ,
454
476
) -> dataframe .DataFrame | pandas .Series :
477
+ """Read a BigQuery table into a BigQuery DataFrames DataFrame.
478
+
479
+ This method allows you to create a DataFrame from a BigQuery table.
480
+ You can specify the columns to load, an index column, and apply
481
+ filters.
482
+
483
+ Args:
484
+ table_id (str):
485
+ The identifier of the BigQuery table to read.
486
+ index_col (Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind, optional):
487
+ The column(s) to use as the index for the DataFrame. This can be
488
+ a single column name or a list of column names. If not provided,
489
+ a default index will be used based on the session's
490
+ ``default_index_type``.
491
+ columns (Iterable[str], optional):
492
+ The columns to read from the table. If not specified, all
493
+ columns will be read.
494
+ names (Optional[Iterable[str]], optional):
495
+ A list of column names to use for the resulting DataFrame. This
496
+ is useful if you want to rename the columns as you read the
497
+ data.
498
+ max_results (Optional[int], optional):
499
+ The maximum number of rows to retrieve from the table. If not
500
+ specified, all rows will be loaded.
501
+ use_cache (bool, optional):
502
+ Whether to use cached results for the query. Defaults to True.
503
+ Setting this to False will force a re-execution of the query.
504
+ filters (third_party_pandas_gbq.FiltersType, optional):
505
+ A list of filters to apply to the data. Filters are specified
506
+ as a list of tuples, where each tuple contains a column name,
507
+ an operator (e.g., '==', '!='), and a value.
508
+ enable_snapshot (bool, optional):
509
+ If True, a snapshot of the table is used to ensure that the
510
+ DataFrame is deterministic, even if the underlying table
511
+ changes. Defaults to True.
512
+ dry_run (bool, optional):
513
+ If True, the function will not actually execute the query but
514
+ will instead return statistics about the table. Defaults to False.
515
+ force_total_order (Optional[bool], optional):
516
+ If True, a total ordering is enforced on the DataFrame, which
517
+ can be useful for operations that require a stable row order.
518
+ If None, the session's default behavior is used.
519
+ n_rows (Optional[int], optional):
520
+ The number of rows to consider for type inference and other
521
+ metadata operations. This does not limit the number of rows
522
+ in the final DataFrame.
523
+ index_col_in_columns (bool, optional):
524
+ Specifies if the ``index_col`` is also present in the ``columns``
525
+ list. Defaults to ``False``.
526
+
527
+ * If ``False``, ``index_col`` and ``columns`` must specify
528
+ distinct sets of columns. An error will be raised if any
529
+ column is found in both.
530
+ * If ``True``, the column(s) in ``index_col`` are expected to
531
+ also be present in the ``columns`` list. This is useful
532
+ when the index is selected from the data columns (e.g., in a
533
+ ``read_csv`` scenario). The column will be used as the
534
+ DataFrame's index and removed from the list of value columns.
535
+ """
455
536
import bigframes ._tools .strings
456
537
import bigframes .dataframe as dataframe
457
538
@@ -534,7 +615,7 @@ def read_gbq_table(
534
615
names = names ,
535
616
)
536
617
columns = list (
537
- _check_column_duplicates (index_cols , columns , is_index_in_columns )
618
+ _check_column_duplicates (index_cols , columns , index_col_in_columns )
538
619
)
539
620
540
621
for key in index_cols :
@@ -818,7 +899,7 @@ def read_gbq_query(
818
899
819
900
index_cols = _to_index_cols (index_col )
820
901
columns = _check_column_duplicates (
821
- index_cols , columns , is_index_in_columns = False
902
+ index_cols , columns , index_col_in_columns = False
822
903
)
823
904
824
905
filters_copy1 , filters_copy2 = itertools .tee (filters )
0 commit comments