2 files changed: +38 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -620,15 +620,31 @@ def to_pandas_batches(
620
620
ordered = True ,
621
621
use_explicit_destination = allow_large_results ,
622
622
)
623
+
624
+ total_batches = 0
623
625
for df in execute_result .to_pandas_batches (
624
626
page_size = page_size , max_results = max_results
625
627
):
628
+ total_batches += 1
626
629
self ._copy_index_to_pandas (df )
627
630
if squeeze :
628
631
yield df .squeeze (axis = 1 )
629
632
else :
630
633
yield df
631
634
635
+ # To reduce the number of edge cases to consider when working with the
636
+ # results of this method, always yield at least one DataFrame. See:
637
+ # b/428918844.
638
+ if total_batches == 0 :
639
+ df = pd .DataFrame (
640
+ {
641
+ col : pd .Series ([], dtype = self .expr .get_column_type (col ))
642
+ for col in itertools .chain (self .value_columns , self .index_columns )
643
+ }
644
+ )
645
+ self ._copy_index_to_pandas (df )
646
+ yield df
647
+
632
648
def _copy_index_to_pandas (self , df : pd .DataFrame ):
633
649
"""Set the index on pandas DataFrame to match this block.
634
650
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,28 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
347
347
pd .testing .assert_series_equal (actual , expected )
348
348
349
349
350
+ def test_to_pandas_batches_w_empty_dataframe (session ):
351
+ """Verify to_pandas_batches() APIs returns at least one DataFrame.
352
+
353
+ See b/428918844 for additional context.
354
+ """
355
+ empty = bpd .DataFrame (
356
+ {
357
+ "idx1" : [],
358
+ "idx2" : [],
359
+ "col1" : pandas .Series ([], dtype = "string[pyarrow]" ),
360
+ "col2" : pandas .Series ([], dtype = "Int64" ),
361
+ },
362
+ session = session ,
363
+ ).set_index (["idx1" , "idx2" ], drop = True )
364
+
365
+ results = list (empty .to_pandas_batches ())
366
+ assert len (results ) == 1
367
+ assert list (results [0 ].index .names ) == ["idx1" , "idx2" ]
368
+ assert list (results [0 ].columns ) == ["col1" , "col2" ]
369
+ pandas .testing .assert_series_equal (results [0 ].dtypes , empty .dtypes )
370
+
371
+
350
372
@pytest .mark .parametrize ("allow_large_results" , (True , False ))
351
373
def test_to_pandas_batches_w_page_size_and_max_results (session , allow_large_results ):
352
374
"""Verify to_pandas_batches() APIs returns the expected page size.
You can’t perform that action at this time.
0 commit comments