googleapis · tswast · Jan 28, 2025 · Jan 21, 2025 · Jan 21, 2025 · Jan 22, 2025
@@ -483,7 +483,19 @@ def between(self, left, right, inclusive="both"):
         )
 
     def case_when(self, caselist) -> Series:
-        cases = list(itertools.chain(*caselist, (True, self)))
+        cases = []
+
+        for condition, output in itertools.chain(caselist, [(True, self)]):
+            cases.append(condition)
+            cases.append(output)
+            # In pandas, the default value if no case matches is the original value.
+            # This makes it impossible to change the type of the column, but if
+            # the condition is always True, we know it will match and no subsequent
+            # conditions matter (including the fallback to `self`). This break allows
+            # the type to change (see: internal issue 349926559).
+            if condition is True:
+                break
+
         return self._apply_nary_op(
             ops.case_when_op,
             cases,

@@ -2862,6 +2862,42 @@ def test_series_case_when(scalars_dfs_maybe_ordered):
     )
 
 
+def test_series_case_when_change_type(scalars_dfs_maybe_ordered):
+    pytest.importorskip(
+        "pandas",
+        minversion="2.2.0",
+        reason="case_when added in pandas 2.2.0",
+    )
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+    # Use `int64_col` as the input; every case output below is a string or NA.
+    bf_series = scalars_df["int64_col"]
+    pd_series = scalars_pandas_df["int64_col"]
+
+    # TODO(tswast): pandas case_when appears to assume True when a value is
+    # null. I suspect this should be considered a bug in pandas.
+    # Presumably this is why null conditions are filled with True below.
+    bf_conditions = [
+        ((bf_series > 645).fillna(True), scalars_df["string_col"]),
+        ((bf_series <= -100).fillna(True), pd.NA),
+        (True, "not_found"),  # Literal True: the catch-all final case.
+    ]
+
+    pd_conditions = [
+        ((pd_series > 645).fillna(True), scalars_pandas_df["string_col"]),
+        ((pd_series <= -100).fillna(True), pd.NA),
+        # pandas currently fails if condition and value are both literals; use lists.
+        ([True] * len(pd_series), ["not_found"] * len(pd_series)),
+    ]
+
+    bf_result = bf_series.case_when(bf_conditions).to_pandas()
+    pd_result = pd_series.case_when(pd_conditions)
+    # Compare against the pandas result cast to the "string[pyarrow]" dtype.
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result.astype("string[pyarrow]"),
+    )
+
+
 def test_to_frame(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 

@@ -2648,6 +2648,21 @@ def case_when(
             3    2
             Name: c, dtype: Int64
 
+        If you'd like to change the type, add a case with the condition True at the end of the case list:
+
+            >>> c.case_when(
+            ...     caselist=[
+            ...         (a.gt(0), 'a'),  # condition, replacement
+            ...         (b.gt(0), 'b'),
+            ...         (True, 'c'),
+            ...     ]
+            ... )
+            0    c
+            1    b
+            2    a
+            3    a
+            Name: c, dtype: string
+
         **See also:**
 
         - :func:`bigframes.pandas.Series.mask` : Replace values where the condition is True.