@@ -2048,6 +2048,98 @@ def where(self, cond, other):
2048
2048
"""
2049
2049
raise NotImplementedError (constants .ABSTRACT_METHOD_ERROR_MESSAGE )
2050
2050
2051
def mask(self, cond, other):
    """Replace values where the condition is True.

    This is the inverse of ``where``: entries for which ``cond`` is False
    keep their original value, and entries for which ``cond`` is True are
    replaced.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'a': [20, 10, 0], 'b': [0, 10, 20]})
        >>> df
            a   b
        0  20   0
        1  10  10
        2   0  20
        <BLANKLINE>
        [3 rows x 2 columns]

    You can mask the values in the dataframe based on a condition. The
    values matching the condition would be replaced, and not matching would
    be kept. The default replacement value is ``NA``. For example, when the
    condition is a dataframe:

        >>> df.mask(df > 0)
              a     b
        0  <NA>     0
        1  <NA>  <NA>
        2     0  <NA>
        <BLANKLINE>
        [3 rows x 2 columns]

    You can specify a custom replacement value for matching values.

        >>> df.mask(df > 0, -1)
            a   b
        0  -1   0
        1  -1  -1
        2   0  -1
        <BLANKLINE>
        [3 rows x 2 columns]

    Besides dataframe, the condition can be a series too. For example:

        >>> df.mask(df['a'] > 10, -1)
            a   b
        0  -1  -1
        1  10  10
        2   0  20
        <BLANKLINE>
        [3 rows x 2 columns]

    As for the replacement, it can be a dataframe too. For example:

        >>> df.mask(df > 10, -df)
             a    b
        0  -20    0
        1   10   10
        2    0  -20
        <BLANKLINE>
        [3 rows x 2 columns]

        >>> df.mask(df['a'] > 10, -df)
             a   b
        0  -20   0
        1   10  10
        2    0  20
        <BLANKLINE>
        [3 rows x 2 columns]

    Please note, replacement doesn't support Series for now. In pandas, when
    specifying a Series as replacement, the axis value should be specified
    at the same time, which is not supported in bigframes DataFrame.

    Args:
        cond (bool Series/DataFrame, array-like, or callable):
            Where cond is False, keep the original value. Where True, replace
            with corresponding value from other. If cond is callable, it is
            computed on the Series/DataFrame and returns boolean
            Series/DataFrame or array. The callable must not change input
            Series/DataFrame (though pandas doesn't check it).
        other (scalar, DataFrame, or callable):
            Entries where cond is True are replaced with corresponding value
            from other. If other is callable, it is computed on the
            DataFrame and returns scalar or DataFrame. The callable must not
            change input DataFrame (though pandas doesn't check it). If not
            specified, entries will be filled with the corresponding NULL
            value (np.nan for numpy dtypes, pd.NA for extension dtypes).

    Returns:
        DataFrame: DataFrame after the replacement.
    """
    # Abstract API stub: it only carries the documentation and always raises.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
2142
+
2051
2143
# ----------------------------------------------------------------------
2052
2144
# Sorting
2053
2145
0 commit comments