Skip to content

Commit 82797de

Browse files
committed
fix errors in pandas 1.5 and address comments
1 parent 9721eb4 commit 82797de

File tree

9 files changed

+93
-41
lines changed

9 files changed

+93
-41
lines changed

‎bigframes/dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
import bigframes.formatting_helpers as formatter
6060
import bigframes.operations as ops
6161
import bigframes.operations.aggregations as agg_ops
62-
import bigframes.operations.plot as plot
62+
import bigframes.operations.plotting as plotting
6363
import bigframes.series
6464
import bigframes.series as bf_series
6565
import bigframes.session._io.bigquery
@@ -3196,6 +3196,6 @@ def get_right_id(id):
31963196

31973197
@property
31983198
def plot(self):
3199-
return plot.PlotAccessor(self)
3199+
return plotting.PlotAccessor(self)
32003200

32013201
__matmul__ = dot

‎bigframes/operations/_matplotlib/core.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from abc import ABC, abstractmethod
16-
15+
import abc
1716
import matplotlib.pyplot as plt
1817

1918

20-
class MPLPlot(ABC):
21-
@abstractmethod
19+
class MPLPlot(abc.ABC):
20+
@abc.abstractmethod
2221
def generate(self):
2322
pass
2423

‎bigframes/operations/_matplotlib/hist.py

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919
import pandas as pd
2020

2121
import bigframes.constants as constants
22-
from bigframes.operations._matplotlib.core import MPLPlot
22+
import bigframes.operations._matplotlib.core as bfplt
2323

2424

25-
class HistPlot(MPLPlot):
25+
class HistPlot(bfplt.MPLPlot):
2626
@property
2727
def _kind(self) -> Literal["hist"]:
2828
return "hist"
@@ -54,44 +54,53 @@ def __init__(
5454
self.data = self._compute_plot_data(data)
5555

5656
def generate(self) -> None:
57-
hist_bars = self._calculate_hist_bar(self.data, self.bins)
58-
59-
bin_edges = None
60-
hist_x = {}
61-
weights = {}
62-
for col_name, hist_bar in hist_bars.items():
63-
left = hist_bar.index.get_level_values("left_exclusive")
64-
right = hist_bar.index.get_level_values("right_inclusive")
65-
66-
hist_x[col_name] = pd.Series((left + right) / 2.0)
67-
weights[col_name] = hist_bar.values
68-
if bin_edges is None:
69-
bin_edges = left.union(right)
70-
else:
71-
bin_edges = left.union(right).union(bin_edges)
72-
73-
bins = None
74-
if bin_edges is not None:
75-
_, bins = np.histogram(
76-
bin_edges, bins=self.bins, range=self.kwargs.get("range", None)
57+
"""
58+
Calculates weighted histograms through BigQuery and plots them through pandas
59+
native histogram plot.
60+
"""
61+
hist_bars = self._calculate_hist_bars(self.data, self.bins)
62+
bin_edges = self._calculate_bin_edges(
63+
hist_bars, self.bins, self.kwargs.get("range", None)
64+
)
65+
66+
weights = {
67+
col_name: hist_bar.values for col_name, hist_bar in hist_bars.items()
68+
}
69+
hist_x = {
70+
col_name: pd.Series(
71+
(
72+
hist_bar.index.get_level_values("left_exclusive")
73+
+ hist_bar.index.get_level_values("right_inclusive")
74+
)
75+
/ 2.0
7776
)
77+
for col_name, hist_bar in hist_bars.items()
78+
}
7879

79-
# Fills with NA values when items have different lengths.
80-
ordered_columns = self.data.columns.values
80+
# Align DataFrames for plotting despite potential differences in column
81+
# lengths, filling shorter columns with zeros.
8182
hist_x_pd = pd.DataFrame(
8283
list(itertools.zip_longest(*hist_x.values())), columns=list(hist_x.keys())
83-
).sort_index(axis=1)
84+
).sort_index(axis=1)[self.data.columns.values]
8485
weights_pd = pd.DataFrame(
8586
list(itertools.zip_longest(*weights.values())), columns=list(weights.keys())
86-
).sort_index(axis=1)
87+
).sort_index(axis=1)[self.data.columns.values]
88+
hist_x_pd.fillna(0, inplace=True)
89+
weights_pd.fillna(0, inplace=True)
8790

88-
self.axes = hist_x_pd[ordered_columns].plot.hist(
89-
bins=bins,
90-
weights=np.array(weights_pd[ordered_columns].values),
91+
self.axes = hist_x_pd.plot.hist(
92+
bins=bin_edges,
93+
weights=np.array(weights_pd.values),
9194
**self.kwargs,
9295
) # type: ignore
9396

9497
def _compute_plot_data(self, data):
98+
"""
99+
Prepares data for plotting, focusing on numeric data types.
100+
101+
Raises:
102+
TypeError: If the input data contains no numeric columns.
103+
"""
95104
# Importing at the top of the file causes a circular import.
96105
import bigframes.series as series
97106

@@ -118,7 +127,13 @@ def _compute_plot_data(self, data):
118127
return numeric_data
119128

120129
@staticmethod
121-
def _calculate_hist_bar(data, bins):
130+
def _calculate_hist_bars(data, bins):
131+
"""
132+
Calculates histogram bars for each column in a BigFrames DataFrame, and
133+
returns a dictionary where keys are column names and values are pandas
134+
Series. The series values are the histogram bins' heights with a
135+
multi-index defining 'left_exclusive' and 'right_inclusive' bin edges.
136+
"""
122137
import bigframes.pandas as bpd
123138

124139
# TODO: Optimize this by batching multiple jobs into one.
@@ -132,3 +147,23 @@ def _calculate_hist_bar(data, bins):
132147
.sort_index(level="left_exclusive")
133148
)
134149
return hist_bar
150+
151+
@staticmethod
152+
def _calculate_bin_edges(hist_bars, bins, range):
153+
"""
154+
Calculate bin edges from the histogram bars.
155+
"""
156+
bin_edges = None
157+
for _, hist_bar in hist_bars.items():
158+
left = hist_bar.index.get_level_values("left_exclusive")
159+
right = hist_bar.index.get_level_values("right_inclusive")
160+
if bin_edges is None:
161+
bin_edges = left.union(right)
162+
else:
163+
bin_edges = left.union(right).union(bin_edges)
164+
165+
if bin_edges is None:
166+
return None
167+
168+
_, bins = np.histogram(bin_edges, bins=bins, range=range)
169+
return bins

‎bigframes/operations/plot.py renamed to ‎bigframes/operations/plotting.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import bigframes.operations._matplotlib as plotbackend
1919
import third_party.bigframes_vendored.pandas.plotting._core as vendordt
2020

21-
2221
class PlotAccessor:
2322
__doc__ = vendordt.PlotAccessor.__doc__
2423

‎bigframes/series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
import bigframes.operations.aggregations as agg_ops
5151
import bigframes.operations.base
5252
import bigframes.operations.datetimes as dt
53-
import bigframes.operations.plot as plot
53+
import bigframes.operations.plotting as plotting
5454
import bigframes.operations.strings as strings
5555
import bigframes.operations.structs as structs
5656
import third_party.bigframes_vendored.pandas.core.series as vendored_pandas_series
@@ -1560,7 +1560,7 @@ def str(self) -> strings.StringMethods:
15601560

15611561
@property
15621562
def plot(self):
1563-
return plot.PlotAccessor(self)
1563+
return plotting.PlotAccessor(self)
15641564

15651565
def _slice(
15661566
self,

‎docs/reference/bigframes.pandas/frame.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,14 @@ DataFrame
77
:members:
88
:inherited-members:
99
:undoc-members:
10+
11+
Accessors
12+
---------
13+
14+
Plotting handling
15+
^^^^^^^^^^^^^^^^^
16+
17+
.. automodule:: bigframes.operations.plotting
18+
:members:
19+
:inherited-members:
20+
:undoc-members:

‎docs/reference/bigframes.pandas/series.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,11 @@ Struct handling
4242
:members:
4343
:inherited-members:
4444
:undoc-members:
45+
46+
Plotting handling
47+
^^^^^^^^^^^^^^^^^
48+
49+
.. automodule:: bigframes.operations.plotting
50+
:members:
51+
:inherited-members:
52+
:undoc-members:

‎third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5231,7 +5231,7 @@ def plot(self):
52315231
Make plots of Dataframes.
52325232
52335233
Returns:
5234-
bigframes.operations.plot.PlotAccessor:
5234+
bigframes.operations.plotting.PlotAccessor:
52355235
An accessor making plots.
52365236
"""
52375237
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

‎third_party/bigframes_vendored/pandas/core/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3117,7 +3117,7 @@ def plot(self):
31173117
Make plots of Series.
31183118
31193119
Returns:
3120-
bigframes.operations.plot.PlotAccessor:
3120+
bigframes.operations.plotting.PlotAccessor:
31213121
An accessor making plots.
31223122
"""
31233123
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)