Skip to content

Commit 333d0ac

Browse files
committed
address comments offline
1 parent 0ba590f commit 333d0ac

File tree

3 files changed

+37
-8
lines changed

3 files changed

+37
-8
lines changed

‎bigframes/core/compile/aggregate_compiler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def compile_aggregate(
3838
) -> ibis_types.Value:
3939
if isinstance(aggregate, ex.UnaryAggregation):
4040
input = scalar_compiler.compile_expression(aggregate.arg, bindings=bindings)
41-
return compile_unary_agg(aggregate.op, input, order_by=order_by)
41+
return compile_unary_agg(
42+
aggregate.op, input, order_by=order_by if aggregate.op.can_order_by else []
43+
)
4244
elif isinstance(aggregate, ex.BinaryAggregation):
4345
left = scalar_compiler.compile_expression(aggregate.left, bindings=bindings)
4446
right = scalar_compiler.compile_expression(aggregate.right, bindings=bindings)

‎bigframes/core/compile/compiled.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -305,15 +305,17 @@ def aggregate(
305305
aggregations: typing.Sequence[typing.Tuple[ex.Aggregation, str]],
306306
by_column_ids: typing.Sequence[str] = (),
307307
dropna: bool = True,
308-
) -> UnorderedIR:
308+
) -> OrderedIR:
309309
"""
310310
Apply aggregations to the expression.
311311
Arguments:
312312
aggregations: input_column_id, operation, output_column_id tuples
313-
by_column_id: column id of the aggregation key, this is preserved through the transform
313+
by_column_id: column id of the aggregation key, this is preserved through
314+
the transform
314315
dropna: whether null keys should be dropped
315316
Returns:
316-
UnorderedIR
317+
OrderedIR: the grouping key is a unique-valued column and has ordering
318+
information.
317319
"""
318320
table = self._to_ibis_expr()
319321
bindings = {col: table[col] for col in self.column_ids}
@@ -323,18 +325,32 @@ def aggregate(
323325
}
324326
if by_column_ids:
325327
result = table.group_by(by_column_ids).aggregate(**stats)
328+
# Must have deterministic ordering, so order by the unique "by" column
329+
ordering = ExpressionOrdering(
330+
tuple([ascending_over(column_id) for column_id in by_column_ids]),
331+
total_ordering_columns=frozenset(by_column_ids),
332+
)
326333
columns = tuple(result[key] for key in result.columns)
327-
expr = UnorderedIR(result, columns=columns)
334+
expr = OrderedIR(result, columns=columns, ordering=ordering)
328335
if dropna:
329336
for column_id in by_column_ids:
330337
expr = expr._filter(expr._get_ibis_column(column_id).notnull())
331338
return expr
332339
else:
333340
aggregates = {**stats, ORDER_ID_COLUMN: ibis_types.literal(0)}
334341
result = table.aggregate(**aggregates)
342+
# Ordering is irrelevant for single-row output, but set ordering id regardless
343+
# as other ops(join etc.) expect it.
344+
# TODO: Maybe can make completely empty
345+
ordering = ExpressionOrdering(
346+
ordering_value_columns=tuple([]),
347+
total_ordering_columns=frozenset([]),
348+
)
335349
return UnorderedIR(
336350
result,
337351
columns=[result[col_id] for col_id in [*stats.keys()]],
352+
hidden_ordering_columns=[result[ORDER_ID_COLUMN]],
353+
ordering=ordering,
338354
)
339355

340356
def _uniform_sampling(self, fraction: float) -> UnorderedIR:
@@ -523,7 +539,8 @@ def from_pandas(
523539
"""
524540
Builds an in-memory only (SQL only) expr from a pandas dataframe.
525541
526-
Assumed that the dataframe has unique string column names and bigframes-suppported dtypes.
542+
Assumed that the dataframe has unique string column names and bigframes-suppported
543+
dtypes.
527544
"""
528545

529546
# ibis memtable cannot handle NA, must convert to None
@@ -560,7 +577,8 @@ def _hidden_column_ids(self) -> typing.Sequence[str]:
560577

561578
@property
562579
def _ibis_order(self) -> Sequence[ibis_types.Value]:
563-
"""Returns a sequence of ibis values which can be directly used to order a table expression. Has direction modifiers applied."""
580+
"""Returns a sequence of ibis values which can be directly used to order a
581+
table expression. Has direction modifiers applied."""
564582
return _convert_ordering_to_table_values(
565583
{**self._column_names, **self._hidden_ordering_column_names},
566584
self._ordering.all_ordering_columns,
@@ -602,7 +620,8 @@ def aggregate(
602620
Apply aggregations to the expression.
603621
Arguments:
604622
aggregations: input_column_id, operation, output_column_id tuples
605-
by_column_id: column id of the aggregation key, this is preserved through the transform
623+
by_column_id: column id of the aggregation key, this is preserved through
624+
the transform
606625
dropna: whether null keys should be dropped
607626
Returns:
608627
OrderedIR

‎bigframes/operations/aggregations.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ def arguments(self) -> int:
5252
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
5353
return input_types[0]
5454

55+
@property
56+
def can_order_by(self):
57+
return False
58+
5559

5660
@dataclasses.dataclass(frozen=True)
5761
class AggregateOp(WindowOp):
@@ -226,6 +230,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
226230
class ArrayAggOp(UnaryAggregateOp):
227231
name: ClassVar[str] = "arrayagg"
228232

233+
@property
234+
def can_order_by(self):
235+
return True
236+
229237
@property
230238
def skips_nulls(self):
231239
return True

0 commit comments

Comments
 (0)