@@ -305,15 +305,17 @@ def aggregate(
305
305
aggregations : typing .Sequence [typing .Tuple [ex .Aggregation , str ]],
306
306
by_column_ids : typing .Sequence [str ] = (),
307
307
dropna : bool = True ,
308
- ) -> UnorderedIR :
308
+ ) -> OrderedIR :
309
309
"""
310
310
Apply aggregations to the expression.
311
311
Arguments:
312
312
aggregations: input_column_id, operation, output_column_id tuples
313
- by_column_id: column id of the aggregation key, this is preserved through the transform
313
+ by_column_ids: column ids of the aggregation keys, these are preserved through
314
+ the transform
314
315
dropna: whether null keys should be dropped
315
316
Returns:
316
- UnorderedIR
317
+ OrderedIR: the grouping key is a unique-valued column and has ordering
318
+ information.
317
319
"""
318
320
table = self ._to_ibis_expr ()
319
321
bindings = {col : table [col ] for col in self .column_ids }
@@ -323,18 +325,32 @@ def aggregate(
323
325
}
324
326
if by_column_ids :
325
327
result = table .group_by (by_column_ids ).aggregate (** stats )
328
+ # Must have deterministic ordering, so order by the unique "by" column
329
+ ordering = ExpressionOrdering (
330
+ tuple ([ascending_over (column_id ) for column_id in by_column_ids ]),
331
+ total_ordering_columns = frozenset (by_column_ids ),
332
+ )
326
333
columns = tuple (result [key ] for key in result .columns )
327
- expr = UnorderedIR (result , columns = columns )
334
+ expr = OrderedIR (result , columns = columns , ordering = ordering )
328
335
if dropna :
329
336
for column_id in by_column_ids :
330
337
expr = expr ._filter (expr ._get_ibis_column (column_id ).notnull ())
331
338
return expr
332
339
else :
333
340
aggregates = {** stats , ORDER_ID_COLUMN : ibis_types .literal (0 )}
334
341
result = table .aggregate (** aggregates )
342
+ # Ordering is irrelevant for single-row output, but set ordering id regardless
343
+ # as other ops(join etc.) expect it.
344
+ # TODO: Maybe can make completely empty
345
+ ordering = ExpressionOrdering (
346
+ ordering_value_columns = tuple ([]),
347
+ total_ordering_columns = frozenset ([]),
348
+ )
335
349
return UnorderedIR (
336
350
result ,
337
351
columns = [result [col_id ] for col_id in [* stats .keys ()]],
352
+ hidden_ordering_columns = [result [ORDER_ID_COLUMN ]],
353
+ ordering = ordering ,
338
354
)
339
355
340
356
def _uniform_sampling (self , fraction : float ) -> UnorderedIR :
@@ -523,7 +539,8 @@ def from_pandas(
523
539
"""
524
540
Builds an in-memory only (SQL only) expr from a pandas dataframe.
525
541
526
- Assumed that the dataframe has unique string column names and bigframes-suppported dtypes.
542
+ Assumed that the dataframe has unique string column names and bigframes-supported
543
+ dtypes.
527
544
"""
528
545
529
546
# ibis memtable cannot handle NA, must convert to None
@@ -560,7 +577,8 @@ def _hidden_column_ids(self) -> typing.Sequence[str]:
560
577
561
578
@property
562
579
def _ibis_order (self ) -> Sequence [ibis_types .Value ]:
563
- """Returns a sequence of ibis values which can be directly used to order a table expression. Has direction modifiers applied."""
580
+ """Returns a sequence of ibis values which can be directly used to order a
581
+ table expression. Has direction modifiers applied."""
564
582
return _convert_ordering_to_table_values (
565
583
{** self ._column_names , ** self ._hidden_ordering_column_names },
566
584
self ._ordering .all_ordering_columns ,
@@ -602,7 +620,8 @@ def aggregate(
602
620
Apply aggregations to the expression.
603
621
Arguments:
604
622
aggregations: input_column_id, operation, output_column_id tuples
605
- by_column_id: column id of the aggregation key, this is preserved through the transform
623
+ by_column_id: column id of the aggregation key, this is preserved through
624
+ the transform
606
625
dropna: whether null keys should be dropped
607
626
Returns:
608
627
OrderedIR
0 commit comments