googleapis · sycai · Feb 24, 2025 · Feb 21, 2025 · Feb 21, 2025 · Feb 24, 2025
@@ -231,7 +231,11 @@ def _(
     column: ibis_types.NumericColumn,
     window=None,
 ) -> ibis_types.NumericValue:
-    return _apply_window_if_present(column.quantile(op.q), window)
+    result = column.quantile(op.q)
+    if op.should_floor_result:
+        result = result.floor()  # type:ignore
+
+    return _apply_window_if_present(result, window)
 
 
 @compile_unary_agg.register
@@ -242,7 +246,8 @@ def _(
     window=None,
     # order_by: typing.Sequence[ibis_types.Value] = [],
 ) -> ibis_types.NumericValue:
-    return _apply_window_if_present(column.mean(), window)
+    result = column.mean().floor() if op.should_floor_result else column.mean()
+    return _apply_window_if_present(result, window)
 
 
 @compile_unary_agg.register
@@ -306,10 +311,11 @@ def _(
 @numeric_op
 def _(
     op: agg_ops.StdOp,
-    x: ibis_types.Column,
+    x: ibis_types.NumericColumn,
     window=None,
 ) -> ibis_types.Value:
-    return _apply_window_if_present(cast(ibis_types.NumericColumn, x).std(), window)
+    result = x.std().floor() if op.should_floor_result else x.std()
+    return _apply_window_if_present(result, window)
 
 
 @compile_unary_agg.register

@@ -70,6 +70,19 @@ def rewrite_timedelta_expressions(root: nodes.BigFrameNode) -> nodes.BigFrameNod
             root.skip_reproject_unsafe,
         )
 
+    if isinstance(root, nodes.AggregateNode):
+        updated_aggregations = tuple(
+            (_rewrite_aggregation(agg, root.child.schema), col_id)
+            for agg, col_id in root.aggregations
+        )
+        return nodes.AggregateNode(
+            root.child,
+            updated_aggregations,
+            root.by_column_ids,
+            root.order_by,
+            root.dropna,
+        )
+
     return root
 
 
@@ -196,17 +209,34 @@ def _rewrite_aggregation(
 ) -> ex.Aggregation:
     if not isinstance(aggregation, ex.UnaryAggregation):
         return aggregation
-    if not isinstance(aggregation.op, aggs.DiffOp):
-        return aggregation
 
     if isinstance(aggregation.arg, ex.DerefOp):
         input_type = schema.get_type(aggregation.arg.id.sql)
     else:
         input_type = aggregation.arg.dtype
 
-    if dtypes.is_datetime_like(input_type):
+    if isinstance(aggregation.op, aggs.DiffOp) and dtypes.is_datetime_like(input_type):
         return ex.UnaryAggregation(
             aggs.TimeSeriesDiffOp(aggregation.op.periods), aggregation.arg
         )
 
+    if isinstance(aggregation.op, aggs.StdOp) and input_type is dtypes.TIMEDELTA_DTYPE:
+        return ex.UnaryAggregation(
+            aggs.StdOp(should_floor_result=True), aggregation.arg
+        )
+
+    if isinstance(aggregation.op, aggs.MeanOp) and input_type is dtypes.TIMEDELTA_DTYPE:
+        return ex.UnaryAggregation(
+            aggs.MeanOp(should_floor_result=True), aggregation.arg
+        )
+
+    if (
+        isinstance(aggregation.op, aggs.QuantileOp)
+        and input_type is dtypes.TIMEDELTA_DTYPE
+    ):
+        return ex.UnaryAggregation(
+            aggs.QuantileOp(q=aggregation.op.q, should_floor_result=True),
+            aggregation.arg,
+        )
+
     return aggregation
@@ -142,13 +142,16 @@ class SumOp(UnaryAggregateOp):
     name: ClassVar[str] = "sum"
 
     def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
-        if not dtypes.is_numeric(input_types[0]):
-            raise TypeError(f"Type {input_types[0]} is not numeric")
-        if pd.api.types.is_bool_dtype(input_types[0]):
-            return dtypes.INT_DTYPE
-        else:
+        if input_types[0] is dtypes.TIMEDELTA_DTYPE:  # timedelta input: the sum keeps the timedelta type
+            return dtypes.TIMEDELTA_DTYPE
+
+        if dtypes.is_numeric(input_types[0]):  # numeric input: type is preserved, except bool (below)
+            if pd.api.types.is_bool_dtype(input_types[0]):  # summing booleans yields INT_DTYPE
+                return dtypes.INT_DTYPE
             return input_types[0]
 
+        raise TypeError(f"Type {input_types[0]} is not numeric or timedelta")  # any other input type is rejected
+
 
 @dataclasses.dataclass(frozen=True)
 class MedianOp(UnaryAggregateOp):
@@ -171,6 +174,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
 @dataclasses.dataclass(frozen=True)
 class QuantileOp(UnaryAggregateOp):
     q: float
+    should_floor_result: bool = False
 
     @property
     def name(self):
@@ -181,6 +185,8 @@ def order_independent(self) -> bool:
         return True
 
     def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        if input_types[0] is dtypes.TIMEDELTA_DTYPE:
+            return dtypes.TIMEDELTA_DTYPE
         return signatures.UNARY_REAL_NUMERIC.output_type(input_types[0])
 
 
@@ -224,7 +230,11 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
 class MeanOp(UnaryAggregateOp):
     name: ClassVar[str] = "mean"
 
+    should_floor_result: bool = False
+
     def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        if input_types[0] is dtypes.TIMEDELTA_DTYPE:
+            return dtypes.TIMEDELTA_DTYPE
         return signatures.UNARY_REAL_NUMERIC.output_type(input_types[0])
 
 
@@ -262,7 +272,12 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
 class StdOp(UnaryAggregateOp):
     name: ClassVar[str] = "std"
 
+    should_floor_result: bool = False
+
     def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        if input_types[0] is dtypes.TIMEDELTA_DTYPE:
+            return dtypes.TIMEDELTA_DTYPE
+
         return signatures.FixedOutputType(
             dtypes.is_numeric, dtypes.FLOAT_DTYPE, "numeric"
         ).output_type(input_types[0])

@@ -465,3 +465,49 @@ def test_timedelta_ordering(session):
     pandas.testing.assert_series_equal(
         actual_result, expected_result, check_index_type=False
     )
+
+
+def test_timedelta_cumsum(temporal_dfs):  # cumsum on "timedelta_col_1": bf_df result must match pd_df result
+    bf_df, pd_df = temporal_dfs  # the fixture unpacks into a (bf_df, pd_df) pair
+
+    actual_result = bf_df["timedelta_col_1"].cumsum().to_pandas()  # to_pandas() so it can be compared with the pandas series
+
+    expected_result = pd_df["timedelta_col_1"].cumsum()
+    _assert_series_equal(actual_result, expected_result)
+
+
+@pytest.mark.parametrize(
+    "agg_func",
+    [  # one case per aggregation applied to the timedelta column
+        pytest.param(lambda x: x.min(), id="min"),
+        pytest.param(lambda x: x.max(), id="max"),
+        pytest.param(lambda x: x.sum(), id="sum"),
+        pytest.param(lambda x: x.mean(), id="mean"),
+        pytest.param(lambda x: x.median(), id="median"),
+        pytest.param(lambda x: x.quantile(0.5), id="quantile"),
+        pytest.param(lambda x: x.std(), id="std"),
+    ],
+)
+def test_timedelta_agg__timedelta_result(temporal_dfs, agg_func):  # same aggregation on bf_df and pd_df must agree
+    bf_df, pd_df = temporal_dfs  # the fixture unpacks into a (bf_df, pd_df) pair
+
+    actual_result = agg_func(bf_df["timedelta_col_1"])
+
+    expected_result = agg_func(pd_df["timedelta_col_1"]).floor("us")  # pandas value floored to microseconds; presumably matches bf_df precision - TODO confirm
+    assert actual_result == expected_result
+
+
+@pytest.mark.parametrize(
+    "agg_func",
+    [  # aggregations exercised by the int-result test below
+        pytest.param(lambda x: x.count(), id="count"),
+        pytest.param(lambda x: x.nunique(), id="nunique"),
+    ],
+)
+def test_timedelta_agg__int_result(temporal_dfs, agg_func):  # same aggregation on bf_df and pd_df must agree
+    bf_df, pd_df = temporal_dfs  # the fixture unpacks into a (bf_df, pd_df) pair
+
+    actual_result = agg_func(bf_df["timedelta_col_1"])
+
+    expected_result = agg_func(pd_df["timedelta_col_1"])  # compared as-is, with no flooring step
+    assert actual_result == expected_result