Skip to content

Commit b598aa8

Browse files
chore: support addition between a timestamp and a timedelta (#1369)
* chore: support addition between a timestamp and a timedelta * test_timestamp_dff * fix conftest.py * support numpy and pyarrow timedelta literals * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix format * use local fixture for testing * Remove pyarrow duration scalar support. * fix format * remove redundant imports * fix mypy * update timedelta literals during tree rewrites * update type conversions in tests to make py 3.9 happy * fix add operator for integers --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 24962cd commit b598aa8

File tree

10 files changed

+287
-10
lines changed

10 files changed

+287
-10
lines changed

‎bigframes/core/compile/scalar_op_compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,11 @@ def timestamp_diff_op_impl(x: ibis_types.TimestampValue, y: ibis_types.Timestamp
742742
return x.delta(y, "microsecond")
743743

744744

745+
@scalar_op_compiler.register_binary_op(ops.timestamp_add_op)
def timestamp_add_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerValue):
    """Compile ``timestamp_add_op``: shift timestamp ``x`` by ``y`` microseconds.

    ``y`` is an integer value that is converted to an interval with unit "us"
    before being added to ``x``.
    """
    micros_interval = y.to_interval("us")
    return x + micros_interval
748+
749+
745750
@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True)
746751
def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp):
747752
supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"]

‎bigframes/core/rewrite/operators.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from bigframes import dtypes
2020
from bigframes import operations as ops
2121
from bigframes.core import expression as ex
22-
from bigframes.core import nodes, schema
22+
from bigframes.core import nodes, schema, utils
2323

2424

2525
@dataclasses.dataclass
@@ -50,7 +50,7 @@ def _rewrite_expressions(expr: ex.Expression, schema: schema.ArraySchema) -> _Ty
5050
return _TypedExpr(expr, schema.get_type(expr.id.sql))
5151

5252
if isinstance(expr, ex.ScalarConstantExpression):
53-
return _TypedExpr(expr, expr.dtype)
53+
return _rewrite_scalar_constant_expr(expr)
5454

5555
if isinstance(expr, ex.OpExpression):
5656
updated_inputs = tuple(
@@ -61,12 +61,23 @@ def _rewrite_expressions(expr: ex.Expression, schema: schema.ArraySchema) -> _Ty
6161
raise AssertionError(f"Unexpected expression type: {type(expr)}")
6262

6363

64+
def _rewrite_scalar_constant_expr(expr: ex.ScalarConstantExpression) -> _TypedExpr:
    """Pair a scalar constant with its dtype, normalising timedelta literals.

    Constants whose dtype is not the timedelta dtype are passed through
    untouched. A timedelta constant is replaced by a new constant holding the
    value returned by ``utils.timedelta_to_micros`` while keeping the timedelta
    dtype.
    """
    if expr.dtype is not dtypes.TIMEDELTA_DTYPE:
        return _TypedExpr(expr, expr.dtype)

    micros = utils.timedelta_to_micros(expr.value)  # type: ignore
    return _TypedExpr(ex.const(micros, expr.dtype), expr.dtype)
70+
71+
6472
def _rewrite_op_expr(
    expr: ex.OpExpression, inputs: typing.Tuple[_TypedExpr, ...]
) -> _TypedExpr:
    """Rewrite an operator expression given its already-rewritten inputs.

    Subtraction and addition are delegated to their dedicated rewrite helpers,
    which receive the first two inputs. Every other operator keeps the
    original expression and only has its output type computed from the input
    dtypes.
    """
    operator = expr.op
    if isinstance(operator, ops.SubOp):
        return _rewrite_sub_op(inputs[0], inputs[1])
    if isinstance(operator, ops.AddOp):
        return _rewrite_add_op(inputs[0], inputs[1])

    # No specialised rewrite: keep the expression and derive its type from the op.
    operand_types = tuple(operand.dtype for operand in inputs)
    return _TypedExpr(expr, operator.output_type(*operand_types))
7283

@@ -80,3 +91,24 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
8091
result_op.as_expr(left.expr, right.expr),
8192
result_op.output_type(left.dtype, right.dtype),
8293
)
94+
95+
96+
def _rewrite_add_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
    """Rewrite an addition, routing timestamp/timedelta sums to ``timestamp_add_op``.

    When one operand is datetime-like and the other has the timedelta dtype,
    the result uses ``ops.timestamp_add_op`` with the datetime-like operand
    first and the timedelta operand second, regardless of the original order.
    Any other combination is expressed with the generic ``ops.add_op``.
    """
    if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE:
        chosen_op, first, second = ops.timestamp_add_op, left, right
    elif left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right.dtype):
        # Swap the operands so the timestamp is on the left and the timedelta
        # is on the right.
        chosen_op, first, second = ops.timestamp_add_op, right, left
    else:
        chosen_op, first, second = ops.add_op, left, right

    rewritten = chosen_op.as_expr(first.expr, second.expr)
    result_type = chosen_op.output_type(first.dtype, second.dtype)
    return _TypedExpr(rewritten, result_type)

‎bigframes/core/utils.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import datetime
1415
import functools
1516
import re
1617
import typing
1718
from typing import Hashable, Iterable, List
1819
import warnings
1920

2021
import bigframes_vendored.pandas.io.common as vendored_pandas_io_common
22+
import numpy as np
2123
import pandas as pd
2224
import pandas.api.types as pdtypes
2325
import typing_extensions
@@ -187,9 +189,22 @@ def wrapper(*args, **kwargs):
187189
return decorator
188190

189191

190-
def timedelta_to_micros(td: pd.Timedelta) -> int:
191-
# td.value returns total nanoseconds.
192-
return td.value // 1000
192+
def timedelta_to_micros(
    timedelta: typing.Union[pd.Timedelta, datetime.timedelta, np.timedelta64]
) -> int:
    """Convert a timedelta scalar to a whole number of microseconds.

    Args:
        timedelta: A pandas, standard-library, or NumPy timedelta scalar.

    Returns:
        The duration in microseconds as a built-in ``int``.

    Raises:
        TypeError: If ``timedelta`` is not one of the supported types.
    """
    # pd.Timedelta is a subclass of datetime.timedelta, so it must be tested
    # before the generic datetime.timedelta branch.
    if isinstance(timedelta, pd.Timedelta):
        # pd.Timedelta.value returns total nanoseconds; floor-divide to micros.
        return int(timedelta.value // 1000)

    if isinstance(timedelta, np.timedelta64):
        # astype(np.int64) yields a NumPy scalar; convert it to a built-in int
        # so the declared return type holds for every branch.
        return int(timedelta.astype("timedelta64[us]").astype(np.int64))

    if isinstance(timedelta, datetime.timedelta):
        return (
            (timedelta.days * 3600 * 24) + timedelta.seconds
        ) * 1_000_000 + timedelta.microseconds

    raise TypeError(f"Unrecognized input type: {type(timedelta)}")
193208

194209

195210
def replace_timedeltas_with_micros(dataframe: pd.DataFrame) -> List[str]:

‎bigframes/dtypes.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@
105105
pd.Timestamp,
106106
datetime.date,
107107
datetime.time,
108+
pd.Timedelta,
109+
datetime.timedelta,
110+
np.timedelta64,
108111
]
109112
LOCAL_SCALAR_TYPES = typing.get_args(LOCAL_SCALAR_TYPE)
110113

@@ -420,7 +423,7 @@ def arrow_dtype_to_bigframes_dtype(arrow_dtype: pa.DataType) -> Dtype:
420423
return pd.ArrowDtype(arrow_dtype)
421424

422425
if pa.types.is_duration(arrow_dtype):
423-
return pd.ArrowDtype(arrow_dtype)
426+
return TIMEDELTA_DTYPE
424427

425428
# BigFrames doesn't distinguish between string and large_string because the
426429
# largest string (2 GB) is already larger than the largest BigQuery row.
@@ -562,6 +565,10 @@ def _is_bigframes_dtype(dtype) -> bool:
562565

563566

564567
def _infer_dtype_from_python_type(type: type) -> Dtype:
568+
if type in (datetime.timedelta, pd.Timedelta, np.timedelta64):
569+
# Must check timedelta types first; otherwise one of the other branches would evaluate to true.
570+
# E.g. np.timedelta64 is a subclass of np.integer
571+
return TIMEDELTA_DTYPE
565572
if issubclass(type, (bool, np.bool_)):
566573
return BOOL_DTYPE
567574
if issubclass(type, (int, np.integer)):

‎bigframes/operations/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
from bigframes.operations.numeric_ops import (
104104
abs_op,
105105
add_op,
106+
AddOp,
106107
arccos_op,
107108
arccosh_op,
108109
arcsin_op,
@@ -177,7 +178,7 @@
177178
)
178179
from bigframes.operations.struct_ops import StructFieldOp, StructOp
179180
from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
180-
from bigframes.operations.timedelta_ops import ToTimedeltaOp
181+
from bigframes.operations.timedelta_ops import timestamp_add_op, ToTimedeltaOp
181182

182183
__all__ = [
183184
# Base ops
@@ -249,6 +250,7 @@
249250
"second_op",
250251
"normalize_op",
251252
# Timedelta ops
253+
"timestamp_add_op",
252254
"ToTimedeltaOp",
253255
# Datetime ops
254256
"date_op",
@@ -263,6 +265,7 @@
263265
# Numeric ops
264266
"abs_op",
265267
"add_op",
268+
"AddOp",
266269
"arccos_op",
267270
"arccosh_op",
268271
"arcsin_op",

‎bigframes/operations/numeric_ops.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,18 @@ def output_type(self, *input_types):
116116
if all(map(dtypes.is_string_like, input_types)) and len(set(input_types)) == 1:
117117
# String addition
118118
return input_types[0]
119+
120+
# Timestamp addition.
121+
if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE:
122+
return left_type
123+
if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right_type):
124+
return right_type
125+
119126
if (left_type is None or dtypes.is_numeric(left_type)) and (
120127
right_type is None or dtypes.is_numeric(right_type)
121128
):
122129
# Numeric addition
123130
return dtypes.coerce_to_common(left_type, right_type)
124-
# TODO: Add temporal addition once delta types supported
125131
raise TypeError(f"Cannot add dtypes {left_type} and {right_type}")
126132

127133

‎bigframes/operations/timedelta_ops.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,32 @@ class ToTimedeltaOp(base_ops.UnaryOp):
2525
name: typing.ClassVar[str] = "to_timedelta"
2626
unit: typing.Literal["us", "ms", "s", "m", "h", "d", "W"]
2727

28-
def output_type(self, *input_types):
28+
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
2929
if input_types[0] in (dtypes.INT_DTYPE, dtypes.FLOAT_DTYPE):
3030
return dtypes.TIMEDELTA_DTYPE
3131
raise TypeError("expected integer or float input")
32+
33+
34+
@dataclasses.dataclass(frozen=True)
class TimestampAdd(base_ops.BinaryOp):
    """Binary op that adds a timedelta to a datetime-like value."""

    name: typing.ClassVar[str] = "timestamp_add"

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the datetime-like input type for a timestamp/timedelta pair.

        Accepts the operands in either order; the result type is whichever
        input is datetime-like.

        Raises:
            TypeError: If the inputs are not one datetime-like type and one
                timedelta type.
        """
        left_type = input_types[0]
        right_type = input_types[1]

        # timestamp + timedelta => timestamp
        if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE:
            return left_type

        # timedelta + timestamp => timestamp
        if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right_type):
            return right_type

        message = (
            f"unsupported types for timestamp_add. left: {left_type} right: {right_type}"
        )
        raise TypeError(message)
54+
55+
56+
timestamp_add_op = TimestampAdd()

‎tests/data/scalars.jsonl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
{"bool_col": false, "bytes_col": "R8O8dGVuIFRhZw==", "date_col": "1980-03-14", "datetime_col": "1980-03-14 15:16:17", "geography_col": null, "int64_col": "55555", "int64_too": "0", "numeric_col": "5.555555", "float64_col": "555.555", "rowindex": 5, "rowindex_2": 5, "string_col": "Güten Tag!", "time_col": "15:16:17.181921", "timestamp_col": "1980-03-14T15:16:17.181921Z"}
77
{"bool_col": true, "bytes_col": "SGVsbG8JQmlnRnJhbWVzIQc=", "date_col": "2023-05-23", "datetime_col": "2023-05-23 11:37:01", "geography_col": "MULTIPOINT (20 20, 10 40, 40 30, 30 10)", "int64_col": "101202303", "int64_too": "2", "numeric_col": "-10.090807", "float64_col": "-123.456", "rowindex": 6, "rowindex_2": 6, "string_col": "capitalize, This ", "time_col": "01:02:03.456789", "timestamp_col": "2023-05-23T11:42:55.000001Z"}
88
{"bool_col": true, "bytes_col": null, "date_col": "2038-01-20", "datetime_col": "2038-01-19 03:14:08", "geography_col": null, "int64_col": "-214748367", "int64_too": "2", "numeric_col": "11111111.1", "float64_col": "42.42", "rowindex": 7, "rowindex_2": 7, "string_col": " سلام", "time_col": "12:00:00.000001", "timestamp_col": "2038-01-19T03:14:17.999999Z"}
9-
{"bool_col": false, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": "2", "int64_too": "1", "numeric_col": null, "float64_col": "6.87", "rowindex": 8, "rowindex_2": 8, "string_col": "T", "time_col": null, "timestamp_col": null}
9+
{"bool_col": false, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": "2", "int64_too": "1", "numeric_col": null, "float64_col": "6.87", "rowindex": 8, "rowindex_2": 8, "string_col": "T", "time_col": null, "timestamp_col": null}

0 commit comments

Comments
 (0)