Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: (Preview) Support arithmetic between dates and timedeltas #1413

Draft
wants to merge 17 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,21 @@ def timestamp_sub_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerVal
return x - y.to_interval("us")


@scalar_op_compiler.register_binary_op(ops.date_diff_op)
def date_diff_op_impl(x: ibis_types.DateValue, y: ibis_types.DateValue):
    """Compile the difference of two date expressions.

    Takes the day-granularity delta between ``x`` and ``y``, scales it by
    the ``"d"`` entry of ``UNIT_TO_US_CONVERSION_FACTORS`` (presumably the
    number of microseconds per day -- confirm against that table), and
    floors the product.
    """
    day_delta = x.delta(y, "day")
    day_factor = UNIT_TO_US_CONVERSION_FACTORS["d"]
    scaled = day_delta * day_factor
    return scaled.floor()  # type: ignore


@scalar_op_compiler.register_binary_op(ops.date_add_op)
def date_add_op_impl(x: ibis_types.DateValue, y: ibis_types.IntegerValue):
    """Compile ``date + timedelta``.

    The date operand ``x`` is cast to ``"timestamp"`` and the integer
    operand ``y`` is converted to an interval with unit ``"us"`` before the
    two are added.
    """
    as_timestamp = x.cast("timestamp")
    offset = y.to_interval("us")
    return as_timestamp + offset  # type: ignore


@scalar_op_compiler.register_binary_op(ops.date_sub_op)
def date_sub_op_impl(x: ibis_types.DateValue, y: ibis_types.IntegerValue):
    """Compile ``date - timedelta``.

    The date operand ``x`` is cast to ``"timestamp"`` and the integer
    operand ``y`` is converted to an interval with unit ``"us"``, which is
    then subtracted from the timestamp.
    """
    as_timestamp = x.cast("timestamp")
    offset = y.to_interval("us")
    return as_timestamp - offset  # type: ignore


@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True)
def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp):
supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"]
Expand Down
14 changes: 14 additions & 0 deletions bigframes/core/rewrite/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE:
return _TypedExpr.create_op_expr(ops.timestamp_sub_op, left, right)

if left.dtype == dtypes.DATE_DTYPE and right.dtype == dtypes.DATE_DTYPE:
return _TypedExpr.create_op_expr(ops.date_diff_op, left, right)

if left.dtype == dtypes.DATE_DTYPE and right.dtype is dtypes.TIMEDELTA_DTYPE:
return _TypedExpr.create_op_expr(ops.date_sub_op, left, right)

return _TypedExpr.create_op_expr(ops.sub_op, left, right)


Expand All @@ -163,6 +169,14 @@ def _rewrite_add_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
# always on the right.
return _TypedExpr.create_op_expr(ops.timestamp_add_op, right, left)

if left.dtype == dtypes.DATE_DTYPE and right.dtype is dtypes.TIMEDELTA_DTYPE:
return _TypedExpr.create_op_expr(ops.date_add_op, left, right)

if left.dtype is dtypes.TIMEDELTA_DTYPE and right.dtype == dtypes.DATE_DTYPE:
# Re-arrange operands such that date is always on the left and timedelta is
# always on the right.
return _TypedExpr.create_op_expr(ops.date_add_op, right, left)

return _TypedExpr.create_op_expr(ops.add_op, left, right)


Expand Down
6 changes: 6 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
ne_op,
)
from bigframes.operations.date_ops import (
date_diff_op,
day_op,
dayofweek_op,
month_op,
Expand Down Expand Up @@ -184,6 +185,8 @@
from bigframes.operations.struct_ops import StructFieldOp, StructOp
from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
from bigframes.operations.timedelta_ops import (
date_add_op,
date_sub_op,
timedelta_floor_op,
timestamp_add_op,
timestamp_sub_op,
Expand Down Expand Up @@ -249,6 +252,7 @@
"upper_op",
"ZfillOp",
# Date ops
"date_diff_op",
"day_op",
"month_op",
"year_op",
Expand All @@ -260,6 +264,8 @@
"second_op",
"normalize_op",
# Timedelta ops
"date_add_op",
"date_sub_op",
"timedelta_floor_op",
"timestamp_add_op",
"timestamp_sub_op",
Expand Down
23 changes: 23 additions & 0 deletions bigframes/operations/date_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import typing

from bigframes import dtypes
from bigframes.operations import base_ops
import bigframes.operations.type as op_typing

Expand Down Expand Up @@ -39,3 +43,22 @@
name="quarter",
type_signature=op_typing.DATELIKE_ACCESSOR,
)


@dataclasses.dataclass(frozen=True)
class DateDiffOp(base_ops.BinaryOp):
    """Binary op representing the subtraction of one date from another.

    Both operands must have the date dtype; the result is typed as a
    timedelta.
    """

    # Identifier of this op.
    name: typing.ClassVar[str] = "date_diff"

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the result dtype of ``date - date``.

        Args:
            *input_types: The dtypes of the left and right operands, in
                that order.

        Returns:
            ``dtypes.TIMEDELTA_DTYPE``.

        Raises:
            TypeError: If the two operand dtypes differ, or if they are not
                ``dtypes.DATE_DTYPE``.
        """
        left_type, right_type = input_types[0], input_types[1]

        # Compare by value rather than identity: two equal dtype objects are
        # not guaranteed to be the same instance, and an identity check would
        # reject them as "different types".
        if left_type != right_type:
            raise TypeError(
                f"two inputs have different types. left: {input_types[0]}, right: {input_types[1]}"
            )

        if left_type != dtypes.DATE_DTYPE:
            raise TypeError("expected date input")

        return dtypes.TIMEDELTA_DTYPE


date_diff_op = DateDiffOp()
14 changes: 13 additions & 1 deletion bigframes/operations/numeric_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,18 @@ def output_type(self, *input_types):
# String addition
return input_types[0]

# Timestamp addition.
# Temporal addition.
if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE:
return left_type
if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right_type):
return right_type

if left_type == dtypes.DATE_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE:
return dtypes.DATETIME_DTYPE

if left_type == dtypes.TIMEDELTA_DTYPE and right_type == dtypes.DATE_DTYPE:
return dtypes.DATETIME_DTYPE

if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE:
return dtypes.TIMEDELTA_DTYPE

Expand All @@ -155,9 +161,15 @@ def output_type(self, *input_types):
if dtypes.is_datetime_like(left_type) and dtypes.is_datetime_like(right_type):
return dtypes.TIMEDELTA_DTYPE

if left_type == dtypes.DATE_DTYPE and right_type == dtypes.DATE_DTYPE:
return dtypes.TIMEDELTA_DTYPE

if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE:
return left_type

if left_type == dtypes.DATE_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE:
return dtypes.DATETIME_DTYPE

if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE:
return dtypes.TIMEDELTA_DTYPE

Expand Down
47 changes: 47 additions & 0 deletions bigframes/operations/timedelta_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
timestamp_add_op = TimestampAddOp()


@dataclasses.dataclass(frozen=True)
class TimestampSubOp(base_ops.BinaryOp):
name: typing.ClassVar[str] = "timestamp_sub"

Expand All @@ -96,3 +97,49 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT


timestamp_sub_op = TimestampSubOp()


@dataclasses.dataclass(frozen=True)
class DateAddOp(base_ops.BinaryOp):
    """Binary op representing the addition of a date and a timedelta."""

    # Identifier of this op.
    name: typing.ClassVar[str] = "date_add"

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the result dtype of adding a date and a timedelta.

        Either operand order is accepted; the result is always
        ``dtypes.DATETIME_DTYPE`` (timestamp without timezone).

        Raises:
            TypeError: If the operands are not one date and one timedelta.
        """
        left_type, right_type = input_types[0], input_types[1]

        if (
            # date + timedelta
            (left_type == dtypes.DATE_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE)
            # timedelta + date
            or (left_type == dtypes.TIMEDELTA_DTYPE and right_type == dtypes.DATE_DTYPE)
        ):
            return dtypes.DATETIME_DTYPE

        raise TypeError(
            f"unsupported types for date_add. left: {input_types[0]} right: {input_types[1]}"
        )


date_add_op = DateAddOp()


@dataclasses.dataclass(frozen=True)
class DateSubOp(base_ops.BinaryOp):
    """Binary op representing the subtraction of a timedelta from a date."""

    # Identifier of this op.
    name: typing.ClassVar[str] = "date_sub"

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the result dtype of ``date - timedelta``.

        The result is ``dtypes.DATETIME_DTYPE`` (timestamp without timezone).

        Raises:
            TypeError: If the left operand is not a date or the right
                operand is not a timedelta.
        """
        left_type, right_type = input_types[0], input_types[1]

        is_date_minus_timedelta = (
            left_type == dtypes.DATE_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE
        )
        if not is_date_minus_timedelta:
            raise TypeError(
                f"unsupported types for date_sub. left: {input_types[0]} right: {input_types[1]}"
            )

        return dtypes.DATETIME_DTYPE


date_sub_op = DateSubOp()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"numpy >=1.24.0",
"pandas >=1.5.3",
"pandas-gbq >=0.26.0",
"pyarrow >=10.0.1",
"pyarrow >=15.0.2",
"pydata-google-auth >=1.8.2",
"requests >=2.27.1",
"sqlglot >=23.6.3",
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.12.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pandas==2.2.0
2 changes: 1 addition & 1 deletion testing/constraints-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jellyfish==0.8.9
numpy==1.24.0
pandas==1.5.3
pandas-gbq==0.26.0
pyarrow==10.0.1
pyarrow==15.0.2
pydata-google-auth==1.8.2
requests==2.27.1
scikit-learn==1.2.2
Expand Down
61 changes: 61 additions & 0 deletions tests/system/small/operations/test_dates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime

import pandas as pd
import pandas.testing

from bigframes import dtypes


def test_date_diff_between_series(session):
    """Subtracting two date series should match pandas, as a timedelta series."""
    left_dates = [datetime.date(2025, 1, 2), datetime.date(2025, 2, 1)]
    right_dates = [datetime.date(2024, 1, 2), datetime.date(2026, 1, 30)]
    pd_df = pd.DataFrame({"col_1": left_dates, "col_2": right_dates}).astype(
        dtypes.DATE_DTYPE
    )
    bf_df = session.read_pandas(pd_df)

    bf_diff = bf_df["col_1"] - bf_df["col_2"]
    actual = bf_diff.to_pandas()

    pd_diff = pd_df["col_1"] - pd_df["col_2"]
    expected = pd_diff.astype(dtypes.TIMEDELTA_DTYPE)

    pandas.testing.assert_series_equal(actual, expected, check_index_type=False)


def test_date_diff_literal_sub_series(scalars_dfs):
    """A date literal minus a date series should match the pandas result."""
    bf_df, pd_df = scalars_dfs
    literal = datetime.date(2030, 5, 20)

    bf_diff = literal - bf_df["date_col"]
    actual = bf_diff.to_pandas()

    pd_diff = literal - pd_df["date_col"]
    expected = pd_diff.astype(dtypes.TIMEDELTA_DTYPE)

    pandas.testing.assert_series_equal(actual, expected, check_index_type=False)


def test_date_diff_series_sub_literal(scalars_dfs):
    """A date series minus a date literal should match the pandas result."""
    bf_df, pd_df = scalars_dfs
    literal = datetime.date(1980, 5, 20)

    bf_diff = bf_df["date_col"] - literal
    actual = bf_diff.to_pandas()

    pd_diff = pd_df["date_col"] - literal
    expected = pd_diff.astype(dtypes.TIMEDELTA_DTYPE)

    pandas.testing.assert_series_equal(actual, expected, check_index_type=False)
Loading