Skip to content

Commit 647ea5e

Browse files
committed
Update json_set to a binary operator so the JSON value can be a Series
1 parent 159950c commit 647ea5e

File tree

6 files changed

+84
-73
lines changed

6 files changed

+84
-73
lines changed

‎bigframes/bigquery/__init__.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,11 @@ def json_set(
177177
>>> bpd.options.display.progress_bar = None
178178
179179
>>> s = bpd.read_gbq("SELECT JSON '{\\\"a\\\": 1}' AS data")["data"]
180-
>>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100), ("$.b", "hi")])
181-
0 {"a":100,"b":"hi"}
180+
>>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100)])
181+
0 {"a":100}
182+
Name: data, dtype: string
183+
>>> bbq.json_set(s, json_path_value_pairs=[("$.b", "hi")])
184+
0 {"a":1,"b":"hi"}
182185
Name: data, dtype: string
183186
184187
Args:
@@ -193,19 +196,23 @@ def json_set(
193196
"""
194197
# SQLGlot parser does not support the "create_if_missing => true" syntax, so
195198
# create_if_missing is not currently implemented.
196-
json_path_value_tuples = []
197-
for json_path_value_pair in json_path_value_pairs:
198-
if len(json_path_value_pair) != 2:
199-
raise ValueError(
200-
"Incorrect format: Expected (<json_path>, <json_value>), but found: "
201-
+ f"{json_path_value_pair}"
202-
)
203-
json_path_value_tuples.append(tuple(json_path_value_pair))
204-
205-
return series._apply_unary_op(
206-
ops.JSONSet(
207-
json_path_value_pairs=tuple(json_path_value_tuples),
199+
200+
# Currently limited to a single JSON path/value pair, because JSONSet is applied as a binary operation.
201+
if len(json_path_value_pairs) != 1:
202+
raise ValueError(
203+
"Expected exactly one JSON path and value pair but found "
204+
+ f"{len(json_path_value_pairs)} pairs."
208205
)
206+
207+
if len(json_path_value_pairs[0]) != 2:
208+
raise ValueError(
209+
"Incorrect format: Expected (<json_path>, <json_value>), but found: "
210+
+ f"{json_path_value_pairs[0]}"
211+
)
212+
213+
json_path, json_value = json_path_value_pairs[0]
214+
return series._apply_binary_op(
215+
json_value, ops.JSONSet(json_path=json_path), alignment="left"
209216
)
210217

211218

‎bigframes/core/compile/scalar_op_compiler.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -895,11 +895,12 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp):
895895

896896

897897
# JSON Ops
898-
@scalar_op_compiler.register_unary_op(ops.JSONSet, pass_op=True)
899-
def json_set_op_impl(x: ibis_types.Value, op: ops.JSONSet):
898+
@scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True)
899+
def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
900900
return vendored_ibis_ops.JSONSet(
901901
x,
902-
json_path_value_pairs=op.json_path_value_pairs,
902+
json_value=y,
903+
json_path=op.json_path,
903904
).to_expr()
904905

905906

‎bigframes/operations/__init__.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -602,19 +602,6 @@ def output_type(self, *input_types):
602602
return dtypes.STRING_DTYPE
603603

604604

605-
## JSON Ops
606-
@dataclasses.dataclass(frozen=True)
607-
class JSONSet(UnaryOp):
608-
name: typing.ClassVar[str] = "json_set"
609-
json_path_value_pairs: typing.Tuple[typing.Tuple[str, typing.Any], ...]
610-
611-
def output_type(self, *input_types):
612-
input_type = input_types[0]
613-
if not dtypes.is_json_like(input_type):
614-
raise TypeError("Input type must be an JSON or JSON-formatted string type.")
615-
return input_type
616-
617-
618605
# Binary Ops
619606
fillna_op = create_binary_op(name="fillna", type_signature=op_typing.COERCE)
620607
maximum_op = create_binary_op(name="maximum", type_signature=op_typing.COERCE)
@@ -720,6 +707,19 @@ def output_type(self, *input_types):
720707
strconcat_op = StrConcatOp()
721708

722709

710+
## JSON Ops
711+
@dataclasses.dataclass(frozen=True)
712+
class JSONSet(BinaryOp):
713+
name: typing.ClassVar[str] = "json_set"
714+
json_path: str
715+
716+
def output_type(self, *input_types):
717+
input_type = input_types[0]
718+
if not dtypes.is_json_like(input_type):
719+
raise TypeError("Input type must be an JSON or JSON-formatted string type.")
720+
return input_type
721+
722+
723723
# Ternary Ops
724724
@dataclasses.dataclass(frozen=True)
725725
class WhereOp(TernaryOp):

‎tests/system/small/bigquery/test_json.py

Lines changed: 41 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import json
1616

1717
import pandas as pd
18+
import pytest
1819

1920
import bigframes.bigquery as bbq
2021
import bigframes.pandas as bpd
@@ -27,54 +28,59 @@ def _get_series_from_json(json_data):
2728
return bpd.read_gbq(sql)["data"]
2829

2930

30-
def test_json_set():
31-
init_json = [
32-
{"a": 1},
33-
]
34-
s = _get_series_from_json(init_json)
35-
actual = bbq.json_set(s, json_path_value_pairs=[("$.a", 10)])
31+
@pytest.mark.parametrize(
32+
("json_path", "expected_json"),
33+
[
34+
pytest.param("$.a", [{"a": 10}], id="simple"),
35+
pytest.param("$.a.b.c", [{"a": {"b": {"c": 10, "d": []}}}], id="nested"),
36+
],
37+
)
38+
def test_json_set_at_json_path(json_path, expected_json):
39+
s = _get_series_from_json([{"a": {"b": {"c": "tester", "d": []}}}])
40+
actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
3641

37-
expected_json = [
38-
{"a": 10},
39-
]
4042
expected = _get_series_from_json(expected_json)
4143
pd.testing.assert_series_equal(
4244
actual.to_pandas(),
4345
expected.to_pandas(),
4446
)
4547

4648

47-
def test_json_set_w_nested_json():
48-
init_json = [
49-
{"a": {"b": {"c": "tester", "d": []}}},
50-
]
51-
s = _get_series_from_json(init_json)
52-
actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b.c", "user")])
49+
@pytest.mark.parametrize(
50+
("json_value", "expected_json"),
51+
[
52+
pytest.param(10, [{"a": {"b": 10}}, {"a": {"b": 10}}], id="int"),
53+
pytest.param(0.333, [{"a": {"b": 0.333}}, {"a": {"b": 0.333}}], id="float"),
54+
pytest.param("eng", [{"a": {"b": "eng"}}, {"a": {"b": "eng"}}], id="string"),
55+
pytest.param([1, 1], [{"a": {"b": 1}}, {"a": {"b": 1}}], id="series"),
56+
],
57+
)
58+
def test_json_set_at_json_value_type(json_value, expected_json):
59+
s = _get_series_from_json([{"a": {"b": "dev"}}, {"a": {"b": [1, 2]}}])
60+
actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
5361

54-
expected_json = [
55-
{"a": {"b": {"c": "user", "d": []}}},
56-
]
5762
expected = _get_series_from_json(expected_json)
5863
pd.testing.assert_series_equal(
5964
actual.to_pandas(),
6065
expected.to_pandas(),
6166
)
6267

6368

64-
def test_json_set_w_ordered_pairs():
65-
init_json: object = [
66-
{"a": {"b": {"c": {}}}},
67-
]
68-
s = _get_series_from_json(init_json)
69-
actual = bbq.json_set(
70-
s, json_path_value_pairs=[("$.a.b.e", "user"), ("$.a.b.e", "dev")]
71-
)
72-
73-
expected_json: object = [
74-
{"a": {"b": {"c": {}, "e": "dev"}}},
75-
]
76-
expected = _get_series_from_json(expected_json)
77-
pd.testing.assert_series_equal(
78-
actual.to_pandas(),
79-
expected.to_pandas(),
80-
)
69+
@pytest.mark.parametrize(
70+
("json_path_value_pairs"),
71+
[
72+
pytest.param(
73+
[("$.a", 1), ("$.b", 2)],
74+
id="two_pairs",
75+
marks=pytest.mark.xfail(raises=ValueError),
76+
),
77+
pytest.param(
78+
[("$.a", 1, 100)],
79+
id="invalid_pair",
80+
marks=pytest.mark.xfail(raises=ValueError),
81+
),
82+
],
83+
)
84+
def test_json_set_w_invalid_param(json_path_value_pairs):
85+
s = _get_series_from_json([{"a": {"b": {"c": {}, "e": "dev"}}}])
86+
bbq.json_set(s, json_path_value_pairs=json_path_value_pairs)

‎third_party/bigframes_vendored/ibis/backends/bigquery/registry.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,9 @@ def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate):
6060

6161
def _json_set(translator, op: vendored_ibis_ops.JSONSet):
6262
arg = translator.translate(op.arg)
63-
json_path_value_pairs_list = [
64-
translator.translate(item) for pair in op.json_path_value_pairs for item in pair
65-
]
66-
return f"JSON_SET(PARSE_JSON({arg}), {', '.join(json_path_value_pairs_list)})"
63+
json_value = translator.translate(op.json_value)
64+
json_path = translator.translate(op.json_path)
65+
return f"JSON_SET(PARSE_JSON({arg}), {json_path}, {json_value})"
6766

6867

6968
patched_ops = {
Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/expr/operations/json.py
22
from __future__ import annotations
33

4-
import ibis.common.typing as ibis_typing
54
import ibis.expr.datatypes as dt
65
import ibis.expr.operations.core as ibis_ops_core
76
import ibis.expr.rules as rlz
@@ -12,9 +11,8 @@ class ToJsonString(ibis_ops_core.Unary):
1211

1312

1413
class JSONSet(ibis_ops_core.Unary):
15-
json_path_value_pairs: ibis_typing.VarTuple[
16-
ibis_typing.VarTuple[ibis_ops_core.Value[dt.Any]]
17-
]
14+
json_value: ibis_ops_core.Value[dt.Any]
15+
json_path: ibis_ops_core.Value[dt.String]
1816

1917
shape = rlz.shape_like("arg")
2018
dtype = rlz.dtype_like("arg")

0 commit comments

Comments
 (0)