Skip to content

Commit fdb9c40

Browse files
committed
Supports multiple JSON path/value pairs in json_set; defines json_set and parse_json as ibis builtin UDFs
1 parent b5ffb59 commit fdb9c40

File tree

6 files changed

+59
-42
lines changed

6 files changed

+59
-42
lines changed

‎bigframes/bigquery/__init__.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -197,23 +197,18 @@ def json_set(
197197
# SQLGlot parser does not support the "create_if_missing => true" syntax, so
198198
# create_if_missing is not currently implemented.
199199

200-
# Currently limited to single JSON path/value pairs (binary operations only).
201-
if len(json_path_value_pairs) != 1:
202-
raise ValueError(
203-
"Expected exactly one JSON path and value pair but found "
204-
+ f"{len(json_path_value_pairs)} pairs."
205-
)
206-
207-
if len(json_path_value_pairs[0]) != 2:
208-
raise ValueError(
209-
"Incorrect format: Expected (<json_path>, <json_value>), but found: "
210-
+ f"{json_path_value_pairs[0]}"
200+
for json_path_value_pair in json_path_value_pairs:
201+
if len(json_path_value_pair) != 2:
202+
raise ValueError(
203+
"Incorrect format: Expected (<json_path>, <json_value>), but found: "
204+
+ f"{json_path_value_pair}"
205+
)
206+
207+
json_path, json_value = json_path_value_pair
208+
series = series._apply_binary_op(
209+
json_value, ops.JSONSet(json_path=json_path), alignment="left"
211210
)
212-
213-
json_path, json_value = json_path_value_pairs[0]
214-
return series._apply_binary_op(
215-
json_value, ops.JSONSet(json_path=json_path), alignment="left"
216-
)
211+
return series
217212

218213

219214
def vector_search(

‎bigframes/core/compile/scalar_op_compiler.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -897,11 +897,21 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp):
897897
# JSON Ops
898898
@scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True)
899899
def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
900-
return vendored_ibis_ops.JSONSet(
901-
x,
902-
json_value=y,
903-
json_path=op.json_path,
904-
).to_expr()
900+
if x.type().is_json():
901+
return json_set(
902+
json_obj=x,
903+
json_path=op.json_path,
904+
json_value=y,
905+
).to_expr()
906+
else:
907+
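# Input is not a JSON value here: parse it to JSON, apply json_set, then convert back to a JSON string. Native JSON type support would eliminate these less efficient string conversions.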
908+
return vendored_ibis_ops.ToJsonString(
909+
json_set(
910+
json_obj=parse_json(x),
911+
json_path=op.json_path,
912+
json_value=y,
913+
)
914+
).to_expr()
905915

906916

907917
### Binary Ops
@@ -1479,3 +1489,15 @@ def float_floor(a: float) -> float:
14791489
def float_ceil(a: float) -> float:
14801490
"""Convert string to timestamp."""
14811491
return 0 # pragma: NO COVER
1492+
1493+
1494+
@ibis.udf.scalar.builtin(name="parse_json")
1495+
def parse_json(a: str) -> ibis_dtypes.JSON:
1496+
"""Converts a JSON-formatted STRING value to a JSON value."""
1497+
1498+
1499+
@ibis.udf.scalar.builtin(name="json_set")
1500+
def json_set(
1501+
json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.str, json_value
1502+
) -> ibis_dtypes.JSON:
1503+
"""Produces a new SQL JSON value with the specified JSON data inserted or replaced."""

‎bigframes/dtypes.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,12 @@ def is_json_like(type: ExpressionType) -> bool:
245245
return type == STRING_DTYPE
246246

247247

248+
def is_json_encoding_type(type: ExpressionType) -> bool:
249+
# Returns True for types that can be encoded as JSON; currently every type except GEO_DTYPE.
250+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
251+
return type != GEO_DTYPE
252+
253+
248254
def is_numeric(type: ExpressionType) -> bool:
249255
return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
250256

‎bigframes/operations/__init__.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -714,10 +714,21 @@ class JSONSet(BinaryOp):
714714
json_path: str
715715

716716
def output_type(self, *input_types):
717-
input_type = input_types[0]
718-
if not dtypes.is_json_like(input_type):
719-
raise TypeError("Input type must be an JSON or JSON-formatted string type.")
720-
return input_type
717+
left_type = input_types[0]
718+
right_type = input_types[1]
719+
if not dtypes.is_json_like(left_type):
720+
raise TypeError(
721+
"Input type must be an valid JSON object or JSON-formatted string type."
722+
+ f" Received type: {left_type}"
723+
)
724+
if not dtypes.is_json_encoding_type(right_type):
725+
raise TypeError(
726+
"The value to be assigned must be a type that can be encoded as JSON."
727+
+ f"Received type: {right_type}"
728+
)
729+
730+
# TODO: once the JSON type is implemented, ONLY return JSON data; for now the output type is the type of the left (JSON-like) input.
731+
return left_type
721732

722733

723734
# Ternary Ops

‎third_party/bigframes_vendored/ibis/backends/bigquery/registry.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,6 @@ def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate):
5858
return f"ARRAY_AGG({arg} IGNORE NULLS {order_by_sql})"
5959

6060

61-
def _json_set(translator, op: vendored_ibis_ops.JSONSet):
62-
arg = translator.translate(op.arg)
63-
json_value = translator.translate(op.json_value)
64-
json_path = translator.translate(op.json_path)
65-
return f"JSON_SET(PARSE_JSON({arg}), {json_path}, {json_value})"
66-
67-
6861
patched_ops = {
6962
vendored_ibis_ops.ApproximateMultiQuantile: _approx_quantiles, # type:ignore
7063
vendored_ibis_ops.FirstNonNullValue: _first_non_null_value, # type:ignore
@@ -74,7 +67,6 @@ def _json_set(translator, op: vendored_ibis_ops.JSONSet):
7467
vendored_ibis_ops.SafeCastToDatetime: _safe_cast_to_datetime, # type:ignore
7568
ibis_reductions.Quantile: _quantile, # type:ignore
7669
vendored_ibis_ops.ArrayAggregate: _array_aggregate, # type:ignore
77-
vendored_ibis_ops.JSONSet: _json_set, # type:ignore
7870
}
7971

8072
OPERATION_REGISTRY.update(patched_ops)

‎third_party/bigframes_vendored/ibis/expr/operations/json.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,7 @@
33

44
import ibis.expr.datatypes as dt
55
import ibis.expr.operations.core as ibis_ops_core
6-
import ibis.expr.rules as rlz
76

87

98
class ToJsonString(ibis_ops_core.Unary):
109
dtype = dt.string
11-
12-
13-
class JSONSet(ibis_ops_core.Unary):
14-
json_value: ibis_ops_core.Value[dt.Any]
15-
json_path: ibis_ops_core.Value[dt.String]
16-
17-
shape = rlz.shape_like("arg")
18-
dtype = rlz.dtype_like("arg")

0 commit comments

Comments
 (0)