Skip to content

feat: add GeoSeries.from_xy() #1364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Feb 6, 2025
15 changes: 14 additions & 1 deletion bigframes/core/compile/ibis_types.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per the notebook failure, please add some logic to ibis_dtype_to_bigframes_dtype looking for all ibis geography-y types and returning the gpd.array.GeometryDtype().

Something like

if isinstance(ibis_dtype, ibis_dtypes.GeoSpatial):
    return gpd.array.GeometryDtype()

around here:

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason we can look for ibis_dtypes.GeoSpatial is that all the geography-y types have it as a superclass. See:

Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
ibis_dtypes.JSON,
]

# Ibis geography dtype (SRID 4326, nullable) that this module pairs with
# gpd.array.GeometryDtype() and uses as the cast target for ibis geo types.
IBIS_GEO_TYPE = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True)


BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, bigframes.dtypes.Dtype]] = (
(ibis_dtypes.boolean, pd.BooleanDtype()),
Expand All @@ -70,7 +72,7 @@
pd.ArrowDtype(pa.decimal256(76, 38)),
),
(
ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True),
IBIS_GEO_TYPE,
gpd.array.GeometryDtype(),
),
(ibis_dtypes.json, db_dtypes.JSONDtype()),
Expand Down Expand Up @@ -177,6 +179,14 @@ def cast_ibis_value(
ibis_dtypes.timestamp,
),
ibis_dtypes.binary: (ibis_dtypes.string,),
ibis_dtypes.point: (IBIS_GEO_TYPE,),
ibis_dtypes.geometry: (IBIS_GEO_TYPE,),
ibis_dtypes.geography: (IBIS_GEO_TYPE,),
ibis_dtypes.linestring: (IBIS_GEO_TYPE,),
ibis_dtypes.polygon: (IBIS_GEO_TYPE,),
ibis_dtypes.multilinestring: (IBIS_GEO_TYPE,),
ibis_dtypes.multipoint: (IBIS_GEO_TYPE,),
ibis_dtypes.multipolygon: (IBIS_GEO_TYPE,),
}

value = ibis_value_to_canonical_type(value)
Expand Down Expand Up @@ -282,6 +292,9 @@ def ibis_dtype_to_bigframes_dtype(
if isinstance(ibis_dtype, ibis_dtypes.JSON):
return bigframes.dtypes.JSON_DTYPE

if isinstance(ibis_dtype, ibis_dtypes.GeoSpatial):
return gpd.array.GeometryDtype()

if ibis_dtype in IBIS_TO_BIGFRAMES:
return IBIS_TO_BIGFRAMES[ibis_dtype]
elif isinstance(ibis_dtype, ibis_dtypes.Decimal):
Expand Down
7 changes: 7 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,13 @@ def geo_area_op_impl(x: ibis_types.Value):
return typing.cast(ibis_types.GeoSpatialValue, x).area()


@scalar_op_compiler.register_binary_op(ops.geo_st_geogpoint_op, pass_op=False)
def geo_st_geogpoint_op_impl(x: ibis_types.Value, y: ibis_types.Value):
    """Compile ``geo_st_geogpoint_op`` by calling ``.point()`` on ``x`` with ``y``.

    Both operands are treated as ibis numeric values for the type checker.
    """
    # typing.cast only informs static type checkers; nothing is converted at
    # runtime.
    x_numeric = typing.cast(ibis_types.NumericValue, x)
    y_numeric = typing.cast(ibis_types.NumericValue, y)
    return x_numeric.point(y_numeric)


# Parameterized ops
@scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True)
def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp):
Expand Down
9 changes: 9 additions & 0 deletions bigframes/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,12 @@ def area(self, crs=None) -> bigframes.series.Series: # type: ignore
raise NotImplementedError(
f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}"
)

@classmethod
def from_xy(cls, x, y, index=None, session=None, **kwargs) -> GeoSeries:
    """Build a GeoSeries by combining ``x`` and ``y`` with ``geo_st_geogpoint_op``.

    Both inputs are first wrapped in ``bigframes.series.Series`` using the
    same ``index``, ``session`` and any extra keyword arguments, and the
    binary op is then applied with ``x`` as the left operand.
    """
    # TODO: if either x or y is local and the other is remote. Use the
    # session from the remote object.
    series_options = dict(index=index, session=session, **kwargs)
    x_values = bigframes.series.Series(x, **series_options)
    y_values = bigframes.series.Series(y, **series_options)

    points = x_values._apply_binary_op(y_values, ops.geo_st_geogpoint_op)
    return cls(points)
8 changes: 7 additions & 1 deletion bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,12 @@
SqlScalarOp,
where_op,
)
from bigframes.operations.geo_ops import geo_area_op, geo_x_op, geo_y_op
from bigframes.operations.geo_ops import (
geo_area_op,
geo_st_geogpoint_op,
geo_x_op,
geo_y_op,
)
from bigframes.operations.json_ops import (
JSONExtract,
JSONExtractArray,
Expand Down Expand Up @@ -337,6 +342,7 @@
"geo_x_op",
"geo_y_op",
"geo_area_op",
"geo_st_geogpoint_op",
# Numpy ops mapping
"NUMPY_TO_BINOP",
"NUMPY_TO_OP",
Expand Down
4 changes: 4 additions & 0 deletions bigframes/operations/geo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,7 @@
dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like"
),
)

# Binary op named "geo_st_geogpoint"; its operand/result typing is delegated
# to op_typing.BinaryNumericGeo.
geo_st_geogpoint_op = base_ops.create_binary_op(
name="geo_st_geogpoint", type_signature=op_typing.BinaryNumericGeo()
)
14 changes: 14 additions & 0 deletions bigframes/operations/type.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,20 @@ def output_type(
return bigframes.dtypes.coerce_to_common(left_type, right_type)


@dataclasses.dataclass
class BinaryNumericGeo(BinaryTypeSignature):
    """Type signature for geo functions like from_xy that take two numeric
    operands and produce a geography value."""

    def output_type(
        self, left_type: ExpressionType, right_type: ExpressionType
    ) -> ExpressionType:
        """Return ``GEO_DTYPE`` after checking that both operand types are numeric.

        Raises:
            TypeError: If a non-``None`` operand type is not numeric. The left
                operand is checked before the right one.
        """
        for operand_type in (left_type, right_type):
            # A None operand type is exempt from the numeric check.
            if operand_type is None:
                continue
            if not bigframes.dtypes.is_numeric(operand_type):
                raise TypeError(f"Type {operand_type} is not numeric")
        return bigframes.dtypes.GEO_DTYPE


@dataclasses.dataclass
class BinaryRealNumeric(BinaryTypeSignature):
"""Type signature for real-valued functions like divide, arctan2, pow."""
Expand Down
125 changes: 82 additions & 43 deletions notebooks/geo/geoseries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load the Counties table from the Census Bureau US Boundaries dataset"
"### 1. Load the Counties table from the Census Bureau US Boundaries dataset"
]
},
{
Expand All @@ -56,7 +56,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n",
"/usr/local/google/home/arwas/src/bigframes3/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n",
" warnings.warn(msg, category=bfe.DefaultIndexWarning)\n"
]
}
Expand All @@ -69,7 +69,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create a series from the int_point_geom column"
"### 2. Create a series from the int_point_geom column"
]
},
{
Expand Down Expand Up @@ -103,11 +103,11 @@
{
"data": {
"text/plain": [
"37 POINT (-91.19496 39.98605)\n",
"406 POINT (-84.86717 33.92103)\n",
"926 POINT (-82.47974 35.33641)\n",
"940 POINT (-75.50298 39.09709)\n",
"996 POINT (-92.56434 39.8298)\n",
"171 POINT (-95.50742 42.39186)\n",
"219 POINT (-105.42894 37.27755)\n",
"402 POINT (-93.34905 32.10121)\n",
"526 POINT (-84.60469 43.29233)\n",
"677 POINT (-89.5681 37.04779)\n",
"Name: int_point_geom, dtype: geometry"
]
},
Expand Down Expand Up @@ -136,11 +136,11 @@
{
"data": {
"text/plain": [
"0 POINT (-91.19496 39.98605)\n",
"1 POINT (-84.86717 33.92103)\n",
"2 POINT (-82.47974 35.33641)\n",
"3 POINT (-75.50298 39.09709)\n",
"4 POINT (-92.56434 39.8298)\n",
"0 POINT (-95.50742 42.39186)\n",
"1 POINT (-105.42894 37.27755)\n",
"2 POINT (-93.34905 32.10121)\n",
"3 POINT (-84.60469 43.29233)\n",
"4 POINT (-89.5681 37.04779)\n",
"dtype: geometry"
]
},
Expand Down Expand Up @@ -185,11 +185,11 @@
{
"data": {
"text/plain": [
"0 -91.194961\n",
"1 -84.867169\n",
"2 -82.479741\n",
"3 -75.502982\n",
"4 -92.56434\n",
"0 -95.507421\n",
"1 -105.42894\n",
"2 -93.34905\n",
"3 -84.60469\n",
"4 -89.568097\n",
"dtype: Float64"
]
},
Expand Down Expand Up @@ -217,11 +217,11 @@
{
"data": {
"text/plain": [
"0 39.986053\n",
"1 33.92103\n",
"2 35.336415\n",
"3 39.097088\n",
"4 39.829795\n",
"0 42.39186\n",
"1 37.277547\n",
"2 32.101213\n",
"3 43.292326\n",
"4 37.047793\n",
"dtype: Float64"
]
},
Expand Down Expand Up @@ -367,11 +367,11 @@
{
"data": {
"text/plain": [
"10 POLYGON ((-101.7778 40.34969, -101.77812 40.34...\n",
"127 POLYGON ((-89.22333 44.50398, -89.22334 44.499...\n",
"253 POLYGON ((-76.69446 37.07288, -76.69515 37.072...\n",
"261 POLYGON ((-98.70136 44.45055, -98.70136 44.450...\n",
"303 POLYGON ((-85.99565 30.28131, -85.99566 30.280...\n",
"54 POLYGON ((-93.76575 45.06448, -93.76575 45.064...\n",
"256 POLYGON ((-89.83723 42.68318, -89.83732 42.682...\n",
"266 POLYGON ((-104.19381 39.56523, -104.19464 39.5...\n",
"485 MULTIPOLYGON (((-91.05884 32.17233, -91.05891 ...\n",
"765 POLYGON ((-83.61848 38.1557, -83.61861 38.1554...\n",
"Name: county_geom, dtype: geometry"
]
},
Expand All @@ -389,7 +389,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Convert the geometry collection to `bigframes.gopandas.GeoSeries`"
"### 2. Convert the geometry collection to `bigframes.geopandas.GeoSeries`"
]
},
{
Expand All @@ -400,11 +400,11 @@
{
"data": {
"text/plain": [
"0 POLYGON ((-101.7778 40.34969, -101.77812 40.34...\n",
"1 POLYGON ((-89.22333 44.50398, -89.22334 44.499...\n",
"2 POLYGON ((-76.69446 37.07288, -76.69515 37.072...\n",
"3 POLYGON ((-98.70136 44.45055, -98.70136 44.450...\n",
"4 POLYGON ((-85.99565 30.28131, -85.99566 30.280...\n",
"0 POLYGON ((-93.76575 45.06448, -93.76575 45.064...\n",
"1 POLYGON ((-89.83723 42.68318, -89.83732 42.682...\n",
"2 POLYGON ((-104.19381 39.56523, -104.19464 39.5...\n",
"3 MULTIPOLYGON (((-91.05884 32.17233, -91.05891 ...\n",
"4 POLYGON ((-83.61848 38.1557, -83.61861 38.1554...\n",
"dtype: geometry"
]
},
Expand Down Expand Up @@ -442,14 +442,14 @@
"outputs": [
{
"ename": "NotImplementedError",
"evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.34.0",
"evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.35.0",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfive_geom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marea\u001b[49m\n",
"File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:66\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.\u001b[39;00m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 66\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 68\u001b[0m )\n",
"\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.34.0"
"File \u001b[0;32m~/src/bigframes3/bigframes/geopandas/geoseries.py:66\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.\u001b[39;00m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 66\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 68\u001b[0m )\n",
"\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.35.0"
]
}
],
Expand All @@ -461,7 +461,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use `bigframes.bigquery.st_area` to retirive the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area"
"### 3. Use `bigframes.bigquery.st_area` to retrieve the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area"
]
},
{
Expand All @@ -481,11 +481,11 @@
{
"data": {
"text/plain": [
"0 2382382043.48891\n",
"1 1977633097.26862\n",
"2 939388839.499466\n",
"3 3269015229.381782\n",
"4 2678752241.321673\n",
"0 1567505274.453911\n",
"1 1511436852.079554\n",
"2 4789800692.948824\n",
"3 1686877416.586061\n",
"4 740944862.916908\n",
"dtype: Float64"
]
},
Expand All @@ -498,6 +498,45 @@
"geom_area = bbq.st_area(five_geom)\n",
"geom_area"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use `bigframes.geopandas.GeoSeries.from_xy()` to create a GeoSeries of `Point` geometries. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Reuse the `geo_points.x` and `geo_points.y` results by passing them to `.from_xy()` "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 POINT (-95.50742 42.39186)\n",
"1 POINT (-105.42894 37.27755)\n",
"2 POINT (-93.34905 32.10121)\n",
"3 POINT (-84.60469 43.29233)\n",
"4 POINT (-89.5681 37.04779)\n",
"dtype: geometry"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bigframes.geopandas.GeoSeries.from_xy(geo_points.x, geo_points.y)"
]
}
],
"metadata": {
Expand Down
20 changes: 20 additions & 0 deletions tests/system/small/geopandas/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,23 @@ def test_geo_area_not_supported():
),
):
bf_series.area


def test_geo_from_xy():
    """GeoSeries.from_xy on local lists should match geopandas' from_xy."""
    x = [2.5, 5, -3.0]
    y = [0.5, 1, 1.5]

    # BigFrames side: build the points, normalize the dtype, then download.
    bf_series = bigframes.geopandas.GeoSeries.from_xy(x, y)
    bf_result = bf_series.astype(geopandas.array.GeometryDtype()).to_pandas()

    # geopandas reference built from the same coordinates.
    pd_series = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326")
    pd_result = pd_series.astype(geopandas.array.GeometryDtype())

    pd.testing.assert_series_equal(
        bf_result,
        pd_result,
        check_series_type=False,
        check_index=False,
    )
Loading