Skip to content

deps: support shapely versions 1.8.5+ #1621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions bigframes/core/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import math
from typing import cast, Collection, Iterable, Mapping, Optional, TYPE_CHECKING, Union

import shapely # type: ignore
import shapely.geometry.base # type: ignore

import bigframes.core.compile.googlesql as googlesql

Expand All @@ -33,9 +33,19 @@
import bigframes.core.ordering


# shapely.io.to_wkt is only available in shapely 2.0+; fall back to
# shapely.wkt.dumps on shapely 1.8.x.
try:
from shapely.io import to_wkt # type: ignore
except ImportError:
from shapely.wkt import dumps # type: ignore

to_wkt = dumps


### Writing SQL Values (literals, column references, table references, etc.)
def simple_literal(value: bytes | str | int | bool | float | datetime.datetime | None):
"""Return quoted input string."""

# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals
if value is None:
return "NULL"
Expand Down Expand Up @@ -65,8 +75,8 @@ def simple_literal(value: bytes | str | int | bool | float | datetime.datetime |
return f"DATE('{value.isoformat()}')"
elif isinstance(value, datetime.time):
return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))"
elif isinstance(value, shapely.Geometry):
return f"ST_GEOGFROMTEXT({simple_literal(shapely.to_wkt(value))})"
elif isinstance(value, shapely.geometry.base.BaseGeometry):
return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})"
elif isinstance(value, decimal.Decimal):
# TODO: disambiguate BIGNUMERIC based on scale and/or precision
return f"CAST('{str(value)}' AS NUMERIC)"
Expand Down
4 changes: 2 additions & 2 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import numpy as np
import pandas as pd
import pyarrow as pa
import shapely # type: ignore
import shapely.geometry # type: ignore

# Type hints for Pandas dtypes supported by BigQuery DataFrame
Dtype = Union[
Expand Down Expand Up @@ -506,7 +506,7 @@ def bigframes_dtype_to_literal(
if isinstance(bigframes_dtype, pd.StringDtype):
return "string"
if isinstance(bigframes_dtype, gpd.array.GeometryDtype):
return shapely.Point((0, 0))
return shapely.geometry.Point((0, 0))

raise TypeError(
f"No literal conversion for {bigframes_dtype}. {constants.FEEDBACK_LINK}"
Expand Down
5 changes: 4 additions & 1 deletion bigframes/session/_io/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ def arrow_to_pandas(

if dtype == geopandas.array.GeometryDtype():
series = geopandas.GeoSeries.from_wkt(
column,
# Using `to_pylist()` is a workaround for "TypeError: object of type
# 'pyarrow.lib.StringScalar' has no len()" on older pyarrow,
# geopandas, and shapely combinations.
column.to_pylist(),
# BigQuery geography type is based on the WGS84 reference ellipsoid.
crs="EPSG:4326",
)
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"pyarrow >=15.0.2",
"pydata-google-auth >=1.8.2",
"requests >=2.27.1",
"shapely >=1.8.5",
"sqlglot >=23.6.3",
"tabulate >=0.9",
"ipywidgets >=7.7.1",
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pyarrow==15.0.2
pydata-google-auth==1.8.2
requests==2.27.1
scikit-learn==1.2.2
shapely==1.8.5
sqlglot==23.6.3
tabulate==0.9
ipywidgets==7.7.1
Expand Down
8 changes: 6 additions & 2 deletions tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pandas as pd
import pyarrow as pa # type: ignore
import pytest
import shapely # type: ignore
import shapely.geometry # type: ignore

import bigframes.dtypes as dtypes
import bigframes.features
Expand Down Expand Up @@ -229,7 +229,11 @@ def test_series_construct_from_list_escaped_strings():

def test_series_construct_geodata():
pd_series = pd.Series(
[shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)],
[
shapely.geometry.Point(1, 1),
shapely.geometry.Point(2, 2),
shapely.geometry.Point(3, 3),
],
dtype=gpd.array.GeometryDtype(),
)

Expand Down
31 changes: 16 additions & 15 deletions tests/unit/core/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@

import datetime
import decimal
import re

import pytest
import shapely # type: ignore
import shapely.geometry # type: ignore

from bigframes.core import sql


@pytest.mark.parametrize(
("value", "expected"),
("value", "expected_pattern"),
(
# Try to have some literals for each scalar data type:
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
Expand All @@ -32,44 +33,44 @@
(False, "False"),
(
b"\x01\x02\x03ABC",
r"b'\x01\x02\x03ABC'",
re.escape(r"b'\x01\x02\x03ABC'"),
),
(
datetime.date(2025, 1, 1),
"DATE('2025-01-01')",
re.escape("DATE('2025-01-01')"),
),
(
datetime.datetime(2025, 1, 2, 3, 45, 6, 789123),
"DATETIME('2025-01-02T03:45:06.789123')",
re.escape("DATETIME('2025-01-02T03:45:06.789123')"),
),
(
shapely.Point(0, 1),
"ST_GEOGFROMTEXT('POINT (0 1)')",
shapely.geometry.Point(0, 1),
r"ST_GEOGFROMTEXT\('POINT \(0[.]?0* 1[.]?0*\)'\)",
),
# TODO: INTERVAL type (e.g. from dateutil.relativedelta)
# TODO: JSON type (TBD what Python object that would correspond to)
(123, "123"),
(decimal.Decimal("123.75"), "CAST('123.75' AS NUMERIC)"),
(123, re.escape("123")),
(decimal.Decimal("123.75"), re.escape("CAST('123.75' AS NUMERIC)")),
# TODO: support BIGNUMERIC by looking at precision/scale of the DECIMAL
(123.75, "123.75"),
(123.75, re.escape("123.75")),
# TODO: support RANGE type
("abc", "'abc'"),
("abc", re.escape("'abc'")),
# TODO: support STRUCT type (possibly another method?)
(
datetime.time(12, 34, 56, 789123),
"TIME(DATETIME('1970-01-01 12:34:56.789123'))",
re.escape("TIME(DATETIME('1970-01-01 12:34:56.789123'))"),
),
(
datetime.datetime(
2025, 1, 2, 3, 45, 6, 789123, tzinfo=datetime.timezone.utc
),
"TIMESTAMP('2025-01-02T03:45:06.789123+00:00')",
re.escape("TIMESTAMP('2025-01-02T03:45:06.789123+00:00')"),
),
),
)
def test_simple_literal(value, expected):
def test_simple_literal(value, expected_pattern):
got = sql.simple_literal(value)
assert got == expected
assert re.match(expected_pattern, got) is not None


def test_create_vector_search_sql_simple():
Expand Down
10 changes: 5 additions & 5 deletions third_party/bigframes_vendored/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ def x(self) -> bigframes.series.Series:

>>> import bigframes.pandas as bpd
>>> import geopandas.array
>>> import shapely
>>> import shapely.geometry
>>> bpd.options.display.progress_bar = None

>>> series = bpd.Series(
... [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)],
... [shapely.geometry.Point(1, 2), shapely.geometry.Point(2, 3), shapely.geometry.Point(3, 4)],
... dtype=geopandas.array.GeometryDtype()
... )
>>> series.geo.x
Expand All @@ -72,11 +72,11 @@ def y(self) -> bigframes.series.Series:

>>> import bigframes.pandas as bpd
>>> import geopandas.array
>>> import shapely
>>> import shapely.geometry
>>> bpd.options.display.progress_bar = None

>>> series = bpd.Series(
... [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)],
... [shapely.geometry.Point(1, 2), shapely.geometry.Point(2, 3), shapely.geometry.Point(3, 4)],
... dtype=geopandas.array.GeometryDtype()
... )
>>> series.geo.y
Expand All @@ -101,7 +101,7 @@ def boundary(self) -> bigframes.geopandas.GeoSeries:

>>> import bigframes.pandas as bpd
>>> import geopandas.array
>>> import shapely
>>> import shapely.geometry
>>> bpd.options.display.progress_bar = None

>>> from shapely.geometry import Polygon, LineString, Point
Expand Down
23 changes: 14 additions & 9 deletions third_party/bigframes_vendored/ibis/expr/datatypes/value.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,15 +312,16 @@ def normalize(typ, value):
)
return frozendict({k: normalize(t, value[k]) for k, t in dtype.items()})
elif dtype.is_geospatial():
import shapely as shp
import shapely
import shapely.geometry

if isinstance(value, (tuple, list)):
if dtype.is_point():
return shp.Point(value)
return shapely.geometry.Point(value)
elif dtype.is_linestring():
return shp.LineString(value)
return shapely.geometry.LineString(value)
elif dtype.is_polygon():
return shp.Polygon(
return shapely.geometry.Polygon(
toolz.concat(
map(
attrgetter("coords"),
Expand All @@ -329,19 +330,23 @@ def normalize(typ, value):
)
)
elif dtype.is_multipoint():
return shp.MultiPoint(tuple(map(partial(normalize, dt.point), value)))
return shapely.geometry.MultiPoint(
tuple(map(partial(normalize, dt.point), value))
)
elif dtype.is_multilinestring():
return shp.MultiLineString(
return shapely.geometry.MultiLineString(
tuple(map(partial(normalize, dt.linestring), value))
)
elif dtype.is_multipolygon():
return shp.MultiPolygon(map(partial(normalize, dt.polygon), value))
return shapely.geometry.MultiPolygon(
map(partial(normalize, dt.polygon), value)
)
else:
raise IbisTypeError(f"Unsupported geospatial type: {dtype}")
elif isinstance(value, shp.geometry.base.BaseGeometry):
elif isinstance(value, shapely.geometry.base.BaseGeometry):
return value
else:
return shp.from_wkt(value)
return shapely.from_wkt(value)
elif dtype.is_date():
return normalize_datetime(value).date()
elif dtype.is_time():
Expand Down
24 changes: 12 additions & 12 deletions third_party/bigframes_vendored/ibis/expr/types/geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def contains(self, right: GeoSpatialValue) -> ir.BooleanValue:
>>> ibis.options.interactive = True
>>> import shapely
>>> t = ibis.examples.zones.fetch()
>>> p = shapely.Point(935996.821, 191376.75) # centroid for zone 1
>>> p = shapely.geometry.Point(935996.821, 191376.75) # centroid for zone 1
>>> plit = ibis.literal(p, "geometry")
>>> t.geom.contains(plit).name("contains")
┏━━━━━━━━━━┓
Expand Down Expand Up @@ -197,7 +197,7 @@ def covers(self, right: GeoSpatialValue) -> ir.BooleanValue:

Polygon area center in zone 1

>>> z1_ctr_buff = shapely.Point(935996.821, 191376.75).buffer(10)
>>> z1_ctr_buff = shapely.geometry.Point(935996.821, 191376.75).buffer(10)
>>> z1_ctr_buff_lit = ibis.literal(z1_ctr_buff, "geometry")
>>> t.geom.covers(z1_ctr_buff_lit).name("covers")
┏━━━━━━━━━┓
Expand Down Expand Up @@ -242,7 +242,7 @@ def covered_by(self, right: GeoSpatialValue) -> ir.BooleanValue:

Polygon area center in zone 1

>>> pol_big = shapely.Point(935996.821, 191376.75).buffer(10000)
>>> pol_big = shapely.geometry.Point(935996.821, 191376.75).buffer(10000)
>>> pol_big_lit = ibis.literal(pol_big, "geometry")
>>> t.geom.covered_by(pol_big_lit).name("covered_by")
┏━━━━━━━━━━━━┓
Expand All @@ -262,7 +262,7 @@ def covered_by(self, right: GeoSpatialValue) -> ir.BooleanValue:
│ False │
│ … │
└────────────┘
>>> pol_small = shapely.Point(935996.821, 191376.75).buffer(100)
>>> pol_small = shapely.geometry.Point(935996.821, 191376.75).buffer(100)
>>> pol_small_lit = ibis.literal(pol_small, "geometry")
>>> t.geom.covered_by(pol_small_lit).name("covered_by")
┏━━━━━━━━━━━━┓
Expand Down Expand Up @@ -387,7 +387,7 @@ def disjoint(self, right: GeoSpatialValue) -> ir.BooleanValue:
>>> ibis.options.interactive = True
>>> import shapely
>>> t = ibis.examples.zones.fetch()
>>> p = shapely.Point(935996.821, 191376.75) # zone 1 centroid
>>> p = shapely.geometry.Point(935996.821, 191376.75) # zone 1 centroid
>>> plit = ibis.literal(p, "geometry")
>>> t.geom.disjoint(plit).name("disjoint")
┏━━━━━━━━━━┓
Expand Down Expand Up @@ -435,7 +435,7 @@ def d_within(
>>> ibis.options.interactive = True
>>> import shapely
>>> t = ibis.examples.zones.fetch()
>>> penn_station = shapely.Point(986345.399, 211974.446)
>>> penn_station = shapely.geometry.Point(986345.399, 211974.446)
>>> penn_lit = ibis.literal(penn_station, "geometry")

Check zones within 1000ft of Penn Station centroid
Expand Down Expand Up @@ -578,7 +578,7 @@ def intersects(self, right: GeoSpatialValue) -> ir.BooleanValue:
>>> ibis.options.interactive = True
>>> import shapely
>>> t = ibis.examples.zones.fetch()
>>> p = shapely.Point(935996.821, 191376.75) # zone 1 centroid
>>> p = shapely.geometry.Point(935996.821, 191376.75) # zone 1 centroid
>>> plit = ibis.literal(p, "geometry")
>>> t.geom.intersects(plit).name("intersects")
┏━━━━━━━━━━━━┓
Expand Down Expand Up @@ -675,7 +675,7 @@ def overlaps(self, right: GeoSpatialValue) -> ir.BooleanValue:

Polygon center in an edge point of zone 1

>>> p_edge_buffer = shapely.Point(933100.918, 192536.086).buffer(100)
>>> p_edge_buffer = shapely.geometry.Point(933100.918, 192536.086).buffer(100)
>>> buff_lit = ibis.literal(p_edge_buffer, "geometry")
>>> t.geom.overlaps(buff_lit).name("overlaps")
┏━━━━━━━━━━┓
Expand Down Expand Up @@ -720,7 +720,7 @@ def touches(self, right: GeoSpatialValue) -> ir.BooleanValue:

Edge point of zone 1

>>> p_edge = shapely.Point(933100.9183527103, 192536.08569720192)
>>> p_edge = shapely.geometry.Point(933100.9183527103, 192536.08569720192)
>>> p_edge_lit = ibis.literal(p_edge, "geometry")
>>> t.geom.touches(p_edge_lit).name("touches")
┏━━━━━━━━━┓
Expand Down Expand Up @@ -765,7 +765,7 @@ def distance(self, right: GeoSpatialValue) -> ir.FloatingValue:

Penn station zone centroid

>>> penn_station = shapely.Point(986345.399, 211974.446)
>>> penn_station = shapely.geometry.Point(986345.399, 211974.446)
>>> penn_lit = ibis.literal(penn_station, "geometry")
>>> t.geom.distance(penn_lit).name("distance_penn")
┏━━━━━━━━━━━━━━━┓
Expand Down Expand Up @@ -886,7 +886,7 @@ def union(self, right: GeoSpatialValue) -> GeoSpatialValue:

Penn station zone centroid

>>> penn_station = shapely.Point(986345.399, 211974.446)
>>> penn_station = shapely.geometry.Point(986345.399, 211974.446)
>>> penn_lit = ibis.literal(penn_station, "geometry")
>>> t.geom.centroid().union(penn_lit).name("union_centroid_penn")
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
Expand Down Expand Up @@ -1312,7 +1312,7 @@ def within(self, right: GeoSpatialValue) -> ir.BooleanValue:
>>> ibis.options.interactive = True
>>> import shapely
>>> t = ibis.examples.zones.fetch()
>>> penn_station_buff = shapely.Point(986345.399, 211974.446).buffer(5000)
>>> penn_station_buff = shapely.geometry.Point(986345.399, 211974.446).buffer(5000)
>>> penn_lit = ibis.literal(penn_station_buff, "geometry")
>>> t.filter(t.geom.within(penn_lit))["zone"]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
Expand Down