|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [], |
| 8 | + "source": [ |
| 9 | + "# Copyright 2025 Google LLC\n", |
| 10 | + "#\n", |
| 11 | + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", |
| 12 | + "# you may not use this file except in compliance with the License.\n", |
| 13 | + "# You may obtain a copy of the License at\n", |
| 14 | + "#\n", |
| 15 | + "# https://www.apache.org/licenses/LICENSE-2.0\n", |
| 16 | + "#\n", |
| 17 | + "# Unless required by applicable law or agreed to in writing, software\n", |
| 18 | + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", |
| 19 | + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", |
| 20 | + "# See the License for the specific language governing permissions and\n", |
| 21 | + "# limitations under the License." |
| 22 | + ] |
| 23 | + }, |
| 24 | + { |
| 25 | + "cell_type": "markdown", |
| 26 | + "metadata": {}, |
| 27 | + "source": [ |
| 28 | + "# Analyzing a GEOGRAPHY column with `bigframes.geopandas.GeoSeries`" |
| 29 | + ] |
| 30 | + }, |
| 31 | + { |
| 32 | + "cell_type": "code", |
| 33 | + "execution_count": 2, |
| 34 | + "metadata": {}, |
| 35 | + "outputs": [], |
| 36 | + "source": [ |
| 37 | + "import bigframes\n", |
| 38 | + "import bigframes.geopandas\n", |
| 39 | + "import bigframes.pandas as bpd\n", |
| 40 | + "import shapely\n", |
| 41 | + "bpd.options.display.progress_bar = None" |
| 42 | + ] |
| 43 | + }, |
| 44 | + { |
| 45 | + "cell_type": "markdown", |
| 46 | + "metadata": {}, |
| 47 | + "source": [ |
| 48 | + "### Load the Counties table from the Census Bureau US Boundaries dataset" |
| 49 | + ] |
| 50 | + }, |
| 51 | + { |
| 52 | + "cell_type": "code", |
| 53 | + "execution_count": 3, |
| 54 | + "metadata": {}, |
| 55 | + "outputs": [ |
| 56 | + { |
| 57 | + "name": "stderr", |
| 58 | + "output_type": "stream", |
| 59 | + "text": [ |
| 60 | + "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:274: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n", |
| 61 | + " warnings.warn(msg, category=bfe.DefaultIndexWarning)\n" |
| 62 | + ] |
| 63 | + } |
| 64 | + ], |
| 65 | + "source": [ |
| 66 | + "df = bpd.read_gbq(\"bigquery-public-data.geo_us_boundaries.counties\")" |
| 67 | + ] |
| 68 | + }, |
| 69 | + { |
| 70 | + "cell_type": "markdown", |
| 71 | + "metadata": {}, |
| 72 | + "source": [ |
| 73 | + "### Create a series from the `int_point_geom` column" |
| 74 | + ] |
| 75 | + }, |
| 76 | + { |
| 77 | + "cell_type": "code", |
| 78 | + "execution_count": 4, |
| 79 | + "metadata": {}, |
| 80 | + "outputs": [], |
| 81 | + "source": [ |
| 82 | + "point_geom_series = df['int_point_geom']" |
| 83 | + ] |
| 84 | + }, |
| 85 | + { |
| 86 | + "cell_type": "markdown", |
| 87 | + "metadata": {}, |
| 88 | + "source": [ |
| 89 | + "## The `GeoSeries` constructor accepts local data or a `bigframes.pandas.Series` object." |
| 90 | + ] |
| 91 | + }, |
| 92 | + { |
| 93 | + "cell_type": "markdown", |
| 94 | + "metadata": {}, |
| 95 | + "source": [ |
| 96 | + "### 1. Create a GeoSeries from local data with `peek`" |
| 97 | + ] |
| 98 | + }, |
| 99 | + { |
| 100 | + "cell_type": "code", |
| 101 | + "execution_count": 12, |
| 102 | + "metadata": {}, |
| 103 | + "outputs": [ |
| 104 | + { |
| 105 | + "data": { |
| 106 | + "text/plain": [ |
| 107 | + "54 POINT (-93.47523 45.00612)\n", |
| 108 | + "256 POINT (-89.60507 42.67552)\n", |
| 109 | + "266 POINT (-104.11408 39.31516)\n", |
| 110 | + "485 POINT (-91.23193 32.34688)\n", |
| 111 | + "765 POINT (-83.42808 38.20427)\n", |
| 112 | + "Name: int_point_geom, dtype: geometry" |
| 113 | + ] |
| 114 | + }, |
| 115 | + "execution_count": 12, |
| 116 | + "metadata": {}, |
| 117 | + "output_type": "execute_result" |
| 118 | + } |
| 119 | + ], |
| 120 | + "source": [ |
| 121 | + "five_geo_points = point_geom_series.peek(n = 5)\n", |
| 122 | + "five_geo_points" |
| 123 | + ] |
| 124 | + }, |
| 125 | + { |
| 126 | + "cell_type": "markdown", |
| 127 | + "metadata": {}, |
| 128 | + "source": [ |
| 129 | + "### Convert the five geo points to `bigframes.geopandas.GeoSeries`" |
| 130 | + ] |
| 131 | + }, |
| 132 | + { |
| 133 | + "cell_type": "markdown", |
| 134 | + "metadata": {}, |
| 135 | + "source": [ |
| 136 | + "#### Note: A `TypeError` is raised if the GEOGRAPHY column contains a geometry type other than `Point`." |
| 137 | + ] |
| 138 | + }, |
| 139 | + { |
| 140 | + "cell_type": "code", |
| 141 | + "execution_count": 6, |
| 142 | + "metadata": {}, |
| 143 | + "outputs": [ |
| 144 | + { |
| 145 | + "data": { |
| 146 | + "text/plain": [ |
| 147 | + "0 POINT (-86.87338 38.37334)\n", |
| 148 | + "1 POINT (-118.48037 46.25461)\n", |
| 149 | + "2 POINT (-92.5617 32.30429)\n", |
| 150 | + "3 POINT (-83.46189 39.55525)\n", |
| 151 | + "4 POINT (-119.46779 47.21363)\n", |
| 152 | + "dtype: geometry" |
| 153 | + ] |
| 154 | + }, |
| 155 | + "execution_count": 6, |
| 156 | + "metadata": {}, |
| 157 | + "output_type": "execute_result" |
| 158 | + } |
| 159 | + ], |
| 160 | + "source": [ |
| 161 | + "geo_points = bigframes.geopandas.GeoSeries(\n", |
| 162 | + " [point for point in five_geo_points]\n", |
| 163 | + ")\n", |
| 164 | + "geo_points" |
| 165 | + ] |
| 166 | + }, |
| 167 | + { |
| 168 | + "cell_type": "markdown", |
| 169 | + "metadata": {}, |
| 170 | + "source": [ |
| 171 | + "### Retrieve the x (longitude) and y (latitude) from the GeoSeries with `.x` and `.y`." |
| 172 | + ] |
| 173 | + }, |
| 174 | + { |
| 175 | + "cell_type": "markdown", |
| 176 | + "metadata": {}, |
| 177 | + "source": [ |
| 178 | + "### `.x`" |
| 179 | + ] |
| 180 | + }, |
| 181 | + { |
| 182 | + "cell_type": "code", |
| 183 | + "execution_count": 7, |
| 184 | + "metadata": {}, |
| 185 | + "outputs": [ |
| 186 | + { |
| 187 | + "data": { |
| 188 | + "text/plain": [ |
| 189 | + "0 -86.873385\n", |
| 190 | + "1 -118.48037\n", |
| 191 | + "2 -92.5617\n", |
| 192 | + "3 -83.461893\n", |
| 193 | + "4 -119.467788\n", |
| 194 | + "dtype: Float64" |
| 195 | + ] |
| 196 | + }, |
| 197 | + "execution_count": 7, |
| 198 | + "metadata": {}, |
| 199 | + "output_type": "execute_result" |
| 200 | + } |
| 201 | + ], |
| 202 | + "source": [ |
| 203 | + "geo_points.x" |
| 204 | + ] |
| 205 | + }, |
| 206 | + { |
| 207 | + "cell_type": "markdown", |
| 208 | + "metadata": {}, |
| 209 | + "source": [ |
| 210 | + "### `.y`" |
| 211 | + ] |
| 212 | + }, |
| 213 | + { |
| 214 | + "cell_type": "code", |
| 215 | + "execution_count": 8, |
| 216 | + "metadata": {}, |
| 217 | + "outputs": [ |
| 218 | + { |
| 219 | + "data": { |
| 220 | + "text/plain": [ |
| 221 | + "0 38.373344\n", |
| 222 | + "1 46.254606\n", |
| 223 | + "2 32.30429\n", |
| 224 | + "3 39.555246\n", |
| 225 | + "4 47.213633\n", |
| 226 | + "dtype: Float64" |
| 227 | + ] |
| 228 | + }, |
| 229 | + "execution_count": 8, |
| 230 | + "metadata": {}, |
| 231 | + "output_type": "execute_result" |
| 232 | + } |
| 233 | + ], |
| 234 | + "source": [ |
| 235 | + "geo_points.y" |
| 236 | + ] |
| 237 | + }, |
| 238 | + { |
| 239 | + "cell_type": "markdown", |
| 240 | + "metadata": {}, |
| 241 | + "source": [ |
| 242 | + "### 2. Alternatively, use the `.geo` accessor to access GeoSeries methods from a `bigframes.pandas.Series` object." |
| 243 | + ] |
| 244 | + }, |
| 245 | + { |
| 246 | + "cell_type": "markdown", |
| 247 | + "metadata": {}, |
| 248 | + "source": [ |
| 249 | + "#### `geo.x`" |
| 250 | + ] |
| 251 | + }, |
| 252 | + { |
| 253 | + "cell_type": "code", |
| 254 | + "execution_count": 13, |
| 255 | + "metadata": {}, |
| 256 | + "outputs": [ |
| 257 | + { |
| 258 | + "data": { |
| 259 | + "text/plain": [ |
| 260 | + "0 -101.298265\n", |
| 261 | + "1 -99.111085\n", |
| 262 | + "2 -66.58687\n", |
| 263 | + "3 -102.601791\n", |
| 264 | + "4 -71.578625\n", |
| 265 | + "5 -88.961529\n", |
| 266 | + "6 -87.492986\n", |
| 267 | + "7 -82.422666\n", |
| 268 | + "8 -100.208166\n", |
| 269 | + "9 -85.815939\n", |
| 270 | + "10 -101.681133\n", |
| 271 | + "11 -119.516659\n", |
| 272 | + "12 -89.398306\n", |
| 273 | + "13 -107.78848\n", |
| 274 | + "14 -91.159306\n", |
| 275 | + "15 -113.887042\n", |
| 276 | + "16 -83.470416\n", |
| 277 | + "17 -98.520146\n", |
| 278 | + "18 -83.911718\n", |
| 279 | + "19 -87.321865\n", |
| 280 | + "20 -91.727626\n", |
| 281 | + "21 -93.466093\n", |
| 282 | + "22 -101.143324\n", |
| 283 | + "23 -78.657634\n", |
| 284 | + "24 -94.272323\n", |
| 285 | + "dtype: Float64" |
| 286 | + ] |
| 287 | + }, |
| 288 | + "execution_count": 13, |
| 289 | + "metadata": {}, |
| 290 | + "output_type": "execute_result" |
| 291 | + } |
| 292 | + ], |
| 293 | + "source": [ |
| 294 | + "point_geom_series.geo.x" |
| 295 | + ] |
| 296 | + }, |
| 297 | + { |
| 298 | + "cell_type": "markdown", |
| 299 | + "metadata": {}, |
| 300 | + "source": [ |
| 301 | + "#### `geo.y`" |
| 302 | + ] |
| 303 | + }, |
| 304 | + { |
| 305 | + "cell_type": "code", |
| 306 | + "execution_count": 14, |
| 307 | + "metadata": {}, |
| 308 | + "outputs": [ |
| 309 | + { |
| 310 | + "data": { |
| 311 | + "text/plain": [ |
| 312 | + "0 46.710819\n", |
| 313 | + "1 29.353661\n", |
| 314 | + "2 18.211152\n", |
| 315 | + "3 38.835646\n", |
| 316 | + "4 41.869768\n", |
| 317 | + "5 39.860237\n", |
| 318 | + "6 36.892059\n", |
| 319 | + "7 38.143642\n", |
| 320 | + "8 34.524623\n", |
| 321 | + "9 30.862007\n", |
| 322 | + "10 40.180165\n", |
| 323 | + "11 46.228125\n", |
| 324 | + "12 36.054196\n", |
| 325 | + "13 38.154731\n", |
| 326 | + "14 38.761902\n", |
| 327 | + "15 44.928506\n", |
| 328 | + "16 30.447232\n", |
| 329 | + "17 29.448671\n", |
| 330 | + "18 42.602532\n", |
| 331 | + "19 34.529776\n", |
| 332 | + "20 33.957675\n", |
| 333 | + "21 42.037538\n", |
| 334 | + "22 29.875285\n", |
| 335 | + "23 36.299884\n", |
| 336 | + "24 44.821657\n", |
| 337 | + "dtype: Float64" |
| 338 | + ] |
| 339 | + }, |
| 340 | + "execution_count": 14, |
| 341 | + "metadata": {}, |
| 342 | + "output_type": "execute_result" |
| 343 | + } |
| 344 | + ], |
| 345 | + "source": [ |
| 346 | + "point_geom_series.geo.y" |
| 347 | + ] |
| 348 | + } |
| 349 | + ], |
| 350 | + "metadata": { |
| 351 | + "kernelspec": { |
| 352 | + "display_name": "venv", |
| 353 | + "language": "python", |
| 354 | + "name": "python3" |
| 355 | + }, |
| 356 | + "language_info": { |
| 357 | + "codemirror_mode": { |
| 358 | + "name": "ipython", |
| 359 | + "version": 3 |
| 360 | + }, |
| 361 | + "file_extension": ".py", |
| 362 | + "mimetype": "text/x-python", |
| 363 | + "name": "python", |
| 364 | + "nbconvert_exporter": "python", |
| 365 | + "pygments_lexer": "ipython3", |
| 366 | + "version": "3.9.19" |
| 367 | + } |
| 368 | + }, |
| 369 | + "nbformat": 4, |
| 370 | + "nbformat_minor": 2 |
| 371 | +} |
0 commit comments