-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: New geoaccessor
to_h3
to convert geometry to h3 index (#739)
* ENH: Turn point to h3 * Drop activate to turn geodataframe to dataframe * Add `column` for Series's xy_to_h3 * Add `drop` option * Add basic documentation for xy_to_h3 * Add example for geodataframe.xy_to_h3 * Add example for geoseries.xy_to_h3 * Set default column name * BUG: should return geodataframe * Simplify a bit * update default value * Add new advice * correct path * New advice for h3-pandas * Raise error while keep original data and missing column names * simplify via to_geoframe * Add raises part * New advice for h3-pandas * `xy_to_h3` -> `points_to_h3` * index methodes * adjust the place of import block * require geometry is point type * Add other error informations * BOT: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * lint codes * correct method name * use sphinx link * relink * add h3 into package * Create polygons_to_h3.py * merge `points_to_h3` and `polygons_to_h3` into `to_h3` * do `rename `when drop is True * Update error message * lint codes * miss importing `xy` * update example * merge `points_to_h3` and `polygons_to_h3` into `to_h3` * lint codes * update example * Update example * Simplify a bit * simplify a bit * use getattr to simplify * move 'import' block to top * `column` -> `name` * `column` -> `name` use `name` first * BOT: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update to_h3.py * Simplify a bit * adapt the new feature * simplify a bit * BOT: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add blank lines * Create test_to_h3.py * BOT: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * lint codes * correct variable * cut blank lines * adjust parameters sequence * Create test_to_h3.py * add more examples * should use and * simplify a bit * Simplify a bit * correct logic Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
54916ba
commit 95618ef
Showing
10 changed files
with
416 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ dependencies: | |
- topojson | ||
- joblib | ||
- rapidfuzz | ||
- h3-py | ||
# testing | ||
- pytest | ||
- pytest-cov | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Hashable | ||
|
||
import geopandas as gpd | ||
import pandas as pd | ||
|
||
from dtoolkit.accessor.dataframe import repeat | ||
from dtoolkit.accessor.series import len as s_len | ||
from dtoolkit.geoaccessor.geodataframe.drop_geometry import drop_geometry | ||
from dtoolkit.geoaccessor.geoseries.to_h3 import points_to_h3 | ||
from dtoolkit.geoaccessor.geoseries.to_h3 import polygons_to_h3 | ||
from dtoolkit.geoaccessor.register import register_geodataframe_method | ||
|
||
|
||
@register_geodataframe_method | ||
def to_h3( | ||
df: gpd.GeoDataFrame, | ||
/, | ||
resolution: int, | ||
drop: bool = True, | ||
name: Hashable = "h3", | ||
) -> pd.DataFrame | gpd.GeoDataFrame: | ||
""" | ||
Convert Point to containing H3 cell index. | ||
Parameters | ||
---------- | ||
resolution : int | ||
H3 resolution. | ||
drop : bool, default True | ||
Whether to drop the geometry column. | ||
name : Hashable, default "h3" | ||
Name of the column to store the H3 cell index. | ||
Returns | ||
------- | ||
DataFrame or GeoDataFrame | ||
DataFrame if drop is True else GeoDataFrame. | ||
Raises | ||
------ | ||
ModuleNotFoundError | ||
If don't have module named 'h3'. | ||
TypeError | ||
If the geometry is not Point. | ||
ValueError | ||
If the CRS is not WGS84 or EPSG:4326. | ||
See Also | ||
-------- | ||
h3.latlon_to_h3 | ||
dtoolkit.geoaccessor.geoseries.to_h3 | ||
Examples | ||
-------- | ||
>>> import dtoolkit.geoaccessor | ||
>>> import pandas as pd | ||
Points to h3 indexes. | ||
>>> df = pd.DataFrame({"x": [122, 100], "y": [55, 1]}).from_xy('x', 'y', crs=4326) | ||
>>> df | ||
x y geometry | ||
0 122 55 POINT (122.00000 55.00000) | ||
1 100 1 POINT (100.00000 1.00000) | ||
>>> df.to_h3(8) | ||
x y h3 | ||
0 122 55 612845052823076863 | ||
1 100 1 614269156845420543 | ||
Keep the geometry column. | ||
>>> df.to_h3(8, drop=False) | ||
x y geometry h3 | ||
0 122 55 POINT (122.00000 55.00000) 612845052823076863 | ||
1 100 1 POINT (100.00000 1.00000) 614269156845420543 | ||
Polygons to h3 indexes. | ||
>>> df = pd.DataFrame( | ||
... { | ||
... "label": ["a", "b"], | ||
... "wkt": [ | ||
... "POLYGON ((1 0, 1 1, 0 1, 0 0, 1 0))", | ||
... "POLYGON ((2 1, 2 2, 1 2, 1 1, 2 1))", | ||
... ], | ||
... }, | ||
... ).from_wkt("wkt", crs=4326, drop=True) | ||
>>> df | ||
label geometry | ||
0 a POLYGON ((1.00000 0.00000, 1.00000 1.00000, 0.... | ||
1 b POLYGON ((2.00000 1.00000, 2.00000 2.00000, 1.... | ||
>>> df.to_h3(4) | ||
label h3 | ||
0 a 596538839648960511 | ||
0 a 596538693620072447 | ||
0 a 596538685030137855 | ||
0 a 596538848238895103 | ||
0 a 596537920525959167 | ||
0 a 596538813879156735 | ||
0 a 596538856828829695 | ||
0 a 596538805289222143 | ||
1 b 596538229763604479 | ||
1 b 596537946295762943 | ||
1 b 596540780974178303 | ||
1 b 596540729434570751 | ||
1 b 596540772384243711 | ||
1 b 596538212583735295 | ||
1 b 596540763794309119 | ||
1 b 596537954885697535 | ||
1 b 596540746614439935 | ||
1 b 596538195403866111 | ||
1 b 596541030082281471 | ||
""" | ||
|
||
if df.crs != 4326: | ||
raise ValueError(f"Only support 'EPSG:4326' CRS, but got {df.crs!r}.") | ||
|
||
if all(df.geom_type == "Point"): | ||
h3 = points_to_h3(df.geometry, resolution=resolution).rename(name) | ||
elif all(df.geom_type == "Polygon"): | ||
h3_list = polygons_to_h3(df.geometry, resolution=resolution) | ||
h3 = h3_list.explode().rename(name) | ||
df = repeat(df, s_len(h3_list)) | ||
else: | ||
raise TypeError("Only support 'Point' or 'Polygon' geometry type.") | ||
|
||
return pd.concat((drop_geometry(df) if drop else df, h3), axis=1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Hashable | ||
|
||
import geopandas as gpd | ||
import pandas as pd | ||
|
||
from dtoolkit.accessor.series import getattr as s_getattr | ||
from dtoolkit.accessor.series import len as s_len | ||
from dtoolkit.geoaccessor.geoseries.xy import xy | ||
from dtoolkit.geoaccessor.register import register_geoseries_method | ||
from dtoolkit.geoaccessor.series import to_geoframe | ||
|
||
|
||
@register_geoseries_method | ||
def to_h3( | ||
s: gpd.GeoSeries, | ||
/, | ||
resolution: int, | ||
drop: bool = True, | ||
name: Hashable = "h3", | ||
) -> pd.Series | gpd.GeoDataFrame: | ||
""" | ||
Convert Point or Polygon to H3 cell index. | ||
Parameters | ||
---------- | ||
resolution : int | ||
H3 resolution. | ||
name : Hashable, default "h3" | ||
Name of the column to store the H3 cell index. If ``name`` is None, there will | ||
use ``s.name`` as the column name. | ||
drop : bool, default True | ||
Whether to drop the geometry column. | ||
Returns | ||
------- | ||
Series or GeoDataFrame | ||
Series if drop is True else GeoDataFrame. | ||
Raises | ||
------ | ||
ModuleNotFoundError | ||
If don't have module named 'h3'. | ||
TypeError | ||
If the geometry type is not Point or Polygon. | ||
ValueError | ||
- If the CRS is not WGS84 or EPSG:4326. | ||
- If ``drop=False`` but ``name=None`` or ``s.name=None``. | ||
See Also | ||
-------- | ||
h3.geo_to_h3 | ||
dtoolkit.geoaccessor.geodataframe.to_h3 | ||
Examples | ||
-------- | ||
>>> import dtoolkit.geoaccessor | ||
>>> import pandas as pd | ||
Points to h3 indexes. | ||
>>> s = pd.Series( | ||
... [ | ||
... "POINT (122 55)", | ||
... "POINT (100 1)", | ||
... ], | ||
... ).from_wkt(crs=4326, drop=True) | ||
>>> s | ||
0 POINT (122.00000 55.00000) | ||
1 POINT (100.00000 1.00000) | ||
dtype: geometry | ||
>>> s.to_h3(8) | ||
0 612845052823076863 | ||
1 614269156845420543 | ||
dtype: int64 | ||
Keep the geometry column. | ||
>>> s.to_h3(8, drop=False) | ||
h3 geometry | ||
0 612845052823076863 POINT (122.00000 55.00000) | ||
1 614269156845420543 POINT (100.00000 1.00000) | ||
Polygons to h3 indexes. | ||
>>> s = pd.Series( | ||
... [ | ||
... "POLYGON ((1 0, 1 1, 0 1, 0 0, 1 0))", | ||
... "POLYGON ((2 1, 2 2, 1 2, 1 1, 2 1))", | ||
... ], | ||
... ).from_wkt(crs=4326, drop=True) | ||
>>> s | ||
0 POLYGON ((1.00000 0.00000, 1.00000 1.00000, 0.... | ||
1 POLYGON ((2.00000 1.00000, 2.00000 2.00000, 1.... | ||
dtype: geometry | ||
>>> s.to_h3(4) | ||
0 596538839648960511 | ||
0 596538693620072447 | ||
0 596538685030137855 | ||
0 596538848238895103 | ||
0 596537920525959167 | ||
0 596538813879156735 | ||
0 596538856828829695 | ||
0 596538805289222143 | ||
1 596538229763604479 | ||
1 596537946295762943 | ||
1 596540780974178303 | ||
1 596540729434570751 | ||
1 596540772384243711 | ||
1 596538212583735295 | ||
1 596540763794309119 | ||
1 596537954885697535 | ||
1 596540746614439935 | ||
1 596538195403866111 | ||
1 596541030082281471 | ||
dtype: object | ||
""" | ||
|
||
# TODO: Advices for h3-pandas | ||
# 1. use `import h3.api.numpy_int as h3` instead of `import h3` | ||
# 2. compat with h3-py 4 | ||
# 3. requires crs is 4326 | ||
# 4. consider h3-py as the accessor of Series | ||
# 5. use h3 cell index as the index of DataFrame, this may be a problem, | ||
# cause it is not unique actually. | ||
# 6. Speed up creating points / polygons via pygeos | ||
|
||
if s.crs != 4326: | ||
raise ValueError(f"Only support 'EPSG:4326' CRS, but got {s.crs!r}.") | ||
if not drop and name is None and s.name is None: | ||
raise ValueError( | ||
"to keep the original data requires setting the 'name' of " | ||
f"{s.__class__.__name__!r} or 'name'.", | ||
) | ||
|
||
if all(s.geom_type == "Point"): | ||
h3 = points_to_h3(s, resolution=resolution) | ||
elif all(s.geom_type == "Polygon"): | ||
h3_list = polygons_to_h3(s, resolution=resolution) | ||
h3 = h3_list.explode() | ||
s = s.repeat(s_len(h3_list)) | ||
else: | ||
raise TypeError("Only support 'Point' or 'Polygon' geometry type.") | ||
|
||
return h3 if drop else to_geoframe(h3.rename(name or s.name), geometry=s) | ||
|
||
|
||
def points_to_h3(s: gpd.GeoSeries, /, resolution: int) -> pd.Series: | ||
# TODO: Use `latlon_to_h3` instead of `geo_to_h3` | ||
# While h3-py release 4, `latlon_to_h3` is not available. | ||
|
||
# requires h3 >= 4 | ||
# from h3.api.numpy_int import latlng_to_cell | ||
# requires h3 < 4 | ||
from h3.api.numpy_int import geo_to_h3 | ||
|
||
return xy(s, reverse=True, frame=False, name=None).apply( | ||
lambda yx: geo_to_h3(*yx, resolution), | ||
) | ||
|
||
|
||
def polygons_to_h3(s: gpd.GeoSeries, /, resolution: int) -> pd.Series: | ||
# TODO: Use `polygon_to_cells` instead of `geo_to_h3` | ||
# While h3-py release 4, `polygon_to_cells` is not available. | ||
|
||
# requires h3 >= 4 | ||
# from h3.api.numpy_int import polygon_to_cells | ||
# requires h3 < 4 | ||
from h3.api.numpy_int import polyfill | ||
|
||
# If `geo_json_conformant` is True, the coordinate could be (lon, lat). | ||
return s_getattr(s, "__geo_interface__").apply( | ||
polyfill, | ||
res=resolution, | ||
geo_json_conformant=True, | ||
) |
Oops, something went wrong.