From 65896540cb678440c7cc8bf89f7ff75b02943cc2 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 12 Apr 2024 11:22:38 +0200 Subject: [PATCH 1/4] Update Python version --- .github/workflows/python-tests.yml | 6 +- README.md | 178 +++++++++++++++++++++++++++++ README.rst | 167 --------------------------- setup.py | 7 +- 4 files changed, 183 insertions(+), 175 deletions(-) create mode 100644 README.md delete mode 100644 README.rst diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 19701a6..c0f2505 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 @@ -23,10 +23,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - if [ ${{ matrix.python-version }} == 'pypy3' ] - then - pip install --extra-index-url https://antocuni.github.io/pypy-wheels/manylinux2010 numpy pandas - fi pip install thefuzz[speedup] pip install -e . - name: Test with pytest diff --git a/README.md b/README.md new file mode 100644 index 0000000..d743500 --- /dev/null +++ b/README.md @@ -0,0 +1,178 @@ +--- +title: pgeocode +--- + +[![pypi](https://img.shields.io/pypi/v/pgeocode.svg)](https://pypi.org/project/pgeocode/) +[![condaforge](https://img.shields.io/conda/vn/conda-forge/pgeocode.svg)](https://anaconda.org/conda-forge/pgeocode) +[![rdfd](https://readthedocs.org/projects/pgeocode/badge/?version=latest)](http://pgeocode.readthedocs.io/) +[![GHactions](https://github.com/symerio/pgeocode/workflows/Test/badge.svg)](https://github.com/symerio/pgeocode/actions?query=branch%3Amaster+) + +Postal code geocoding and distance calculations + +pgeocode is a Python library for high performance off-line querying of +GPS coordinates, region name and municipality name from postal codes. 
+Distances between postal codes as well as general distance queries are +also supported. The used +[GeoNames](http://download.geonames.org/export/zip/) database includes +postal codes for 83 countries. + +Currently, only queries within the same country are supported. + +For additional documentation see +[pgeocode.readthedocs.io](https://pgeocode.readthedocs.io). + +# Installation + +pgeocode requires Python 3.8+ as well as `numpy` and `pandas` packages. +It can be installed with, + +``` +pip install pgeocode +``` + +or + +``` +conda install -c conda-forge pgeocode +``` + +# Quickstart + +**Postal code queries** + +```python +>>> import pgeocode + +>>> nomi = pgeocode.Nominatim('fr') +>>> nomi.query_postal_code("75013") +postal_code 75013 +country_code FR +place_name Paris 13 +state_name Île-de-France +state_code 11 +county_name Paris +county_code 75 +community_name Paris +community_code 751 +latitude 48.8322 +longitude 2.3561 +accuracy 5 + +>>> nomi.query_postal_code(["75013", "69006"]) + postal_code place_name state_name latitude longitude +0 75013 Paris 13 Île-de-France 48.8322 2.3561 +1 69006 Lyon 06 Auvergne-Rhône-Alpes 45.7679 4.8506 +``` + +**Place name queries** + +```python +>>> import pgeocode + +>>> nomi = pgeocode.Nominatim('fr') +>>> nomi.query_location("Antibes", top_k=3) + country_code postal_code place_name state_name state_code ... community_name community_code latitude longitude accuracy +49553 FR 06160 Antibes Provence-Alpes-Côte d'Azur 93.0 ... Grasse 061 43.5858 7.1083 5 +49787 FR 06600 Antibes Provence-Alpes-Côte d'Azur 93.0 ... Grasse 061 43.5858 7.1083 5 +49788 FR 06601 CEDEX Antibes Provence-Alpes-Côte d'Azur 93.0 ... Grasse 061 43.5858 7.1083 5 + +>>> nomi.query_location("Straassborg", top_k=3, fuzzy_threshold=80) + country_code postal_code place_name state_name state_code ... community_name community_code latitude longitude accuracy +25461 FR 67000 Strasbourg Grand Est 44.0 ... 
Strasbourg 678 48.5839 7.7455 5 +25462 FR 67001 CEDEX Strasbourg Grand Est 44.0 ... Strasbourg 678 48.5839 7.7455 5 +25463 FR 67002 CEDEX Strasbourg Grand Est 44.0 ... Strasbourg 678 48.5839 7.7455 5 +``` + +**Distance calculations** + +```python +>>> dist = pgeocode.GeoDistance('fr') +>>> dist.query_postal_code("75013", "69006") +389.156 +>>> dist.query_postal_code(["75013", "75014", "75015"], ["69006", "69005", "69004"]) +array([ 389.15648697, 390.12577967, 390.49857655]) +``` + +# Geocoding format + +The result of a geo-localisation query is a `pandas.DataFrame` with the +following columns, + +- `country_code`: iso country code, 2 characters +- `postal_code` : postal code +- `place_name` : place name (e.g. town, city etc) +- `state_name` : 1. order subdivision (state) +- `state_code` : 1. order subdivision (state) +- `county_name` : 2. order subdivision (county/province) +- `county_code` : 2. order subdivision (county/province) +- `community_name` : 3. order subdivision (community) +- `community_code` : 3. order subdivision (community) +- `latitude` : estimated latitude (wgs84) +- `longitude` : estimated longitude (wgs84) +- `accuracy` : accuracy of lat/lng from 1=estimated to 6=centroid + +# Configuration and defaults + +**Storage directory** + +Defaults to `~/.cache/pgeocode`, it is the directory where data is +downloaded for later consumption. It can be changed using the +environment variable `PGEOCODE_DATA_DIR`, i.e. +`export PGEOCODE_DATA_DIR=/tmp/pgeocode_data`. + +**Data sources** + +Data sources are provided as a list in the `pgeocode.DOWNLOAD_URL` +variable. The default value is, + +``` python +DOWNLOAD_URL = [ + "https://download.geonames.org/export/zip/{country}.zip", + "https://symerio.github.io/postal-codes-data/data/geonames/{country}.txt", +] +``` + +Data sources are tried from first to last until one works. Here the +second link is a mirror of the first. 
+ +It is also possible to extend this variable with third party data +sources, as long as they follow the same format. See for instance +[postal-codes-data](https://github.com/symerio/postal-codes-data/tree/master/data/geonames) +repository for examples of data files. + +# License + +The pgeocode package is distributed under the 3-clause BSD license. + +# Supported countries + +The list of countries available in the GeoNames database, with the +corresponding country codes, are given below, + +Andorra (AD), Argentina (AR), American Samoa (AS), Austria (AT), +Australia (AU), Åland Islands (AX), Azerbaijan (AZ), Bangladesh (BD), +Belgium (BE), Bulgaria (BG), Bermuda (BM), Brazil (BR), Belarus (BY), +Canada (CA), Switzerland (CH), Chile (CL), Colombia (CO), Costa Rica +(CR), Cyprus (CY), Czechia (CZ), Germany (DE), Denmark (DK), Dominican +Republic (DO), Algeria (DZ), Estonia (EE), Spain (ES), Finland (FI), +Federated States of Micronesia (FM), Faroe Islands (FO), France (FR), +United Kingdom of Great Britain and Northern Ireland (GB), French Guiana +(GF), Guernsey (GG), Greenland (GL), Guadeloupe (GP), Guatemala (GT), +Guam (GU), Croatia (HR), Haiti (HT), Hungary (HU), Ireland (IE), Isle of +Man (IM), India (IN), Iceland (IS), Italy (IT), Jersey (JE), Japan (JP), +Republic of Korea (KR), Liechtenstein (LI), Sri Lanka (LK), Lithuania +(LT), Luxembourg (LU), Latvia (LV), Monaco (MC), Republic of Moldova +(MD), Marshall Islands (MH), The former Yugoslav Republic of Macedonia +(MK), Northern Mariana Islands (MP), Martinique (MQ), Malta (MT), Malawi +(MW), Mexico (MX), Malaysia (MY), New Caledonia (NC), Netherlands (NL), +Norway (NO), New Zealand (NZ), Peru (PE), Philippines (PH), Pakistan +(PK), Poland (PL), Saint Pierre and Miquelon (PM), Puerto Rico (PR), +Portugal (PT), Palau (PW), Réunion (RE), Romania (RO), Serbia (RS), +Russian Federation (RU), Sweden (SE), Singapore (SG), Slovenia (SI), +Svalbard and Jan Mayen Islands (SJ), Slovakia (SK), San Marino (SM), +Thailand 
(TH), Turkey (TR), Ukraine (UA), United States of America (US), +Uruguay (UY), Holy See (VA), United States Virgin Islands (VI), Wallis +and Futuna Islands (WF), Mayotte (YT), South Africa (ZA) + +See [GeoNames database](http://download.geonames.org/export/zip/) for +more information. The pgeocode package is maintained by [Symerio](https://www.symerio.com). diff --git a/README.rst b/README.rst deleted file mode 100644 index a9b74c0..0000000 --- a/README.rst +++ /dev/null @@ -1,167 +0,0 @@ -pgeocode -======== - -|pypi| |condaforge| |rdfd| |GHactions| - -.. |pypi| image:: https://img.shields.io/pypi/v/pgeocode.svg - :target: https://pypi.org/project/pgeocode/ - -.. |condaforge| image:: https://img.shields.io/conda/vn/conda-forge/pgeocode.svg - :target: https://anaconda.org/conda-forge/pgeocode - -.. |rdfd| image:: https://readthedocs.org/projects/pgeocode/badge/?version=latest - :target: http://pgeocode.readthedocs.io/ - -.. |GHactions| image:: https://github.com/symerio/pgeocode/workflows/Test/badge.svg - :target: https://github.com/symerio/pgeocode/actions?query=branch%3Amaster+ - - -Postal code geocoding and distance calculations - -pgeocode is a Python library for high performance off-line querying of GPS coordinates, region name and municipality name -from postal codes. Distances between postal codes as well as general -distance queries are also supported. -The used `GeoNames `_ database includes postal codes for 83 countries. - -Currently, only queries within the same country are supported. - -For additional documentation see `pgeocode.readthedocs.io `_. - - -Installation ------------- - -pgeocode requires Python 3.8+ as well as ``numpy`` and ``pandas`` packages. It can be installed with, - -.. code:: - - pip install pgeocode - -or - -.. code:: - - conda install -c conda-forge pgeocode - -Quickstart ----------- - -**Postal code queries** - -.. 
code:: python - - >>> import pgeocode - - >>> nomi = pgeocode.Nominatim('fr') - >>> nomi.query_postal_code("75013") - postal_code 75013 - country_code FR - place_name Paris 13 - state_name Île-de-France - state_code 11 - county_name Paris - county_code 75 - community_name Paris - community_code 751 - latitude 48.8322 - longitude 2.3561 - accuracy 5 - - >>> nomi.query_postal_code(["75013", "69006"]) - postal_code place_name state_name latitude longitude - 0 75013 Paris 13 Île-de-France 48.8322 2.3561 - 1 69006 Lyon 06 Auvergne-Rhône-Alpes 45.7679 4.8506 - -**Place name queries** - -.. code:: python - - >>> import pgeocode - - >>> nomi = pgeocode.Nominatim('fr') - >>> nomi.query_location("Antibes", top_k=3) - country_code postal_code place_name state_name state_code ... community_name community_code latitude longitude accuracy - 49553 FR 06160 Antibes Provence-Alpes-Côte d'Azur 93.0 ... Grasse 061 43.5858 7.1083 5 - 49787 FR 06600 Antibes Provence-Alpes-Côte d'Azur 93.0 ... Grasse 061 43.5858 7.1083 5 - 49788 FR 06601 CEDEX Antibes Provence-Alpes-Côte d'Azur 93.0 ... Grasse 061 43.5858 7.1083 5 - - >>> nomi.query_location("Straassborg", top_k=3, fuzzy_threshold=80) - country_code postal_code place_name state_name state_code ... community_name community_code latitude longitude accuracy - 25461 FR 67000 Strasbourg Grand Est 44.0 ... Strasbourg 678 48.5839 7.7455 5 - 25462 FR 67001 CEDEX Strasbourg Grand Est 44.0 ... Strasbourg 678 48.5839 7.7455 5 - 25463 FR 67002 CEDEX Strasbourg Grand Est 44.0 ... Strasbourg 678 48.5839 7.7455 5 - -**Distance calculations** - -.. 
code:: python - - >>> dist = pgeocode.GeoDistance('fr') - >>> dist.query_postal_code("75013", "69006") - 389.156 - >>> dist.query_postal_code(["75013", "75014", "75015"], ["69006", "69005", "69004"]) - array([ 389.15648697, 390.12577967, 390.49857655]) - - -Geocoding format ----------------- - -The result of a geo-localistion query is a ``pandas.DataFrame`` with the following columns, - -* ``country_code``: iso country code, 2 characters -* ``postal_code`` : postal code -* ``place_name`` : place name (e.g. town, city etc) -* ``state_name`` : 1. order subdivision (state) -* ``state_code`` : 1. order subdivision (state) -* ``county_name`` : 2. order subdivision (county/province) -* ``county_code`` : 2. order subdivision (county/province) -* ``community_name`` : 3. order subdivision (community) -* ``community_code`` : 3. order subdivision (community) -* ``latitude`` : estimated latitude (wgs84) -* ``longitude`` : estimated longitude (wgs84) -* ``accuracy`` : accuracy of lat/lng from 1=estimated to 6=centroid - - -Configuration and defaults --------------------------- - -**Storage directory** - -Defaults to ``~/.cache/pgeocode``, it is the directory where data is downloaded -for later consumption. It can be changed using the environment variable -``PGEOCODE_DATA_DIR``, i.e. ``export PGEOCODE_DATA_DIR=/tmp/pgeocode_data``. - -**Data sources** - -Data sources are provided as a list in the ``pgeocode.DOWNLOAD_URL`` variable. -The default value is, - -.. code:: python - - DOWNLOAD_URL = [ - "https://download.geonames.org/export/zip/{country}.zip", - "https://symerio.github.io/postal-codes-data/data/geonames/{country}.txt", - ] - -Data sources are tried from first to last until one works. Here the second link is a mirror -of the first. - -It is also possible to extend this variable with third party data sources, as -long as they follow the same format. See for instance -`postal-codes-data `_ -repository for examples of data files. 
- - -License -------- - -The pgeocode package is distributed under the 3-clause BSD license. - - -Supported countries -------------------- - -The list of countries available in the GeoNames database, with the corresponding country codes, are given below, - -Andorra (AD), Argentina (AR), American Samoa (AS), Austria (AT), Australia (AU), Åland Islands (AX), Azerbaijan (AZ), Bangladesh (BD), Belgium (BE), Bulgaria (BG), Bermuda (BM), Brazil (BR), Belarus (BY), Canada (CA), Switzerland (CH), Chile (CL), Colombia (CO), Costa Rica (CR), Cyprus (CY), Czechia (CZ), Germany (DE), Denmark (DK), Dominican Republic (DO), Algeria (DZ), Estonia (EE), Spain (ES), Finland (FI), Federated States of Micronesia (FM), Faroe Islands (FO), France (FR), United Kingdom of Great Britain and Northern Ireland (GB), French Guiana (GF), Guernsey (GG), Greenland (GL), Guadeloupe (GP), Guatemala (GT), Guam (GU), Croatia (HR), Haiti (HT), Hungary (HU), Ireland (IE), Isle of Man (IM), India (IN), Iceland (IS), Italy (IT), Jersey (JE), Japan (JP), Republic of Korea (KR), Liechtenstein (LI), Sri Lanka (LK), Lithuania (LT), Luxembourg (LU), Latvia (LV), Monaco (MC), Republic of Moldova (MD), Marshall Islands (MH), The former Yugoslav Republic of Macedonia (MK), Northern Mariana Islands (MP), Martinique (MQ), Malta (MT), Malawi (MW), Mexico (MX), Malaysia (MY), New Caledonia (NC), Netherlands (NL), Norway (NO), New Zealand (NZ), Peru (PE), Philippines (PH), Pakistan (PK), Poland (PL), Saint Pierre and Miquelon (PM), Puerto Rico (PR), Portugal (PT), Palau (PW), Réunion (RE), Romania (RO), Serbia (RS), Russian Federation (RU), Sweden (SE), Singapore (SG), Slovenia (SI), Svalbard and Jan Mayen Islands (SJ), Slovakia (SK), San Marino (SM), Thailand (TH), Turkey (TR), Ukraine (UA), United States of America (US), Uruguay (UY), Holy See (VA), United States Virgin Islands (VI), Wallis and Futuna Islands (WF), Mayotte (YT), South Africa (ZA) - -See `GeoNames database `_ for more information. 
diff --git a/setup.py b/setup.py index fc45cbb..ab93f21 100644 --- a/setup.py +++ b/setup.py @@ -34,13 +34,14 @@ def find_version(*file_paths): setup( name="pgeocode", - description="Approximate geocoding", - long_description=open("README.rst").read(), + description="Postal code geocoding", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", version=find_version("pgeocode.py"), author="Roman Yurchak", author_email="roman.yurchak@symerio.com", py_modules=["pgeocode"], - python_requires=">=3.8", + python_requires=">=3.10", install_requires=["requests", "numpy", "pandas"], extras_require={ "fuzzy": ["thefuzz"], From 71257ec468fb36179bc3a2338bd797170b5eb29e Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 12 Apr 2024 11:29:23 +0200 Subject: [PATCH 2/4] Fix documentation build --- .readthedocs.yml | 12 ++++++++++++ README.md | 4 +--- doc/conf.py | 6 +++--- 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 .readthedocs.yml diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..a442ce6 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,12 @@ +version: "2" + +build: + tools: + python: "3.11" + +python: + install: + - requirements: doc/requirements.txt + +sphinx: + configuration: doc/conf.py diff --git a/README.md b/README.md index d743500..2528d2a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ ---- -title: pgeocode ---- +# pgeocode [![pypi](https://img.shields.io/pypi/v/pgeocode.svg)](https://pypi.org/project/pgeocode/) [![condaforge](https://img.shields.io/conda/vn/conda-forge/pgeocode.svg)](https://anaconda.org/conda-forge/pgeocode) diff --git a/doc/conf.py b/doc/conf.py index 1d9f9bf..26d596e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -45,6 +45,7 @@ "sphinx.ext.autosummary", "sphinxcontrib.napoleon", "sphinx.ext.linkcode", + "myst_parser", ] @@ -58,15 +59,14 @@ # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string: # -# source_suffix = ['.rst', '.md'] -source_suffix = ".rst" +source_suffix = [".rst", ".md"] # The master toctree document. master_doc = "index" # General information about the project. project = "pgeocode" -copyright = "2018, Symerio" +copyright = "2023, Symerio" author = "Roman Yurchak" # The version info for the project you're documenting, acts as replacement for From 380f1aa01f449deadc761fe9290cb9f0829be956 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 12 Apr 2024 11:30:38 +0200 Subject: [PATCH 3/4] Update pre-commit to python 3.10 --- README.md | 11 ++++------- pgeocode.py | 8 ++++---- pyproject.toml | 4 ++-- test_pgeocode.py | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 2528d2a..0bde51e 100644 --- a/README.md +++ b/README.md @@ -21,18 +21,13 @@ For additional documentation see # Installation -pgeocode requires Python 3.8+ as well as `numpy` and `pandas` packages. +pgeocode requires Python 3.10+ as well as `numpy` and `pandas` packages. It can be installed with, ``` pip install pgeocode ``` -or - -``` -conda install -c conda-forge pgeocode -``` # Quickstart @@ -142,6 +137,8 @@ repository for examples of data files. The pgeocode package is distributed under the 3-clause BSD license. +The pgeocode package is maintained by [Symerio](https://www.symerio.com). + # Supported countries The list of countries available in the GeoNames database, with the @@ -173,4 +170,4 @@ Uruguay (UY), Holy See (VA), United States Virgin Islands (VI), Wallis and Futuna Islands (WF), Mayotte (YT), South Africa (ZA) See [GeoNames database](http://download.geonames.org/export/zip/) for -more information. The pgeocode package is maintained by [Symerio](https://www.symerio.com). +more information. 
diff --git a/pgeocode.py b/pgeocode.py index d598029..686c550 100644 --- a/pgeocode.py +++ b/pgeocode.py @@ -7,7 +7,7 @@ import urllib.request import warnings from io import BytesIO -from typing import Any, List, Optional, Tuple +from typing import Any from zipfile import ZipFile import numpy as np @@ -182,7 +182,7 @@ def _open_extract_url(url: str, country: str) -> Any: @contextlib.contextmanager -def _open_extract_cycle_url(urls: List[str], country: str) -> Any: +def _open_extract_cycle_url(urls: list[str], country: str) -> Any: """Same as _open_extract_url but cycle through URLs until one works We start by opening the first URL in the list, and if fails @@ -245,7 +245,7 @@ def __init__(self, country: str = "fr", unique: bool = True): self.unique = unique @staticmethod - def _get_data(country: str) -> Tuple[str, pd.DataFrame]: + def _get_data(country: str) -> tuple[str, pd.DataFrame]: """Load the data from disk; otherwise download and save it""" data_path = os.path.join(STORAGE_DIR, country.upper() + ".txt") @@ -349,7 +349,7 @@ def query_location( self, name: str, top_k: int = 100, - fuzzy_threshold: Optional[int] = None, + fuzzy_threshold: int | None = None, col: str = "place_name", ) -> pd.DataFrame: """Get location information from a place name diff --git a/pyproject.toml b/pyproject.toml index db743dc..467d3d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = ["setuptools>=42", "wheel"] [tool.ruff] -target-version = "py38" +target-version = "py310" [tool.ruff.lint] select = [ @@ -20,7 +20,7 @@ select = [ ignore = ["E402", "E501", "E731", "E741"] [tool.mypy] -python_version = "3.8" +python_version = "3.10" show_error_codes = true warn_unreachable = true diff --git a/test_pgeocode.py b/test_pgeocode.py index f14d245..d676619 100644 --- a/test_pgeocode.py +++ b/test_pgeocode.py @@ -175,7 +175,7 @@ def test_haversine_distance(): y = x * rng.rand(N, 2) d_ref = np.zeros(N) - for idx, (x_coord, y_coord) in enumerate(zip(x, y)): + for idx, 
(x_coord, y_coord) in enumerate(zip(x, y, strict=False)): d_ref[idx] = great_circle(x_coord, y_coord).km d_pred = haversine_distance(x, y) From 42a1c67eb0b24094ff0951eefc16c5029020da9b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 12 Apr 2024 11:32:47 +0200 Subject: [PATCH 4/4] Rm conda-forge badge --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 0bde51e..6f51105 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # pgeocode [![pypi](https://img.shields.io/pypi/v/pgeocode.svg)](https://pypi.org/project/pgeocode/) -[![condaforge](https://img.shields.io/conda/vn/conda-forge/pgeocode.svg)](https://anaconda.org/conda-forge/pgeocode) [![rdfd](https://readthedocs.org/projects/pgeocode/badge/?version=latest)](http://pgeocode.readthedocs.io/) [![GHactions](https://github.com/symerio/pgeocode/workflows/Test/badge.svg)](https://github.com/symerio/pgeocode/actions?query=branch%3Amaster+)