Skip to content

Commit

Permalink
Merge pull request #24 from mbasa/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
mbasa authored Jul 24, 2024
2 parents bf9db77 + f37b9e6 commit a85c7a4
Show file tree
Hide file tree
Showing 28 changed files with 10,513 additions and 195 deletions.
7 changes: 4 additions & 3 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ DBPASS=postgres
DBNAME=addresses
DBHOST=localhost
DBPORT=5432
YEAR_ISJ=2020
YEAR_KSJ=2021
YEAR_ESTAT=2015
#YEAR_ISJ="2023,2017,2013"
YEAR_ISJ=2023
YEAR_KSJ=2023
YEAR_ESTAT=2020
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
steps:
- uses: ncipollo/release-action@v1
with:
#artifacts: "target/excavator.jar"
artifacts: "README.md,LICENSE.TXT,.env.example,benchmark,data-patches,scripts,sql"
#replacesArtifacts: true
tag: "PgGeocoder-run${{ github.run_number }}"
name: "PgGeocoder"
Expand Down
66 changes: 66 additions & 0 deletions .github/workflows/test-ubuntu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# CI workflow: installs PostgreSQL, PostGIS and pgTAP on an Ubuntu runner,
# prepares a test database and runs addresses.test.sql through pg_prove.
name: Test Ubuntu

# Triggers: push, pull_request (filtered to the develop branch) and a manual
# workflow_dispatch.
on:
push:
pull_request:
branches:
- develop
workflow_dispatch:

jobs:
test-ubuntu:
runs-on: ${{ matrix.os }}

# fail-fast is disabled so one failing matrix entry does not cancel the others;
# the matrix currently holds a single OS image.
strategy:
fail-fast: false
matrix:
os: [ubuntu-24.04]

steps:
- name: Checkout pgGeocoder
uses: actions/checkout@v4

# Extract the major version from `psql --version`
# (e.g. "psql (PostgreSQL) 16.3 (Ubuntu ...)" -> "16") and export it, together
# with a fixed PostGIS major version of 3, to later steps through $GITHUB_ENV.
# NOTE(review): this step runs before the install step below, so it relies on
# a psql client already being present on the runner image -- confirm.
- name: Set PostgreSQL/PostGIS major version
run: |
pg_major=$(psql --version | grep -Po '(?<=psql \(PostgreSQL\) )[^;]+(?=\.\d+ \()')
echo "PG_MAJOR=${pg_major}" >> $GITHUB_ENV
echo "POSTGIS_MAJOR=3" >> $GITHUB_ENV
# Register the apt.postgresql.org (PGDG) repository for the runner's release
# codename, importing the repository signing key first.
- name: Add PostgreSQL APT repository
run: |
sudo apt-get install curl ca-certificates gnupg
curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ \
$(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
# Package names are built from the PG_MAJOR / POSTGIS_MAJOR values exported
# by the version step above.
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
libtap-parser-sourcehandler-pgtap-perl \
postgresql-${PG_MAJOR} \
postgresql-${PG_MAJOR}-pgtap \
postgresql-${PG_MAJOR}-postgis-${POSTGIS_MAJOR} \
postgresql-${PG_MAJOR}-postgis-${POSTGIS_MAJOR}-scripts
# Rewrite pg_hba.conf so that entries ending in `peer` or `scram-sha-256` use
# `trust` instead, print the resulting file to the job log, then start the
# service.
- name: Start PostgreSQL (with trust authentication)
run: |
sudo sed -i "s/\(peer\|scram-sha-256\)$/trust/g" "/etc/postgresql/${PG_MAJOR}/main/pg_hba.conf"
sudo cat "/etc/postgresql/${PG_MAJOR}/main/pg_hba.conf"
sudo systemctl start postgresql.service
# A single pg_isready probe followed by a test query; there is no retry loop,
# so the step fails if the server is not yet accepting connections.
- name: Wait PostgreSQL launch
run: |
pg_isready -U postgres -h localhost -p 5432
psql -U postgres -h localhost -p 5432 -c "SELECT version();"
# Copy the example env files into place, then run the fixture script.
# NOTE(review): the script is not shown here; presumably it creates the
# addresses_test database used by the test step -- confirm.
- name: Set up database
run: |
cp .env.example .env
cp tests/.env.test tests/.env
bash tests/create_test_db_from_fixtures.sh
# Run the pgTAP test file against the addresses_test database.
- name: Run test
run: |
cd tests
pg_prove -U postgres -h localhost -p 5432 -d addresses_test addresses.test.sql
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

data/
work/
tests/fixtures/address_?.csv
.DS_Store
.project
.idea/
Expand Down
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,20 @@
DBNAME=addresses
DBHOST=localhost
DBPORT=5432
YEAR_ISJ=2020
YEAR_KSJ=2021
YEAR_ESTAT=2015
YEAR_ISJ=2023
YEAR_KSJ=2023
YEAR_ESTAT=2020
```
**Note:** To download and install address data from multiple years to have a historical data set, the `YEAR_ISJ` parameter can be set as follows:
```
YEAR_ISJ="2023,2017,2013"
```
Be aware, though, that this requires a substantial amount of disk space.

3. Create the address database (using the same values as in `.env`).
(If the database already exists, drop it first.)
```bash
# dropdb -U postgres addresses
$ dropdb -U postgres addresses
$ createdb -U postgres addresses
```
4. Run install and download/import scripts.
Expand Down Expand Up @@ -125,7 +131,7 @@ $ psql -U postgres addresses
- Website: https://nlftp.mlit.go.jp/ksj/index.html
- Format: ESRI Shapefile (or GML)
- 行政区域データ:
- Website: https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-N03-v3_0.html
- Website: https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-N03-2023.html
- Geometry Type: Polygon
- Remarks:
- "City Level" (市区町村レベル) admin boundary data.
Expand All @@ -135,7 +141,7 @@ $ psql -U postgres addresses
- Remarks:
- "City Office" (市区町村役場) point data.
- 国・都道府県の機関データ:
- Website: https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-P28.html
- Website: https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-P28-2022.html
- Geometry Type: Point
- Remarks:
- Government data which includes "Prefectural Office" (都道府県庁) point data.
Expand All @@ -148,7 +154,7 @@ $ psql -U postgres addresses
## Notes
* For `Bulk Geocoding`, wherein addresses located in a field of a table are geocoded, please see this [WIKI Entry](https://github.com/mbasa/pgGeocoder/wiki/bulk_geocoding).
* For `Bulk Geocoding`, wherein addresses located in a field of a table are geocoded, please see this [WIKI Entry](https://github.com/mbasa/pgGeocoder/wiki/Bulk-Geocoding).
* To create `TRIGGERS` that will geocode addresses automatically on an `INSERT` or `UPDATE` operation, please see this [WIKI Entry](https://github.com/mbasa/pgGeocoder/wiki/Creating-Triggers-for-the-Geocoder).
Expand Down
3 changes: 3 additions & 0 deletions data-patches/isj/patches/2024061101_replace_kamagaya.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
UPDATE pggeocoder.address_s SET shikuchoson = '鎌ケ谷市' WHERE todofuken = '千葉県' AND shikuchoson = '鎌ヶ谷市';
UPDATE pggeocoder.address_o SET shikuchoson = '鎌ケ谷市' WHERE todofuken = '千葉県' AND shikuchoson = '鎌ヶ谷市';
UPDATE pggeocoder.address_c SET shikuchoson = '鎌ケ谷市' WHERE todofuken = '千葉県' AND shikuchoson = '鎌ヶ谷市';
42 changes: 21 additions & 21 deletions data-patches/isj/patches/address_s.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
todofuken,shikuchoson,tr_shikuchoson,lat,lon,code,geog
北海道,札幌市,,43.061972,141.354374,01100,SRID=4326;POINT(141.354374 43.061972)
宮城県,仙台市,,38.268008,140.869617,04100,SRID=4326;POINT(140.869617 38.268008)
埼玉県,さいたま市,,35.861515,139.645502,11100,SRID=4326;POINT(139.645502 35.861515)
千葉県,千葉市,,35.607331,140.10638,12100,SRID=4326;POINT(140.10638 35.607331)
神奈川県,横浜市,,35.444035,139.637954,14100,SRID=4326;POINT(139.637954 35.444035)
神奈川県,川崎市,,35.530806,139.703012,14130,SRID=4326;POINT(139.703012 35.530806)
神奈川県,相模原市,,35.571376,139.373268,14150,SRID=4326;POINT(139.373268 35.571376)
新潟県,新潟市,,37.916128,139.036402,15100,SRID=4326;POINT(139.036402 37.916128)
静岡県,静岡市,,34.975473,138.382388,22100,SRID=4326;POINT(138.382388 34.975473)
静岡県,浜松市,,34.710865,137.726117,22130,SRID=4326;POINT(137.726117 34.710865)
愛知県,名古屋市,,35.181433,136.906421,23100,SRID=4326;POINT(136.906421 35.181433)
京都府,京都市,,35.011574,135.768181,26100,SRID=4326;POINT(135.768181 35.011574)
大阪府,大阪市,,34.693891,135.502046,27100,SRID=4326;POINT(135.502046 34.693891)
大阪府,堺市,,34.573354,135.48302,27140,SRID=4326;POINT(135.48302 34.573354)
兵庫県,神戸市,,34.689495,135.195728,28100,SRID=4326;POINT(135.195728 34.689495)
岡山県,岡山市,,34.655107,133.919566,33100,SRID=4326;POINT(133.919566 34.655107)
広島県,広島市,,34.385253,132.455337,34100,SRID=4326;POINT(132.455337 34.385253)
福岡県,北九州市,,33.883408,130.875183,40100,SRID=4326;POINT(130.875183 33.883408)
福岡県,福岡市,,33.590313,130.401735,40130,SRID=4326;POINT(130.401735 33.590313)
熊本県,熊本市,,32.803078,130.707897,43100,SRID=4326;POINT(130.707897 32.803078)
todofuken,shikuchoson,tr_shikuchoson,lat,lon,code,geog,year
北海道,札幌市,,43.061972,141.354374,01100,SRID=4326;POINT(141.354374 43.061972),2023
宮城県,仙台市,,38.268008,140.869617,04100,SRID=4326;POINT(140.869617 38.268008),2023
埼玉県,さいたま市,,35.861515,139.645502,11100,SRID=4326;POINT(139.645502 35.861515),2023
千葉県,千葉市,,35.607331,140.10638,12100,SRID=4326;POINT(140.10638 35.607331),2023
神奈川県,横浜市,,35.444035,139.637954,14100,SRID=4326;POINT(139.637954 35.444035),2023
神奈川県,川崎市,,35.530806,139.703012,14130,SRID=4326;POINT(139.703012 35.530806),2023
神奈川県,相模原市,,35.571376,139.373268,14150,SRID=4326;POINT(139.373268 35.571376),2023
新潟県,新潟市,,37.916128,139.036402,15100,SRID=4326;POINT(139.036402 37.916128),2023
静岡県,静岡市,,34.975473,138.382388,22100,SRID=4326;POINT(138.382388 34.975473),2023
静岡県,浜松市,,34.710865,137.726117,22130,SRID=4326;POINT(137.726117 34.710865),2023
愛知県,名古屋市,,35.181433,136.906421,23100,SRID=4326;POINT(136.906421 35.181433),2023
京都府,京都市,,35.011574,135.768181,26100,SRID=4326;POINT(135.768181 35.011574),2023
大阪府,大阪市,,34.693891,135.502046,27100,SRID=4326;POINT(135.502046 34.693891),2023
大阪府,堺市,,34.573354,135.48302,27140,SRID=4326;POINT(135.48302 34.573354),2023
兵庫県,神戸市,,34.689495,135.195728,28100,SRID=4326;POINT(135.195728 34.689495),2023
岡山県,岡山市,,34.655107,133.919566,33100,SRID=4326;POINT(133.919566 34.655107),2023
広島県,広島市,,34.385253,132.455337,34100,SRID=4326;POINT(132.455337 34.385253),2023
福岡県,北九州市,,33.883408,130.875183,40100,SRID=4326;POINT(130.875183 33.883408),2023
福岡県,福岡市,,33.590313,130.401735,40130,SRID=4326;POINT(130.401735 33.590313),2023
熊本県,熊本市,,32.803078,130.707897,43100,SRID=4326;POINT(130.707897 32.803078),2023
15 changes: 8 additions & 7 deletions scripts/download_estat.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
#!/bin/bash
# ------------------------------------------------------------------------------
# Copyright(c) 2013-2021 Georepublic
# Copyright(c) 2013- Georepublic
#
# Usage:
# ------
# bash scripts/download_estat.sh [Census Year]
#
# Examples:
# ---------
# bash scripts/download_estat.sh 2015
# bash scripts/download_estat.sh 2020
#
# ------------------------------------------------------------------------------

set -e # Exit script immediately on first error.
#set -x # Print commands and their arguments as they are executed.

YEAR_TCODES=(
"2015 A002005212015"
"2020 A002005212020"
"2015 A002005212015" # JGD2011 datum support started from 2015
#"2010 A002005212010"
#"2005 A002005212005"
#"2000 A002005512000"
)

function exit_with_usage()
{
echo "Usage: bash scripts/download_estat.sh [Census Year (ex. 2019)]" 1>&2
echo "Usage: bash scripts/download_estat.sh [Census Year (ex. 2020)]" 1>&2
for i in "${YEAR_TCODES[@]}"; do
year_tcode=(`echo "${i}"`)
year="${year_tcode[0]}"
Expand Down Expand Up @@ -71,11 +72,11 @@ BASE_URL="https://www.e-stat.go.jp/gis/statmap-search/data"
echo -e "Downloading zip files and extracting shp files..."
for pref_code in $(seq -w 1 47); do
# echo "Downloading prefecture ${i} in ${tcode} ..."
url="${BASE_URL}?dlserveyId=${tcode}&code=${pref_code}&coordSys=1&format=shape&downloadType=5"
zip="${OUT_ZIP_DIR}/${tcode}DDSWC${pref_code}.zip"
url="${BASE_URL}?dlserveyId=${tcode}&code=${pref_code}&coordSys=1&format=shape&downloadType=5&datum=2011"
zip="${OUT_ZIP_DIR}/${tcode}DDSWC${pref_code}-JGD2011.zip"
if [ ! -e "${zip}" ] ; then
curl -s "${url}" > "${zip}"
sleep 2
sleep 5
fi
unzip -qq -jo ${zip} -d ${OUT_SHP_DIR}
echo -ne "."
Expand Down
17 changes: 10 additions & 7 deletions scripts/download_isj.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@

# Inspired by https://github.com/IMI-Tool-Project/imi-enrichment-address/blob/master/tools/download.sh

# 2020(令和2年) ~ 2015(平成27年)
# 2023(令和5年) ~ 2015(平成27年)
# Years up to H20 (2008) are not supported because their oaza-level data is incomplete
# "[year] [era_year] [oaza_ver] [gaiku_ver]"
YEAR_VERSIONS=(
"2023 R5 17.0b 22.0a"
"2022 R4 16.0b 21.0a"
"2021 R3 15.0b 20.0a"
"2020 R2 14.0b 19.0a"
"2019 R1 13.0b 18.0a"
"2018 H30 12.0b 17.0a"
"2017 H29 11.0b 16.0a"
"2016 H28 10.0b 15.0a"
"2015 H27 09.0b 14.0a"
#"2014 H26 08.0b 13.0a"
#"2013 H25 07.0b 12.0a"
#"2012 H24 06.0b 11.0a"
#"2011 H23 05.0b 10.0a"
#"2010 H22 04.0b 09.0a"
#"2009 H21 03.0b 08.0a"
"2014 H26 08.0b 13.0a"
"2013 H25 07.0b 12.0a"
"2012 H24 06.0b 11.0a"
"2011 H23 05.0b 10.0a"
"2010 H22 04.0b 09.0a"
"2009 H21 03.0b 08.0a"
)

function exit_with_usage()
Expand Down
21 changes: 12 additions & 9 deletions scripts/download_ksj.sh
Original file line number Diff line number Diff line change
@@ -1,33 +1,36 @@
#!/bin/bash
# ------------------------------------------------------------------------------
# Copyright(c) 2021 Georepublic
# Copyright(c) 2021- Georepublic
#
# Usage:
# ------
# bash scripts/download_ksj.sh [Year of admin boundary]
#
# Examples:
# ---------
# bash scripts/download_ksj.sh 2021
# bash scripts/download_ksj.sh 2023
#
# ------------------------------------------------------------------------------

#set -e # Exit script immediately on first error.
#set -x # Print commands and their arguments as they are executed.

YEAR_FNAMES=(
#"2024 N03-20240101" # Encoding changed from SJIS to UTF8 and prefecture data is added
"2023 N03-20230101"
#"2022 N03-20220101" # Prefecture data is merged
"2021 N03-20210101"
"2020 N03-20200101"
"2019 N03-190101"
"2018 N03-180101"
"2017 N03-170101"
"2016 N03-160101"
"2015 N03-150101"
#"2017 N03-170101"
#"2016 N03-160101"
#"2015 N03-150101"
)

function exit_with_usage()
{
echo "Usage: bash scripts/download_ksj.sh [Year (ex. 2021)]" 1>&2
echo "Usage: bash scripts/download_ksj.sh [Year (ex. 2023)]" 1>&2
for i in "${YEAR_FNAMES[@]}"; do
year_fname=(`echo "${i}"`)
year="${year_fname[0]}"
Expand Down Expand Up @@ -99,7 +102,7 @@ for shp in ${OUT_ADMIN_BOUNDARY_SHP_DIR}/*.shp; do
sql=${OUT_ADMIN_BOUNDARY_SQL_DIR}/`basename ${shp} .shp`.sql
#echo "${shp} => ${sql}"
# ogrinfo --format PGDump
ogr2ogr -s_srs EPSG:4612 \
ogr2ogr -s_srs EPSG:6668 \
-t_srs EPSG:4326 \
-f PGDump \
${sql} \
Expand All @@ -117,8 +120,8 @@ done

# Download government zip
echo "Downloading government zip file and extracting shp file..."
url="https://nlftp.mlit.go.jp/ksj/gml/data/P28/P28-13/P28-13.zip"
zip="${OUT_GOVERNMENT_ZIP_DIR}/P28-13.zip"
url="https://nlftp.mlit.go.jp/ksj/gml/data/P28/P28-22/P28-22.zip"
zip="${OUT_GOVERNMENT_ZIP_DIR}/P28-22.zip"
if [ ! -e "${zip}" ] ; then
curl -s "${url}" > "${zip}"
fi
Expand Down
19 changes: 17 additions & 2 deletions scripts/import_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,23 @@ fi

echo "YEAR ESTAT:${T_YEAR_ESTAT}"

/bin/bash scripts/download_isj.sh ${T_YEAR_ISJ}
/bin/bash scripts/import_isj.sh ${T_YEAR_ISJ}
##--
##-- Importing Multiple Years for ISJ
##--
##-- T_YEAR_ISJ holds a single year or a comma-separated list of years
##-- (ex. "2023,2017,2013"). Every year except the last one listed is passed
##-- to import_isj.sh together with the "nopatch" argument; the last one is
##-- imported without it.
##--

# Split a comma-separated year list ($1) into the global array `array`.
# The commas are replaced by the shell itself instead of `sed 's/,/\n/g'`:
# "\n" in a sed replacement is a GNU extension, so other sed implementations
# leave the list unsplit. Reading into the array also avoids the pathname
# expansion that the previous unquoted command substitution was subject to.
function split_isj_years()
{
    read -r -a array <<< "${1//,/ }"
}

split_isj_years "${T_YEAR_ISJ}"
array_len=${#array[@]}

if (( array_len == 0 )); then
    # An empty list used to fall through to ${array[-1]} on an empty array
    # ("bad array subscript"); report it explicitly and carry on with the
    # remaining imports instead.
    echo "WARNING: T_YEAR_ISJ is empty; skipping ISJ download/import" 1>&2
else
    last=$(( array_len - 1 ))

    # All years but the last one are imported with "nopatch".
    for (( i = 0; i < last; i++ )); do
        /bin/bash scripts/download_isj.sh "${array[$i]}"
        /bin/bash scripts/import_isj.sh "${array[$i]}" nopatch
    done

    # The last listed year is imported without "nopatch".
    /bin/bash scripts/download_isj.sh "${array[$last]}"
    /bin/bash scripts/import_isj.sh "${array[$last]}"
fi

##--

/bin/bash scripts/download_estat.sh ${T_YEAR_ESTAT}
/bin/bash scripts/import_estat.sh ${T_YEAR_ESTAT}
/bin/bash scripts/download_ksj.sh ${T_YEAR_KSJ}
Expand Down
2 changes: 1 addition & 1 deletion scripts/import_estat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ source .env

function exit_with_usage()
{
echo "Usage: bash scripts/import_estat.sh [Year (ex. 2019)]" 1>&2
echo "Usage: bash scripts/import_estat.sh [Year (ex. 2020)]" 1>&2
exit 1
}

Expand Down
Loading

0 comments on commit a85c7a4

Please sign in to comment.