Skip to content

Commit

Permalink
Add z2h. Publish 0.0.4.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alalalalaki committed Jan 19, 2021
1 parent e392fb6 commit 9670fe0
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 2 deletions.
4 changes: 4 additions & 0 deletions jpstat/estat/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .. import config
from .api import API
from .util.clean import clean_dict_cols
from ..util.z2h import str_z2h


def get_list(statsCode=None, searchWord=None, outputRaw=False, key=None, lang=None, **kwargs):
Expand All @@ -23,6 +24,7 @@ def get_list(statsCode=None, searchWord=None, outputRaw=False, key=None, lang=No
'STATISTICS_NAME', 'TITLE',
'SURVEY_DATE', 'OPEN_DATE', 'OVERALL_TOTAL_NUMBER']
df = df[cols_simple].pipe(clean_dict_cols, ['STAT_NAME', 'GOV_ORG', 'TITLE'])
df = df.applymap(str_z2h)
return df


Expand All @@ -31,6 +33,7 @@ def get_stat(key=None, lang=None,):
data = api.get_list(statsNameList="Y")
df = pd.DataFrame(data['DATALIST_INF']['LIST_INF'])
df = df.pipe(clean_dict_cols, ['STAT_NAME', 'GOV_ORG'])
df = df.applymap(str_z2h)
return df


Expand Down Expand Up @@ -61,6 +64,7 @@ def get_data(statsDataId, return_note=True, key=None, lang=None, **kwargs):
df.drop(col_name, axis=1, inplace=True)
df['Value'] = df['$']
df.drop('$', axis=1, inplace=True)
df = df.applymap(str_z2h)
if return_note:
try:
note = pd.DataFrame(data['STATISTICAL_DATA']['DATA_INF']['NOTE'])
Expand Down
47 changes: 47 additions & 0 deletions jpstat/util/z2h.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
This code is largely borrowed from the under-maintained package `japandas`
and is used to convert 「全角」 (full-width) strings to 「半角」 (half-width) strings.
"""

__version__ = 0.1

from unicodedata import normalize

# Full-width forms U+FF01..U+FF5E mirror printable ASCII U+0021..U+007E at a
# fixed code-point offset. The full-width source characters are derived from
# ASCII instead of being typed literally, so a tool that Unicode-normalizes
# this file cannot silently collapse them into their half-width equivalents
# (which would turn every mapping below into an identity mapping).
_FULLWIDTH_OFFSET = 0xFEE0
# The full-width space is U+3000 and lies outside the range above.
_IDEOGRAPHIC_SPACE = '\u3000'


def _fullwidth(ascii_chars):
    """Return the full-width counterparts of ``ascii_chars``.

    Only meaningful for printable, non-space ASCII characters
    (U+0021..U+007E); other input is shifted by the same offset unchecked.
    """
    return ''.join(chr(ord(c) + _FULLWIDTH_OFFSET) for c in ascii_chars)


# soundmarks require special handling
_ZALPHA = _fullwidth('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                     'abcdefghijklmnopqrstuvwxyz')
_ZSYMBOL = (_fullwidth('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')
            + _IDEOGRAPHIC_SPACE)
_ZDIGIT = _fullwidth('0123456789')

# mapping from full-width to half-width (NFKC folds each full-width form
# to its ASCII equivalent)
_ALPHA_MAPPER = {c: normalize('NFKC', c) for c in _ZALPHA}
_DIGIT_MAPPER = {c: normalize('NFKC', c) for c in _ZDIGIT}
_SYMBOL_MAPPER = {c: normalize('NFKC', c) for c in _ZSYMBOL}

# adding symbols that NFKC normalization leaves unchanged
# https://www.utf8-chartable.de/unicode-utf8-table.pl?start=12224&names=-&utf8=string-literal
_ZSYMBOL_MAPPER = {"〜": "~", }
_SYMBOL_MAPPER.update(_ZSYMBOL_MAPPER)


def _ord_dict(dict):
    """Return a new mapping whose keys are the code points of the given keys.

    Each key of the input must be a single character; values are carried
    over unchanged. The result is keyed the way ``str.translate`` expects.
    """
    by_code_point = {}
    for char, replacement in dict.items():
        by_code_point[ord(char)] = replacement
    return by_code_point


# for unicode.translate
# Code-point-keyed versions of the character tables, as str.translate
# looks characters up by their integer code point.
_Z2H_ALPHA, _Z2H_DIGIT, _Z2H_SYMBOL = map(
    _ord_dict, (_ALPHA_MAPPER, _DIGIT_MAPPER, _SYMBOL_MAPPER))

# Single combined translation table (alpha, then digit, then symbol).
mapper = {**_Z2H_ALPHA, **_Z2H_DIGIT, **_Z2H_SYMBOL}


def str_z2h(string: str, ):
    """Translate ``string`` through the module-level ``mapper`` table.

    Only ``str`` instances are translated. Any other value (numbers,
    ``None``, ``bytes``, ...) is returned unchanged, so the function can be
    applied element-wise to data of mixed types.

    Args:
        string: The value to convert.

    Returns:
        The translated string, or the input itself when it is not a ``str``.
    """
    # Guard on the type rather than on the presence of ``.translate``:
    # ``bytes`` and ``bytearray`` also expose ``translate`` but reject a
    # dict table with TypeError, which an AttributeError handler would not
    # catch.
    if not isinstance(string, str):
        return string
    return string.translate(mapper)
2 changes: 1 addition & 1 deletion jpstat/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.0.3.2'
__version__ = '0.0.4'
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "jpstat"
version = "0.0.3.2"
version = "0.0.4"
description = "A python library for accessing official statistics of Japan."
authors = ["Xuanli Zhu <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 9670fe0

Please sign in to comment.