diff --git a/.github/static/get_dic2owl_deps.py b/.github/static/get_dic2owl_deps.py index f97d707..1935b11 100644 --- a/.github/static/get_dic2owl_deps.py +++ b/.github/static/get_dic2owl_deps.py @@ -31,7 +31,7 @@ def main(argv_input: list = None) -> Set[str]: for file in args.requirements_files: if not file.exists(): raise FileNotFoundError(f"Could not find {file} !") - with open(file.resolve(), "r") as handle: + with open(file.resolve(), "r", encoding="utf8") as handle: for line in handle.readlines(): match = requirements_regex.fullmatch(line) if match is None: diff --git a/dic2owl/dic2owl/dic2owl.py b/dic2owl/dic2owl/dic2owl.py index 96ec239..8c4340f 100644 --- a/dic2owl/dic2owl/dic2owl.py +++ b/dic2owl/dic2owl/dic2owl.py @@ -3,6 +3,8 @@ Python script for generating an ontology corresponding to a CIF dictionary. """ +from __future__ import annotations + from contextlib import redirect_stderr from os import devnull as DEVNULL from pathlib import Path @@ -77,10 +79,6 @@ class Generator: "ontology/cif-ddl.ttl" ) - # TODO: - # Should `comments` be replaced with a dict `annotations` for annotating - # the ontology itself? If so, we should import Dublin Core. - def __init__( self, dicfile: "StrPath", @@ -187,10 +185,8 @@ def _add_data_value(self, item: dict) -> None: for ddl_name, value in item.items(): if ddl_name.startswith("_type."): if ddl_name == "_type.dimension": - # TODO - fix special case pass elif value == "Implied": - # TODO - fix special case pass else: parents.append(self.ddl[value]) @@ -221,11 +217,6 @@ class `cls`. def _add_metadata(self) -> None: """Adds metadata to the generated ontology.""" - # TODO: - # Is there a way to extract metadata from the dic object like - # _dictionary_audit.version? - # onto.set_version(version="XXX") - for comment in self.comments: self.onto.metadata.comment.append(comment) self.onto.metadata.comment.append( @@ -275,4 +266,4 @@ def main( overwrite=True, ) - return gen # XXX - just for debugging + return gen diff --git a/dic2owl/pyproject.toml b/dic2owl/pyproject.toml index 7933d79..1a43e8b 100644 --- a/dic2owl/pyproject.toml +++ b/dic2owl/pyproject.toml @@ -3,6 +3,7 @@ line-length = 79 target-version = ['py37', 'py38', 'py39'] [tool.mypy] +python_version = "3.7" ignore_missing_imports = true scripts_are_modules = true warn_unused_configs = true @@ -10,656 +11,8 @@ show_error_codes = true allow_redefinition = true [tool.pytest.ini_options] -minversion = "6.0" -required_plugins = "pytest-cov>=3.0" +minversion = "7.4" addopts = "-rs --cov=./dic2owl/dic2owl/ --cov-report=term" filterwarnings = [ "ignore:.*imp module.*:DeprecationWarning", ] - -[tool.pylint.MASTER] -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -# extension-pkg-allow-list= - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. (This is an alternative name to extension-pkg-allow-list -# for backward compatibility.) -# extension-pkg-whitelist= - -# Return non-zero exit code if any of these messages/categories are detected, -# even if score is above --fail-under value. Syntax same as enable. Messages -# specified are enabled, while categories only check already-enabled messages. -# fail-on= - -# Specify a score threshold to be exceeded before program exits with error. -fail-under=10.0 - -# Files or directories to be skipped. They should be base names, not paths. -ignore='CVS' - -# Add files or directories matching the regex patterns to the ignore-list. The -# regex matches against paths. -# ignore-paths= - -# Files or directories matching the regex patterns are skipped. The regex -# matches against base names, not paths. -# ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python module names) to load, -# usually to register additional checkers. -# load-plugins= - -# Pickle collected data for later comparisons. -persistent=true - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=true - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=false - - -[tool.pylint.MESSAGES_CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. -# confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=[ - 'print-statement', - 'parameter-unpacking', - 'unpacking-in-except', - 'old-raise-syntax', - 'backtick', - 'long-suffix', - 'old-ne-operator', - 'old-octal-literal', - 'import-star-module-level', - 'non-ascii-bytes-literal', - 'raw-checker-failed', - 'bad-inline-option', - 'locally-disabled', - 'file-ignored', - 'suppressed-message', - 'useless-suppression', - 'deprecated-pragma', - 'use-symbolic-message-instead', - 'apply-builtin', - 'basestring-builtin', - 'buffer-builtin', - 'cmp-builtin', - 'coerce-builtin', - 'execfile-builtin', - 'file-builtin', - 'long-builtin', - 'raw_input-builtin', - 'reduce-builtin', - 'standarderror-builtin', - 'unicode-builtin', - 'xrange-builtin', - 'coerce-method', - 'delslice-method', - 'getslice-method', - 'setslice-method', - 'no-absolute-import', - 'old-division', - 'dict-iter-method', - 'dict-view-method', - 'next-method-called', - 'metaclass-assignment', - 'indexing-exception', - 'raising-string', - 'reload-builtin', - 'oct-method', - 'hex-method', - 'nonzero-method', - 'cmp-method', - 'input-builtin', - 'round-builtin', - 'intern-builtin', - 'unichr-builtin', - 'map-builtin-not-iterating', - 'zip-builtin-not-iterating', - 'range-builtin-not-iterating', - 'filter-builtin-not-iterating', - 'using-cmp-argument', - 'eq-without-hash', - 'div-method', - 'idiv-method', - 'rdiv-method', - 'exception-message-attribute', - 'invalid-str-codec', - 'sys-max-int', - 'bad-python3-import', - 'deprecated-string-function', - 'deprecated-str-translate-call', - 'deprecated-itertools-function', - 'deprecated-types-field', - 'next-method-defined', - 'dict-items-not-iterating', - 'dict-keys-not-iterating', - 'dict-values-not-iterating', - 'deprecated-operator-function', - 'deprecated-urllib-function', - 'xreadlines-attribute', - 'deprecated-sys-function', - 'exception-escape', - 'comprehension-escape', - 'unspecified-encoding', - 'fixme' -] - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable='c-extension-no-member' - - -[tool.pylint.REPORTS] - -# Python expression which should return a score less than or equal to 10. You -# have access to the variables 'error', 'warning', 'refactor', and 'convention' -# which contain the number of messages in each category, as well as 'statement' -# which is the total number of statements analyzed. This score is used by the -# global evaluation report (RP0004). -evaluation='10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)' - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format='text' - -# Tells whether to display a full report or only the messages. -reports=false - -# Activate the evaluation score. -score=true - - -[tool.pylint.REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=['sys.exit', 'argparse.parse_error'] - - -[tool.pylint.STRING] - -# This flag controls whether inconsistent-quotes generates a warning when the -# character used as a quote delimiter is used inconsistently within a module. -check-quote-consistency=false - -# This flag controls whether the implicit-str-concat should generate a warning -# on implicit string concatenation in sequences defined over several lines. -check-str-concat-over-line-jumps=false - - -[tool.pylint.MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=[ - 'FIXME', - 'XXX', - 'TODO' -] - -# Regular expression of note tags to take in consideration. -#notes-rgx= - - -[tool.pylint.SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. Available dictionaries: none. To make it work, -# install the 'python-enchant' package. -# spelling-dict= - -# List of comma separated words that should be considered directives if they -# appear and the beginning of a comment and should not be checked. -spelling-ignore-comment-directives=[ - 'fmt: on', - 'fmt: off', - 'noqa:', - 'noqa', - 'nosec', - 'isort:skip', - 'mypy:' -] - -# List of comma separated words that should not be checked. -# spelling-ignore-words= - -# A path to a file that contains the private dictionary; one word per line. -# spelling-private-dict-file= - -# Tells whether to store unknown words to the private dictionary (see the -# --spelling-private-dict-file option) instead of raising a message. -spelling-store-unknown-words=false - - -[tool.pylint.BASIC] - -# Naming style matching correct argument names. -argument-naming-style='snake_case' - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style='snake_case' - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=[ - 'foo', - 'bar', - 'baz', - 'toto', - 'tutu', - 'tata' -] - -# Bad variable names regexes, separated by a comma. If names match any regex, -# they will always be refused -# bad-names-rgxs= - -# Naming style matching correct class attribute names. -class-attribute-naming-style='any' - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. -#class-attribute-rgx= - -# Naming style matching correct class constant names. -class-const-naming-style='UPPER_CASE' - -# Regular expression matching correct class constant names. Overrides class- -# const-naming-style. -#class-const-rgx= - -# Naming style matching correct class names. -class-naming-style='PascalCase' - -# Regular expression matching correct class names. Overrides class-naming- -# style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style='UPPER_CASE' - -# Regular expression matching correct constant names. Overrides const-naming- -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style='snake_case' - -# Regular expression matching correct function names. Overrides function- -# naming-style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. -good-names=[ - 'i', - 'j', - 'k', - 'ex', - 'Run', - '_' -] - -# Good variable names regexes, separated by a comma. If names match any regex, -# they will always be accepted -# good-names-rgxs= - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=false - -# Naming style matching correct inline iteration names. -inlinevar-naming-style='any' - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style='snake_case' - -# Regular expression matching correct method names. Overrides method-naming- -# style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style='snake_case' - -# Regular expression matching correct module names. Overrides module-naming- -# style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -# name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx='^_' - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes='abc.abstractproperty' - -# Naming style matching correct variable names. -variable-naming-style='snake_case' - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. -#variable-rgx= - - -[tool.pylint.LOGGING] - -# The type of string formatting that logging methods do. `old` means using % -# formatting, `new` is for `{}` formatting. -logging-format-style='old' - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules='logging' - - -[tool.pylint.VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -# additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=true - -# List of names allowed to shadow builtins -# allowed-redefined-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=['cb_', '_cb'] - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx='_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_' - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. -ignored-argument-names='_.*|^ignored_|^unused_' - -# Tells whether we should check for unused import in __init__ files. -init-import=false - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=['six.moves', 'past.builtins', 'future.builtins', 'builtins', 'io'] - - -[tool.pylint.TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators='contextlib.contextmanager' - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -# generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=true - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=true - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=true - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=['optparse.Values', 'thread._local', '_thread._local'] - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis). It -# supports qualified module names, as well as Unix pattern matching. -# ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=true - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - -# List of decorators that change the signature of a decorated function. -# signature-mutators= - - -[tool.pylint.FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -# expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines='^\s*(# )??$' - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=80 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=false - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=false - - -[oylint.SIMILARITIES] - -# Comments are removed from the similarity computation -ignore-comments=true - -# Docstrings are removed from the similarity computation -ignore-docstrings=true - -# Imports are removed from the similarity computation -ignore-imports=false - -# Signatures are removed from the similarity computation -ignore-signatures=false - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[tool.pylint.DESIGN] - -# List of qualified class names to ignore when countint class parents (see -# R0901) -# ignored-parents= - -# Maximum number of arguments for function / method. -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement (see R0916). -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[tool.pylint.IMPORTS] - -# List of modules that can be imported at any level, not just the top level -# one. -# allow-any-import-level= - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=false - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=false - -# Deprecated modules which should not be used, separated by a comma. -# deprecated-modules= - -# Output a graph (.gv or any supported image format) of external dependencies -# to the given file (report RP0402 must not be disabled). -# ext-import-graph= - -# Output a graph (.gv or any supported image format) of all (i.e. internal and -# external) dependencies to the given file (report RP0402 must not be -# disabled). -# import-graph= - -# Output a graph (.gv or any supported image format) of internal dependencies -# to the given file (report RP0402 must not be disabled). -# int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -# known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party='enchant' - -# Couples of modules and preferred modules, separated by a comma. -# preferred-modules= - - -[tool.pylint.CLASSES] - -# Warn about protected attribute access inside special methods -check-protected-access-in-special-methods=false - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=[ - '__init__', - '__new__', - 'setUp', - '__post_init__' -] - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=[ - '_asdict', - '_fields', - '_replace', - '_source', - '_make' -] - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg='cls' - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg='cls' - - -[tool.pylint.EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "BaseException, Exception". -overgeneral-exceptions=['BaseException', 'Exception'] diff --git a/dic2owl/setup.py b/dic2owl/setup.py index b84c4f5..55c997b 100644 --- a/dic2owl/setup.py +++ b/dic2owl/setup.py @@ -16,7 +16,7 @@ PACKAGE_DIR = Path(__file__).parent.resolve() -with open(PACKAGE_DIR / "dic2owl/__init__.py", "r") as handle: +with open(PACKAGE_DIR / "dic2owl/__init__.py", "r", encoding="utf8") as handle: VERSION = AUTHOR = AUTHOR_EMAIL = None for line in handle.readlines(): VERSION_match = re.match( @@ -50,14 +50,16 @@ AUTHOR = AUTHOR.group("author") # type: ignore AUTHOR_EMAIL = AUTHOR_EMAIL.group("email") # type: ignore -with open(PACKAGE_DIR / "requirements.txt", "r") as handle: +with open(PACKAGE_DIR / "requirements.txt", "r", encoding="utf8") as handle: BASE = [ f"{_.strip()}" for _ in handle.readlines() if not _.startswith("#") and "git+" not in _ ] -with open(PACKAGE_DIR / "requirements_dev.txt", "r") as handle: +with open( + PACKAGE_DIR / "requirements_dev.txt", "r", encoding="utf8" +) as handle: DEV = [ f"{_.strip()}" for _ in handle.readlines() @@ -71,7 +73,7 @@ author_email=AUTHOR_EMAIL, url="https://github.com/emmo-repo/CIF-ontology", description="Ontologize CIF dictionaries (`.dic`) using OWL.", - long_description=(PACKAGE_DIR / "README.md").read_text(), + long_description=(PACKAGE_DIR / "README.md").read_text(encoding="utf8"), long_description_content_type="text/markdown", packages=find_packages(), include_package_data=True,