Skip to content

Commit

Permalink
checks.py: More AdditionalChecks
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed (ODSC) committed Sep 13, 2024
1 parent ffb381d commit b09abf7
Show file tree
Hide file tree
Showing 28 changed files with 1,014 additions and 1 deletion.
12 changes: 12 additions & 0 deletions libcovebods/run_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
libcovebods.tasks.checks.CheckEntityTypeAndEntitySubtypeAlign,
libcovebods.tasks.checks.CheckEntitySecurityListingsMICSCodes,
libcovebods.tasks.checks.CheckEntitySecurityListingsMICSCodesRecord,
libcovebods.tasks.checks.CheckSourceRetrievedAtFutureDate,
libcovebods.tasks.checks.CheckStatementDateFutureDate,
libcovebods.tasks.checks.CheckAnnotationCreationDateFutureDate,
libcovebods.tasks.checks.CheckStatementPublicationDateFutureDate,
libcovebods.tasks.checks.CheckStatementPersonDateOfDeathSane,
libcovebods.tasks.checks.CheckStatementEntityFoundationDissolutionDates,
libcovebods.tasks.statistics.StatisticsCountEntityStatements,
libcovebods.tasks.statistics.StatisticsCountEntityRecordStatements,
libcovebods.tasks.statistics.StatisticsCountPersonStatements,
Expand All @@ -33,6 +39,12 @@
libcovebods.tasks.checks.CheckEntityTypeAndEntitySubtypeAlign,
libcovebods.tasks.checks.CheckEntitySecurityListingsMICSCodes,
libcovebods.tasks.checks.CheckEntitySecurityListingsMICSCodesRecord,
libcovebods.tasks.checks.CheckSourceRetrievedAtFutureDate,
libcovebods.tasks.checks.CheckStatementDateFutureDate,
libcovebods.tasks.checks.CheckAnnotationCreationDateFutureDate,
libcovebods.tasks.checks.CheckStatementPublicationDateFutureDate,
libcovebods.tasks.checks.CheckStatementPersonDateOfDeathSane,
libcovebods.tasks.checks.CheckStatementEntityFoundationDissolutionDates,
libcovebods.tasks.statistics.StatisticsCountEntityStatements,
libcovebods.tasks.statistics.StatisticsCountEntityRecordStatements,
libcovebods.tasks.statistics.StatisticsCountPersonStatements,
Expand Down
145 changes: 144 additions & 1 deletion libcovebods/tasks/checks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from datetime import datetime, timedelta
from libcove2.common import get_orgids_prefixes # type: ignore

from libcovebods.base_task import AdditionalCheck
from libcovebods.utils import get_year_from_bods_birthdate_or_deathdate
from libcovebods.utils import get_year_from_bods_birthdate_or_deathdate, parse_date_field


class LegacyChecks(AdditionalCheck):
Expand Down Expand Up @@ -775,3 +776,145 @@ def check_entity_statement_first_pass(self, statement):
"statement": statement.get("statementId"),
}
)

class CheckSourceRetrievedAtFutureDate(AdditionalCheck):
    """Flag statements whose ``source.retrievedAt`` is later than now.

    Applies to schema version 0.4 and later (see ``does_apply_to_schema``).
    """

    def __init__(self, lib_cove_bods_config, schema_object):
        super().__init__(lib_cove_bods_config, schema_object)

    @staticmethod
    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
        """Return True when the schema version is 0.4 or greater."""
        return schema_object.is_schema_version_equal_to_or_greater_than("0.4")

    def check_statement_first_pass(self, statement):
        """Append a result when the source retrieval date is in the future."""
        if "source" not in statement:
            return
        source = statement["source"]
        if not isinstance(source, dict):
            return
        if "retrievedAt" not in source or not source["retrievedAt"]:
            return

        parsed = parse_date_field(source["retrievedAt"])
        # Compared against the naive local clock.
        if parsed and parsed > datetime.now():
            result = {
                "type": "statement_source_retrieved_at_future_date",
                "statement_type": None,
                "statement": statement.get("statementId"),
            }
            self._additional_check_results.append(result)

class CheckStatementDateFutureDate(AdditionalCheck):
    """Flag statements whose ``statementDate`` is later than now.

    Applies to schema version 0.4 and later (see ``does_apply_to_schema``).
    """

    def __init__(self, lib_cove_bods_config, schema_object):
        super().__init__(lib_cove_bods_config, schema_object)

    @staticmethod
    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
        """Return True when the schema version is 0.4 or greater."""
        return schema_object.is_schema_version_equal_to_or_greater_than("0.4")

    def check_statement_first_pass(self, statement):
        """Append a result when the statement date is in the future."""
        if "statementDate" not in statement or not statement["statementDate"]:
            return

        parsed = parse_date_field(statement["statementDate"])
        # Compared against the naive local clock.
        if parsed and parsed > datetime.now():
            result = {
                "type": "statement_date_is_future_date",
                "statement_type": None,
                "statement": statement.get("statementId"),
            }
            self._additional_check_results.append(result)

class CheckAnnotationCreationDateFutureDate(AdditionalCheck):
    """Flag statements carrying an annotation whose ``creationDate`` is later than now.

    Applies to schema version 0.4 and later (see ``does_apply_to_schema``).
    One result is recorded per offending annotation.
    """

    def __init__(self, lib_cove_bods_config, schema_object):
        super().__init__(lib_cove_bods_config, schema_object)

    @staticmethod
    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
        """Return True when the schema version is 0.4 or greater."""
        return schema_object.is_schema_version_equal_to_or_greater_than("0.4")

    def check_statement_first_pass(self, statement):
        """Append a result for every annotation created in the future."""
        if "annotations" not in statement:
            return
        annotations = statement["annotations"]
        if not isinstance(annotations, list):
            return

        for annotation in annotations:
            # Skip entries that are not objects or have no usable creationDate.
            if not isinstance(annotation, dict):
                continue
            if "creationDate" not in annotation or not annotation["creationDate"]:
                continue

            creation_date = parse_date_field(annotation["creationDate"])
            # Compared against the naive local clock.
            if creation_date and creation_date > datetime.now():
                self._additional_check_results.append(
                    {
                        "type": "statement_annotation_creation_date_is_future_date",
                        "statement_type": None,
                        "statement": statement.get("statementId"),
                    }
                )


class CheckStatementPublicationDateFutureDate(AdditionalCheck):
    """Flag statements whose ``publicationDetails.publicationDate`` is later than now.

    Applies to schema version 0.4 and later (see ``does_apply_to_schema``).
    """

    def __init__(self, lib_cove_bods_config, schema_object):
        super().__init__(lib_cove_bods_config, schema_object)

    @staticmethod
    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
        """Return True when the schema version is 0.4 or greater."""
        return schema_object.is_schema_version_equal_to_or_greater_than("0.4")

    def check_statement_first_pass(self, statement):
        """Append a result when the publication date is in the future."""
        if "publicationDetails" not in statement:
            return
        details = statement["publicationDetails"]
        if not isinstance(details, dict):
            return
        if "publicationDate" not in details or not details["publicationDate"]:
            return

        parsed = parse_date_field(details["publicationDate"])
        # Compared against the naive local clock.
        if parsed and parsed > datetime.now():
            result = {
                "type": "statement_publication_date_is_future_date",
                "statement_type": None,
                "statement": statement.get("statementId"),
            }
            self._additional_check_results.append(result)


class CheckStatementPersonDateOfDeathSane(AdditionalCheck):
    """Flag person statements whose ``recordDetails.deathDate`` is implausible.

    A death date is reported when it is in the future, earlier than
    1800-01-01, earlier than the birth date, or more than 43830 days after
    the birth date. At most one result is recorded per statement.

    Applies to schema version 0.4 and later (see ``does_apply_to_schema``).
    """

    # Death dates before this are treated as not sensible.
    _EARLIEST_SENSIBLE_DEATH_DATE = datetime(1800, 1, 1)
    # Longest accepted gap between birth and death: 120 years of 365.25 days.
    _MAX_LIFESPAN_DAYS = 43830

    def __init__(self, lib_cove_bods_config, schema_object):
        super().__init__(lib_cove_bods_config, schema_object)

    @staticmethod
    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
        """Return True when the schema version is 0.4 or greater."""
        return schema_object.is_schema_version_equal_to_or_greater_than("0.4")

    def check_person_statement_first_pass(self, statement):
        """Append a result when the person's death date is not a sensible value."""
        if "recordDetails" not in statement:
            return
        details = statement["recordDetails"]
        if not isinstance(details, dict):
            return
        if "deathDate" not in details or not details["deathDate"]:
            return

        death_date = parse_date_field(details["deathDate"])
        if not death_date:
            return

        not_sensible = (
            death_date > datetime.now()
            or death_date < self._EARLIEST_SENSIBLE_DEATH_DATE
        )
        # Only consult the birth date when the death date passed the absolute
        # range check, so a statement is never reported twice.
        if not not_sensible and "birthDate" in details and details["birthDate"]:
            birth_date = parse_date_field(details["birthDate"])
            # parse_date_field returns None for values it does not recognise;
            # comparing a datetime with None would raise TypeError.
            if birth_date:
                not_sensible = (
                    death_date < birth_date
                    or (death_date - birth_date).days > self._MAX_LIFESPAN_DAYS
                )

        if not_sensible:
            self._additional_check_results.append(
                {
                    "type": "statement_person_death_date_not_sensible_value",
                    "statement_type": None,
                    "statement": statement.get("statementId"),
                }
            )


class CheckStatementEntityFoundationDissolutionDates(AdditionalCheck):
    """Flag entity statements whose dissolution date precedes the founding date.

    Both ``recordDetails.foundingDate`` and ``recordDetails.dissolutionDate``
    must be present, non-empty and parseable for the comparison to be made.

    Applies to schema version 0.4 and later (see ``does_apply_to_schema``).
    """

    def __init__(self, lib_cove_bods_config, schema_object):
        super().__init__(lib_cove_bods_config, schema_object)

    @staticmethod
    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
        """Return True when the schema version is 0.4 or greater."""
        return schema_object.is_schema_version_equal_to_or_greater_than("0.4")

    def check_entity_statement_first_pass(self, statement):
        """Append a result when the entity was dissolved before it was founded."""
        if "recordDetails" not in statement:
            return
        details = statement["recordDetails"]
        if not isinstance(details, dict):
            return
        if "foundingDate" not in details or not details["foundingDate"]:
            return
        if "dissolutionDate" not in details or not details["dissolutionDate"]:
            return

        founding_date = parse_date_field(details["foundingDate"])
        dissolution_date = parse_date_field(details["dissolutionDate"])
        # parse_date_field returns None for values it does not recognise;
        # comparing None with a datetime would raise TypeError.
        if founding_date and dissolution_date and founding_date > dissolution_date:
            self._additional_check_results.append(
                {
                    "type": "statement_entity_dissolution_before_founding_date",
                    "statement_type": None,
                    "statement": statement.get("statementId"),
                }
            )
19 changes: 19 additions & 0 deletions libcovebods/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import re

from dateutil import parser
from pytz import UTC
Expand Down Expand Up @@ -36,3 +37,21 @@ def get_statement_type(statement, schema_object):
return None
else:
return statement.get("statementType")

def parse_date_field(date_str):
    """Parse a date or date-time string into a naive ``datetime``.

    Recognised forms are ``YYYY``, ``YYYY-MM``, ``YYYY-MM-DD`` and
    ``YYYY-MM-DDTHH:MM:SS``, the last optionally followed by fractional
    seconds and by ``Z`` or a numeric UTC offset.

    Args:
        date_str: The raw value read from a statement.

    Returns:
        A naive ``datetime.datetime``, so it can be compared with other naive
        values such as ``datetime.now()``. Values with a numeric UTC offset
        are converted to UTC before the offset is dropped. Returns ``None``
        when the value is not a string or matches none of the recognised
        forms.
    """
    if not isinstance(date_str, str):
        return None

    if "T" in date_str:
        # Literal-"Z" and offset-free forms come first so they stay naive
        # without any conversion; the %z forms cover numeric offsets.
        formats = (
            "%Y-%m-%dT%H:%M:%SZ",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%dT%H:%M:%S.%fZ",
            "%Y-%m-%dT%H:%M:%S.%f",
            "%Y-%m-%dT%H:%M:%S%z",
            "%Y-%m-%dT%H:%M:%S.%f%z",
        )
    elif re.match(r"^[0-9]{4}$", date_str):
        formats = ("%Y",)
    elif re.match(r"^[0-9]{4}-[0-9]{2}$", date_str):
        formats = ("%Y-%m",)
    else:
        formats = ("%Y-%m-%d",)

    for fmt in formats:
        try:
            parsed = datetime.datetime.strptime(date_str, fmt)
            if parsed.tzinfo is not None:
                # Normalise to UTC, then drop tzinfo to keep the result naive.
                parsed = parsed.astimezone(datetime.timezone.utc).replace(
                    tzinfo=None
                )
        except (ValueError, OverflowError):
            continue
        return parsed
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2020-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"source": {
"retrievedAt": "2099-01-02"
},
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2020-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"source": {
"retrievedAt": "2084-01-02T01:01:00Z"
},
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2020-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"source": {
"retrievedAt": "2023-01-02"
},
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2020-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"source": {
"retrievedAt": "2017-01-02T01:01:00Z"
},
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2020-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"annotations":[
{
"statementPointerTarget":"",
"motivation": "commenting",
"creationDate": "2200-11-02"
}
],
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2020-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"annotations":[
{
"statementPointerTarget":"",
"motivation": "commenting",
"creationDate": "2100-11-02T00:00:00Z"
}
],
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2020-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"annotations":[
{
"statementPointerTarget":"",
"motivation": "commenting",
"creationDate": "2022-11-02"
}
],
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2099-03-04",
"recordId": "c359f58d2977",
"recordType": "entity",
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[
{
"statementId": "1dc0e987-5c57-4a1c-b3ad-61353b66a9b7",
"declarationSubject": "c359f58d2977",
"statementDate": "2099-03-04T12:12:13Z",
"recordId": "c359f58d2977",
"recordType": "entity",
"recordDetails": {
"isComponent": false,
"entityType": {
"type": "registeredEntity"
}
}
}
]
Loading

0 comments on commit b09abf7

Please sign in to comment.