#!/usr/bin/env python3
#
# crawl-back-eval
#
# evaluation of a historian replacement
#
# 2023-06-18 v0.0.2 [email protected]

import argparse
import sys
import os

from loguru import logger

from crawler.core.config import config_read
from crawler.core.database import (
    database_connect,
    database_disconnect
)
from crawler.core.main import crawl_back_image_sources


def main():
    working_directory = os.getcwd()
    program_directory = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(
        description="tests crawl back functions"
    )
    parser.add_argument(
        "--config",
        type=str,
        required=False,
        help="specify the config file to be used (default: <path_to_crawler_dir>/etc/config.yaml)",
    )
    parser.add_argument(
        "--sources",
        type=str,
        required=False,
        help="specify the sources file to be used - overrides value from config file",
    )
    args = parser.parse_args()
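
    # Example invocation (the file paths below are illustrative, not required names):
    #   ./crawl-back-eval.py --config etc/config.yaml --sources etc/sources.yaml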

    # log level and format are fixed to DEBUG output for this evaluation tool
    log_level = "DEBUG"
    log_format = (
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | "
        "<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
    )
    logger.remove()
    logger.add(sys.stderr, format=log_format, level=log_level, colorize=True)
    logger.info("crawl back evaluator v0.0.2 started")

    # read configuration
    if args.config is not None:
        config_filename = args.config
    else:
        # default
        config_filename = program_directory + "/etc/config.yaml"

    config = config_read(config_filename, "configuration")
    if config is None:
        logger.error("Unable to open config " + config_filename)
        raise SystemExit(1)

    # read the image sources
    if args.sources is not None:
        sources_filename = args.sources
    else:
        sources_filename = config["sources_name"]

    # connect to database
    database = database_connect(config["database_name"])
    if database is None:
        logger.error("Could not open database %s" % config["database_name"])
        logger.error(
            'Run "./image-crawler.py --init-db" to create a new database or check your etc/config.yaml'
        )
        sys.exit(1)

    image_source_catalog = config_read(sources_filename, "source catalog")
    if image_source_catalog is None:
        logger.error("Unable to open image source catalog " + sources_filename)
        raise SystemExit(1)

    crawl_back_image_sources(image_source_catalog, database)

    database_disconnect(database)


if __name__ == "__main__":
    main()