-
Notifications
You must be signed in to change notification settings - Fork 0
/
insert_provenance_data.sh
executable file
·98 lines (61 loc) · 2.72 KB
/
insert_provenance_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash
# This script inserts all the provenance files generated by the analyses and
# the ontology definitions into a GraphDB repository.
# To speed up the process, the files will be inserted offline, before the
# server is initialized. Any existing repository will be overwritten.
#
# GraphDB desktop must be already installed according to the README.md file
# in `code/triple_store`.

# Deletes the temp directory where ontologies were cloned/downloaded.
# The trap below is installed before TMP_FOLDER is assigned, so if the
# script aborts early the variable may be empty/unset; guard against
# running `rm -rf ""` in that case, and use `--` to stop option parsing.
function cleanup {
  if [[ -n "${TMP_FOLDER:-}" ]]; then
    rm -rf -- "$TMP_FOLDER"
  fi
}
trap cleanup EXIT
# Path where TTL files will be read from (saved by the analysis scripts)
PROV_PATH=./outputs/analyses

# Arguments for cloning the NEAO source: branch selection + repository URL.
# Stored as an array so each option word is passed to git intact instead of
# relying on unquoted word-splitting of a flat string.
# NOTE(review): the URL was garbled by web-page email obfuscation in the
# original ("[email protected]:INM-6/..."); reconstructed as the SSH form —
# confirm against the upstream repository.
NEAO_CLONE_ARGS=(-b dev/0.1.0_doc --single-branch git@github.com:INM-6/neuroephys_analysis_ontology.git)

# URL to get the W3C PROV-O OWL source
PROV_URL="http://www.w3.org/ns/prov-o-20130430"

# Path to the ImportRDF utility from GraphDB
IMPORT_RDF=/opt/graphdb-desktop/lib/app/bin/importrdf

# Path to the repository config file
REPO_CONFIG=./code/triple_store/config/repo_config.ttl
REPO_NAME="provenance"

# Clone/download the ontology files into a temporary folder.
# Abort early if any source cannot be fetched: the import below is
# meaningless without the ontologies.
TMP_FOLDER=$(mktemp -d) || { echo "mktemp failed" >&2; exit 1; }
NEAO_FOLDER="$TMP_FOLDER/neao"
git clone "${NEAO_CLONE_ARGS[@]}" "$NEAO_FOLDER" || { echo "git clone of NEAO failed" >&2; exit 1; }
NEAO_SRC="$NEAO_FOLDER/doc/releases/0.1.0"
PROV_O_FILE="$TMP_FOLDER/provo.ttl"
wget "$PROV_URL" -O "$PROV_O_FILE" || { echo "download of PROV-O failed" >&2; exit 1; }
ALPACA_FILE="$TMP_FOLDER/alpaca.ttl"
# Locate the ontology file shipped with the installed alpaca Python package
ALPACA_ONTOLOGY=$(python -c "from alpaca.ontology import ONTOLOGY_SOURCE; print(ONTOLOGY_SOURCE)") || { echo "could not locate alpaca ontology" >&2; exit 1; }
cp -- "$ALPACA_ONTOLOGY" "$ALPACA_FILE"
# Rename files from OWL to TTL to allow using the import tool
# This is needed for TTL files with .owl extension, such as Protégé sources
# find "$NEAO_SRC" -type f -name '*.owl' -exec mv -- {} {}.ttl \;

# Create lists of TTL files with provenance information.
# Read the NUL-delimited find output into bash arrays so each file becomes
# one argument to the import tool. (The previous `$(find ... | xargs --null)`
# collapsed all paths into a single space-joined string, which was then
# passed quoted as ONE argument — breaking the import for multiple files or
# paths containing spaces.)
mapfile -d '' -t PSD_FILES < <(find "$PROV_PATH" -name '*psd*.ttl' -print0)
mapfile -d '' -t ISI_FILES < <(find "$PROV_PATH" -name '*isi*.ttl' -print0)

# Loading is done offline, kill any running process
killall -q -w graphdb-desktop

# Insert the ontologies and the TTL files with provenance
"$IMPORT_RDF" -Dgraphdb.inference.concurrency=6 load -m parallel -f -c "$REPO_CONFIG" \
    "$PROV_O_FILE" "$ALPACA_FILE" \
    "$NEAO_SRC/neao.ttl" \
    "${PSD_FILES[@]}" "${ISI_FILES[@]}"
# Insert triples mapping PROV-O/Alpaca triples to NEAO
# This requires instantiation of the GraphDB server
echo "Inserting NEAO triples"
PYTHONPATH=$(pwd)/code
export PYTHONPATH

# Guard the directory change: if it fails, the commands below would run
# against unintended paths from the wrong working directory.
cd ./code/triple_store || { echo "cannot cd to ./code/triple_store" >&2; exit 1; }
# Start the GraphDB server so the SPARQL updates can be applied online
./launch.sh restart_log
python ./scripts/update_data.py --repository="$REPO_NAME" \
    ../neao_mapping/insert_neao_steps.sparql \
    ../neao_mapping/insert_neao_implementation.sparql \
    ../neao_mapping/insert_container_outputs.sparql
cd ../.. || exit 1

# Stop the server again now that all data has been inserted
killall -q -w graphdb-desktop
echo "All data inserted into GraphDB"