Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REL-1197: Rdf export/import from neo4j #3890

Merged
merged 2 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion helpers/data/class.GenerisAdapterRdf.php
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ private function addResource(Graph $graph, core_kernel_classes_Resource $resourc
) {
continue;
}
$graph->add($triple->subject, $triple->predicate, $triple->object);
$graph->addResource($triple->subject, $triple->predicate, $triple->object);
} else {
if ($this->isSerializedFile($triple->object)) {
continue;
Expand Down
52 changes: 52 additions & 0 deletions migrations/Version202309111518342234_tao.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; under version 2
* of the License (non-upgradable).
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (c) 2023(original work) Open Assessment Technologies SA;
*/

declare(strict_types=1);

namespace oat\tao\migrations;

use Doctrine\DBAL\Schema\Schema;
use oat\tao\scripts\SyncModels;
use oat\tao\scripts\tools\migrations\AbstractMigration;

/**
 * Migration that re-synchronises the ontology models by running the `SyncModels` script.
 *
 * phpcs:disable Squiz.Classes.ValidClassName
 */
final class Version202309111518342234_tao extends AbstractMigration
{
    /**
     * Short human-readable summary of what this migration does.
     */
    public function getDescription(): string
    {
        return 'Update Ontology models';
    }

    /**
     * Runs `SyncModels` with an empty parameter list and records the report it returns.
     */
    public function up(Schema $schema): void
    {
        // propagate() hands back the script it was given; it is then invoked as a callable.
        $syncModels = $this->propagate(new SyncModels());
        $report = $syncModels([]);

        $this->addReport($report);
    }

    /**
     * This migration cannot be reverted automatically; it always raises the
     * irreversible-migration exception with instructions for the operator.
     */
    public function down(Schema $schema): void
    {
        $message = 'The models should be updated via `SyncModels` script after reverting their RDF definitions.';

        $this->throwIrreversibleMigrationException($message);
    }
}
74 changes: 44 additions & 30 deletions scripts/tools/MigrateSqlToNeo4j.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
use oat\oatbox\extension\script\ScriptAction;
use oat\oatbox\reporting\Report;
use oat\tao\model\TaoOntology;
use WikibaseSolutions\CypherDSL\Query;

/**
* php -dmemory_limit=1G index.php 'oat\tao\scripts\tools\MigrateSqlToNeo4j' -u -i -s 10000 -n 10000 -vvv
Expand Down Expand Up @@ -242,12 +243,12 @@ public function extractDataFromSqlStorage(int $chunkSize): \Generator
*/
public function loadNTripleToNeo4j($neo4j, string $nTriple, int $neo4jChunkSize): void
{
$nTriple = $this->escapeTriple($nTriple);

$result = $neo4j->run(<<<CYPHER
CALL n10s.rdf.import.inline('${nTriple}',"N-Triples",{commitSize:${neo4jChunkSize}}) YIELD terminationStatus, extraInfo
$result = $neo4j->run(
<<<CYPHER
CALL n10s.rdf.import.inline(\$nTriple,"N-Triples",{commitSize:${neo4jChunkSize}}) YIELD terminationStatus, extraInfo
RETURN terminationStatus, extraInfo
CYPHER
CYPHER,
['nTriple' => $nTriple]
);

$responseMessage = $result->first();
Expand All @@ -263,31 +264,6 @@ public function loadNTripleToNeo4j($neo4j, string $nTriple, int $neo4jChunkSize)
$this->logInfo('Chunk of triples successfully loaded.');
}

/**
 * Escapes an N-Triples payload: doubles the backslash in the sequences `\\`, `\"`, `\n`,
 * `\r` and `\t`, and prefixes every single quote with a backslash.
 *
 * Only the sequences actually present in the input are handed to str_replace(), which
 * applies them one after another in the order they are declared below.
 *
 * @param string $nTriple Raw N-Triples text.
 *
 * @return string The escaped text (the input itself when nothing needed escaping).
 */
public function escapeTriple(string $nTriple): string
{
    $replacements = [
        '\\\\' => '\\\\\\\\', // two backslashes become four
        '\"' => '\\\\"', // backslash + double quote gains a second backslash
        '\n' => '\\\\n', // literal backslash-n gains a second backslash
        '\r' => '\\\\r', // literal backslash-r gains a second backslash
        '\t' => '\\\\t', // literal backslash-t gains a second backslash
        "'" => "\'", // single quote gets a leading backslash
    ];

    // Keep only the pairs whose search string occurs in the input, preserving declaration order.
    $applicable = array_filter(
        $replacements,
        static function (string $search) use ($nTriple): bool {
            return strpos($nTriple, $search) !== false;
        },
        ARRAY_FILTER_USE_KEY
    );

    if ($applicable === []) {
        return $nTriple;
    }

    return str_replace(array_keys($applicable), array_values($applicable), $nTriple);
}

protected function provideOptions(): array
{
return [
Expand Down Expand Up @@ -354,10 +330,48 @@ protected function run(): Report
foreach ($nTripleList as $nTriple) {
$this->loadNTripleToNeo4j($neo4j, $nTriple, $neo4jChunkSize);
}

$this->addSystemLabel($neo4j, $sqlChunkSize, $neo4jChunkSize);
} catch (\Throwable $e) {
return Report::createError($e->getMessage());
}

return Report::createSuccess('Data transfer finished successfully.');
}

/**
 * Adds the `System` label to every neo4j `Resource` node whose `uri` is the subject of a
 * statement stored under a model other than the default writable one in the SQL storage.
 *
 * Subjects are streamed from SQL and labelled in batches of at most `$neo4jChunkSize` URIs.
 *
 * @param mixed $neo4j          Neo4j client; only its `run()` method is used here.
 * @param int   $sqlChunkSize   Not used by this method.
 * @param int   $neo4jChunkSize Number of subject URIs that triggers a flush to neo4j.
 */
private function addSystemLabel($neo4j, int $sqlChunkSize, int $neo4jChunkSize): void
{
    $sql = $this->getSqlAdapter();
    $nonSystemModelId = \core_kernel_persistence_smoothsql_SmoothModel::DEFAULT_WRITABLE_MODEL;

    /** @var \Doctrine\DBAL\ForwardCompatibility\Result $result */
    $result = $sql->query(<<<SQL
    SELECT subject
    FROM statements
    WHERE modelid <> {$nonSystemModelId}
    GROUP BY subject;
    SQL);

    // Single place that builds and executes the labelling query, so that full chunks and
    // the final remainder are sent to neo4j in exactly the same way.
    $labelAsSystem = static function (array $uris) use ($neo4j): void {
        $systemNode = Query::node('Resource');
        $query = Query::new()->match($systemNode)
            ->where($systemNode->property('uri')->in($uris))
            ->set($systemNode->labeled('System'));

        // The DSL object is rendered to Cypher text with build() before being passed to run().
        $neo4j->run($query->build());
    };

    $subjectList = [];
    // Compare against false explicitly: a falsy subject (e.g. "0" or "") must not end the loop early.
    while (($subject = $result->fetchColumn()) !== false) {
        $subjectList[] = $subject;

        if (count($subjectList) >= $neo4jChunkSize) {
            $labelAsSystem($subjectList);
            $subjectList = [];
        }
    }

    // Flush whatever is left after the last full chunk.
    if ($subjectList !== []) {
        $labelAsSystem($subjectList);
    }
}
}
58 changes: 40 additions & 18 deletions scripts/update/OntologyUpdater.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,34 +23,25 @@
namespace oat\tao\scripts\update;

use AppendIterator;
use oat\generis\model\kernel\persistence\file\FileModel;
use oat\generis\model\data\ModelManager;
use helpers_RdfDiff;
use core_kernel_persistence_smoothsql_SmoothModel;
use common_persistence_SqlPersistence;
use common_ext_ExtensionsManager;
use core_kernel_persistence_smoothsql_SmoothIterator;
use helpers_RdfDiff;
use oat\generis\model\data\Model;
use oat\generis\model\data\ModelManager;
use oat\generis\model\GenerisRdf;
use oat\generis\model\kernel\persistence\file\FileModel;
use oat\tao\model\extension\ExtensionModel;
use oat\tao\model\user\TaoRoles;

class OntologyUpdater
{
public static function syncModels()
{
$currentModel = ModelManager::getModel();
$modelIds = array_diff($currentModel->getReadableModels(), ['1']);

$persistence = common_persistence_SqlPersistence::getPersistence('default');

$smoothIterator = new core_kernel_persistence_smoothsql_SmoothIterator($persistence, $modelIds);

$nominalModel = new AppendIterator();
foreach (common_ext_ExtensionsManager::singleton()->getInstalledExtensions() as $ext) {
$nominalModel->append(new ExtensionModel($ext));
}
$langModel = \tao_models_classes_LanguageService::singleton()->getLanguageDefinition();
$nominalModel->append($langModel);
$existingTriples = self::getCurrentTriples($currentModel);
$nominalTriples = self::getNominalTriples();

$diff = helpers_RdfDiff::create($smoothIterator, $nominalModel);
$diff = helpers_RdfDiff::create($existingTriples, $nominalTriples);
self::logDiff($diff);

$diff->applyTo($currentModel);
Expand Down Expand Up @@ -82,4 +73,35 @@ protected static function logDiff(\helpers_RdfDiff $diff)
FileModel::toFile($path . DIRECTORY_SEPARATOR . 'add.rdf', $diff->getTriplesToAdd());
FileModel::toFile($path . DIRECTORY_SEPARATOR . 'remove.rdf', $diff->getTriplesToRemove());
}

/**
 * Builds the nominal set of triples: the model of every installed extension,
 * followed by the language definition provided by the language service.
 *
 * @return \Traversable Iterator chaining all of the above in that order.
 */
public static function getNominalTriples(): \Traversable
{
    $triples = new AppendIterator();
    $installedExtensions = common_ext_ExtensionsManager::singleton()->getInstalledExtensions();

    foreach ($installedExtensions as $extension) {
        $triples->append(new ExtensionModel($extension));
    }

    $triples->append(
        \tao_models_classes_LanguageService::singleton()->getLanguageDefinition()
    );

    return $triples;
}

/**
 * Returns the triples exposed by the given model's RDF interface, leaving out the
 * ones that must not take part in the comparison with the nominal triples.
 *
 * @param Model $currentModel Model whose RDF iterator is filtered.
 *
 * @return \Traversable Filtered view over the model's triples.
 */
public static function getCurrentTriples(Model $currentModel): \Traversable
{
    $allTriples = $currentModel->getRdfInterface()->getIterator();

    $keepTriple = function (\core_kernel_classes_Triple $item) {
        // The GrantAccess field is added to entities in non-system space and is ignored for now.
        if ($item->predicate === 'http://www.tao.lu/Ontologies/taoFuncACL.rdf#GrantAccess') {
            return false;
        }

        /**
         * These role includes are generated by a script and created in non-system space,
         * so they are ignored as well.
         * @see \tao_install_ExtensionInstaller::installManagementRole
         */
        if (
            $item->subject === TaoRoles::GLOBAL_MANAGER
            && $item->predicate === GenerisRdf::PROPERTY_ROLE_INCLUDESROLE
        ) {
            return false;
        }

        return true;
    };

    return new \CallbackFilterIterator($allTriples, $keepTriple);
}
}
Loading