From 8d074a293c0d5577ffa9e684f00779654063a010 Mon Sep 17 00:00:00 2001 From: pietercolpaert Date: Mon, 15 Apr 2024 15:19:14 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20SEMICeu/?= =?UTF-8?q?LDES-DCAT-AP-feeds@9e31c41fe92430044783049ca12e6a5aef7d8d50=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- index.html | 2516 ---------------------------------------------------- shape.ttl | 23 +- 2 files changed, 17 insertions(+), 2522 deletions(-) delete mode 100644 index.html diff --git a/index.html b/index.html deleted file mode 100644 index 0ccd534..0000000 --- a/index.html +++ /dev/null @@ -1,2516 +0,0 @@ - - - - The DCAT-AP Feed specification - - - - - - - - - - - - - -
-

-

The DCAT-AP Feed specification

-

Living Standard,

-
-
-
This version: -
https://semiceu.github.io/LDES-DCAT-AP-feeds/index.html -
Issue Tracking: -
GitHub -
Inline In Spec -
Editors: -
-
- Pieter Colpaert -
- Matthias Palmér -
-
-
- -
-
-
-

Abstract

-

Publishing a full data dump repeatedly will delegate change detection -- a fault-prone process -- to data consumers. - - With DCAT-AP Feeds we propose that DCAT-AP catalog maintainers publish an event source API that can help to replicate the catalog towards a harvester, and always keep it in sync in the way that is intended by the publisher. - Therefore, this spec describes how to publish your DCAT-AP entity changes using the Activity Streams vocabulary and LDES. - It also provides a specification for harvesters to provide transparency into their harvesting progress.

-
-
- -
-

1. Publishing changes about DCAT-AP entities

-

A DCAT-AP Feed is a Linked Data Event Stream with ActivityStream entities Create, Update and Delete in it about the DCAT-AP entities in a catalog. -DCAT-AP Feeds uses the [activitystreams-vocabulary] to indicate the type of change. -Three types of activities can be described:

- -

These activities MUST provide, using the property object, an IRI of the DCAT-AP entity (which thus cannot be a blank node), SHOULD come with a published property with an xsd:dateTime datatype, and SHOULD provide a type. -The activity MUST be identified using an IRI. -The payload of the DCAT-AP entity MUST be provided in the named graph that has the activity IRI as its graph name.

-

Note: When a harvester processes the set of quads in the named graph, it can create or replace all quads in a named graph of the DCAT-AP entity, whose IRI could then be a concatenation of the entity IRI with the LDES IRI, in order to ensure that multiple representations of the DCAT-AP entities from various sources can be provided.

-

Fall-backs for when one of these optional properties is not available:

- -

All activities are immutable: once published one cannot alter the same member again. -Each activity MUST be a member of an append-only change log or event stream typed EventStream, that MUST be given an IRI. -This EventStream is the DCAT-AP Feed that conforms to the Linked Data Event Stream specification. -On a DCAT-AP Feed, the timestampPath MUST be set to published, unless the publisher knows what they are doing, or when the timestamp cannot be provided. -The versionOfPath MUST be set to object. -This configures the property that will be used to point to the entity that is being altered.

-

A DCAT-AP Feed harvester SHOULD implement the full LDES specification, or re-use an existing LDES Client.

-
- -

A JSON-LD example:

-
{
-    "@context" : "TODO",
-    "@id": "#Feed",
-    "@type": "EventStream",
-    "shape": "https://semiceu.github.io/LDES-DCAT-AP-feeds/shape.ttl#ActivityShape",
-    "title": "My DCAT-AP Feed",
-    "timestampPath": "published",
-    "versionOfPath": "object",
-    "view": {
-        "@id": "_currentpageurl_",
-        "comment": "in this object we will be able to add more specific info about this page or view"
-    },
-    "member": [
-        {
-            "@id": "https://example.org/Dataset1#Event1",
-            "@type": "Create",
-            "object": "https://example.org/Dataset1",
-            "published" : "2023-10-01T12:00:00Z",
-            "@graph": {
-                "@id": "https://example.org/Dataset1",
-                "@type": "Dataset",
-                "comment": "Everything in here is the actual data that needs to be upserted"
-            }
-        },
-        {
-            "@id": "https://example.org/Dataset1#Event2",
-            "@type": "Delete",
-            "object": "https://example.org/Dataset1",
-            "published" : "2023-10-01T13:00:00Z"
-        }
-    ]
-}
-
-

Or the same data in TRiG:

-
<#Feed> a ldes:EventStream ;
-        tree:shape <https://semiceu.github.io/LDES-DCAT-AP-feeds/shape.ttl#ActivityShape> ;
-        dct:title "My DCAT-AP Feed" ;
-        ldes:timestampPath as:published ;
-        tree:view <> ;
-        tree:member <https://example.org/Dataset1#Event1>, <https://example.org/Dataset1#Event2> .
-
-# This member is further described in the default graph 
-<https://example.org/Dataset1#Event1> a as:Create ;
-    as:object <https://example.org/Dataset1> ;
-    as:published "2023-10-01T12:00:00Z"^^xsd:dateTime .
-
-<https://example.org/Dataset1#Event1>  {
-    <https://example.org/Dataset1> a dcat:Dataset ;
-        ## The (updated) representation of this particular dataset
-        ## ...
-}
-<https://example.org/Dataset1#Event2> a as:Delete ;
-    as:object <https://example.org/Dataset1> ;
-    as:published "2023-10-01T13:00:00Z"^^xsd:dateTime .
-
-
-

A DCAT-AP Feed MUST be published using either application/ld+json or application/trig and it MUST set the Content-Type header accordingly. -In this spec, examples are provided for both serializations. -Through content negotiation, other formats MAY be provided.

-

This context information MUST be present:

-
# Typing it as an EventStream
-<#Feed> a ldes:EventStream ;
-        # Indicating every member will adhere to the ActivityShape defined by the DCAT-AP-Feeds specification
-        tree:shape <https://semiceu.github.io/LDES-DCAT-AP-feeds/shape.ttl#ActivityShape> ;
-        # Indicating the timestampPath will be as:published
-        ldes:timestampPath as:published ;
-        # The current page is a page of this event stream
-        tree:view <> ;  # See pagination and retention policies for extra controls we will be able to describe here
-        # a link to all members
-        tree:member <...> .
-
-

The shape.ttl is part of this specification. -A DCAT-AP Feeds provider SHOULD test their members before adding them to the feed.

-

Note: The DCAT-AP Feed shapes graph extends the official DCAT-APv3 shapes, but doesn’t fork them: we only add the concepts of how to use these shapes in a DCAT-AP Feed.

-

1.1. Entity types

-

In DCAT-AP2.2 entity types are divided into main and supportive entity types based on their importance in the application profile. In DCAT-AP Feeds we need to make a slightly different division based on how they appear in the event stream. We will refer to the following three kinds of entity types:

-
    -
  1. -

    Standalone - these entities will appear in the event stream.

    -
  2. -

    Embedded - these entities will always be provided as part of standalone entities.

    -
  3. -

    Referenced - these entities are never described with triples, they are only referred to via their URIs.

    -
-

Note: LDES feed publishers should not add references to standalone entities before they have been added. Conversely, when removing entities all references should be removed first.

-

Note: Any dcat:CatalogRecord entities can be provided as part of the dcat:Dataset entity. Alternatively, and perhaps more appropriately, the event itself could be seen as a dcat:CatalogRecord with modification date and other useful information.

-

1.1.1. Standalone entities

-

The main entity types are identified based on their class:

- -

Note: Only standalone entities that could be part of exactly one other standalone entity -- although not recommended -- can instead be optionally included in the parent standalone entity, allowing the option of having a blank node for that standalone entity that now becomes an embedded entity. This, for example, allows embedding a dcat:Distribution.

-

1.1.2. Embedded entities

- The embedded entity types are identified based on their class: - -

1.1.3. Referenced entities

-

The referenced entity types are identified based on the properties that point to them:

- -

1.2. Retention policies

-

Without further explanation, a server publishing a Linked Data Event Stream (LDES), is considered to keep the full history of all elements. -In DCAT-AP Feeds, harvesters are generally not interested in the full history. -Therefore we recommend only keeping the latest activity (the create, updates, and remove entities) about an entity in the feed, yet transparently indicating this retention policy.

-

It may also be possible that the data catalog does not keep track of the removed entities. -In this case, it will be impossible to provide the remove activities. -While it is not recommended, in this case, a DCAT-AP Feeds provider MUST document the implicit remove retention policy.

-

Having to keep remove activities indefinitely will be difficult after a long period of time. -Therefore a third retention policy can be put in place in order to say that deletions are not kept in the feed after a certain period of time.

-

1.2.1. LatestVersionSubset with deletions

-

By adding a latest version subset retention policy, we will allow for only the last activities of an object to be added.

-
- -
<> ldes:retentionPolicy [
-        a ldes:LatestVersionSubset ;
-        ldes:amount 1    
-    ] .
-
-
-

1.2.2. Without Delete Activities

-

When this retention policy is in place, a harvester MUST check for implicit deletions.

-

A ldes:ImplicitDeletionPolicy has been proposed that will force LDES clients to check for deleted members: https://github.com/SEMICeu/LinkedDataEventStreams/issues/50

-

1.2.3. Partial remove activities

-

The list of removed datasets can get quite big in the long run without big benefits. -You MAY indicate to a harvester that you will only publish remove activities for a specific duration, -but publish the latest version of everything else.

-

This feature is under discussion as this cannot currently be processed in LDES: https://github.com/SEMICeu/LinkedDataEventStreams/issues/50

-

1.3. Pagination

-

TODO:

- -
- -
@prefix : <https://data.example.org/feed> .
-@prefix ldes: <https://w3id.org/ldes#>.
-@prefix tree: <https://w3id.org/tree#>.
-@prefix as:  <https://www.w3.org/ns/activitystreams#>.
-@prefix dcterms: <http://purl.org/dc/terms/>.
-@prefix dcat: <http://www.w3.org/ns/dcat#>.
-@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
-
-
-:#Stream a ldes:EventStream ;
-    tree:member <Dataset1#Event1>, <DataService1#Event1> ;
-    ldes:timestampPath as:published ;
-    ldes:versionOfPath as:object ;
-    tree:view : .
-
-: tree:viewDescription [
-    #recommended: a retention policy to only keep the last update about a thing
-        ldes:retentionPolicy [
-            a ldes:LatestVersionSubset ;
-            ldes:amount 1    
-        ]
-    ] ;
-    # Recommended: multiple pages in a B-tree fragmentation
-    tree:relation [
-        a tree:GreaterThanOrEqualToRelation ;
-        tree:path as:published ;
-        tree:value "2020-01-01T00:00:00Z"^^xsd:dateTime ;
-        tree:node :2020
-    ] ,
-    [
-        a tree:LessThanRelation ;
-        tree:path as:published ;
-        tree:value  "2021-01-01T00:00:00Z"^^xsd:dateTime ;
-        tree:node :2020
-    ]
-    #... More relations
-    .
-
-
-

2. Publishing a harvester’s event log

-

A DCAT-AP feeds harvester consumes one or more DCAT-AP Feeds. -In order to do so, it SHOULD use an LDES compliant client. -For the objects emitted by such an LDES client, the harvester can count on the fact that they validate against the official SHACL shape. -The payload of an update will be contained within the named graph identified by the activity IRI.

-

A harvester SHOULD publish the status of their logging on a page.

-

Note: Currently there is no further text on what this status log should look like or how it should be described. We are waiting for consensus on this in the general LDES specification that should be a topic in the SEMIC LDES standardization activity starting September 2024.

-
-
-

Conformance

-

Conformance requirements are expressed with a combination of descriptive assertions and RFC 2119 terminology. - The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” - in the normative parts of this document - are to be interpreted as described in RFC 2119. - However, for readability, - these words do not appear in all uppercase letters in this specification.

-

All of the text of this specification is normative - except sections explicitly marked as non-normative, examples, and notes. [RFC2119]

-

Examples in this specification are introduced with the words “for example” - or are set apart from the normative text with class="example", like this:

-
This is an example of an informative example.
-

Informative notes begin with the word “Note” - and are set apart from the normative text with class="note", like this:

-

Note, this is an informative note.

-
- -

References

-

Normative References

-
-
[ACTIVITYPUB] -
Christopher Webber; Jessica Tallon. ActivityPub. URL: https://w3c.github.io/activitypub/ -
[ACTIVITYSTREAMS-VOCABULARY] -
James Snell; Evan Prodromou. Activity Vocabulary. URL: https://w3c.github.io/activitystreams/vocabulary/ -
[RFC2119] -
S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://datatracker.ietf.org/doc/html/rfc2119 -
-

Issues Index

-
-
A ldes:ImplicitDeletionPolicy has been proposed that will force LDES clients to check for deleted members: https://github.com/SEMICeu/LinkedDataEventStreams/issues/50
-
This feature is under discussion as this cannot currently be processed in LDES: https://github.com/SEMICeu/LinkedDataEventStreams/issues/50
-
\ No newline at end of file diff --git a/shape.ttl b/shape.ttl index 54f75ef..f5081c6 100644 --- a/shape.ttl +++ b/shape.ttl @@ -48,39 +48,50 @@ _:n3-6 rdf:first dcatap:Catalog_Shape; dcatap:Catalog_Shape a sh:NodeShape; sh:targetClass ; sh:property _:n3-29, _:n3-30, _:n3-31, _:n3-32, _:n3-33, _:n3-34, _:n3-35, _:n3-36, _:n3-37, _:n3-38, _:n3-39, _:n3-40, _:n3-41, _:n3-42, _:n3-43, _:n3-44, _:n3-45, _:n3-46, _:n3-47; - rdfs:label "Catalog"@en. + rdfs:label "Catalog"@en; + sh:class . _:n3-7 rdf:first dcatap:Dataset_Shape; rdf:rest _:n3-8. dcatap:Dataset_Shape a sh:NodeShape; sh:targetClass ; sh:property _:n3-60, _:n3-61, _:n3-62, _:n3-63, _:n3-64, _:n3-65, _:n3-66, _:n3-67, _:n3-68, _:n3-69, _:n3-70, _:n3-71, _:n3-72, _:n3-73, _:n3-74, _:n3-75, _:n3-76, _:n3-77, _:n3-78, _:n3-79, _:n3-80, _:n3-81, _:n3-82, _:n3-83, _:n3-84, _:n3-85, _:n3-86, _:n3-87, _:n3-88, _:n3-89, _:n3-90, _:n3-91, _:n3-92, _:n3-93, _:n3-94, _:n3-95; - rdfs:label "Dataset"@en. + rdfs:label "Dataset"@en; + sh:class . _:n3-8 rdf:first dcatap:Distribution_Shape; rdf:rest _:n3-9. dcatap:Distribution_Shape a sh:NodeShape; sh:targetClass ; sh:property _:n3-110, _:n3-111, _:n3-112, _:n3-113, _:n3-114, _:n3-115, _:n3-116, _:n3-117, _:n3-118, _:n3-119, _:n3-120, _:n3-121, _:n3-122, _:n3-123, _:n3-124, _:n3-125, _:n3-126, _:n3-127, _:n3-128, _:n3-129, _:n3-130, _:n3-131, _:n3-132, _:n3-133; - rdfs:label "Distribution"@en. + rdfs:label "Distribution"@en; + sh:class . _:n3-9 rdf:first dcatap:DataService_Shape; rdf:rest _:n3-10. dcatap:DataService_Shape a sh:NodeShape; sh:targetClass ; sh:property _:n3-52, _:n3-53, _:n3-54, _:n3-55, _:n3-56, _:n3-57, _:n3-58, _:n3-59; - rdfs:label "Data Service"@en. + rdfs:label "Data Service"@en; + sh:class . _:n3-10 rdf:first dcatap:Agent_Shape; rdf:rest _:n3-11. dcatap:Agent_Shape a sh:NodeShape; sh:targetClass ; sh:property _:n3-18, _:n3-19; - rdfs:label "Agent"@en. + rdfs:label "Agent"@en; + sh:class . _:n3-11 rdf:first dcatap:Kind_Shape; rdf:rest _:n3-12. 
dcatap:Kind_Shape a sh:NodeShape; sh:targetClass vcard:Kind; rdfs:label "Kind"@en; - rdfs:comment "Placeholder shape for vcard:Kind, currently not present in DCAT-AP v3 shapes". + rdfs:comment "Placeholder shape for vcard:Kind, currently not present in DCAT-AP v3 shapes"; + sh:class vcard:Kind. _:n3-12 rdf:first dcatap:LicenseDocument_Shape; rdf:rest rdf:nil. +dcatap:LicenseDocument_Shape a sh:NodeShape; + sh:targetClass ; + rdfs:label "LicenseDocument"@en; + rdfs:comment "Placeholder shape for dcterms:LicenseDocument, currently not present in DCAT-AP v3 shapes"; + sh:class . ; ; ;