diff --git a/ckanext/dcat/profiles/__init__.py b/ckanext/dcat/profiles/__init__.py
index 6d30a244..668de499 100644
--- a/ckanext/dcat/profiles/__init__.py
+++ b/ckanext/dcat/profiles/__init__.py
@@ -25,4 +25,5 @@
from .euro_dcat_ap_3 import EuropeanDCATAP3Profile
from .dcat_us_3 import DCATUS3Profile
from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile
+from .euro_health_dcat_ap import EuropeanHealthDCATAPProfile
from .schemaorg import SchemaOrgProfile
diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py
index 30b989e7..a93eeb5c 100644
--- a/ckanext/dcat/profiles/base.py
+++ b/ckanext/dcat/profiles/base.py
@@ -2,16 +2,16 @@
import json
from urllib.parse import quote
+from ckan.lib.helpers import resource_formats
+from ckan.model.license import LicenseRegister
+from ckantoolkit import ObjectNotFound, asbool, aslist, config, get_action, url_for
from dateutil.parser import parse as parse_date
-from rdflib import term, URIRef, BNode, Literal
-from rdflib.namespace import Namespace, RDF, XSD, SKOS, RDFS, ORG
-from geomet import wkt, InvalidGeoJSONException
+from geomet import InvalidGeoJSONException, wkt
+from rdflib import BNode, Literal, URIRef, term
+from rdflib.namespace import ORG, RDF, RDFS, SKOS, XSD, Namespace
-from ckantoolkit import config, url_for, asbool, aslist, get_action, ObjectNotFound
-from ckan.model.license import LicenseRegister
-from ckan.lib.helpers import resource_formats
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
-from ckanext.dcat.validators import is_year, is_year_month, is_date
+from ckanext.dcat.validators import is_date, is_year, is_year_month
CNT = Namespace("http://www.w3.org/2011/content#")
DCT = Namespace("http://purl.org/dc/terms/")
diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
index 3a2742a1..bea68935 100644
--- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
+++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
@@ -1,6 +1,6 @@
import json
-from rdflib import URIRef, BNode, Literal
+from rdflib import URIRef, BNode, Literal, term
from .base import RDFProfile, CleanedURIRef, URIRefOrLiteral
from .base import (
RDF,
@@ -10,6 +10,7 @@
FOAF,
SKOS,
LOCN,
+ RDFS,
)
@@ -118,6 +119,11 @@ def _parse_list_value(data_dict, field_name):
if agents:
dataset_dict[key] = agents
+ # Add any qualifiedRelations
+ qual_relations = self._relationship_details(dataset_ref, DCAT.qualifiedRelation)
+ if qual_relations:
+ dataset_dict["qualified_relation"] = qual_relations
+
# Repeating subfields: resources
for schema_field in self._dataset_schema["resource_fields"]:
if "repeating_subfields" in schema_field:
@@ -227,6 +233,10 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
spatial_ref, field[1], item[field[0]]
)
+ self._add_relationship(
+ dataset_ref, dataset_dict, "qualified_relation", DCAT.qualifiedRelation
+ )
+
resources = dataset_dict.get("resources", [])
for resource in resources:
if resource.get("access_services"):
@@ -292,6 +302,80 @@ def _add_agents(
_type=URIRefOrLiteral,
)
+    def _relationship_details(self, subject, predicate):
+        """
+        Collects the details of every node linked to a subject through a
+        dcat:Relationship property, e.g. dcat:qualifiedRelation
+
+        Both subject and predicate must be rdflib URIRef or BNode objects
+
+        Returns a list with one dict per node holding the keys uri (an empty
+        string unless the node is a URIRef), role and relation (both read with
+        self._object_value from dcat:hadRole and dct:relation respectively).
+        """
+
+        nodes = self.g.objects(subject, predicate)
+        # One entry per related node, in the order the graph yields them
+        return [
+            {
+                "uri": str(node) if isinstance(node, term.URIRef) else "",
+                "role": self._object_value(node, DCAT.hadRole),
+                "relation": self._object_value(node, DCT.relation),
+            }
+            for node in nodes
+        ]
+
+    def _add_relationship(
+        self,
+        dataset_ref,
+        dataset_dict,
+        relation_key,
+        rdf_predicate,
+    ):
+        """
+        Adds one or more dcat:Relationship nodes to the RDF graph.
+
+        :param dataset_ref: The RDF reference of the dataset
+        :param dataset_dict: The dataset dictionary holding the relationships
+        :param relation_key: field name in the CKAN dict (e.g. "qualified_relation")
+        :param rdf_predicate: The RDF predicate (e.g. DCAT.qualifiedRelation)
+
+        Nothing is added unless the field is a non-empty list whose first
+        item has at least one truthy value.
+        """
+        entries = dataset_dict.get(relation_key)
+        # Bail out early when there is nothing meaningful to serialize
+        if not isinstance(entries, list) or not entries:
+            return
+        if not self._not_empty_dict(entries[0]):
+            return
+
+        for entry in entries:
+            # Reuse the provided URI as the node identifier, otherwise
+            # fall back to a blank node
+            entry_uri = entry.get("uri")
+            if entry_uri:
+                node = CleanedURIRef(entry_uri)
+            else:
+                node = BNode()
+
+            self.g.add((node, RDF.type, DCAT.Relationship))
+            self.g.add((dataset_ref, rdf_predicate, node))
+
+            # (dict key, predicate, class) of each described property
+            for field, prop, rdf_class in (
+                ("relation", DCT.relation, RDFS.Resource),
+                ("role", DCAT.hadRole, DCAT.Role),
+            ):
+                self._add_triple_from_dict(
+                    entry,
+                    node,
+                    prop,
+                    field,
+                    _type=URIRefOrLiteral,
+                    _class=rdf_class,
+                )
+
@staticmethod
def _not_empty_dict(data_dict):
return any(data_dict.values())
diff --git a/ckanext/dcat/profiles/euro_health_dcat_ap.py b/ckanext/dcat/profiles/euro_health_dcat_ap.py
new file mode 100644
index 00000000..7e3702f1
--- /dev/null
+++ b/ckanext/dcat/profiles/euro_health_dcat_ap.py
@@ -0,0 +1,152 @@
+from rdflib import XSD, Literal, URIRef
+from rdflib.namespace import Namespace
+
+from ckanext.dcat.profiles.base import URIRefOrLiteral
+from ckanext.dcat.profiles.euro_dcat_ap_3 import EuropeanDCATAP3Profile
+
+# HealthDCAT-AP namespace. Note: not finalized yet
+HEALTHDCATAP = Namespace("http://healthdataportal.eu/ns/health#")
+
+# Data Privacy Vocabulary namespace
+DPV = Namespace("https://w3id.org/dpv#")
+
+namespaces = {
+ "healthdcatap": HEALTHDCATAP,
+ "dpv": DPV,
+}
+
+
+class EuropeanHealthDCATAPProfile(EuropeanDCATAP3Profile):
+    """
+    A profile implementing HealthDCAT-AP, a health-related extension of the DCAT
+    application profile for sharing information about Catalogues containing Datasets
+    and Data Services descriptions in Europe.
+    """
+
+    def parse_dataset(self, dataset_dict, dataset_ref):
+        """Parse the DCAT-AP 3 properties first, then the HealthDCAT-AP ones."""
+        dataset_dict = super().parse_dataset(dataset_dict, dataset_ref)
+        return self._parse_health_fields(dataset_dict, dataset_ref)
+
+    def _parse_health_fields(self, dataset_dict, dataset_ref):
+        """Fill dataset_dict with the HealthDCAT-AP values found in the graph."""
+        self.__parse_healthdcat_stringvalues(dataset_dict, dataset_ref)
+        self.__parse_healthdcat_intvalues(dataset_dict, dataset_ref)
+
+        # Add the HDAB. There should only ever be one but you never know
+        agents = self._agents_details(dataset_ref, HEALTHDCATAP.hdab)
+        if agents:
+            dataset_dict["hdab"] = agents
+
+        # Retention period, stored as a single-item list of start/end dicts
+        retention_start, retention_end = self._time_interval(
+            dataset_ref, HEALTHDCATAP.retentionPeriod, dcat_ap_version=2
+        )
+        retention_dict = {}
+        if retention_start is not None:
+            retention_dict["start"] = retention_start
+        if retention_end is not None:
+            retention_dict["end"] = retention_end
+        if retention_dict:
+            dataset_dict["retention_period"] = [retention_dict]
+
+        return dataset_dict
+
+    def __parse_healthdcat_intvalues(self, dataset_dict, dataset_ref):
+        """Copy the integer-valued HealthDCAT-AP properties into dataset_dict."""
+        for key, predicate in (
+            ("min_typical_age", HEALTHDCATAP.minTypicalAge),
+            ("max_typical_age", HEALTHDCATAP.maxTypicalAge),
+            ("number_of_records", HEALTHDCATAP.numberOfRecords),
+            ("number_of_unique_individuals", HEALTHDCATAP.numberOfUniqueIndividuals),
+        ):
+            value = self._object_value_int(dataset_ref, predicate)
+            # A zero value evaluates as False but is definitely not a None
+            if value is not None:
+                dataset_dict[key] = value
+
+    def __parse_healthdcat_stringvalues(self, dataset_dict, dataset_ref):
+        """Copy the multi-valued HealthDCAT-AP properties into dataset_dict."""
+        for key, predicate in (
+            ("analytics", HEALTHDCATAP.analytics),
+            ("code_values", HEALTHDCATAP.hasCodeValues),
+            ("coding_system", HEALTHDCATAP.hasCodingSystem),
+            ("health_category", HEALTHDCATAP.healthCategory),
+            ("health_theme", HEALTHDCATAP.healthTheme),
+            ("legal_basis", DPV.hasLegalBasis),
+            ("personal_data", DPV.hasPersonalData),
+            ("population_coverage", HEALTHDCATAP.populationCoverage),
+            ("publisher_note", HEALTHDCATAP.publisherNote),
+            ("publisher_type", HEALTHDCATAP.publisherType),
+            ("purpose", DPV.hasPurpose),
+        ):
+            values = self._object_value_list(dataset_ref, predicate)
+            if values:
+                dataset_dict[key] = values
+
+    def graph_from_dataset(self, dataset_dict, dataset_ref):
+        """Serialize the HealthDCAT-AP fields on top of the DCAT-AP 3 graph."""
+        super().graph_from_dataset(dataset_dict, dataset_ref)
+        for prefix, namespace in namespaces.items():
+            self.g.bind(prefix, namespace)
+
+        # key, predicate, fallbacks, _type
+        items = [
+            ("analytics", HEALTHDCATAP.analytics, None, URIRefOrLiteral),
+            ("code_values", HEALTHDCATAP.hasCodeValues, None, URIRefOrLiteral),
+            ("coding_system", HEALTHDCATAP.hasCodingSystem, None, URIRefOrLiteral),
+            ("health_category", HEALTHDCATAP.healthCategory, None, URIRefOrLiteral),
+            ("health_theme", HEALTHDCATAP.healthTheme, None, URIRefOrLiteral),
+            ("legal_basis", DPV.hasLegalBasis, None, URIRefOrLiteral),
+            (
+                "population_coverage",
+                HEALTHDCATAP.populationCoverage,
+                None,
+                URIRefOrLiteral,
+            ),
+            ("personal_data", DPV.hasPersonalData, None, URIRef),
+            ("publisher_note", HEALTHDCATAP.publisherNote, None, URIRefOrLiteral),
+            ("publisher_type", HEALTHDCATAP.publisherType, None, URIRefOrLiteral),
+            ("purpose", DPV.hasPurpose, None, URIRefOrLiteral),
+        ]
+        self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)
+
+        items = [
+            ("min_typical_age", HEALTHDCATAP.minTypicalAge),
+            ("max_typical_age", HEALTHDCATAP.maxTypicalAge),
+            ("number_of_records", HEALTHDCATAP.numberOfRecords),
+            ("number_of_unique_individuals", HEALTHDCATAP.numberOfUniqueIndividuals),
+        ]
+        for key, predicate in items:
+            self._add_nonneg_integer_triple(dataset_dict, dataset_ref, key, predicate)
+
+        self._add_agents(dataset_ref, dataset_dict, "hdab", HEALTHDCATAP.hdab)
+        # NOTE(review): retention_period is parsed but not serialized here - confirm
+
+    def _add_nonneg_integer_triple(self, dataset_dict, dataset_ref, key, predicate):
+        """
+        Adds non-negative integers to the Dataset graph (xsd:nonNegativeInteger)
+
+        dataset_ref: subject of Graph
+        key: scheming key in CKAN
+        predicate: predicate to use
+
+        Values that are negative or not integers are added as plain literals.
+        """
+        value = self._get_dict_value(dataset_dict, key)
+        # Zero is falsy but still a valid value, only skip missing/empty ones
+        if not value and (value != 0 or isinstance(value, bool)):
+            return
+
+        try:
+            number = int(value)
+            if number < 0:
+                raise ValueError("Not a non-negative integer")
+            literal = Literal(number, datatype=XSD.nonNegativeInteger)
+        except (ValueError, TypeError):
+            literal = Literal(value)
+        self.g.add((dataset_ref, predicate, literal))
+
+    def graph_from_catalog(self, catalog_dict, catalog_ref):
+        """Serialize the catalog exactly as the DCAT-AP 3 profile does."""
+        super().graph_from_catalog(catalog_dict, catalog_ref)
diff --git a/ckanext/dcat/schemas/dcat_ap_full.yaml b/ckanext/dcat/schemas/dcat_ap_full.yaml
index aee4fffd..0dcd1b49 100644
--- a/ckanext/dcat/schemas/dcat_ap_full.yaml
+++ b/ckanext/dcat/schemas/dcat_ap_full.yaml
@@ -268,6 +268,23 @@ dataset_fields:
help_inline: true
help_text: This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset.
+- field_name: qualified_relation
+ label: Qualified relation
+ repeating_label: Relationship
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: relation
+ label: Relation
+ help_text: The resource related to the source resource.
+
+ - field_name: role
+ label: Role
+ help_text: The function of an entity or agent with respect to another entity or resource.
+ help_text: A description of a relationship with another resource.
+
#- field_name: hvd_category
# label: HVD Category
# preset: multiple_text
diff --git a/ckanext/dcat/schemas/dcat_us_full.yaml b/ckanext/dcat/schemas/dcat_us_full.yaml
index 6f55903f..31478bb9 100644
--- a/ckanext/dcat/schemas/dcat_us_full.yaml
+++ b/ckanext/dcat/schemas/dcat_us_full.yaml
@@ -331,6 +331,23 @@ dataset_fields:
- field_name: license
label: License
+- field_name: qualified_relation
+ label: Qualified relation
+ repeating_label: Relationship
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: relation
+ label: Relation
+ help_text: The resource related to the source resource.
+
+ - field_name: role
+ label: Role
+ help_text: The function of an entity or agent with respect to another entity or resource.
+ help_text: A description of a relationship with another resource.
+
# Note: if not provided, this will be autogenerated
- field_name: uri
label: URI
diff --git a/ckanext/dcat/schemas/health_dcat_ap.yaml b/ckanext/dcat/schemas/health_dcat_ap.yaml
new file mode 100644
index 00000000..6245756d
--- /dev/null
+++ b/ckanext/dcat/schemas/health_dcat_ap.yaml
@@ -0,0 +1,598 @@
+scheming_version: 2
+dataset_type: dataset
+about: Schema for HealthDCAT-AP
+about_url: http://github.com/ckan/ckanext-dcat
+
+dataset_fields:
+
+- field_name: title
+ label: Title
+ preset: title
+ required: true
+ help_text: A descriptive title for the dataset.
+
+- field_name: name
+ label: URL
+ preset: dataset_slug
+ form_placeholder: eg. my-dataset
+
+- field_name: notes
+ label: Description
+ required: true
+ form_snippet: markdown.html
+ help_text: A free-text account of the dataset.
+
+- field_name: tag_string
+ label: Keywords
+ preset: tag_string_autocomplete
+ form_placeholder: eg. economy, mental health, government
+ help_text: Keywords or tags describing the dataset. Use commas to separate multiple values.
+
+- field_name: contact
+ label: Contact points
+ repeating_label: Contact point
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: name
+ label: Name
+
+ - field_name: email
+ label: Email
+ display_snippet: email.html
+
+ - field_name: identifier
+ label: Identifier
+ help_text: Unique identifier for the contact point. Such as a ROR ID.
+
+ help_text: Contact information for enquiries about the dataset.
+
+- field_name: publisher
+ label: Publisher
+ repeating_label: Publisher
+ repeating_once: true
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: name
+ label: Name
+
+ - field_name: email
+ label: Email
+ display_snippet: email.html
+
+ - field_name: url
+ label: URL
+ display_snippet: link.html
+
+ - field_name: type
+ label: Type
+
+ - field_name: identifier
+ label: Identifier
+ help_text: Unique identifier for the publisher, such as a ROR ID.
+ help_text: Entity responsible for making the dataset available.
+
+- field_name: creator
+ label: Creator
+ repeating_label: Creator
+ repeating_once: true
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+ help_text: URI of the creator, if available.
+
+ - field_name: name
+ label: Name
+ help_text: Name of the entity or person who created the dataset.
+
+ - field_name: email
+ label: Email
+ display_snippet: email.html
+ help_text: Contact email of the creator.
+
+ - field_name: url
+ label: URL
+ display_snippet: link.html
+ help_text: URL for more information about the creator.
+
+ - field_name: type
+ label: Type
+ help_text: Type of creator (e.g., Organization, Person).
+
+ - field_name: identifier
+ label: Identifier
+ help_text: Unique identifier for the creator, such as an ORCID or ROR ID.
+
+- field_name: license_id
+ label: License
+ form_snippet: license.html
+ help_text: License definitions and additional information can be found at http://opendefinition.org/.
+
+- field_name: owner_org
+ label: Organization
+ preset: dataset_organization
+ help_text: The CKAN organization the dataset belongs to.
+
+- field_name: url
+ label: Landing page
+ form_placeholder: http://example.com/dataset.json
+ display_snippet: link.html
+ help_text: Web page that can be navigated to gain access to the dataset, its distributions and/or additional information.
+
+ # Note: this will fall back to metadata_created if not present
+- field_name: issued
+ label: Release date
+ preset: dcat_date
+ help_text: Date of publication of the dataset.
+
+ # Note: this will fall back to metadata_modified if not present
+- field_name: modified
+ label: Modification date
+ preset: dcat_date
+ help_text: Most recent date on which the dataset was changed, updated or modified.
+
+- field_name: version
+ label: Version
+ validators: ignore_missing unicode_safe package_version_validator
+ help_text: Version number or other version designation of the dataset.
+
+- field_name: version_notes
+ label: Version notes
+ validators: ignore_missing unicode_safe
+ form_snippet: markdown.html
+ display_snippet: markdown.html
+ help_text: A description of the differences between this version and a previous version of the dataset.
+
+ # Note: CKAN will generate a unique identifier for each dataset
+- field_name: identifier
+ label: Identifier
+ help_text: A unique identifier of the dataset.
+
+- field_name: frequency
+ label: Frequency
+  help_text: The frequency at which the dataset is published.
+
+- field_name: provenance
+ label: Provenance
+ form_snippet: markdown.html
+ display_snippet: markdown.html
+ help_text: A statement about the lineage of the dataset.
+
+- field_name: dcat_type
+ label: Type
+ help_text: The type of the dataset.
+ # TODO: controlled vocabulary?
+
+- field_name: temporal_coverage
+ label: Temporal coverage
+ repeating_subfields:
+
+ - field_name: start
+ label: Start
+ preset: dcat_date
+
+ - field_name: end
+ label: End
+ preset: dcat_date
+ help_text: The temporal period or periods the dataset covers.
+
+- field_name: temporal_resolution
+ label: Temporal resolution
+ help_text: Minimum time period resolvable in the dataset.
+
+- field_name: spatial_coverage
+ label: Spatial coverage
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: text
+ label: Label
+
+ - field_name: geom
+ label: Geometry
+
+ - field_name: bbox
+ label: Bounding Box
+
+ - field_name: centroid
+ label: Centroid
+ help_text: A geographic region that is covered by the dataset.
+
+- field_name: spatial_resolution_in_meters
+ label: Spatial resolution in meters
+ help_text: Minimum spatial separation resolvable in a dataset, measured in meters.
+
+- field_name: access_rights
+ label: Access rights
+ validators: ignore_missing unicode_safe
+ help_text: Information that indicates whether the dataset is Open Data, has access restrictions or is not public.
+
+- field_name: alternate_identifier
+ label: Other identifier
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: This property refers to a secondary identifier of the dataset, such as MAST/ADS, DataCite, DOI, etc.
+
+- field_name: theme
+ label: Theme
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: A category of the dataset. A Dataset may be associated with multiple themes.
+
+- field_name: language
+ label: Language
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: Language or languages of the dataset.
+ # TODO: language form snippet / validator / graph
+
+- field_name: documentation
+ label: Documentation
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: A page or document about this dataset.
+
+- field_name: conforms_to
+ label: Conforms to
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: An implementing rule or other specification that the dataset follows.
+
+- field_name: is_referenced_by
+ label: Is referenced by
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: A related resource, such as a publication, that references, cites, or otherwise points to the dataset.
+
+- field_name: analytics
+ label: Analytics
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: >
+ An analytics distribution of the dataset.
+ Publishers are encouraged to provide URLs pointing to API endpoints or document
+ repositories where users can access or request associated resources such as
+ technical reports of the dataset, quality measurements, usability indicators,...
+ or analytics services.
+
+- field_name: applicable_legislation
+ label: Applicable legislation
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: The legislation that mandates the creation or management of the dataset.
+
+- field_name: code_values
+ label: Code values
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: Health classifications and their codes associated with the dataset.
+
+- field_name: coding_system
+ label: Coding system
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: >
+ Coding systems in use (e.g. ICD-10-CM, DGRs, SNOMED CT, ...).
+ To comply with HealthDCAT-AP, Wikidata URIs MUST be used.
+
+- field_name: purpose
+ label: Purpose
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: A free text statement of the purpose of the processing of data or personal data.
+
+- field_name: health_category
+ label: Health category
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: >
+ The health category to which this dataset belongs as described in the Commission Regulation on
+ the European Health Data Space laying down a list of categories of electronic data for
+ secondary use, Art.33.
+
+- field_name: health_theme
+ label: Health theme
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: >
+ A category of the Dataset or tag describing the Dataset.
+
+- field_name: legal_basis
+ label: Legal basis
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: The legal basis used to justify processing of personal data.
+
+- field_name: min_typical_age
+ label: Minimum typical age
+ validators: ignore_missing int_validator
+ form_snippet: number.html
+ help_text: Minimum typical age of the population within the dataset.
+
+- field_name: max_typical_age
+ label: Maximum typical age
+ validators: ignore_missing int_validator
+ form_snippet: number.html
+ help_text: Maximum typical age of the population within the dataset.
+
+- field_name: number_of_records
+ label: Number of records
+ validators: ignore_missing int_validator
+ form_snippet: number.html
+ help_text: Size of the dataset in terms of the number of records
+
+- field_name: number_of_unique_individuals
+  label: Number of unique individuals
+ validators: ignore_missing int_validator
+ form_snippet: number.html
+ help_text: Number of records for unique individuals.
+
+- field_name: personal_data
+ label: Personal data
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: Key elements that represent an individual in the dataset.
+
+- field_name: publisher_note
+ label: Publisher note
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: >
+ A description of the publisher activities.
+
+- field_name: publisher_type
+ label: Publisher type
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: >
+ A type of organisation that makes the Dataset available.
+
+- field_name: population_coverage
+ label: Population coverage
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: >
+ A definition of the population within the dataset.
+
+- field_name: retention_period
+ label: Retention period
+ repeating_subfields:
+
+ - field_name: start
+ label: Start
+ preset: dcat_date
+
+ - field_name: end
+ label: End
+ preset: dcat_date
+
+ help_text: A temporal period which the dataset is available for secondary use.
+
+
+# Officially there can only be one HDAB for now, but keep it repeating subfield just in case
+- field_name: hdab
+ label: Health data access body
+ repeating_label: Health data access body
+ repeating_once: true
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: name
+ label: Name
+
+ - field_name: email
+ label: Email
+ display_snippet: email.html
+
+ - field_name: url
+ label: URL
+ display_snippet: link.html
+
+ - field_name: type
+ label: Type
+
+ - field_name: identifier
+ label: Identifier
+ help_text: Unique identifier for the HDAB, such as a ROR ID.
+ help_text: Health Data Access Body supporting access to data in the Member State.
+
+- field_name: qualified_relation
+ label: Qualified relation
+ repeating_label: Relationship
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: relation
+ label: Relation
+ help_text: The resource related to the source resource.
+
+ - field_name: role
+ label: Role
+ help_text: The function of an entity or agent with respect to another entity or resource.
+ help_text: A description of a relationship with another resource.
+
+# Note: if not provided, this will be autogenerated
+- field_name: uri
+ label: URI
+ help_text: An URI for this dataset (if not provided it will be autogenerated).
+
+# TODO: relation-based properties are not yet included (e.g. is_version_of, source, sample, etc)
+#
+resource_fields:
+
+- field_name: url
+ label: URL
+ preset: resource_url_upload
+
+- field_name: name
+ label: Name
+ form_placeholder:
+ help_text: A descriptive title for the resource.
+
+- field_name: description
+ label: Description
+ form_snippet: markdown.html
+ help_text: A free-text account of the resource.
+
+- field_name: format
+ label: Format
+ preset: resource_format_autocomplete
+ help_text: File format. If not provided it will be guessed.
+
+- field_name: mimetype
+ label: Media type
+ validators: if_empty_guess_format ignore_missing unicode_safe
+ help_text: Media type for this format. If not provided it will be guessed.
+
+- field_name: compress_format
+ label: Compress format
+ help_text: The format of the file in which the data is contained in a compressed form.
+
+- field_name: package_format
+ label: Package format
+ help_text: The format of the file in which one or more data files are grouped together.
+
+- field_name: size
+ label: Size
+ validators: ignore_missing int_validator
+ form_snippet: number.html
+ display_snippet: file_size.html
+ help_text: File size in bytes
+
+- field_name: hash
+ label: Hash
+ help_text: Checksum of the downloaded file.
+
+- field_name: hash_algorithm
+ label: Hash Algorithm
+  help_text: Algorithm used to calculate the checksum.
+
+- field_name: rights
+ label: Rights
+ form_snippet: markdown.html
+ display_snippet: markdown.html
+ help_text: Some statement about the rights associated with the resource.
+
+- field_name: availability
+ label: Availability
+ help_text: Indicates how long it is planned to keep the resource available.
+
+- field_name: status
+ label: Status
+ preset: select
+ choices:
+ - value: http://purl.org/adms/status/Completed
+ label: Completed
+ - value: http://purl.org/adms/status/UnderDevelopment
+ label: Under Development
+ - value: http://purl.org/adms/status/Deprecated
+ label: Deprecated
+ - value: http://purl.org/adms/status/Withdrawn
+ label: Withdrawn
+ help_text: The status of the resource in the context of maturity lifecycle.
+
+- field_name: license
+ label: License
+ help_text: License in which the resource is made available. If not provided will be inherited from the dataset.
+
+- field_name: has_version
+ label: Has version
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_inline: true
+ help_text: This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset.
+
+ # Note: this falls back to the standard resource url field
+- field_name: access_url
+ label: Access URL
+ help_text: URL that gives access to the dataset (defaults to the standard resource URL).
+
+ # Note: this falls back to the standard resource url field
+- field_name: download_url
+ label: Download URL
+ display_snippet: link.html
+ help_text: URL that provides a direct link to a downloadable file (defaults to the standard resource URL).
+
+- field_name: issued
+ label: Release date
+ preset: dcat_date
+ help_text: Date of publication of the resource.
+
+- field_name: modified
+ label: Modification date
+ preset: dcat_date
+ help_text: Most recent date on which the resource was changed, updated or modified.
+
+- field_name: temporal_resolution
+ label: Temporal resolution
+ help_text: Minimum time period resolvable in the distribution.
+
+- field_name: spatial_resolution_in_meters
+ label: Spatial resolution in meters
+ help_text: Minimum spatial separation resolvable in the distribution, measured in meters.
+
+- field_name: language
+ label: Language
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: Language or languages of the resource.
+
+- field_name: documentation
+ label: Documentation
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: A page or document about this resource.
+
+- field_name: conforms_to
+ label: Conforms to
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: An established schema to which the described resource conforms.
+
+- field_name: applicable_legislation
+ label: Applicable legislation
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+ help_text: The legislation that mandates the creation or management of the resource.
+
+- field_name: access_services
+ label: Access services
+ repeating_label: Access service
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+
+ - field_name: title
+ label: Title
+
+ - field_name: endpoint_description
+ label: Endpoint description
+
+ - field_name: endpoint_url
+ label: Endpoint URL
+ preset: multiple_text
+
+ - field_name: serves_dataset
+ label: Serves dataset
+ preset: multiple_text
+ validators: ignore_missing scheming_multiple_text
+
+ help_text: A data service that gives access to the resource.
+
+ # Note: if not provided, this will be autogenerated
+- field_name: uri
+ label: URI
+ help_text: An URI for this resource (if not provided it will be autogenerated).
diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py
index fdda473f..1bce901c 100644
--- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py
+++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py
@@ -555,7 +555,6 @@ def test_dataset_distribution_access_service_list_values_only(self):
# List
endpoint_url_list = access_service.get('endpoint_url')
- print(access_service)
assert len(endpoint_url_list) == 1
assert 'http://publications.europa.eu/webapi/rdf/sparql' in endpoint_url_list
diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
index 27c6e770..ea343c10 100644
--- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
+++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
@@ -265,6 +265,17 @@ def test_e2e_ckan_to_dcat(self):
wkt_geom = wkt.dumps(dataset["spatial_coverage"][0]["geom"], decimals=4)
assert self._triple(g, spatial[0][2], LOCN.Geometry, wkt_geom, GSP.wktLiteral)
+ # Test qualified relation
+ relation = [t for t in g.triples((dataset_ref, DCAT.qualifiedRelation, None))]
+ assert len(relation) == 1
+ relation_items = [
+ (DCT.relation, URIRef(dataset_dict["qualified_relation"][0]["relation"])),
+ (DCAT.hadRole, URIRef(dataset_dict["qualified_relation"][0]["role"])),
+ ]
+ for predicate, value in relation_items:
+ assert self._triple(
+ g, relation[0][2], predicate, value
+ ), f"relation Predicate {predicate} does not have value {value}"
# Statements
for item in [
("access_rights", DCT.accessRights),
@@ -747,6 +758,17 @@ def test_e2e_dcat_to_ckan(self):
)
assert dataset["spatial_coverage"][0]["geom"]
+ assert len(dataset["qualified_relation"]) == 1
+ assert (
+ dataset["qualified_relation"][0]["relation"]
+ == "http://example.com/dataset/3.141592"
+ )
+ assert (
+ dataset["qualified_relation"][0]["role"]
+ == "http://www.iana.org/assignments/relation/related"
+ )
+
+
resource = dataset["resources"][0]
# Resources: core fields
diff --git a/ckanext/dcat/tests/profiles/health_dcat_ap/__init__.py b/ckanext/dcat/tests/profiles/health_dcat_ap/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_parse.py b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_parse.py
new file mode 100644
index 00000000..7abcacb4
--- /dev/null
+++ b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_parse.py
@@ -0,0 +1,179 @@
+# End-to-end parsing tests for the HealthDCAT-AP profile (DCAT RDF graph -> CKAN dataset)
+import json
+import logging
+from pprint import pprint
+
+import pytest
+from ckan.tests.helpers import call_action
+
+from ckanext.dcat.processors import RDFParser
+from ckanext.dcat.tests.utils import BaseParseTest
+
+log = logging.getLogger(__name__)
+
+
+@pytest.mark.usefixtures("with_plugins", "clean_db")
+@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
+@pytest.mark.ckan_config(
+ "scheming.dataset_schemas", "ckanext.dcat.schemas:health_dcat_ap.yaml"
+)
+@pytest.mark.ckan_config("ckanext.dcat.rdf.profiles", "euro_health_dcat_ap")
+class TestSchemingParseSupport(BaseParseTest):
+ def test_e2e_dcat_to_ckan(self):
+ """
+ Parse a DCAT RDF graph into a CKAN dataset dict, create a dataset with
+ package_create and check that all expected fields are there
+ """
+
+ contents = self._get_file_contents("dcat/dataset_health.ttl")
+
+ p = RDFParser()
+
+ p.parse(contents, _format="turtle")
+
+ datasets = [d for d in p.datasets()]
+
+ assert len(datasets) == 1
+
+ dataset_dict = datasets[0]
+
+ dataset_dict["name"] = "test-dcat-1"
+ dataset = call_action("package_create", **dataset_dict)
+
+ # Core fields
+
+ assert dataset["title"] == "HealthDCAT-AP test dataset"
+ assert (
+ dataset["notes"]
+ == "This dataset is an example of using HealthDCAT-AP in CKAN"
+ )
+
+ assert sorted([t["name"] for t in dataset["tags"]]) == [
+ "Test 1",
+ "Test 2",
+ "Test 3",
+ ]
+
+ # Standard fields
+ assert dataset["version_notes"] == "Dataset continuously updated"
+ assert dataset["identifier"] == "http://example.com/dataset/1234567890"
+ assert (
+ dataset["frequency"]
+ == "http://publications.europa.eu/resource/authority/frequency/DAILY"
+ )
+ assert (
+ dataset["access_rights"]
+ == "http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC"
+ )
+ assert (
+ dataset["provenance"]
+ == "This example dataset is partly sourced from TEHDAS2"
+ )
+
+ # Hard to map (example uses a blank node which doesn't work well in CKAN)
+ # assert dataset["dcat_type"] == "test-type"
+
+ assert dataset["issued"] == "2024-01-01T00:00:00+00:00"
+ assert dataset["modified"] == "2024-12-31T23:59:59+00:00"
+ assert dataset["temporal_resolution"] == "P1D"
+
+ assert dataset["analytics"] == ["http://example.com/analytics"]
+ assert sorted(dataset["code_values"]) == [
+ "http://example.com/code1",
+ "http://example.com/code2",
+ ]
+ assert sorted(dataset["coding_system"]) == [
+ "http://www.wikidata.org/entity/P1690",
+ "http://www.wikidata.org/entity/P4229",
+ ]
+
+ assert dataset["spatial_coverage"] == [
+ {
+ "uri": "http://publications.europa.eu/resource/authority/country/BEL",
+ }
+ ]
+
+ # List fields
+ assert sorted(dataset["conforms_to"]) == [
+ "http://www.wikidata.org/entity/Q19597236"
+ ]
+ assert sorted(dataset["language"]) == [
+ "http://publications.europa.eu/resource/authority/language/ENG",
+ "http://publications.europa.eu/resource/authority/language/FRA",
+ "http://publications.europa.eu/resource/authority/language/NLD",
+ ]
+ assert sorted(dataset["theme"]) == [
+ "http://publications.europa.eu/resource/authority/data-theme/HEAL"
+ ]
+
+ assert sorted(dataset["is_referenced_by"]) == [
+ "https://doi.org/10.1038/sdata.2016.18",
+ "https://dx.doi.org/10.1002/jmri.28679",
+ ]
+ assert sorted(dataset["applicable_legislation"]) == [
+ "http://data.europa.eu/eli/reg/2022/868/oj",
+ ]
+
+ # Repeating subfields
+ assert dataset["contact"][0]["name"] == "Contact Point"
+ assert dataset["contact"][0]["email"] == "contact@example.com"
+
+ assert dataset["publisher"][0]["name"] == "Contact Point"
+ assert dataset["publisher"][0]["email"] == "info@example.com"
+ assert dataset["publisher"][0]["url"] == "https://healthdata.nl"
+
+ assert len(dataset["qualified_relation"]) == 1
+ assert (
+ dataset["qualified_relation"][0]["relation"]
+ == "http://example.com/dataset/3.141592"
+ )
+ assert (
+ dataset["qualified_relation"][0]["role"]
+ == "http://www.iana.org/assignments/relation/related"
+ )
+
+ assert dataset["temporal_coverage"][0]["start"] == "2020-03-01"
+ assert dataset["temporal_coverage"][0]["end"] == "2024-12-31"
+
+ ## HealthDCAT specific
+ assert sorted(dataset["health_theme"]) == [
+ "http://www.wikidata.org/entity/Q58624061",
+ "http://www.wikidata.org/entity/Q7907952",
+ ]
+
+ assert dataset["legal_basis"] == ["https://w3id.org/dpv#Consent"]
+
+ assert dataset["hdab"][0]["name"] == "EU Health Data Access Body"
+ assert dataset["hdab"][0]["email"] == "hdab@example.com"
+ assert dataset["hdab"][0]["url"] == "https://www.example.com/hdab"
+
+ # CKAN converts these to strings, but also converts back to decimal/nonneg int
+ assert dataset["min_typical_age"] == "0"
+ assert dataset["max_typical_age"] == "110"
+ assert dataset["number_of_records"] == "123456789"
+ assert dataset["number_of_unique_individuals"] == "7654321"
+
+ assert sorted(dataset["personal_data"]) == [
+ "https://w3id.org/dpv/dpv-pd#Age",
+ "https://w3id.org/dpv/dpv-pd#Gender",
+ "https://w3id.org/dpv/dpv-pd#HealthRecord",
+ ]
+
+ assert dataset["population_coverage"] == [
+ "This example includes a very non-descript population"
+ ]
+ assert dataset["publisher_note"] == [
+ "Health-RI is the Dutch health care initiative to build an integrated health data infrastructure for research and innovation."
+ ]
+ assert dataset["publisher_type"] == [
+ "http://example.com/publisherType/undefined"
+ ]
+
+ assert dataset["purpose"] == ["https://w3id.org/dpv#AcademicResearch"]
+
+ assert dataset["retention_period"] == [
+ {
+ "start": "2020-03-01",
+ "end": "2034-12-31",
+ }
+ ]
diff --git a/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_serialize.py b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_serialize.py
new file mode 100644
index 00000000..0bfade6e
--- /dev/null
+++ b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_serialize.py
@@ -0,0 +1,101 @@
+import json
+
+import pytest
+from ckan.tests.helpers import call_action
+from geomet import wkt
+from rdflib import Graph
+from rdflib.namespace import RDF
+from rdflib.term import URIRef
+
+from ckanext.dcat import utils
+from ckanext.dcat.processors import RDFSerializer
+from ckanext.dcat.profiles import (
+ ADMS,
+ DCAT,
+ DCATAP,
+ DCT,
+ FOAF,
+ GSP,
+ LOCN,
+ OWL,
+ RDF,
+ RDFS,
+ SKOS,
+ SPDX,
+ VCARD,
+ XSD,
+)
+from ckanext.dcat.profiles.euro_health_dcat_ap import HEALTHDCATAP
+from ckanext.dcat.tests.utils import BaseSerializeTest
+
+DCAT_AP_PROFILES = ["euro_dcat_ap_3"]
+
+
+@pytest.mark.usefixtures("with_plugins", "clean_db")
+@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
+@pytest.mark.ckan_config(
+ "scheming.dataset_schemas", "ckanext.dcat.schemas:health_dcat_ap.yaml"
+)
+@pytest.mark.ckan_config("ckanext.dcat.rdf.profiles", "euro_health_dcat_ap")
+class TestEuroDCATAP3ProfileSerializeDataset(BaseSerializeTest):
+ def test_e2e_ckan_to_dcat(self):
+ """
+ End to end testing of CKAN dataset to RDF triples.
+
+ Note: in this HealthDCAT-AP profile, only the HealthDCAT-AP specific triples are tested for.
+ Triples in other profiles could be tested, but should mainly be tested by their respective
+ profiles."""
+ dataset_dict = json.loads(self._get_file_contents("ckan/health_dcat_ap.json"))[
+ 0
+ ]
+
+ dataset = call_action("package_create", **dataset_dict)
+
+ # Make sure schema was used
+ assert dataset["hdab"][0]["name"] == "EU Health Data Access Body"
+
+ s = RDFSerializer()
+ g = s.g
+
+ dataset_ref = s.graph_from_dataset(dataset)
+
+ # Test dataset URI
+ assert str(dataset_ref) == utils.dataset_uri(dataset)
+
+        # Load the reference graph which, as its file name indicates, contains no blank nodes
+ contents = self._get_file_contents("dcat/dataset_health_no_blank.ttl")
+ reference = Graph()
+ reference.parse(data=contents, format="turtle")
+
+        # First check that all non-blank nodes from the reference are present in the output
+ # Any other nodes added by other profiles (e.g. DCAT-AP 3) we do not have an opinion about
+ for triple in reference:
+ assert triple in g, f"Triple {triple} not in output graph"
+ # assert all(triple in g for triple in reference)
+
+ # Test HealthDCAT-AP specific HDAB triples
+ # We can assume other blank nodes (e.g. contact point, publisher, temporal) are taken care
+ # of by the base profile.
+ hdab = [t for t in g.triples((dataset_ref, HEALTHDCATAP.hdab, None))]
+ assert len(hdab) == 1
+ hdab_items = [
+ (FOAF.name, dataset_dict["hdab"][0]["name"]),
+ (VCARD.hasEmail, URIRef("mailto:" + dataset_dict["hdab"][0]["email"])),
+ (FOAF.homepage, URIRef(dataset_dict["hdab"][0]["url"])),
+ ]
+ for predicate, value in hdab_items:
+ assert self._triple(
+ g, hdab[0][2], predicate, value
+ ), f"HDAB Predicate {predicate} does not have value {value}"
+
+ # Test qualified relation
+ relation = [t for t in g.triples((dataset_ref, DCAT.qualifiedRelation, None))]
+ assert len(relation) == 1
+ relation_items = [
+ (DCT.relation, URIRef(dataset_dict["qualified_relation"][0]["relation"])),
+ (DCAT.hadRole, URIRef(dataset_dict["qualified_relation"][0]["role"])),
+ ]
+ for predicate, value in relation_items:
+ assert self._triple(
+ g, relation[0][2], predicate, value
+ ), f"relation Predicate {predicate} does not have value {value}"
diff --git a/ckanext/dcat/tests/shacl/test_shacl.py b/ckanext/dcat/tests/shacl/test_shacl.py
index 0a550c27..62cf2644 100644
--- a/ckanext/dcat/tests/shacl/test_shacl.py
+++ b/ckanext/dcat/tests/shacl/test_shacl.py
@@ -147,6 +147,9 @@ def test_validate_dcat_ap_2_graph_shapes_range():
known_failures = [
"Value does not have class skos:Concept",
"Value does not have class dcat:Dataset",
+ # Qualified relations
+ "Value does not conform to Shape :DcatResource_Shape. See details for more information.",
+ "The node is either a Catalog, Dataset or a DataService",
]
assert set(failures) - set(known_failures) == set(), results_text
diff --git a/docs/application-profiles.md b/docs/application-profiles.md
new file mode 100644
index 00000000..12374b12
--- /dev/null
+++ b/docs/application-profiles.md
@@ -0,0 +1,80 @@
+# Application profiles
+
+Besides the [base profiles](profiles.md), this extension includes additional profiles that support
+domain-specific application profiles.
+
+!!! Note
+ If you are interested in contributing a profile that might be useful for the wider community
+ check the documentation on [writing custom profiles](writing-profiles.md) and the
+ [contribution guidelines](contributing.md#including-new-profiles).
+
+## HealthDCAT-AP
+
+### Introduction
+
+This extension contains a profile (`euro_health_dcat_ap`) for the proposed
+[HealthDCAT-AP](https://healthdcat-ap.github.io/) specification.
+This is a health-related extension of the DCAT application profile for sharing information about
+Catalogues containing Datasets and Data Services descriptions in Europe (DCAT-AP).
+
+The development of a Health DCAT application profile aims to standardize health metadata within
+the scope of the [European Health Data Space](https://health.ec.europa.eu/ehealth-digital-health-and-care/european-health-data-space_en)
+(EHDS), fostering greater interoperability, findability and accessibility of electronic health
+data across the EU.
+
+The goal of this profile is to provide the wider FAIR community and other EU portals with a starting
+point for implementing HealthDCAT-AP within their own data catalogs.
+
+!!! Note
+ HealthDCAT-AP is still under active development and not finalized yet. Cardinalities,
+ certain vocabularies and the namespace have not been officially ratified yet. These are
+ expected to be finalized after the public consultation in Q1 2025.
+
+
+### Usage
+
+Use the included `euro_health_dcat_ap` profile in your configuration:
+
+```ini
+ckanext.dcat.rdf.profiles = euro_health_dcat_ap
+```
+
+The HealthDCAT-AP profile is an extension of the DCAT-AP v3 profile and requires ckanext-scheming.
+See the [documentation](getting-started.md#schemas) on how to set it up. You can use the included
+`health_dcat_ap.yaml` schema file as a starting point to adapt it to your needs:
+
+```ini
+scheming.dataset_schemas = ckanext.dcat.schemas:health_dcat_ap.yaml
+```
+
+This profile currently has no additional settings.
+
+### Limitations and deviations
+
+As HealthDCAT-AP is still a draft, it is bound to change. There are currently still some
+inconsistencies in the standard and ambiguities regarding certain properties. Below is a short summary
+of limitations and implementation decisions made during development of this profile.
+
+1. Cardinalities have not yet been finalized for HealthDCAT-AP. This CKAN schema has taken a very
+ liberal approach and takes all values as strictly optional (no failed validation for missing
+ fields). Note that some mandatory fields are currently impossible to fill with real data e.g. the
+ Health Data Access Body (HDAB) field: the EHDS legislation has not been implemented yet and no HDABs
+ have been formally appointed.
+2. The HealthDCAT-AP namespace is not formally defined yet. For now,
+ `http://healthdataportal.eu/ns/health#` is used. This will be updated once the final namespace is
+ standardized.
+3. The official examples of the standard use the `dct:description` property to encode the data
+   purpose. This does not seem to conform to the Data Privacy Vocabulary specification, which
+   proposes a controlled vocabulary. See [this issue](https://github.com/HealthDCAT-AP-de/healthdcat-ap.de/issues/11)
+   for the German perspective on this.
+4. The distributions proposed by HealthDCAT-AP, *analytics* and *sample*, are not specifically
+ implemented. URIs are linked, the resources themselves are not loaded. For *sample*, as this is
+ an upstream DCAT-AP property, this can be included once picked up there.
+5. Documentation (*foaf:page*) is implemented as a URI. There is some HealthDCAT-AP example data
+   out in the wild that uses a blank node for this and adds several properties; however, this is
+   inconsistent with other DCAT implementations.
+6. DatasetSeries are not supported yet by CKAN, and also not by this profile.
+7. The *quality annotation* property has not been implemented because its usage is not completely
+   defined yet.
+8. There is no multilingual support yet.
+9. For other properties, any limitations from the DCAT-AP profiles still apply.
diff --git a/docs/endpoints.md b/docs/endpoints.md
index f3fd6585..748cbf87 100644
--- a/docs/endpoints.md
+++ b/docs/endpoints.md
@@ -42,7 +42,7 @@ RDF representations will be advertised using `` tags on th
```
-Check the [RDF DCAT Serializer](profiles.md#rdf-dcat-serializer) section for more details about how these are generated and how to customize the output using [profiles](profiles.md#profiles).
+Check the [RDF DCAT Serializer](writing-profiles.md#rdf-dcat-serializer) section for more details about how these are generated and how to customize the output using [profiles](profiles.md#profiles).
You can specify the profile by using the `profiles=,` query parameter on the dataset endpoint (as a comma-separated list):
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 86462e44..e1ac04bc 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -31,14 +31,14 @@ The extension includes ready to use [ckanext-scheming](https://github.com/ckan/c
that enable DCAT support. These include a schema definition file (located
in [`ckanext/dcat/schemas`](https://github.com/ckan/ckanext-dcat/tree/master/ckanext/dcat/schemas))
plus extra validators and other custom logic that integrates the metadata modifications with the
-RDF DCAT [Parsers](profiles.md#rdf-dcat-parser) and [Serializers](profiles.md#rdf-dcat-serializer) and other CKAN features and extensions.
+RDF DCAT [Parsers](writing-profiles.md#rdf-dcat-parser) and [Serializers](writing-profiles.md#rdf-dcat-serializer) and other CKAN features and extensions.
There are the following schemas currently included with the extension:
* *dcat_ap_recommended.yaml*: Includes the recommended properties for `dcat:Dataset` and `dcat:Distribution` according to the DCAT AP specification. You can use this schema with the `euro_dcat_ap_2` (+ `euro_dcat_ap_scheming`) and `euro_dcat_ap_3` profiles.
* *dcat_ap_full.yaml*: Includes most of the properties defined for `dcat:Dataset` and `dcat:Distribution` in the [DCAT AP v2.1](https://semiceu.github.io/DCAT-AP/releases/2.1.1/) and [DCAT AP v3](https://semiceu.github.io/DCAT-AP/releases/3.0.0/) specification. You can use this schema with the `euro_dcat_ap_2` (+ `euro_dcat_ap_scheming`) and `euro_dcat_ap_3` profiles.
* *dcat_ap_multilingual.yaml*: An example schema implementing multilingual metadata in some fields using [ckanext-fluent](https://github.com/ckan/ckanext-fluent). See [Multilingual support](profiles.md#multilingual-support) for more information.
-
+* *dcat_us_full.yaml*: Includes most of the properties defined for `dcat:Dataset` and `dcat:Distribution` in the [DCAT US v3](https://doi-do.github.io/dcat-us/) specification. You can use this schema with the `dcat_us_3` profile.
Most sites will want to use these as a base to create their own custom schema to address their own requirements, perhaps alongside a [custom profile](profiles.md#profiles). Of course site maintainers can add or remove schema fields, as well as change the existing validators.
diff --git a/docs/harvester.md b/docs/harvester.md
index 0a0b2aa2..02ca2711 100644
--- a/docs/harvester.md
+++ b/docs/harvester.md
@@ -1,6 +1,6 @@
## RDF DCAT harvester
-The [RDF parser](profiles.md#rdf-dcat-parser) described in the previous section has been integrated into a harvester,
+The [RDF parser](writing-profiles.md#rdf-dcat-parser) described in the previous section has been integrated into a harvester,
to allow automatic import of datasets from remote sources. To enable the RDF harvester, add the `harvest` and `dcat_rdf_harvester` plugins to your CKAN configuration file (you will also need to install [ckanext-harvest](https://github.com/ckan/ckanext-harvest)):
ckan.plugins = ... harvest dcat_rdf_harvester
diff --git a/docs/index.md b/docs/index.md
index c3caca01..925d2bef 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -116,6 +116,6 @@ These are implemented internally using:
* A base [mapping](mapping.md) between DCAT and CKAN datasets and viceversa (compatible with **DCAT-AP** [v1.1](https://joinup.ec.europa.eu/asset/dcat_application_profile/asset_release/dcat-ap-v11), [v2.1](https://joinup.ec.europa.eu/collection/semantic-interoperability-community-semic/solution/dcat-application-profile-data-portals-europe/release/210) and [v3](https://semiceu.github.io/DCAT-AP/releases/3.0.0/) and **DCAT-US** [v3](https://doi-do.github.io/dcat-us/)).
-* An [RDF Parser](profiles.md#rdf-dcat-parser) that allows to read RDF serializations in different formats and extract CKAN dataset dicts, using customizable [profiles](profiles.md#profiles).
+* An [RDF Parser](writing-profiles.md#rdf-dcat-parser) that allows to read RDF serializations in different formats and extract CKAN dataset dicts, using customizable [profiles](profiles.md#profiles).
-* An [RDF Serializer](profiles.md#rdf-dcat-serializer) that allows to transform CKAN datasets metadata to different semantic formats, also allowing customizable [profiles](profiles.md#profiles).
+* An [RDF Serializer](writing-profiles.md#rdf-dcat-serializer) that allows to transform CKAN datasets metadata to different semantic formats, also allowing customizable [profiles](profiles.md#profiles).
diff --git a/docs/mapping.md b/docs/mapping.md
index d127a1c9..fa05ade2 100644
--- a/docs/mapping.md
+++ b/docs/mapping.md
@@ -4,7 +4,7 @@ The following table provides a generic mapping between the fields of the `dcat:D
their equivalents in the CKAN model. In most cases this mapping is deliberately a loose one. For instance, it does not try to link
the DCAT publisher property with a CKAN dataset author, maintainer or organization, as the link between them is not straight-forward
and may depend on a particular instance needs. When mapping from CKAN metadata to DCAT though, there are in some cases fallback fields
-that are used if the default field is not present (see [RDF Serializer](profiles.md#rdf-dcat-serializer) for more details on this).
+that are used if the default field is not present (see [RDF Serializer](writing-profiles.md#rdf-dcat-serializer) for more details on this).
This mapping is compatible with **DCAT-AP** [v1.1](https://joinup.ec.europa.eu/asset/dcat_application_profile/asset_release/dcat-ap-v11), [v2.1](https://joinup.ec.europa.eu/collection/semantic-interoperability-community-semic/solution/dcat-application-profile-data-portals-europe/release/210) and [v3](https://semiceu.github.io/DCAT-AP/releases/3.0.0/) and **DCAT-US** [v3](https://doi-do.github.io/dcat-us/). It depends on the active [profile(s)](profiles.md#profiles) and the fields present in your custom [schema](getting-started.md#schemas) which DCAT properties are mapped.
@@ -276,7 +276,7 @@ If no `publisher` or `publisher_*` fields are found, the serializers will fall b
### Spatial coverage
-The following formats for `dct:spatial` are supported by the default [parser](profiles.md#rdf-dcat-parser). Note that the default [serializer](profiles.md#rdf-dcat-serializer) will return the single `dct:spatial` instance form by default.
+The following formats for `dct:spatial` are supported by the default [parser](writing-profiles.md#rdf-dcat-parser). Note that the default [serializer](writing-profiles.md#rdf-dcat-serializer) will return the single `dct:spatial` instance form by default.
- One `dct:spatial` instance, URI only
diff --git a/docs/profiles.md b/docs/profiles.md
index 9813ff0f..21f8d8a9 100644
--- a/docs/profiles.md
+++ b/docs/profiles.md
@@ -11,14 +11,15 @@ Profiles define :
They essentially define the mapping between DCAT and CKAN.
In most cases the default profile will provide a good mapping that will cover most properties described in the DCAT standard. If you want to extract extra fields defined in the RDF, are using a custom schema or
-need custom logic, you can write a [custom profile](#writing-custom-profiles) that extends or replaces one of the default ones.
+need custom logic, you can write a [custom profile](writing-profiles.md) that extends or replaces one of the default ones.
The profiles currently shipped with the extension are mostly based in the
-[DCAT application profile for data portals in Europe](https://joinup.ec.europa.eu/asset/dcat_application_profile/description). As mentioned before though, they should be generic enough for most DCAT based representations.
+DCAT application profiles for data portals in [Europe](https://joinup.ec.europa.eu/asset/dcat_application_profile/description) and the [US](https://doi-do.github.io/dcat-us/). As mentioned before though, they should be generic enough for most DCAT based representations.
Sites that want to support a particular version of the DCAT-AP can enable a specific profile using one of the profiles below:
* [DCAT-AP v3](https://semiceu.github.io/DCAT-AP/releases/3.0.0) (default): `euro_dcat_ap_3`
+* [DCAT-US v3](https://doi-do.github.io/dcat-us/): `dcat_us_3`
* [DCAT-AP v2.1.0](https://joinup.ec.europa.eu/collection/semantic-interoperability-community-semic/solution/dcat-application-profile-data-portals-europe/release/210): `euro_dcat_ap_2`
* [DCAT-AP v1.1.1](https://joinup.ec.europa.eu/asset/dcat_application_profile/asset_release/dcat-ap-v11): `euro_dcat_ap`
@@ -45,75 +46,6 @@ serializer = RDFSerializer(profiles=['euro_dcat_ap', 'sweden_dcat_ap'])
Note that in both cases the order in which you define them is important, as it will be the one that the profiles will be run on.
-### Writing custom profiles
-
-Internally, profiles are classes that define a particular set of methods called during the parsing process.
-For instance, the `parse_dataset()` method is called on each DCAT dataset found when parsing an RDF file, and should return a CKAN dataset.
-Conversely, the `graph_from_dataset()` will be called when requesting an RDF representation for a dataset, and will need to generate the necessary RDF graph.
-
-Custom profiles should always extend the `ckanext.dcat.profiles.RDFProfile` class. This class has several helper
-functions to make getting metadata from the RDF graph easier. These include helpers for getting fields for FOAF and VCard entities like the ones
-used to define publishers or contact points. Check the source code of `ckanex.dcat.profiles.base.py` to see what is available.
-
-Profiles can extend other profiles to avoid repeating rules, or can be completely independent.
-
-The following example shows a complete example of a profile built on top of the European DCAT-AP profile (`euro_dcat_ap`):
-
-```python
-
-from rdflib.namespace import Namespace
-from ckanext.dcat.profiles import RDFProfile
-
-DCT = Namespace("http://purl.org/dc/terms/")
-
-
-class SwedishDCATAPProfile(RDFProfile):
- '''
- An RDF profile for the Swedish DCAT-AP recommendation for data portals
-
- It requires the European DCAT-AP profile (`euro_dcat_ap`)
- '''
-
- def parse_dataset(self, dataset_dict, dataset_ref):
-
- # Spatial label
- spatial = self._object(dataset_ref, DCT.spatial)
- if spatial:
- spatial_label = self.g.label(spatial)
- if spatial_label:
- dataset_dict['extras'].append({'key': 'spatial_text',
- 'value': str(spatial_label)})
-
- return dataset_dict
-
- def graph_from_dataset(self, dataset_dict, dataset_ref):
-
- g = self.g
-
- spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
- spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
-
- if spatial_uri:
- spatial_ref = URIRef(spatial_uri)
- else:
- spatial_ref = BNode()
-
- if spatial_text:
- g.add((dataset_ref, DCT.spatial, spatial_ref))
- g.add((spatial_ref, RDF.type, DCT.Location))
- g.add((spatial_ref, RDFS.label, Literal(spatial_text)))
-```
-
-Note how the dataset dict is passed between profiles so it can be further tweaked.
-
-Extensions define their available profiles using the `ckan.rdf.profiles` entrypoint in the `setup.py` file, as in this [example](https://github.com/ckan/ckanext-dcat/blob/cc5fcc7be0be62491301db719ce597aec7c684b0/setup.py#L37:L38) from this same extension:
-
- [ckan.rdf.profiles]
- euro_dcat_ap=ckanext.dcat.profiles:EuropeanDCATAPProfile
- euro_dcat_ap_2=ckanext.dcat.profiles:EuropeanDCATAP2Profile
- euro_dcat_ap_3=ckanext.dcat.profiles:EuropeanDCATAP3Profile
- euro_dcat_ap_scheming=ckanext.dcat.profiles:EuropeanDCATAPSchemingProfile
- schemaorg=ckanext.dcat.profiles:SchemaOrgProfile
## Multilingual support
@@ -185,132 +117,6 @@ See [*examples/ckan/ckan_dataset_multilingual.json*](https://github.com/ckan/cka
for examples of a multilingual CKAN dataset and DCAT serialization.
-Users [writing custom profiles](#writing-custom-profiles) can make use of the `_object_value_multilingual()`
+Users [writing custom profiles](writing-profiles.md) can make use of the `_object_value_multilingual()`
and `_object_value_list_multilingual()` functions of the profile class to handle custom fields not defined
in the base profiles.
-
-
-## Internals
-
-### RDF DCAT Parser
-
-The `ckanext.dcat.processors.RDFParser` class allows to read RDF serializations in different
-formats and extract CKAN dataset dicts. It will look for DCAT datasets and distributions
-and create CKAN datasets and resources, as dictionaries that can be passed to [`package_create`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.create.package_create) or [`package_update`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.update.package_update).
-
-Here is a quick overview of how it works:
-
-```python
-
-from ckanext.dcat.processors import RDFParser, RDFParserException
-
-parser = RDFParser()
-
-# Parsing a local RDF/XML file
-
-with open('datasets.rdf', 'r') as f:
- try:
- parser.parse(f.read())
-
- for dataset in parser.datasets():
- print('Got dataset with title {0}'.format(dataset['title'])
-
- except RDFParserException, e:
- print ('Error parsing the RDF file: {0}'.format(e))
-
-# Parsing a remote JSON-LD file
-
-import requests
-
-parser = RDFParser()
-
-content = requests.get('https://some.catalog.org/datasets.jsonld').content
-
-try:
- parser.parse(content, _format='json-ld')
-
- for dataset in parser.datasets():
- print('Got dataset with title {0}'.format(dataset['title'])
-
-except RDFParserException, e:
- print ('Error parsing the RDF file: {0}'.format(e))
-
-```
-
-The parser is implemented using [RDFLib](https://rdflib.readthedocs.org/), a Python library for working with RDF. Any
-RDF serialization format supported by RDFLib can be parsed into CKAN datasets. The `examples` folder contains
-serializations in different formats including RDF/XML, Turtle or JSON-LD.
-
-### RDF DCAT Serializer
-
-The `ckanext.dcat.processors.RDFSerializer` class generates RDF serializations in different
-formats from CKAN dataset dicts, like the ones returned by [`package_show`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.get.package_show) or [`package_search`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.get.package_search).
-
-Here is an example of how to use it:
-
-```python
-
-from ckanext.dcat.processors import RDFSerializer
-
-# Serializing a single dataset
-
-dataset = get_action('package_show')({}, {'id': 'my-dataset'})
-
-serializer = RDFserializer()
-
-dataset_ttl = serializer.serialize_dataset(dataset, _format='turtle')
-
-
-# Serializing the whole catalog (or rather part of it)
-
-datasets = get_action('package_search')({}, {'q': '*:*', 'rows': 50})
-
-serializer = RDFserializer()
-
-catalog_xml = serializer.serialize_catalog({'title': 'My catalog'},
- dataset_dicts=datasets,
- _format='xml')
-
-# Creating and RDFLib graph from a single dataset
-
-dataset = get_action('package_show')({}, {'id': 'my-dataset'})
-
-serializer = RDFserializer()
-
-dataset_reference = serializer.graph_from_dataset(dataset)
-
-# serializer.g now contains the full dataset graph, an RDFLib Graph class
-
-```
-
-The serializer uses customizable [profiles](#profiles) to generate an RDF graph (an [RDFLib Graph class](https://rdflib.readthedocs.org/en/latest/apidocs/rdflib.html#rdflib.graph.Graph)).
-By default these use the [mapping](mapping.md) described in the previous section.
-
-In some cases, if the default CKAN field that maps to a DCAT property is not present, some other fallback
-values will be used instead. For instance, if the `contact_email` field is not found, `maintainer_email`
-and `author_email` will be used (if present) for the email property of the `adms:contactPoint` property.
-
-Note that the serializer will look both for a first level field or an extra field with the same key, ie both
-the following values will be used for `dct:accrualPeriodicity`:
-
- {
- "name": "my-dataset",
- "frequency": "monthly",
- ...
- }
-
- {
- "name": "my-dataset",
- "extras": [
- {"key": "frequency", "value": "monthly"},
- ]
- ...
- }
-
-Once the dataset graph has been obtained, this is serialized into a text format using [RDFLib](https://rdflib.readthedocs.org/),
-so any format it supports can be obtained (common formats are 'xml', 'turtle' or 'json-ld').
-
-
-
-
-
diff --git a/docs/writing-profiles.md b/docs/writing-profiles.md
new file mode 100644
index 00000000..19cf0543
--- /dev/null
+++ b/docs/writing-profiles.md
@@ -0,0 +1,191 @@
+## Writing custom profiles
+
+Internally, profiles are classes that define a particular set of methods called during the parsing process.
+For instance, the `parse_dataset()` method is called on each DCAT dataset found when parsing an RDF file, and should return a CKAN dataset.
+Conversely, the `graph_from_dataset()` method will be called when requesting an RDF representation for a dataset, and will need to generate the necessary RDF graph.
+
+Custom profiles should always extend the `ckanext.dcat.profiles.RDFProfile` class. This class has several helper
+functions to make getting metadata from the RDF graph easier. These include helpers for getting fields for FOAF and VCard entities like the ones
+used to define publishers or contact points. Check the source code of `ckanext/dcat/profiles/base.py` to see what is available.
+
+Profiles can extend other profiles to avoid repeating rules, or can be completely independent.
+
+The following is a complete example of a profile built on top of the European DCAT-AP profile (`euro_dcat_ap`):
+
+```python
+
+from rdflib import BNode, Literal, Namespace, RDF, RDFS, URIRef
+from ckanext.dcat.profiles import RDFProfile
+
+DCT = Namespace("http://purl.org/dc/terms/")
+
+
+class SwedishDCATAPProfile(RDFProfile):
+ '''
+ An RDF profile for the Swedish DCAT-AP recommendation for data portals
+
+ It requires the European DCAT-AP profile (`euro_dcat_ap`)
+ '''
+
+ def parse_dataset(self, dataset_dict, dataset_ref):
+
+ # Spatial label
+ spatial = self._object(dataset_ref, DCT.spatial)
+ if spatial:
+ spatial_label = self.g.label(spatial)
+ if spatial_label:
+ dataset_dict['extras'].append({'key': 'spatial_text',
+ 'value': str(spatial_label)})
+
+ return dataset_dict
+
+ def graph_from_dataset(self, dataset_dict, dataset_ref):
+
+ g = self.g
+
+ spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
+ spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
+
+ if spatial_uri:
+ spatial_ref = URIRef(spatial_uri)
+ else:
+ spatial_ref = BNode()
+
+ if spatial_text:
+ g.add((dataset_ref, DCT.spatial, spatial_ref))
+ g.add((spatial_ref, RDF.type, DCT.Location))
+ g.add((spatial_ref, RDFS.label, Literal(spatial_text)))
+```
+
+Note how the dataset dict is passed between profiles so it can be further tweaked.
+
+Extensions define their available profiles using the `ckan.rdf.profiles` entry point in their `setup.py` (or `pyproject.toml`) file, as in this [example](https://github.com/ckan/ckanext-dcat/blob/cc5fcc7be0be62491301db719ce597aec7c684b0/setup.py#L37:L38) from this same extension:
+
+ [ckan.rdf.profiles]
+ euro_dcat_ap=ckanext.dcat.profiles:EuropeanDCATAPProfile
+ euro_dcat_ap_2=ckanext.dcat.profiles:EuropeanDCATAP2Profile
+ euro_dcat_ap_3=ckanext.dcat.profiles:EuropeanDCATAP3Profile
+ euro_dcat_ap_scheming=ckanext.dcat.profiles:EuropeanDCATAPSchemingProfile
+ schemaorg=ckanext.dcat.profiles:SchemaOrgProfile
+
+
+## Internals
+
+### RDF DCAT Parser
+
+The `ckanext.dcat.processors.RDFParser` class can read RDF serializations in different
+formats and extract CKAN dataset dicts. It will look for DCAT datasets and distributions
+and create CKAN datasets and resources, as dictionaries that can be passed to [`package_create`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.create.package_create) or [`package_update`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.update.package_update).
+
+Here is a quick overview of how it works:
+
+```python
+
+from ckanext.dcat.processors import RDFParser, RDFParserException
+
+parser = RDFParser()
+
+# Parsing a local RDF/XML file
+
+with open('datasets.rdf', 'r') as f:
+ try:
+ parser.parse(f.read())
+
+ for dataset in parser.datasets():
+ print('Got dataset with title {0}'.format(dataset['title']))
+
+ except RDFParserException as e:
+ print('Error parsing the RDF file: {0}'.format(e))
+
+# Parsing a remote JSON-LD file
+
+import requests
+
+parser = RDFParser()
+
+content = requests.get('https://some.catalog.org/datasets.jsonld').content
+
+try:
+ parser.parse(content, _format='json-ld')
+
+ for dataset in parser.datasets():
+ print('Got dataset with title {0}'.format(dataset['title']))
+
+except RDFParserException as e:
+ print('Error parsing the RDF file: {0}'.format(e))
+
+```
+
+The parser is implemented using [RDFLib](https://rdflib.readthedocs.org/), a Python library for working with RDF. Any
+RDF serialization format supported by RDFLib can be parsed into CKAN datasets. The `examples` folder contains
+serializations in different formats including RDF/XML, Turtle or JSON-LD.
+
+### RDF DCAT Serializer
+
+The `ckanext.dcat.processors.RDFSerializer` class generates RDF serializations in different
+formats from CKAN dataset dicts, like the ones returned by [`package_show`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.get.package_show) or [`package_search`](http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.get.package_search).
+
+Here is an example of how to use it:
+
+```python
+
+from ckanext.dcat.processors import RDFSerializer
+
+# Serializing a single dataset
+
+dataset = get_action('package_show')({}, {'id': 'my-dataset'})
+
+serializer = RDFSerializer()
+
+dataset_ttl = serializer.serialize_dataset(dataset, _format='turtle')
+
+
+# Serializing the whole catalog (or rather part of it)
+
+datasets = get_action('package_search')({}, {'q': '*:*', 'rows': 50})
+
+serializer = RDFSerializer()
+
+catalog_xml = serializer.serialize_catalog({'title': 'My catalog'},
+ dataset_dicts=datasets,
+ _format='xml')
+
+# Creating an RDFLib graph from a single dataset
+
+dataset = get_action('package_show')({}, {'id': 'my-dataset'})
+
+serializer = RDFSerializer()
+
+dataset_reference = serializer.graph_from_dataset(dataset)
+
+# serializer.g now contains the full dataset graph, an RDFLib Graph class
+
+```
+
+The serializer uses customizable [profiles](profiles.md) to generate an RDF graph (an [RDFLib Graph class](https://rdflib.readthedocs.org/en/latest/apidocs/rdflib.html#rdflib.graph.Graph)).
+By default these use the [mapping](mapping.md) described in the previous section.
+
+In some cases, if the default CKAN field that maps to a DCAT property is not present, some other fallback
+values will be used instead. For instance, if the `contact_email` field is not found, `maintainer_email`
+and `author_email` will be used (if present) for the email property of the `adms:contactPoint` property.
+
+Note that the serializer will look for both a first-level field and an extra field with the same key, i.e. both
+the following values will be used for `dct:accrualPeriodicity`:
+
+ {
+ "name": "my-dataset",
+ "frequency": "monthly",
+ ...
+ }
+
+ {
+ "name": "my-dataset",
+ "extras": [
+ {"key": "frequency", "value": "monthly"},
+ ]
+ ...
+ }
+
+Once the dataset graph has been obtained, this is serialized into a text format using [RDFLib](https://rdflib.readthedocs.org/),
+so any format it supports can be obtained (common formats are 'xml', 'turtle' or 'json-ld').
+
diff --git a/examples/ckan/ckan_full_dataset_dcat_ap.json b/examples/ckan/ckan_full_dataset_dcat_ap.json
index 6adb770d..bc170025 100644
--- a/examples/ckan/ckan_full_dataset_dcat_ap.json
+++ b/examples/ckan/ckan_full_dataset_dcat_ap.json
@@ -169,6 +169,13 @@
}
],
"spatial_resolution_in_meters": 1.5,
+ "qualified_relation": [
+ {
+ "uri": "",
+ "relation": "http://example.com/dataset/3.141592",
+ "role": "http://www.iana.org/assignments/relation/related"
+ }
+ ],
"resources": [
{
"name": "Resource 1",
diff --git a/examples/ckan/ckan_full_dataset_dcat_ap_vocabularies.json b/examples/ckan/ckan_full_dataset_dcat_ap_vocabularies.json
index 9e0193e9..3c8ca3c5 100644
--- a/examples/ckan/ckan_full_dataset_dcat_ap_vocabularies.json
+++ b/examples/ckan/ckan_full_dataset_dcat_ap_vocabularies.json
@@ -148,6 +148,13 @@
}
],
"spatial_resolution_in_meters": 1.5,
+ "qualified_relation": [
+ {
+ "uri": "",
+ "relation": "http://example.com/dataset/3.141592",
+ "role": "http://www.iana.org/assignments/relation/related"
+ }
+ ],
"resources": [
{
"name": "Resource 1",
diff --git a/examples/ckan/ckan_full_dataset_dcat_us_vocabularies.json b/examples/ckan/ckan_full_dataset_dcat_us_vocabularies.json
index 69ff8d6b..d9e3992f 100644
--- a/examples/ckan/ckan_full_dataset_dcat_us_vocabularies.json
+++ b/examples/ckan/ckan_full_dataset_dcat_us_vocabularies.json
@@ -186,6 +186,13 @@
"license": "https://resources.data.gov/vocab/license/TODO/CC_BYNC_4_0"
}
],
+ "qualified_relation": [
+ {
+ "uri": "",
+ "relation": "http://example.com/dataset/3.141592",
+ "role": "http://www.iana.org/assignments/relation/related"
+ }
+ ],
"resources": [
{
"name": "Resource 1",
diff --git a/examples/ckan/health_dcat_ap.json b/examples/ckan/health_dcat_ap.json
new file mode 100644
index 00000000..2670c77b
--- /dev/null
+++ b/examples/ckan/health_dcat_ap.json
@@ -0,0 +1,199 @@
+[
+ {
+ "access_rights": "http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC",
+ "analytics": [
+ "http://example.com/analytics"
+ ],
+ "alternate_identifier": [
+ "internalURI:admsIdentifier0"
+ ],
+ "applicable_legislation": [
+ "http://data.europa.eu/eli/reg/2022/868/oj"
+ ],
+ "author": null,
+ "author_email": null,
+ "code_values": [
+ "http://example.com/code1",
+ "http://example.com/code2"
+ ],
+ "coding_system": [
+ "http://www.wikidata.org/entity/P1690",
+ "http://www.wikidata.org/entity/P4229"
+ ],
+ "conforms_to": [
+ "http://www.wikidata.org/entity/Q19597236"
+ ],
+ "creator_user_id": null,
+ "dcat_type": "n1049372e768c4429a6b2200c22f5f1a4b7",
+ "documentation": [
+ "n1049372e768c4429a6b2200c22f5f1a4b9"
+ ],
+ "frequency": "http://publications.europa.eu/resource/authority/frequency/DAILY",
+ "health_category": [
+ "http://example.com/ontology/resource/authority/healthcategories/PHDR",
+ "http://example.com/ontology/resource/authority/healthcategories/IDHP",
+ "http://example.com/ontology/resource/authority/healthcategories/DIOH",
+ "http://example.com/ontology/resource/authority/healthcategories/EHRS"
+ ],
+ "health_theme": [
+ "http://www.wikidata.org/entity/Q7907952",
+ "http://www.wikidata.org/entity/Q58624061"
+ ],
+ "id": "e7ccf79d-705c-427f-8e96-f87bcd6e5318",
+ "identifier": "http://example.com/dataset/1234567890",
+ "is_referenced_by": [
+ "https://doi.org/10.1038/sdata.2016.18",
+ "https://dx.doi.org/10.1002/jmri.28679"
+ ],
+ "isopen": false,
+ "issued": "2024-01-01T00:00:00+00:00",
+ "language": [
+ "http://publications.europa.eu/resource/authority/language/ENG",
+ "http://publications.europa.eu/resource/authority/language/NLD",
+ "http://publications.europa.eu/resource/authority/language/FRA"
+ ],
+ "legal_basis": [
+ "https://w3id.org/dpv#Consent"
+ ],
+ "license_id": "",
+ "license_title": "",
+ "maintainer": null,
+ "maintainer_email": null,
+ "max_typical_age": "110",
+ "metadata_created": "2024-12-02T19:00:30.897399",
+ "metadata_modified": "2024-12-02T19:00:30.897406",
+ "min_typical_age": "0",
+ "modified": "2024-12-31T23:59:59+00:00",
+ "name": "test-dcat-1",
+ "notes": "This dataset is an example of using HealthDCAT-AP in CKAN",
+ "num_resources": 0,
+ "num_tags": 3,
+ "number_of_records": "123456789",
+ "number_of_unique_individuals": "7654321",
+ "organization": null,
+ "personal_data": [
+ "https://w3id.org/dpv/dpv-pd#Age",
+ "https://w3id.org/dpv/dpv-pd#Gender",
+ "https://w3id.org/dpv/dpv-pd#HealthRecord"
+ ],
+ "population_coverage": [
+ "This example includes a very non-descript population"
+ ],
+ "private": false,
+ "provenance": "This example dataset is partly sourced from TEHDAS2",
+ "publisher_note": [
+ "Health-RI is the Dutch health care initiative to build an integrated health data infrastructure for research and innovation."
+ ],
+ "publisher_type": [
+ "http://example.com/publisherType/undefined"
+ ],
+ "purpose": [
+ "https://w3id.org/dpv#AcademicResearch"
+ ],
+ "qualified_relation": [
+ {
+ "uri": "",
+ "relation": "http://example.com/dataset/3.141592",
+ "role": "http://www.iana.org/assignments/relation/related"
+ }
+ ],
+ "state": "active",
+ "temporal_resolution": "P1D",
+ "theme": [
+ "http://publications.europa.eu/resource/authority/data-theme/HEAL"
+ ],
+ "title": "HealthDCAT-AP test dataset",
+ "type": "dataset",
+ "uri": "http://example.healthdata.nl/set/dataset",
+ "version_notes": "Dataset continuously updated",
+ "contact": [
+ {
+ "email": "covacsurv@sciensano.be",
+ "identifier": "",
+ "name": "Contact Point"
+ }
+ ],
+ "creator": [
+ {
+ "email": "info@example.com",
+ "identifier": "",
+ "name": "Contact Point",
+ "type": "",
+ "url": "https://example.com/homepage"
+ }
+ ],
+ "extras": [
+ {
+ "key": "related_resource",
+ "value": "[\"http://example.com/dataset/9876543210\"]"
+ },
+ {
+ "key": "sample",
+ "value": "[\"http://example.com/sample\"]"
+ },
+ {
+ "key": "spatial_uri",
+ "value": "http://publications.europa.eu/resource/authority/country/BEL"
+ }
+ ],
+ "hdab": [
+ {
+ "email": "hdab@example.com",
+ "identifier": "",
+ "name": "EU Health Data Access Body",
+ "type": "",
+ "uri": "",
+ "url": "https://www.example.com/hdab"
+ }
+ ],
+ "publisher": [
+ {
+ "email": "info@example.com",
+ "identifier": "",
+ "name": "Contact Point",
+ "type": "",
+ "uri": "",
+ "url": "https://healthdata.nl"
+ }
+ ],
+ "retention_period": [
+ {
+ "end": "2034-12-31",
+ "start": "2020-03-01"
+ }
+ ],
+ "tags": [
+ {
+ "display_name": "Test 1",
+ "id": "5c418ec2-cb41-4c42-9b9c-f5d1e3a831e5",
+ "name": "Test 1",
+ "state": "active",
+ "vocabulary_id": null
+ },
+ {
+ "display_name": "Test 2",
+ "id": "c4117ace-2114-470d-b6e9-0df7580a12d8",
+ "name": "Test 2",
+ "state": "active",
+ "vocabulary_id": null
+ },
+ {
+ "display_name": "Test 3",
+ "id": "d5a5288d-3bff-431e-be94-12c71d25d75b",
+ "name": "Test 3",
+ "state": "active",
+ "vocabulary_id": null
+ }
+ ],
+ "temporal_coverage": [
+ {
+ "end": "2024-12-31",
+ "start": "2020-03-01"
+ }
+ ],
+ "resources": [],
+ "groups": [],
+ "relationships_as_subject": [],
+ "relationships_as_object": []
+ }
+]
\ No newline at end of file
diff --git a/examples/dcat/dataset.rdf b/examples/dcat/dataset.rdf
index 42f1ea5e..5ce71e1c 100644
--- a/examples/dcat/dataset.rdf
+++ b/examples/dcat/dataset.rdf
@@ -70,6 +70,12 @@
PT15M
+
+
+
+
+
+ Point of Contact
diff --git a/examples/dcat/dataset_health.ttl b/examples/dcat/dataset_health.ttl
new file mode 100644
index 00000000..a665c1ee
--- /dev/null
+++ b/examples/dcat/dataset_health.ttl
@@ -0,0 +1,295 @@
+@prefix adms: .
+@prefix dcat: .
+@prefix dcatap: .
+@prefix dct: .
+@prefix dqv: .
+@prefix foaf: .
+@prefix locn: .
+@prefix oa: .
+@prefix prov: .
+@prefix rdfs: .
+@prefix skos: .
+@prefix spdx: .
+@prefix vcard: .
+
+
+ a dcat:Resource , dcat:Dataset;
+ dcatap:applicableLegislation ;
+
+ ;
+
+ ,
+ ;
+ ,
+ ;
+
+ [ a foaf:Organization;
+ foaf:homepage ;
+ foaf:mbox ;
+ foaf:name "EU Health Data Access Body"
+ ];
+
+ , , , ;
+
+ , ;
+
+ "110"^^;
+
+ "0"^^;
+
+ "123456789"^^;
+
+ "7654321"^^;
+
+ "This example includes a very non-descript population";
+
+ "Health-RI is the Dutch health care initiative to build an integrated health data infrastructure for research and innovation.";
+
+ ;
+
+ [ a dct:PeriodOfTime;
+ rdfs:comment "As stated in the CSI deliberation";
+ dcat:endDate "2034-12-31"^^;
+ dcat:startDate "2020-03-01"^^
+ ];
+ dct:accessRights ;
+ dct:accrualPeriodicity ;
+ dct:alternative "TEST-DATASET";
+ dct:conformsTo ;
+ dct:creator ;
+ dct:description "This dataset is an example of using HealthDCAT-AP in CKAN";
+ dct:identifier "http://example.com/dataset/1234567890"^^;
+ dct:isPartOf ;
+ dct:isReferencedBy , ;
+ dct:issued "2024-01-01T00:00:00Z"^^;
+ dct:language , , ;
+ dct:modified "2024-12-31T23:59:59Z"^^;
+ dct:provenance [ a dct:ProvenanceStatement;
+ rdfs:label "This example dataset is partly sourced from TEHDAS2"
+ ];
+ dct:publisher [ a foaf:Organization , foaf:Agent;
+ foaf:homepage ;
+ foaf:mbox ;
+ foaf:name "Contact Point"
+ ];
+ dct:relation ;
+ dcat:qualifiedRelation [
+ a dcat:Relationship ;
+ dct:relation ;
+ dcat:hadRole
+ ];
+ dct:spatial ;
+ dct:temporal [ a dct:PeriodOfTime;
+ dcat:endDate "2024-12-31"^^;
+ dcat:startDate "2020-03-01"^^
+ ];
+ dct:title "HealthDCAT-AP test dataset";
+ dct:type [ a skos:Concept;
+ skos:inScheme ;
+ skos:prefLabel "Personal Data"
+ ];
+ adms:identifier ;
+ adms:sample ;
+ adms:versionNotes "Dataset continuously updated";
+ dcat:contactPoint ;
+ # dcat:distribution ;
+ dcat:hasVersion ;
+ dcat:keyword "Test 1" , "Test 2" , "Test 3";
+ dcat:spatialResolutionInMeters "10"^^;
+ dcat:temporalResolution "P1D"^^;
+ dcat:theme ;
+ # dcat:version is not mapped in ckan and should be hasVersion
+ # dcat:version "Project HDBP0250";
+ dqv:hasQualityAnnotation [ a dqv:QualityCertificate;
+ oa:hasBody ;
+ oa:hasTarget ;
+ oa:motivatedBy dqv:qualityAssessment
+ ];
+ prov:qualifiedAttribution ;
+ prov:wasGeneratedBy ;
+ foaf:page [ a foaf:Document;
+ rdfs:label "Landing Page for Sciensano";
+ foaf:homepage
+ ];
+
+ ;
+
+ ,
+ ,
+ ;
+
+ .
+
+
+ a dcat:Distribution;
+ dcatap:applicableLegislation ;
+ dct:format ;
+ dct:identifier "http://ehelse.healthdataportal.eu/analytics/47f55653-a151-48c1-8d90-940561da6e57";
+ dct:isPartOf ;
+ dct:issued "2024-06-03T08:51:00Z"^^;
+ dct:license ;
+ dct:modified "2024-06-04T18:00:00Z"^^;
+ dct:rights [ a dct:RightsStatement;
+ rdfs:label "_g_L202C11377" , "internalURI:wasGeneratedBy0" , "_g_L123C7733"
+ ];
+ dct:title "Technical report number of unique study subjects available by environment for project HDBP0250";
+ dcat:accessURL ;
+ dcat:downloadURL ;
+ dcat:mediaType .
+
+
+ a dct:MediaType .
+
+
+ a foaf:Agent;
+ foaf:homepage ;
+ foaf:mbox ;
+ foaf:name "Contact Point" .
+
+
+ a adms:Identifier;
+ skos:notation "https://www.healthinformationportal.eu/health-information-sources/linking-registers-covid-19-vaccine-surveillance"^^;
+ adms:schemaAgency "Health Information Portal" .
+
+
+ a vcard:Organization , vcard:Kind;
+ vcard:fn "Contact Point";
+ vcard:hasEmail ;
+ vcard:hasURL ;
+ vcard:organisationName "Contact Point";
+ vcard:organisationUnit "Health Information" .
+
+
+ a dcat:CatalogRecord;
+ dct:creator ;
+ dct:identifier "16e16149-bf41-42f6-8741-225e8c97a35e";
+ dct:issued "2024-10-04T14:28:36Z"^^;
+ dct:modified "2024-10-09T17:34:28Z"^^;
+ spdx:checksum [ a spdx:Checksum;
+ spdx:algorithm spdx:checksumAlgorithm_md5;
+ spdx:checksumValue "ea77c251b6945e450ae4d66c581495d4"
+ ];
+ foaf:primaryTopic .
+
+
+
+ a dct:LinguisticSystem .
+
+
+ a ;
+ dct:title "ID_TU_STATBEL_POP";
+
+ ;
+ dcat:keyword "TEST-DATASET" .
+
+
+ a dcat:Distribution;
+ dcatap:applicableLegislation ;
+ dct:format ;
+ dct:identifier "http://ehelse.healthdataportal.eu/sample/fe921169-4619-4386-8bfe-60ea131dbe96";
+ dct:isPartOf ;
+ dct:issued "2024-06-03T08:51:00Z"^^;
+ dct:language ;
+ dct:license ;
+ dct:modified "2024-06-04T18:00:00Z"^^;
+ dct:rights [ a dct:RightsStatement;
+ rdfs:label "Free access."
+ ];
+ dct:title "Proxy data generating for the EHDS2 Pilot project Sciensano Use Case";
+ dcat:accessURL ;
+ dcat:downloadURL ;
+ dcat:mediaType .
+
+
+
+ a dct:LinguisticSystem .
+
+
+ a dct:LinguisticSystem .
+
+
+ a skos:Concept;
+ skos:prefLabel "National Public Health Institute" .
+
+
+ a dct:RightsStatement .
+
+
+ a dct:Frequency .
+
+
+ a prov:Attribution;
+ dcat:hadRole ;
+ prov:agent [ a foaf:Organization;
+ foaf:homepage ;
+ foaf:mbox ;
+ foaf:name "Contact Point"
+ ] .
+
+
+ a dct:Location .
+
+
+ a skos:Concept;
+ dct:identifier "https://icd.who.int/browse10/2019/en#/Y59.0"^^;
+ skos:definition "Viral vaccines";
+ skos:hasTopConcept ;
+ skos:notation "Y59.0";
+ skos:prefLabel "Viral vaccines" .
+
+
+ a dct:MediaTypeOrExtent .
+
+#
+# a dcat:Distribution;
+# dcatap:applicableLegislation ;
+# dct:description "EU Health Data Access Body For better Healthcare, Research & Policy Making";
+# dct:format ;
+# dct:identifier "http://ehelse.healthdataportal.eu/distribution/13a3851d-6cdf-4570-a7f0-7f03015d1925";
+# dct:isPartOf ;
+# dct:issued "2024-06-03T08:51:00Z"^^;
+# dct:license ;
+# dct:modified "2024-06-04T18:00:00Z"^^;
+# dct:rights [ a dct:RightsStatement;
+# rdfs:label "Access to data is conditional on the issuance of a permit by the HDAB after submission of a data request application (English)"
+# ];
+# dct:title "EU Health Data Access Body";
+# dcat:accessURL ;
+# dcat:byteSize "80000"^^ .
+
+
+ a prov:Activity;
+ rdfs:label "http://dbpedia.org/resource/Record_linkage";
+ rdfs:seeAlso ;
+ dct:type ;
+ prov:startedAtTime "2021-01-01T00:00:00Z"^^;
+ prov:wasAssociatedWith [ a prov:Agent;
+ prov:actedOnBehalfOf [ a prov:Organization , prov:Agent;
+ foaf:name "Contact Point"
+ ];
+ foaf:homepage ;
+ foaf:mbox ;
+ foaf:name "Dr. Joris van Loenhout"
+ ];
+ foaf:page .
+
+
+ a ;
+
+ ;
+
+ "Patient death reason\tInformation on whether the cause of death was COVID-19.";
+
+ "CD_COD_COVID" .
+
+
+ a skos:Concept;
+ dct:identifier "https://icd.who.int/browse10/2019/en#/U07.1"^^;
+ skos:definition "COVID-19, virus identified";
+ skos:hasTopConcept ;
+ skos:notation "U07.1";
+ skos:prefLabel "Test 1" .
+
+
+ a dct:LicenseDocument;
+ rdfs:label "Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported" .
diff --git a/examples/dcat/dataset_health_no_blank.ttl b/examples/dcat/dataset_health_no_blank.ttl
new file mode 100644
index 00000000..ba854b5f
--- /dev/null
+++ b/examples/dcat/dataset_health_no_blank.ttl
@@ -0,0 +1,81 @@
+# This graph contains no blank nodes, to allow for easy comparison with a generated graph.
+# The blank nodes (kept below as comments) can be compared manually.
+
+@prefix adms: .
+@prefix dcat: .
+@prefix dcatap: .
+@prefix dct: .
+@prefix dpv: .
+@prefix foaf: .
+@prefix healthdcatap: .
+@prefix rdfs: .
+@prefix skos: .
+@prefix vcard: .
+@prefix xsd: .
+
+ a dcat:Dataset ;
+# healthdcatap:hdab [ a foaf:Agent ;
+# vcard:hasEmail ;
+# foaf:homepage ;
+# foaf:name "EU Health Data Access Body" ] ;
+# dct:provenance [ a dct:ProvenanceStatement ;
+# rdfs:label "This example dataset is partly sourced from TEHDAS2" ] ;
+# dct:publisher [ a foaf:Agent ;
+# vcard:hasEmail ;
+# foaf:homepage ;
+# foaf:name "Contact Point" ] ;
+# dct:temporal [ a dct:PeriodOfTime ;
+# dcat:endDate "2024-12-31"^^xsd:date ;
+# dcat:startDate "2020-03-01"^^xsd:date ] ;
+# adms:identifier [ a adms:Identifier ;
+# skos:notation "internalURI:admsIdentifier0" ] ;
+# dcat:contactPoint [ a vcard:Kind ;
+# vcard:fn "Contact Point" ;
+# vcard:hasEmail ] ;
+ dcatap:applicableLegislation ;
+ healthdcatap:analytics ;
+ healthdcatap:hasCodeValues ,
+ ;
+ healthdcatap:hasCodingSystem ,
+ ;
+ healthdcatap:healthCategory ,
+ ,
+ ,
+ ,
+ ,
+ ;
+ healthdcatap:maxTypicalAge "110"^^xsd:nonNegativeInteger ;
+ healthdcatap:minTypicalAge "0"^^xsd:nonNegativeInteger ;
+ healthdcatap:numberOfRecords "123456789"^^xsd:nonNegativeInteger ;
+ healthdcatap:numberOfUniqueIndividuals "7654321"^^xsd:nonNegativeInteger ;
+ healthdcatap:populationCoverage "This example includes a very non-descript population" ;
+ healthdcatap:publisherNote "Health-RI is the Dutch health care initiative to build an integrated health data infrastructure for research and innovation." ;
+ healthdcatap:publisherType ;
+ dct:accessRights ;
+ dct:accrualPeriodicity ;
+ dct:conformsTo ;
+ dct:description "This dataset is an example of using HealthDCAT-AP in CKAN" ;
+ dct:identifier ;
+ dct:isReferencedBy ,
+ ;
+ dct:issued "2024-01-01T00:00:00+00:00"^^xsd:dateTime ;
+ dct:language ,
+ ,
+ ;
+ dct:modified "2024-12-31T23:59:59+00:00"^^xsd:dateTime ;
+ dct:relation ;
+ dct:title "HealthDCAT-AP test dataset" ;
+ dct:type "n1049372e768c4429a6b2200c22f5f1a4b7" ;
+ adms:sample ;
+ adms:versionNotes "Dataset continuously updated" ;
+ dcat:keyword "Test 1",
+ "Test 2",
+ "Test 3" ;
+ dcat:temporalResolution "P1D"^^xsd:duration ;
+ dcat:theme ;
+ foaf:page "n1049372e768c4429a6b2200c22f5f1a4b9" ;
+ dpv:hasLegalBasis dpv:Consent ;
+ dpv:hasPurpose dpv:AcademicResearch ;
+ dpv:hasPersonalData ,
+ ,
+ .
diff --git a/mkdocs.yml b/mkdocs.yml
index bed64a4b..5287e055 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -70,7 +70,9 @@ nav:
- DCAT support:
- 'endpoints.md'
- DCAT ↔ CKAN mapping: 'mapping.md'
- - 'profiles.md'
+ - Base profiles: 'profiles.md'
+ - Application profiles: 'application-profiles.md'
+ - Writing profiles: 'writing-profiles.md'
- Other features:
- 'harvester.md'
- Google Dataset Search: 'google-dataset-search.md'
diff --git a/pyproject.toml b/pyproject.toml
index b7634286..80033250 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,5 +61,6 @@ euro_dcat_ap = "ckanext.dcat.profiles:EuropeanDCATAPProfile"
euro_dcat_ap_2 = "ckanext.dcat.profiles:EuropeanDCATAP2Profile"
euro_dcat_ap_3 = "ckanext.dcat.profiles:EuropeanDCATAP3Profile"
euro_dcat_ap_scheming = "ckanext.dcat.profiles:EuropeanDCATAPSchemingProfile"
-dcat_us_3="ckanext.dcat.profiles:DCATUS3Profile"
+euro_health_dcat_ap = "ckanext.dcat.profiles:EuropeanHealthDCATAPProfile"
+dcat_us_3 = "ckanext.dcat.profiles:DCATUS3Profile"
schemaorg = "ckanext.dcat.profiles:SchemaOrgProfile"