2025-12-01
This commit is contained in:
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
Utilities for interacting with SHACL Shapes Graphs more easily.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Optional, Union
|
||||
|
||||
from rdflib import BNode, Graph, Literal, URIRef, paths
|
||||
from rdflib.collection import Collection
|
||||
from rdflib.namespace import RDF, SH
|
||||
from rdflib.paths import Path
|
||||
from rdflib.term import Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.term import IdentifiedNode
|
||||
|
||||
|
||||
class SHACLPathError(Exception):
    """Raised when a SHACL path cannot be parsed from, or built into, a graph."""
|
||||
|
||||
|
||||
# SHACL predicate used to express each variable-length path modifier when a
# path expression is written out as triples. Looked up by the ``mod`` of a
# ``MulPath``; a modifier missing from this table has no SHACL equivalent.
_PATH_MOD_TO_PRED = {
    paths.ZeroOrMore: SH.zeroOrMorePath,
    paths.OneOrMore: SH.oneOrMorePath,
    paths.ZeroOrOne: SH.zeroOrOnePath,
}
|
||||
|
||||
|
||||
# This implementation is roughly based on
|
||||
# pyshacl.helper.sparql_query_helper::SPARQLQueryHelper._shacl_path_to_sparql_path
|
||||
def parse_shacl_path(
    shapes_graph: Graph,
    path_identifier: Node,
) -> Union[URIRef, Path]:
    """
    Parse a valid SHACL path (e.g. the object of a triple with predicate sh:path)
    from a :class:`~rdflib.graph.Graph` as a :class:`~rdflib.term.URIRef` if the path
    is simply a predicate or a :class:`~rdflib.paths.Path` otherwise.

    Nested path expressions are resolved recursively.

    :param shapes_graph: A :class:`~rdflib.graph.Graph` containing the path to be parsed
    :param path_identifier: A :class:`~rdflib.term.Node` of the path
    :return: A :class:`~rdflib.term.URIRef` or a :class:`~rdflib.paths.Path`
    :raises TypeError: If ``path_identifier`` (or any nested path item) is a
        :class:`~rdflib.term.Literal`
    :raises SHACLPathError: If a sequence or alternative list has fewer than two
        items, or if the node matches none of the SHACL path forms
    """
    # Literals are not allowed.
    if isinstance(path_identifier, Literal):
        raise TypeError("Literals are not a valid SHACL path.")

    # If a path is a URI, that's the whole path.
    if isinstance(path_identifier, URIRef):
        # rdf:nil here means an empty list was supplied where a path was expected.
        if path_identifier == RDF.nil:
            raise SHACLPathError(
                "A list of SHACL Paths must contain at least two path items."
            )
        return path_identifier

    # Handle Sequence Paths: the node itself is the head of an RDF list.
    if shapes_graph.value(path_identifier, RDF.first) is not None:
        sequence = list(shapes_graph.items(path_identifier))
        if len(sequence) < 2:
            raise SHACLPathError(
                "A list of SHACL Sequence Paths must contain at least two path items."
            )
        return paths.SequencePath(
            *(parse_shacl_path(shapes_graph, item) for item in sequence)
        )

    # The remaining forms hang off a dedicated predicate. Each lookup is
    # compared against None rather than tested for truthiness so that a falsy
    # node (for example an empty literal) is still passed to the recursive call
    # and rejected there, exactly like any other invalid nested path.

    # Handle sh:inversePath
    inverse_path = shapes_graph.value(path_identifier, SH.inversePath)
    if inverse_path is not None:
        return paths.InvPath(parse_shacl_path(shapes_graph, inverse_path))

    # Handle sh:alternativePath: the object is the head of an RDF list.
    alternative_path = shapes_graph.value(path_identifier, SH.alternativePath)
    if alternative_path is not None:
        alternatives = list(shapes_graph.items(alternative_path))
        if len(alternatives) < 2:
            raise SHACLPathError(
                "List of SHACL alternate paths must have at least two path items."
            )
        return paths.AlternativePath(
            *(
                parse_shacl_path(shapes_graph, alternative)
                for alternative in alternatives
            )
        )

    # Handle sh:zeroOrMorePath, sh:oneOrMorePath and sh:zeroOrOnePath, checked
    # in that order.
    for predicate, modifier in (
        (SH.zeroOrMorePath, "*"),
        (SH.oneOrMorePath, "+"),
        (SH.zeroOrOnePath, "?"),
    ):
        repeated_path = shapes_graph.value(path_identifier, predicate)
        if repeated_path is not None:
            return paths.MulPath(
                parse_shacl_path(shapes_graph, repeated_path), modifier
            )

    # None of the above options were found
    raise SHACLPathError(f"Cannot parse {repr(path_identifier)} as a SHACL Path.")
|
||||
|
||||
|
||||
def _build_path_component(
    graph: Graph, path_component: URIRef | Path
) -> IdentifiedNode:
    """
    Helper method that implements the recursive component of SHACL path
    triple construction.

    :param graph: A :class:`~rdflib.graph.Graph` into which to insert triples
    :param path_component: A :class:`~rdflib.term.URIRef` or
        :class:`~rdflib.paths.Path` that is part of a path expression
    :return: The :class:`~rdflib.term.IdentifiedNode` of the resource in the
        graph that corresponds to the provided path_component
    :raises TypeError: If ``path_component`` is neither a
        :class:`~rdflib.term.URIRef` nor a :class:`~rdflib.paths.Path`
    :raises SHACLPathError: If a sequence or alternative path has fewer than two
        items, if a path modifier is unknown, or if the kind of path has no
        SHACL representation
    """
    # Literals or other types are not allowed
    if not isinstance(path_component, (URIRef, Path)):
        raise TypeError(
            f"Objects of type {type(path_component)} are not valid "
            + "components of a SHACL path."
        )

    # If the path component is a URI, return it; no triples are needed
    if isinstance(path_component, URIRef):
        return path_component

    # Otherwise, the path component is represented as a blank node
    bnode = BNode()

    # Handle Sequence Paths
    if isinstance(path_component, paths.SequencePath):
        # Sequence paths are a Collection directly with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "A list of SHACL Sequence Paths must contain at least two path items."
            )
        Collection(
            graph,
            bnode,
            [_build_path_component(graph, arg) for arg in path_component.args],
        )

    # Handle Inverse Paths
    elif isinstance(path_component, paths.InvPath):
        graph.add(
            (bnode, SH.inversePath, _build_path_component(graph, path_component.arg))
        )

    # Handle Alternative Paths
    elif isinstance(path_component, paths.AlternativePath):
        # Alternative paths are a Collection but referenced by sh:alternativePath
        # with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "List of SHACL alternate paths must have at least two path items."
            )
        coll = Collection(
            graph,
            BNode(),
            [_build_path_component(graph, arg) for arg in path_component.args],
        )
        graph.add((bnode, SH.alternativePath, coll.uri))

    # Handle Variable Length Paths
    elif isinstance(path_component, paths.MulPath):
        # Get the predicate corresponding to the path modifier
        pred = _PATH_MOD_TO_PRED.get(path_component.mod)
        if pred is None:
            raise SHACLPathError(f"Unknown path modifier {path_component.mod}")
        graph.add((bnode, pred, _build_path_component(graph, path_component.path)))

    # Any other kind of Path has no SHACL counterpart. Fail loudly instead of
    # returning a blank node that has no triples describing it.
    else:
        raise SHACLPathError(
            f"Paths of type {type(path_component).__name__} cannot be "
            "represented as a SHACL path."
        )

    # Return the blank node created for the provided path_component
    return bnode
|
||||
|
||||
|
||||
def build_shacl_path(
    path: URIRef | Path, target_graph: Graph | None = None
) -> tuple[IdentifiedNode, Graph | None]:
    """
    Build the SHACL Path triples for a path given by a :class:`~rdflib.term.URIRef` for
    simple paths or a :class:`~rdflib.paths.Path` for complex paths.

    Returns an :class:`~rdflib.term.IdentifiedNode` for the path (which should be
    the object of a triple with predicate sh:path) and the graph into which any
    new triples were added.

    :param path: A :class:`~rdflib.term.URIRef` or a :class:`~rdflib.paths.Path`
    :param target_graph: Optionally, a :class:`~rdflib.graph.Graph` into which to put
        constructed triples. If not provided, a new graph will be created
    :return: A (path_identifier, graph) tuple where:

        - path_identifier: If path is a :class:`~rdflib.term.URIRef`, this is simply
          the provided path. If path is a :class:`~rdflib.paths.Path`, this is
          the :class:`~rdflib.term.BNode` corresponding to the root of the SHACL
          path expression added to the graph.
        - graph: None if path is a :class:`~rdflib.term.URIRef` (as no new triples
          are constructed), even when a target_graph was supplied. If path is a
          :class:`~rdflib.paths.Path`, this is either the target_graph provided
          or a new graph into which the path triples were added.
    :raises TypeError: If path is neither a :class:`~rdflib.term.URIRef` nor a
        :class:`~rdflib.paths.Path`
    :raises SHACLPathError: If a sequence or alternative path has fewer than two
        items, or a variable-length path uses an unknown modifier
    """
    # If a path is a URI, that's the whole path. No graph needs to be constructed.
    if isinstance(path, URIRef):
        return path, None

    # Create a graph if one was not provided
    graph = Graph() if target_graph is None else target_graph

    # Recurse through the path to build the graph representation
    root = _build_path_component(graph, path)
    return root, graph
|
||||
Reference in New Issue
Block a user