2025-12-01
This commit is contained in:
@@ -0,0 +1,202 @@
|
||||
"""A pure Python package providing the core RDF constructs.
|
||||
|
||||
The package is intended to provide the core RDF types and interfaces
|
||||
for working with RDF. The package defines a plugin interface for
|
||||
parsers, stores, and serializers that other packages can use to
|
||||
implement parsers, stores, and serializers that will plug into the
|
||||
rdflib package.
|
||||
|
||||
The primary interface `rdflib` exposes to work with RDF is
|
||||
`rdflib.graph.Graph`.
|
||||
|
||||
A tiny example:
|
||||
|
||||
>>> from rdflib import Graph, URIRef, Literal
|
||||
|
||||
>>> g = Graph()
|
||||
>>> result = g.parse("http://www.w3.org/2000/10/swap/test/meet/blue.rdf")
|
||||
|
||||
>>> print("graph has %s statements." % len(g))
|
||||
graph has 4 statements.
|
||||
>>>
|
||||
>>> for s, p, o in g:
|
||||
... if (s, p, o) not in g:
|
||||
... raise Exception("It better be!")
|
||||
|
||||
>>> s = g.serialize(format='nt')
|
||||
>>>
|
||||
>>> sorted(g) == [
|
||||
... (URIRef("http://meetings.example.com/cal#m1"),
|
||||
... URIRef("http://www.example.org/meeting_organization#homePage"),
|
||||
... URIRef("http://meetings.example.com/m1/hp")),
|
||||
... (URIRef("http://www.example.org/people#fred"),
|
||||
... URIRef("http://www.example.org/meeting_organization#attending"),
|
||||
... URIRef("http://meetings.example.com/cal#m1")),
|
||||
... (URIRef("http://www.example.org/people#fred"),
|
||||
... URIRef("http://www.example.org/personal_details#GivenName"),
|
||||
... Literal("Fred")),
|
||||
... (URIRef("http://www.example.org/people#fred"),
|
||||
... URIRef("http://www.example.org/personal_details#hasEmail"),
|
||||
... URIRef("mailto:fred@example.com"))
|
||||
... ]
|
||||
True
|
||||
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from importlib import metadata
|
||||
|
||||
_DISTRIBUTION_METADATA = metadata.metadata("rdflib")
|
||||
|
||||
__docformat__ = "restructuredtext en"
|
||||
|
||||
__version__: str = _DISTRIBUTION_METADATA["Version"]
|
||||
__date__ = "2025-03-29"
|
||||
|
||||
__all__ = [
|
||||
"URIRef",
|
||||
"BNode",
|
||||
"IdentifiedNode",
|
||||
"Literal",
|
||||
"Node",
|
||||
"Variable",
|
||||
"Namespace",
|
||||
"Dataset",
|
||||
"Graph",
|
||||
"ConjunctiveGraph",
|
||||
"BRICK",
|
||||
"CSVW",
|
||||
"DC",
|
||||
"DCAT",
|
||||
"DCMITYPE",
|
||||
"DCTERMS",
|
||||
"DOAP",
|
||||
"FOAF",
|
||||
"ODRL2",
|
||||
"ORG",
|
||||
"OWL",
|
||||
"PROF",
|
||||
"PROV",
|
||||
"QB",
|
||||
"RDF",
|
||||
"RDFS",
|
||||
"SDO",
|
||||
"SH",
|
||||
"SKOS",
|
||||
"SOSA",
|
||||
"SSN",
|
||||
"TIME",
|
||||
"VANN",
|
||||
"VOID",
|
||||
"XMLNS",
|
||||
"XSD",
|
||||
"util",
|
||||
"plugin",
|
||||
"query",
|
||||
"NORMALIZE_LITERALS",
|
||||
]
|
||||
|
||||
# Package-level logger; handlers are only attached below for interactive use.
logger = logging.getLogger(__name__)

try:
    import __main__

    # Show log messages in interactive mode: ``__main__`` has no ``__file__``
    # and the standard streams are present, with stderr attached to a terminal.
    if not hasattr(__main__, "__file__") and sys.stdout is not None:
        if hasattr(sys.stderr, "isatty") and sys.stderr.isatty():
            logger.setLevel(logging.INFO)
            logger.addHandler(logging.StreamHandler())
    # Keep the helper import out of the package namespace.
    del __main__
except ImportError:
    # Main already imported from elsewhere
    import warnings

    warnings.warn("__main__ already imported", ImportWarning)
    del warnings

# ``sys`` was only needed for the stream checks above.
del sys
|
||||
|
||||
|
||||
NORMALIZE_LITERALS = True
|
||||
"""
|
||||
If True - Literals lexical forms are normalized when created.
|
||||
I.e. the lexical form is parsed according to data-type, then the
|
||||
stored lexical form is the re-serialized value that was parsed.
|
||||
|
||||
Illegal values for a datatype are simply kept. The normalized keyword
|
||||
for Literal.__new__ can override this.
|
||||
|
||||
For example:
|
||||
|
||||
>>> from rdflib import Literal,XSD
|
||||
>>> Literal("01", datatype=XSD.int)
|
||||
rdflib.term.Literal("1", datatype=rdflib.term.URIRef("http://www.w3.org/2001/XMLSchema#integer"))
|
||||
|
||||
This flag may be changed at any time, but will only affect literals
|
||||
created after that time, previously created literals will remain
|
||||
(un)normalized.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
DAWG_LITERAL_COLLATION = False
|
||||
"""
|
||||
DAWG_LITERAL_COLLATION determines how literals are ordered or compared
|
||||
to each other.
|
||||
|
||||
In SPARQL, applying the >,<,>=,<= operators to literals of
|
||||
incompatible data-types is an error, i.e:
|
||||
|
||||
Literal(2)>Literal('cake') is neither true nor false, but an error.
|
||||
|
||||
This is a problem in PY3, where lists of Literals of incompatible
|
||||
types can no longer be sorted.
|
||||
|
||||
Setting this flag to True gives you strict DAWG/SPARQL compliance,
|
||||
setting it to False will order Literals with incompatible datatypes by
|
||||
datatype URI
|
||||
|
||||
In particular, this determines how the rich comparison operators for
|
||||
Literal work: __eq__, __ne__, __lt__, etc.
|
||||
"""
|
||||
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.namespace import (
|
||||
BRICK,
|
||||
CSVW,
|
||||
DC,
|
||||
DCAT,
|
||||
DCMITYPE,
|
||||
DCTERMS,
|
||||
DOAP,
|
||||
FOAF,
|
||||
ODRL2,
|
||||
ORG,
|
||||
OWL,
|
||||
PROF,
|
||||
PROV,
|
||||
QB,
|
||||
RDF,
|
||||
RDFS,
|
||||
SDO,
|
||||
SH,
|
||||
SKOS,
|
||||
SOSA,
|
||||
SSN,
|
||||
TIME,
|
||||
VANN,
|
||||
VOID,
|
||||
XMLNS,
|
||||
XSD,
|
||||
Namespace,
|
||||
)
|
||||
from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef, Variable
|
||||
|
||||
from rdflib import plugin, query, util # isort:skip
|
||||
from rdflib.container import * # isort:skip # noqa: F403
|
||||
@@ -0,0 +1,117 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import string
|
||||
import sys
|
||||
from typing import Dict
|
||||
from urllib.error import HTTPError
|
||||
from urllib.parse import quote as urlquote
|
||||
from urllib.parse import urljoin, urlsplit
|
||||
from urllib.request import HTTPRedirectHandler, Request, urlopen
|
||||
from urllib.response import addinfourl
|
||||
|
||||
|
||||
def _make_redirect_request(request: Request, http_error: HTTPError) -> Request:
|
||||
"""
|
||||
Create a new request object for a redirected request.
|
||||
|
||||
The logic is based on `urllib.request.HTTPRedirectHandler` from `this commit <https://github.com/python/cpython/blob/b58bc8c2a9a316891a5ea1a0487aebfc86c2793a/Lib/urllib/request.py#L641-L751>_`.
|
||||
|
||||
:param request: The original request that resulted in the redirect.
|
||||
:param http_error: The response to the original request that indicates a
|
||||
redirect should occur and contains the new location.
|
||||
:return: A new request object to the location indicated by the response.
|
||||
:raises HTTPError: the supplied ``http_error`` if the redirect request
|
||||
cannot be created.
|
||||
:raises ValueError: If the response code is `None`.
|
||||
:raises ValueError: If the response does not contain a ``Location`` header
|
||||
or the ``Location`` header is not a string.
|
||||
:raises HTTPError: If the scheme of the new location is not ``http``,
|
||||
``https``, or ``ftp``.
|
||||
:raises HTTPError: If there are too many redirects or a redirect loop.
|
||||
"""
|
||||
new_url = http_error.headers.get("Location")
|
||||
if new_url is None:
|
||||
raise http_error
|
||||
if not isinstance(new_url, str):
|
||||
raise ValueError(f"Location header {new_url!r} is not a string")
|
||||
|
||||
new_url_parts = urlsplit(new_url)
|
||||
|
||||
# For security reasons don't allow redirection to anything other than http,
|
||||
# https or ftp.
|
||||
if new_url_parts.scheme not in ("http", "https", "ftp", ""):
|
||||
raise HTTPError(
|
||||
new_url,
|
||||
http_error.code,
|
||||
f"{http_error.reason} - Redirection to url {new_url!r} is not allowed",
|
||||
http_error.headers,
|
||||
http_error.fp,
|
||||
)
|
||||
|
||||
# http.client.parse_headers() decodes as ISO-8859-1. Recover the original
|
||||
# bytes and percent-encode non-ASCII bytes, and any special characters such
|
||||
# as the space.
|
||||
new_url = urlquote(new_url, encoding="iso-8859-1", safe=string.punctuation)
|
||||
new_url = urljoin(request.full_url, new_url)
|
||||
|
||||
# XXX Probably want to forget about the state of the current
|
||||
# request, although that might interact poorly with other
|
||||
# handlers that also use handler-specific request attributes
|
||||
content_headers = ("content-length", "content-type")
|
||||
newheaders = {
|
||||
k: v for k, v in request.headers.items() if k.lower() not in content_headers
|
||||
}
|
||||
new_request = Request(
|
||||
new_url,
|
||||
headers=newheaders,
|
||||
origin_req_host=request.origin_req_host,
|
||||
unverifiable=True,
|
||||
)
|
||||
|
||||
visited: Dict[str, int]
|
||||
if hasattr(request, "redirect_dict"):
|
||||
visited = request.redirect_dict
|
||||
if (
|
||||
visited.get(new_url, 0) >= HTTPRedirectHandler.max_repeats
|
||||
or len(visited) >= HTTPRedirectHandler.max_redirections
|
||||
):
|
||||
raise HTTPError(
|
||||
request.full_url,
|
||||
http_error.code,
|
||||
HTTPRedirectHandler.inf_msg + http_error.reason,
|
||||
http_error.headers,
|
||||
http_error.fp,
|
||||
)
|
||||
else:
|
||||
visited = {}
|
||||
setattr(request, "redirect_dict", visited)
|
||||
|
||||
setattr(new_request, "redirect_dict", visited)
|
||||
visited[new_url] = visited.get(new_url, 0) + 1
|
||||
return new_request
|
||||
|
||||
|
||||
def _urlopen(request: Request) -> addinfourl:
|
||||
"""
|
||||
This is a shim for `urlopen` that handles HTTP redirects with status code
|
||||
308 (Permanent Redirect).
|
||||
|
||||
This function should be removed once all supported versions of Python
|
||||
handles the 308 HTTP status code.
|
||||
|
||||
:param request: The request to open.
|
||||
:return: The response to the request.
|
||||
"""
|
||||
try:
|
||||
return urlopen(request)
|
||||
except HTTPError as error:
|
||||
if error.code == 308 and sys.version_info < (3, 11):
|
||||
# HTTP response code 308 (Permanent Redirect) is not supported by python
|
||||
# versions older than 3.11. See <https://bugs.python.org/issue40321> and
|
||||
# <https://github.com/python/cpython/issues/84501> for more details.
|
||||
# This custom error handling should be removed once all supported
|
||||
# versions of Python handles 308.
|
||||
new_request = _make_redirect_request(request, error)
|
||||
return _urlopen(new_request)
|
||||
else:
|
||||
raise
|
||||
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
This module contains type aliases that should only be used when type checking
|
||||
as it would otherwise introduce a runtime dependency on `typing_extensions` for
|
||||
older python versions which is not desirable.
|
||||
|
||||
This was made mainly to accommodate ``sphinx-autodoc-typehints`` which cannot
|
||||
recognize type aliases from imported files if the type aliases are defined
|
||||
inside ``if TYPE_CHECKING:``. So instead of placing the type aliases in normal
|
||||
modules inside ``TYPE_CHECKING`` guards they are in this file which should only
|
||||
be imported inside ``TYPE_CHECKING`` guards.
|
||||
|
||||
.. important::
|
||||
Things inside this module are not for use outside of RDFLib
|
||||
and this module is not part of the RDFLib public API.
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
"_NamespaceSetString",
|
||||
"_MulPathMod",
|
||||
]
|
||||
|
||||
|
||||
from typing import Literal as PyLiteral
|
||||
|
||||
_NamespaceSetString = PyLiteral["core", "rdflib", "none"]
|
||||
_MulPathMod = PyLiteral["*", "+", "?"]
|
||||
@@ -0,0 +1,277 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterable, Iterator, List, Optional
|
||||
|
||||
from rdflib.namespace import RDF
|
||||
from rdflib.term import BNode, Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import Graph
|
||||
|
||||
__all__ = ["Collection"]
|
||||
|
||||
|
||||
class Collection:
    """
    List-like access to an RDF collection (a chain of ``rdf:first`` /
    ``rdf:rest`` triples) stored in a graph.

    See "Emulating container types":
    https://docs.python.org/reference/datamodel.html#emulating-container-types

    >>> from rdflib.term import Literal
    >>> from rdflib.graph import Graph
    >>> from pprint import pprint
    >>> listname = BNode()
    >>> g = Graph('Memory')
    >>> listItem1 = BNode()
    >>> listItem2 = BNode()
    >>> g.add((listname, RDF.first, Literal(1))) # doctest: +ELLIPSIS
    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
    >>> g.add((listname, RDF.rest, listItem1)) # doctest: +ELLIPSIS
    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
    >>> g.add((listItem1, RDF.first, Literal(2))) # doctest: +ELLIPSIS
    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
    >>> g.add((listItem1, RDF.rest, listItem2)) # doctest: +ELLIPSIS
    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
    >>> g.add((listItem2, RDF.rest, RDF.nil)) # doctest: +ELLIPSIS
    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
    >>> g.add((listItem2, RDF.first, Literal(3))) # doctest: +ELLIPSIS
    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
    >>> c = Collection(g,listname)
    >>> pprint([term.n3() for term in c])
    ['"1"^^<http://www.w3.org/2001/XMLSchema#integer>',
     '"2"^^<http://www.w3.org/2001/XMLSchema#integer>',
     '"3"^^<http://www.w3.org/2001/XMLSchema#integer>']

    >>> Literal(1) in c
    True
    >>> len(c)
    3
    >>> c._get_container(1) == listItem1
    True
    >>> c.index(Literal(2)) == 1
    True

    The collection is immutable if ``uri`` is the empty list
    (``http://www.w3.org/1999/02/22-rdf-syntax-ns#nil``).
    """

    def __init__(self, graph: Graph, uri: Node, seq: Optional[List[Node]] = None):
        """
        :param graph: The graph holding the list triples.
        :param uri: The head node of the list; a fresh ``BNode`` is used when
            a falsy value is given.
        :param seq: Optional items to append to the list on construction.
        """
        self.graph = graph
        self.uri = uri or BNode()
        if seq:
            self += seq

    def n3(self) -> str:
        """
        Return the items of the collection in N3 list notation.

        >>> from rdflib.term import Literal
        >>> from rdflib.graph import Graph
        >>> listname = BNode()
        >>> g = Graph('Memory')
        >>> listItem1 = BNode()
        >>> listItem2 = BNode()
        >>> g.add((listname, RDF.first, Literal(1))) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((listname, RDF.rest, listItem1)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((listItem1, RDF.first, Literal(2))) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((listItem1, RDF.rest, listItem2)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((listItem2, RDF.rest, RDF.nil)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((listItem2, RDF.first, Literal(3))) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> c = Collection(g, listname)
        >>> print(c.n3()) #doctest: +NORMALIZE_WHITESPACE
        ( "1"^^<http://www.w3.org/2001/XMLSchema#integer>
          "2"^^<http://www.w3.org/2001/XMLSchema#integer>
          "3"^^<http://www.w3.org/2001/XMLSchema#integer> )
        """
        return "( %s )" % (" ".join(i.n3() for i in self))

    def _get_container(self, index: int) -> Optional[Node]:
        """Gets the first, rest holding node at index.

        Follows ``rdf:rest`` links ``index`` times starting at the list head.
        Returns ``None`` if a link is missing on the way. A non-positive
        ``index`` returns the head itself.
        """
        assert isinstance(index, int)
        graph = self.graph
        container: Optional[Node] = self.uri
        for _ in range(index):
            container = graph.value(container, RDF.rest)
            if container is None:
                break
        return container

    def __len__(self) -> int:
        """length of items in collection."""
        return len(list(self.graph.items(self.uri)))

    def index(self, item: Node) -> int:
        """
        Returns the 0-based numerical index of the item in the list

        :raises ValueError: If the end of the list is reached without finding
            ``item``.
        :raises Exception: If a list node has no ``rdf:rest`` link.
        """
        listname = self.uri
        index = 0
        while True:
            if (listname, RDF.first, item) in self.graph:
                return index
            newlink = list(self.graph.objects(listname, RDF.rest))
            index += 1
            if newlink == [RDF.nil]:
                raise ValueError("%s is not in %s" % (item, self.uri))
            if not newlink:
                raise Exception("Malformed RDF Collection: %s" % self.uri)
            assert len(newlink) == 1, "Malformed RDF Collection: %s" % self.uri
            listname = newlink[0]

    def __getitem__(self, key: int) -> Node:
        """
        Return the item stored at position ``key``.

        :raises IndexError: If no list node exists at ``key``.
        :raises KeyError: If the node at ``key`` carries no ``rdf:first``
            value (this includes the position just past the last item).
        """
        c = self._get_container(key)
        if c is None:
            raise IndexError(key)
        v = self.graph.value(c, RDF.first)
        # Compare against None rather than relying on truthiness so that a
        # present but falsy member is still returned.
        if v is None:
            raise KeyError(key)
        return v

    def __setitem__(self, key: int, value: Node) -> None:
        """
        Replace the item stored at position ``key`` with ``value``.

        :raises IndexError: If no list node exists at ``key``, or ``key``
            points at the ``rdf:nil`` terminator.
        """
        c = self._get_container(key)
        # rdf:nil terminates the list and must never receive an rdf:first.
        if c is None or c == RDF.nil:
            raise IndexError(key)
        self.graph.set((c, RDF.first, value))

    def __delitem__(self, key: int) -> None:
        """
        Remove the item at position ``key`` and relink its neighbours.

        >>> from rdflib.namespace import RDF, RDFS
        >>> from rdflib import Graph
        >>> from pprint import pformat
        >>> g = Graph()
        >>> a = BNode('foo')
        >>> b = BNode('bar')
        >>> c = BNode('baz')
        >>> g.add((a, RDF.first, RDF.type)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((a, RDF.rest, b)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((b, RDF.first, RDFS.label)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((b, RDF.rest, c)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((c, RDF.first, RDFS.comment)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> g.add((c, RDF.rest, RDF.nil)) # doctest: +ELLIPSIS
        <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
        >>> len(g)
        6
        >>> def listAncestry(node, graph):
        ...   for i in graph.subjects(RDF.rest, node):
        ...     yield i
        >>> [str(node.n3())
        ...   for node in g.transitiveClosure(listAncestry, RDF.nil)]
        ['_:baz', '_:bar', '_:foo']
        >>> lst = Collection(g, a)
        >>> len(lst)
        3
        >>> b == lst._get_container(1)
        True
        >>> c == lst._get_container(2)
        True
        >>> del lst[1]
        >>> len(lst)
        2
        >>> len(g)
        4

        """
        self[key]  # to raise any potential key exceptions
        graph = self.graph
        current = self._get_container(key)
        assert current
        size = len(self)
        if size == 1 and key > 0:
            pass
        elif key == size - 1:
            # the tail
            priorlink = self._get_container(key - 1)
            # type error: Argument 1 to "set" of "Graph" has incompatible type "Tuple[Optional[Node], URIRef, URIRef]"; expected "Tuple[Node, Node, Any]"
            self.graph.set((priorlink, RDF.rest, RDF.nil))  # type: ignore[arg-type]
            graph.remove((current, None, None))
        else:
            following = self._get_container(key + 1)
            prior = self._get_container(key - 1)
            assert following and prior
            # Drop the node's own triples, then bridge the gap it leaves.
            graph.remove((current, None, None))
            graph.set((prior, RDF.rest, following))

    def __iter__(self) -> Iterator[Node]:
        """Iterator over items in Collections"""
        return self.graph.items(self.uri)

    def _end(self) -> Node:
        """Return the last list node: the one whose ``rdf:rest`` is missing
        or ``rdf:nil``."""
        # find end of list
        container = self.uri
        while True:
            rest = self.graph.value(container, RDF.rest)
            if rest is None or rest == RDF.nil:
                return container
            container = rest

    def append(self, item: Node) -> Collection:
        """
        Add ``item`` to the end of the list and return the collection.

        >>> from rdflib.term import Literal
        >>> from rdflib.graph import Graph
        >>> listname = BNode()
        >>> g = Graph()
        >>> c = Collection(g,listname,[Literal(1),Literal(2)])
        >>> links = [
        ...     list(g.subjects(object=i, predicate=RDF.first))[0] for i in c]
        >>> len([i for i in links if (i, RDF.rest, RDF.nil) in g])
        1

        :raises ValueError: If the list head is ``rdf:nil``.
        """

        end = self._end()
        if end == RDF.nil:
            raise ValueError("Cannot append to empty list")

        if (end, RDF.first, None) in self.graph:
            # append new node to the end of the linked list
            node = BNode()
            self.graph.set((end, RDF.rest, node))
            end = node

        self.graph.add((end, RDF.first, item))
        self.graph.add((end, RDF.rest, RDF.nil))
        return self

    def __iadd__(self, other: Iterable[Node]) -> Collection:
        """
        Append every item of ``other`` to the list and return the collection.

        :raises ValueError: If the list head is ``rdf:nil``.
        """
        end = self._end()
        if end == RDF.nil:
            raise ValueError("Cannot append to empty list")
        # Detach the current terminator; it is re-added after the new items.
        self.graph.remove((end, RDF.rest, None))

        for item in other:
            if (end, RDF.first, None) in self.graph:
                nxt = BNode()
                self.graph.add((end, RDF.rest, nxt))
                end = nxt

            self.graph.add((end, RDF.first, item))

        self.graph.add((end, RDF.rest, RDF.nil))
        return self

    def clear(self) -> Collection:
        """Remove the ``rdf:first`` and ``rdf:rest`` triples of every node in
        the list and return the collection."""
        container: Optional[Node] = self.uri
        graph = self.graph
        while container:
            # Look up the next node before this node's links are removed.
            rest = graph.value(container, RDF.rest)
            graph.remove((container, RDF.first, None))
            graph.remove((container, RDF.rest, None))
            container = rest
        return self
|
||||
@@ -0,0 +1,631 @@
|
||||
"""
|
||||
A collection of utilities for canonicalizing and inspecting graphs.
|
||||
|
||||
Among other things, they solve the problem of deterministic bnode
|
||||
comparisons.
|
||||
|
||||
Warning: the time to canonicalize bnodes may increase exponentially on
|
||||
degenerate larger graphs. Use with care!
|
||||
|
||||
Example of comparing two graphs::
|
||||
|
||||
>>> g1 = Graph().parse(format='n3', data='''
|
||||
... @prefix : <http://example.org/ns#> .
|
||||
... <http://example.org> :rel
|
||||
... <http://example.org/same>,
|
||||
... [ :label "Same" ],
|
||||
... <http://example.org/a>,
|
||||
... [ :label "A" ] .
|
||||
... ''')
|
||||
>>> g2 = Graph().parse(format='n3', data='''
|
||||
... @prefix : <http://example.org/ns#> .
|
||||
... <http://example.org> :rel
|
||||
... <http://example.org/same>,
|
||||
... [ :label "Same" ],
|
||||
... <http://example.org/b>,
|
||||
... [ :label "B" ] .
|
||||
... ''')
|
||||
>>>
|
||||
>>> iso1 = to_isomorphic(g1)
|
||||
>>> iso2 = to_isomorphic(g2)
|
||||
|
||||
These are not isomorphic::
|
||||
|
||||
>>> iso1 == iso2
|
||||
False
|
||||
|
||||
Diff the two graphs::
|
||||
|
||||
>>> in_both, in_first, in_second = graph_diff(iso1, iso2)
|
||||
|
||||
Present in both::
|
||||
|
||||
>>> def dump_nt_sorted(g):
|
||||
... for l in sorted(g.serialize(format='nt').splitlines()):
|
||||
... if l: print(l.decode('ascii'))
|
||||
|
||||
>>> dump_nt_sorted(in_both) #doctest: +SKIP
|
||||
<http://example.org>
|
||||
<http://example.org/ns#rel> <http://example.org/same> .
|
||||
<http://example.org>
|
||||
<http://example.org/ns#rel> _:cbcaabaaba17fecbc304a64f8edee4335e .
|
||||
_:cbcaabaaba17fecbc304a64f8edee4335e
|
||||
<http://example.org/ns#label> "Same" .
|
||||
|
||||
Only in first::
|
||||
|
||||
>>> dump_nt_sorted(in_first) #doctest: +SKIP
|
||||
<http://example.org>
|
||||
<http://example.org/ns#rel> <http://example.org/a> .
|
||||
<http://example.org>
|
||||
<http://example.org/ns#rel> _:cb124e4c6da0579f810c0ffe4eff485bd9 .
|
||||
_:cb124e4c6da0579f810c0ffe4eff485bd9
|
||||
<http://example.org/ns#label> "A" .
|
||||
|
||||
Only in second::
|
||||
|
||||
>>> dump_nt_sorted(in_second) #doctest: +SKIP
|
||||
<http://example.org>
|
||||
<http://example.org/ns#rel> <http://example.org/b> .
|
||||
<http://example.org>
|
||||
<http://example.org/ns#rel> _:cb558f30e21ddfc05ca53108348338ade8 .
|
||||
_:cb558f30e21ddfc05ca53108348338ade8
|
||||
<http://example.org/ns#label> "B" .
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# TODO:
|
||||
# - Doesn't handle quads.
|
||||
# - Add warning and/or safety mechanism before working on large graphs?
|
||||
# - use this in existing Graph.isomorphic?
|
||||
|
||||
__all__ = [
|
||||
"IsomorphicGraph",
|
||||
"to_isomorphic",
|
||||
"isomorphic",
|
||||
"to_canonical_graph",
|
||||
"graph_diff",
|
||||
"similar",
|
||||
]
|
||||
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from hashlib import sha256
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph, ReadOnlyGraphAggregate, _TripleType
|
||||
from rdflib.term import BNode, IdentifiedNode, Node, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from _hashlib import HASH
|
||||
|
||||
|
||||
def _total_seconds(td):
|
||||
result = td.days * 24 * 60 * 60
|
||||
result += td.seconds
|
||||
result += td.microseconds / 1000000.0
|
||||
return result
|
||||
|
||||
|
||||
class _runtime: # noqa: N801
|
||||
def __init__(self, label):
|
||||
self.label = label
|
||||
|
||||
def __call__(self, f):
|
||||
if self.label is None:
|
||||
self.label = f.__name__ + "_runtime"
|
||||
|
||||
def wrapped_f(*args, **kwargs):
|
||||
start = datetime.now()
|
||||
result = f(*args, **kwargs)
|
||||
if "stats" in kwargs and kwargs["stats"] is not None:
|
||||
stats = kwargs["stats"]
|
||||
stats[self.label] = _total_seconds(datetime.now() - start)
|
||||
return result
|
||||
|
||||
return wrapped_f
|
||||
|
||||
|
||||
class _call_count: # noqa: N801
|
||||
def __init__(self, label):
|
||||
self.label = label
|
||||
|
||||
def __call__(self, f):
|
||||
if self.label is None:
|
||||
self.label = f.__name__ + "_runtime"
|
||||
|
||||
def wrapped_f(*args, **kwargs):
|
||||
if "stats" in kwargs and kwargs["stats"] is not None:
|
||||
stats = kwargs["stats"]
|
||||
if self.label not in stats:
|
||||
stats[self.label] = 0
|
||||
stats[self.label] += 1
|
||||
return f(*args, **kwargs)
|
||||
|
||||
return wrapped_f
|
||||
|
||||
|
||||
class IsomorphicGraph(ConjunctiveGraph):
    """A graph whose equality is decided by the RGDA1 graph digest algorithm.

    An implementation of RGDA1 (publication below),
    a combination of Sayers & Karp's graph digest algorithm using
    sum and SHA-256 <http://www.hpl.hp.com/techreports/2003/HPL-2003-235R1.pdf>
    and traces <http://pallini.di.uniroma1.it>, an average case
    polynomial time algorithm for graph canonicalization.

    McCusker, J. P. (2015). WebSig: A Digital Signature Framework for the Web.
    Rensselaer Polytechnic Institute, Troy, NY.
    http://gradworks.umi.com/3727015.pdf
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the parent graph constructor."""
        super().__init__(**kwargs)

    def __eq__(self, other):
        """Graph isomorphism testing.

        Two graphs are equal when ``other`` is an ``IsomorphicGraph`` of the
        same length with the same internal hash.
        """
        return (
            isinstance(other, IsomorphicGraph)
            and len(self) == len(other)
            and self.internal_hash() == other.internal_hash()
        )

    def __ne__(self, other):
        """Negative graph isomorphism testing."""
        return not self.__eq__(other)

    def __hash__(self):
        """Use the parent class hash; see ``internal_hash`` for the digest."""
        return super().__hash__()

    def graph_digest(self, stats=None):
        """Synonym for IsomorphicGraph.internal_hash."""
        return self.internal_hash(stats=stats)

    def internal_hash(self, stats=None):
        """
        Return the digest of the canonicalized triples of this graph.

        This is defined instead of __hash__ to avoid a circular recursion
        scenario with the Memory store for rdflib which requires a hash lookup
        in order to return a generator of triples.
        """
        canonicalizer = _TripleCanonicalizer(self)
        return canonicalizer.to_hash(stats=stats)
|
||||
|
||||
|
||||
HashFunc = Callable[[str], int]
|
||||
ColorItem = Tuple[Union[int, str], URIRef, Union[int, str]]
|
||||
ColorItemTuple = Tuple[ColorItem, ...]
|
||||
HashCache = Optional[Dict[ColorItemTuple, str]]
|
||||
Stats = Dict[str, Union[int, str]]
|
||||
|
||||
|
||||
class Color:
    """A group of nodes sharing the same color during canonicalization.

    A color is either a tuple of ``(subject-part, predicate, object-part)``
    items or, for the single-node colors created by the canonicalizer, the
    node itself. Hashes of tuple colors are memoized in ``hash_cache``, which
    may be shared between ``Color`` instances.
    """

    def __init__(
        self,
        nodes: List[IdentifiedNode],
        hashfunc: HashFunc,
        color: ColorItemTuple = (),
        hash_cache: HashCache = None,
    ):
        self._hash_cache = {} if hash_cache is None else hash_cache
        self.color = color
        self.nodes = nodes
        self.hashfunc = hashfunc
        self._hash_color = None

    def __str__(self):
        node_count, digest = self.key()
        return "Color %s (%s nodes)" % (digest, node_count)

    def key(self):
        """Return the sort key ``(number of nodes, color hash)``."""
        return (len(self.nodes), self.hash_color())

    def hash_color(self, color: Optional[Tuple[ColorItem, ...]] = None) -> str:
        """Return the hash of ``color`` (default: this instance's color).

        For a tuple color the hash is the hexadecimal sum of ``hashfunc``
        applied to each item, rendered as a space-joined string; the result is
        cached. A color that is itself a node hashes to its N3 form and is not
        cached.
        """
        if color is None:
            color = self.color
        if color in self._hash_cache:
            return self._hash_cache[color]

        if isinstance(color, Node):
            return color.n3()

        def as_text(part):
            # Nodes are rendered in N3; everything else via str().
            return part.n3() if isinstance(part, Node) else str(part)

        total = sum(
            self.hashfunc(" ".join(as_text(part) for part in triple))
            for triple in color
        )
        digest: str = "%x" % total
        self._hash_cache[color] = digest
        return digest

    def distinguish(self, W: Color, graph: Graph):  # noqa: N803
        """Split this color's nodes by how they connect to the nodes of ``W``.

        Each node gets a new color made of the current color plus one item per
        triple linking it to a node of ``W``: ``(1, p, hash(W))`` for triples
        from the node and ``(hash(W), p, 3)`` for triples to it. Nodes whose
        new colors hash alike end up in the same returned ``Color``.
        """
        partition: Dict[str, Color] = {}
        for n in self.nodes:
            items = list(self.color)
            for node in W.nodes:
                items.extend(
                    (1, p, W.hash_color())
                    for _s, p, _o in graph.triples((n, None, node))
                )
                items.extend(
                    (W.hash_color(), p, 3)
                    for _s, p, _o in graph.triples((node, None, n))
                )
            refined: Tuple[ColorItem, ...] = tuple(items)  # type: ignore[assignment]
            refined_hash = self.hash_color(refined)

            if refined_hash not in partition:
                partition[refined_hash] = Color(
                    [], self.hashfunc, refined, hash_cache=self._hash_cache
                )
            partition[refined_hash].nodes.append(n)
        return partition.values()

    def discrete(self):
        """Return True when this color holds exactly one node."""
        return len(self.nodes) == 1

    def copy(self):
        """Return a new ``Color`` with a copy of the node list; the color,
        hash function and hash cache are shared."""
        return Color(
            list(self.nodes), self.hashfunc, self.color, hash_cache=self._hash_cache
        )
|
||||
|
||||
|
||||
_HashT = Callable[[], "HASH"]
|
||||
|
||||
|
||||
class _TripleCanonicalizer:
    """Compute canonical blank-node labels and a digest for a graph.

    The blank nodes of the graph are partitioned into ``Color`` objects that
    are repeatedly refined against the graph's triples. When refinement alone
    does not make the coloring discrete, candidate nodes are individuated one
    at a time (``_traces``) and the alternatives are compared by color key.
    """

    def __init__(self, graph: Graph, hashfunc: _HashT = sha256):
        """
        :param graph: the graph whose triples are canonicalized
        :param hashfunc: factory returning a hash object that provides
            ``update()`` and ``hexdigest()``; defaults to ``sha256``
        """
        self.graph = graph

        def _hashfunc(s: str):
            # Hash the UTF-8 encoding of str(s); expose the hex digest as an int
            # so digests can be summed in to_hash().
            h = hashfunc()
            h.update(str(s).encode("utf8"))
            return int(h.hexdigest(), 16)

        # Passed to every Color this canonicalizer creates.
        self._hash_cache: HashCache = {}
        self.hashfunc = _hashfunc

    def _discrete(self, coloring: List[Color]) -> bool:
        """Return True when every color in ``coloring`` reports ``discrete()``."""
        return all(c.discrete() for c in coloring)

    def _initial_color(self) -> List[Color]:
        """Finds an initial color for the graph.

        Finds an initial color of the graph by finding all blank nodes and
        non-blank nodes that are adjacent. Nodes that are not adjacent to blank
        nodes are not included, as they are a) already colored (by URI or literal)
        and b) do not factor into the color of any blank node.

        :return: one color holding all blank nodes followed by one color per
            adjacent non-blank node, or an empty list when the graph has no
            blank nodes
        """
        bnodes: Set[BNode] = set()
        others = set()
        self._neighbors = defaultdict(set)
        for s, p, o in self.graph:
            nodes = {s, p, o}
            b = {x for x in nodes if isinstance(x, BNode)}
            if b:
                others |= nodes - b
                bnodes |= b
                if isinstance(s, BNode):
                    self._neighbors[s].add(o)
                if isinstance(o, BNode):
                    self._neighbors[o].add(s)
                if isinstance(p, BNode):
                    self._neighbors[p].add(s)
                    # NOTE(review): this records p as its own neighbour; ``o``
                    # looks like the intended value. Left unchanged - confirm.
                    self._neighbors[p].add(p)
        if not bnodes:
            return []
        return [Color(list(bnodes), self.hashfunc, hash_cache=self._hash_cache)] + [
            # type error: List item 0 has incompatible type "Union[IdentifiedNode, Literal]"; expected "IdentifiedNode"
            # type error: Argument 3 to "Color" has incompatible type "Union[IdentifiedNode, Literal]"; expected "Tuple[Tuple[Union[int, str], URIRef, Union[int, str]], ...]"
            Color([x], self.hashfunc, x, hash_cache=self._hash_cache)  # type: ignore[list-item, arg-type]
            for x in others
        ]

    def _individuate(self, color, individual):
        """Split ``individual`` out of ``color`` into a new single-node color.

        ``color.nodes`` is mutated: ``individual`` is removed from it. The new
        color's value is the old color value extended with a 1-tuple holding
        the node count of ``color`` before the removal.
        """
        new_color = list(color.color)
        new_color.append((len(color.nodes),))

        color.nodes.remove(individual)
        return Color(
            [individual], self.hashfunc, tuple(new_color), hash_cache=self._hash_cache
        )

    def _get_candidates(self, coloring: List[Color]) -> Iterator[Tuple[Node, Color]]:
        """Yield ``(node, color)`` for every node of every non-discrete color."""
        for c in [c for c in coloring if not c.discrete()]:
            for node in c.nodes:
                yield node, c

    def _refine(self, coloring: List[Color], sequence: List[Color]) -> List[Color]:
        """Refine ``coloring`` by distinguishing its colors against ``sequence``.

        :param coloring: the current coloring; a shallow copy is modified
        :param sequence: colors to refine against, processed as a work list
        :return: the refined colors, with colors sharing a ``hash_color()``
            merged into the first such color
        """
        sequence = sorted(sequence, key=lambda x: x.key(), reverse=True)
        coloring = coloring[:]
        while len(sequence) > 0 and not self._discrete(coloring):
            W = sequence.pop()  # noqa: N806
            # Iterate a snapshot because coloring is modified inside the loop.
            for c in coloring[:]:
                if len(c.nodes) > 1 or isinstance(c.nodes[0], BNode):
                    colors = sorted(
                        c.distinguish(W, self.graph),
                        key=lambda x: x.key(),
                        reverse=True,
                    )
                    coloring.remove(c)
                    coloring.extend(colors)
                    try:
                        # c is still queued: replace it in place by its parts.
                        si = sequence.index(c)
                        sequence = sequence[:si] + colors + sequence[si + 1 :]
                    except ValueError:
                        # c is not queued: queue all parts but the first.
                        sequence = colors[1:] + sequence
        combined_colors: List[Color] = []
        combined_color_map: Dict[str, Color] = dict()
        for color in coloring:
            color_hash = color.hash_color()
            # A repeated hash is a collision; the colors are combined into a
            # single color so that they can be individuated later.
            if color_hash in combined_color_map:
                combined_color_map[color_hash].nodes.extend(color.nodes)
            else:
                combined_colors.append(color)
                combined_color_map[color_hash] = color
        return combined_colors

    @_runtime("to_hash_runtime")
    def to_hash(self, stats: Optional[Stats] = None):
        """Return the graph digest: the sum of the hashes of all canonical triples.

        Each triple is hashed as its terms' ``n3()`` forms joined by spaces.
        When ``stats`` is given, the digest is also stored in hexadecimal under
        ``stats["graph_digest"]``.
        """
        result = 0
        for triple in self.canonical_triples(stats=stats):
            result += self.hashfunc(" ".join([x.n3() for x in triple]))
        if stats is not None:
            stats["graph_digest"] = "%x" % result
        return result

    def _experimental_path(self, coloring: List[Color]) -> List[Color]:
        """Make a copy of ``coloring`` discrete by always taking the first choice.

        Repeatedly individuates the first node of the first non-discrete color
        and refines, until the coloring is discrete.
        """
        coloring = [c.copy() for c in coloring]
        while not self._discrete(coloring):
            color = next(x for x in coloring if not x.discrete())
            node = color.nodes[0]
            new_color = self._individuate(color, node)
            coloring.append(new_color)
            coloring = self._refine(coloring, [new_color])
        return coloring

    def _create_generator(
        self,
        colorings: List[List[Color]],
        groupings: Optional[Dict[Node, Set[Node]]] = None,
    ) -> Dict[Node, Set[Node]]:
        """Merge position-wise corresponding nodes of ``colorings`` into groups.

        :param colorings: colorings that are walked in lockstep
        :param groupings: existing groups to extend; a new ``defaultdict`` is
            used when this is ``None`` or empty
        :return: mapping of each node to the (shared) set of its group
        """
        if not groupings:
            groupings = defaultdict(set)
        for group in zip(*colorings):
            g = set([c.nodes[0] for c in group])
            # NOTE(review): ``n`` here is a Color taken from ``group``, while
            # the keys written below are nodes - confirm this lookup is intended.
            for n in group:
                g |= groupings[n]
            for n in g:
                groupings[n] = g
        return groupings

    @_call_count("individuations")
    def _traces(
        self,
        coloring: List[Color],
        stats: Optional[Stats] = None,
        depth: Optional[List[int]] = None,
    ) -> List[Color]:
        """Search the individuation alternatives for a discrete coloring.

        :param coloring: the (non-discrete) coloring to start from
        :param stats: optional statistics mapping; ``"prunings"`` is updated
        :param depth: single-element list used as an in/out depth counter; a
            fresh ``[0]`` is used when omitted
        :return: the selected discrete coloring
        """
        # A fresh list per call: the counter is mutated below, so a shared
        # default list would carry state from one call to the next.
        if depth is None:
            depth = [0]
        if stats is not None and "prunings" not in stats:
            stats["prunings"] = 0
        depth[0] += 1
        candidates = self._get_candidates(coloring)
        best: List[List[Color]] = []
        best_score = None
        best_experimental_score = None
        last_coloring = None
        generator: Dict[Node, Set[Node]] = defaultdict(set)
        visited: Set[Node] = set()
        for candidate, color in candidates:
            if candidate in generator:
                v = generator[candidate] & visited
                if len(v) > 0:
                    # A node grouped with this candidate was already explored.
                    visited.add(candidate)
                    continue
            visited.add(candidate)
            # Work on copies so the caller's coloring is left untouched.
            coloring_copy: List[Color] = []
            color_copy = None
            for c in coloring:
                c_copy = c.copy()
                coloring_copy.append(c_copy)
                if c == color:
                    color_copy = c_copy
            new_color = self._individuate(color_copy, candidate)
            coloring_copy.append(new_color)
            refined_coloring = self._refine(coloring_copy, [new_color])
            color_score = tuple([c.key() for c in refined_coloring])
            experimental = self._experimental_path(coloring_copy)
            experimental_score = set([c.key() for c in experimental])
            if last_coloring:
                generator = self._create_generator(  # type: ignore[unreachable]
                    [last_coloring, experimental], generator
                )
            last_coloring = experimental
            if best_score is None or best_score < color_score:  # type: ignore[unreachable]
                best = [refined_coloring]
                best_score = color_score
                best_experimental_score = experimental_score
            elif best_score > color_score:  # type: ignore[unreachable]
                # prune this branch.
                if stats is not None:
                    stats["prunings"] += 1
            elif experimental_score != best_experimental_score:
                best.append(refined_coloring)
            else:
                # prune this branch.
                if stats is not None:
                    stats["prunings"] += 1
        discrete: List[List[Color]] = [x for x in best if self._discrete(x)]
        if len(discrete) == 0:
            best_score = None
            best_depth = None
            for coloring in best:
                d = [depth[0]]
                new_color = self._traces(coloring, stats=stats, depth=d)
                # NOTE(review): the score is computed from ``refined_coloring``
                # (left over from the loop above), not from ``new_color``.
                # Left unchanged because it affects which labelling is chosen.
                color_score = tuple([c.key() for c in refined_coloring])
                if best_score is None or color_score > best_score:  # type: ignore[unreachable]
                    discrete = [new_color]
                    best_score = color_score
                    best_depth = d[0]
            depth[0] = best_depth  # type: ignore[assignment]
        return discrete[0]

    def canonical_triples(self, stats: Optional[Stats] = None):
        """Yield the graph's triples with blank nodes replaced by canonical ones.

        :param stats: optional mapping that receives counters and timings
            (``triple_count``, ``adjacent_nodes``, ``initial_coloring_runtime``,
            ``initial_color_count``, ``tree_depth``, ``color_count``,
            ``canonicalize_triples_runtime``, and ``individuations`` when no
            individuation was needed)
        """
        if stats is not None:
            start_coloring = datetime.now()
        coloring = self._initial_color()
        if stats is not None:
            stats["triple_count"] = len(self.graph)
            stats["adjacent_nodes"] = max(0, len(coloring) - 1)
        coloring = self._refine(coloring, coloring[:])
        if stats is not None:
            stats["initial_coloring_runtime"] = _total_seconds(
                datetime.now() - start_coloring
            )
            stats["initial_color_count"] = len(coloring)

        if not self._discrete(coloring):
            depth = [0]
            coloring = self._traces(coloring, stats=stats, depth=depth)
            if stats is not None:
                stats["tree_depth"] = depth[0]
        elif stats is not None:
            stats["individuations"] = 0
            stats["tree_depth"] = 0
        if stats is not None:
            stats["color_count"] = len(coloring)

        # Label every color's first node with the hash of its color.
        bnode_labels: Dict[Node, str] = {
            c.nodes[0]: c.hash_color() for c in coloring
        }
        if stats is not None:
            stats["canonicalize_triples_runtime"] = _total_seconds(
                datetime.now() - start_coloring
            )
        for triple in self.graph:
            yield tuple(self._canonicalize_bnodes(triple, bnode_labels))

    def _canonicalize_bnodes(
        self,
        triple: _TripleType,
        labels: Dict[Node, str],
    ):
        """Yield the terms of ``triple``, replacing each blank node by
        ``BNode("cb" + label)``; other terms are yielded unchanged."""
        for term in triple:
            if isinstance(term, BNode):
                yield BNode(value="cb%s" % labels[term])
            else:
                yield term
|
||||
|
||||
|
||||
def to_isomorphic(graph: Graph) -> IsomorphicGraph:
    """Return ``graph`` as an ``IsomorphicGraph``.

    An ``IsomorphicGraph`` is returned unchanged. Any other graph is copied
    into a new ``IsomorphicGraph``, which takes over the ``identifier`` of
    ``graph`` when it has one.
    """
    if isinstance(graph, IsomorphicGraph):
        return graph
    # Build the result once instead of creating a default graph and then
    # throwing it away when an identifier is available.
    if hasattr(graph, "identifier"):
        result = IsomorphicGraph(identifier=graph.identifier)
    else:
        result = IsomorphicGraph()
    result += graph
    return result
|
||||
|
||||
|
||||
def isomorphic(graph1: Graph, graph2: Graph) -> bool:
    """Compare two graphs for equality, taking blank nodes into account.

    Each graph is reduced to a digest by ``_TripleCanonicalizer.to_hash()``;
    the graphs are reported as isomorphic when the digests are equal.

    Examples::

        >>> g1 = Graph().parse(format='n3', data='''
        ...     @prefix : <http://example.org/ns#> .
        ...     <http://example.org> :rel <http://example.org/a> .
        ...     <http://example.org> :rel <http://example.org/b> .
        ...     <http://example.org> :rel [ :label "A bnode." ] .
        ... ''')
        >>> g2 = Graph().parse(format='n3', data='''
        ...     @prefix ns: <http://example.org/ns#> .
        ...     <http://example.org> ns:rel [ ns:label "A bnode." ] .
        ...     <http://example.org> ns:rel <http://example.org/b>,
        ...             <http://example.org/a> .
        ... ''')
        >>> isomorphic(g1, g2)
        True

        >>> g3 = Graph().parse(format='n3', data='''
        ...     @prefix : <http://example.org/ns#> .
        ...     <http://example.org> :rel <http://example.org/a> .
        ...     <http://example.org> :rel <http://example.org/b> .
        ...     <http://example.org> :rel <http://example.org/c> .
        ... ''')
        >>> isomorphic(g1, g3)
        False
    """
    first_digest, second_digest = (
        _TripleCanonicalizer(graph1).to_hash(),
        _TripleCanonicalizer(graph2).to_hash(),
    )
    return first_digest == second_digest
|
||||
|
||||
|
||||
def to_canonical_graph(
    g1: Graph, stats: Optional[Stats] = None
) -> ReadOnlyGraphAggregate:
    """Build a canonical, read-only copy of ``g1``.

    The canonical triples of ``g1`` (blank node ids derived from hashes
    correlated with the graph contents, SHA-256 by default) are collected in a
    new ``Graph`` that is wrapped in a ``ReadOnlyGraphAggregate``.

    :param stats: optional mapping passed through to the canonicalizer
    """
    canonicalizer = _TripleCanonicalizer(g1)
    canonical = Graph()
    canonical += canonicalizer.canonical_triples(stats=stats)
    return ReadOnlyGraphAggregate([canonical])
|
||||
|
||||
|
||||
def graph_diff(g1: Graph, g2: Graph) -> Tuple[Graph, Graph, Graph]:
    """Returns three sets of triples: "in both", "in first" and "in second"."""
    # Canonical graphs give blank nodes deterministic values, which makes the
    # set operations below meaningful across the two inputs.
    first = to_canonical_graph(g1)
    second = to_canonical_graph(g2)
    return (first * second, first - second, second - first)
|
||||
|
||||
|
||||
# Single shared blank node that _squash_bnodes substitutes for every BNode.
_MOCK_BNODE = BNode()
|
||||
|
||||
|
||||
def similar(g1: Graph, g2: Graph):
    """Checks if the two graphs are "similar".

    Checks if the two graphs are "similar", by comparing sorted triples where
    all bnodes have been replaced by a singular mock bnode (the
    ``_MOCK_BNODE``).

    This is a much cheaper, but less reliable, alternative to the comparison
    algorithm in ``isomorphic``.

    :return: True when both graphs yield the same sorted list of squashed
        triples, False otherwise (including when the number of triples differs)
    """
    # Compare the complete lists. Pairing the triples with zip() would stop at
    # the shorter graph and report, for example, an empty graph as similar to
    # any other graph.
    return sorted(_squash_graph(g1)) == sorted(_squash_graph(g2))
|
||||
|
||||
|
||||
def _squashed_graphs_triples(g1: Graph, g2: Graph):
    """Yield pairs of squashed triples from ``g1`` and ``g2`` in sorted order.

    Pairing stops as soon as the shorter of the two sorted sequences ends.
    """
    left = sorted(_squash_graph(g1))
    right = sorted(_squash_graph(g2))
    yield from zip(left, right)
|
||||
|
||||
|
||||
def _squash_graph(graph: Graph):
    """Return a generator of the triples of ``graph`` with bnodes squashed."""
    return (_squash_bnodes(spo) for spo in graph)
|
||||
|
||||
|
||||
def _squash_bnodes(triple):
    """Return ``triple`` as a tuple with every ``BNode`` replaced by ``_MOCK_BNODE``."""
    return tuple(_MOCK_BNODE if isinstance(term, BNode) else term for term in triple)
|
||||
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Utility functions and objects to ease Python 2/3 compatibility,
|
||||
and different versions of support libraries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import re
|
||||
import warnings
|
||||
from typing import Match
|
||||
|
||||
|
||||
def cast_bytes(s, enc="utf-8"):
    """Return ``s`` encoded with ``enc`` when it is a ``str``; otherwise return
    ``s`` unchanged."""
    return s.encode(enc) if isinstance(s, str) else s
|
||||
|
||||
|
||||
def ascii(stream):
    """Wrap the byte stream ``stream`` in a reader that decodes it as ASCII."""
    make_reader = codecs.getreader("ascii")
    return make_reader(stream)
|
||||
|
||||
|
||||
def bopen(*args, **kwargs):
    """Open a file for binary reading.

    All arguments are forwarded to the built-in ``open`` together with
    ``mode="rb"``.
    """
    # type error: No overload variant of "open" matches argument types "Tuple[Any, ...]", "str", "Dict[str, Any]"
    handle = open(*args, mode="rb", **kwargs)  # type: ignore[call-overload]
    return handle
|
||||
|
||||
|
||||
# Alias of the built-in ``int`` (presumably kept for Python 2 era callers that
# expected a separate long type - confirm before removing).
long_type = int
|
||||
|
||||
|
||||
def sign(n):
    """Return -1 when ``n`` is negative, 1 when it is positive and 0 otherwise."""
    return -1 if n < 0 else (1 if n > 0 else 0)
|
||||
|
||||
|
||||
# Matches a backslash-u escape with four hex digits or a backslash-U escape
# with eight hex digits.
r_unicodeEscape = re.compile(r"(\\u[0-9A-Fa-f]{4}|\\U[0-9A-Fa-f]{8})")  # noqa: N816
|
||||
|
||||
|
||||
def _unicodeExpand(s):  # noqa: N802
    """Replace every escape matched by ``r_unicodeEscape`` in ``s`` by the
    character it denotes."""

    def _to_char(match):
        # Drop the two-character escape prefix and read the rest as hex.
        return chr(int(match.group(0)[2:], 16))

    return r_unicodeEscape.sub(_to_char, s)
|
||||
|
||||
|
||||
def decodeStringEscape(s):  # noqa: N802
    r"""Replace ``\`` escapes in the string ``s`` (deprecated).

    Emits a ``DeprecationWarning`` on every call. The escapes for tab,
    newline, carriage return, backspace, form feed, the two quote characters
    and the backslash are replaced, in that order. Unicode escapes are not
    expanded.
    """
    warnings.warn(
        DeprecationWarning(
            "rdflib.compat.decodeStringEscape() is deprecated, "
            "it will be removed in rdflib 7.0.0. "
            "This function is not used anywhere in rdflib anymore "
            "and the utility that it does provide is not implemented correctly."
        )
    )

    # The order matters: the backslash escape is handled last.
    replacements = (
        ("\\t", "\t"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\b", "\b"),
        ("\\f", "\f"),
        ('\\"', '"'),
        ("\\'", "'"),
        ("\\\\", "\\"),
    )
    for escaped, plain in replacements:
        s = s.replace(escaped, plain)
    return s
|
||||
|
||||
|
||||
_string_escape_map = {
|
||||
"t": "\t",
|
||||
"b": "\b",
|
||||
"n": "\n",
|
||||
"r": "\r",
|
||||
"f": "\f",
|
||||
'"': '"',
|
||||
"'": "'",
|
||||
"\\": "\\",
|
||||
}
|
||||
|
||||
|
||||
def _turtle_escape_subber(match: Match[str]) -> str:
    """Return the replacement for one escape found by ``_turtle_escape_pattern``.

    The match carries two groups: a single-character string escape, or a
    unicode escape (``u``/``U`` followed by hex digits).
    """
    char_escape, unicode_escape = match.groups()
    if char_escape is None:
        # Skip the leading u/U and read the remaining digits as hex.
        return chr(int(unicode_escape[1:], 16))
    return _string_escape_map[char_escape]
|
||||
|
||||
|
||||
_turtle_escape_pattern = re.compile(
|
||||
r"""\\(?:([tbnrf"'\\])|(u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))""",
|
||||
)
|
||||
|
||||
|
||||
def decodeUnicodeEscape(escaped: str) -> str:  # noqa: N802
    """Return ``escaped`` with its backslash escapes replaced.

    Every match of ``_turtle_escape_pattern`` is replaced by the result of
    ``_turtle_escape_subber``.
    """
    if "\\" in escaped:
        return _turtle_escape_pattern.sub(_turtle_escape_subber, escaped)
    # Fast path: most strings contain no backslash at all.
    return escaped
|
||||
@@ -0,0 +1,278 @@
|
||||
import warnings
|
||||
from random import randint
|
||||
|
||||
from rdflib.namespace import RDF
|
||||
from rdflib.term import BNode, URIRef
|
||||
|
||||
__all__ = ["Container", "Bag", "Seq", "Alt", "NoElementException"]
|
||||
|
||||
|
||||
class Container:
    """A class for constructing RDF containers, as per https://www.w3.org/TR/rdf11-mt/#rdf-containers

    Basic usage, creating a ``Bag`` and adding to it::

        >>> from rdflib import Graph, BNode, Literal, Bag
        >>> g = Graph()
        >>> b = Bag(g, BNode(), [Literal("One"), Literal("Two"), Literal("Three")])
        >>> print(g.serialize(format="turtle"))
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        <BLANKLINE>
        [] a rdf:Bag ;
            rdf:_1 "One" ;
            rdf:_2 "Two" ;
            rdf:_3 "Three" .
        <BLANKLINE>
        <BLANKLINE>

        >>> # print out an item using an index reference
        >>> print(b[2])
        Two

        >>> # add a new item
        >>> b.append(Literal("Hello")) # doctest: +ELLIPSIS
        <rdflib.container.Bag object at ...>
        >>> print(g.serialize(format="turtle"))
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        <BLANKLINE>
        [] a rdf:Bag ;
            rdf:_1 "One" ;
            rdf:_2 "Two" ;
            rdf:_3 "Three" ;
            rdf:_4 "Hello" .
        <BLANKLINE>
        <BLANKLINE>

    """

    def __init__(self, graph, uri, seq=None, rtype="Bag"):
        """Creates a Container

        :param graph: a Graph instance
        :param uri: URI or Blank Node of the Container
        :param seq: the elements of the Container (none when omitted)
        :param rtype: the type of Container, one of "Bag", "Seq" or "Alt"
        """

        self.graph = graph
        self.uri = uri or BNode()
        self._len = 0
        self._rtype = rtype  # rdf:Bag or rdf:Seq or rdf:Alt

        # An immutable empty tuple stands in for the omitted argument, so no
        # mutable default object is shared between calls.
        self.append_multiple(seq if seq is not None else ())

        # adding triple corresponding to container type
        self.graph.add((self.uri, RDF.type, RDF[self._rtype]))

    @staticmethod
    def _elem_uri(position):
        """Return the membership predicate ``rdf:_<position>`` as a ``URIRef``."""
        return URIRef(str(RDF) + "_" + str(position))

    def n3(self):
        """Return the members' ``n3()`` forms, space separated, in parentheses."""
        items = [self[position] for position in range(1, len(self) + 1)]
        return "( %s )" % " ".join([a.n3() for a in items])

    def _get_container(self):
        """Returns the URI of the container"""

        return self.uri

    def __len__(self):
        """Number of items in container"""

        return self._len

    def type_of_conatiner(self):
        """Deprecated, misspelled alias of ``type_of_container``."""
        warnings.warn(
            "rdflib.container.Container.type_of_conatiner is deprecated. "
            "Use type_of_container method instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._rtype

    def type_of_container(self):
        """Return the container type name given at construction."""
        return self._rtype

    def index(self, item):
        """Returns the 1-based numerical index of the item in the container

        When the item occurs more than once, the index found last is returned.

        :raises ValueError: if no membership predicate links the container to
            ``item``
        """

        li_index = URIRef(str(RDF) + "_")

        i = None
        # predicates() returns an iterator, which is always truthy, so absence
        # can only be detected by iterating it.
        for p in self.graph.predicates(self.uri, item):
            # Ignore predicates that are not rdf:_N membership properties
            # (e.g. rdf:type).
            if p.startswith(li_index):
                i = int(p.replace(li_index, ""))
        if i is None:
            raise ValueError("%s is not in %s" % (item, "container"))
        return i

    def __getitem__(self, key):
        """Returns item of the container at index key

        :raises KeyError: if ``key`` is outside ``1..len(self)`` or the graph
            holds no value for that position
        """

        c = self._get_container()

        assert isinstance(key, int)
        if key <= 0 or key > len(self):
            raise KeyError(key)
        v = self.graph.value(c, self._elem_uri(key))
        # Compare against None explicitly so that a stored member that
        # evaluates as false is still returned.
        if v is None:
            raise KeyError(key)
        return v

    def __setitem__(self, key, value):
        """Sets the item at index key or predicate rdf:_key of the container to value

        :raises KeyError: if ``key`` is outside ``1..len(self)``
        """

        assert isinstance(key, int)

        c = self._get_container()
        if key <= 0 or key > len(self):
            raise KeyError(key)

        self.graph.set((c, self._elem_uri(key), value))

    def __delitem__(self, key):
        """Removing the item with index key or predicate rdf:_key

        The members after ``key`` are moved down by one position.

        :raises KeyError: if ``key`` is outside ``1..len(self)``
        """

        assert isinstance(key, int)
        if key <= 0 or key > len(self):
            raise KeyError(key)

        graph = self.graph
        container = self.uri
        graph.remove((container, self._elem_uri(key), None))
        for j in range(key + 1, len(self) + 1):
            v = graph.value(container, self._elem_uri(j))
            graph.remove((container, self._elem_uri(j), v))
            graph.add((container, self._elem_uri(j - 1), v))

        self._len -= 1

    def items(self):
        """Returns a list of all items in the container

        Positions are read from the graph starting at 1 and stop at the first
        position that has no triple.
        """

        l_ = []
        container = self.uri
        i = 1
        while True:
            elem_uri = self._elem_uri(i)

            if (container, elem_uri, None) in self.graph:
                i += 1
                l_.append(self.graph.value(container, elem_uri))
            else:
                break
        return l_

    def end(self):
        """Return the end index (1-based) of the container as found in the
        graph: the last position of the unbroken run starting at 1."""

        container = self.uri
        i = 1
        while (container, self._elem_uri(i), None) in self.graph:
            i += 1
        return i - 1

    def append(self, item):
        """Adding item to the end of the container"""

        end = self.end()
        container = self.uri
        self.graph.add((container, self._elem_uri(end + 1), item))
        self._len += 1

        return self

    def append_multiple(self, other):
        """Adding multiple elements to the container to the end which are in python list other"""

        end = self.end()  # it should return the last index

        container = self.uri
        for item in other:
            end += 1
            self._len += 1
            self.graph.add((container, self._elem_uri(end), item))

        return self

    def clear(self):
        """Removing all elements from the container"""

        container = self.uri
        graph = self.graph
        i = 1
        while True:
            elem_uri = self._elem_uri(i)
            if (container, elem_uri, None) in self.graph:
                graph.remove((container, elem_uri, None))
                i += 1
            else:
                break
        self._len = 0
        return self
|
||||
|
||||
|
||||
class Bag(Container):
    """Unordered container (no preference order of elements)"""

    def __init__(self, graph, uri, seq=[]):
        """Create a ``Container`` whose type is ``"Bag"``."""
        Container.__init__(self, graph, uri, seq, rtype="Bag")
|
||||
|
||||
|
||||
class Alt(Container):
    """Container of type ``"Alt"``; ``anyone()`` picks one member at random."""

    def __init__(self, graph, uri, seq=[]):
        """Create a ``Container`` whose type is ``"Alt"``."""
        Container.__init__(self, graph, uri, seq, rtype="Alt")

    def anyone(self):
        """Return a randomly chosen member.

        :raises NoElementException: if the container is empty
        """
        if len(self) == 0:
            raise NoElementException()
        position = randint(1, len(self))
        return self.__getitem__(position)
|
||||
|
||||
|
||||
class Seq(Container):
    """Container of type ``"Seq"``; supports inserting at a given position."""

    def __init__(self, graph, uri, seq=[]):
        """Create a ``Container`` whose type is ``"Seq"``."""
        Container.__init__(self, graph, uri, seq, rtype="Seq")

    def add_at_position(self, pos, item):
        """Insert ``item`` at the 1-based position ``pos``.

        Members from ``pos`` onwards are moved up by one position; inserting
        at ``len(self) + 1`` appends.

        :raises ValueError: if ``pos`` is outside ``1..len(self) + 1``
        :return: the container itself
        """
        assert isinstance(pos, int)
        size = len(self)
        if pos <= 0 or pos > size + 1:
            raise ValueError("Invalid Position for inserting element in rdf:Seq")

        if pos == size + 1:
            self.append(item)
            return self

        container = self._get_container()
        prefix = str(RDF) + "_"
        # Walk from the last member down to ``pos`` so that no member is
        # overwritten while shifting.
        for j in range(size, pos - 1, -1):
            current = URIRef(prefix + str(j))
            v = self.graph.value(container, current)
            self.graph.remove((container, current, v))
            self.graph.add((container, URIRef(prefix + str(j + 1)), v))
        self.graph.add((container, URIRef(prefix + str(pos)), item))
        self._len += 1
        return self
|
||||
|
||||
|
||||
class NoElementException(Exception): # noqa: N818
|
||||
def __init__(self, message="rdf:Alt Container is empty"):
|
||||
self.message = message
|
||||
|
||||
def __str__(self):
|
||||
return self.message
|
||||
@@ -0,0 +1,93 @@
|
||||
"""
|
||||
Dirt Simple Events
|
||||
|
||||
A Dispatcher (or a subclass of Dispatcher) stores event handlers that
|
||||
are 'fired' with simple event objects when interesting things happen.
|
||||
|
||||
Create a dispatcher:
|
||||
|
||||
>>> d = Dispatcher()
|
||||
|
||||
Now create a handler for the event and subscribe it to the dispatcher
|
||||
to handle Event events. A handler is a simple function or method that
|
||||
accepts the event as an argument:
|
||||
|
||||
>>> def handler1(event): print(repr(event))
|
||||
>>> d.subscribe(Event, handler1) # doctest: +ELLIPSIS
|
||||
<rdflib.events.Dispatcher object at ...>
|
||||
|
||||
Now dispatch a new event into the dispatcher, and see handler1 get
|
||||
fired:
|
||||
|
||||
>>> d.dispatch(Event(foo='bar', data='yours', used_by='the event handlers'))
|
||||
<rdflib.events.Event ['data', 'foo', 'used_by']>
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
__all__ = ["Event", "Dispatcher"]
|
||||
|
||||
|
||||
class Event:
    """
    An event is a container for attributes. The source of an event
    creates this object, or a subclass, gives it any kind of data that
    the event's handlers need to handle the event, and then calls
    notify(event).

    The target of an event registers a function to handle the event it
    is interested in with subscribe(). When a source calls
    notify(event), each subscriber to that event will be called in no
    particular order.
    """

    def __init__(self, **kw):
        """Store every keyword argument as an attribute of the event."""
        vars(self).update(kw)

    def __repr__(self):
        """Show the sorted list of attribute names (not their values)."""
        names = sorted(vars(self))
        return "<rdflib.events.Event %s>" % (names,)
|
||||
|
||||
|
||||
class Dispatcher:
    """
    An object that can dispatch events to a privately managed group of
    subscribers.
    """

    # Maps an event type to the list of handlers subscribed to it; None until
    # a map is set or the first handler is subscribed.
    _dispatch_map: Optional[Dict[Any, Any]] = None

    def set_map(self, amap: Dict[Any, Any]):
        """Replace the dispatch map with ``amap`` and return the dispatcher."""
        self._dispatch_map = amap
        return self

    def get_map(self):
        """Return the current dispatch map (``None`` if none was set)."""
        return self._dispatch_map

    def subscribe(self, event_type, handler):
        """Subscribe the given handler to an event_type. Handlers
        are called in the order they are subscribed.

        Returns the dispatcher itself.
        """
        if self._dispatch_map is None:
            self.set_map({})
        registry = self._dispatch_map
        # type error: error: Item "None" of "Optional[Dict[Any, Any]]" has no attribute "get"
        handlers = registry.get(event_type, None)  # type: ignore[union-attr]
        if handlers is None:
            handlers = []
        handlers.append(handler)
        # type error: Unsupported target for indexed assignment ("Optional[Dict[Any, Any]]")
        registry[event_type] = handlers  # type: ignore[index]
        return self

    def dispatch(self, event):
        """Dispatch the given event to the subscribed handlers for
        the event's type

        Nothing happens while no dispatch map exists. The lookup uses the
        exact ``type(event)``.

        :raises ValueError: if a map exists but has no entry for the type
        """
        if self._dispatch_map is None:
            return
        handlers = self._dispatch_map.get(type(event), None)
        if handlers is None:
            raise ValueError("unknown event type: %s" % type(event))
        for handle in handlers:
            handle(event)
|
||||
@@ -0,0 +1,45 @@
|
||||
"""
|
||||
TODO:
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = [
|
||||
"Error",
|
||||
"ParserError",
|
||||
"UniquenessError",
|
||||
]
|
||||
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
class Error(Exception):
    """Base class for rdflib exceptions."""

    def __init__(self, msg: Optional[str] = None):
        """Initialise the exception with ``msg`` and keep it as ``self.msg``."""
        super().__init__(msg)
        self.msg = msg
|
||||
|
||||
|
||||
class ParserError(Error):
    """RDF Parser error."""

    def __init__(self, msg: str):
        """Initialise the base ``Error`` with ``msg`` and keep it as ``self.msg``."""
        Error.__init__(self, msg)
        self.msg: str = msg

    def __str__(self) -> str:
        message = self.msg
        return message
|
||||
|
||||
|
||||
class UniquenessError(Error):
    """A uniqueness assumption was made in the context, and that is not true"""

    def __init__(self, values: Any):
        """
        :param values: the values that violate the uniqueness assumption; they
            are rendered into the error message
        """
        # Wrap ``values`` in a 1-tuple: with a bare ``% values`` a tuple
        # argument would be unpacked as the format arguments and raise
        # TypeError instead of being shown in the message.
        Error.__init__(
            self,
            "Uniqueness assumption is not fulfilled. Multiple values are: %s"
            % (values,),
        )
|
||||
@@ -0,0 +1,77 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import getopt
|
||||
import sys
|
||||
import time
|
||||
from typing import TextIO, Union
|
||||
|
||||
import rdflib
|
||||
from rdflib.util import guess_format
|
||||
|
||||
|
||||
def _help():
|
||||
sys.stderr.write(
|
||||
"""
|
||||
program.py [-f <format>] [-o <output>] [files...]
|
||||
Read RDF files given on STDOUT - does something to the resulting graph
|
||||
If no files are given, read from stdin
|
||||
-o specifies file for output, if not given stdout is used
|
||||
-f specifies parser to use, if not given it is guessed from extension
|
||||
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def main(target, _help=_help, options="", stdin=True):
    """
    A main function for tools that read RDF from files given on commandline
    or from STDIN (if stdin parameter is true)

    :param target: callable invoked as ``target(graph, out, args)`` once all
        input has been parsed; ``args`` is the option list from ``getopt``
    :param _help: callable that prints the usage text (used for ``-h``)
    :param options: extra ``getopt`` short options accepted besides ``hf:o:``
    :param stdin: whether to read from standard input when no files are given
    """

    args, files = getopt.getopt(sys.argv[1:], "hf:o:" + options)
    dargs = dict(args)

    if "-h" in dargs:
        _help()
        sys.exit(-1)

    g = rdflib.Graph()

    # Format requested with -f, or None to guess it for each file.
    f = dargs.get("-f")

    out: Union[TextIO, codecs.StreamReaderWriter]
    opened_output = "-o" in dargs
    if opened_output:
        sys.stderr.write("Output to %s\n" % dargs["-o"])
        out = codecs.open(dargs["-o"], "w", "utf-8")
    else:
        out = sys.stdout

    try:
        start = time.time()
        if len(files) == 0 and stdin:
            sys.stderr.write("Reading from stdin as %s..." % f)
            g.parse(sys.stdin, format=f)
            sys.stderr.write("[done]\n")
        else:
            size = 0
            for x in files:
                # Guess separately for every file; carrying the first guess
                # over would parse later files with the wrong format.
                file_format = f if f is not None else guess_format(x)
                start1 = time.time()
                sys.stderr.write("Loading %s as %s... " % (x, file_format))
                g.parse(x, format=file_format)
                sys.stderr.write(
                    "done.\t(%d triples\t%.2f seconds)\n"
                    % (len(g) - size, time.time() - start1)
                )
                size = len(g)

        sys.stderr.write(
            "Loaded a total of %d triples in %.2f seconds.\n"
            % (len(g), time.time() - start)
        )

        target(g, out, args)
    finally:
        # Close only the file opened here; sys.stdout is left open.
        if opened_output:
            out.close()
|
||||
@@ -0,0 +1,257 @@
|
||||
"""
|
||||
A Describer is a stateful utility for creating RDF statements in a
|
||||
semi-declarative manner. It has methods for creating literal values, rel and
|
||||
rev resource relations (somewhat resembling RDFa).
|
||||
|
||||
The `Describer.rel` and `Describer.rev` methods return a context manager which sets the current
|
||||
about to the referenced resource for the context scope (for use with the
|
||||
``with`` statement).
|
||||
|
||||
Full example in the ``to_rdf`` method below::
|
||||
|
||||
>>> import datetime
|
||||
>>> from rdflib.graph import Graph
|
||||
>>> from rdflib.namespace import Namespace, RDFS, FOAF
|
||||
>>>
|
||||
>>> ORG_URI = "http://example.org/"
|
||||
>>>
|
||||
>>> CV = Namespace("http://purl.org/captsolo/resume-rdf/0.2/cv#")
|
||||
>>>
|
||||
>>> class Person:
|
||||
... def __init__(self):
|
||||
... self.first_name = "Some"
|
||||
... self.last_name = "Body"
|
||||
... self.username = "some1"
|
||||
... self.presentation = "Just a Python & RDF hacker."
|
||||
... self.image = "/images/persons/" + self.username + ".jpg"
|
||||
... self.site = "http://example.net/"
|
||||
... self.start_date = datetime.date(2009, 9, 4)
|
||||
... def get_full_name(self):
|
||||
... return " ".join([self.first_name, self.last_name])
|
||||
... def get_absolute_url(self):
|
||||
... return "/persons/" + self.username
|
||||
... def get_thumbnail_url(self):
|
||||
... return self.image.replace('.jpg', '-thumb.jpg')
|
||||
...
|
||||
... def to_rdf(self):
|
||||
... graph = Graph()
|
||||
... graph.bind('foaf', FOAF)
|
||||
... graph.bind('cv', CV)
|
||||
... lang = 'en'
|
||||
... d = Describer(graph, base=ORG_URI)
|
||||
... d.about(self.get_absolute_url()+'#person')
|
||||
... d.rdftype(FOAF.Person)
|
||||
... d.value(FOAF.name, self.get_full_name())
|
||||
... d.value(FOAF.givenName, self.first_name)
|
||||
... d.value(FOAF.familyName, self.last_name)
|
||||
... d.rel(FOAF.homepage, self.site)
|
||||
... d.value(RDFS.comment, self.presentation, lang=lang)
|
||||
... with d.rel(FOAF.depiction, self.image):
|
||||
... d.rdftype(FOAF.Image)
|
||||
... d.rel(FOAF.thumbnail, self.get_thumbnail_url())
|
||||
... with d.rev(CV.aboutPerson):
|
||||
... d.rdftype(CV.CV)
|
||||
... with d.rel(CV.hasWorkHistory):
|
||||
... d.value(CV.startDate, self.start_date)
|
||||
... d.rel(CV.employedIn, ORG_URI+"#company")
|
||||
... return graph
|
||||
...
|
||||
>>> person_graph = Person().to_rdf()
|
||||
>>> expected = Graph().parse(data='''<?xml version="1.0" encoding="utf-8"?>
|
||||
... <rdf:RDF
|
||||
... xmlns:foaf="http://xmlns.com/foaf/0.1/"
|
||||
... xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
... xmlns:cv="http://purl.org/captsolo/resume-rdf/0.2/cv#"
|
||||
... xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
|
||||
... <foaf:Person rdf:about="http://example.org/persons/some1#person">
|
||||
... <foaf:name>Some Body</foaf:name>
|
||||
... <foaf:givenName>Some</foaf:givenName>
|
||||
... <foaf:familyName>Body</foaf:familyName>
|
||||
... <foaf:depiction>
|
||||
... <foaf:Image
|
||||
... rdf:about=
|
||||
... "http://example.org/images/persons/some1.jpg">
|
||||
... <foaf:thumbnail
|
||||
... rdf:resource=
|
||||
... "http://example.org/images/persons/some1-thumb.jpg"/>
|
||||
... </foaf:Image>
|
||||
... </foaf:depiction>
|
||||
... <rdfs:comment xml:lang="en">
|
||||
... Just a Python & RDF hacker.
|
||||
... </rdfs:comment>
|
||||
... <foaf:homepage rdf:resource="http://example.net/"/>
|
||||
... </foaf:Person>
|
||||
... <cv:CV>
|
||||
... <cv:aboutPerson
|
||||
... rdf:resource="http://example.org/persons/some1#person">
|
||||
... </cv:aboutPerson>
|
||||
... <cv:hasWorkHistory>
|
||||
... <rdf:Description>
|
||||
... <cv:startDate
|
||||
... rdf:datatype="http://www.w3.org/2001/XMLSchema#date"
|
||||
... >2009-09-04</cv:startDate>
|
||||
... <cv:employedIn rdf:resource="http://example.org/#company"/>
|
||||
... </rdf:Description>
|
||||
... </cv:hasWorkHistory>
|
||||
... </cv:CV>
|
||||
... </rdf:RDF>
|
||||
... ''', format="xml")
|
||||
>>>
|
||||
>>> from rdflib.compare import isomorphic
|
||||
>>> isomorphic(person_graph, expected) #doctest: +SKIP
|
||||
True
|
||||
"""
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef
|
||||
|
||||
|
||||
class Describer:
    """Imperative helper for adding triples to a graph around a current subject.

    The instance keeps a stack of subjects; every statement method
    (``value``, ``rel``, ``rev``, ``rdftype``) adds a triple that involves the
    subject on top of that stack. ``rel`` and ``rev`` return context managers
    that temporarily push another resource as the current subject.
    """

    def __init__(self, graph=None, about=None, base=None):
        """
        Create a describer.

        :param graph: graph to add triples to; a new ``Graph`` is created
            when ``None`` is given.
        :param about: initial subject, handled exactly like the argument of
            ``about()`` (a falsy value yields a fresh ``BNode``).
        :param base: default ``base`` keyword passed on when strings are
            converted to ``URIRef``.
        """
        if graph is None:
            graph = Graph()
        self.graph = graph
        self.base = base
        self._subjects = []
        self.about(about or None)

    def about(self, subject, **kws):
        """
        Sets the current subject. Will convert the given object into an
        ``URIRef`` if it's not an ``Identifier``. A falsy subject is replaced
        by a new ``BNode``.

        The subject on top of the stack is replaced (not pushed), so calling
        this inside a ``with`` block changes the subject of that block only.

        Usage::

            >>> d = Describer()
            >>> d._current() #doctest: +ELLIPSIS
            rdflib.term.BNode(...)
            >>> d.about("http://example.org/")
            >>> d._current()
            rdflib.term.URIRef('http://example.org/')

        """
        kws.setdefault("base", self.base)
        subject = cast_identifier(subject, **kws)
        if self._subjects:
            self._subjects[-1] = subject
        else:
            self._subjects.append(subject)

    def value(self, p, v, **kws):
        """
        Set a literal value for the given property. Will cast the value to an
        ``Literal`` if a plain literal is given; extra keyword arguments are
        passed to the ``Literal`` constructor in that case.

        Usage::

            >>> from rdflib import URIRef
            >>> from rdflib.namespace import RDF, RDFS
            >>> d = Describer(about="http://example.org/")
            >>> d.value(RDFS.label, "Example")
            >>> d.graph.value(URIRef('http://example.org/'), RDFS.label)
            rdflib.term.Literal('Example')

        """
        v = cast_value(v, **kws)
        self.graph.add((self._current(), p, v))

    def rel(self, p, o=None, **kws):
        """Set an object for the given property. Will convert the given object
        into an ``URIRef`` if it's not an ``Identifier``. If none is given, a
        new ``BNode`` is used.

        The triple is added immediately, whether or not the returned context
        manager is ever entered.

        Returns a context manager for use in a ``with`` block, within which the
        given object is used as current subject.

        Usage::

            >>> from rdflib import URIRef
            >>> from rdflib.namespace import RDF, RDFS
            >>> d = Describer(about="/", base="http://example.org/")
            >>> _ctxt = d.rel(RDFS.seeAlso, "/about")
            >>> d.graph.value(URIRef('http://example.org/'), RDFS.seeAlso)
            rdflib.term.URIRef('http://example.org/about')

            >>> with d.rel(RDFS.seeAlso, "/more"):
            ...     d.value(RDFS.label, "More")
            >>> (URIRef('http://example.org/'), RDFS.seeAlso,
            ...         URIRef('http://example.org/more')) in d.graph
            True
            >>> d.graph.value(URIRef('http://example.org/more'), RDFS.label)
            rdflib.term.Literal('More')

        """

        kws.setdefault("base", self.base)
        p = cast_identifier(p)
        o = cast_identifier(o, **kws)
        self.graph.add((self._current(), p, o))
        return self._subject_stack(o)

    def rev(self, p, s=None, **kws):
        """
        Same as ``rel``, but uses current subject as *object* of the relation.
        The given resource is still used as subject in the returned context
        manager.

        Usage::

            >>> from rdflib import URIRef
            >>> from rdflib.namespace import RDF, RDFS
            >>> d = Describer(about="http://example.org/")
            >>> with d.rev(RDFS.seeAlso, "http://example.net/"):
            ...     d.value(RDFS.label, "Net")
            >>> (URIRef('http://example.net/'), RDFS.seeAlso,
            ...         URIRef('http://example.org/')) in d.graph
            True
            >>> d.graph.value(URIRef('http://example.net/'), RDFS.label)
            rdflib.term.Literal('Net')

        """
        kws.setdefault("base", self.base)
        p = cast_identifier(p)
        s = cast_identifier(s, **kws)
        self.graph.add((s, p, self._current()))
        return self._subject_stack(s)

    def rdftype(self, t):
        """
        Shorthand for setting rdf:type of the current subject. The given type
        is added as-is (it is not converted).

        Usage::

            >>> from rdflib import URIRef
            >>> from rdflib.namespace import RDF, RDFS
            >>> d = Describer(about="http://example.org/")
            >>> d.rdftype(RDFS.Resource)
            >>> (URIRef('http://example.org/'),
            ...     RDF.type, RDFS.Resource) in d.graph
            True

        """
        self.graph.add((self._current(), RDF.type, t))

    def _current(self):
        """Return the subject on top of the subject stack."""
        return self._subjects[-1]

    @contextmanager
    def _subject_stack(self, subject):
        """Push ``subject`` as current subject for the duration of a ``with`` block.

        The subject is popped again even when the block raises, so an error
        inside a nested description cannot leave the describer pointing at
        the wrong subject.
        """
        self._subjects.append(subject)
        try:
            yield None
        finally:
            self._subjects.pop()
|
||||
|
||||
|
||||
def cast_value(v, **kws):
    """Return ``v`` as a ``Literal``.

    An existing ``Literal`` is returned untouched (``kws`` are ignored in
    that case); anything else is wrapped with ``Literal(v, **kws)``.
    """
    if isinstance(v, Literal):
        return v
    return Literal(v, **kws)
|
||||
|
||||
|
||||
def cast_identifier(ref, **kws):
    """Return ``ref`` as an ``Identifier``.

    A falsy ``ref`` (e.g. ``None``) is replaced by a new ``BNode``. An
    existing ``Identifier`` is returned as-is; any other value is turned into
    a ``URIRef`` using ``URIRef(ref, **kws)``.
    """
    if not ref:
        ref = BNode()
    if isinstance(ref, Identifier):
        return ref
    return URIRef(ref, **kws)
|
||||
@@ -0,0 +1,355 @@
|
||||
"""Convert (to and) from rdflib graphs to other well known graph libraries.
|
||||
|
||||
Currently the following libraries are supported:
|
||||
- networkx: MultiDiGraph, DiGraph, Graph
|
||||
- graph_tool: Graph
|
||||
|
||||
Doctests in this file are all skipped, as we can't run them conditionally if
|
||||
networkx or graph_tool are available and they would err otherwise.
|
||||
see ../../test/test_extras_external_graph_libs.py for conditional tests
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any, Dict, List
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import Graph
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _identity(x):
|
||||
return x
|
||||
|
||||
|
||||
def _rdflib_to_networkx_graph(
    graph: Graph,
    nxgraph,
    calc_weights: bool,
    edge_attrs,
    transform_s=_identity,
    transform_o=_identity,
):
    """Shared worker behind the multidigraph, digraph and graph converters.

    ``nxgraph`` is filled in place; nothing is returned.

    Arguments:
        graph: an rdflib.Graph.
        nxgraph: a networkx.Graph/DiGraph/MultiDigraph.
        calc_weights: If True adds a 'weight' attribute to each edge according
            to the count of s,p,o triples between s and o, which is meaningful
            for Graph/DiGraph.
        edge_attrs: Callable to construct edge data from s, p, o.
            'triples' attribute is handled specially to be merged.
            'weight' should not be generated if calc_weights==True.
            (see invokers below!)
        transform_s: Callable to transform node generated from s.
        transform_o: Callable to transform node generated from o.
    """
    assert callable(edge_attrs)
    assert callable(transform_s)
    assert callable(transform_o)
    import networkx as nx

    # A MultiDiGraph keeps parallel edges, so every triple becomes a new edge.
    always_new_edge = isinstance(nxgraph, nx.MultiDiGraph)

    for s, p, o in graph:
        # apply possible transformations
        source = transform_s(s)
        target = transform_o(o)
        existing = nxgraph.get_edge_data(source, target)

        if existing is not None and not always_new_edge:
            # Edge is already there: only merge into its attributes.
            if calc_weights:
                existing["weight"] += 1
            if "triples" in existing:
                existing["triples"].extend(edge_attrs(s, p, o)["triples"])
            continue

        # No edge yet (or parallel edges wanted): create it with defaults.
        attrs = edge_attrs(s, p, o)
        if calc_weights:
            attrs["weight"] = 1
        nxgraph.add_edge(source, target, **attrs)
|
||||
|
||||
|
||||
def rdflib_to_networkx_multidigraph(
    graph: Graph, edge_attrs=lambda s, p, o: {"key": p}, **kwds
):
    """Build a networkx.MultiDiGraph from the given rdflib graph.

    Subjects and objects become the nodes of the MultiDiGraph, and each
    triple becomes one edge. The predicates are used as edge keys (to
    identify multi-edges).

    :Parameters:

    - graph: a rdflib.Graph.
    - edge_attrs: Callable to construct later edge_attributes. It receives
        3 variables (s, p, o) and should construct a dictionary that is
        passed to networkx's ``add_edge(s, o, **attrs)`` function.

        By default this will include setting the MultiDiGraph key=p here.
        If you don't want to be able to re-identify the edge later on, you
        can set this to ``lambda s, p, o: {}``. In this case MultiDiGraph's
        default (increasing ints) will be used.
    - kwds: forwarded to the internal conversion helper (``transform_s`` /
        ``transform_o`` node transformations).

    Returns:
        networkx.MultiDiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_networkx_multidigraph(g)
    >>> len(mdg.edges())
    4
    >>> mdg.has_edge(a, b)
    True
    >>> mdg.has_edge(a, b, key=p)
    True
    >>> mdg.has_edge(a, b, key=q)
    True

    >>> mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s,p,o: {})
    >>> mdg.has_edge(a, b, key=0)
    True
    >>> mdg.has_edge(a, b, key=1)
    True
    """
    import networkx as nx

    multi_digraph = nx.MultiDiGraph()
    # Weights are never calculated here: parallel edges are kept separately.
    _rdflib_to_networkx_graph(graph, multi_digraph, False, edge_attrs, **kwds)
    return multi_digraph
|
||||
|
||||
|
||||
def rdflib_to_networkx_digraph(
    graph: Graph,
    calc_weights: bool = True,
    edge_attrs=lambda s, p, o: {"triples": [(s, p, o)]},
    **kwds,
):
    """Build a networkx.DiGraph from the given rdflib graph.

    As an rdflib.Graph() can contain multiple edges between nodes, by default
    a 'triples' attribute is added to the single DiGraph edge, holding a list
    of all triples between s and o.
    Also by default the edge weight is calculated as the length of triples.

    :Parameters:

    - ``graph``: a rdflib.Graph.
    - ``calc_weights``: If true calculate multi-graph edge-count as edge 'weight'
    - ``edge_attrs``: Callable to construct later edge_attributes. It receives
        3 variables (s, p, o) and should construct a dictionary that is passed to
        networkx's ``add_edge(s, o, **attrs)`` function.

        By default this will include setting the 'triples' attribute here,
        which is treated specially by us to be merged. Other attributes of
        multi-edges will only contain the attributes of the first edge.
        If you don't want the 'triples' attribute for tracking, set this to
        ``lambda s, p, o: {}``.
    - ``kwds``: forwarded to the internal conversion helper (``transform_s`` /
        ``transform_o`` node transformations).

    Returns: networkx.DiGraph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> dg = rdflib_to_networkx_digraph(g)
    >>> dg[a][b]['weight']
    2
    >>> sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)]
    True
    >>> len(dg.edges())
    3
    >>> dg.size()
    3
    >>> dg.size(weight='weight')
    4.0

    >>> dg = rdflib_to_networkx_digraph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in dg[a][b]
    False
    >>> 'triples' in dg[a][b]
    False

    """
    import networkx as nx

    digraph = nx.DiGraph()
    _rdflib_to_networkx_graph(graph, digraph, calc_weights, edge_attrs, **kwds)
    return digraph
|
||||
|
||||
|
||||
def rdflib_to_networkx_graph(
    graph: Graph,
    calc_weights: bool = True,
    edge_attrs=lambda s, p, o: {"triples": [(s, p, o)]},
    **kwds,
):
    """Build an (undirected) networkx.Graph from the given rdflib graph.

    As an rdflib.Graph() can contain multiple directed edges between nodes, by
    default a 'triples' attribute is added to the single Graph edge, holding a
    list of triples between s and o in graph.
    Also by default the edge weight is calculated as the len(triples).

    :Parameters:

    - graph: a rdflib.Graph.
    - calc_weights: If true calculate multi-graph edge-count as edge 'weight'
    - edge_attrs: Callable to construct later edge_attributes. It receives
        3 variables (s, p, o) and should construct a dictionary that is
        passed to networkx's ``add_edge(s, o, **attrs)`` function.

        By default this will include setting the 'triples' attribute here,
        which is treated specially by us to be merged. Other attributes of
        multi-edges will only contain the attributes of the first edge.
        If you don't want the 'triples' attribute for tracking, set this to
        ``lambda s, p, o: {}``.
    - kwds: forwarded to the internal conversion helper (``transform_s`` /
        ``transform_o`` node transformations).

    Returns:
        networkx.Graph

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> ug = rdflib_to_networkx_graph(g)
    >>> ug[a][b]['weight']
    3
    >>> sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)]
    True
    >>> len(ug.edges())
    2
    >>> ug.size()
    2
    >>> ug.size(weight='weight')
    4.0

    >>> ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{})
    >>> 'weight' in ug[a][b]
    False
    >>> 'triples' in ug[a][b]
    False
    """
    import networkx as nx

    undirected = nx.Graph()
    _rdflib_to_networkx_graph(graph, undirected, calc_weights, edge_attrs, **kwds)
    return undirected
|
||||
|
||||
|
||||
def rdflib_to_graphtool(
    graph: Graph,
    v_prop_names: List[str] = ["term"],
    e_prop_names: List[str] = ["term"],
    transform_s=lambda s, p, o: {"term": s},
    transform_p=lambda s, p, o: {"term": p},
    transform_o=lambda s, p, o: {"term": o},
):
    """Build a graph_tool.Graph() from the given rdflib graph.

    Subjects and objects become the vertices of the Graph (one vertex per
    distinct node). The predicates become edges, one per triple.

    :Parameters:
    - graph: a rdflib.Graph.
    - v_prop_names: a list of names for the vertex properties. The default is set
        to ['term'] (see transform_s, transform_o below).
    - e_prop_names: a list of names for the edge properties.
    - transform_s: callable with s, p, o input. Should return a dictionary
        containing a value for each name in v_prop_names. By default is set
        to {'term': s} which in combination with v_prop_names = ['term']
        adds s as 'term' property to the generated vertex for s. It is only
        called the first time a node is seen.
    - transform_p: similar to transform_s, but wrt. e_prop_names. By default
        returns {'term': p} which adds p as a property to the generated
        edge between the vertex for s and the vertex for o.
    - transform_o: similar to transform_s.

    Returns: graph_tool.Graph()

    >>> from rdflib import Graph, URIRef, Literal
    >>> g = Graph()
    >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    >>> p, q = URIRef('p'), URIRef('q')
    >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    >>> for t in edges:
    ...     g.add(t)
    ...
    >>> mdg = rdflib_to_graphtool(g)
    >>> len(list(mdg.edges()))
    4
    >>> from graph_tool import util as gt_util
    >>> vpterm = mdg.vertex_properties['term']
    >>> va = gt_util.find_vertex(mdg, vpterm, a)[0]
    >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
    True
    >>> epterm = mdg.edge_properties['term']
    >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1
    True

    >>> mdg = rdflib_to_graphtool(
    ...     g,
    ...     e_prop_names=[str('name')],
    ...     transform_p=lambda s, p, o: {str('name'): str(p)})
    >>> epterm = mdg.edge_properties['name']
    >>> len(list(gt_util.find_edge(mdg, epterm, str(p)))) == 3
    True
    >>> len(list(gt_util.find_edge(mdg, epterm, str(q)))) == 1
    True

    """
    # pytype error: Can't find module 'graph_tool'.
    import graph_tool as gt  # pytype: disable=import-error

    gt_graph = gt.Graph()

    # One "object" property map per requested vertex property name.
    vertex_maps = []
    for name in v_prop_names:
        prop_map = gt_graph.new_vertex_property("object")
        gt_graph.vertex_properties[name] = prop_map
        vertex_maps.append((name, prop_map))

    # One "object" property map per requested edge property name.
    edge_maps = []
    for name in e_prop_names:
        prop_map = gt_graph.new_edge_property("object")
        gt_graph.edge_properties[name] = prop_map
        edge_maps.append((name, prop_map))

    vertex_of: Dict[Any, Any] = {}

    def _vertex_for(node, transform, s, p, o):
        # Reuse the vertex of an already seen node; otherwise create one and
        # fill its properties from the triple that introduced it.
        vertex = vertex_of.get(node)
        if vertex is None:
            vertex = gt_graph.add_vertex()
            vertex_of[node] = vertex
            values = transform(s, p, o)
            for name, prop_map in vertex_maps:
                prop_map[vertex] = values[name]
        return vertex

    for s, p, o in graph:
        source = _vertex_for(s, transform_s, s, p, o)
        target = _vertex_for(o, transform_o, s, p, o)

        edge = gt_graph.add_edge(source, target)
        edge_values = transform_p(s, p, o)
        for name, prop_map in edge_maps:
            prop_map[edge] = edge_values[name]
    return gt_graph
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
Utilities for interacting with SHACL Shapes Graphs more easily.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Optional, Union
|
||||
|
||||
from rdflib import BNode, Graph, Literal, URIRef, paths
|
||||
from rdflib.collection import Collection
|
||||
from rdflib.namespace import RDF, SH
|
||||
from rdflib.paths import Path
|
||||
from rdflib.term import Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.term import IdentifiedNode
|
||||
|
||||
|
||||
class SHACLPathError(Exception):
    """Raised when a SHACL path cannot be parsed from, or built into, a graph."""
|
||||
|
||||
|
||||
# Lookup table from the modifier of a variable-length path (as stored on
# ``MulPath.mod``) to the SHACL predicate expressing the same repetition.
_PATH_MOD_TO_PRED = {
    paths.ZeroOrMore: SH.zeroOrMorePath,
    paths.OneOrMore: SH.oneOrMorePath,
    paths.ZeroOrOne: SH.zeroOrOnePath,
}
|
||||
|
||||
|
||||
# This implementation is roughly based on
|
||||
# pyshacl.helper.sparql_query_helper::SPARQLQueryHelper._shacl_path_to_sparql_path
|
||||
def parse_shacl_path(
    shapes_graph: Graph,
    path_identifier: Node,
) -> Union[URIRef, Path]:
    """
    Read a SHACL path (e.g. the object of a triple with predicate sh:path) out
    of a :class:`~rdflib.graph.Graph`.

    A plain predicate path comes back as the :class:`~rdflib.term.URIRef`
    itself; every other path form is converted, recursively, into the
    matching :class:`~rdflib.paths.Path` object.

    :param shapes_graph: A :class:`~rdflib.graph.Graph` containing the path to be parsed
    :param path_identifier: A :class:`~rdflib.term.Node` of the path
    :return: A :class:`~rdflib.term.URIRef` or a :class:`~rdflib.paths.Path`
    :raises TypeError: if ``path_identifier`` is a Literal
    :raises SHACLPathError: if the node is ``rdf:nil``, a list with fewer
        than two items, or matches none of the supported path forms
    """
    # Literals are not allowed.
    if isinstance(path_identifier, Literal):
        raise TypeError("Literals are not a valid SHACL path.")

    # If a path is a URI, that's the whole path.
    if isinstance(path_identifier, URIRef):
        if path_identifier == RDF.nil:
            raise SHACLPathError(
                "A list of SHACL Paths must contain at least two path items."
            )
        return path_identifier

    # Sequence path: the node itself is the head of an RDF list.
    if shapes_graph.value(path_identifier, RDF.first) is not None:
        steps = list(shapes_graph.items(path_identifier))
        if len(steps) < 2:
            raise SHACLPathError(
                "A list of SHACL Sequence Paths must contain at least two path items."
            )
        return paths.SequencePath(
            *[parse_shacl_path(shapes_graph, step) for step in steps]
        )

    # sh:inversePath
    inverse_target = shapes_graph.value(path_identifier, SH.inversePath)
    if inverse_target:
        return paths.InvPath(parse_shacl_path(shapes_graph, inverse_target))

    # sh:alternativePath points at an RDF list of the alternatives.
    alternatives_head = shapes_graph.value(path_identifier, SH.alternativePath)
    if alternatives_head:
        options = list(shapes_graph.items(alternatives_head))
        if len(options) < 2:
            raise SHACLPathError(
                "List of SHACL alternate paths must have at least two path items."
            )
        return paths.AlternativePath(
            *[parse_shacl_path(shapes_graph, option) for option in options]
        )

    # sh:zeroOrMorePath, sh:oneOrMorePath, sh:zeroOrOnePath - checked in
    # this order, each mapping to the corresponding MulPath modifier.
    for predicate, modifier in (
        (SH.zeroOrMorePath, "*"),
        (SH.oneOrMorePath, "+"),
        (SH.zeroOrOnePath, "?"),
    ):
        repeated = shapes_graph.value(path_identifier, predicate)
        if repeated:
            return paths.MulPath(parse_shacl_path(shapes_graph, repeated), modifier)

    # None of the above options were found
    raise SHACLPathError(f"Cannot parse {repr(path_identifier)} as a SHACL Path.")
|
||||
|
||||
|
||||
def _build_path_component(
    graph: Graph, path_component: URIRef | Path
) -> IdentifiedNode:
    """
    Helper method that implements the recursive component of SHACL path
    triple construction.

    :param graph: A :class:`~rdflib.graph.Graph` into which to insert triples
    :param path_component: A :class:`~rdflib.term.URIRef` or
        :class:`~rdflib.paths.Path` that is part of a path expression
    :return: The :class:`~rdflib.term.IdentifiedNode` of the resource in the
        graph that corresponds to the provided path_component
    :raises TypeError: if ``path_component`` is neither a URIRef nor a Path
    :raises SHACLPathError: if a sequence or alternative path has fewer than
        two items, a variable-length path has an unknown modifier, or the
        path is of a kind that has no SHACL representation
    """
    # Literals or other types are not allowed
    if not isinstance(path_component, (URIRef, Path)):
        raise TypeError(
            f"Objects of type {type(path_component)} are not valid "
            + "components of a SHACL path."
        )

    # If the path component is a URI, return it
    elif isinstance(path_component, URIRef):
        return path_component
    # Otherwise, the path component is represented as a blank node
    bnode = BNode()

    # Handle Sequence Paths
    if isinstance(path_component, paths.SequencePath):
        # Sequence paths are a Collection directly with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "A list of SHACL Sequence Paths must contain at least two path items."
            )
        Collection(
            graph,
            bnode,
            [_build_path_component(graph, arg) for arg in path_component.args],
        )

    # Handle Inverse Paths
    elif isinstance(path_component, paths.InvPath):
        graph.add(
            (bnode, SH.inversePath, _build_path_component(graph, path_component.arg))
        )

    # Handle Alternative Paths
    elif isinstance(path_component, paths.AlternativePath):
        # Alternative paths are a Collection but referenced by sh:alternativePath
        # with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "List of SHACL alternate paths must have at least two path items."
            )
        coll = Collection(
            graph,
            BNode(),
            [_build_path_component(graph, arg) for arg in path_component.args],
        )
        graph.add((bnode, SH.alternativePath, coll.uri))

    # Handle Variable Length Paths
    elif isinstance(path_component, paths.MulPath):
        # Get the predicate corresponding to the path modifier
        pred = _PATH_MOD_TO_PRED.get(path_component.mod)
        if pred is None:
            raise SHACLPathError(f"Unknown path modifier {path_component.mod}")
        graph.add((bnode, pred, _build_path_component(graph, path_component.path)))

    # Any other kind of Path has no SHACL counterpart here. Fail loudly
    # instead of returning a blank node that carries no triples at all.
    else:
        raise SHACLPathError(
            f"Paths of type {type(path_component)} cannot be represented "
            + "as a SHACL path."
        )

    # Return the blank node created for the provided path_component
    return bnode
|
||||
|
||||
|
||||
def build_shacl_path(
    path: URIRef | Path, target_graph: Graph | None = None
) -> tuple[IdentifiedNode, Graph | None]:
    """
    Construct the SHACL Path triples for a path: a :class:`~rdflib.term.URIRef`
    for simple paths or a :class:`~rdflib.paths.Path` for complex paths.

    The result is the :class:`~rdflib.term.IdentifiedNode` identifying the
    path (to be used as the object of a triple with predicate sh:path)
    together with the graph that received any new triples.

    :param path: A :class:`~rdflib.term.URIRef` or a :class:`~rdflib.paths.Path`
    :param target_graph: Optionally, a :class:`~rdflib.graph.Graph` into which to put
        constructed triples. If not provided, a new graph will be created
    :return: A (path_identifier, graph) tuple where:
        - path_identifier: If path is a :class:`~rdflib.term.URIRef`, this is simply
        the provided path. If path is a :class:`~rdflib.paths.Path`, this is
        the :class:`~rdflib.term.BNode` corresponding to the root of the SHACL
        path expression added to the graph.
        - graph: None if path is a :class:`~rdflib.term.URIRef` (as no new triples
        are constructed). If path is a :class:`~rdflib.paths.Path`, this is either the
        target_graph provided or a new graph into which the path triples were added.
    """
    # A bare URI is already the complete path; there is nothing to build.
    if isinstance(path, URIRef):
        return path, None

    # Fall back to a fresh graph when the caller did not supply one.
    destination = Graph() if target_graph is None else target_graph

    # Recurse through the path to build the graph representation
    root = _build_path_component(destination, path)
    return root, destination
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,118 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class CSVW(DefinedNamespace):
    """
    Vocabulary terms of the CSVW namespace.

    RDFS vocabulary description used in the Metadata Vocabulary for Tabular
    Data [[tabular-metadata]], together with the default JSON-LD Context.

    Generated from: http://www.w3.org/ns/csvw
    Date: 2020-05-26 14:19:58.184766
    """

    # Base IRI that every term declared below is resolved against.
    _NS = Namespace("http://www.w3.org/ns/csvw#")

    # NOTE(review): presumably makes lookups of undeclared terms fail hard
    # instead of warning -- confirm against DefinedNamespace.
    _fail = True

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    aboutUrl: URIRef  # A URI template property that MAY be used to indicate what a cell contains information about.
    base: URIRef  # An atomic property that contains a single string: a term defined in the default context representing a built-in datatype URL, as listed above.
    column: URIRef  # An array property of column descriptions as described in section 5.6 Columns.
    columnReference: URIRef  # A column reference property that holds either a single reference to a column description object within this schema, or an array of references. These form the referencing columns for the foreign key definition.
    commentPrefix: URIRef  # An atomic property that sets the comment prefix flag to the single provided value, which MUST be a string.
    datatype: URIRef  # An object property that contains either a single string that is the main datatype of the values of the cell or a datatype description object. If the value of this property is a string, it MUST be one of the built-in datatypes defined in section 5.11.1 Built-in Datatypes or an absolute URL; if it is an object then it describes a more specialised datatype.
    decimalChar: URIRef  # A string whose value is used to represent a decimal point within the number.
    default: URIRef  # An atomic property holding a single string that is used to create a default value for the cell in cases where the original string value is an empty string.
    delimiter: URIRef  # An atomic property that sets the delimiter flag to the single provided value, which MUST be a string.
    describes: URIRef  # From IANA describes: The relationship A 'describes' B asserts that resource A provides a description of resource B. There are no constraints on the format or representation of either A or B, neither are there any further constraints on either resource.
    dialect: URIRef  # An object property that provides a single dialect description. If provided, dialect provides hints to processors about how to parse the referenced files to create tabular data models for the tables in the group.
    doubleQuote: URIRef  # A boolean atomic property that, if `true`, sets the escape character flag to `"`.
    encoding: URIRef  # An atomic property that sets the encoding flag to the single provided string value, which MUST be defined in [[encoding]]. The default is "utf-8".
    foreignKey: URIRef  # For a Table: a list of foreign keys on the table. For a Schema: an array property of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables.
    format: URIRef  # An atomic property that contains either a single string or an object that defines the format of a value of this type, used when parsing a string value as described in Parsing Cells in [[tabular-data-model]].
    groupChar: URIRef  # A string whose value is used to group digits within the number.
    header: URIRef  # A boolean atomic property that, if `true`, sets the header row count flag to `1`, and if `false` to `0`, unless headerRowCount is provided, in which case the value provided for the header property is ignored.
    headerRowCount: URIRef  # A numeric atomic property that sets the header row count flag to the single provided value, which must be a non-negative integer.
    lang: URIRef  # An atomic property giving a single string language code as defined by [[BCP47]].
    length: URIRef  # The exact length of the value of the cell.
    lineTerminators: URIRef  # An atomic property that sets the line terminators flag to either an array containing the single provided string value, or the provided array.
    maxExclusive: URIRef  # An atomic property that contains a single number that is the maximum valid value (exclusive).
    maxInclusive: URIRef  # An atomic property that contains a single number that is the maximum valid value (inclusive).
    maxLength: URIRef  # A numeric atomic property that contains a single integer that is the maximum length of the value.
    minExclusive: URIRef  # An atomic property that contains a single number that is the minimum valid value (exclusive).
    minInclusive: URIRef  # An atomic property that contains a single number that is the minimum valid value (inclusive).
    minLength: URIRef  # An atomic property that contains a single integer that is the minimum length of the value.
    name: URIRef  # An atomic property that gives a single canonical name for the column. The value of this property becomes the name annotation for the described column.
    note: URIRef  # An array property that provides an array of objects representing arbitrary annotations on the annotated tabular data model.
    null: URIRef  # An atomic property giving the string or strings used for null values within the data. If the string value of the cell is equal to any one of these values, the cell value is `null`.
    ordered: URIRef  # A boolean atomic property taking a single value which indicates whether a list that is the value of the cell is ordered (if `true`) or unordered (if `false`).
    pattern: URIRef  # A regular expression string, in the syntax and interpreted as defined by [[ECMASCRIPT]].
    primaryKey: URIRef  # For Schema: A column reference property that holds either a single reference to a column description object or an array of references. For Row: a possibly empty list of cells whose values together provide a unique identifier for this row. This is similar to the name of a column.
    propertyUrl: URIRef  # A URI template property that MAY be used to create a URI for a property if the table is mapped to another format.
    quoteChar: URIRef  # An atomic property that sets the quote character flag to the single provided value, which must be a string or `null`.
    reference: URIRef  # An object property that identifies a **referenced table** and a set of **referenced columns** within that table.
    referencedRow: URIRef  # A possibly empty list of pairs of a foreign key and a row in a table within the same group of tables.
    required: URIRef  # A boolean atomic property taking a single value which indicates whether the cell must have a non-null value. The default is `false`.
    resource: URIRef  # A link property holding a URL that is the identifier for a specific table that is being referenced.
    row: URIRef  # Relates a Table to each Row output.
    rowTitle: URIRef  # A column reference property that holds either a single reference to a column description object or an array of references.
    rownum: URIRef  # The position of the row amongst the rows of the Annotated Table, starting from 1.
    schemaReference: URIRef  # A link property holding a URL that is the identifier for a schema that is being referenced.
    scriptFormat: URIRef  # A link property giving the single URL for the format that is used by the script or template.
    separator: URIRef  # An atomic property that MUST have a single string value that is the character used to separate items in the string value of the cell.
    skipBlankRows: URIRef  # A boolean atomic property that sets the `skip blank rows` flag to the single provided boolean value.
    skipColumns: URIRef  # A numeric atomic property that sets the `skip columns` flag to the single provided numeric value, which MUST be a non-negative integer.
    skipInitialSpace: URIRef  # A boolean atomic property that, if `true`, sets the trim flag to "start". If `false`, to `false`.
    skipRows: URIRef  # A numeric atomic property that sets the `skip rows` flag to the single provided numeric value, which MUST be a non-negative integer.
    source: URIRef  # A single string atomic property that provides, if specified, the format to which the tabular data should be transformed prior to the transformation using the script or template.
    suppressOutput: URIRef  # A boolean atomic property. If `true`, suppresses any output that would be generated when converting a table or cells within a column.
    table: URIRef  # Relates a Table group to annotated tables.
    tableDirection: URIRef  # One of `rtl`, `ltr` or `auto`. Indicates whether the tables in the group should be displayed with the first column on the right, on the left, or based on the first character in the table that has a specific direction.
    tableSchema: URIRef  # An object property that provides a single schema description as described in section 5.5 Schemas, used as the default for all the tables in the group.
    targetFormat: URIRef  # A link property giving the single URL for the format that will be created through the transformation.
    textDirection: URIRef  # An atomic property that must have a single value that is one of `rtl` or `ltr` (the default).
    title: URIRef  # For a Transformation: A natural language property that describes the format that will be generated from the transformation. For a Column: A natural language property that provides possible alternative names for the column.
    transformations: URIRef  # An array property of transformation definitions that provide mechanisms to transform the tabular data into other formats.
    trim: URIRef  # An atomic property that, if the boolean `true`, sets the trim flag to `true` and if the boolean `false` to `false`. If the value provided is a string, sets the trim flag to the provided value, which must be one of "true", "false", "start" or "end".
    url: URIRef  # For a Table: This link property gives the single URL of the CSV file that the table is held in, relative to the location of the metadata document. For a Transformation: A link property giving the single URL of the file that the script or template is held in, relative to the location of the metadata document.
    valueUrl: URIRef  # A URI template property that is used to map the values of cells into URLs.
    virtual: URIRef  # A boolean atomic property taking a single value which indicates whether the column is a virtual column not present in the original source.

    # http://www.w3.org/2000/01/rdf-schema#Class
    Cell: URIRef  # A Cell represents a cell at the intersection of a Row and a Column within a Table.
    Column: URIRef  # A Column represents a vertical arrangement of Cells within a Table.
    Datatype: URIRef  # Describes facets of a datatype.
    Dialect: URIRef  # A Dialect Description provides hints to parsers about how to parse a linked file.
    Direction: URIRef  # The class of table/text directions.
    ForeignKey: URIRef  # Describes relationships between Columns in one or more Tables.
    NumericFormat: URIRef  # If the datatype is a numeric type, the format property indicates the expected format for that number. Its value must be either a single string or an object with one or more properties.
    Row: URIRef  # A Row represents a horizontal arrangement of cells within a Table.
    Schema: URIRef  # A Schema is a definition of a tabular format that may be common to multiple tables.
    Table: URIRef  # An annotated table is a table that is annotated with additional metadata.
    TableGroup: URIRef  # A Group of Tables comprises a set of Annotated Tables and a set of annotations that relate to those Tables.
    TableReference: URIRef  # An object property that identifies a referenced table and a set of referenced columns within that table.
    Transformation: URIRef  # A Transformation Definition is a definition of how tabular data can be transformed into another format.

    # http://www.w3.org/2000/01/rdf-schema#Datatype
    JSON: URIRef  # A literal containing JSON.
    uriTemplate: URIRef  # (no description given in the generated source)

    # http://www.w3.org/ns/csvw#Direction
    auto: URIRef  # Indicates whether the tables in the group should be displayed based on the first character in the table that has a specific direction.
    inherit: URIRef  # For `textDirection`, indicates that the direction is inherited from the `tableDirection` annotation of the `table`.
    ltr: URIRef  # Indicates whether the tables in the group should be displayed with the first column on the left.
    rtl: URIRef  # Indicates whether the tables in the group should be displayed with the first column on the right.

    # http://www.w3.org/ns/prov#Role
    csvEncodedTabularData: URIRef  # Describes the role of a CSV file in the tabular data mapping.
    tabularMetadata: URIRef  # Describes the role of a Metadata file in the tabular data mapping.
|
||||
@@ -0,0 +1,37 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class DC(DefinedNamespace):
    """
    Terms of the Dublin Core Metadata Element Set, Version 1.1.

    Generated from: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_elements.ttl
    Date: 2020-05-26 14:19:58.671906
    """

    # Base IRI that every term declared below is resolved against.
    _NS = Namespace("http://purl.org/dc/elements/1.1/")

    # NOTE(review): presumably makes lookups of undeclared terms fail hard
    # instead of warning -- confirm against DefinedNamespace.
    _fail = True

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    contributor: URIRef  # An entity responsible for making contributions to the resource.
    coverage: URIRef  # The spatial or temporal topic of the resource, spatial applicability of the resource, or jurisdiction under which the resource is relevant.
    creator: URIRef  # An entity primarily responsible for making the resource.
    date: URIRef  # A point or period of time associated with an event in the lifecycle of the resource.
    description: URIRef  # An account of the resource.
    format: URIRef  # The file format, physical medium, or dimensions of the resource.
    identifier: URIRef  # An unambiguous reference to the resource within a given context.
    language: URIRef  # A language of the resource.
    publisher: URIRef  # An entity responsible for making the resource available.
    relation: URIRef  # A related resource.
    rights: URIRef  # Information about rights held in and over the resource.
    source: URIRef  # A related resource from which the described resource is derived.
    subject: URIRef  # The topic of the resource.
    title: URIRef  # A name given to the resource.
    type: URIRef  # The nature or genre of the resource.
|
||||
@@ -0,0 +1,24 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class DCAM(DefinedNamespace):
    """
    DCAM metadata terms for vocabulary description.

    Generated from: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_abstract_model.ttl
    Date: 2020-05-26 14:20:00.970966
    """

    # Base IRI that every term declared below is resolved against.
    _NS = Namespace("http://purl.org/dc/dcam/")

    # NOTE(review): presumably makes lookups of undeclared terms fail hard
    # instead of warning -- confirm against DefinedNamespace.
    _fail = True

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    domainIncludes: URIRef  # A suggested class for subjects of this property.
    memberOf: URIRef  # A relationship between a resource and a vocabulary encoding scheme which indicates that the resource is a member of a set.
    rangeIncludes: URIRef  # A suggested class for values of this property.

    # http://www.w3.org/2000/01/rdf-schema#Class
    VocabularyEncodingScheme: URIRef  # An enumerated set of resources.
|
||||
@@ -0,0 +1,71 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class DCAT(DefinedNamespace):
    """
    Terms of the data catalog vocabulary (DCAT).

    DCAT is an RDF vocabulary designed to facilitate interoperability between
    data catalogs published on the Web. By using DCAT to describe datasets in
    data catalogs, publishers increase discoverability and enable applications
    easily to consume metadata from multiple catalogs. It further enables
    decentralized publishing of catalogs and facilitates federated dataset
    search across sites. Aggregated DCAT metadata can serve as a manifest file
    to facilitate digital preservation. DCAT is defined at
    http://www.w3.org/TR/vocab-dcat/. Any variance between that normative
    document and this schema is an error in this schema.

    Generated from: https://www.w3.org/ns/dcat2.ttl
    Date: 2020-05-26 14:19:59.985854
    """

    # Base IRI that every term declared below is resolved against.
    # NOTE(review): unlike the sibling generated namespaces, this class does
    # not set `_fail`; left untouched on purpose so lookups behave as before.
    _NS = Namespace("http://www.w3.org/ns/dcat#")

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    accessURL: URIRef  # A URL of a resource that gives access to a distribution of the dataset. E.g. landing page, feed, SPARQL endpoint. Use for all cases except a simple download link, in which case downloadURL is preferred.
    bbox: URIRef  # The geographic bounding box of a resource.
    byteSize: URIRef  # The size of a distribution in bytes.
    centroid: URIRef  # The geographic center (centroid) of a resource.
    compressFormat: URIRef  # The compression format of the distribution in which the data is contained in a compressed form, e.g. to reduce the size of the downloadable file.
    contactPoint: URIRef  # Relevant contact information for the catalogued resource. Use of vCard is recommended.
    dataset: URIRef  # A collection of data that is listed in the catalog.
    distribution: URIRef  # An available distribution of the dataset.
    downloadURL: URIRef  # The URL of the downloadable file in a given format. E.g. CSV file or RDF file. The format is indicated by the distribution's dct:format and/or dcat:mediaType.
    endDate: URIRef  # The end of the period.
    keyword: URIRef  # A keyword or tag describing a resource.
    landingPage: URIRef  # A Web page that can be navigated to in a Web browser to gain access to the catalog, a dataset, its distributions and/or additional information.
    mediaType: URIRef  # The media type of the distribution as defined by IANA.
    packageFormat: URIRef  # The package format of the distribution in which one or more data files are grouped together, e.g. to enable a set of related files to be downloaded together.
    record: URIRef  # A record describing the registration of a single dataset or data service that is part of the catalog.
    startDate: URIRef  # The start of the period.
    theme: URIRef  # A main category of the resource. A resource can have multiple themes.
    themeTaxonomy: URIRef  # The knowledge organization system (KOS) used to classify catalog's datasets.

    # http://www.w3.org/2000/01/rdf-schema#Class
    Catalog: URIRef  # A curated collection of metadata about resources (e.g., datasets and data services in the context of a data catalog).
    CatalogRecord: URIRef  # A record in a data catalog, describing the registration of a single dataset or data service.
    Dataset: URIRef  # A collection of data, published or curated by a single source, and available for access or download in one or more representations.
    Distribution: URIRef  # A specific representation of a dataset. A dataset might be available in multiple serializations that may differ in various ways, including natural language, media-type or format, schematic organization, temporal and spatial resolution, level of detail or profiles (which might specify any or all of the above).

    # http://www.w3.org/2002/07/owl#Class
    DataService: URIRef  # A site or end-point providing operations related to the discovery of, access to, or processing functions on, data or related resources.
    Relationship: URIRef  # An association class for attaching additional information to a relationship between DCAT Resources.
    Resource: URIRef  # Resource published or curated by a single agent.
    Role: URIRef  # A role is the function of a resource or agent with respect to another resource, in the context of resource attribution or resource relationships.

    # http://www.w3.org/2002/07/owl#DatatypeProperty
    spatialResolutionInMeters: URIRef  # Minimum spatial separation resolvable in a dataset, measured in meters.
    temporalResolution: URIRef  # Minimum time period resolvable in a dataset.

    # http://www.w3.org/2002/07/owl#ObjectProperty
    accessService: URIRef  # A site or end-point that gives access to the distribution of the dataset.
    catalog: URIRef  # A catalog whose contents are of interest in the context of this catalog.
    endpointDescription: URIRef  # A description of the service end-point, including its operations, parameters etc.
    endpointURL: URIRef  # The root location or primary endpoint of the service (a web-resolvable IRI).
    hadRole: URIRef  # The function of an entity or agent with respect to another entity or resource.
    qualifiedRelation: URIRef  # Link to a description of a relationship with another resource.
    servesDataset: URIRef  # A collection of data that this DataService can distribute.
    service: URIRef  # A site or endpoint that is listed in the catalog.
|
||||
@@ -0,0 +1,30 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class DCMITYPE(DefinedNamespace):
    """
    Classes of the DCMI Type Vocabulary.

    Generated from: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_type.ttl
    Date: 2020-05-26 14:19:59.084150
    """

    # Base IRI that every term declared below is resolved against.
    _NS = Namespace("http://purl.org/dc/dcmitype/")

    # NOTE(review): presumably makes lookups of undeclared terms fail hard
    # instead of warning -- confirm against DefinedNamespace.
    _fail = True

    # http://www.w3.org/2000/01/rdf-schema#Class
    Collection: URIRef  # An aggregation of resources.
    Dataset: URIRef  # Data encoded in a defined structure.
    Event: URIRef  # A non-persistent, time-based occurrence.
    Image: URIRef  # A visual representation other than text.
    InteractiveResource: URIRef  # A resource requiring interaction from the user to be understood, executed, or experienced.
    MovingImage: URIRef  # A series of visual representations imparting an impression of motion when shown in succession.
    PhysicalObject: URIRef  # An inanimate, three-dimensional object or substance.
    Service: URIRef  # A system that provides one or more functions.
    Software: URIRef  # A computer program in source or compiled form.
    Sound: URIRef  # A resource primarily intended to be heard.
    StillImage: URIRef  # A static visual representation.
    Text: URIRef  # A resource consisting primarily of words for reading.
|
||||
@@ -0,0 +1,139 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class DCTERMS(DefinedNamespace):
    """
    DCMI Metadata Terms - other.

    Generated from: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.ttl
    Date: 2020-05-26 14:20:00.590514
    """

    # Base IRI that every term declared below is resolved against.
    _NS = Namespace("http://purl.org/dc/terms/")

    # NOTE(review): presumably makes lookups of undeclared terms fail hard
    # instead of warning -- confirm against DefinedNamespace.
    _fail = True

    # Term names containing a hyphen cannot be written as Python attribute
    # annotations, so they are listed by name instead.
    _extras = ["ISO639-2", "ISO639-3"]

    # http://purl.org/dc/dcam/VocabularyEncodingScheme
    DCMIType: URIRef  # The set of classes specified by the DCMI Type Vocabulary, used to categorize the nature or genre of the resource.
    DDC: URIRef  # The set of conceptual resources specified by the Dewey Decimal Classification.
    IMT: URIRef  # The set of media types specified by the Internet Assigned Numbers Authority.
    LCC: URIRef  # The set of conceptual resources specified by the Library of Congress Classification.
    LCSH: URIRef  # The set of labeled concepts specified by the Library of Congress Subject Headings.
    MESH: URIRef  # The set of labeled concepts specified by the Medical Subject Headings.
    NLM: URIRef  # The set of conceptual resources specified by the National Library of Medicine Classification.
    TGN: URIRef  # The set of places specified by the Getty Thesaurus of Geographic Names.
    UDC: URIRef  # The set of conceptual resources specified by the Universal Decimal Classification.

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    abstract: URIRef  # A summary of the resource.
    accessRights: URIRef  # Information about who can access the resource or an indication of its security status.
    accrualMethod: URIRef  # The method by which items are added to a collection.
    accrualPeriodicity: URIRef  # The frequency with which items are added to a collection.
    accrualPolicy: URIRef  # The policy governing the addition of items to a collection.
    alternative: URIRef  # An alternative name for the resource.
    audience: URIRef  # A class of agents for whom the resource is intended or useful.
    available: URIRef  # Date that the resource became or will become available.
    bibliographicCitation: URIRef  # A bibliographic reference for the resource.
    conformsTo: URIRef  # An established standard to which the described resource conforms.
    contributor: URIRef  # An entity responsible for making contributions to the resource.
    coverage: URIRef  # The spatial or temporal topic of the resource, spatial applicability of the resource, or jurisdiction under which the resource is relevant.
    created: URIRef  # Date of creation of the resource.
    creator: URIRef  # An entity responsible for making the resource.
    date: URIRef  # A point or period of time associated with an event in the lifecycle of the resource.
    dateAccepted: URIRef  # Date of acceptance of the resource.
    dateCopyrighted: URIRef  # Date of copyright of the resource.
    dateSubmitted: URIRef  # Date of submission of the resource.
    description: URIRef  # An account of the resource.
    educationLevel: URIRef  # A class of agents, defined in terms of progression through an educational or training context, for which the described resource is intended.
    extent: URIRef  # The size or duration of the resource.
    format: URIRef  # The file format, physical medium, or dimensions of the resource.
    hasFormat: URIRef  # A related resource that is substantially the same as the pre-existing described resource, but in another format.
    hasPart: URIRef  # A related resource that is included either physically or logically in the described resource.
    hasVersion: URIRef  # A related resource that is a version, edition, or adaptation of the described resource.
    identifier: URIRef  # An unambiguous reference to the resource within a given context.
    instructionalMethod: URIRef  # A process, used to engender knowledge, attitudes and skills, that the described resource is designed to support.
    isFormatOf: URIRef  # A pre-existing related resource that is substantially the same as the described resource, but in another format.
    isPartOf: URIRef  # A related resource in which the described resource is physically or logically included.
    isReferencedBy: URIRef  # A related resource that references, cites, or otherwise points to the described resource.
    isReplacedBy: URIRef  # A related resource that supplants, displaces, or supersedes the described resource.
    isRequiredBy: URIRef  # A related resource that requires the described resource to support its function, delivery, or coherence.
    isVersionOf: URIRef  # A related resource of which the described resource is a version, edition, or adaptation.
    issued: URIRef  # Date of formal issuance of the resource.
    language: URIRef  # A language of the resource.
    license: URIRef  # A legal document giving official permission to do something with the resource.
    mediator: URIRef  # An entity that mediates access to the resource.
    medium: URIRef  # The material or physical carrier of the resource.
    modified: URIRef  # Date on which the resource was changed.
    provenance: URIRef  # A statement of any changes in ownership and custody of the resource since its creation that are significant for its authenticity, integrity, and interpretation.
    publisher: URIRef  # An entity responsible for making the resource available.
    references: URIRef  # A related resource that is referenced, cited, or otherwise pointed to by the described resource.
    relation: URIRef  # A related resource.
    replaces: URIRef  # A related resource that is supplanted, displaced, or superseded by the described resource.
    requires: URIRef  # A related resource that is required by the described resource to support its function, delivery, or coherence.
    rights: URIRef  # Information about rights held in and over the resource.
    rightsHolder: URIRef  # A person or organization owning or managing rights over the resource.
    source: URIRef  # A related resource from which the described resource is derived.
    spatial: URIRef  # Spatial characteristics of the resource.
    subject: URIRef  # A topic of the resource.
    tableOfContents: URIRef  # A list of subunits of the resource.
    temporal: URIRef  # Temporal characteristics of the resource.
    title: URIRef  # A name given to the resource.
    type: URIRef  # The nature or genre of the resource.
    valid: URIRef  # Date (often a range) of validity of a resource.

    # http://www.w3.org/2000/01/rdf-schema#Class
    Agent: URIRef  # A resource that acts or has the power to act.
    AgentClass: URIRef  # A group of agents.
    BibliographicResource: URIRef  # A book, article, or other documentary resource.
    FileFormat: URIRef  # A digital resource format.
    Frequency: URIRef  # A rate at which something recurs.
    Jurisdiction: URIRef  # The extent or range of judicial, law enforcement, or other authority.
    LicenseDocument: URIRef  # A legal document giving official permission to do something with a resource.
    LinguisticSystem: URIRef  # A system of signs, symbols, sounds, gestures, or rules used in communication.
    Location: URIRef  # A spatial region or named place.
    LocationPeriodOrJurisdiction: URIRef  # A location, period of time, or jurisdiction.
    MediaType: URIRef  # A file format or physical medium.
    MediaTypeOrExtent: URIRef  # A media type or extent.
    MethodOfAccrual: URIRef  # A method by which resources are added to a collection.
    MethodOfInstruction: URIRef  # A process that is used to engender knowledge, attitudes, and skills.
    PeriodOfTime: URIRef  # An interval of time that is named or defined by its start and end dates.
    PhysicalMedium: URIRef  # A physical material or carrier.
    PhysicalResource: URIRef  # A material thing.
    Policy: URIRef  # A plan or course of action by an authority, intended to influence and determine decisions, actions, and other matters.
    ProvenanceStatement: URIRef  # Any changes in ownership and custody of a resource since its creation that are significant for its authenticity, integrity, and interpretation.
    RightsStatement: URIRef  # A statement about the intellectual property rights (IPR) held in or over a resource, a legal document giving official permission to do something with a resource, or a statement about access rights.
    SizeOrDuration: URIRef  # A dimension or extent, or a time taken to play or execute.
    Standard: URIRef  # A reference point against which other things can be evaluated or compared.

    # http://www.w3.org/2000/01/rdf-schema#Datatype
    Box: URIRef  # The set of regions in space defined by their geographic coordinates according to the DCMI Box Encoding Scheme.
    ISO3166: URIRef  # The set of codes listed in ISO 3166-1 for the representation of names of countries.
    Period: URIRef  # The set of time intervals defined by their limits according to the DCMI Period Encoding Scheme.
    Point: URIRef  # The set of points in space defined by their geographic coordinates according to the DCMI Point Encoding Scheme.
    RFC1766: URIRef  # The set of tags, constructed according to RFC 1766, for the identification of languages.
    RFC3066: URIRef  # The set of tags constructed according to RFC 3066 for the identification of languages.
    RFC4646: URIRef  # The set of tags constructed according to RFC 4646 for the identification of languages.
    RFC5646: URIRef  # The set of tags constructed according to RFC 5646 for the identification of languages.
    URI: URIRef  # The set of identifiers constructed according to the generic syntax for Uniform Resource Identifiers as specified by the Internet Engineering Task Force.
    W3CDTF: URIRef  # The set of dates and times constructed according to the W3C Date and Time Formats Specification.
|
||||
@@ -0,0 +1,84 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class DOAP(DefinedNamespace):
    """
    Description of a Project (DOAP) vocabulary

    The Description of a Project (DOAP) vocabulary, described using W3C RDF Schema and the Web Ontology Language.

    Generated from: http://usefulinc.com/ns/doap
    Date: 2024-08-01 13:03:22.175167
    """

    _NS = Namespace("http://usefulinc.com/ns/doap#")

    _fail = True

    # Terms whose local names are valid Python identifiers. The trailing
    # comments carry the multilingual descriptions from the vocabulary.
    ArchRepository: URIRef  # GNU Arch source code repository. Dépôt GNU Arch du code source. Repositorio GNU Arch del código fuente. GNU Arch Quellcode-Versionierungssystem. Úložiště zdrojových kódů GNU Arch. Repositório GNU Arch do código fonte.
    BKRepository: URIRef  # BitKeeper source code repository. Dépôt BitKeeper du code source. Repositorio BitKeeper del código fuente. BitKeeper Quellcode-Versionierungssystem. Úložiště zdrojových kódů BitKeeper. Repositório BitKeeper do código fonte.
    BazaarBranch: URIRef  # Bazaar source code branch. Código fonte da ramificação Bazaar.
    CVSRepository: URIRef  # CVS source code repository. Dépôt CVS du code source. Repositorio CVS del código fuente. CVS Quellcode-Versionierungssystem. Úložiště zdrojových kódů CVS. Repositório CVS do código fonte.
    DarcsRepository: URIRef  # darcs source code repository. Dépôt darcs du code source. Repositorio darcs del código fuente. Repositório darcs do código fonte.
    GitBranch: URIRef  # Git source code branch. Código fonte da ramificação Git.
    GitRepository: URIRef  # Git source code repository. Dépôt Git du code source. Repositorio Git del código fuente. Git Quellcode-Versionierungssystem. Úložiště zdrojových kódů Git. Repositório Git do código fonte.
    HgRepository: URIRef  # Mercurial source code repository. Repositório Mercurial do código fonte.
    Project: URIRef  # A project. Un projet. Un proyecto. Ein Projekt. Projekt. Projeto.
    Repository: URIRef  # Source code repository. Dépôt du code source. Repositorio del código fuente. Quellcode-Versionierungssystem. Úložiště zdrojových kódů. Repositório do código fonte.
    SVNRepository: URIRef  # Subversion source code repository. Dépôt Subversion du code source. Repositorio Subversion del código fuente. Subversion Quellcode-Versionierungssystem. Úložiště zdrojových kódů Subversion. Repositório Subversion do código fonte.
    Specification: URIRef  # A specification of a system's aspects, technical or otherwise. A especificação de aspetos, técnicas ou outros do sistema.
    Version: URIRef  # Version information of a project release. Détails sur une version d'une release d'un projet. Información sobre la versión de un release del proyecto. Versionsinformation eines Projekt Releases. Informace o uvolněné verzi projektu. Informação sobre a versão do projeto lançado.
    audience: URIRef  # Description of target user base Descrição do utilizador base alvo
    blog: URIRef  # URI of a blog related to a project URI de um blog relacionado com um projeto
    browse: URIRef  # Web browser interface to repository. Interface web au dépôt. Interface web del repositorio. Web-Browser Interface für das Repository. Webové rozhraní pro prohlížení úložiště. Interface web do repositório.
    category: URIRef  # A category of project. Une catégorie de projet. Una categoría de proyecto. Eine Kategorie eines Projektes. Kategorie projektu. Uma categoría de projeto.
    created: URIRef  # Date when something was created, in YYYY-MM-DD form. e.g. 2004-04-05 Date à laquelle a été créé quelque chose, au format AAAA-MM-JJ (par ex. 2004-04-05) Fecha en la que algo fue creado, en formato AAAA-MM-DD. e.g. 2004-04-05 Erstellungsdatum von Irgendwas, angegeben im YYYY-MM-DD Format, z.B. 2004-04-05. Datum, kdy bylo něco vytvořeno ve formátu RRRR-MM-DD, např. 2004-04-05 Data em que algo foi criado, no formato AAAA-MM-DD. e.g. 2004-04-05
    description: URIRef  # Plain text description of a project, of 2-4 sentences in length. Texte descriptif d'un projet, long de 2 à 4 phrases. Descripción en texto plano de un proyecto, de 2 a 4 enunciados de longitud. Beschreibung eines Projekts als einfacher Text mit der Länge von 2 bis 4 Sätzen. Čistě textový, 2 až 4 věty dlouhý popis projektu. Descrição de um projeto em texto apenas, com 2 a 4 frases de comprimento.
    developer: URIRef  # Developer of software for the project. Développeur pour le projet. Desarrollador de software para el proyecto. Software-Entwickler für das Projekt. Vývojář softwaru projektu. Programador de software para o projeto.
    documentation: URIRef  # Documentation of the project. Aide pour l’utilisation de ce projet.
    documenter: URIRef  # Contributor of documentation to the project. Collaborateur à la documentation du projet. Proveedor de documentación para el proyecto. Mitarbeiter an der Dokumentation des Projektes. Spoluautor dokumentace projektu. Contribuidor para a documentação do projeto.
    helper: URIRef  # Project contributor. Collaborateur au projet. Colaborador del proyecto. Projekt-Mitarbeiter. Spoluautor projektu. Ajudante ou colaborador do projeto.
    homepage: URIRef  # URL of a project's homepage, associated with exactly one project. L'URL de la page web d'un projet, associée avec un unique projet. El URL de la página de un proyecto, asociada con exactamente un proyecto. URL der Projekt-Homepage, verbunden mit genau einem Projekt. URL adresa domovské stránky projektu asociované s právě jedním projektem. O URL da página de um projeto, asociada com exactamente um projeto.
    implements: URIRef  # A specification that a project implements. Could be a standard, API or legally defined level of conformance. Uma especificação que um projeto implementa. Pode ser uma padrão, API ou um nível de conformidade definida legalmente.
    language: URIRef  # BCP47 language code a project has been translated into Código de idioma BCP47 do projeto para o qual foi traduzido
    license: URIRef  # The URI of an RDF description of the license the software is distributed under. E.g. a SPDX reference L'URI d'une description RDF de la licence sous laquelle le programme est distribué. El URI de una descripción RDF de la licencia bajo la cuál se distribuye el software. Die URI einer RDF-Beschreibung einer Lizenz unter der die Software herausgegeben wird. z.B. eine SPDX Referenz URI adresa RDF popisu licence, pod kterou je software distribuován. O URI de uma descrição RDF da licença do software sob a qual é distribuída. Ex.: referência SPDX
    location: URIRef  # Location of a repository. Emplacement d'un dépôt. lugar de un repositorio. Lokation eines Repositorys. Umístění úložiště. Localização de um repositório.
    maintainer: URIRef  # Maintainer of a project, a project leader. Développeur principal d'un projet, un meneur du projet. Desarrollador principal de un proyecto, un líder de proyecto. Hauptentwickler eines Projektes, der Projektleiter Správce projektu, vedoucí projektu. Programador principal de um projeto, um líder de projeto.
    module: URIRef  # Module name of a Subversion, CVS, BitKeeper or Arch repository. Nom du module d'un dépôt Subversion, CVS, BitKeeper ou Arch. Nombre del módulo de un repositorio Subversion, CVS, BitKeeper o Arch. Modul-Name eines Subversion, CVS, BitKeeper oder Arch Repositorys. Jméno modulu v CVS, BitKeeper nebo Arch úložišti. Nome do módulo de um repositório Subversion, CVS, BitKeeper ou Arch.
    name: URIRef  # A name of something. Le nom de quelque chose. El nombre de algo. Der Name von Irgendwas Jméno něčeho. O nome de alguma coisa.
    os: URIRef  # Operating system that a project is limited to. Omit this property if the project is not OS-specific. Système d'exploitation auquel est limité le projet. Omettez cette propriété si le projet n'est pas limité à un système d'exploitation. Sistema opertivo al cuál está limitado el proyecto. Omita esta propiedad si el proyecto no es específico de un sistema opertaivo en particular. Betriebssystem auf dem das Projekt eingesetzt werden kann. Diese Eigenschaft kann ausgelassen werden, wenn das Projekt nicht BS-spezifisch ist. Operační systém, na jehož použití je projekt limitován. Vynechejte tuto vlastnost, pokud je projekt nezávislý na operačním systému. Sistema operativo a que o projeto está limitado. Omita esta propriedade se o projeto não é condicionado pelo SO usado.
    platform: URIRef  # Indicator of software platform (non-OS specific), e.g. Java, Firefox, ECMA CLR Indicador da plataforma do software (não específico a nenhum SO), ex.: Java, Firefox, ECMA CLR
    release: URIRef  # A project release. Une release (révision) d'un projet. Un release (versión) de un proyecto. Ein Release (Version) eines Projekts. Relase (verze) projektu. A publicação de um projeto.
    repository: URIRef  # Source code repository. Dépôt du code source. Repositorio del código fuente. Quellcode-Versionierungssystem. Úložiště zdrojových kódů. Repositório do código fonte.
    repositoryOf: URIRef  # The project that uses a repository.
    revision: URIRef  # Revision identifier of a software release. Identifiant de révision d'une release du programme. Indentificador de la versión de un release de software. Versionsidentifikator eines Software-Releases. Identifikátor zpřístupněné revize softwaru. Identificador do lançamento da revisão do software.
    screenshots: URIRef  # Web page with screenshots of project. Page web avec des captures d'écran du projet. Página web con capturas de pantalla del proyecto. Web-Seite mit Screenshots eines Projektes. Webová stránka projektu se snímky obrazovky. Página web com as capturas de ecrãn do projeto.
    shortdesc: URIRef  # Short (8 or 9 words) plain text description of a project. Texte descriptif concis (8 ou 9 mots) d'un projet. Descripción corta (8 o 9 palabras) en texto plano de un proyecto. Kurzbeschreibung (8 oder 9 Wörter) eines Projekts als einfacher Text. Krátký (8 nebo 9 slov) čistě textový popis projektu. Descrição curta (com 8 ou 9 palavras) de um projeto em texto apenas.
    tester: URIRef  # A tester or other quality control contributor. Un testeur ou un collaborateur au contrôle qualité. Un tester u otro proveedor de control de calidad. Ein Tester oder anderer Mitarbeiter der Qualitätskontrolle. Tester nebo jiný spoluautor kontrolující kvalitu. Um controlador ou outro contribuidor para o controlo de qualidade.
    translator: URIRef  # Contributor of translations to the project. Collaborateur à la traduction du projet. Proveedor de traducciones al proyecto. Mitarbeiter an den Übersetzungen des Projektes. Spoluautor překladu projektu. Contribuidor das traduções para o projeto.
    vendor: URIRef  # Vendor organization: commercial, free or otherwise
    wiki: URIRef  # URL of Wiki for collaborative discussion of project. L'URL du Wiki pour la discussion collaborative sur le projet. URL del Wiki para discusión colaborativa del proyecto. Wiki-URL für die kollaborative Dikussion eines Projektes. URL adresa wiki projektu pro společné diskuse. URL da Wiki para discussão em grupo do projeto.

    # Valid non-python identifiers
    _extras = [
        "anon-root",  # Repository for anonymous access. Dépôt pour accès anonyme. Repositorio para acceso anónimo. Repository für anonymen Zugriff Úložiště pro anonymní přístup. Repositório para acesso anónimo.
        "bug-database",  # Bug tracker for a project. Suivi des bugs pour un projet. Bug tracker para un proyecto. Fehlerdatenbank eines Projektes. Správa chyb projektu. Bug tracker para um projeto.
        "developer-forum",  # A forum or community for developers of this project.
        "download-mirror",  # Mirror of software download web page. Miroir de la page de téléchargement du programme. Mirror de la página web de descarga. Spiegel der Seite von die Projekt-Software heruntergeladen werden kann. Zrcadlo stránky pro stažení softwaru. Mirror da página web para fazer download.
        "download-page",  # Web page from which the project software can be downloaded. Page web à partir de laquelle on peut télécharger le programme. Página web de la cuál se puede bajar el software. Web-Seite von der die Projekt-Software heruntergeladen werden kann. Webová stránka, na které lze stáhnout projektový software. Página web da qual o projeto de software pode ser descarregado.
        "file-release",  # URI of download associated with this release. URI adresa stažení asociované s revizí. URI para download associado com a publicação.
        "mailing-list",  # Mailing list home page or email address. Page web de la liste de diffusion, ou adresse de courriel. Página web de la lista de correo o dirección de correo. Homepage der Mailing Liste oder E-Mail Adresse. Domovská stránka nebo e–mailová adresa e–mailové diskuse. Página web da lista de distribuição de e-mail ou dos endereços.
        "old-homepage",  # URL of a project's past homepage, associated with exactly one project. L'URL d'une ancienne page web d'un projet, associée avec un unique projet. El URL de la antigua página de un proyecto, asociada con exactamente un proyecto. URL der letzten Projekt-Homepage, verbunden mit genau einem Projekt. URL adresa předešlé domovské stránky projektu asociované s právě jedním projektem. O URL antigo da página de um projeto, associada com exactamente um projeto.
        "programming-language",  # Programming language a project is implemented in or intended for use with. Langage de programmation avec lequel un projet est implémenté, ou avec lequel il est prévu de l'utiliser. Lenguaje de programación en el que un proyecto es implementado o con el cuál pretende usarse. Programmiersprache in der ein Projekt implementiert ist oder intendiert wird zu benutzen. Programovací jazyk, ve kterém je projekt implementován nebo pro který je zamýšlen k použití. Linguagem de programação que o projeto usa ou é para ser utilizada.
        "security-contact",  # The Agent that should be contacted if security issues are found with the project.
        "security-policy",  # URL of the security policy of a project.
        "service-endpoint",  # The URI of a web service endpoint where software as a service may be accessed
        "support-forum",  # A forum or community that supports this project.
    ]
|
||||
@@ -0,0 +1,105 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class FOAF(DefinedNamespace):
    """
    Friend of a Friend (FOAF) vocabulary

    The Friend of a Friend (FOAF) RDF vocabulary, described using W3C RDF Schema and the Web Ontology Language.

    Generated from: http://xmlns.com/foaf/spec/index.rdf
    Date: 2020-05-26 14:20:01.597998
    """

    _NS = Namespace("http://xmlns.com/foaf/0.1/")

    _fail = True

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    account: URIRef  # Indicates an account held by this agent.
    accountName: URIRef  # Indicates the name (identifier) associated with this online account.
    accountServiceHomepage: URIRef  # Indicates a homepage of the service provide for this online account.
    age: URIRef  # The age in years of some agent.
    based_near: URIRef  # A location that something is based near, for some broadly human notion of near.
    birthday: URIRef  # The birthday of this Agent, represented in mm-dd string form, eg. '12-31'.
    currentProject: URIRef  # A current project this person works on.
    depiction: URIRef  # A depiction of some thing.
    depicts: URIRef  # A thing depicted in this representation.
    dnaChecksum: URIRef  # A checksum for the DNA of some thing. Joke.
    familyName: URIRef  # The family name of some person.
    family_name: URIRef  # The family name of some person.
    firstName: URIRef  # The first name of a person.
    focus: URIRef  # The underlying or 'focal' entity associated with some SKOS-described concept.
    fundedBy: URIRef  # An organization funding a project or person.
    geekcode: URIRef  # A textual geekcode for this person, see http://www.geekcode.com/geek.html
    gender: URIRef  # The gender of this Agent (typically but not necessarily 'male' or 'female').
    givenName: URIRef  # The given name of some person.
    givenname: URIRef  # The given name of some person.
    holdsAccount: URIRef  # Indicates an account held by this agent.
    img: URIRef  # An image that can be used to represent some thing (ie. those depictions which are particularly representative of something, eg. one's photo on a homepage).
    interest: URIRef  # A page about a topic of interest to this person.
    knows: URIRef  # A person known by this person (indicating some level of reciprocated interaction between the parties).
    lastName: URIRef  # The last name of a person.
    made: URIRef  # Something that was made by this agent.
    maker: URIRef  # An agent that made this thing.
    member: URIRef  # Indicates a member of a Group
    membershipClass: URIRef  # Indicates the class of individuals that are a member of a Group
    myersBriggs: URIRef  # A Myers Briggs (MBTI) personality classification.
    name: URIRef  # A name for some thing.
    nick: URIRef  # A short informal nickname characterising an agent (includes login identifiers, IRC and other chat nicknames).
    page: URIRef  # A page or document about this thing.
    pastProject: URIRef  # A project this person has previously worked on.
    phone: URIRef  # A phone, specified using fully qualified tel: URI scheme (refs: http://www.w3.org/Addressing/schemes.html#tel).
    plan: URIRef  # A .plan comment, in the tradition of finger and '.plan' files.
    primaryTopic: URIRef  # The primary topic of some page or document.
    publications: URIRef  # A link to the publications of this person.
    schoolHomepage: URIRef  # A homepage of a school attended by the person.
    sha1: URIRef  # A sha1sum hash, in hex.
    skypeID: URIRef  # A Skype ID
    status: URIRef  # A string expressing what the user is happy for the general public (normally) to know about their current activity.
    surname: URIRef  # The surname of some person.
    theme: URIRef  # A theme.
    thumbnail: URIRef  # A derived thumbnail image.
    tipjar: URIRef  # A tipjar document for this agent, describing means for payment and reward.
    title: URIRef  # Title (Mr, Mrs, Ms, Dr. etc)
    topic: URIRef  # A topic of some page or document.
    topic_interest: URIRef  # A thing of interest to this person.
    workInfoHomepage: URIRef  # A work info homepage of some person; a page about their work for some organization.
    workplaceHomepage: URIRef  # A workplace homepage of some person; the homepage of an organization they work for.

    # http://www.w3.org/2000/01/rdf-schema#Class
    Agent: URIRef  # An agent (eg. person, group, software or physical artifact).
    Document: URIRef  # A document.
    Group: URIRef  # A class of Agents.
    Image: URIRef  # An image.
    LabelProperty: URIRef  # A foaf:LabelProperty is any RDF property with textual values that serve as labels.
    OnlineAccount: URIRef  # An online account.
    OnlineChatAccount: URIRef  # An online chat account.
    OnlineEcommerceAccount: URIRef  # An online e-commerce account.
    OnlineGamingAccount: URIRef  # An online gaming account.
    Organization: URIRef  # An organization.
    Person: URIRef  # A person.
    PersonalProfileDocument: URIRef  # A personal profile RDF document.
    Project: URIRef  # A project (a collective endeavour of some kind).

    # http://www.w3.org/2002/07/owl#InverseFunctionalProperty
    aimChatID: URIRef  # An AIM chat ID
    homepage: URIRef  # A homepage for some thing.
    icqChatID: URIRef  # An ICQ chat ID
    isPrimaryTopicOf: URIRef  # A document that this thing is the primary topic of.
    jabberID: URIRef  # A jabber ID for something.
    logo: URIRef  # A logo representing some thing.
    mbox: URIRef  # A personal mailbox, ie. an Internet mailbox associated with exactly one owner, the first owner of this mailbox. This is a 'static inverse functional property', in that there is (across time and change) at most one individual that ever has any particular value for foaf:mbox.
    mbox_sha1sum: URIRef  # The sha1sum of the URI of an Internet mailbox associated with exactly one owner, the first owner of the mailbox.
    msnChatID: URIRef  # An MSN chat ID
    openid: URIRef  # An OpenID for an Agent.
    weblog: URIRef  # A weblog of some thing (whether person, group, company etc.).
    yahooChatID: URIRef  # A Yahoo chat ID
|
||||
@@ -0,0 +1,111 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class GEO(DefinedNamespace):
    """
    An RDF/OWL vocabulary for representing spatial information

    Generated from: http://schemas.opengis.net/geosparql/1.0/geosparql_vocab_all.rdf
    Date: 2021-12-27 17:38:15.101187

    .. code-block:: Turtle

        <http://www.opengis.net/ont/geosparql> dc:creator "Open Geospatial Consortium"^^xsd:string
        dc:date "2012-04-30"^^xsd:date
        dc:source <http://www.opengis.net/doc/IS/geosparql/1.0>
            "OGC GeoSPARQL – A Geographic Query Language for RDF Data OGC 11-052r5"^^xsd:string
        rdfs:seeAlso <http://www.opengis.net/def/function/ogc-geosparql/1.0>
            <http://www.opengis.net/def/rule/ogc-geosparql/1.0>
            <http://www.opengis.net/doc/IS/geosparql/1.0>
        owl:imports dc:
            <http://www.opengis.net/ont/gml>
            <http://www.opengis.net/ont/sf>
            <http://www.w3.org/2004/02/skos/core>
        owl:versionInfo "OGC GeoSPARQL 1.0"^^xsd:string
    """

    _NS = Namespace("http://www.opengis.net/ont/geosparql#")

    # http://www.w3.org/2000/01/rdf-schema#Datatype
    dggsLiteral: URIRef  # A DGGS serialization of a geometry object.
    geoJSONLiteral: URIRef  # A GeoJSON serialization of a geometry object.
    gmlLiteral: URIRef  # A GML serialization of a geometry object.
    kmlLiteral: URIRef  # A KML serialization of a geometry object.
    wktLiteral: URIRef  # A Well-known Text serialization of a geometry object.

    # http://www.w3.org/2002/07/owl#Class
    Feature: URIRef  # This class represents the top-level feature type. This class is equivalent to GFI_Feature defined in ISO 19156:2011, and it is superclass of all feature types.
    FeatureCollection: URIRef  # A collection of individual Features.
    Geometry: URIRef  # The class represents the top-level geometry type. This class is equivalent to the UML class GM_Object defined in ISO 19107, and it is superclass of all geometry types.
    GeometryCollection: URIRef  # A collection of individual Geometries.
    SpatialObject: URIRef  # The class spatial-object represents everything that can have a spatial representation. It is superclass of feature and geometry.
    SpatialObjectCollection: URIRef  # A collection of individual Spatial Objects. This is the superclass of Feature Collection and Geometry Collection.

    # http://www.w3.org/2002/07/owl#DatatypeProperty
    asGML: URIRef  # The GML serialization of a geometry
    asWKT: URIRef  # The WKT serialization of a geometry
    asGeoJSON: URIRef  # The GeoJSON serialization of a geometry
    asKML: URIRef  # The KML serialization of a geometry
    asDGGS: URIRef  # The DGGS serialization of a geometry
    coordinateDimension: URIRef  # The number of measurements or axes needed to describe the position of this geometry in a coordinate system.
    dimension: URIRef  # The topological dimension of this geometric object, which must be less than or equal to the coordinate dimension. In non-homogeneous collections, this will return the largest topological dimension of the contained objects.
    hasMetricArea: URIRef  # The area of a Spatial Object in square meters.
    hasMetricLength: URIRef  # The length of a Spatial Object in meters.
    hasMetricPerimeterLength: URIRef  # The length of the perimeter of a Spatial Object in meters.
    hasMetricSpatialAccuracy: URIRef  # The positional accuracy of the coordinates of a Geometry in meters.
    hasMetricSpatialResolution: URIRef  # The spatial resolution of a Geometry in meters.
    hasMetricSize: URIRef  # Subproperties of this property are used to indicate the size of a Spatial Object as a measurement or estimate of one or more dimensions of the Spatial Object's spatial presence. Units are always metric (meter, square meter or cubic meter)
    hasMetricVolume: URIRef  # The volume of a Spatial Object in cubic meters.
    hasSerialization: URIRef  # Connects a geometry object with its text-based serialization.
    isEmpty: URIRef  # (true) if this geometric object is the empty Geometry. If true, then this geometric object represents the empty point set for the coordinate space.
    isSimple: URIRef  # (true) if this geometric object has no anomalous geometric points, such as self intersection or self tangency.
    spatialDimension: URIRef  # The number of measurements or axes needed to describe the spatial position of this geometry in a coordinate system.

    # http://www.w3.org/2002/07/owl#ObjectProperty
    defaultGeometry: URIRef  # The default geometry to be used in spatial calculations. It is Usually the most detailed geometry.
    ehContains: URIRef  # Exists if the subject SpatialObject spatially contains the object SpatialObject. DE-9IM: T*TFF*FF*
    ehCoveredBy: URIRef  # Exists if the subject SpatialObject is spatially covered by the object SpatialObject. DE-9IM: TFF*TFT**
    ehCovers: URIRef  # Exists if the subject SpatialObject spatially covers the object SpatialObject. DE-9IM: T*TFT*FF*
    ehDisjoint: URIRef  # Exists if the subject SpatialObject is spatially disjoint from the object SpatialObject. DE-9IM: FF*FF****
    ehEquals: URIRef  # Exists if the subject SpatialObject spatially equals the object SpatialObject. DE-9IM: TFFFTFFFT
    ehInside: URIRef  # Exists if the subject SpatialObject is spatially inside the object SpatialObject. DE-9IM: TFF*FFT**
    ehMeet: URIRef  # Exists if the subject SpatialObject spatially meets the object SpatialObject. DE-9IM: FT******* ^ F**T***** ^ F***T****
    ehOverlap: URIRef  # Exists if the subject SpatialObject spatially overlaps the object SpatialObject. DE-9IM: T*T***T**
    hasArea: URIRef  # The area of a Spatial Object.
    hasBoundingBox: URIRef  # The minimum or smallest bounding or enclosing box of a given Feature.
    hasCentroid: URIRef  # The arithmetic mean position of all the geometry points of a given Feature.
    hasDefaultGeometry: URIRef  # The default geometry to be used in spatial calculations, usually the most detailed geometry.
    hasGeometry: URIRef  # A spatial representation for a given feature.
    hasLength: URIRef  # The length of a Spatial Object.
    hasPerimeterLength: URIRef  # The length of the perimeter of a Spatial Object.
    hasSize: URIRef  # Subproperties of this property are used to indicate the size of a Spatial Object as a measurement or estimate of one or more dimensions of the Spatial Object's spatial presence.
    hasSpatialAccuracy: URIRef  # The positional accuracy of the coordinates of a Geometry.
    hasSpatialResolution: URIRef  # The spatial resolution of a Geometry.
    hasVolume: URIRef  # The volume of a three-dimensional Spatial Object.
    rcc8dc: URIRef  # Exists if the subject SpatialObject is spatially disjoint from the object SpatialObject. DE-9IM: FFTFFTTTT
    rcc8ec: URIRef  # Exists if the subject SpatialObject spatially meets the object SpatialObject. DE-9IM: FFTFTTTTT
    rcc8eq: URIRef  # Exists if the subject SpatialObject spatially equals the object SpatialObject. DE-9IM: TFFFTFFFT
    rcc8ntpp: URIRef  # Exists if the subject SpatialObject is spatially inside the object SpatialObject. DE-9IM: TFFTFFTTT
    rcc8ntppi: URIRef  # Exists if the subject SpatialObject spatially contains the object SpatialObject. DE-9IM: TTTFFTFFT
    rcc8po: URIRef  # Exists if the subject SpatialObject spatially overlaps the object SpatialObject. DE-9IM: TTTTTTTTT
    rcc8tpp: URIRef  # Exists if the subject SpatialObject is spatially covered by the object SpatialObject. DE-9IM: TFFTTFTTT
    rcc8tppi: URIRef  # Exists if the subject SpatialObject spatially covers the object SpatialObject. DE-9IM: TTTFTTFFT
    sfContains: URIRef  # Exists if the subject SpatialObject spatially contains the object SpatialObject. DE-9IM: T*****FF*
    sfCrosses: URIRef  # Exists if the subject SpatialObject spatially crosses the object SpatialObject. DE-9IM: T*T******
    sfDisjoint: URIRef  # Exists if the subject SpatialObject is spatially disjoint from the object SpatialObject. DE-9IM: FF*FF****
    sfEquals: URIRef  # Exists if the subject SpatialObject spatially equals the object SpatialObject. DE-9IM: TFFFTFFFT
    sfIntersects: URIRef  # Exists if the subject SpatialObject is not spatially disjoint from the object SpatialObject. DE-9IM: T******** ^ *T******* ^ ***T***** ^ ****T****
    sfOverlaps: URIRef  # Exists if the subject SpatialObject spatially overlaps the object SpatialObject. DE-9IM: T*T***T**
    sfTouches: URIRef  # Exists if the subject SpatialObject spatially touches the object SpatialObject. DE-9IM: FT******* ^ F**T***** ^ F***T****
    sfWithin: URIRef  # Exists if the subject SpatialObject is spatially within the object SpatialObject. DE-9IM: T*F**F***
|
||||
@@ -0,0 +1,283 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class ODRL2(DefinedNamespace):
    """ODRL Version 2.2 vocabulary terms.

    The ODRL Vocabulary and Expression defines a set of concepts and terms
    (the vocabulary) and an encoding mechanism (the expression) for permission
    and obligation statements describing digital content usage, based on the
    ODRL Information Model.

    Each term is declared as a ``URIRef``-annotated class attribute; the
    trailing comment on a term carries its definition from the vocabulary.

    Generated from: https://www.w3.org/ns/odrl/2/ODRL22.ttl
    Date: 2020-05-26 14:20:02.352356
    """

    # NOTE(review): presumably tells DefinedNamespace to reject terms that are
    # not declared below -- confirm against rdflib.namespace.DefinedNamespace.
    _fail = True

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    action: URIRef  # The operation relating to the Asset for which the Rule is being subjected.
    andSequence: URIRef  # The relation is satisfied when each of the Constraints are satisfied in the order specified.
    assignee: URIRef  # The Party is the recipient of the Rule.
    assigneeOf: URIRef  # Identifies an ODRL Policy for which the identified Party undertakes the assignee functional role.
    assigner: URIRef  # The Party is the issuer of the Rule.
    assignerOf: URIRef  # Identifies an ODRL Policy for which the identified Party undertakes the assigner functional role.
    attributedParty: URIRef  # The Party to be attributed.
    attributingParty: URIRef  # The Party who undertakes the attribution.
    compensatedParty: URIRef  # The Party is the recipient of the compensation.
    compensatingParty: URIRef  # The Party that is the provider of the compensation.
    conflict: URIRef  # The conflict-resolution strategy for a Policy.
    consentedParty: URIRef  # The Party who obtains the consent.
    consentingParty: URIRef  # The Party to obtain consent from.
    consequence: URIRef  # Relates a Duty to another Duty, the latter being a consequence of not fulfilling the former.
    constraint: URIRef  # Constraint applied to a Rule
    contractedParty: URIRef  # The Party who is being contracted.
    contractingParty: URIRef  # The Party who is offering the contract.
    dataType: URIRef  # The datatype of the value of the rightOperand or rightOperandReference of a Constraint.
    duty: URIRef  # Relates an individual Duty to a Permission.
    failure: URIRef  # Failure is an abstract property that defines the violation (or unmet) relationship between Rules.
    function: URIRef  # Function is an abstract property whose sub-properties define the functional roles which may be fulfilled by a party in relation to a Rule.
    hasPolicy: URIRef  # Identifies an ODRL Policy for which the identified Asset is the target Asset to all the Rules.
    implies: URIRef  # An Action asserts that another Action is not prohibited to enable its operational semantics.
    includedIn: URIRef  # An Action transitively asserts that another Action that encompasses its operational semantics.
    informedParty: URIRef  # The Party to be informed of all uses.
    informingParty: URIRef  # The Party who provides the inform use data.
    inheritAllowed: URIRef  # Indicates if the Policy entity can be inherited.
    inheritFrom: URIRef  # Relates a (child) policy to another (parent) policy from which terms are inherited.
    inheritRelation: URIRef  # Identifies the type of inheritance.
    leftOperand: URIRef  # The left operand in a constraint expression.
    obligation: URIRef  # Relates an individual Duty to a Policy.
    operand: URIRef  # Operand is an abstract property for a logical relationship.
    operator: URIRef  # The operator function applied to operands of a Constraint
    output: URIRef  # The output property specifies the Asset which is created from the output of the Action.
    partOf: URIRef  # Identifies an Asset/PartyCollection that the Asset/Party is a member of.
    payeeParty: URIRef  # The Party is the recipient of the payment.
    permission: URIRef  # Relates an individual Permission to a Policy.
    profile: URIRef  # The identifier(s) of an ODRL Profile that the Policy conforms to.
    prohibition: URIRef  # Relates an individual Prohibition to a Policy.
    proximity: URIRef  # A value indicating the closeness or nearness.
    refinement: URIRef  # Constraint used to refine the semantics of an Action, or Party/Asset Collection
    relation: URIRef  # Relation is an abstract property which creates an explicit link between an Action and an Asset.
    remedy: URIRef  # Relates an individual remedy Duty to a Prohibition.
    rightOperand: URIRef  # The value of the right operand in a constraint expression.
    rightOperandReference: URIRef  # A reference to a web resource providing the value for the right operand of a Constraint.
    scope: URIRef  # The identifier of a scope that provides context to the extent of the entity.
    source: URIRef  # Reference to an Asset/PartyCollection
    status: URIRef  # the value generated from the leftOperand action or a value related to the leftOperand set as the reference for the comparison.
    target: URIRef  # The target property indicates the Asset that is the primary subject to which the Rule action directly applies.
    timedCount: URIRef  # The number of seconds after which timed metering use of the asset begins.
    trackedParty: URIRef  # The Party whose usage is being tracked.
    trackingParty: URIRef  # The Party who is tracking usage.
    uid: URIRef  # An unambiguous identifier
    undefined: URIRef  # Relates the strategy used for handling undefined actions to a Policy.
    unit: URIRef  # The unit of measurement of the value of the rightOperand or rightOperandReference of a Constraint.
    xone: URIRef  # The relation is satisfied when only one, and not more, of the Constraints is satisfied

    # http://www.w3.org/2002/07/owl#NamedIndividual
    All: URIRef  # Specifies that the scope of the relationship is all of the collective individuals within a context.
    All2ndConnections: URIRef  # Specifies that the scope of the relationship is all of the second-level connections to the Party.
    AllConnections: URIRef  # Specifies that the scope of the relationship is all of the first-level connections of the Party.
    AllGroups: URIRef  # Specifies that the scope of the relationship is all of the group connections of the Party.
    Group: URIRef  # Specifies that the scope of the relationship is the defined group with multiple individual members.
    Individual: URIRef  # Specifies that the scope of the relationship is the single Party individual.
    absolutePosition: URIRef  # A point in space or time defined with absolute coordinates for the positioning of the target Asset.
    absoluteSize: URIRef  # Measure(s) of one or two axes for 2D-objects or measure(s) of one to three axes for 3D-objects of the target Asset.
    absoluteSpatialPosition: URIRef  # The absolute spatial positions of four corners of a rectangle on a 2D-canvas or the eight corners of a cuboid in a 3D-space for the target Asset to fit.
    absoluteTemporalPosition: URIRef  # The absolute temporal positions in a media stream the target Asset has to fit.
    count: URIRef  # Numeric count of executions of the action of the Rule.
    dateTime: URIRef  # The date (and optional time and timezone) of exercising the action of the Rule. Right operand value MUST be an xsd:date or xsd:dateTime as defined by [[xmlschema11-2]].
    delayPeriod: URIRef  # A time delay period prior to exercising the action of the Rule. The point in time triggering this period MAY be defined by another temporal Constraint combined by a Logical Constraint (utilising the odrl:andSequence operand). Right operand value MUST be an xsd:duration as defined by [[xmlschema11-2]].
    deliveryChannel: URIRef  # The delivery channel used for exercising the action of the Rule.
    device: URIRef  # An identified device used for exercising the action of the Rule.
    elapsedTime: URIRef  # A continuous elapsed time period which may be used for exercising of the action of the Rule. Right operand value MUST be an xsd:duration as defined by [[xmlschema11-2]].
    eq: URIRef  # Indicating that a given value equals the right operand of the Constraint.
    event: URIRef  # An identified event setting a context for exercising the action of the Rule.
    fileFormat: URIRef  # A transformed file format of the target Asset.
    gt: URIRef  # Indicating that a given value is greater than the right operand of the Constraint.
    gteq: URIRef  # Indicating that a given value is greater than or equal to the right operand of the Constraint.
    hasPart: URIRef  # A set-based operator indicating that a given value contains the right operand of the Constraint.
    ignore: URIRef  # The Action is to be ignored and is not part of the policy – and the policy remains valid.
    industry: URIRef  # A defined industry sector setting a context for exercising the action of the Rule.
    invalid: URIRef  # The policy is void.
    isA: URIRef  # A set-based operator indicating that a given value is an instance of the right operand of the Constraint.
    isAllOf: URIRef  # A set-based operator indicating that a given value is all of the right operand of the Constraint.
    isAnyOf: URIRef  # A set-based operator indicating that a given value is any of the right operand of the Constraint.
    isNoneOf: URIRef  # A set-based operator indicating that a given value is none of the right operand of the Constraint.
    isPartOf: URIRef  # A set-based operator indicating that a given value is contained by the right operand of the Constraint.
    language: URIRef  # A natural language used by the target Asset.
    lt: URIRef  # Indicating that a given value is less than the right operand of the Constraint.
    lteq: URIRef  # Indicating that a given value is less than or equal to the right operand of the Constraint.
    media: URIRef  # Category of a media asset setting a context for exercising the action of the Rule.
    meteredTime: URIRef  # An accumulated amount of one to many metered time periods which were used for exercising the action of the Rule. Right operand value MUST be an xsd:duration as defined by [[xmlschema11-2]].
    neq: URIRef  # Indicating that a given value is not equal to the right operand of the Constraint.
    payAmount: URIRef  # The amount of a financial payment. Right operand value MUST be an xsd:decimal.
    percentage: URIRef  # A percentage amount of the target Asset relevant for exercising the action of the Rule. Right operand value MUST be an xsd:decimal from 0 to 100.
    perm: URIRef  # Permissions take preference over prohibitions.
    policyUsage: URIRef  # Indicates the actual datetime the action of the Rule was exercised.
    product: URIRef  # Category of product or service setting a context for exercising the action of the Rule.
    prohibit: URIRef  # Prohibitions take preference over permissions.
    purpose: URIRef  # A defined purpose for exercising the action of the Rule.
    recipient: URIRef  # The party receiving the result/outcome of exercising the action of the Rule.
    relativePosition: URIRef  # A point in space or time defined with coordinates relative to full measures the positioning of the target Asset.
    relativeSize: URIRef  # Measure(s) of one or two axes for 2D-objects or measure(s) of one to three axes for 3D-objects - expressed as percentages of full values - of the target Asset.
    relativeSpatialPosition: URIRef  # The relative spatial positions - expressed as percentages of full values - of four corners of a rectangle on a 2D-canvas or the eight corners of a cuboid in a 3D-space of the target Asset.
    relativeTemporalPosition: URIRef  # A point in space or time defined with coordinates relative to full measures the positioning of the target Asset.
    resolution: URIRef  # Resolution of the rendition of the target Asset.
    spatial: URIRef  # A named and identified geospatial area with defined borders which is used for exercising the action of the Rule. An IRI MUST be used to represent this value.
    spatialCoordinates: URIRef  # A set of coordinates setting the borders of a geospatial area used for exercising the action of the Rule. The coordinates MUST include longitude and latitude, they MAY include altitude and the geodetic datum.
    support: URIRef  # The Action is to be supported as part of the policy – and the policy remains valid.
    system: URIRef  # An identified computing system used for exercising the action of the Rule.
    systemDevice: URIRef  # An identified computing system or computing device used for exercising the action of the Rule.
    timeInterval: URIRef  # A recurring period of time before the next execution of the action of the Rule. Right operand value MUST be an xsd:duration as defined by [[xmlschema11-2]].
    unitOfCount: URIRef  # The unit of measure used for counting the executions of the action of the Rule.
    version: URIRef  # The version of the target Asset.
    virtualLocation: URIRef  # An identified location of the IT communication space which is relevant for exercising the action of the Rule.

    # http://www.w3.org/2004/02/skos/core#Collection

    # http://www.w3.org/2004/02/skos/core#Concept
    Action: URIRef  # An operation on an Asset.
    Agreement: URIRef  # A Policy that grants the assignee a Rule over an Asset from an assigner.
    Assertion: URIRef  # A Policy that asserts a Rule over an Asset from parties.
    Asset: URIRef  # A resource or a collection of resources that are the subject of a Rule.
    AssetCollection: URIRef  # An Asset that is a collection of individual resources
    AssetScope: URIRef  # Scopes for Asset Scope expressions.
    ConflictTerm: URIRef  # Used to establish strategies to resolve conflicts that arise from the merging of Policies or conflicts between Permissions and Prohibitions in the same Policy.
    Constraint: URIRef  # A boolean expression that refines the semantics of an Action and Party/Asset Collection or declare the conditions applicable to a Rule.
    Duty: URIRef  # The obligation to perform an Action
    LeftOperand: URIRef  # Left operand for a constraint expression.
    LogicalConstraint: URIRef  # A logical expression that refines the semantics of an Action and Party/Asset Collection or declare the conditions applicable to a Rule.
    Offer: URIRef  # A Policy that proposes a Rule over an Asset from an assigner.
    Operator: URIRef  # Operator for constraint expression.
    Party: URIRef  # An entity or a collection of entities that undertake Roles in a Rule.
    PartyCollection: URIRef  # A Party that is a group of individual entities
    PartyScope: URIRef  # Scopes for Party Scope expressions.
    Permission: URIRef  # The ability to perform an Action over an Asset.
    Policy: URIRef  # A non-empty group of Permissions and/or Prohibitions.
    Privacy: URIRef  # A Policy that expresses a Rule over an Asset containing personal information.
    Prohibition: URIRef  # The inability to perform an Action over an Asset.
    Request: URIRef  # A Policy that proposes a Rule over an Asset from an assignee.
    RightOperand: URIRef  # Right operand for constraint expression.
    Rule: URIRef  # An abstract concept that represents the common characteristics of Permissions, Prohibitions, and Duties.
    Set: URIRef  # A Policy that expresses a Rule over an Asset.
    Ticket: URIRef  # A Policy that grants the holder a Rule over an Asset from an assigner.
    UndefinedTerm: URIRef  # Is used to indicate how to support Actions that are not part of any vocabulary or profile in the policy expression system.
    acceptTracking: URIRef  # To accept that the use of the Asset may be tracked.
    adHocShare: URIRef  # The act of sharing the asset to parties in close proximity to the owner.
    aggregate: URIRef  # To use the Asset or parts of it as part of a composite collection.
    annotate: URIRef  # To add explanatory notations/commentaries to the Asset without modifying the Asset in any other way.
    anonymize: URIRef  # To anonymize all or parts of the Asset.
    append: URIRef  # The act of adding to the end of an asset.
    appendTo: URIRef  # The act of appending data to the Asset without modifying the Asset in any other way.
    archive: URIRef  # To store the Asset (in a non-transient form).
    attachPolicy: URIRef  # The act of keeping the policy notice with the asset.
    attachSource: URIRef  # The act of attaching the source of the asset and its derivatives.
    attribute: URIRef  # To attribute the use of the Asset.
    commercialize: URIRef  # The act of using the asset in a business environment.
    compensate: URIRef  # To compensate by transfer of some amount of value, if defined, for using or selling the Asset.
    concurrentUse: URIRef  # To create multiple copies of the Asset that are being concurrently used.
    copy: URIRef  # The act of making an exact reproduction of the asset.
    core: URIRef  # Identifier for the ODRL Core Profile
    delete: URIRef  # To permanently remove all copies of the Asset after it has been used.
    derive: URIRef  # To create a new derivative Asset from this Asset and to edit or modify the derivative.
    digitize: URIRef  # To produce a digital copy of (or otherwise digitize) the Asset from its analogue form.
    display: URIRef  # To create a static and transient rendition of an Asset.
    distribute: URIRef  # To supply the Asset to third-parties.
    ensureExclusivity: URIRef  # To ensure that the Rule on the Asset is exclusive.
    execute: URIRef  # To run the computer program Asset.
    export: URIRef  # The act of transforming the asset into a new form.
    extract: URIRef  # To extract parts of the Asset and to use it as a new Asset.
    extractChar: URIRef  # The act of extracting (replicating) unchanged characters from the asset.
    extractPage: URIRef  # The act of extracting (replicating) unchanged pages from the asset.
    extractWord: URIRef  # The act of extracting (replicating) unchanged words from the asset.
    give: URIRef  # To transfer the ownership of the Asset to a third party without compensation and while deleting the original asset.
    grantUse: URIRef  # To grant the use of the Asset to third parties.
    include: URIRef  # To include other related assets in the Asset.
    index: URIRef  # To record the Asset in an index.
    inform: URIRef  # To inform that an action has been performed on or in relation to the Asset.
    install: URIRef  # To load the computer program Asset onto a storage device which allows operating or running the Asset.
    lease: URIRef  # The act of making available the asset to a third-party for a fixed period of time with exchange of value.
    lend: URIRef  # The act of making available the asset to a third-party for a fixed period of time without exchange of value.
    license: URIRef  # The act of granting the right to use the asset to a third-party.
    modify: URIRef  # To change existing content of the Asset. A new asset is not created by this action.
    move: URIRef  # To move the Asset from one digital location to another including deleting the original copy.
    nextPolicy: URIRef  # To grant the specified Policy to a third party for their use of the Asset.
    obtainConsent: URIRef  # To obtain verifiable consent to perform the requested action in relation to the Asset.
    pay: URIRef  # The act of paying a financial amount to a party for use of the asset.
    play: URIRef  # To create a sequential and transient rendition of an Asset.
    present: URIRef  # To publicly perform the Asset.
    preview: URIRef  # The act of providing a short preview of the asset.
    print: URIRef  # To create a tangible and permanent rendition of an Asset.
    read: URIRef  # To obtain data from the Asset.
    reproduce: URIRef  # To make duplicate copies the Asset in any material form.
    reviewPolicy: URIRef  # To review the Policy applicable to the Asset.
    secondaryUse: URIRef  # The act of using the asset for a purpose other than the purpose it was intended for.
    sell: URIRef  # To transfer the ownership of the Asset to a third party with compensation and while deleting the original asset.
    share: URIRef  # The act of the non-commercial reproduction and distribution of the asset to third-parties.
    shareAlike: URIRef  # The act of distributing any derivative asset under the same terms as the original asset.
    stream: URIRef  # To deliver the Asset in real-time.
    synchronize: URIRef  # To use the Asset in timed relations with media (audio/visual) elements of another Asset.
    textToSpeech: URIRef  # To have a text Asset read out loud.
    transfer: URIRef  # To transfer the ownership of the Asset in perpetuity.
    transform: URIRef  # To convert the Asset into a different format.
    translate: URIRef  # To translate the original natural language of an Asset into another natural language.
    uninstall: URIRef  # To unload and delete the computer program Asset from a storage device and disable its readiness for operation.
    use: URIRef  # To use the Asset
    watermark: URIRef  # To apply a watermark to the Asset.
    write: URIRef  # The act of writing to the Asset.
    writeTo: URIRef  # The act of adding data to the Asset.

    # Valid non-python identifiers: these names cannot be declared as class
    # attributes ("and"/"or" are Python keywords, the rest contain "#"), so
    # they are listed as plain strings instead.
    _extras = [
        "and",
        "or",
        "#actionConcepts",
        "#actions",
        "#actionsCommon",
        "#assetConcepts",
        "#assetParty",
        "#assetRelations",
        "#assetRelationsCommon",
        "#conflictConcepts",
        "#constraintLeftOperandCommon",
        "#constraintLogicalOperands",
        "#constraintRelationalOperators",
        "#constraintRightOpCommon",
        "#constraints",
        "#deprecatedTerms",
        "#duties",
        "#logicalConstraints",
        "#partyConcepts",
        "#partyRoles",
        "#partyRolesCommon",
        "#permissions",
        "#policyConcepts",
        "#policySubClasses",
        "#policySubClassesCommon",
        "#prohibitions",
        "#ruleConcepts",
    ]

    # Namespace IRI of the ODRL 2 vocabulary.
    # NOTE(review): presumably the base that the term names above are
    # appended to -- confirm against DefinedNamespace.
    _NS = Namespace("http://www.w3.org/ns/odrl/2/")
|
||||
@@ -0,0 +1,70 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class ORG(DefinedNamespace):
|
||||
"""
|
||||
Core organization ontology
|
||||
|
||||
Vocabulary for describing organizational structures, specializable to a broad variety of types of
|
||||
organization.
|
||||
|
||||
Generated from: http://www.w3.org/ns/org#
|
||||
Date: 2020-05-26 14:20:02.908408
|
||||
|
||||
"""
|
||||
|
||||
_fail = True
|
||||
|
||||
# http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
|
||||
basedAt: URIRef # Indicates the site at which a person is based. We do not restrict the possibility that a person is based at multiple sites.
|
||||
changedBy: URIRef # Indicates a change event which resulted in a change to this organization. Depending on the event the organization may or may not have continued to exist after the event. Inverse of `org:originalOrganization`.
|
||||
classification: URIRef # Indicates a classification for this Organization within some classification scheme. Extension vocabularies may wish to specialize this property to have a range corresponding to a specific `skos:ConceptScheme`. This property is under discussion and may be revised or removed - in many cases organizations are best categorized by defining a sub-class hierarchy in an extension vocabulary.
|
||||
hasMember: URIRef # Indicates a person who is a member of the subject Organization. Inverse of `org:memberOf`, see that property for further clarification. Provided for compatibility with `foaf:member`.
|
||||
hasMembership: URIRef # Indicates a membership relationship that the Agent plays. Inverse of `org:member`.
|
||||
hasPost: URIRef # Indicates a Post which exists within the Organization.
|
||||
hasPrimarySite: URIRef # Indicates a primary site for the Organization, this is the default means by which an Organization can be contacted and is not necessarily the formal headquarters.
|
||||
hasRegisteredSite: URIRef # Indicates the legally registered site for the organization, in many legal jurisdictions there is a requirement that FormalOrganizations such as Companies or Charities have such a primary designed site.
|
||||
hasSite: URIRef # Indicates a site at which the Organization has some presence even if only indirect (e.g. virtual office or a professional service which is acting as the registered address for a company). Inverse of `org:siteOf`.
|
||||
hasSubOrganization: URIRef # Represents hierarchical containment of Organizations or Organizational Units; indicates an organization which is a sub-part or child of this organization. Inverse of `org:subOrganizationOf`.
|
||||
hasUnit: URIRef # Indicates a unit which is part of this Organization, e.g. a Department within a larger FormalOrganization. Inverse of `org:unitOf`.
|
||||
headOf: URIRef # Indicates that a person is the leader or formal head of the Organization. This will normally mean that they are the root of the `org:reportsTo` (acyclic) graph, though an organization may have more than one head.
|
||||
heldBy: URIRef # Indicates an Agent which holds a Post.
|
||||
holds: URIRef # Indicates a Post held by some Agent.
|
||||
identifier: URIRef # Gives an identifier, such as a company registration number, that can be used to used to uniquely identify the organization. Many different national and international identier schemes are available. The org ontology is neutral to which schemes are used. The particular identifier scheme should be indicated by the datatype of the identifier value. Using datatypes to distinguish the notation scheme used is consistent with recommended best practice for `skos:notation` of which this property is a specialization.
|
||||
linkedTo: URIRef # Indicates an arbitrary relationship between two organizations. Specializations of this can be used to, for example, denote funding or supply chain relationships.
|
||||
location: URIRef # Gives a location description for a person within the organization, for example a _Mail Stop_ for internal posting purposes.
|
||||
member: URIRef # Indicates the Person (or other Agent including Organization) involved in the Membership relationship. Inverse of `org:hasMembership`
|
||||
memberDuring: URIRef # Optional property to indicate the interval for which the membership is/was valid.
|
||||
memberOf: URIRef # Indicates that a person is a member of the Organization with no indication of the nature of that membership or the role played. Note that the choice of property name is not meant to limit the property to only formal membership arrangements, it is also intended to cover related concepts such as affilliation or other involvement in the organization. Extensions can specialize this relationship to indicate particular roles within the organization or more nuanced relationships to the organization. Has an optional inverse, `org:hasmember`.
|
||||
organization: URIRef # Indicates Organization in which the Agent is a member.
|
||||
originalOrganization: URIRef # Indicates one or more organizations that existed before the change event. Depending on the event they may or may not have continued to exist after the event. Inverse of `org:changedBy`.
|
||||
postIn: URIRef # Indicates the Organization in which the Post exists.
|
||||
purpose: URIRef # Indicates the purpose of this Organization. There can be many purposes at different levels of abstraction but the nature of an organization is to have a reason for existence and this property is a means to document that reason. An Organization may have multiple purposes. It is recommended that the purpose be denoted by a controlled term or code list, ideally a `skos:Concept`. However, the range is left open to allow for other types of descriptive schemes. It is expected that specializations or application profiles of this vocabulary will constrain the range of the purpose. Alternative names: _remit_ _responsibility_ (esp. if applied to OrganizationalUnits such as Government Departments).
|
||||
remuneration: URIRef # Indicates a salary or other reward associated with the role. Typically this will be denoted using an existing representation scheme such as `gr:PriceSpecification` but the range is left open to allow applications to specialize it (e.g. to remunerationInGBP).
|
||||
reportsTo: URIRef # Indicates a reporting relationship as might be depicted on an organizational chart. The precise semantics of the reporting relationship will vary by organization but is intended to encompass both direct supervisory relationships (e.g. carrying objective and salary setting authority) and more general reporting or accountability relationships (e.g. so called _dotted line_ reporting).
|
||||
resultedFrom: URIRef # Indicates an event which resulted in this organization. Inverse of `org:resultingOrganization`.
|
||||
resultingOrganization: URIRef # Indicates an organization which was created or changed as a result of the event. Inverse of `org:resultedFrom`.
|
||||
role: URIRef # Indicates the Role that the Agent plays in a Membership relationship with an Organization.
|
||||
roleProperty: URIRef # This is a metalevel property which is used to annotate an `org:Role` instance with a sub-property of `org:memberOf` that can be used to directly indicate the role for easy of query. The intended semantics is a Membership relation involving the Role implies the existence of a direct property relationship through an inference rule of the form: `{ [] org:member ?p; org:organization ?o; org:role [org:roleProperty ?r] } -> {?p ?r ?o}`.
|
||||
siteAddress: URIRef # Indicates an address for the site in a suitable encoding. Use of vCard (using the http://www.w3.org/TR/vcard-rdf/ vocabulary) is encouraged but the range is left open to allow other encodings to be used. The address may include email, telephone, and geo-location information and is not restricted to a physical address.
|
||||
siteOf: URIRef # Indicates an Organization which has some presence at the given site. This is the inverse of `org:hasSite`.
|
||||
subOrganizationOf: URIRef # Represents hierarchical containment of Organizations or OrganizationalUnits; indicates an Organization which contains this Organization. Inverse of `org:hasSubOrganization`.
|
||||
transitiveSubOrganizationOf: URIRef # The transitive closure of subOrganizationOf, giving a representation of all organizations that contain this one. Note that technically this is a super property of the transitive closure so it could contain additional assertions but such usage is discouraged.
|
||||
unitOf: URIRef # Indicates an Organization of which this Unit is a part, e.g. a Department within a larger FormalOrganization. This is the inverse of `org:hasUnit`.
|
||||
|
||||
# http://www.w3.org/2000/01/rdf-schema#Class
|
||||
ChangeEvent: URIRef # Represents an event which resulted in a major change to an organization such as a merger or complete restructuring. It is intended for situations where the resulting organization is sufficient distinct from the original organizations that it has a distinct identity and distinct URI. Extension vocabularies should define sub-classes of this to denote particular categories of event. The instant or interval at which the event occurred should be given by `prov:startAtTime` and `prov:endedAtTime`, a description should be given by `dct:description`.
|
||||
FormalOrganization: URIRef # An Organization which is recognized in the world at large, in particular in legal jurisdictions, with associated rights and responsibilities. Examples include a Corporation, Charity, Government or Church. Note that this is a super class of `gr:BusinessEntity` and it is recommended to use the GoodRelations vocabulary to denote Business classifications such as DUNS or NAICS.
|
||||
Membership: URIRef # Indicates the nature of an Agent's membership of an organization. Represents an n-ary relation between an Agent, an Organization and a Role. It is possible to directly indicate membership, independent of the specific Role, through use of the `org:memberOf` property.
|
||||
Organization: URIRef # Represents a collection of people organized together into a community or other social, commercial or political structure. The group has some common purpose or reason for existence which goes beyond the set of people belonging to it and can act as an Agent. Organizations are often decomposable into hierarchical structures. It is recommended that SKOS lexical labels should be used to label the Organization. In particular `skos:prefLabel` for the primary (possibly legally recognized name), `skos:altLabel` for alternative names (trading names, colloquial names) and `skos:notation` to denote a code from a code list. Alternative names: _Collective_ _Body_ _Org_ _Group_
|
||||
OrganizationalCollaboration: URIRef # A collaboration between two or more Organizations such as a project. It meets the criteria for being an Organization in that it has an identity and defining purpose independent of its particular members but is neither a formally recognized legal entity nor a sub-unit within some larger organization. Might typically have a shorter lifetime than the Organizations within it, but not necessarily. All members are `org:Organization`s rather than individuals and those Organizations can play particular roles within the venture. Alternative names: _Project_ _Venture_ _Endeavour_ _Consortium_ _Endeavour_
|
||||
OrganizationalUnit: URIRef # An Organization such as a University Support Unit which is part of some larger FormalOrganization and only has full recognition within the context of that FormalOrganization, it is not a Legal Entity in its own right. Units can be large and complex containing other Units and even FormalOrganizations. Alternative names: _OU_ _Unit_ _Department_
|
||||
Post: URIRef # A Post represents some position within an organization that exists independently of the person or persons filling it. Posts may be used to represent situations where a person is a member of an organization ex officio (for example the Secretary of State for Scotland is part of UK Cabinet by virtue of being Secretary of State for Scotland, not as an individual person). A post can be held by multiple people and hence can be treated as an organization in its own right.
|
||||
Role: URIRef # Denotes a role that a Person or other Agent can take in an organization. Instances of this class describe the abstract role; to denote a specific instance of a person playing that role in a specific organization use an instance of `org:Membership`. It is common for roles to be arranged in some taxonomic structure and we use SKOS to represent that. The normal SKOS lexical properties should be used when labelling the Role. Additional descriptive properties for the Role, such as a Salary band, may be added by extension vocabularies.
|
||||
Site: URIRef # An office or other premise at which the organization is located. Many organizations are spread across multiple sites and many sites will host multiple locations. In most cases a Site will be a physical location. However, we don't exclude the possibility of non-physical sites such as a virtual office with an associated post box and phone reception service. Extensions may provide subclasses to denote particular types of site.
|
||||
|
||||
# http://www.w3.org/ns/org#Role
|
||||
Head: URIRef # head
|
||||
|
||||
_NS = Namespace("http://www.w3.org/ns/org#")
|
||||
@@ -0,0 +1,140 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class OWL(DefinedNamespace):
    """
    The OWL 2 Schema vocabulary (OWL 2)

    This ontology partially describes the built-in classes and properties that together form the basis of
    the RDF/XML syntax of OWL 2. The content of this ontology is based on Tables 6.1 and 6.2 in Section 6.4
    of the OWL 2 RDF-Based Semantics specification, available at
    http://www.w3.org/TR/owl2-rdf-based-semantics/. Please note that those tables do not include the
    different annotations (labels, comments and rdfs:isDefinedBy links) used in this file. Also note that
    the descriptions provided in this ontology do not provide a complete and correct formal description of
    either the syntax or the semantics of the introduced terms (please see the OWL 2 recommendations for the
    complete and normative specifications). Furthermore, the information provided by this ontology may be
    misleading if not used with care. This ontology SHOULD NOT be imported into OWL ontologies. Importing
    this file into an OWL 2 DL ontology will cause it to become an OWL 2 Full ontology and may have other,
    unexpected, consequences.

    Generated from: http://www.w3.org/2002/07/owl#
    Date: 2020-05-26 14:20:03.193795

    """

    # NOTE(review): presumably tells DefinedNamespace to reject terms that are
    # not declared below -- confirm against the DefinedNamespace implementation.
    _fail = True

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    allValuesFrom: URIRef  # The property that determines the class that a universal property restriction refers to.
    annotatedProperty: URIRef  # The property that determines the predicate of an annotated axiom or annotated annotation.
    annotatedSource: URIRef  # The property that determines the subject of an annotated axiom or annotated annotation.
    annotatedTarget: URIRef  # The property that determines the object of an annotated axiom or annotated annotation.
    assertionProperty: URIRef  # The property that determines the predicate of a negative property assertion.
    cardinality: URIRef  # The property that determines the cardinality of an exact cardinality restriction.
    complementOf: URIRef  # The property that determines that a given class is the complement of another class.
    datatypeComplementOf: URIRef  # The property that determines that a given data range is the complement of another data range with respect to the data domain.
    differentFrom: URIRef  # The property that determines that two given individuals are different.
    disjointUnionOf: URIRef  # The property that determines that a given class is equivalent to the disjoint union of a collection of other classes.
    disjointWith: URIRef  # The property that determines that two given classes are disjoint.
    distinctMembers: URIRef  # The property that determines the collection of pairwise different individuals in an owl:AllDifferent axiom.
    equivalentClass: URIRef  # The property that determines that two given classes are equivalent, and that is used to specify datatype definitions.
    equivalentProperty: URIRef  # The property that determines that two given properties are equivalent.
    hasKey: URIRef  # The property that determines the collection of properties that jointly build a key.
    hasSelf: URIRef  # The property that determines the property that a self restriction refers to.
    hasValue: URIRef  # The property that determines the individual that a has-value restriction refers to.
    intersectionOf: URIRef  # The property that determines the collection of classes or data ranges that build an intersection.
    inverseOf: URIRef  # The property that determines that two given properties are inverse.
    maxCardinality: URIRef  # The property that determines the cardinality of a maximum cardinality restriction.
    maxQualifiedCardinality: URIRef  # The property that determines the cardinality of a maximum qualified cardinality restriction.
    members: URIRef  # The property that determines the collection of members in either an owl:AllDifferent, owl:AllDisjointClasses or owl:AllDisjointProperties axiom.
    minCardinality: URIRef  # The property that determines the cardinality of a minimum cardinality restriction.
    minQualifiedCardinality: URIRef  # The property that determines the cardinality of a minimum qualified cardinality restriction.
    onClass: URIRef  # The property that determines the class that a qualified object cardinality restriction refers to.
    onDataRange: URIRef  # The property that determines the data range that a qualified data cardinality restriction refers to.
    onDatatype: URIRef  # The property that determines the datatype that a datatype restriction refers to.
    onProperties: URIRef  # The property that determines the n-tuple of properties that a property restriction on an n-ary data range refers to.
    onProperty: URIRef  # The property that determines the property that a property restriction refers to.
    oneOf: URIRef  # The property that determines the collection of individuals or data values that build an enumeration.
    propertyChainAxiom: URIRef  # The property that determines the n-tuple of properties that build a sub property chain of a given property.
    propertyDisjointWith: URIRef  # The property that determines that two given properties are disjoint.
    qualifiedCardinality: URIRef  # The property that determines the cardinality of an exact qualified cardinality restriction.
    sameAs: URIRef  # The property that determines that two given individuals are equal.
    someValuesFrom: URIRef  # The property that determines the class that an existential property restriction refers to.
    sourceIndividual: URIRef  # The property that determines the subject of a negative property assertion.
    targetIndividual: URIRef  # The property that determines the object of a negative object property assertion.
    targetValue: URIRef  # The property that determines the value of a negative data property assertion.
    unionOf: URIRef  # The property that determines the collection of classes or data ranges that build a union.
    withRestrictions: URIRef  # The property that determines the collection of facet-value pairs that define a datatype restriction.

    # http://www.w3.org/2000/01/rdf-schema#Class
    AllDifferent: URIRef  # The class of collections of pairwise different individuals.
    AllDisjointClasses: URIRef  # The class of collections of pairwise disjoint classes.
    AllDisjointProperties: URIRef  # The class of collections of pairwise disjoint properties.
    Annotation: URIRef  # The class of annotated annotations for which the RDF serialization consists of an annotated subject, predicate and object.
    AnnotationProperty: URIRef  # The class of annotation properties.
    AsymmetricProperty: URIRef  # The class of asymmetric properties.
    Axiom: URIRef  # The class of annotated axioms for which the RDF serialization consists of an annotated subject, predicate and object.
    Class: URIRef  # The class of OWL classes.
    DataRange: URIRef  # The class of OWL data ranges, which are special kinds of datatypes. Note: The use of the IRI owl:DataRange has been deprecated as of OWL 2. The IRI rdfs:Datatype SHOULD be used instead.
    DatatypeProperty: URIRef  # The class of data properties.
    DeprecatedClass: URIRef  # The class of deprecated classes.
    DeprecatedProperty: URIRef  # The class of deprecated properties.
    FunctionalProperty: URIRef  # The class of functional properties.
    InverseFunctionalProperty: URIRef  # The class of inverse-functional properties.
    IrreflexiveProperty: URIRef  # The class of irreflexive properties.
    NamedIndividual: URIRef  # The class of named individuals.
    NegativePropertyAssertion: URIRef  # The class of negative property assertions.
    ObjectProperty: URIRef  # The class of object properties.
    Ontology: URIRef  # The class of ontologies.
    OntologyProperty: URIRef  # The class of ontology properties.
    ReflexiveProperty: URIRef  # The class of reflexive properties.
    Restriction: URIRef  # The class of property restrictions.
    SymmetricProperty: URIRef  # The class of symmetric properties.
    TransitiveProperty: URIRef  # The class of transitive properties.

    # http://www.w3.org/2002/07/owl#AnnotationProperty
    backwardCompatibleWith: URIRef  # The annotation property that indicates that a given ontology is backward compatible with another ontology.
    deprecated: URIRef  # The annotation property that indicates that a given entity has been deprecated.
    incompatibleWith: URIRef  # The annotation property that indicates that a given ontology is incompatible with another ontology.
    priorVersion: URIRef  # The annotation property that indicates the predecessor ontology of a given ontology.
    versionInfo: URIRef  # The annotation property that provides version information for an ontology or another OWL construct.

    # http://www.w3.org/2002/07/owl#Class
    Nothing: URIRef  # This is the empty class.
    Thing: URIRef  # The class of OWL individuals.

    # http://www.w3.org/2002/07/owl#DatatypeProperty
    bottomDataProperty: URIRef  # The data property that does not relate any individual to any data value.
    topDataProperty: URIRef  # The data property that relates every individual to every data value.

    # http://www.w3.org/2002/07/owl#ObjectProperty
    bottomObjectProperty: URIRef  # The object property that does not relate any two individuals.
    topObjectProperty: URIRef  # The object property that relates every two individuals.

    # http://www.w3.org/2002/07/owl#OntologyProperty
    imports: URIRef  # The property that is used for importing other ontologies into a given ontology.
    versionIRI: URIRef  # The property that identifies the version IRI of an ontology.

    # http://www.w3.org/2000/01/rdf-schema#Datatype
    # NOTE: the following two elements don't appear in the OWL RDF documents but are defined in the OWL2 Recommendation
    # at https://www.w3.org/TR/owl2-syntax/#Datatype_Maps
    rational: URIRef  # The value space is the set of all rational numbers. The lexical form is numerator '/' denominator, where both are integers.
    real: URIRef  # The value space is the set of all real numbers. Does not directly provide any lexical forms.

    _NS = Namespace("http://www.w3.org/2002/07/owl#")
|
||||
@@ -0,0 +1,39 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class PROF(DefinedNamespace):
    """
    Profiles Vocabulary

    This vocabulary is for describing relationships between standards/specifications, profiles of them and
    supporting artifacts such as validating resources. This model starts with
    [dct:Standard](http://dublincore.org/2012/06/14/dcterms#Standard) entities which can either be Base
    Specifications (a standard not profiling any other Standard) or Profiles (Standards which do profile
    others). Base Specifications or Profiles can have Resource Descriptors associated with them that define
    implementing rules for them. Resource Descriptors must indicate the role they play (to guide, to
    validate etc.) and the formalism they adhere to (dct:format) to allow for content negotiation. A
    vocabulary of Resource Roles is provided alongside this vocabulary but that list is extensible.

    Generated from: https://www.w3.org/ns/dx/prof/profilesont.ttl
    Date: 2020-05-26 14:20:03.542924

    """

    # http://www.w3.org/2002/07/owl#Class
    Profile: URIRef  # A named set of constraints on one or more identified base specifications or other profiles, including the identification of any implementing subclasses of datatypes, semantic interpretations, vocabularies, options and parameters of those base specifications necessary to accomplish a particular function. This definition includes what are often called "application profiles", "metadata application profiles", or "metadata profiles".
    ResourceDescriptor: URIRef  # A resource that defines an aspect - a particular part or feature - of a Profile
    ResourceRole: URIRef  # The role that a Resource plays

    # http://www.w3.org/2002/07/owl#DatatypeProperty
    hasToken: URIRef  # A preferred alternative identifier for the Profile

    # http://www.w3.org/2002/07/owl#ObjectProperty
    hasArtifact: URIRef  # The URL of a downloadable file with particulars such as its format and role indicated by a Resource Descriptor
    hasResource: URIRef  # A resource which describes the nature of an artifact and the role it plays in relation to a profile
    hasRole: URIRef  # The function of the described artifact in the expression of the Profile, such as a specification, guidance documentation, SHACL file etc.
    isInheritedFrom: URIRef  # This property indicates a Resource Descriptor described by this Profile’s base specification that is to be considered a Resource Descriptor for this Profile also
    isProfileOf: URIRef  # A Profile is a profile of a dct:Standard (or a Base Specification or another Profile)
    isTransitiveProfileOf: URIRef  # A base specification a Profile conforms to

    _NS = Namespace("http://www.w3.org/ns/dx/prof/")
|
||||
@@ -0,0 +1,250 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class PROV(DefinedNamespace):
|
||||
"""
|
||||
W3C PROVenance Interchange Ontology (PROV-O)
|
||||
|
||||
This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). If
|
||||
you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe
|
||||
public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All
|
||||
feedback is welcome.
|
||||
|
||||
PROV Access and Query Ontology
|
||||
|
||||
This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). If
|
||||
you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe
|
||||
public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All
|
||||
feedback is welcome.
|
||||
|
||||
Dublin Core extensions of the W3C PROVenance Interchange Ontology (PROV-O)
|
||||
|
||||
This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). If
|
||||
you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe
|
||||
public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All
|
||||
feedback is welcome.
|
||||
|
||||
W3C PROV Linking Across Provenance Bundles Ontology (PROV-LINKS)
|
||||
|
||||
This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). If
|
||||
you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe
|
||||
public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/ ). All
|
||||
feedback is welcome.
|
||||
|
||||
W3C PROVenance Interchange Ontology (PROV-O) Dictionary Extension
|
||||
|
||||
This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page).
|
||||
If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org
|
||||
(subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-
|
||||
comments/). All feedback is welcome.
|
||||
|
||||
W3C PROVenance Interchange
|
||||
|
||||
This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). If
|
||||
you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe
|
||||
public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All
|
||||
feedback is welcome.
|
||||
|
||||
Generated from: http://www.w3.org/ns/prov
|
||||
Date: 2020-05-26 14:20:04.650279
|
||||
|
||||
"""
|
||||
|
||||
_fail = True
|
||||
|
||||
# http://www.w3.org/2000/01/rdf-schema#Resource
|
||||
activityOfInfluence: URIRef # activityOfInfluence
|
||||
agentOfInfluence: URIRef # agentOfInfluence
|
||||
contributed: URIRef # contributed
|
||||
ended: URIRef # ended
|
||||
entityOfInfluence: URIRef # entityOfInfluence
|
||||
generalizationOf: URIRef # generalizationOf
|
||||
generatedAsDerivation: URIRef # generatedAsDerivation
|
||||
hadDelegate: URIRef # hadDelegate
|
||||
hadDerivation: URIRef # hadDerivation
|
||||
hadInfluence: URIRef # hadInfluence
|
||||
hadRevision: URIRef # hadRevision
|
||||
informed: URIRef # informed
|
||||
locationOf: URIRef # locationOf
|
||||
qualifiedAssociationOf: URIRef # qualifiedAssociationOf
|
||||
qualifiedAttributionOf: URIRef # qualifiedAttributionOf
|
||||
qualifiedCommunicationOf: URIRef # qualifiedCommunicationOf
|
||||
qualifiedDelegationOf: URIRef # qualifiedDelegationOf
|
||||
qualifiedDerivationOf: URIRef # qualifiedDerivationOf
|
||||
qualifiedEndOf: URIRef # qualifiedEndOf
|
||||
qualifiedGenerationOf: URIRef # qualifiedGenerationOf
|
||||
qualifiedInfluenceOf: URIRef # qualifiedInfluenceOf
|
||||
qualifiedInvalidationOf: URIRef # qualifiedInvalidationOf
|
||||
qualifiedQuotationOf: URIRef # qualifiedQuotationOf
|
||||
qualifiedSourceOf: URIRef # qualifiedSourceOf
|
||||
qualifiedStartOf: URIRef # qualifiedStartOf
|
||||
qualifiedUsingActivity: URIRef # qualifiedUsingActivity
|
||||
quotedAs: URIRef # quotedAs
|
||||
revisedEntity: URIRef # revisedEntity
|
||||
started: URIRef # started
|
||||
wasActivityOfInfluence: URIRef # wasActivityOfInfluence
|
||||
wasAssociateFor: URIRef # wasAssociateFor
|
||||
wasMemberOf: URIRef # wasMemberOf
|
||||
wasPlanOf: URIRef # wasPlanOf
|
||||
wasPrimarySourceOf: URIRef # wasPrimarySourceOf
|
||||
wasRoleIn: URIRef # wasRoleIn
|
||||
wasUsedBy: URIRef # wasUsedBy
|
||||
wasUsedInDerivation: URIRef # wasUsedInDerivation
|
||||
|
||||
# http://www.w3.org/2002/07/owl#AnnotationProperty
|
||||
aq: URIRef #
|
||||
category: URIRef # Classify prov-o terms into three categories, including 'starting-point', 'qualifed', and 'extended'. This classification is used by the prov-o html document to gently introduce prov-o terms to its users.
|
||||
component: URIRef # Classify prov-o terms into six components according to prov-dm, including 'agents-responsibility', 'alternate', 'annotations', 'collections', 'derivations', and 'entities-activities'. This classification is used so that readers of prov-o specification can find its correspondence with the prov-dm specification.
|
||||
constraints: URIRef # A reference to the principal section of the PROV-CONSTRAINTS document that describes this concept.
|
||||
definition: URIRef # A definition quoted from PROV-DM or PROV-CONSTRAINTS that describes the concept expressed with this OWL term.
|
||||
dm: URIRef # A reference to the principal section of the PROV-DM document that describes this concept.
|
||||
editorialNote: URIRef # A note by the OWL development team about how this term expresses the PROV-DM concept, or how it should be used in context of semantic web or linked data.
|
||||
editorsDefinition: URIRef # When the prov-o term does not have a definition drawn from prov-dm, and the prov-o editor provides one.
|
||||
inverse: URIRef # PROV-O does not define all property inverses. The directionalities defined in PROV-O should be given preference over those not defined. However, if users wish to name the inverse of a PROV-O property, the local name given by prov:inverse should be used.
|
||||
n: URIRef # A reference to the principal section of the PROV-N document that describes this concept.
|
||||
order: URIRef # The position that this OWL term should be listed within documentation. The scope of the documentation (e.g., among all terms, among terms within a prov:category, among properties applying to a particular class, etc.) is unspecified.
|
||||
qualifiedForm: URIRef # This annotation property links a subproperty of prov:wasInfluencedBy with the subclass of prov:Influence and the qualifying property that are used to qualify it. Example annotation: prov:wasGeneratedBy prov:qualifiedForm prov:qualifiedGeneration, prov:Generation . Then this unqualified assertion: :entity1 prov:wasGeneratedBy :activity1 . can be qualified by adding: :entity1 prov:qualifiedGeneration :entity1Gen . :entity1Gen a prov:Generation, prov:Influence; prov:activity :activity1; :customValue 1337 . Note how the value of the unqualified influence (prov:wasGeneratedBy :activity1) is mirrored as the value of the prov:activity (or prov:entity, or prov:agent) property on the influence class.
|
||||
sharesDefinitionWith: URIRef #
|
||||
specializationOf: URIRef # specializationOf
|
||||
todo: URIRef #
|
||||
unqualifiedForm: URIRef # Classes and properties used to qualify relationships are annotated with prov:unqualifiedForm to indicate the property used to assert an unqualified provenance relation.
|
||||
wasRevisionOf: URIRef # A revision is a derivation that revises an entity into a revised version.
|
||||
|
||||
# http://www.w3.org/2002/07/owl#Class
|
||||
Accept: URIRef # Accept
|
||||
Activity: URIRef # Activity
|
||||
ActivityInfluence: URIRef # ActivityInfluence provides additional descriptions of an Activity's binary influence upon any other kind of resource. Instances of ActivityInfluence use the prov:activity property to cite the influencing Activity.
|
||||
Agent: URIRef # Agent
|
||||
AgentInfluence: URIRef # AgentInfluence provides additional descriptions of an Agent's binary influence upon any other kind of resource. Instances of AgentInfluence use the prov:agent property to cite the influencing Agent.
|
||||
Association: URIRef # An instance of prov:Association provides additional descriptions about the binary prov:wasAssociatedWith relation from an prov:Activity to some prov:Agent that had some responsibility for it. For example, :baking prov:wasAssociatedWith :baker; prov:qualifiedAssociation [ a prov:Association; prov:agent :baker; :foo :bar ].
|
||||
Attribution: URIRef # An instance of prov:Attribution provides additional descriptions about the binary prov:wasAttributedTo relation from an prov:Entity to some prov:Agent that had some responsibility for it. For example, :cake prov:wasAttributedTo :baker; prov:qualifiedAttribution [ a prov:Attribution; prov:entity :baker; :foo :bar ].
|
||||
Bundle: URIRef # Note that there are kinds of bundles (e.g. handwritten letters, audio recordings, etc.) that are not expressed in PROV-O, but can still be described by PROV-O.
|
||||
Collection: URIRef # Collection
|
||||
Communication: URIRef # An instance of prov:Communication provides additional descriptions about the binary prov:wasInformedBy relation from an informed prov:Activity to the prov:Activity that informed it. For example, :you_jumping_off_bridge prov:wasInformedBy :everyone_else_jumping_off_bridge; prov:qualifiedCommunication [ a prov:Communication; prov:activity :everyone_else_jumping_off_bridge; :foo :bar ].
|
||||
Contribute: URIRef # Contribute
|
||||
Contributor: URIRef # Contributor
|
||||
Copyright: URIRef # Copyright
|
||||
Create: URIRef # Create
|
||||
Creator: URIRef # Creator
|
||||
Delegation: URIRef # An instance of prov:Delegation provides additional descriptions about the binary prov:actedOnBehalfOf relation from a performing prov:Agent to some prov:Agent for whom it was performed. For example, :mixing prov:wasAssociatedWith :toddler . :toddler prov:actedOnBehalfOf :mother; prov:qualifiedDelegation [ a prov:Delegation; prov:entity :mother; :foo :bar ].
|
||||
Derivation: URIRef # The more specific forms of prov:Derivation (i.e., prov:Revision, prov:Quotation, prov:PrimarySource) should be asserted if they apply.
|
||||
Dictionary: URIRef # This concept allows for the provenance of the dictionary, but also of its constituents to be expressed. Such a notion of dictionary corresponds to a wide variety of concrete data structures, such as maps or associative arrays.
|
||||
DirectQueryService: URIRef # Type for a generic provenance query service. Mainly for use in RDF provenance query service descriptions, to facilitate discovery in linked data environments.
|
||||
EmptyDictionary: URIRef # Empty Dictionary
|
||||
End: URIRef # An instance of prov:End provides additional descriptions about the binary prov:wasEndedBy relation from some ended prov:Activity to an prov:Entity that ended it. For example, :ball_game prov:wasEndedBy :buzzer; prov:qualifiedEnd [ a prov:End; prov:entity :buzzer; :foo :bar; prov:atTime '2012-03-09T08:05:08-05:00'^^xsd:dateTime ].
|
||||
Entity: URIRef # Entity
|
||||
EntityInfluence: URIRef # It is not recommended that the type EntityInfluence be asserted without also asserting one of its more specific subclasses.
|
||||
Generation: URIRef # An instance of prov:Generation provides additional descriptions about the binary prov:wasGeneratedBy relation from a generated prov:Entity to the prov:Activity that generated it. For example, :cake prov:wasGeneratedBy :baking; prov:qualifiedGeneration [ a prov:Generation; prov:activity :baking; :foo :bar ].
|
||||
Influence: URIRef # Because prov:Influence is a broad relation, its most specific subclasses (e.g. prov:Communication, prov:Delegation, prov:End, prov:Revision, etc.) should be used when applicable.
|
||||
Insertion: URIRef # Insertion
|
||||
InstantaneousEvent: URIRef # An instantaneous event, or event for short, happens in the world and marks a change in the world, in its activities and in its entities. The term 'event' is commonly used in process algebra with a similar meaning. Events represent communications or interactions; they are assumed to be atomic and instantaneous.
|
||||
Invalidation: URIRef # An instance of prov:Invalidation provides additional descriptions about the binary prov:wasInvalidatedBy relation from an invalidated prov:Entity to the prov:Activity that invalidated it. For example, :uncracked_egg prov:wasInvalidatedBy :baking; prov:qualifiedInvalidation [ a prov:Invalidation; prov:activity :baking; :foo :bar ].
|
||||
KeyEntityPair: URIRef # Key-Entity Pair
|
||||
Location: URIRef # Location
|
||||
Modify: URIRef # Modify
|
||||
Organization: URIRef # Organization
|
||||
Person: URIRef # Person
|
||||
Plan: URIRef # There exist no prescriptive requirement on the nature of plans, their representation, the actions or steps they consist of, or their intended goals. Since plans may evolve over time, it may become necessary to track their provenance, so plans themselves are entities. Representing the plan explicitly in the provenance can be useful for various tasks: for example, to validate the execution as represented in the provenance record, to manage expectation failures, or to provide explanations.
|
||||
PrimarySource: URIRef # An instance of prov:PrimarySource provides additional descriptions about the binary prov:hadPrimarySource relation from some secondary prov:Entity to an earlier, primary prov:Entity. For example, :blog prov:hadPrimarySource :newsArticle; prov:qualifiedPrimarySource [ a prov:PrimarySource; prov:entity :newsArticle; :foo :bar ] .
|
||||
Publish: URIRef # Publish
|
||||
Publisher: URIRef # Publisher
|
||||
Quotation: URIRef # An instance of prov:Quotation provides additional descriptions about the binary prov:wasQuotedFrom relation from some taken prov:Entity from an earlier, larger prov:Entity. For example, :here_is_looking_at_you_kid prov:wasQuotedFrom :casablanca_script; prov:qualifiedQuotation [ a prov:Quotation; prov:entity :casablanca_script; :foo :bar ].
|
||||
Removal: URIRef # Removal
|
||||
Replace: URIRef # Replace
|
||||
Revision: URIRef # An instance of prov:Revision provides additional descriptions about the binary prov:wasRevisionOf relation from some newer prov:Entity to an earlier prov:Entity. For example, :draft_2 prov:wasRevisionOf :draft_1; prov:qualifiedRevision [ a prov:Revision; prov:entity :draft_1; :foo :bar ].
|
||||
RightsAssignment: URIRef # RightsAssignment
|
||||
RightsHolder: URIRef # RightsHolder
|
||||
Role: URIRef # Role
|
||||
ServiceDescription: URIRef # Type for a generic provenance query service. Mainly for use in RDF provenance query service descriptions, to facilitate discovery in linked data environments.
|
||||
SoftwareAgent: URIRef # SoftwareAgent
|
||||
Start: URIRef # An instance of prov:Start provides additional descriptions about the binary prov:wasStartedBy relation from some started prov:Activity to an prov:Entity that started it. For example, :foot_race prov:wasStartedBy :bang; prov:qualifiedStart [ a prov:Start; prov:entity :bang; :foo :bar; prov:atTime '2012-03-09T08:05:08-05:00'^^xsd:dateTime ] .
|
||||
Submit: URIRef # Submit
|
||||
Usage: URIRef # An instance of prov:Usage provides additional descriptions about the binary prov:used relation from some prov:Activity to an prov:Entity that it used. For example, :keynote prov:used :podium; prov:qualifiedUsage [ a prov:Usage; prov:entity :podium; :foo :bar ].
|
||||
|
||||
# http://www.w3.org/2002/07/owl#DatatypeProperty
|
||||
atTime: URIRef # The time at which an InstantaneousEvent occurred, in the form of xsd:dateTime.
|
||||
endedAtTime: (
|
||||
URIRef # The time at which an activity ended. See also prov:startedAtTime.
|
||||
)
|
||||
generatedAtTime: URIRef # The time at which an entity was completely created and is available for use.
|
||||
invalidatedAtTime: (
|
||||
URIRef # The time at which an entity was invalidated (i.e., no longer usable).
|
||||
)
|
||||
provenanceUriTemplate: URIRef # Relates a provenance service to a URI template string for constructing provenance-URIs.
|
||||
removedKey: URIRef # removedKey
|
||||
startedAtTime: (
|
||||
URIRef # The time at which an activity started. See also prov:endedAtTime.
|
||||
)
|
||||
value: URIRef # value
|
||||
|
||||
# http://www.w3.org/2002/07/owl#FunctionalProperty
|
||||
pairEntity: URIRef # pairEntity
|
||||
pairKey: URIRef # pairKey
|
||||
|
||||
# http://www.w3.org/2002/07/owl#NamedIndividual
|
||||
EmptyCollection: URIRef # EmptyCollection
|
||||
|
||||
# http://www.w3.org/2002/07/owl#ObjectProperty
|
||||
actedOnBehalfOf: URIRef # An object property to express the accountability of an agent towards another agent. The subordinate agent acted on behalf of the responsible agent in an actual activity.
|
||||
activity: URIRef # activity
|
||||
agent: URIRef # agent
|
||||
alternateOf: URIRef # alternateOf
|
||||
asInBundle: URIRef # prov:asInBundle is used to specify which bundle the general entity of a prov:mentionOf property is described. When :x prov:mentionOf :y and :y is described in Bundle :b, the triple :x prov:asInBundle :b is also asserted to cite the Bundle in which :y was described.
|
||||
atLocation: URIRef # The Location of any resource.
|
||||
derivedByInsertionFrom: URIRef # derivedByInsertionFrom
|
||||
derivedByRemovalFrom: URIRef # derivedByRemovalFrom
|
||||
describesService: URIRef # relates a generic provenance query service resource (type prov:ServiceDescription) to a specific query service description (e.g. a prov:DirectQueryService or a sd:Service).
|
||||
dictionary: URIRef # dictionary
|
||||
entity: URIRef # entity
|
||||
generated: URIRef # generated
|
||||
hadActivity: URIRef # The _optional_ Activity of an Influence, which used, generated, invalidated, or was the responsibility of some Entity. This property is _not_ used by ActivityInfluence (use prov:activity instead).
|
||||
hadDictionaryMember: URIRef # hadDictionaryMember
|
||||
hadGeneration: (
|
||||
URIRef # The _optional_ Generation involved in an Entity's Derivation.
|
||||
)
|
||||
hadMember: URIRef # hadMember
|
||||
hadPlan: URIRef # The _optional_ Plan adopted by an Agent in Association with some Activity. Plan specifications are out of the scope of this specification.
|
||||
hadPrimarySource: URIRef # hadPrimarySource
|
||||
hadRole: URIRef # This property has multiple RDFS domains to suit multiple OWL Profiles. See <a href="#owl-profile">PROV-O OWL Profile</a>.
|
||||
hadUsage: URIRef # The _optional_ Usage involved in an Entity's Derivation.
|
||||
has_anchor: (
|
||||
URIRef # Indicates anchor URI for a potentially dynamic resource instance.
|
||||
)
|
||||
has_provenance: URIRef # Indicates a provenance-URI for a resource; the resource identified by this property presents a provenance record about its subject or anchor resource.
|
||||
has_query_service: URIRef # Indicates a provenance query service that can access provenance related to its subject or anchor resource.
|
||||
influenced: URIRef # influenced
|
||||
influencer: URIRef # Subproperties of prov:influencer are used to cite the object of an unqualified PROV-O triple whose predicate is a subproperty of prov:wasInfluencedBy (e.g. prov:used, prov:wasGeneratedBy). prov:influencer is used much like rdf:object is used.
|
||||
insertedKeyEntityPair: URIRef # insertedKeyEntityPair
|
||||
invalidated: URIRef # invalidated
|
||||
mentionOf: URIRef # prov:mentionOf is used to specialize an entity as described in another bundle. It is to be used in conjunction with prov:asInBundle. prov:asInBundle is used to cite the Bundle in which the generalization was mentioned.
|
||||
pingback: URIRef # Relates a resource to a provenance pingback service that may receive additional provenance links about the resource.
|
||||
qualifiedAssociation: URIRef # If this Activity prov:wasAssociatedWith Agent :ag, then it can qualify the Association using prov:qualifiedAssociation [ a prov:Association; prov:agent :ag; :foo :bar ].
|
||||
qualifiedAttribution: URIRef # If this Entity prov:wasAttributedTo Agent :ag, then it can qualify how it was influenced using prov:qualifiedAttribution [ a prov:Attribution; prov:agent :ag; :foo :bar ].
|
||||
qualifiedCommunication: URIRef # If this Activity prov:wasInformedBy Activity :a, then it can qualify how it was influenced using prov:qualifiedCommunication [ a prov:Communication; prov:activity :a; :foo :bar ].
|
||||
qualifiedDelegation: URIRef # If this Agent prov:actedOnBehalfOf Agent :ag, then it can qualify how with prov:qualifiedDelegation [ a prov:Delegation; prov:agent :ag; :foo :bar ].
|
||||
qualifiedDerivation: URIRef # If this Entity prov:wasDerivedFrom Entity :e, then it can qualify how it was derived using prov:qualifiedDerivation [ a prov:Derivation; prov:entity :e; :foo :bar ].
|
||||
qualifiedEnd: URIRef # If this Activity prov:wasEndedBy Entity :e1, then it can qualify how it was ended using prov:qualifiedEnd [ a prov:End; prov:entity :e1; :foo :bar ].
|
||||
qualifiedGeneration: URIRef # If this Activity prov:generated Entity :e, then it can qualify how it performed the Generation using prov:qualifiedGeneration [ a prov:Generation; prov:entity :e; :foo :bar ].
|
||||
qualifiedInfluence: URIRef # Because prov:qualifiedInfluence is a broad relation, the more specific relations (qualifiedCommunication, qualifiedDelegation, qualifiedEnd, etc.) should be used when applicable.
|
||||
qualifiedInsertion: URIRef # qualifiedInsertion
|
||||
qualifiedInvalidation: URIRef # If this Entity prov:wasInvalidatedBy Activity :a, then it can qualify how it was invalidated using prov:qualifiedInvalidation [ a prov:Invalidation; prov:activity :a; :foo :bar ].
|
||||
qualifiedPrimarySource: URIRef # If this Entity prov:hadPrimarySource Entity :e, then it can qualify how using prov:qualifiedPrimarySource [ a prov:PrimarySource; prov:entity :e; :foo :bar ].
|
||||
qualifiedQuotation: URIRef # If this Entity prov:wasQuotedFrom Entity :e, then it can qualify how using prov:qualifiedQuotation [ a prov:Quotation; prov:entity :e; :foo :bar ].
|
||||
qualifiedRemoval: URIRef # qualifiedRemoval
|
||||
qualifiedRevision: URIRef # If this Entity prov:wasRevisionOf Entity :e, then it can qualify how it was revised using prov:qualifiedRevision [ a prov:Revision; prov:entity :e; :foo :bar ].
|
||||
qualifiedStart: URIRef # If this Activity prov:wasStartedBy Entity :e1, then it can qualify how it was started using prov:qualifiedStart [ a prov:Start; prov:entity :e1; :foo :bar ].
|
||||
qualifiedUsage: URIRef # If this Activity prov:used Entity :e, then it can qualify how it used it using prov:qualifiedUsage [ a prov:Usage; prov:entity :e; :foo :bar ].
|
||||
used: URIRef # A prov:Entity that was used by this prov:Activity. For example, :baking prov:used :spoon, :egg, :oven .
|
||||
wasAssociatedWith: URIRef # An prov:Agent that had some (unspecified) responsibility for the occurrence of this prov:Activity.
|
||||
wasAttributedTo: URIRef # Attribution is the ascribing of an entity to an agent.
|
||||
wasDerivedFrom: URIRef # The more specific subproperties of prov:wasDerivedFrom (i.e., prov:wasQuotedFrom, prov:wasRevisionOf, prov:hadPrimarySource) should be used when applicable.
|
||||
wasEndedBy: URIRef # End is when an activity is deemed to have ended. An end may refer to an entity, known as trigger, that terminated the activity.
|
||||
wasGeneratedBy: URIRef # wasGeneratedBy
|
||||
wasInfluencedBy: URIRef # This property has multiple RDFS domains to suit multiple OWL Profiles. See <a href="#owl-profile">PROV-O OWL Profile</a>.
|
||||
wasInformedBy: URIRef # An activity a2 is dependent on or informed by another activity a1, by way of some unspecified entity that is generated by a1 and used by a2.
|
||||
wasInvalidatedBy: URIRef # wasInvalidatedBy
|
||||
wasQuotedFrom: URIRef # An entity is derived from an original entity by copying, or 'quoting', some or all of it.
|
||||
wasStartedBy: URIRef # Start is when an activity is deemed to have started. A start may refer to an entity, known as trigger, that initiated the activity.
|
||||
|
||||
_NS = Namespace("http://www.w3.org/ns/prov#")
|
||||
@@ -0,0 +1,63 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class QB(DefinedNamespace):
    """Vocabulary for publishing multi-dimensional (e.g. statistical) data.

    The vocabulary lets multi-dimensional data, such as statistics, be
    published in RDF. It is based on the core information model from SDMX
    (and thus also DDI).

    Generated from: http://purl.org/linked-data/cube#
    Date: 2020-05-26 14:20:05.485176
    """

    # NOTE(review): presumably read by DefinedNamespace to decide how
    # undeclared terms are handled -- confirm against rdflib.namespace.
    _fail = True

    # ------------------------------------------------------------------
    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    # ------------------------------------------------------------------

    # An alternative to qb:componentProperty which makes explicit that the
    # component is an attribute.
    attribute: URIRef

    # Gives the code list associated with a CodedProperty.
    codeList: URIRef

    # Indicates a component specification which is included in the structure
    # of the dataset.
    component: URIRef

    # Indicates the level at which the component property should be attached;
    # this might be a qb:DataSet, qb:Slice or qb:Observation, or a
    # qb:MeasureProperty.
    componentAttachment: URIRef

    # Indicates a ComponentProperty (i.e. attribute/dimension) expected on a
    # DataSet, or a dimension fixed in a SliceKey.
    componentProperty: URIRef

    # Indicates whether a component property is required (true) or optional
    # (false) in the context of a DSD. Only applicable to components
    # corresponding to an attribute. Defaults to false (optional).
    componentRequired: URIRef

    # Gives the concept which is being measured or indicated by a
    # ComponentProperty.
    concept: URIRef

    # Indicates the data set of which this observation is a part.
    dataSet: URIRef

    # An alternative to qb:componentProperty which makes explicit that the
    # component is a dimension.
    dimension: URIRef

    # Specifies a root of the hierarchy. A hierarchy may have multiple roots
    # but must have at least one.
    hierarchyRoot: URIRef

    # An alternative to qb:componentProperty which makes explicit that the
    # component is a measure.
    measure: URIRef

    # An alternative to qb:componentProperty which makes explicit that the
    # component is a measure dimension.
    measureDimension: URIRef

    # Generic measure dimension; the value of this dimension indicates which
    # measure (from the set of measures in the DSD) is being given by the
    # obsValue (or other primary measure).
    measureType: URIRef

    # Indicates an observation contained within this slice of the data set.
    observation: URIRef

    # Indicates a group of observations. The domain of this property is left
    # open so that a group may be attached to different resources and need
    # not be restricted to a single DataSet.
    observationGroup: URIRef

    # Indicates a priority order for the components of sets with this
    # structure, used to guide presentations - lower order numbers come
    # before higher numbers, un-numbered components come last.
    order: URIRef

    # Specifies a property which relates a parent concept in the hierarchy to
    # a child concept.
    parentChildProperty: URIRef

    # Indicates a subset of a DataSet defined by fixing a subset of the
    # dimensional values.
    slice: URIRef

    # Indicates a slice key which is used for slices in this dataset.
    sliceKey: URIRef

    # Indicates the sub-key corresponding to this slice.
    sliceStructure: URIRef

    # Indicates the structure to which this data set conforms.
    structure: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2000/01/rdf-schema#Class
    # ------------------------------------------------------------------

    # Abstract superclass for everything that can have attributes and
    # dimensions.
    Attachable: URIRef

    # The class of components which represent attributes of observations in
    # the cube, e.g. unit of measurement.
    AttributeProperty: URIRef

    # Superclass of all coded ComponentProperties.
    CodedProperty: URIRef

    # Abstract super-property of all properties representing dimensions,
    # attributes or measures.
    ComponentProperty: URIRef

    # Abstract class of things which reference one or more
    # ComponentProperties.
    ComponentSet: URIRef

    # Used to define properties of a component (attribute, dimension etc)
    # which are specific to its usage in a DSD.
    ComponentSpecification: URIRef

    # Represents a collection of observations, possibly organized into
    # various slices, conforming to some common dimensional structure.
    DataSet: URIRef

    # Defines the structure of a DataSet or slice.
    DataStructureDefinition: URIRef

    # The class of components which represent the dimensions of the cube.
    DimensionProperty: URIRef

    # Represents a generalized hierarchy of concepts which can be used for
    # coding. The hierarchy is defined by one or more roots together with a
    # property which relates concepts in the hierarchy to their child
    # concept. The same concepts may be members of multiple hierarchies
    # provided that different qb:parentChildProperty values are used for each
    # hierarchy.
    HierarchicalCodeList: URIRef

    # The class of components which represent the measured value of the
    # phenomenon being observed.
    MeasureProperty: URIRef

    # A single observation in the cube, may have one or more associated
    # measured values.
    Observation: URIRef

    # A, possibly arbitrary, group of observations.
    ObservationGroup: URIRef

    # Denotes a subset of a DataSet defined by fixing a subset of the
    # dimensional values, component properties on the Slice.
    Slice: URIRef

    # Denotes a subset of the component properties of a DataSet which are
    # fixed in the corresponding slices.
    SliceKey: URIRef

    # Base IRI of the vocabulary.
    _NS = Namespace("http://purl.org/linked-data/cube#")
|
||||
@@ -0,0 +1,51 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class RDF(DefinedNamespace):
    """The RDF Concepts Vocabulary (RDF).

    RDF Schema for the RDF vocabulary terms in the RDF Namespace, as defined
    in RDF 1.1 Concepts.

    Generated from: http://www.w3.org/1999/02/22-rdf-syntax-ns#
    Date: 2020-05-26 14:20:05.642859

    dc:date "2019-12-16"
    """

    # NOTE(review): both flags are presumably read by DefinedNamespace;
    # `_fail` looks like it governs undeclared terms and `_underscore_num`
    # numbered members such as rdf:_1 -- confirm against rdflib.namespace.
    _fail = True
    _underscore_num = True

    # ------------------------------------------------------------------
    # http://www.w3.org/1999/02/22-rdf-syntax-ns#List
    # ------------------------------------------------------------------

    # The empty list, with no items in it. If the rest of a list is nil then
    # the list has no more items in it.
    nil: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    # ------------------------------------------------------------------

    # The base direction component of a CompoundLiteral.
    direction: URIRef

    # The first item in the subject RDF list.
    first: URIRef

    # The language component of a CompoundLiteral.
    language: URIRef

    # The object of the subject RDF statement.
    object: URIRef

    # The predicate of the subject RDF statement.
    predicate: URIRef

    # The rest of the subject RDF list after the first item.
    rest: URIRef

    # The subject of the subject RDF statement.
    subject: URIRef

    # The subject is an instance of a class.
    type: URIRef

    # Idiomatic property used for structured values.
    value: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2000/01/rdf-schema#Class
    # ------------------------------------------------------------------

    # The class of containers of alternatives.
    Alt: URIRef

    # The class of unordered containers.
    Bag: URIRef

    # A class representing a compound literal.
    CompoundLiteral: URIRef

    # The class of RDF Lists.
    List: URIRef

    # The class of RDF properties.
    Property: URIRef

    # The class of ordered containers.
    Seq: URIRef

    # The class of RDF statements.
    Statement: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2000/01/rdf-schema#Datatype
    # ------------------------------------------------------------------

    # The datatype of RDF literals storing fragments of HTML content.
    HTML: URIRef

    # The datatype of RDF literals storing JSON content.
    JSON: URIRef

    # The class of plain (i.e. untyped) literal values, as used in RIF and
    # OWL 2.
    PlainLiteral: URIRef

    # The datatype of XML literal values.
    XMLLiteral: URIRef

    # The datatype of language-tagged string values.
    langString: URIRef

    # Base IRI of the vocabulary.
    _NS = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
@@ -0,0 +1,35 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class RDFS(DefinedNamespace):
    """The RDF Schema vocabulary (RDFS).

    Generated from: http://www.w3.org/2000/01/rdf-schema#
    Date: 2020-05-26 14:20:05.794866
    """

    # NOTE(review): presumably read by DefinedNamespace to decide how
    # undeclared terms are handled -- confirm against rdflib.namespace.
    _fail = True

    # ------------------------------------------------------------------
    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    # ------------------------------------------------------------------

    # A description of the subject resource.
    comment: URIRef

    # A domain of the subject property.
    domain: URIRef

    # The definition of the subject resource.
    isDefinedBy: URIRef

    # A human-readable name for the subject.
    label: URIRef

    # A member of the subject resource.
    member: URIRef

    # A range of the subject property.
    range: URIRef

    # Further information about the subject resource.
    seeAlso: URIRef

    # The subject is a subclass of a class.
    subClassOf: URIRef

    # The subject is a subproperty of a property.
    subPropertyOf: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2000/01/rdf-schema#Class
    # ------------------------------------------------------------------

    # The class of classes.
    Class: URIRef

    # The class of RDF containers.
    Container: URIRef

    # The class of container membership properties, rdf:_1, rdf:_2, ...,
    # all of which are sub-properties of 'member'.
    ContainerMembershipProperty: URIRef

    # The class of RDF datatypes.
    Datatype: URIRef

    # The class of literal values, e.g. textual strings and integers.
    Literal: URIRef

    # The class resource, everything.
    Resource: URIRef

    # Base IRI of the vocabulary.
    _NS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,293 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class SH(DefinedNamespace):
|
||||
"""
|
||||
W3C Shapes Constraint Language (SHACL) Vocabulary
|
||||
|
||||
This vocabulary defines terms used in SHACL, the W3C Shapes Constraint Language.
|
||||
|
||||
Generated from: https://www.w3.org/ns/shacl.ttl
|
||||
Date: 2020-05-26 14:20:08.041103
|
||||
|
||||
"""
|
||||
|
||||
_fail = True
|
||||
|
||||
# http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
|
||||
alternativePath: URIRef # The (single) value of this property must be a list of path elements, representing the elements of alternative paths.
|
||||
annotationProperty: URIRef # The annotation property that shall be set.
|
||||
annotationValue: URIRef # The (default) values of the annotation property.
|
||||
annotationVarName: URIRef # The name of the SPARQL variable from the SELECT clause that shall be used for the values.
|
||||
ask: URIRef # The SPARQL ASK query to execute.
|
||||
closed: URIRef # If set to true then the shape is closed.
|
||||
condition: URIRef # The shapes that the focus nodes need to conform to before a rule is executed on them.
|
||||
conforms: URIRef # True if the validation did not produce any validation results, and false otherwise.
|
||||
construct: URIRef # The SPARQL CONSTRUCT query to execute.
|
||||
datatype: URIRef # Specifies an RDF datatype that all value nodes must have.
|
||||
deactivated: URIRef # If set to true then all nodes conform to this.
|
||||
declare: URIRef # Links a resource with its namespace prefix declarations.
|
||||
defaultValue: URIRef # A default value for a property, for example for user interface tools to pre-populate input fields.
|
||||
description: URIRef # Human-readable descriptions for the property in the context of the surrounding shape.
|
||||
detail: URIRef # Links a result with other results that provide more details, for example to describe violations against nested shapes.
|
||||
disjoint: URIRef # Specifies a property where the set of values must be disjoint with the value nodes.
|
||||
entailment: URIRef # An entailment regime that indicates what kind of inferencing is required by a shapes graph.
|
||||
equals: URIRef # Specifies a property that must have the same values as the value nodes.
|
||||
expression: URIRef # The node expression that must return true for the value nodes.
|
||||
filterShape: (
|
||||
URIRef # The shape that all input nodes of the expression need to conform to.
|
||||
)
|
||||
flags: (
|
||||
URIRef # An optional flag to be used with regular expression pattern matching.
|
||||
)
|
||||
focusNode: URIRef # The focus node that was validated when the result was produced.
|
||||
group: URIRef # Can be used to link to a property group to indicate that a property shape belongs to a group of related property shapes.
|
||||
hasValue: URIRef # Specifies a value that must be among the value nodes.
|
||||
ignoredProperties: URIRef # An optional RDF list of properties that are also permitted in addition to those explicitly enumerated via sh:property/sh:path.
|
||||
intersection: URIRef # A list of node expressions that shall be intersected.
|
||||
inversePath: URIRef # The (single) value of this property represents an inverse path (object to subject).
|
||||
js: URIRef # Constraints expressed in JavaScript.
|
||||
jsFunctionName: URIRef # The name of the JavaScript function to execute.
|
||||
jsLibrary: URIRef # Declares which JavaScript libraries are needed to execute this.
|
||||
jsLibraryURL: URIRef # Declares the URLs of a JavaScript library. This should be the absolute URL of a JavaScript file. Implementations may redirect those to local files.
|
||||
labelTemplate: URIRef # Outlines how human-readable labels of instances of the associated Parameterizable shall be produced. The values can contain {?paramName} as placeholders for the actual values of the given parameter.
|
||||
languageIn: (
|
||||
URIRef # Specifies a list of language tags that all value nodes must have.
|
||||
)
|
||||
lessThan: URIRef # Specifies a property that must have smaller values than the value nodes.
|
||||
lessThanOrEquals: URIRef # Specifies a property that must have smaller or equal values than the value nodes.
|
||||
maxCount: (
|
||||
URIRef # Specifies the maximum number of values in the set of value nodes.
|
||||
)
|
||||
maxExclusive: URIRef # Specifies the maximum exclusive value of each value node.
|
||||
maxInclusive: URIRef # Specifies the maximum inclusive value of each value node.
|
||||
maxLength: URIRef # Specifies the maximum string length of each value node.
|
||||
message: URIRef # A human-readable message (possibly with placeholders for variables) explaining the cause of the result.
|
||||
minCount: (
|
||||
URIRef # Specifies the minimum number of values in the set of value nodes.
|
||||
)
|
||||
minExclusive: URIRef # Specifies the minimum exclusive value of each value node.
|
||||
minInclusive: URIRef # Specifies the minimum inclusive value of each value node.
|
||||
minLength: URIRef # Specifies the minimum string length of each value node.
|
||||
name: URIRef # Human-readable labels for the property in the context of the surrounding shape.
|
||||
namespace: URIRef # The namespace associated with a prefix in a prefix declaration.
|
||||
node: URIRef # Specifies the node shape that all value nodes must conform to.
|
||||
nodeKind: URIRef # Specifies the node kind (e.g. IRI or literal) of each value node.
|
||||
nodeValidator: URIRef # The validator(s) used to evaluate a constraint in the context of a node shape.
|
||||
nodes: URIRef # The node expression producing the input nodes of a filter shape expression.
|
||||
object: (
|
||||
URIRef # An expression producing the nodes that shall be inferred as objects.
|
||||
)
|
||||
oneOrMorePath: URIRef # The (single) value of this property represents a path that is matched one or more times.
|
||||
optional: URIRef # Indicates whether a parameter is optional.
|
||||
order: URIRef # Specifies the relative order of this compared to its siblings. For example use 0 for the first, 1 for the second.
|
||||
parameter: URIRef # The parameters of a function or constraint component.
|
||||
path: URIRef # Specifies the property path of a property shape.
|
||||
pattern: URIRef # Specifies a regular expression pattern that the string representations of the value nodes must match.
|
||||
predicate: URIRef # An expression producing the properties that shall be inferred as predicates.
|
||||
prefix: URIRef # The prefix of a prefix declaration.
|
||||
prefixes: URIRef # The prefixes that shall be applied before parsing the associated SPARQL query.
|
||||
property: URIRef # Links a shape to its property shapes.
|
||||
propertyValidator: URIRef # The validator(s) used to evaluate a constraint in the context of a property shape.
|
||||
qualifiedMaxCount: (
|
||||
URIRef # The maximum number of value nodes that can conform to the shape.
|
||||
)
|
||||
qualifiedMinCount: (
|
||||
URIRef # The minimum number of value nodes that must conform to the shape.
|
||||
)
|
||||
qualifiedValueShape: (
|
||||
URIRef # The shape that a specified number of values must conform to.
|
||||
)
|
||||
qualifiedValueShapesDisjoint: URIRef # Can be used to mark the qualified value shape to be disjoint with its sibling shapes.
|
||||
result: URIRef # The validation results contained in a validation report.
|
||||
resultAnnotation: URIRef # Links a SPARQL validator with zero or more sh:ResultAnnotation instances, defining how to derive additional result properties based on the variables of the SELECT query.
|
||||
resultMessage: URIRef # Human-readable messages explaining the cause of the result.
|
||||
resultPath: URIRef # The path of a validation result, based on the path of the validated property shape.
|
||||
resultSeverity: URIRef # The severity of the result, e.g. warning.
|
||||
returnType: (
|
||||
URIRef # The expected type of values returned by the associated function.
|
||||
)
|
||||
rule: URIRef # The rules linked to a shape.
|
||||
select: URIRef # The SPARQL SELECT query to execute.
|
||||
severity: URIRef # Defines the severity that validation results produced by a shape must have. Defaults to sh:Violation.
|
||||
shapesGraph: (
|
||||
URIRef # Shapes graphs that should be used when validating this data graph.
|
||||
)
|
||||
shapesGraphWellFormed: URIRef # If true then the validation engine was certain that the shapes graph has passed all SHACL syntax requirements during the validation process.
|
||||
sourceConstraint: (
|
||||
URIRef # The constraint that was validated when the result was produced.
|
||||
)
|
||||
sourceConstraintComponent: (
|
||||
URIRef # The constraint component that is the source of the result.
|
||||
)
|
||||
sourceShape: URIRef # The shape that was validated when the result was produced.
|
||||
sparql: URIRef # Links a shape with SPARQL constraints.
|
||||
subject: URIRef # An expression producing the resources that shall be inferred as subjects.
|
||||
suggestedShapesGraph: URIRef # Suggested shapes graphs for this ontology. The values of this property may be used in the absence of specific sh:shapesGraph statements.
|
||||
target: URIRef # Links a shape to a target specified by an extension language, for example instances of sh:SPARQLTarget.
|
||||
targetClass: URIRef # Links a shape to a class, indicating that all instances of the class must conform to the shape.
|
||||
targetNode: URIRef # Links a shape to individual nodes, indicating that these nodes must conform to the shape.
|
||||
targetObjectsOf: URIRef # Links a shape to a property, indicating that all objects of triples that have the given property as their predicate must conform to the shape.
|
||||
targetSubjectsOf: URIRef # Links a shape to a property, indicating that all subjects of triples that have the given property as their predicate must conform to the shape.
|
||||
union: URIRef # A list of node expressions that shall be used together.
|
||||
uniqueLang: URIRef # Specifies whether all node values must have a unique (or no) language tag.
|
||||
update: URIRef # The SPARQL UPDATE to execute.
|
||||
validator: URIRef # The validator(s) used to evaluate constraints of either node or property shapes.
|
||||
value: URIRef # An RDF node that has caused the result.
|
||||
xone: URIRef # Specifies a list of shapes so that the value nodes must conform to exactly one of the shapes.
|
||||
zeroOrMorePath: URIRef # The (single) value of this property represents a path that is matched zero or more times.
|
||||
zeroOrOnePath: URIRef # The (single) value of this property represents a path that is matched zero or one times.
|
||||
|
||||
# http://www.w3.org/2000/01/rdf-schema#Class
|
||||
AbstractResult: URIRef # The base class of validation results, typically not instantiated directly.
|
||||
ConstraintComponent: URIRef # The class of constraint components.
|
||||
Function: URIRef # The class of SHACL functions.
|
||||
JSConstraint: URIRef # The class of constraints backed by a JavaScript function.
|
||||
JSExecutable: URIRef # Abstract base class of resources that declare an executable JavaScript.
|
||||
JSFunction: URIRef # The class of SHACL functions that execute a JavaScript function when called.
|
||||
JSLibrary: URIRef # Represents a JavaScript library, typically identified by one or more URLs of files to include.
|
||||
JSRule: URIRef # The class of SHACL rules expressed using JavaScript.
|
||||
JSTarget: URIRef # The class of targets that are based on JavaScript functions.
|
||||
JSTargetType: URIRef # The (meta) class for parameterizable targets that are based on JavaScript functions.
|
||||
JSValidator: URIRef # A SHACL validator based on JavaScript. This can be used to declare SHACL constraint components that perform JavaScript-based validation when used.
|
||||
NodeKind: URIRef # The class of all node kinds, including sh:BlankNode, sh:IRI, sh:Literal or the combinations of these: sh:BlankNodeOrIRI, sh:BlankNodeOrLiteral, sh:IRIOrLiteral.
|
||||
NodeShape: URIRef # A node shape is a shape that specifies constraint that need to be met with respect to focus nodes.
|
||||
Parameter: URIRef # The class of parameter declarations, consisting of a path predicate and (possibly) information about allowed value type, cardinality and other characteristics.
|
||||
Parameterizable: URIRef # Superclass of components that can take parameters, especially functions and constraint components.
|
||||
PrefixDeclaration: URIRef # The class of prefix declarations, consisting of pairs of a prefix with a namespace.
|
||||
PropertyGroup: URIRef # Instances of this class represent groups of property shapes that belong together.
|
||||
PropertyShape: URIRef # A property shape is a shape that specifies constraints on the values of a focus node for a given property or path.
|
||||
ResultAnnotation: URIRef # A class of result annotations, which define the rules to derive the values of a given annotation property as extra values for a validation result.
|
||||
Rule: URIRef # The class of SHACL rules. Never instantiated directly.
|
||||
SPARQLAskExecutable: (
|
||||
URIRef # The class of SPARQL executables that are based on an ASK query.
|
||||
)
|
||||
SPARQLAskValidator: URIRef # The class of validators based on SPARQL ASK queries. The queries are evaluated for each value node and are supposed to return true if the given node conforms.
|
||||
SPARQLConstraint: URIRef # The class of constraints based on SPARQL SELECT queries.
|
||||
SPARQLConstructExecutable: (
|
||||
URIRef # The class of SPARQL executables that are based on a CONSTRUCT query.
|
||||
)
|
||||
SPARQLExecutable: URIRef # The class of resources that encapsulate a SPARQL query.
|
||||
SPARQLFunction: (
|
||||
URIRef # A function backed by a SPARQL query - either ASK or SELECT.
|
||||
)
|
||||
SPARQLRule: URIRef # The class of SHACL rules based on SPARQL CONSTRUCT queries.
|
||||
SPARQLSelectExecutable: (
|
||||
URIRef # The class of SPARQL executables based on a SELECT query.
|
||||
)
|
||||
SPARQLSelectValidator: URIRef # The class of validators based on SPARQL SELECT queries. The queries are evaluated for each focus node and are supposed to produce bindings for all focus nodes that do not conform.
|
||||
SPARQLTarget: URIRef # The class of targets that are based on SPARQL queries.
|
||||
SPARQLTargetType: URIRef # The (meta) class for parameterizable targets that are based on SPARQL queries.
|
||||
SPARQLUpdateExecutable: (
|
||||
URIRef # The class of SPARQL executables based on a SPARQL UPDATE.
|
||||
)
|
||||
Severity: URIRef # The class of validation result severity levels, including violation and warning levels.
|
||||
Shape: URIRef # A shape is a collection of constraints that may be targeted for certain nodes.
|
||||
Target: URIRef # The base class of targets such as those based on SPARQL queries.
|
||||
TargetType: URIRef # The (meta) class for parameterizable targets. Instances of this are instantiated as values of the sh:target property.
|
||||
TripleRule: URIRef # A rule based on triple (subject, predicate, object) pattern.
|
||||
ValidationReport: URIRef # The class of SHACL validation reports.
|
||||
ValidationResult: URIRef # The class of validation results.
|
||||
Validator: URIRef # The class of validators, which provide instructions on how to process a constraint definition. This class serves as base class for the SPARQL-based validators and other possible implementations.
|
||||
|
||||
# http://www.w3.org/2000/01/rdf-schema#Resource
|
||||
this: URIRef # A node expression that represents the current focus node.
|
||||
|
||||
# http://www.w3.org/ns/shacl#ConstraintComponent
|
||||
AndConstraintComponent: URIRef # A constraint component that can be used to test whether a value node conforms to all members of a provided list of shapes.
|
||||
ClassConstraintComponent: URIRef # A constraint component that can be used to verify that each value node is an instance of a given type.
|
||||
ClosedConstraintComponent: URIRef # A constraint component that can be used to indicate that focus nodes must only have values for those properties that have been explicitly enumerated via sh:property/sh:path.
|
||||
DatatypeConstraintComponent: URIRef # A constraint component that can be used to restrict the datatype of all value nodes.
|
||||
DisjointConstraintComponent: URIRef # A constraint component that can be used to verify that the set of value nodes is disjoint with the set of nodes that have the focus node as subject and the value of a given property as predicate.
|
||||
EqualsConstraintComponent: URIRef # A constraint component that can be used to verify that the set of value nodes is equal to the set of nodes that have the focus node as subject and the value of a given property as predicate.
|
||||
ExpressionConstraintComponent: URIRef # A constraint component that can be used to verify that a given node expression produces true for all value nodes.
|
||||
HasValueConstraintComponent: URIRef # A constraint component that can be used to verify that one of the value nodes is a given RDF node.
|
||||
InConstraintComponent: URIRef # A constraint component that can be used to exclusively enumerate the permitted value nodes.
|
||||
JSConstraintComponent: URIRef # A constraint component with the parameter sh:js linking to a sh:JSConstraint containing a sh:script.
|
||||
LanguageInConstraintComponent: URIRef # A constraint component that can be used to enumerate language tags that all value nodes must have.
|
||||
LessThanConstraintComponent: URIRef # A constraint component that can be used to verify that each value node is smaller than all the nodes that have the focus node as subject and the value of a given property as predicate.
|
||||
LessThanOrEqualsConstraintComponent: URIRef # A constraint component that can be used to verify that every value node is smaller than or equal to all the nodes that have the focus node as subject and the value of a given property as predicate.
|
||||
MaxCountConstraintComponent: URIRef # A constraint component that can be used to restrict the maximum number of value nodes.
|
||||
MaxExclusiveConstraintComponent: URIRef # A constraint component that can be used to restrict the range of value nodes with a maximum exclusive value.
|
||||
MaxInclusiveConstraintComponent: URIRef # A constraint component that can be used to restrict the range of value nodes with a maximum inclusive value.
|
||||
MaxLengthConstraintComponent: URIRef # A constraint component that can be used to restrict the maximum string length of value nodes.
|
||||
MinCountConstraintComponent: URIRef # A constraint component that can be used to restrict the minimum number of value nodes.
|
||||
MinExclusiveConstraintComponent: URIRef # A constraint component that can be used to restrict the range of value nodes with a minimum exclusive value.
|
||||
MinInclusiveConstraintComponent: URIRef # A constraint component that can be used to restrict the range of value nodes with a minimum inclusive value.
|
||||
MinLengthConstraintComponent: URIRef # A constraint component that can be used to restrict the minimum string length of value nodes.
|
||||
NodeConstraintComponent: URIRef # A constraint component that can be used to verify that all value nodes conform to the given node shape.
|
||||
NodeKindConstraintComponent: URIRef # A constraint component that can be used to restrict the RDF node kind of each value node.
|
||||
NotConstraintComponent: URIRef # A constraint component that can be used to verify that value nodes do not conform to a given shape.
|
||||
OrConstraintComponent: URIRef # A constraint component that can be used to restrict the value nodes so that they conform to at least one out of several provided shapes.
|
||||
PatternConstraintComponent: URIRef # A constraint component that can be used to verify that every value node matches a given regular expression.
|
||||
PropertyConstraintComponent: URIRef # A constraint component that can be used to verify that all value nodes conform to the given property shape.
|
||||
QualifiedMaxCountConstraintComponent: URIRef # A constraint component that can be used to verify that a specified maximum number of value nodes conforms to a given shape.
|
||||
QualifiedMinCountConstraintComponent: URIRef # A constraint component that can be used to verify that a specified minimum number of value nodes conforms to a given shape.
|
||||
SPARQLConstraintComponent: URIRef # A constraint component that can be used to define constraints based on SPARQL queries.
|
||||
UniqueLangConstraintComponent: URIRef # A constraint component that can be used to specify that no pair of value nodes may use the same language tag.
|
||||
XoneConstraintComponent: URIRef # A constraint component that can be used to restrict the value nodes so that they conform to exactly one out of several provided shapes.
|
||||
|
||||
# http://www.w3.org/ns/shacl#NodeKind
|
||||
BlankNode: URIRef # The node kind of all blank nodes.
|
||||
BlankNodeOrIRI: URIRef # The node kind of all blank nodes or IRIs.
|
||||
BlankNodeOrLiteral: URIRef # The node kind of all blank nodes or literals.
|
||||
IRI: URIRef # The node kind of all IRIs.
|
||||
IRIOrLiteral: URIRef # The node kind of all IRIs or literals.
|
||||
Literal: URIRef # The node kind of all literals.
|
||||
|
||||
# http://www.w3.org/ns/shacl#Parameter
|
||||
|
||||
# http://www.w3.org/ns/shacl#Severity
|
||||
Info: URIRef # The severity for an informational validation result.
|
||||
Violation: URIRef # The severity for a violation validation result.
|
||||
Warning: URIRef # The severity for a warning validation result.
|
||||
|
||||
# Valid non-python identifiers
|
||||
_extras = [
|
||||
"and",
|
||||
"class",
|
||||
"in",
|
||||
"not",
|
||||
"or",
|
||||
"AndConstraintComponent-and",
|
||||
"ClassConstraintComponent-class",
|
||||
"ClosedConstraintComponent-closed",
|
||||
"ClosedConstraintComponent-ignoredProperties",
|
||||
"DatatypeConstraintComponent-datatype",
|
||||
"DisjointConstraintComponent-disjoint",
|
||||
"EqualsConstraintComponent-equals",
|
||||
"ExpressionConstraintComponent-expression",
|
||||
"HasValueConstraintComponent-hasValue",
|
||||
"InConstraintComponent-in",
|
||||
"JSConstraint-js",
|
||||
"LanguageInConstraintComponent-languageIn",
|
||||
"LessThanConstraintComponent-lessThan",
|
||||
"LessThanOrEqualsConstraintComponent-lessThanOrEquals",
|
||||
"MaxCountConstraintComponent-maxCount",
|
||||
"MaxExclusiveConstraintComponent-maxExclusive",
|
||||
"MaxInclusiveConstraintComponent-maxInclusive",
|
||||
"MaxLengthConstraintComponent-maxLength",
|
||||
"MinCountConstraintComponent-minCount",
|
||||
"MinExclusiveConstraintComponent-minExclusive",
|
||||
"MinInclusiveConstraintComponent-minInclusive",
|
||||
"MinLengthConstraintComponent-minLength",
|
||||
"NodeConstraintComponent-node",
|
||||
"NodeKindConstraintComponent-nodeKind",
|
||||
"NotConstraintComponent-not",
|
||||
"OrConstraintComponent-or",
|
||||
"PatternConstraintComponent-flags",
|
||||
"PatternConstraintComponent-pattern",
|
||||
"PropertyConstraintComponent-property",
|
||||
"QualifiedMaxCountConstraintComponent-qualifiedMaxCount",
|
||||
"QualifiedMaxCountConstraintComponent-qualifiedValueShape",
|
||||
"QualifiedMaxCountConstraintComponent-qualifiedValueShapesDisjoint",
|
||||
"QualifiedMinCountConstraintComponent-qualifiedMinCount",
|
||||
"QualifiedMinCountConstraintComponent-qualifiedValueShape",
|
||||
"QualifiedMinCountConstraintComponent-qualifiedValueShapesDisjoint",
|
||||
"SPARQLConstraintComponent-sparql",
|
||||
"UniqueLangConstraintComponent-uniqueLang",
|
||||
"XoneConstraintComponent-xone",
|
||||
]
|
||||
|
||||
_NS = Namespace("http://www.w3.org/ns/shacl#")
|
||||
@@ -0,0 +1,66 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class SKOS(DefinedNamespace):
    """
    SKOS Vocabulary

    An RDF vocabulary for describing the basic structure and content of concept schemes such as thesauri,
    classification schemes, subject heading lists, taxonomies, 'folksonomies', other types of controlled
    vocabulary, and also concept schemes embedded in glossaries and terminologies.

    Generated from: https://www.w3.org/2009/08/skos-reference/skos.rdf
    Date: 2020-05-26 14:20:08.489187

    """

    # NOTE(review): presumably read by DefinedNamespace to reject terms that are
    # not declared below -- confirm against rdflib.namespace.
    _fail = True

    # ------------------------------------------------------------------
    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    # ------------------------------------------------------------------

    # An alternative lexical label for a resource.
    altLabel: URIRef

    # skos:broadMatch is used to state a hierarchical mapping link between two
    # conceptual resources in different concept schemes.
    broadMatch: URIRef

    # Relates a concept to a concept that is more general in meaning.
    broader: URIRef

    # skos:broaderTransitive is a transitive superproperty of skos:broader.
    broaderTransitive: URIRef

    # A note about a modification to a concept.
    changeNote: URIRef

    # skos:closeMatch is used to link two concepts that are sufficiently similar
    # that they can be used interchangeably in some information retrieval
    # applications. In order to avoid the possibility of "compound errors" when
    # combining mappings across more than two concept schemes, skos:closeMatch is
    # not declared to be a transitive property.
    closeMatch: URIRef

    # A statement or formal explanation of the meaning of a concept.
    definition: URIRef

    # A note for an editor, translator or maintainer of the vocabulary.
    editorialNote: URIRef

    # skos:exactMatch is used to link two concepts, indicating a high degree of
    # confidence that the concepts can be used interchangeably across a wide range
    # of information retrieval applications. skos:exactMatch is a transitive
    # property, and is a sub-property of skos:closeMatch.
    exactMatch: URIRef

    # An example of the use of a concept.
    example: URIRef

    # Relates, by convention, a concept scheme to a concept which is topmost in
    # the broader/narrower concept hierarchies for that scheme, providing an entry
    # point to these hierarchies.
    hasTopConcept: URIRef

    # A lexical label for a resource that should be hidden when generating visual
    # displays of the resource, but should still be accessible to free text search
    # operations.
    hiddenLabel: URIRef

    # A note about the past state/use/meaning of a concept.
    historyNote: URIRef

    # Relates a resource (for example a concept) to a concept scheme in which it
    # is included.
    inScheme: URIRef

    # Relates two concepts coming, by convention, from different schemes, and that
    # have comparable meanings
    mappingRelation: URIRef

    # Relates a collection to one of its members.
    member: URIRef

    # Relates an ordered collection to the RDF list containing its members.
    memberList: URIRef

    # skos:narrowMatch is used to state a hierarchical mapping link between two
    # conceptual resources in different concept schemes.
    narrowMatch: URIRef

    # Relates a concept to a concept that is more specific in meaning.
    narrower: URIRef

    # skos:narrowerTransitive is a transitive superproperty of skos:narrower.
    narrowerTransitive: URIRef

    # A notation, also known as classification code, is a string of characters
    # such as "T58.5" or "303.4833" used to uniquely identify a concept within the
    # scope of a given concept scheme.
    notation: URIRef

    # A general note, for any purpose.
    note: URIRef

    # The preferred lexical label for a resource, in a given language.
    prefLabel: URIRef

    # Relates a concept to a concept with which there is an associative semantic
    # relationship.
    related: URIRef

    # skos:relatedMatch is used to state an associative mapping link between two
    # conceptual resources in different concept schemes.
    relatedMatch: URIRef

    # A note that helps to clarify the meaning and/or the use of a concept.
    scopeNote: URIRef

    # Links a concept to a concept related by meaning.
    semanticRelation: URIRef

    # Relates a concept to the concept scheme that it is a top level concept of.
    topConceptOf: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2002/07/owl#Class
    # ------------------------------------------------------------------

    # A meaningful collection of concepts.
    Collection: URIRef

    # An idea or notion; a unit of thought.
    Concept: URIRef

    # A set of concepts, optionally including statements about semantic
    # relationships between those concepts.
    ConceptScheme: URIRef

    # An ordered collection of concepts, where both the grouping and the ordering
    # are meaningful.
    OrderedCollection: URIRef

    # Namespace object built from the SKOS core IRI.
    _NS = Namespace("http://www.w3.org/2004/02/skos/core#")
|
||||
@@ -0,0 +1,68 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class SOSA(DefinedNamespace):
    """
    Sensor, Observation, Sample, and Actuator (SOSA) Ontology

    This ontology is based on the SSN Ontology by the W3C Semantic Sensor Networks Incubator Group (SSN-XG),
    together with considerations from the W3C/OGC Spatial Data on the Web Working Group.

    Generated from: http://www.w3.org/ns/sosa/
    Date: 2020-05-26 14:20:08.792504

    """

    # ------------------------------------------------------------------
    # http://www.w3.org/2000/01/rdf-schema#Class
    # ------------------------------------------------------------------

    # An actuatable quality (property, characteristic) of a FeatureOfInterest.
    ActuatableProperty: URIRef

    # An Actuation carries out an (Actuation) Procedure to change the state of the
    # world using an Actuator.
    Actuation: URIRef

    # A device that is used by, or implements, an (Actuation) Procedure that
    # changes the state of the world.
    Actuator: URIRef

    # The thing whose property is being estimated or calculated in the course of
    # an Observation to arrive at a Result or whose property is being manipulated
    # by an Actuator, or which is being sampled or transformed in an act of
    # Sampling.
    FeatureOfInterest: URIRef

    # An observable quality (property, characteristic) of a FeatureOfInterest.
    ObservableProperty: URIRef

    # Act of carrying out an (Observation) Procedure to estimate or calculate a
    # value of a property of a FeatureOfInterest. Links to a Sensor to describe
    # what made the Observation and how; links to an ObservableProperty to
    # describe what the result is an estimate of, and to a FeatureOfInterest to
    # detail what that property was associated with.
    Observation: URIRef

    # Collection of one or more observations, whose members share a common value
    # for one or more property
    ObservationCollection: URIRef

    # A Platform is an entity that hosts other entities, particularly Sensors,
    # Actuators, Samplers, and other Platforms.
    Platform: URIRef

    # A workflow, protocol, plan, algorithm, or computational method specifying
    # how to make an Observation, create a Sample, or make a change to the state
    # of the world (via an Actuator). A Procedure is re-usable, and might be
    # involved in many Observations, Samplings, or Actuations. It explains the
    # steps to be carried out to arrive at reproducible results.
    Procedure: URIRef

    # The Result of an Observation, Actuation, or act of Sampling. To store an
    # observation's simple result value one can use the hasSimpleResult property.
    Result: URIRef

    # Feature which is intended to be representative of a FeatureOfInterest on
    # which Observations may be made.
    Sample: URIRef

    # A device that is used by, or implements, a Sampling Procedure to create or
    # transform one or more samples.
    Sampler: URIRef

    # An act of Sampling carries out a sampling Procedure to create or transform
    # one or more samples.
    Sampling: URIRef

    # Device, agent (including humans), or software (simulation) involved in, or
    # implementing, a Procedure. Sensors respond to a stimulus, e.g., a change in
    # the environment, or input data composed from the results of prior
    # Observations, and generate a Result. Sensors can be hosted by Platforms.
    Sensor: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2002/07/owl#DatatypeProperty
    # ------------------------------------------------------------------

    # The simple value of an Observation or Actuation or act of Sampling.
    hasSimpleResult: URIRef

    # The result time is the instant of time when the Observation, Actuation or
    # Sampling activity was completed.
    resultTime: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2002/07/owl#ObjectProperty
    # ------------------------------------------------------------------

    # Relation between an Actuation and the property of a FeatureOfInterest it is
    # acting upon.
    actsOnProperty: URIRef

    # A relation between an Observation and the entity whose quality was observed,
    # or between an Actuation and the entity whose property was modified, or
    # between an act of Sampling and the entity that was sampled.
    hasFeatureOfInterest: URIRef

    # Link to a member of a collection of observations that share the same value
    # for one or more of the characteristic properties
    hasMember: URIRef

    # link to the original sample that is related to the context sample through a
    # chain of isSampleOf relations
    hasOriginalSample: URIRef

    # Relation linking an Observation or Actuation or act of Sampling and a Result
    # or Sample.
    hasResult: URIRef

    # Relation between a FeatureOfInterest and the Sample used to represent it.
    hasSample: URIRef

    # link to the ultimate feature of interest of the context sample - i.e. the
    # end of a chain of isSampleOf relations
    hasSampledFeature: URIRef

    # link to the ultimate feature of interest of an observation or act of
    # sampling. This is useful when the proximate feature of interest is a sample
    # of the ultimate feature of interest, directly or transitively.
    hasUltimateFeatureOfInterest: URIRef

    # Relation between a Platform and a Sensor, Actuator, Sampler, or Platform,
    # hosted or mounted on it.
    hosts: URIRef

    # Relation between an ActuatableProperty of a FeatureOfInterest and an
    # Actuation changing its state.
    isActedOnBy: URIRef

    # A relation between a FeatureOfInterest and an Observation about it, an
    # Actuation acting on it, or an act of Sampling that sampled it.
    isFeatureOfInterestOf: URIRef

    # Relation between a Sensor, Actuator, Sampler, or Platform, and the Platform
    # that it is mounted on or hosted by.
    isHostedBy: URIRef

    # Relation between an ObservableProperty and the Sensor able to observe it.
    isObservedBy: URIRef

    # Relation linking a Result to the Observation or Actuation or act of Sampling
    # that created or caused it.
    isResultOf: URIRef

    # Relation from a Sample to the FeatureOfInterest that it is intended to be
    # representative of.
    isSampleOf: URIRef

    # Relation between an Actuator and the Actuation it has made.
    madeActuation: URIRef

    # Relation linking an Actuation to the Actuator that made that Actuation.
    madeByActuator: URIRef

    # Relation linking an act of Sampling to the Sampler (sampling device or
    # entity) that made it.
    madeBySampler: URIRef

    # Relation between an Observation and the Sensor which made the Observation.
    madeBySensor: URIRef

    # Relation between a Sensor and an Observation made by the Sensor.
    madeObservation: URIRef

    # Relation between a Sampler (sampling device or entity) and the Sampling act
    # it performed.
    madeSampling: URIRef

    # Relation linking an Observation to the property that was observed. The
    # ObservableProperty should be a property of the FeatureOfInterest (linked by
    # hasFeatureOfInterest) of this Observation.
    observedProperty: URIRef

    # Relation between a Sensor and an ObservableProperty that it is capable of
    # sensing.
    observes: URIRef

    # The time that the Result of an Observation, Actuation or Sampling applies to
    # the FeatureOfInterest. Not necessarily the same as the resultTime. May be an
    # Interval or an Instant, or some other compound TemporalEntity.
    phenomenonTime: URIRef

    # A relation to link to a re-usable Procedure used in making an Observation,
    # an Actuation, or a Sample, typically through a Sensor, Actuator or Sampler.
    usedProcedure: URIRef

    # Namespace object built from the SOSA IRI.
    _NS = Namespace("http://www.w3.org/ns/sosa/")
|
||||
@@ -0,0 +1,46 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class SSN(DefinedNamespace):
    """
    Semantic Sensor Network Ontology

    This ontology describes sensors, actuators and observations, and related concepts. It does not describe domain
    concepts, time, locations, etc. these are intended to be included from other ontologies via OWL imports.

    Generated from: http://www.w3.org/ns/ssn/
    Date: 2020-05-26 14:20:09.068204

    """

    # ------------------------------------------------------------------
    # http://www.w3.org/2002/07/owl#Class
    # ------------------------------------------------------------------

    # Describes the Deployment of one or more Systems for a particular purpose.
    # Deployment may be done on a Platform.
    Deployment: URIRef

    # Any information that is provided to a Procedure for its use.
    Input: URIRef

    # Any information that is reported from a Procedure.
    Output: URIRef

    # A quality of an entity. An aspect of an entity that is intrinsic to and
    # cannot exist without the entity.
    Property: URIRef

    # An event in the real world that 'triggers' the Sensor. The properties
    # associated to the Stimulus may be different to the eventual observed
    # ObservableProperty. It is the event, not the object, that triggers the
    # Sensor.
    Stimulus: URIRef

    # System is a unit of abstraction for pieces of infrastructure that implement
    # Procedures. A System may have components, its subsystems, which are other
    # systems.
    System: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2002/07/owl#FunctionalProperty
    # ------------------------------------------------------------------

    # Relation between an Observation and the Stimulus that originated it.
    wasOriginatedBy: URIRef

    # ------------------------------------------------------------------
    # http://www.w3.org/2002/07/owl#ObjectProperty
    # ------------------------------------------------------------------

    # Relation between a Deployment and the Platform on which the Systems are
    # deployed.
    deployedOnPlatform: URIRef

    # Relation between a Deployment and a deployed System.
    deployedSystem: URIRef

    # A relation from a Sensor to the Stimulus that the Sensor detects. The
    # Stimulus itself will be serving as a proxy for some ObservableProperty.
    detects: URIRef

    # A relation between some aspect of an entity and a Property.
    forProperty: URIRef

    # Relation between a System and a Deployment, recording that the System is
    # deployed in that Deployment.
    hasDeployment: URIRef

    # Relation between a Procedure and an Input to it.
    hasInput: URIRef

    # Relation between a Procedure and an Output of it.
    hasOutput: URIRef

    # Relation between an entity and a Property of that entity.
    hasProperty: URIRef

    # Relation between a System and its component parts.
    hasSubSystem: URIRef

    # Relation between a Procedure (an algorithm, procedure or method) and an
    # entity that implements that Procedure in some executable way.
    implementedBy: URIRef

    # Relation between an entity that implements a Procedure in some executable
    # way and the Procedure (an algorithm, procedure or method).
    implements: URIRef

    # Relation between a Platform and a Deployment, meaning that the
    # deployedSystems of the Deployment are hosted on the Platform.
    inDeployment: URIRef

    # Relation between a Property and the entity it belongs to.
    isPropertyOf: URIRef

    # A relation from a Stimulus to the Property that the Stimulus is serving as a
    # proxy for.
    isProxyFor: URIRef

    # Namespace object built from the SSN IRI.
    _NS = Namespace("http://www.w3.org/ns/ssn/")
|
||||
@@ -0,0 +1,135 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class TIME(DefinedNamespace):
    """
    OWL-Time

    Generated from: http://www.w3.org/2006/time#
    Date: 2020-05-26 14:20:10.531265

    Each term is declared as a ``URIRef``-annotated class attribute, grouped
    under a comment naming the RDF type of the terms in that group. The
    trailing comment on each attribute is the term's description. ``_NS`` at
    the end of the class holds the namespace IRI.
    """

    # http://www.w3.org/2000/01/rdf-schema#Datatype
    generalDay: URIRef  # Day of month - formulated as a text string with a pattern constraint to reproduce the same lexical form as gDay, except that values up to 99 are permitted, in order to support calendars with more than 31 days in a month. Note that the value-space is not defined, so a generic OWL2 processor cannot compute ordering relationships of values of this type.
    generalMonth: URIRef  # Month of year - formulated as a text string with a pattern constraint to reproduce the same lexical form as gMonth, except that values up to 20 are permitted, in order to support calendars with more than 12 months in the year. Note that the value-space is not defined, so a generic OWL2 processor cannot compute ordering relationships of values of this type.
    generalYear: URIRef  # Year number - formulated as a text string with a pattern constraint to reproduce the same lexical form as gYear, but not restricted to values from the Gregorian calendar. Note that the value-space is not defined, so a generic OWL2 processor cannot compute ordering relationships of values of this type.

    # http://www.w3.org/2002/07/owl#Class
    DateTimeDescription: URIRef  # Description of date and time structured with separate values for the various elements of a calendar-clock system. The temporal reference system is fixed to Gregorian Calendar, and the range of year, month, day properties restricted to corresponding XML Schema types xsd:gYear, xsd:gMonth and xsd:gDay, respectively.
    DateTimeInterval: URIRef  # DateTimeInterval is a subclass of ProperInterval, defined using the multi-element DateTimeDescription.
    DayOfWeek: URIRef  # The day of week
    Duration: URIRef  # Duration of a temporal extent expressed as a number scaled by a temporal unit
    DurationDescription: URIRef  # Description of temporal extent structured with separate values for the various elements of a calendar-clock system. The temporal reference system is fixed to Gregorian Calendar, and the range of each of the numeric properties is restricted to xsd:decimal
    GeneralDateTimeDescription: URIRef  # Description of date and time structured with separate values for the various elements of a calendar-clock system
    GeneralDurationDescription: URIRef  # Description of temporal extent structured with separate values for the various elements of a calendar-clock system.
    Instant: URIRef  # A temporal entity with zero extent or duration
    Interval: URIRef  # A temporal entity with an extent or duration
    MonthOfYear: URIRef  # The month of the year
    ProperInterval: URIRef  # A temporal entity with non-zero extent or duration, i.e. for which the value of the beginning and end are different
    TRS: URIRef  # A temporal reference system, such as a temporal coordinate system (with an origin, direction, and scale), a calendar-clock combination, or a (possibly hierarchical) ordinal system. This is a stub class, representing the set of all temporal reference systems.
    TemporalDuration: URIRef  # Time extent; duration of a time interval separate from its particular start position
    TemporalEntity: URIRef  # A temporal interval or instant.
    TemporalPosition: URIRef  # A position on a time-line
    TemporalUnit: URIRef  # A standard duration, which provides a scale factor for a time extent, or the granularity or precision for a time position.
    TimePosition: URIRef  # A temporal position described using either a (nominal) value from an ordinal reference system, or a (numeric) value in a temporal coordinate system.
    TimeZone: URIRef  # A Time Zone specifies the amount by which the local time is offset from UTC. A time zone is usually denoted geographically (e.g. Australian Eastern Daylight Time), with a constant value in a given region. The region where it applies and the offset from UTC are specified by a locally recognised governing authority.

    # http://www.w3.org/2002/07/owl#DatatypeProperty
    day: URIRef  # Day position in a calendar-clock system. The range of this property is not specified, so can be replaced by any specific representation of a calendar day from any calendar.
    dayOfYear: URIRef  # The number of the day within the year
    days: URIRef  # length of, or element of the length of, a temporal extent expressed in days
    hasXSDDuration: URIRef  # Extent of a temporal entity, expressed using xsd:duration
    hour: URIRef  # Hour position in a calendar-clock system.
    hours: URIRef  # length of, or element of the length of, a temporal extent expressed in hours
    inXSDDate: URIRef  # Position of an instant, expressed using xsd:date
    inXSDDateTimeStamp: (
        URIRef  # Position of an instant, expressed using xsd:dateTimeStamp
    )
    inXSDgYear: URIRef  # Position of an instant, expressed using xsd:gYear
    inXSDgYearMonth: URIRef  # Position of an instant, expressed using xsd:gYearMonth
    minute: URIRef  # Minute position in a calendar-clock system.
    minutes: URIRef  # length of, or element of the length of, a temporal extent expressed in minutes
    month: URIRef  # Month position in a calendar-clock system. The range of this property is not specified, so can be replaced by any specific representation of a calendar month from any calendar.
    months: URIRef  # length of, or element of the length of, a temporal extent expressed in months
    nominalPosition: URIRef  # The (nominal) value indicating temporal position in an ordinal reference system
    numericDuration: URIRef  # Value of a temporal extent expressed as a decimal number scaled by a temporal unit
    numericPosition: URIRef  # The (numeric) value indicating position within a temporal coordinate system
    second: URIRef  # Second position in a calendar-clock system.
    seconds: URIRef  # length of, or element of the length of, a temporal extent expressed in seconds
    week: URIRef  # Week number within the year.
    weeks: URIRef  # length of, or element of the length of, a temporal extent expressed in weeks
    year: URIRef  # Year position in a calendar-clock system. The range of this property is not specified, so can be replaced by any specific representation of a calendar year from any calendar.
    years: URIRef  # length of, or element of the length of, a temporal extent expressed in years

    # http://www.w3.org/2002/07/owl#DeprecatedClass
    January: URIRef  # January
    Year: URIRef  # Year duration

    # http://www.w3.org/2002/07/owl#DeprecatedProperty
    inXSDDateTime: URIRef  # Position of an instant, expressed using xsd:dateTime
    xsdDateTime: URIRef  # Value of DateTimeInterval expressed as a compact value.

    # http://www.w3.org/2002/07/owl#FunctionalProperty
    hasTRS: URIRef  # The temporal reference system used by a temporal position or extent description.

    # http://www.w3.org/2002/07/owl#ObjectProperty
    after: URIRef  # Gives directionality to time. If a temporal entity T1 is after another temporal entity T2, then the beginning of T1 is after the end of T2.
    dayOfWeek: (
        URIRef  # The day of week, whose value is a member of the class time:DayOfWeek
    )
    hasBeginning: URIRef  # Beginning of a temporal entity.
    hasDateTimeDescription: URIRef  # Value of DateTimeInterval expressed as a structured value. The beginning and end of the interval coincide with the limits of the shortest element in the description.
    hasDuration: URIRef  # Duration of a temporal entity, event or activity, or thing, expressed as a scaled value
    hasDurationDescription: URIRef  # Duration of a temporal entity, expressed using a structured description
    hasEnd: URIRef  # End of a temporal entity.
    hasTemporalDuration: URIRef  # Duration of a temporal entity.
    hasTime: URIRef  # Supports the association of a temporal entity (instant or interval) to any thing
    inDateTime: (
        URIRef  # Position of an instant, expressed using a structured description
    )
    inTemporalPosition: URIRef  # Position of a time instant
    inTimePosition: URIRef  # Position of a time instant expressed as a TimePosition
    inside: URIRef  # An instant that falls inside the interval. It is not intended to include beginnings and ends of intervals.
    intervalAfter: URIRef  # If a proper interval T1 is intervalAfter another proper interval T2, then the beginning of T1 is after the end of T2.
    intervalBefore: URIRef  # If a proper interval T1 is intervalBefore another proper interval T2, then the end of T1 is before the beginning of T2.
    intervalContains: URIRef  # If a proper interval T1 is intervalContains another proper interval T2, then the beginning of T1 is before the beginning of T2, and the end of T1 is after the end of T2.
    intervalDisjoint: URIRef  # If a proper interval T1 is intervalDisjoint another proper interval T2, then the beginning of T1 is after the end of T2, or the end of T1 is before the beginning of T2, i.e. the intervals do not overlap in any way, but their ordering relationship is not known.
    intervalDuring: URIRef  # If a proper interval T1 is intervalDuring another proper interval T2, then the beginning of T1 is after the beginning of T2, and the end of T1 is before the end of T2.
    intervalEquals: URIRef  # If a proper interval T1 is intervalEquals another proper interval T2, then the beginning of T1 is coincident with the beginning of T2, and the end of T1 is coincident with the end of T2.
    intervalFinishedBy: URIRef  # If a proper interval T1 is intervalFinishedBy another proper interval T2, then the beginning of T1 is before the beginning of T2, and the end of T1 is coincident with the end of T2.
    intervalFinishes: URIRef  # If a proper interval T1 is intervalFinishes another proper interval T2, then the beginning of T1 is after the beginning of T2, and the end of T1 is coincident with the end of T2.
    intervalIn: URIRef  # If a proper interval T1 is intervalIn another proper interval T2, then the beginning of T1 is after the beginning of T2 or is coincident with the beginning of T2, and the end of T1 is before the end of T2, or is coincident with the end of T2, except that end of T1 may not be coincident with the end of T2 if the beginning of T1 is coincident with the beginning of T2.
    intervalMeets: URIRef  # If a proper interval T1 is intervalMeets another proper interval T2, then the end of T1 is coincident with the beginning of T2.
    intervalMetBy: URIRef  # If a proper interval T1 is intervalMetBy another proper interval T2, then the beginning of T1 is coincident with the end of T2.
    intervalOverlappedBy: URIRef  # If a proper interval T1 is intervalOverlappedBy another proper interval T2, then the beginning of T1 is after the beginning of T2, the beginning of T1 is before the end of T2, and the end of T1 is after the end of T2.
    intervalOverlaps: URIRef  # If a proper interval T1 is intervalOverlaps another proper interval T2, then the beginning of T1 is before the beginning of T2, the end of T1 is after the beginning of T2, and the end of T1 is before the end of T2.
    intervalStartedBy: URIRef  # If a proper interval T1 is intervalStartedBy another proper interval T2, then the beginning of T1 is coincident with the beginning of T2, and the end of T1 is after the end of T2.
    intervalStarts: URIRef  # If a proper interval T1 is intervalStarts another proper interval T2, then the beginning of T1 is coincident with the beginning of T2, and the end of T1 is before the end of T2.
    monthOfYear: URIRef  # The month of the year, whose value is a member of the class time:MonthOfYear
    timeZone: URIRef  # The time zone for clock elements in the temporal position
    unitType: URIRef  # The temporal unit which provides the precision of a date-time value or scale of a temporal extent

    # http://www.w3.org/2002/07/owl#TransitiveProperty
    before: URIRef  # Gives directionality to time. If a temporal entity T1 is before another temporal entity T2, then the end of T1 is before the beginning of T2. Thus, "before" can be considered to be basic to instants and derived for intervals.

    # http://www.w3.org/2006/time#DayOfWeek
    Friday: URIRef  # Friday
    Monday: URIRef  # Monday
    Saturday: URIRef  # Saturday
    Sunday: URIRef  # Sunday
    Thursday: URIRef  # Thursday
    Tuesday: URIRef  # Tuesday
    Wednesday: URIRef  # Wednesday

    # http://www.w3.org/2006/time#TemporalUnit
    unitDay: URIRef  # day
    unitHour: URIRef  # hour
    unitMinute: URIRef  # minute
    unitMonth: URIRef  # month
    unitSecond: URIRef  # second
    unitWeek: URIRef  # week
    unitYear: URIRef  # year

    # Namespace IRI of the OWL-Time vocabulary.
    _NS = Namespace("http://www.w3.org/2006/time#")
|
||||
@@ -0,0 +1,27 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class VANN(DefinedNamespace):
    """
    VANN: A vocabulary for annotating vocabulary descriptions

    This document describes a vocabulary for annotating descriptions of vocabularies with examples and usage
    notes.

    Generated from: https://vocab.org/vann/vann-vocab-20100607.rdf
    Date: 2020-05-26 14:21:15.580430

    Each term is declared as a ``URIRef``-annotated class attribute whose
    trailing comment is the term's description; ``_NS`` holds the namespace
    IRI.
    """

    # NOTE(review): presumably tells DefinedNamespace to reject terms that are
    # not declared below -- confirm against the DefinedNamespace implementation.
    _fail = True

    # http://www.w3.org/2002/07/owl#AnnotationProperty
    changes: URIRef  # A reference to a resource that describes changes between this version of a vocabulary and the previous.
    example: URIRef  # A reference to a resource that provides an example of how this resource can be used.
    preferredNamespacePrefix: URIRef  # The preferred namespace prefix to use when using terms from this vocabulary in an XML document.
    preferredNamespaceUri: URIRef  # The preferred namespace URI to use when using terms from this vocabulary in an XML document.
    termGroup: URIRef  # A group of related terms in a vocabulary.
    usageNote: URIRef  # A reference to a resource that provides information on how this resource is to be used.

    # Namespace IRI of the VANN vocabulary.
    _NS = Namespace("http://purl.org/vocab/vann/")
|
||||
@@ -0,0 +1,65 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class VOID(DefinedNamespace):
    """
    Vocabulary of Interlinked Datasets (VoID)

    The Vocabulary of Interlinked Datasets (VoID) is an RDF Schema vocabulary for expressing metadata about RDF
    datasets. It is intended as a bridge between the publishers and users of RDF data, with applications ranging
    from data discovery to cataloging and archiving of datasets. This document provides a formal definition of the
    new RDF classes and properties introduced for VoID. It is a companion to the main specification document for
    VoID, "Describing Linked Datasets with the VoID Vocabulary" (http://www.w3.org/TR/void/).

    Generated from: http://rdfs.org/ns/void#
    Date: 2020-05-26 14:20:11.911298

    Each term is declared as a ``URIRef``-annotated class attribute whose
    trailing comment is the term's description; ``_NS`` holds the namespace
    IRI.
    """

    # NOTE(review): presumably tells DefinedNamespace to reject terms that are
    # not declared below -- confirm against the DefinedNamespace implementation.
    _fail = True

    # http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
    classPartition: URIRef  # A subset of a void:Dataset that contains only the entities of a certain rdfs:Class.
    classes: URIRef  # The total number of distinct classes in a void:Dataset. In other words, the number of distinct resources occurring as objects of rdf:type triples in the dataset.
    dataDump: URIRef  # An RDF dump, partial or complete, of a void:Dataset.
    distinctObjects: URIRef  # The total number of distinct objects in a void:Dataset. In other words, the number of distinct resources that occur in the object position of triples in the dataset. Literals are included in this count.
    distinctSubjects: URIRef  # The total number of distinct subjects in a void:Dataset. In other words, the number of distinct resources that occur in the subject position of triples in the dataset.
    documents: URIRef  # The total number of documents, for datasets that are published as a set of individual documents, such as RDF/XML documents or RDFa-annotated web pages. Non-RDF documents, such as web pages in HTML or images, are usually not included in this count. This property is intended for datasets where the total number of triples or entities is hard to determine. void:triples or void:entities should be preferred where practical.
    entities: (
        URIRef  # The total number of entities that are described in a void:Dataset.
    )
    exampleResource: URIRef  # example resource of dataset
    feature: URIRef  # feature
    inDataset: URIRef  # Points to the void:Dataset that a document is a part of.
    linkPredicate: URIRef  # a link predicate
    objectsTarget: URIRef  # The dataset describing the objects of the triples contained in the Linkset.
    openSearchDescription: URIRef  # An OpenSearch description document for a free-text search service over a void:Dataset.
    properties: URIRef  # The total number of distinct properties in a void:Dataset. In other words, the number of distinct resources that occur in the predicate position of triples in the dataset.
    property: URIRef  # The rdf:Property that is the predicate of all triples in a property-based partition.
    propertyPartition: URIRef  # A subset of a void:Dataset that contains only the triples of a certain rdf:Property.
    rootResource: URIRef  # A top concept or entry point for a void:Dataset that is structured in a tree-like fashion. All resources in a dataset can be reached by following links from its root resources in a small number of steps.
    sparqlEndpoint: URIRef  # has a SPARQL endpoint at
    subjectsTarget: URIRef  # The dataset describing the subjects of triples contained in the Linkset.
    subset: URIRef  # has subset
    target: URIRef  # One of the two datasets linked by the Linkset.
    triples: URIRef  # The total number of triples contained in a void:Dataset.
    uriLookupEndpoint: (
        URIRef  # Defines a simple URI look-up protocol for accessing a dataset.
    )
    uriRegexPattern: (
        URIRef  # Defines a regular expression pattern matching URIs in the dataset.
    )
    uriSpace: URIRef  # A URI that is a common string prefix of all the entity URIs in a void:Dataset.
    vocabulary: URIRef  # A vocabulary that is used in the dataset.

    # http://www.w3.org/2000/01/rdf-schema#Class
    Dataset: URIRef  # A set of RDF triples that are published, maintained or aggregated by a single provider.
    DatasetDescription: URIRef  # A web resource whose foaf:primaryTopic or foaf:topics include void:Datasets.
    Linkset: URIRef  # A collection of RDF links between two void:Datasets.
    TechnicalFeature: URIRef  # A technical feature of a void:Dataset, such as a supported RDF serialization format.

    # Valid terms that are not usable as Python identifiers: ``class`` is a
    # reserved word, so it cannot be declared as an annotated attribute above.
    _extras = ["class"]

    # Namespace IRI of the VoID vocabulary.
    _NS = Namespace("http://rdfs.org/ns/void#")
|
||||
@@ -0,0 +1,25 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class WGS(DefinedNamespace):
    """
    Basic Geo (WGS84 lat/long) Vocabulary

    The HTML Specification for the vocabulary can be found
    here <https://www.w3.org/2003/01/geo/>.

    Each term is declared as a ``URIRef``-annotated class attribute;
    ``_NS`` holds the namespace IRI.
    """

    # NOTE(review): this IRI uses the ``https`` scheme, while the comment on
    # ``lat`` below spells the same namespace with ``http``. Confirm which
    # scheme the published vocabulary uses before relying on exact IRI matches;
    # changing the value would alter every IRI produced from this class.
    _NS = Namespace("https://www.w3.org/2003/01/geo/wgs84_pos#")

    # http://www.w3.org/2000/01/rdf-schema#Class
    SpatialThing: URIRef
    Point: URIRef

    # http://www.w3.org/2002/07/owl#DatatypeProperty
    alt: URIRef

    lat: URIRef  # http://www.w3.org/2003/01/geo/wgs84_pos#lat
    lat_long: URIRef
    location: URIRef
    long: URIRef
|
||||
@@ -0,0 +1,103 @@
|
||||
from rdflib.namespace import DefinedNamespace, Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class XSD(DefinedNamespace):
    """
    W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes

    Generated from: ../schemas/datatypes.xsd
    Date: 2021-09-05 20:37+10

    Each term is declared as a ``URIRef``-annotated class attribute whose
    trailing comment links to the relevant section of the specification.
    """

    # Namespace IRI of the XML Schema datatypes vocabulary.
    _NS = Namespace("http://www.w3.org/2001/XMLSchema#")

    ENTITIES: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#ENTITIES
    ENTITY: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#ENTITY
    ID: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#ID
    IDREF: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#IDREF
    IDREFS: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#IDREFS
    NCName: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#NCName
    NMTOKEN: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#NMTOKEN
    NMTOKENS: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#NMTOKENS
    NOTATION: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#NOTATION (NOTATION cannot be used directly in a schema; rather a type must be derived from it)
    Name: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#Name
    QName: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#QName
    anyURI: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#anyURI
    base64Binary: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#base64Binary
    boolean: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#boolean
    byte: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#byte
    date: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#date
    dateTime: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#dateTime
    dateTimeStamp: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#dateTimeStamp
    dayTimeDuration: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#dayTimeDuration
    decimal: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#decimal
    double: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#double
    duration: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#duration
    float: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#float
    gDay: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#gDay
    gMonth: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#gMonth
    gMonthDay: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#gMonthDay
    gYear: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#gYear
    gYearMonth: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#gYearMonth
    hexBinary: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#hexBinary
    int: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#int
    integer: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#integer
    language: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#language
    long: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#long
    negativeInteger: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#negativeInteger
    nonNegativeInteger: (
        URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#nonNegativeInteger
    )
    nonPositiveInteger: (
        URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#nonPositiveInteger
    )
    normalizedString: (
        URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#normalizedString
    )
    positiveInteger: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#positiveInteger
    short: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#short
    string: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#string
    time: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#time
    token: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#token
    unsignedByte: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#unsignedByte
    unsignedInt: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#unsignedInt
    unsignedLong: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#unsignedLong
    unsignedShort: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#unsignedShort
    yearMonthDuration: (
        URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#yearMonthDuration
    )

    # fundamental facets - https://www.w3.org/TR/xmlschema11-2/#rf-fund-facets
    ordered: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-ordered
    bounded: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-bounded
    cardinality: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-cardinality
    numeric: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-numeric

    # constraining facets - https://www.w3.org/TR/xmlschema11-2/#rf-facets
    length: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-length
    minLength: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-minLength
    maxLength: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-maxLength
    pattern: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-pattern
    enumeration: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-enumeration
    whiteSpace: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-whiteSpace
    maxExclusive: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-maxExclusive
    maxInclusive: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-maxInclusive
    minExclusive: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-minExclusive
    minInclusive: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-minInclusive
    totalDigits: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-totalDigits
    fractionDigits: URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-fractionDigits
    Assertions: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#rf-assertions
    explicitTimezone: (
        URIRef  # see: http://www.w3.org/TR/xmlschema11-2/#rf-explicitTimezone
    )

    # The Seven-property Model - https://www.w3.org/TR/xmlschema11-2/#theSevenPropertyModel
    year: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#vp-dt-year
    month: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#vp-dt-month
    day: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#vp-dt-day
    hour: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#vp-dt-hour
    minute: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#vp-dt-minute
    second: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#vp-dt-second
    timezoneOffset: URIRef  # see: https://www.w3.org/TR/xmlschema11-2/#vp-dt-timezone
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,803 @@
|
||||
"""
|
||||
Parser plugin interface.
|
||||
|
||||
This module defines the parser plugin interface and contains other
|
||||
related parser support code.
|
||||
|
||||
The module is mainly useful for those wanting to write a parser that
|
||||
can plug into rdflib. If you only need to invoke a parser, you most likely
|
||||
want to do so through the Graph class parse method.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
from io import BufferedIOBase, BytesIO, RawIOBase, StringIO, TextIOBase, TextIOWrapper
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
BinaryIO,
|
||||
List,
|
||||
Optional,
|
||||
TextIO,
|
||||
Tuple,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
from urllib.parse import urljoin
|
||||
from urllib.request import Request, url2pathname
|
||||
from xml.sax import xmlreader
|
||||
|
||||
import rdflib.util
|
||||
from rdflib import __version__
|
||||
from rdflib._networking import _urlopen
|
||||
from rdflib.namespace import Namespace
|
||||
from rdflib.term import URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from email.message import Message
|
||||
from io import BufferedReader
|
||||
from urllib.response import addinfourl
|
||||
|
||||
from typing_extensions import Buffer
|
||||
|
||||
from rdflib.graph import Graph
|
||||
|
||||
|
||||
# Names exported by ``from <this module> import *``. BytesIOWrapper, defined
# further down in this module, is not listed here.
__all__ = [
    "Parser",
    "InputSource",
    "StringInputSource",
    "URLInputSource",
    "FileInputSource",
    "PythonInputSource",
]
|
||||
|
||||
|
||||
class Parser:
    """Base class for parser plugins.

    The class itself holds no state (``__slots__`` is empty) and both of its
    methods are no-ops; concrete parsers are expected to override ``parse``.
    """

    __slots__ = ()

    def __init__(self):
        """Create a parser; the base class has nothing to initialise."""

    def parse(self, source: InputSource, sink: Graph) -> None:
        """Read ``source`` and add what it contains to ``sink``.

        The base implementation does nothing and returns ``None``.
        """
        return None
|
||||
|
||||
|
||||
class BytesIOWrapper(BufferedIOBase):
|
||||
__slots__ = (
|
||||
"wrapped",
|
||||
"enc_str",
|
||||
"text_str",
|
||||
"encoding",
|
||||
"encoder",
|
||||
"has_read1",
|
||||
"has_seek",
|
||||
"_name",
|
||||
"_fileno",
|
||||
"_isatty",
|
||||
"_leftover",
|
||||
"_bytes_per_char",
|
||||
"_text_bytes_offset",
|
||||
)
|
||||
|
||||
def __init__(self, wrapped: Union[str, StringIO, TextIOBase], encoding="utf-8"):
|
||||
super(BytesIOWrapper, self).__init__()
|
||||
self.wrapped = wrapped
|
||||
self.encoding = encoding
|
||||
self.encoder = codecs.getencoder(self.encoding)
|
||||
self.enc_str: Optional[Union[BytesIO, BufferedIOBase]] = None
|
||||
self.text_str: Optional[Union[StringIO, TextIOBase]] = None
|
||||
self.has_read1: Optional[bool] = None
|
||||
self.has_seek: Optional[bool] = None
|
||||
self._name: Optional[str] = None
|
||||
self._fileno: Optional[Union[int, BaseException]] = None
|
||||
self._isatty: Optional[Union[bool, BaseException]] = None
|
||||
self._leftover: bytes = b""
|
||||
self._text_bytes_offset: int = 0
|
||||
norm_encoding = encoding.lower().replace("_", "-")
|
||||
if norm_encoding in ("utf-8", "utf8", "u8", "cp65001"):
|
||||
# utf-8 has a variable number of bytes per character, 1-4
|
||||
self._bytes_per_char: int = 1 # assume average of 1 byte per character
|
||||
elif norm_encoding in (
|
||||
"latin1",
|
||||
"latin-1",
|
||||
"iso-8859-1",
|
||||
"iso8859-1",
|
||||
"ascii",
|
||||
"us-ascii",
|
||||
):
|
||||
# these are all 1-byte-per-character encodings
|
||||
self._bytes_per_char = 1
|
||||
elif norm_encoding.startswith("utf-16") or norm_encoding.startswith("utf16"):
|
||||
# utf-16 has a variable number of bytes per character, 2-3
|
||||
self._bytes_per_char = 2 # assume average of 2 bytes per character
|
||||
elif norm_encoding.startswith("utf-32") or norm_encoding.startswith("utf32"):
|
||||
# utf-32 is fixed length with 4 bytes per character
|
||||
self._bytes_per_char = 4
|
||||
else:
|
||||
# not sure, just assume it is 2 bytes per character
|
||||
self._bytes_per_char = 2
|
||||
|
||||
def _init(self):
|
||||
name: Optional[str] = None
|
||||
if isinstance(self.wrapped, str):
|
||||
b, blen = self.encoder(self.wrapped)
|
||||
self.enc_str = BytesIO(b)
|
||||
name = "string"
|
||||
elif isinstance(self.wrapped, TextIOWrapper):
|
||||
inner = self.wrapped.buffer
|
||||
# type error: TextIOWrapper.buffer cannot be a BytesIOWrapper
|
||||
if isinstance(inner, BytesIOWrapper): # type: ignore[unreachable]
|
||||
raise Exception(
|
||||
"BytesIOWrapper cannot be wrapped in TextIOWrapper, "
|
||||
"then wrapped in another BytesIOWrapper"
|
||||
)
|
||||
else:
|
||||
self.enc_str = cast(BufferedIOBase, inner)
|
||||
elif isinstance(self.wrapped, (TextIOBase, StringIO)):
|
||||
self.text_str = self.wrapped
|
||||
use_stream: Union[BytesIO, StringIO, BufferedIOBase, TextIOBase]
|
||||
if self.enc_str is not None:
|
||||
use_stream = self.enc_str
|
||||
elif self.text_str is not None:
|
||||
use_stream = self.text_str
|
||||
else:
|
||||
raise Exception("No stream to read from")
|
||||
if name is None:
|
||||
try:
|
||||
name = use_stream.name # type: ignore[union-attr]
|
||||
except AttributeError:
|
||||
name = "stream"
|
||||
self.has_read1 = hasattr(use_stream, "read1")
|
||||
try:
|
||||
self.has_seek = use_stream.seekable()
|
||||
except AttributeError:
|
||||
self.has_seek = hasattr(use_stream, "seek")
|
||||
|
||||
self._name = name
|
||||
|
||||
def _check_fileno(self):
|
||||
use_stream: Union[BytesIO, StringIO, BufferedIOBase, TextIOBase]
|
||||
if self.enc_str is None and self.text_str is None:
|
||||
self._init()
|
||||
if self.enc_str is not None:
|
||||
use_stream = self.enc_str
|
||||
elif self.text_str is not None:
|
||||
use_stream = self.text_str
|
||||
try:
|
||||
self._fileno = use_stream.fileno()
|
||||
except OSError as e:
|
||||
self._fileno = e
|
||||
except AttributeError:
|
||||
self._fileno = -1
|
||||
|
||||
def _check_isatty(self):
|
||||
use_stream: Union[BytesIO, StringIO, BufferedIOBase, TextIOBase]
|
||||
if self.enc_str is None and self.text_str is None:
|
||||
self._init()
|
||||
if self.enc_str is not None:
|
||||
use_stream = self.enc_str
|
||||
elif self.text_str is not None:
|
||||
use_stream = self.text_str
|
||||
try:
|
||||
self._isatty = use_stream.isatty()
|
||||
except OSError as e:
|
||||
self._isatty = e
|
||||
except AttributeError:
|
||||
self._isatty = False
|
||||
|
||||
@property
def name(self) -> Any:
    """Return the cached stream name, running ``_init()`` first if unset."""
    if self._name is None:
        # The name is only known once the wrapped object has been resolved.
        self._init()
    return self._name
|
||||
|
||||
@property
def closed(self) -> bool:
    """Report whether the resolved underlying stream is closed.

    Returns ``False`` when no stream has been resolved yet, or when the
    stream exposes no ``closed`` attribute (or reports ``None``).
    """
    # The bytes stream takes precedence over the text stream.
    stream = self.enc_str if self.enc_str is not None else self.text_str
    if stream is None:
        return False
    state = getattr(stream, "closed", None)
    return False if state is None else state
|
||||
|
||||
def readable(self) -> bool:
    """Always ``True``: this wrapper reports itself as readable."""
    return True
|
||||
|
||||
def writable(self) -> bool:
    """Always ``False``: this wrapper reports itself as not writable."""
    return False
|
||||
|
||||
def truncate(self, size: Optional[int] = None) -> int:
    """Unsupported operation.

    Raises:
        NotImplementedError: always, regardless of ``size``.
    """
    raise NotImplementedError("Cannot truncate on BytesIOWrapper")
|
||||
|
||||
def isatty(self) -> bool:
    """Return whether the underlying stream is a TTY.

    The answer is computed once by ``_check_isatty()`` and cached. If that
    probe stored an exception, the same exception is re-raised on each call.
    """
    if self._isatty is None:
        self._check_isatty()
    cached = self._isatty
    if isinstance(cached, BaseException):
        raise cached
    return bool(cached)
|
||||
|
||||
def fileno(self) -> int:
    """Return the underlying stream's file descriptor.

    The value is computed once by ``_check_fileno()`` and cached. If that
    probe stored an exception, it is re-raised; if nothing was stored,
    ``-1`` is returned.
    """
    if self._fileno is None:
        self._check_fileno()
    cached = self._fileno
    if isinstance(cached, BaseException):
        raise cached
    if cached is None:
        return -1
    return cached
|
||||
|
||||
def close(self):
    """Close the resolved underlying stream, if there is one.

    Does nothing when no stream has been resolved yet. A stream lacking a
    ``close`` attribute is tolerated silently.
    """
    # The bytes stream takes precedence over the text stream.
    target = self.enc_str if self.enc_str is not None else self.text_str
    if target is None:
        return
    try:
        target.close()
    except AttributeError:
        pass
|
||||
|
||||
def flush(self):
    """No-op: there is nothing to flush on this read-only wrapper."""
    return  # Does nothing on read-only streams
|
||||
|
||||
def _read_bytes_from_text_stream(self, size: Optional[int] = -1, /) -> bytes:
|
||||
if TYPE_CHECKING:
|
||||
assert self.text_str is not None
|
||||
if size is None or size < 0:
|
||||
try:
|
||||
ret_str: str = self.text_str.read()
|
||||
except EOFError:
|
||||
ret_str = ""
|
||||
ret_encoded, enc_len = self.encoder(ret_str)
|
||||
if self._leftover:
|
||||
ret_bytes = self._leftover + ret_encoded
|
||||
self._leftover = b""
|
||||
else:
|
||||
ret_bytes = ret_encoded
|
||||
elif size == len(self._leftover):
|
||||
ret_bytes = self._leftover
|
||||
self._leftover = b""
|
||||
elif size < len(self._leftover):
|
||||
ret_bytes = self._leftover[:size]
|
||||
self._leftover = self._leftover[size:]
|
||||
else:
|
||||
d, m = divmod(size, self._bytes_per_char)
|
||||
get_per_loop = int(d) + (1 if m > 0 else 0)
|
||||
got_bytes: bytes = self._leftover
|
||||
while len(got_bytes) < size:
|
||||
try:
|
||||
got_str: str = self.text_str.read(get_per_loop)
|
||||
except EOFError:
|
||||
got_str = ""
|
||||
if len(got_str) < 1:
|
||||
break
|
||||
ret_encoded, enc_len = self.encoder(got_str)
|
||||
got_bytes += ret_encoded
|
||||
if len(got_bytes) == size:
|
||||
self._leftover = b""
|
||||
ret_bytes = got_bytes
|
||||
else:
|
||||
ret_bytes = got_bytes[:size]
|
||||
self._leftover = got_bytes[size:]
|
||||
del got_bytes
|
||||
self._text_bytes_offset += len(ret_bytes)
|
||||
return ret_bytes
|
||||
|
||||
def read(self, size: Optional[int] = -1, /) -> bytes:
    """
    Read at most size bytes, returned as a bytes object.

    If the size argument is negative or omitted read until EOF is reached.
    Return an empty bytes object if already at EOF.

    A size of exactly zero returns ``b""`` without touching the stream.
    """
    if size == 0:
        return b""
    if self.enc_str is None and self.text_str is None:
        self._init()
    if self.enc_str is None:
        # Only a text stream is available: encode on the fly.
        return self._read_bytes_from_text_stream(size)
    return self.enc_str.read(size)
|
||||
|
||||
def read1(self, size: Optional[int] = -1, /) -> bytes:
    """
    Read at most size bytes, with at most one call to the underlying raw stream's
    read() or readinto() method. Returned as a bytes object.

    If the size argument is negative or omitted, read until EOF is reached.
    Return an empty bytes object at EOF.

    Raises NotImplementedError when the underlying stream has no ``read1``
    or when only a text stream is available.
    """
    needs_init = self.has_read1 is None or (
        self.enc_str is None and self.text_str is None
    )
    if needs_init:
        self._init()
    if not self.has_read1:
        raise NotImplementedError()
    if self.enc_str is None:
        raise NotImplementedError("read1() not supported for TextIO in BytesIOWrapper")
    if size is None or size < 0:
        return self.enc_str.read1()
    return self.enc_str.read1(size)
|
||||
|
||||
def readinto(self, b: Buffer, /) -> int:
    """
    Read len(b) bytes into buffer b.

    Returns number of bytes read (0 for EOF), or error if the object
    is set not to block and has no data to read.
    """
    if TYPE_CHECKING:
        assert isinstance(b, (memoryview, bytearray))
    capacity = len(b)
    if capacity == 0:
        return 0
    if self.enc_str is None and self.text_str is None:
        self._init()
    if self.enc_str is not None:
        return self.enc_str.readinto(b)
    # Text stream: encode up to ``capacity`` bytes and copy them in.
    chunk: bytes = self._read_bytes_from_text_stream(capacity)
    count = len(chunk)
    if count:
        b[:count] = chunk
    return count
|
||||
|
||||
def readinto1(self, b: Buffer, /) -> int:
    """
    Read len(b) bytes into buffer b, with at most one call to the underlying raw
    stream's read() or readinto() method.

    Returns number of bytes read (0 for EOF), or error if the object
    is set not to block and has no data to read.

    Raises NotImplementedError when the underlying stream has no ``read1``
    or when only a text stream is available.
    """
    if TYPE_CHECKING:
        assert isinstance(b, (memoryview, bytearray))
    needs_init = self.has_read1 is None or (
        self.enc_str is None and self.text_str is None
    )
    if needs_init:
        self._init()
    if not self.has_read1:
        raise NotImplementedError()
    if self.enc_str is None:
        raise NotImplementedError(
            "readinto1() not supported for TextIO in BytesIOWrapper"
        )
    return self.enc_str.readinto1(b)
|
||||
|
||||
def seek(self, offset: int, whence: int = 0, /) -> int:
    """Rewind the stream to its start.

    Only ``seek(0)`` / ``seek(0, 0)`` is supported. When reading from a text
    stream, the byte offset counter and any parked leftover bytes are reset
    as well.

    Args:
        offset: Must be ``0``.
        whence: Must be ``0`` (``SEEK_SET``).

    Returns:
        Always ``0``, the new position.

    Raises:
        NotImplementedError: if the underlying stream is not seekable, if
            ``whence`` is not ``0``, or if ``offset`` is not ``0``.
    """
    # Resolve the stream first so that ``has_seek`` is known before it is
    # consulted; otherwise the very first seek on an unseekable stream would
    # skip the guard below and fail inside the underlying stream instead.
    if (self.enc_str is None and self.text_str is None) or self.has_seek is None:
        self._init()
    if not self.has_seek:
        raise NotImplementedError()

    if whence != 0:
        raise NotImplementedError("Only SEEK_SET is supported on BytesIOWrapper")
    if offset != 0:
        raise NotImplementedError(
            "Only seeking to zero is supported on BytesIOWrapper"
        )
    if self.enc_str is not None:
        self.enc_str.seek(offset, whence)
    elif self.text_str is not None:
        self.text_str.seek(offset, whence)
        # Encoded-position bookkeeping is only maintained for text streams.
        self._text_bytes_offset = 0
        self._leftover = b""
    return 0
|
||||
|
||||
def seekable(self):
    """Return the cached seekability of the underlying stream.

    Runs ``_init()`` first when no stream has been resolved yet or when
    ``has_seek`` has not been computed.
    """
    if (self.enc_str is None and self.text_str is None) or self.has_seek is None:
        self._init()
    return self.has_seek
|
||||
|
||||
def tell(self) -> int:
    """Return the current position in bytes.

    With a bytes stream the position comes from that stream's ``tell()``
    (and is cached); otherwise the running count of bytes produced from the
    text stream is returned.

    Raises:
        NotImplementedError: if the stream is already known to be
            unseekable.
    """
    if not (self.has_seek is None or self.has_seek):
        raise NotImplementedError("Cannot tell() pos because file is not seekable.")
    stream = self.enc_str
    if stream is not None:
        try:
            self._text_bytes_offset = stream.tell()
        except AttributeError:
            # Stream without tell(): keep the previously recorded offset.
            pass
    return self._text_bytes_offset
|
||||
|
||||
def write(self, b, /):
    """Unsupported operation.

    Raises:
        NotImplementedError: always; the argument is ignored.
    """
    raise NotImplementedError("Cannot write to a BytesIOWrapper")
|
||||
|
||||
|
||||
class InputSource(xmlreader.InputSource):
    """
    SAX ``InputSource`` extended with two extra attributes.

    ``content_type`` starts as ``None`` and ``auto_close`` starts as
    ``False``; ``close()`` closes whichever character and byte streams are
    attached.
    """

    def __init__(self, system_id: Optional[str] = None):
        xmlreader.InputSource.__init__(self, system_id=system_id)
        self.auto_close = False  # see Graph.parse(), true if opened by us
        self.content_type: Optional[str] = None

    def close(self) -> None:
        """Close the character stream, then the byte stream, best-effort.

        Streams that are unset, falsy, or lack a ``close`` attribute are
        skipped, and any ``Exception`` raised while closing is ignored.
        """
        for fetch_stream in (self.getCharacterStream, self.getByteStream):
            stream = fetch_stream()
            if not (stream and hasattr(stream, "close")):
                continue
            try:
                stream.close()
            except Exception:
                pass
|
||||
|
||||
|
||||
class PythonInputSource(InputSource):
    """
    Constructs an RDFLib Parser InputSource from a Python data structure,
    for example, loaded from JSON with json.load or json.loads:

    >>> import json
    >>> as_string = \"\"\"{
    ...   "@context" : {"ex" : "http://example.com/ns#"},
    ...   "@graph": [{"@type": "ex:item", "@id": "#example"}]
    ... }\"\"\"
    >>> as_python = json.loads(as_string)
    >>> source = create_input_source(data=as_python)
    >>> isinstance(source, PythonInputSource)
    True

    The public and system identifiers are stored as plain attributes on the
    instance; the parent initializer is not invoked.
    """

    def __init__(self, data: Any, system_id: Optional[str] = None):
        # Payload and identifiers.
        self.data = data
        self.system_id: Optional[str] = system_id
        self.public_id: Optional[str] = None
        # Same extra attributes that InputSource instances carry.
        self.auto_close = False  # see Graph.parse(), true if opened by us
        self.content_type = None

    def getPublicId(self) -> Optional[str]:  # noqa: N802
        """Return the stored public identifier."""
        return self.public_id

    def setPublicId(self, public_id: Optional[str]) -> None:  # noqa: N802
        """Store *public_id* as the public identifier."""
        self.public_id = public_id

    def getSystemId(self) -> Optional[str]:  # noqa: N802
        """Return the stored system identifier."""
        return self.system_id

    def setSystemId(self, system_id: Optional[str]) -> None:  # noqa: N802
        """Store *system_id* as the system identifier."""
        self.system_id = system_id

    def close(self) -> None:
        """Drop the reference to the wrapped data."""
        self.data = None
|
||||
|
||||
|
||||
class StringInputSource(InputSource):
    """
    Constructs an RDFLib Parser InputSource from a Python String or Bytes
    """

    def __init__(
        self,
        value: Union[str, bytes],
        encoding: str = "utf-8",
        system_id: Optional[str] = None,
    ):
        """Attach both a character stream and a byte stream for *value*.

        For ``str`` input the text is served from a ``StringIO`` and the
        bytes from a ``BytesIOWrapper`` using *encoding*. For any other
        input the bytes are served from a ``BytesIO`` and the text from a
        ``TextIOWrapper`` decoding with *encoding*.
        """
        super().__init__(system_id)
        if not isinstance(value, str):
            raw_stream: BinaryIO = BytesIO(value)
            self.setByteStream(raw_stream)
            decoded_stream = TextIOWrapper(raw_stream, encoding)
            self.setCharacterStream(decoded_stream)
            # Record the encoding as reported by the wrapper.
            self.setEncoding(decoded_stream.encoding)
            return

        text_stream: TextIO = StringIO(value)
        self.setCharacterStream(text_stream)
        self.setEncoding(encoding)
        self.setByteStream(BytesIOWrapper(value, encoding))
|
||||
|
||||
|
||||
# Default HTTP request headers; URLInputSource copies this dict and adds an
# ``Accept`` entry before issuing its request.
headers = {
    "User-agent": "rdflib-%s (https://rdflib.github.io/; eikeon@eikeon.com)"
    % __version__
}
|
||||
|
||||
|
||||
class URLInputSource(InputSource):
    """
    Constructs an RDFLib Parser InputSource from a URL to read it from the Web.

    The request carries an ``Accept`` header derived from *format*. For
    JSON-LD, an alternate location advertised through a ``Link`` header is
    followed when it differs from the requested URL.
    """

    links: List[str]

    @classmethod
    def getallmatchingheaders(cls, message: Message, name) -> List[str]:
        """Return all values of header *name* in *message* (case-insensitive)."""
        # This is reimplemented here, because the method
        # getallmatchingheaders from HTTPMessage is broken since Python 3.0
        name = name.lower()
        return [val for key, val in message.items() if key.lower() == name]

    @classmethod
    def get_links(cls, response: addinfourl) -> List[str]:
        """Return the comma-separated entries of every ``Link`` header, stripped."""
        linkslines = cls.getallmatchingheaders(response.headers, "Link")
        return [
            linkstr.strip()
            for linksline in linkslines
            for linkstr in linksline.split(",")
        ]

    def get_alternates(self, type_: Optional[str] = None) -> List[str]:
        """Return the targets of ``rel="alternate"`` links in ``self.links``.

        When *type_* is given, only links that also carry a matching
        ``type="..."`` parameter are returned.
        """
        typestr: Optional[str] = f'type="{type_}"' if type_ else None
        relstr = 'rel="alternate"'
        alts = []
        for link in self.links:
            parts = [p.strip() for p in link.split(";")]
            if relstr not in parts:
                continue
            if typestr is None or typestr in parts:
                # The first part is the link target wrapped in angle brackets.
                alts.append(parts[0].strip("<>"))
        return alts

    def __init__(self, system_id: Optional[str] = None, format: Optional[str] = None):
        """Open *system_id* over the network and attach the response.

        Sets ``url`` (after redirects), ``links``, the public id,
        ``content_type`` (without parameters), the byte stream and
        ``response_info``.
        """
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id

        # copy headers to change
        myheaders = dict(headers)
        if format == "xml":
            myheaders["Accept"] = "application/rdf+xml, */*;q=0.1"
        elif format == "n3":
            myheaders["Accept"] = "text/n3, */*;q=0.1"
        elif format in ["turtle", "ttl"]:
            myheaders["Accept"] = "text/turtle, application/x-turtle, */*;q=0.1"
        elif format == "nt":
            myheaders["Accept"] = "text/plain, */*;q=0.1"
        elif format == "trig":
            myheaders["Accept"] = "application/trig, */*;q=0.1"
        elif format == "trix":
            myheaders["Accept"] = "application/trix, */*;q=0.1"
        elif format == "json-ld":
            myheaders["Accept"] = (
                "application/ld+json, application/json;q=0.9, */*;q=0.1"
            )
        else:
            # if format not given, create an Accept header from all registered
            # parser Media Types
            from rdflib.parser import Parser
            from rdflib.plugin import plugins

            # all Media Types known have a / in them
            acc = [p.name for p in plugins(kind=Parser) if "/" in p.name]

            myheaders["Accept"] = ", ".join(acc)

        req = Request(system_id, None, myheaders)  # type: ignore[arg-type]

        response: addinfourl = _urlopen(req)
        self.url = response.geturl()  # in case redirections took place
        self.links = self.get_links(response)
        if format in ("json-ld", "application/ld+json"):
            alts = self.get_alternates(type_="application/ld+json")
            for link in alts:
                full_link = urljoin(self.url, link)
                if full_link != self.url and full_link != system_id:
                    try:
                        alt_response = _urlopen(Request(full_link))
                    except Exception:
                        # Do not leak the first response if the alternate
                        # location cannot be opened.
                        response.close()
                        raise
                    # The first response is superseded by the alternate one;
                    # close it so its connection is released.
                    response.close()
                    response = alt_response
                    self.url = response.geturl()  # in case redirections took place
                    break

        self.setPublicId(self.url)
        content_types = self.getallmatchingheaders(response.headers, "content-type")
        self.content_type = content_types[0] if content_types else None
        if self.content_type is not None:
            # Keep only the media type, dropping parameters such as charset.
            self.content_type = self.content_type.split(";", 1)[0]
        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)
        self.response_info = response.info()  # a mimetools.Message instance

    def __repr__(self) -> str:
        # type error: Incompatible return value type (got "Optional[str]", expected "str")
        return self.url  # type: ignore[return-value]
|
||||
|
||||
|
||||
class FileInputSource(InputSource):
    """InputSource backed by an already-open file object.

    The system id is the absolute ``file:`` URI derived from ``file.name``.
    """

    def __init__(
        self,
        file: Union[BinaryIO, TextIO, TextIOBase, RawIOBase, BufferedIOBase],
        /,
        encoding: Optional[str] = None,
    ):
        """Attach *file* as character and/or byte stream.

        Text files become the character stream, with their ``buffer`` (or a
        ``BytesIOWrapper`` around them when no buffer is available) as the
        byte stream. Other files become the byte stream, and additionally
        get a decoding ``TextIOWrapper`` as character stream when *encoding*
        is given.
        """
        cwd_uri = pathlib.Path.cwd().as_uri()
        file_uri = pathlib.Path(file.name).absolute().as_uri()  # type: ignore[union-attr]
        super().__init__(URIRef(file_uri, base=cwd_uri))
        self.file = file

        if isinstance(file, TextIOBase):  # Python3 unicode fp
            self.setCharacterStream(file)
            self.setEncoding(file.encoding)
            try:
                self.setByteStream(file.buffer)  # type: ignore[attr-defined]
            except (AttributeError, LookupError):
                self.setByteStream(BytesIOWrapper(file, encoding=file.encoding))
            return

        if TYPE_CHECKING:
            assert isinstance(file, BufferedReader)
        self.setByteStream(file)
        # Without an encoding the character stream stays unset: the encoding
        # of the raw bytes file is unknown.
        if encoding is not None:
            self.setEncoding(encoding)
            self.setCharacterStream(TextIOWrapper(file, encoding=encoding))

    def __repr__(self) -> str:
        return repr(self.file)
|
||||
|
||||
|
||||
def create_input_source(
    source: Optional[
        Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath]
    ] = None,
    publicID: Optional[str] = None,  # noqa: N803
    location: Optional[str] = None,
    file: Optional[Union[BinaryIO, TextIO]] = None,
    data: Optional[Union[str, bytes, dict]] = None,
    format: Optional[str] = None,
) -> InputSource:
    """
    Return an appropriate InputSource instance for the given
    parameters.

    Exactly one of *source*, *location*, *file* and *data* must be given.
    *source* is dispatched by type: an ``InputSource`` is used as-is, a
    ``str`` or path is treated as a location, ``bytes`` as data, and any
    other object with a ``read`` attribute as an open stream.

    Raises:
        ValueError: if zero or several of the four inputs are given.
        RuntimeError: if *data* is of an unsupported type.
        Exception: if *source* is of an unexpected type or no input source
            could be built.
    """
    # test that exactly one of source, location, file, and data is not None.
    supplied = [arg for arg in (source, location, file, data) if arg is not None]
    if len(supplied) != 1:
        raise ValueError(
            "exactly one of source, location, file or data must be given",
        )

    input_source = None

    if source is not None:
        if TYPE_CHECKING:
            assert file is None
            assert data is None
            assert location is None
        if isinstance(source, InputSource):
            input_source = source
        elif isinstance(source, str):
            location = source
        elif isinstance(source, pathlib.PurePath):
            location = str(source)
        elif isinstance(source, bytes):
            data = source
        elif hasattr(source, "read") and not isinstance(source, Namespace):
            stream = source
            input_source = InputSource()
            if hasattr(source, "encoding"):
                # Text-like stream: expose it as character stream and use its
                # buffer, when it has one, as the byte stream.
                input_source.setCharacterStream(source)
                input_source.setEncoding(source.encoding)
                try:
                    input_source.setByteStream(
                        source.buffer  # type: ignore[union-attr]
                    )
                except (AttributeError, LookupError):
                    input_source.setByteStream(source)
            else:
                input_source.setByteStream(stream)
            if stream is sys.stdin:
                input_source.setSystemId("file:///dev/stdin")
            elif hasattr(stream, "name"):
                input_source.setSystemId(stream.name)
        else:
            raise Exception(
                "Unexpected type '%s' for source '%s'" % (type(source), source)
            )

    absolute_location = None  # Further to fix for issue 130

    auto_close = False  # make sure we close all file handles we open

    if location is not None:
        if TYPE_CHECKING:
            assert file is None
            assert data is None
            assert source is None
        resolved = _create_input_source_from_location(
            file=file,
            format=format,
            input_source=input_source,
            location=location,
        )
        absolute_location, auto_close, file, input_source = resolved

    if file is not None:
        if TYPE_CHECKING:
            assert location is None
            assert data is None
            assert source is None
        input_source = FileInputSource(file)

    if data is not None:
        if TYPE_CHECKING:
            assert location is None
            assert file is None
            assert source is None
        if isinstance(data, dict):
            input_source = PythonInputSource(data)
            auto_close = True
        elif isinstance(data, (str, bytes, bytearray)):
            input_source = StringInputSource(data)
            auto_close = True
        else:
            raise RuntimeError(f"parse data can only str, or bytes. not: {type(data)}")

    if input_source is None:
        raise Exception("could not create InputSource")

    input_source.auto_close |= auto_close
    if publicID is not None:  # Further to fix for issue 130
        input_source.setPublicId(publicID)
    # Further to fix for issue 130
    elif input_source.getPublicId() is None:
        input_source.setPublicId(absolute_location or "")
    return input_source
|
||||
|
||||
|
||||
def _create_input_source_from_location(
    file: Optional[Union[BinaryIO, TextIO]],
    format: Optional[str],
    input_source: Optional[InputSource],
    location: str,
) -> Tuple[URIRef, bool, Optional[Union[BinaryIO, TextIO]], Optional[InputSource]]:
    """Resolve *location* to an absolute URI and open it.

    ``file:///`` locations are opened as binary files and returned through
    the ``file`` slot; anything else yields a ``URLInputSource`` in the
    ``input_source`` slot.

    Returns:
        ``(absolute_location, auto_close, file, input_source)`` where
        ``auto_close`` is always ``True``.
    """
    # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145 and
    # https://github.com/RDFLib/rdflib/issues/1430
    # NOTE: using pathlib.Path.exists on a URL fails on windows as it is not a
    # valid path. However os.path.exists() returns false for a URL on windows
    # which is why it is being used instead.
    if os.path.exists(location):
        location = pathlib.Path(location).absolute().as_uri()

    cwd_uri = pathlib.Path.cwd().as_uri()
    absolute_location = URIRef(rdflib.util._iri2uri(location), base=cwd_uri)

    if not absolute_location.startswith("file:///"):
        input_source = URLInputSource(absolute_location, format)
    else:
        local_path = url2pathname(absolute_location.replace("file:///", "/"))
        file = open(local_path, "rb")

    # publicID = publicID or absolute_location  # Further to fix
    # for issue 130

    return absolute_location, True, file, input_source
|
||||
@@ -0,0 +1,611 @@
|
||||
r"""
|
||||
|
||||
This module implements the SPARQL 1.1 Property path operators, as
|
||||
defined in:
|
||||
|
||||
http://www.w3.org/TR/sparql11-query/#propertypaths
|
||||
|
||||
In SPARQL the syntax is as follows:
|
||||
|
||||
+--------------------+-------------------------------------------------+
|
||||
|Syntax | Matches |
|
||||
+====================+=================================================+
|
||||
|iri | An IRI. A path of length one. |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|^elt | Inverse path (object to subject). |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|elt1 / elt2 | A sequence path of elt1 followed by elt2. |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|elt1 | elt2 | An alternative path of elt1 or elt2 |
|
||||
| | (all possibilities are tried). |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|elt* | A path that connects the subject and object |
|
||||
| | of the path by zero or more matches of elt. |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|elt+ | A path that connects the subject and object |
|
||||
| | of the path by one or more matches of elt. |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|elt? | A path that connects the subject and object |
|
||||
| | of the path by zero or one matches of elt. |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|!iri or | Negated property set. An IRI which is not one of|
|
||||
|!(iri\ :sub:`1`\ \| | iri\ :sub:`1`...iri\ :sub:`n`. |
|
||||
|... \|iri\ :sub:`n`)| !iri is short for !(iri). |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|!^iri or | Negated property set where the excluded matches |
|
||||
|!(^iri\ :sub:`1`\ \|| are based on reversed path. That is, not one of |
|
||||
|...\|^iri\ :sub:`n`)| iri\ :sub:`1`...iri\ :sub:`n` as reverse paths. |
|
||||
| | !^iri is short for !(^iri). |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|!(iri\ :sub:`1`\ \| | A combination of forward and reverse |
|
||||
|...\|iri\ :sub:`j`\ | properties in a negated property set. |
|
||||
|\|^iri\ :sub:`j+1`\ | |
|
||||
|\|... \|^iri\ | |
|
||||
|:sub:`n`)| | |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|(elt) | A group path elt, brackets control precedence. |
|
||||
+--------------------+-------------------------------------------------+
|
||||
|
||||
This module is used internally by the SPARQL engine, but the property paths
|
||||
can also be used to query RDFLib Graphs directly.
|
||||
|
||||
Where possible the SPARQL syntax is mapped to Python operators, and property
|
||||
path objects can be constructed from existing URIRefs.
|
||||
|
||||
>>> from rdflib import Graph, Namespace
|
||||
>>> from rdflib.namespace import FOAF
|
||||
|
||||
>>> ~FOAF.knows
|
||||
Path(~http://xmlns.com/foaf/0.1/knows)
|
||||
|
||||
>>> FOAF.knows/FOAF.name
|
||||
Path(http://xmlns.com/foaf/0.1/knows / http://xmlns.com/foaf/0.1/name)
|
||||
|
||||
>>> FOAF.name|FOAF.givenName
|
||||
Path(http://xmlns.com/foaf/0.1/name | http://xmlns.com/foaf/0.1/givenName)
|
||||
|
||||
Modifiers (?, \*, +) are done using \* (the multiplication operator) and
|
||||
the strings '\*', '?', '+', also defined as constants in this file.
|
||||
|
||||
>>> FOAF.knows*OneOrMore
|
||||
Path(http://xmlns.com/foaf/0.1/knows+)
|
||||
|
||||
The path objects can also be used with the normal graph methods.
|
||||
|
||||
First some example data:
|
||||
|
||||
>>> g=Graph()
|
||||
|
||||
>>> g=g.parse(data='''
|
||||
... @prefix : <ex:> .
|
||||
...
|
||||
... :a :p1 :c ; :p2 :f .
|
||||
... :c :p2 :e ; :p3 :g .
|
||||
... :g :p3 :h ; :p2 :j .
|
||||
... :h :p3 :a ; :p2 :g .
|
||||
...
|
||||
... :q :px :q .
|
||||
...
|
||||
... ''', format='n3') # doctest: +ELLIPSIS
|
||||
|
||||
>>> e = Namespace('ex:')
|
||||
|
||||
Graph contains:
|
||||
|
||||
>>> (e.a, e.p1/e.p2, e.e) in g
|
||||
True
|
||||
|
||||
Graph generator functions, triples, subjects, objects, etc. :
|
||||
|
||||
>>> list(g.objects(e.c, (e.p3*OneOrMore)/e.p2)) # doctest: +NORMALIZE_WHITESPACE
|
||||
[rdflib.term.URIRef('ex:j'), rdflib.term.URIRef('ex:g'),
|
||||
rdflib.term.URIRef('ex:f')]
|
||||
|
||||
A more complete set of tests:
|
||||
|
||||
>>> list(eval_path(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ]
|
||||
True
|
||||
>>> list(eval_path(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.c, e.p3*OneOrMore, None))) == [
|
||||
... (e.c, e.g), (e.c, e.h), (e.c, e.a)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c),
|
||||
... (e.c, e.g), (e.c, e.h), (e.c, e.a)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.a, -e.p1, None))) == [(e.a, e.f)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.a, -(e.p1|e.p2), None))) == []
|
||||
True
|
||||
>>> list(eval_path(g, (e.g, -~e.p2, None))) == [(e.g, e.j)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)]
|
||||
True
|
||||
>>> list(eval_path(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)]
|
||||
True
|
||||
|
||||
>>> list(eval_path(g, (e.q, e.px*OneOrMore, None)))
|
||||
[(rdflib.term.URIRef('ex:q'), rdflib.term.URIRef('ex:q'))]
|
||||
|
||||
>>> list(eval_path(g, (None, e.p1|e.p2, e.c)))
|
||||
[(rdflib.term.URIRef('ex:a'), rdflib.term.URIRef('ex:c'))]
|
||||
|
||||
>>> list(eval_path(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ]
|
||||
True
|
||||
>>> list(eval_path(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE
|
||||
[(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:c')),
|
||||
(rdflib.term.URIRef('ex:a'), rdflib.term.URIRef('ex:c'))]
|
||||
|
||||
>>> list(eval_path(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
|
||||
[(rdflib.term.URIRef('ex:h'), rdflib.term.URIRef('ex:a')),
|
||||
(rdflib.term.URIRef('ex:g'), rdflib.term.URIRef('ex:a')),
|
||||
(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:a'))]
|
||||
|
||||
>>> list(eval_path(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
|
||||
[(rdflib.term.URIRef('ex:a'), rdflib.term.URIRef('ex:a')),
|
||||
(rdflib.term.URIRef('ex:h'), rdflib.term.URIRef('ex:a')),
|
||||
(rdflib.term.URIRef('ex:g'), rdflib.term.URIRef('ex:a')),
|
||||
(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:a'))]
|
||||
|
||||
>>> list(eval_path(g, (None, -e.p1, e.f))) == [(e.a, e.f)]
|
||||
True
|
||||
>>> list(eval_path(g, (None, -(e.p1|e.p2), e.c))) == []
|
||||
True
|
||||
>>> list(eval_path(g, (None, -~e.p2, e.j))) == [(e.g, e.j)]
|
||||
True
|
||||
>>> list(eval_path(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)]
|
||||
True
|
||||
>>> list(eval_path(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)]
|
||||
True
|
||||
|
||||
>>> list(eval_path(g, (e.q, e.px*OneOrMore, None)))
|
||||
[(rdflib.term.URIRef('ex:q'), rdflib.term.URIRef('ex:q'))]
|
||||
|
||||
>>> list(eval_path(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j)))
|
||||
[(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:j'))]
|
||||
|
||||
No vars specified:
|
||||
|
||||
>>> sorted(list(eval_path(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE
|
||||
[(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:a')),
|
||||
(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:g')),
|
||||
(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:h')),
|
||||
(rdflib.term.URIRef('ex:g'), rdflib.term.URIRef('ex:a')),
|
||||
(rdflib.term.URIRef('ex:g'), rdflib.term.URIRef('ex:h')),
|
||||
(rdflib.term.URIRef('ex:h'), rdflib.term.URIRef('ex:a'))]
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import total_ordering
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Generator,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from rdflib.term import Node, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib._type_checking import _MulPathMod
|
||||
from rdflib.graph import Graph, _ObjectType, _PredicateType, _SubjectType
|
||||
from rdflib.namespace import NamespaceManager
|
||||
|
||||
|
||||
# property paths

# Modifier strings for path repetition; per the module docstring they are
# combined with a path via the multiplication operator, e.g.
# ``FOAF.knows * OneOrMore``.
ZeroOrMore = "*"  # zero or more matches of the path
OneOrMore = "+"  # one or more matches of the path
ZeroOrOne = "?"  # zero or one match of the path
|
||||
|
||||
|
||||
def _n3(
    arg: Union[URIRef, Path], namespace_manager: Optional[NamespaceManager] = None
) -> str:
    """Render *arg* via its ``n3()`` method, parenthesising compound paths.

    Sequence and alternative paths with more than one element are wrapped
    in parentheses; everything else is returned as rendered.
    """
    is_compound = (
        isinstance(arg, (SequencePath, AlternativePath)) and len(arg.args) > 1
    )
    rendered = arg.n3(namespace_manager)
    if is_compound:
        return "(%s)" % rendered
    return rendered
|
||||
|
||||
|
||||
@total_ordering
class Path(ABC):
    """Abstract base class of all property path expressions.

    Equality, hashing and ordering are all derived from ``repr()``.
    """

    # Operator slots; only declared here, no implementation is assigned in
    # the class body.
    __or__: Callable[[Path, Union[URIRef, Path]], AlternativePath]
    __invert__: Callable[[Path], InvPath]
    __neg__: Callable[[Path], NegatedPath]
    __truediv__: Callable[[Path, Union[URIRef, Path]], SequencePath]
    __mul__: Callable[[Path, str], MulPath]

    @abstractmethod
    def eval(
        self,
        graph: Graph,
        subj: Optional[_SubjectType] = None,
        obj: Optional[_ObjectType] = None,
    ) -> Iterator[Tuple[_SubjectType, _ObjectType]]:
        """Iterate over ``(subject, object)`` pairs for this path in *graph*."""

    @abstractmethod
    def n3(self, namespace_manager: Optional[NamespaceManager] = None) -> str:
        """Return the textual form of this path."""

    def __hash__(self):
        return hash(repr(self))

    def __eq__(self, other):
        return repr(self) == repr(other)

    def __lt__(self, other: Any) -> bool:
        """Order by ``repr()``; only paths and nodes are comparable."""
        if isinstance(other, (Path, Node)):
            return repr(self) < repr(other)
        raise TypeError(
            "unorderable types: %s() < %s()" % (repr(self), repr(other))
        )
|
||||
|
||||
|
||||
class InvPath(Path):
    """Inverse path: evaluates the wrapped path with subject and object swapped."""

    def __init__(self, arg: Union[Path, URIRef]):
        self.arg = arg

    def eval(
        self,
        graph: Graph,
        subj: Optional[_SubjectType] = None,
        obj: Optional[_ObjectType] = None,
    ) -> Generator[Tuple[_ObjectType, _SubjectType], None, None]:
        # Query the wrapped path in the opposite direction, then flip each
        # resulting pair back.
        reversed_pattern = (obj, self.arg, subj)
        for found_subj, found_obj in eval_path(graph, reversed_pattern):
            yield found_obj, found_subj

    def __repr__(self) -> str:
        inner = (self.arg,)
        return "Path(~%s)" % inner

    def n3(self, namespace_manager: Optional[NamespaceManager] = None) -> str:
        rendered_arg = _n3(self.arg, namespace_manager)
        return "^%s" % rendered_arg
|
||||
|
||||
|
||||
class SequencePath(Path):
    """A sequence of path steps, each evaluated from where the previous ended.

    ``SequencePath`` arguments passed to the constructor are flattened, so
    ``self.args`` never contains a nested ``SequencePath``.
    """

    def __init__(self, *args: Union[Path, URIRef]):
        steps: List[Union[Path, URIRef]] = []
        for step in args:
            if isinstance(step, SequencePath):
                steps.extend(step.args)
            else:
                steps.append(step)
        self.args: List[Union[Path, URIRef]] = steps

    def eval(
        self,
        graph: Graph,
        subj: Optional[_SubjectType] = None,
        obj: Optional[_ObjectType] = None,
    ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
        def _forward(
            paths: List[Union[Path, URIRef]],
            start: Optional[_SubjectType],
            end: Optional[_ObjectType],
        ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
            # Left to right: take the first step, then continue with the
            # remaining steps from every node that step reached.
            first_step = paths[0]
            remaining = paths[1:]
            if not remaining:
                yield from eval_path(graph, (start, first_step, end))
                return
            for origin, midpoint in eval_path(graph, (start, first_step, None)):
                for _, target in _forward(remaining, midpoint, end):
                    yield origin, target

        def _backward(
            paths: List[Union[Path, URIRef]],
            start: Optional[_SubjectType],
            end: _ObjectType,
        ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
            # Anchor on the last step (its object is known), then evaluate
            # the leading steps forward towards each midpoint found.
            leading = paths[:-1]
            if not leading:
                yield from eval_path(graph, (start, paths[0], end))
                return
            for midpoint, target in eval_path(graph, (None, paths[-1], end)):
                for origin, _ in _forward(leading, start, midpoint):
                    yield origin, target

        # Work backwards only when the object is the sole bound end;
        # otherwise (subject bound, or nothing bound) go forwards.
        if not subj and obj:
            return _backward(self.args, subj, obj)
        return _forward(self.args, subj, obj)

    def __repr__(self) -> str:
        joined = " / ".join(map(str, self.args))
        return "Path(%s)" % joined

    def n3(self, namespace_manager: Optional[NamespaceManager] = None) -> str:
        rendered_steps = [_n3(step, namespace_manager) for step in self.args]
        return "/".join(rendered_steps)
|
||||
|
||||
|
||||
class AlternativePath(Path):
    """A choice between paths: yields the matches of every alternative in turn.

    ``AlternativePath`` arguments passed to the constructor are flattened
    into ``self.args``.
    """

    def __init__(self, *args: Union[Path, URIRef]):
        branches: List[Union[Path, URIRef]] = []
        for branch in args:
            if isinstance(branch, AlternativePath):
                branches.extend(branch.args)
            else:
                branches.append(branch)
        self.args: List[Union[Path, URIRef]] = branches

    def eval(
        self,
        graph: Graph,
        subj: Optional[_SubjectType] = None,
        obj: Optional[_ObjectType] = None,
    ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
        # Alternatives are evaluated in declaration order; no de-duplication
        # is performed across them.
        for branch in self.args:
            yield from eval_path(graph, (subj, branch, obj))

    def __repr__(self) -> str:
        joined = " | ".join(map(str, self.args))
        return "Path(%s)" % joined

    def n3(self, namespace_manager: Optional[NamespaceManager] = None) -> str:
        rendered_branches = [_n3(branch, namespace_manager) for branch in self.args]
        return "|".join(rendered_branches)
|
||||
|
||||
|
||||
class MulPath(Path):
    """A path with a cardinality modifier (``ZeroOrOne``, ``ZeroOrMore`` or ``OneOrMore``).

    ``zero`` records whether a zero-length match is permitted and ``more``
    whether the wrapped path may be followed more than once.
    """

    def __init__(self, path: Union[Path, URIRef], mod: _MulPathMod):
        """Store *path* and *mod* and derive the ``zero`` / ``more`` flags.

        Raises ``Exception`` when *mod* is not one of the three known
        modifiers.
        """
        self.path = path
        self.mod = mod

        if mod == ZeroOrOne:
            self.zero = True
            self.more = False
        elif mod == ZeroOrMore:
            self.zero = True
            self.more = True
        elif mod == OneOrMore:
            self.zero = False
            self.more = True
        else:
            raise Exception("Unknown modifier %s" % mod)

    def eval(
        self,
        graph: Graph,
        subj: Optional[_SubjectType] = None,
        obj: Optional[_ObjectType] = None,
        first: bool = True,
    ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
        """Yield every distinct ``(subject, object)`` pair matched by this path.

        :param graph: graph the path is evaluated against.
        :param subj: bound subject, or ``None`` when the subject is unbound.
        :param obj: bound object, or ``None`` when the object is unbound.
        :param first: when false, the zero-length match for a bound end is
            not emitted.

        An end counts as bound when it ``is not None``. Truthiness is
        deliberately not used: a bound term can itself be falsy (for example
        an empty-string ``URIRef``, which is a ``str``), and treating it as
        unbound would silently drop the filter on that end.
        """
        # The spec does, by definition, not allow duplicates. The zero-length
        # match is tracked here as well, so that a cycle leading back to the
        # start node does not report the same pair a second time.
        done = set()

        if self.zero and first:
            if subj is not None and obj is not None:
                if subj == obj:
                    done.add((subj, obj))
                    yield subj, obj
            elif subj is not None:
                done.add((subj, subj))
                yield subj, subj
            elif obj is not None:
                done.add((obj, obj))
                yield obj, obj

        def _fwd(
            subj: Optional[_SubjectType] = None,
            obj: Optional[_ObjectType] = None,
            seen: Optional[Set[_SubjectType]] = None,
        ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
            # Depth-first walk from ``subj``; ``seen`` stops cycles.
            # type error: Item "None" of "Optional[Set[Node]]" has no attribute "add"
            # type error: Argument 1 to "add" of "set" has incompatible type "Optional[Node]"; expected "Node"
            seen.add(subj)  # type: ignore[union-attr, arg-type]

            for s, o in eval_path(graph, (subj, self.path, None)):
                # Only filter on the object when one is actually bound.
                if obj is None or o == obj:
                    yield s, o
                if self.more:
                    # type error: Unsupported right operand type for in ("Optional[Set[Node]]")
                    if o in seen:  # type: ignore[operator]
                        continue
                    for s2, o2 in _fwd(o, obj, seen):
                        yield s, o2

        def _bwd(
            subj: Optional[_SubjectType] = None,
            obj: Optional[_ObjectType] = None,
            seen: Optional[Set[_ObjectType]] = None,
        ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
            # Mirror image of _fwd: walk from ``obj`` towards subjects.
            # type error: Item "None" of "Optional[Set[Node]]" has no attribute "add"
            # type error: Argument 1 to "add" of "set" has incompatible type "Optional[Node]"; expected "Node"
            seen.add(obj)  # type: ignore[union-attr, arg-type]

            for s, o in eval_path(graph, (None, self.path, obj)):
                # Only filter on the subject when one is actually bound.
                if subj is None or subj == s:
                    yield s, o
                if self.more:
                    # type error: Unsupported right operand type for in ("Optional[Set[Node]]")
                    if s in seen:  # type: ignore[operator]
                        continue

                    for s2, o2 in _bwd(None, s, seen):
                        yield s2, o

        def _all_fwd_paths() -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
            if self.zero:
                seen1 = set()
                # According to the spec, ALL nodes are possible solutions
                # (even literals)
                # we cannot do this without going through ALL triples
                # unless we keep an index of all terms somehow
                # but let's just hope this query doesn't happen very often...
                for s, o in graph.subject_objects(None):
                    if s not in seen1:
                        seen1.add(s)
                        yield s, s
                    if o not in seen1:
                        seen1.add(o)
                        yield o, o

            seen = set()
            for s, o in eval_path(graph, (None, self.path, None)):
                if not self.more:
                    yield s, o
                else:
                    if s not in seen:
                        seen.add(s)
                        f = list(_fwd(s, None, set()))
                        for s1, o1 in f:
                            assert s1 == s
                            yield s1, o1

        # Pick the traversal direction from whichever end is bound.
        if subj is not None:
            candidates = _fwd(subj, obj, set())
        elif obj is not None:
            candidates = _bwd(subj, obj, set())
        else:
            candidates = _all_fwd_paths()

        for x in candidates:
            if x not in done:
                done.add(x)
                yield x

    def __repr__(self) -> str:
        return "Path(%s%s)" % (self.path, self.mod)

    def n3(self, namespace_manager: Optional[NamespaceManager] = None) -> str:
        """Return the wrapped path's rendering followed by the modifier."""
        return "%s%s" % (_n3(self.path, namespace_manager), self.mod)
|
||||
|
||||
|
||||
class NegatedPath(Path):
    """Negated property set: matches triples not excluded by any listed element.

    Only a ``URIRef``, an ``InvPath`` or an ``AlternativePath`` (whose
    elements are used) may be negated.
    """

    def __init__(self, arg: Union[AlternativePath, InvPath, URIRef]):
        self.args: List[Union[URIRef, Path]]
        if isinstance(arg, AlternativePath):
            # Shares the alternative's own list of elements.
            self.args = arg.args
        elif isinstance(arg, (URIRef, InvPath)):
            self.args = [arg]
        else:
            raise Exception(
                "Can only negate URIRefs, InvPaths or "
                + "AlternativePaths, not: %s" % (arg,)
            )

    def eval(self, graph, subj=None, obj=None):
        def _is_excluded(s, p, o):
            # True as soon as one negated element rules the triple out.
            for excluded in self.args:
                if isinstance(excluded, URIRef):
                    if p == excluded:
                        return True
                elif isinstance(excluded, InvPath):
                    if (o, excluded.arg, s) in graph:
                        return True
                else:
                    raise Exception("Invalid path in NegatedPath: %s" % excluded)
            return False

        for s, p, o in graph.triples((subj, None, obj)):
            if not _is_excluded(s, p, o):
                yield s, o

    def __repr__(self) -> str:
        joined = ",".join(map(str, self.args))
        return "Path(! %s)" % joined

    def n3(self, namespace_manager: Optional[NamespaceManager] = None) -> str:
        rendered = [_n3(arg, namespace_manager) for arg in self.args]
        return "!(%s)" % ("|".join(rendered))
|
||||
|
||||
|
||||
class PathList(list):
    """Plain ``list`` subclass; adds no behaviour of its own."""
|
||||
|
||||
|
||||
def path_alternative(self: Union[URIRef, Path], other: Union[URIRef, Path]):
    """
    Build an ``AlternativePath`` of *self* and *other*.

    Raises ``Exception`` when *other* is neither a ``URIRef`` nor a ``Path``.
    """
    if isinstance(other, (URIRef, Path)):
        return AlternativePath(self, other)
    raise Exception("Only URIRefs or Paths can be in paths!")
|
||||
|
||||
|
||||
def path_sequence(self: Union[URIRef, Path], other: Union[URIRef, Path]):
    """
    Build a ``SequencePath`` of *self* followed by *other*.

    Raises ``Exception`` when *other* is neither a ``URIRef`` nor a ``Path``.
    """
    if isinstance(other, (URIRef, Path)):
        return SequencePath(self, other)
    raise Exception("Only URIRefs or Paths can be in paths!")
|
||||
|
||||
|
||||
def evalPath(  # noqa: N802
    graph: Graph,
    t: Tuple[
        Optional[_SubjectType],
        Union[None, Path, _PredicateType],
        Optional[_ObjectType],
    ],
) -> Iterator[Tuple[_SubjectType, _ObjectType]]:
    """Deprecated mixed-case alias of ``eval_path()``.

    Emits a ``DeprecationWarning`` and then returns ``eval_path(graph, t)``.
    """
    warnings.warn(
        DeprecationWarning(
            "rdflib.path.evalPath() is deprecated, use the (snake-cased) eval_path(). "
            "The mixed-case evalPath() function name is incompatible with PEP8 "
            "recommendations and will be replaced by eval_path() in rdflib 7.0.0."
        ),
        # Attribute the warning to the caller's line rather than to this
        # wrapper, so the report points at the code that needs changing.
        stacklevel=2,
    )
    return eval_path(graph, t)
|
||||
|
||||
|
||||
def eval_path(
    graph: Graph,
    t: Tuple[
        Optional[_SubjectType],
        Union[None, Path, _PredicateType],
        Optional[_ObjectType],
    ],
) -> Iterator[Tuple[_SubjectType, _ObjectType]]:
    """Return a lazy iterator of ``(subject, object)`` pairs for pattern *t*.

    ``graph.triples(t)`` is called immediately; the predicate of each
    resulting triple is discarded as the pairs are consumed.
    """
    matching_triples = graph.triples(t)
    return ((subject, object_) for subject, _predicate, object_ in matching_triples)
|
||||
|
||||
|
||||
def mul_path(p: Union[URIRef, Path], mul: _MulPathMod) -> MulPath:
    """
    Wrap *p* in a ``MulPath`` carrying the cardinality modifier *mul*.
    """
    cardinality_path = MulPath(p, mul)
    return cardinality_path
|
||||
|
||||
|
||||
def inv_path(p: Union[URIRef, Path]) -> InvPath:
    """
    Wrap *p* in an ``InvPath`` (the inverse of *p*).
    """
    inverse = InvPath(p)
    return inverse
|
||||
|
||||
|
||||
def neg_path(p: Union[URIRef, AlternativePath, InvPath]) -> NegatedPath:
    """
    Wrap *p* in a ``NegatedPath`` (the negation of *p*).
    """
    negated = NegatedPath(p)
    return negated
|
||||
|
||||
|
||||
if __name__ != "__main__":
    # Monkey patch the path operators onto URIRef and Path.
    # (These cannot be defined directly in rdflib.term, where URIRef is
    # imported from, as that would introduce circular imports.)

    URIRef.__or__ = path_alternative
    URIRef.__truediv__ = path_sequence
    URIRef.__invert__ = inv_path
    URIRef.__neg__ = neg_path
    # ignore typing here as URIRef inherits from str,
    # which has an incompatible definition of __mul__.
    URIRef.__mul__ = mul_path  # type: ignore

    Path.__or__ = path_alternative
    Path.__truediv__ = path_sequence
    Path.__invert__ = inv_path
    # type error: Incompatible types in assignment (expression has type "Callable[[Union[URIRef, AlternativePath, InvPath]], NegatedPath]", variable has type "Callable[[Path], NegatedPath]")
    Path.__neg__ = neg_path  # type: ignore[assignment]
    # type error: Incompatible types in assignment (expression has type "Callable[[Union[URIRef, Path], Literal['*', '+', '?']], MulPath]", variable has type "Callable[[Path, str], MulPath]")
    Path.__mul__ = mul_path  # type: ignore[assignment]
|
||||
@@ -0,0 +1,629 @@
|
||||
"""
|
||||
Plugin support for rdf.
|
||||
|
||||
There are a number of plugin points for rdf: parser, serializer,
|
||||
store, query processor, and query result. Plugins can be registered
|
||||
either through setuptools entry_points or by calling
|
||||
rdflib.plugin.register directly.
|
||||
|
||||
If you have a package that uses a setuptools based setup.py you can add the
|
||||
following to your setup::
|
||||
|
||||
entry_points = {
|
||||
'rdf.plugins.parser': [
|
||||
'nt = rdf.plugins.parsers.ntriples:NTParser',
|
||||
],
|
||||
'rdf.plugins.serializer': [
|
||||
'nt = rdf.plugins.serializers.NTSerializer:NTSerializer',
|
||||
],
|
||||
}
|
||||
|
||||
See the `setuptools dynamic discovery of services and plugins`__ for more
|
||||
information.
|
||||
|
||||
.. __: http://peak.telecommunity.com/DevCenter/setuptools#dynamic-discovery-of-services-and-plugins
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from importlib.metadata import EntryPoint, entry_points
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Dict,
|
||||
Generic,
|
||||
Iterator,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TypeVar,
|
||||
overload,
|
||||
)
|
||||
|
||||
import rdflib.plugins.stores.berkeleydb
|
||||
from rdflib.exceptions import Error
|
||||
from rdflib.parser import Parser
|
||||
from rdflib.query import (
|
||||
Processor,
|
||||
Result,
|
||||
ResultParser,
|
||||
ResultSerializer,
|
||||
UpdateProcessor,
|
||||
)
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.store import Store
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = [
    "register",
    "get",
    "plugins",
    "PluginException",
    "Plugin",
    "PluginT",
    "PKGPlugin",
]

# Entry-point group name -> the plugin kind that entries found in that
# group are registered under.
rdflib_entry_points = {
    "rdf.plugins.store": Store,
    "rdf.plugins.serializer": Serializer,
    "rdf.plugins.parser": Parser,
    "rdf.plugins.resultparser": ResultParser,
    "rdf.plugins.resultserializer": ResultSerializer,
    "rdf.plugins.queryprocessor": Processor,
    "rdf.plugins.queryresult": Result,
    "rdf.plugins.updateprocessor": UpdateProcessor,
}

# Registry of every known plugin, keyed by (name, kind).
_plugins: Dict[Tuple[str, Type[Any]], Plugin] = {}
|
||||
|
||||
|
||||
class PluginException(Error):  # noqa: N818
    """Error raised by this module, e.g. when no plugin is registered for a key."""
|
||||
|
||||
|
||||
#: A generic type variable for plugins
|
||||
PluginT = TypeVar("PluginT")
|
||||
|
||||
|
||||
class Plugin(Generic[PluginT]):
    """A plugin described by a module path and class name, imported on demand."""

    def __init__(
        self, name: str, kind: Type[PluginT], module_path: str, class_name: str
    ):
        self.name = name
        self.kind = kind
        self.module_path = module_path
        self.class_name = class_name
        # Filled in by getClass() the first time the class is requested.
        self._class: Optional[Type[PluginT]] = None

    def getClass(self) -> Type[PluginT]:  # noqa: N802
        """Import the plugin's module on first use and return its class."""
        loaded = self._class
        if loaded is not None:
            return loaded
        # A non-empty fromlist makes __import__ return the named module
        # itself rather than its top-level package.
        module = __import__(self.module_path, globals(), locals(), [""])
        loaded = getattr(module, self.class_name)
        self._class = loaded
        return loaded
|
||||
|
||||
|
||||
class PKGPlugin(Plugin[PluginT]):
    """A plugin backed by an entry point, loaded on demand."""

    def __init__(self, name: str, kind: Type[PluginT], ep: EntryPoint):
        # The parent initialiser is not called: only name, kind and the
        # entry point are stored here.
        self.name = name
        self.kind = kind
        self.ep = ep
        self._class: Optional[Type[PluginT]] = None

    def getClass(self) -> Type[PluginT]:  # noqa: N802
        """Load the entry point on first use and return the result."""
        loaded = self._class
        if loaded is not None:
            return loaded
        loaded = self.ep.load()
        self._class = loaded
        return loaded
|
||||
|
||||
|
||||
def register(name: str, kind: Type[Any], module_path, class_name):
    """
    Record a plugin under the key (name, kind), replacing any plugin
    already stored under that key.

    module_path and class_name identify the plugin class; the class is
    not imported here.
    """
    _plugins[(name, kind)] = Plugin(name, kind, module_path, class_name)
|
||||
|
||||
|
||||
def get(name: str, kind: Type[PluginT]) -> Type[PluginT]:
    """
    Look up the plugin registered under (name, kind) and return its class.

    Raises a PluginException when nothing is registered under that key.
    """
    try:
        plugin: Plugin[PluginT] = _plugins[(name, kind)]
    except KeyError:
        raise PluginException("No plugin registered for (%s, %s)" % (name, kind))
    else:
        return plugin.getClass()
|
||||
|
||||
|
||||
# Discover plugins advertised through entry points and add them to the
# registry, one PKGPlugin per entry point found in a known group.
all_entry_points = entry_points()
if not hasattr(all_entry_points, "select"):
    # Prior to Python 3.10, this returns a dict instead of the selection interface, which is slightly slower
    if TYPE_CHECKING:
        assert isinstance(all_entry_points, dict)
    for entry_point, kind in rdflib_entry_points.items():
        for ep in all_entry_points.get(entry_point, []):
            _plugins[(ep.name, kind)] = PKGPlugin(ep.name, kind, ep)
else:
    for entry_point, kind in rdflib_entry_points.items():
        for ep in all_entry_points.select(group=entry_point):
            _plugins[(ep.name, kind)] = PKGPlugin(ep.name, kind, ep)
|
||||
|
||||
|
||||
@overload
def plugins(
    name: Optional[str] = ..., kind: Type[PluginT] = ...
) -> Iterator[Plugin[PluginT]]: ...


@overload
def plugins(name: Optional[str] = ..., kind: None = ...) -> Iterator[Plugin]: ...


def plugins(
    name: Optional[str] = None, kind: Optional[Type[PluginT]] = None
) -> Iterator[Plugin[PluginT]]:
    """
    Iterate over the registered plugins.

    A plugin is yielded when it matches every filter given; a filter left
    as None matches everything.
    """
    for candidate in _plugins.values():
        if name is not None and name != candidate.name:
            continue
        if kind is not None and kind != candidate.kind:
            continue
        yield candidate
|
||||
|
||||
|
||||
# Register the built-in plugins.
#
# Each entry below is (name, kind, module path, class name) and is passed
# to register() in exactly the order listed.

if rdflib.plugins.stores.berkeleydb.has_bsddb:
    # Checks for BerkeleyDB before registering it
    register("BerkeleyDB", Store, "rdflib.plugins.stores.berkeleydb", "BerkeleyDB")

for _builtin_plugin in (
    # Stores
    ("default", Store, "rdflib.plugins.stores.memory", "Memory"),
    ("Memory", Store, "rdflib.plugins.stores.memory", "Memory"),
    ("SimpleMemory", Store, "rdflib.plugins.stores.memory", "SimpleMemory"),
    ("Auditable", Store, "rdflib.plugins.stores.auditable", "AuditableStore"),
    ("Concurrent", Store, "rdflib.plugins.stores.concurrent", "ConcurrentStore"),
    ("SPARQLStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLStore"),
    (
        "SPARQLUpdateStore",
        Store,
        "rdflib.plugins.stores.sparqlstore",
        "SPARQLUpdateStore",
    ),
    # Triple serializers
    (
        "application/rdf+xml",
        Serializer,
        "rdflib.plugins.serializers.rdfxml",
        "XMLSerializer",
    ),
    ("xml", Serializer, "rdflib.plugins.serializers.rdfxml", "XMLSerializer"),
    (
        "pretty-xml",
        Serializer,
        "rdflib.plugins.serializers.rdfxml",
        "PrettyXMLSerializer",
    ),
    ("text/n3", Serializer, "rdflib.plugins.serializers.n3", "N3Serializer"),
    ("n3", Serializer, "rdflib.plugins.serializers.n3", "N3Serializer"),
    (
        "text/turtle",
        Serializer,
        "rdflib.plugins.serializers.turtle",
        "TurtleSerializer",
    ),
    ("turtle", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer"),
    ("ttl", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer"),
    (
        "longturtle",
        Serializer,
        "rdflib.plugins.serializers.longturtle",
        "LongTurtleSerializer",
    ),
    (
        "application/n-triples",
        Serializer,
        "rdflib.plugins.serializers.nt",
        "NTSerializer",
    ),
    ("ntriples", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer"),
    ("nt", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer"),
    ("nt11", Serializer, "rdflib.plugins.serializers.nt", "NT11Serializer"),
    ("json-ld", Serializer, "rdflib.plugins.serializers.jsonld", "JsonLDSerializer"),
    (
        "application/ld+json",
        Serializer,
        "rdflib.plugins.serializers.jsonld",
        "JsonLDSerializer",
    ),
    # Quad serializers
    (
        "application/n-quads",
        Serializer,
        "rdflib.plugins.serializers.nquads",
        "NQuadsSerializer",
    ),
    ("nquads", Serializer, "rdflib.plugins.serializers.nquads", "NQuadsSerializer"),
    (
        "application/trix",
        Serializer,
        "rdflib.plugins.serializers.trix",
        "TriXSerializer",
    ),
    ("trix", Serializer, "rdflib.plugins.serializers.trix", "TriXSerializer"),
    (
        "application/trig",
        Serializer,
        "rdflib.plugins.serializers.trig",
        "TrigSerializer",
    ),
    ("trig", Serializer, "rdflib.plugins.serializers.trig", "TrigSerializer"),
    ("hext", Serializer, "rdflib.plugins.serializers.hext", "HextuplesSerializer"),
    ("patch", Serializer, "rdflib.plugins.serializers.patch", "PatchSerializer"),
    # Triple parsers
    ("application/rdf+xml", Parser, "rdflib.plugins.parsers.rdfxml", "RDFXMLParser"),
    ("xml", Parser, "rdflib.plugins.parsers.rdfxml", "RDFXMLParser"),
    ("text/n3", Parser, "rdflib.plugins.parsers.notation3", "N3Parser"),
    ("n3", Parser, "rdflib.plugins.parsers.notation3", "N3Parser"),
    ("text/turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser"),
    ("turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser"),
    ("ttl", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser"),
    ("application/n-triples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser"),
    ("ntriples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser"),
    ("nt", Parser, "rdflib.plugins.parsers.ntriples", "NTParser"),
    ("nt11", Parser, "rdflib.plugins.parsers.ntriples", "NTParser"),
    ("application/ld+json", Parser, "rdflib.plugins.parsers.jsonld", "JsonLDParser"),
    ("json-ld", Parser, "rdflib.plugins.parsers.jsonld", "JsonLDParser"),
    # Quad parsers
    ("application/n-quads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser"),
    ("nquads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser"),
    ("application/trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser"),
    ("trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser"),
    ("application/trig", Parser, "rdflib.plugins.parsers.trig", "TrigParser"),
    ("trig", Parser, "rdflib.plugins.parsers.trig", "TrigParser"),
    ("hext", Parser, "rdflib.plugins.parsers.hext", "HextuplesParser"),
    # RDF Patch parsers
    ("patch", Parser, "rdflib.plugins.parsers.patch", "RDFPatchParser"),
    # SPARQL processors
    ("sparql", Result, "rdflib.plugins.sparql.processor", "SPARQLResult"),
    ("sparql", Processor, "rdflib.plugins.sparql.processor", "SPARQLProcessor"),
    (
        "sparql",
        UpdateProcessor,
        "rdflib.plugins.sparql.processor",
        "SPARQLUpdateProcessor",
    ),
    # SPARQL result serializers
    (
        "xml",
        ResultSerializer,
        "rdflib.plugins.sparql.results.xmlresults",
        "XMLResultSerializer",
    ),
    (
        "application/sparql-results+xml",
        ResultSerializer,
        "rdflib.plugins.sparql.results.xmlresults",
        "XMLResultSerializer",
    ),
    (
        "txt",
        ResultSerializer,
        "rdflib.plugins.sparql.results.txtresults",
        "TXTResultSerializer",
    ),
    (
        "json",
        ResultSerializer,
        "rdflib.plugins.sparql.results.jsonresults",
        "JSONResultSerializer",
    ),
    (
        "application/sparql-results+json",
        ResultSerializer,
        "rdflib.plugins.sparql.results.jsonresults",
        "JSONResultSerializer",
    ),
    (
        "csv",
        ResultSerializer,
        "rdflib.plugins.sparql.results.csvresults",
        "CSVResultSerializer",
    ),
    (
        "text/csv",
        ResultSerializer,
        "rdflib.plugins.sparql.results.csvresults",
        "CSVResultSerializer",
    ),
    # SPARQL result parsers
    (
        "xml",
        ResultParser,
        "rdflib.plugins.sparql.results.xmlresults",
        "XMLResultParser",
    ),
    (
        "application/sparql-results+xml",
        ResultParser,
        "rdflib.plugins.sparql.results.xmlresults",
        "XMLResultParser",
    ),
    (
        "application/sparql-results+xml; charset=UTF-8",
        ResultParser,
        "rdflib.plugins.sparql.results.xmlresults",
        "XMLResultParser",
    ),
    (
        "application/rdf+xml",
        ResultParser,
        "rdflib.plugins.sparql.results.graph",
        "GraphResultParser",
    ),
    (
        "json",
        ResultParser,
        "rdflib.plugins.sparql.results.jsonresults",
        "JSONResultParser",
    ),
    (
        "application/sparql-results+json",
        ResultParser,
        "rdflib.plugins.sparql.results.jsonresults",
        "JSONResultParser",
    ),
    (
        "csv",
        ResultParser,
        "rdflib.plugins.sparql.results.csvresults",
        "CSVResultParser",
    ),
    (
        "text/csv",
        ResultParser,
        "rdflib.plugins.sparql.results.csvresults",
        "CSVResultParser",
    ),
    (
        "tsv",
        ResultParser,
        "rdflib.plugins.sparql.results.tsvresults",
        "TSVResultParser",
    ),
    (
        "text/tab-separated-values",
        ResultParser,
        "rdflib.plugins.sparql.results.tsvresults",
        "TSVResultParser",
    ),
):
    register(*_builtin_plugin)
# Do not leave the loop variable behind as a module attribute.
del _builtin_plugin
|
||||
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Default plugins for rdflib.
|
||||
|
||||
This is a namespace package and contains the default plugins for
|
||||
rdflib.
|
||||
|
||||
"""
|
||||
@@ -0,0 +1,19 @@
|
||||
from rdflib.namespace import RDF # noqa: N999
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class RDFVOC(RDF):
    # Class-level settings for this namespace.
    # NOTE(review): presumably read by the namespace machinery behind
    # ``RDF``; their exact effect is not visible here - confirm.
    _underscore_num = True
    _fail = True

    # http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
    # A mapping from unqualified terms to their qualified version.
    RDF: URIRef
    Description: URIRef
    ID: URIRef
    about: URIRef
    parseType: URIRef  # noqa: N815
    resource: URIRef
    li: URIRef
    nodeID: URIRef  # noqa: N815
    datatype: URIRef
|
||||
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
@@ -0,0 +1,172 @@
|
||||
"""
|
||||
This is a rdflib plugin for parsing Hextuple files, which are Newline-Delimited JSON
|
||||
(ndjson) files, into a ConjunctiveGraph or Dataset. The store that backs the graph *must* be able to
|
||||
handle contexts, i.e. multiple graphs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from io import TextIOWrapper
|
||||
from typing import TYPE_CHECKING, Any, BinaryIO, List, Optional, TextIO, Union
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from io import BufferedReader
|
||||
|
||||
__all__ = ["HextuplesParser"]
|
||||
|
||||
|
||||
class HextuplesParser(Parser):
    """
    An RDFLib parser for Hextuples.

    Each non-blank input line is a JSON array of six strings:
    ``[subject, predicate, value, datatype, language, graph]``.
    """

    def __init__(self):
        super().__init__()
        # Graph that receives statements whose graph component is empty.
        self.default_context: Optional[Graph] = None
        # When True, every blank node is replaced by its skolem IRI.
        self.skolemize = False

    @staticmethod
    def _bnode_label(identifier: str) -> str:
        """Return ``identifier`` without a single leading ``_:`` prefix.

        Only the prefix is removed. Using ``str.replace`` here would also
        delete any ``_:`` occurring *inside* the label and thereby merge
        distinct blank nodes (e.g. ``_:a_:b`` and ``_:ab``).
        """
        return identifier[2:] if identifier.startswith("_:") else identifier

    def _parse_hextuple(
        self, ds: Union[Dataset, ConjunctiveGraph], tup: List[Union[str, None]]
    ) -> None:
        """Convert one decoded hextuple into a triple and add it to a graph.

        :param ds: dataset used to look up the named graph of the statement
        :param tup: ``[subject, predicate, value, datatype, language, graph]``;
            language and graph may be ``None``
        :raises ValueError: if subject, predicate, value or datatype is ``None``
        :raises Exception: if the statement has no graph and no default
            context has been set
        """
        # all values check
        # subject, predicate, value, datatype cannot be None
        # language and graph may be None
        if tup[0] is None or tup[1] is None or tup[2] is None or tup[3] is None:
            raise ValueError(
                f"subject, predicate, value, datatype cannot be None. Given: {tup}"
            )

        # 1 - subject
        s: Union[URIRef, BNode]
        if tup[0].startswith("_"):
            s = BNode(value=self._bnode_label(tup[0]))
            if self.skolemize:
                s = s.skolemize()
        else:
            s = URIRef(tup[0])

        # 2 - predicate
        p = URIRef(tup[1])

        # 3 - value
        o: Union[URIRef, BNode, Literal]
        if tup[3] == "globalId":
            o = URIRef(tup[2])
        elif tup[3] == "localId":
            o = BNode(value=self._bnode_label(tup[2]))
            if self.skolemize:
                o = o.skolemize()
        else:  # literal
            if tup[4] is None:
                o = Literal(tup[2], datatype=URIRef(tup[3]))
            else:
                o = Literal(tup[2], lang=tup[4])

        # 6 - context
        if tup[5] is not None:
            c = (
                BNode(self._bnode_label(tup[5]))
                if tup[5].startswith("_:")
                else URIRef(tup[5])
            )
            if isinstance(c, BNode) and self.skolemize:
                c = c.skolemize()

            ds.get_context(c).add((s, p, o))
        elif self.default_context is not None:
            self.default_context.add((s, p, o))
        else:
            raise Exception("No context to parse into!")

    # type error: Signature of "parse" incompatible with supertype "Parser"
    def parse(  # type: ignore[override]
        self,
        source: InputSource,
        graph: Graph,
        skolemize: bool = False,
        **kwargs: Any,
    ) -> None:
        """Parse a Hextuples document from ``source`` into ``graph``'s store.

        :param source: input providing a character stream and/or a byte stream
        :param graph: target graph; its store must be context-aware
        :param skolemize: replace blank nodes by their skolem IRIs
        :param kwargs: only ``encoding`` is inspected; anything other than
            ``None``/``"utf-8"`` triggers a warning and is otherwise ignored
        :raises ValueError: if ``source`` offers neither kind of stream
        """
        if kwargs.get("encoding") not in [None, "utf-8"]:
            warnings.warn(
                f"Hextuples files are always utf-8 encoded, "
                f"I was passed: {kwargs.get('encoding')}, "
                "but I'm still going to use utf-8"
            )

        assert (
            graph.store.context_aware
        ), "Hextuples Parser needs a context-aware store!"

        self.skolemize = skolemize
        # Set default_union to True to mimic ConjunctiveGraph behavior
        ds = Dataset(store=graph.store, default_union=True)
        ds_default = ds.default_context  # the DEFAULT_DATASET_GRAPH_ID
        if isinstance(graph, (Dataset, ConjunctiveGraph)):
            self.default_context = graph.default_context
        elif graph.identifier is not None:
            if graph.identifier == ds_default.identifier:
                self.default_context = graph
            else:
                self.default_context = ds.get_context(graph.identifier)
        else:
            # mypy thinks this is unreachable, but graph.identifier can be None
            self.default_context = ds_default  # type: ignore[unreachable]
        if self.default_context is not ds_default:
            ds.default_context = self.default_context
            ds.remove_graph(ds_default)  # remove the original unused default graph

        try:
            text_stream: Optional[TextIO] = source.getCharacterStream()
        except (AttributeError, LookupError):
            text_stream = None
        try:
            binary_stream: Optional[BinaryIO] = source.getByteStream()
        except (AttributeError, LookupError):
            binary_stream = None

        if text_stream is None and binary_stream is None:
            raise ValueError(
                f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
            )
        if TYPE_CHECKING:
            assert text_stream is not None or binary_stream is not None
        use_stream: Union[TextIO, BinaryIO]
        if _HAS_ORJSON:
            # orjson is fed the byte stream when there is one.
            if binary_stream is not None:
                use_stream = binary_stream
            else:
                if TYPE_CHECKING:
                    assert isinstance(text_stream, TextIOWrapper)
                use_stream = text_stream
            loads = orjson.loads
        else:
            # the stdlib json module is fed text, decoding bytes as utf-8
            if text_stream is not None:
                use_stream = text_stream
            else:
                if TYPE_CHECKING:
                    assert isinstance(binary_stream, BufferedReader)
                use_stream = TextIOWrapper(binary_stream, encoding="utf-8")
            loads = json.loads

        for line in use_stream:  # type: Union[str, bytes]
            if len(line) == 0 or line.isspace():
                # Skipping empty lines because this is what was being done
                # before for the first and last lines, albeit in a rather
                # indirect way.
                # The result is that we accept input that would otherwise be
                # invalid.
                # Possibly we should just let this result in an error.
                continue
            # this complex handling is because the 'value' component is
            # allowed to be "" but not None
            # all other "" values are treated as None
            raw_line: List[str] = loads(line)
            hex_tuple_line = [x if x != "" else None for x in raw_line]
            if raw_line[2] == "":
                hex_tuple_line[2] = ""
            self._parse_hextuple(ds, hex_tuple_line)
|
||||
@@ -0,0 +1,712 @@
|
||||
"""
|
||||
This parser will interpret a JSON-LD document as an RDF Graph. See:
|
||||
|
||||
http://json-ld.org/
|
||||
|
||||
Example usage::
|
||||
|
||||
>>> from rdflib import Graph, URIRef, Literal
|
||||
>>> test_json = '''
|
||||
... {
|
||||
... "@context": {
|
||||
... "dc": "http://purl.org/dc/terms/",
|
||||
... "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
... "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
|
||||
... },
|
||||
... "@id": "http://example.org/about",
|
||||
... "dc:title": {
|
||||
... "@language": "en",
|
||||
... "@value": "Someone's Homepage"
|
||||
... }
|
||||
... }
|
||||
... '''
|
||||
>>> g = Graph().parse(data=test_json, format='json-ld')
|
||||
>>> list(g) == [(URIRef('http://example.org/about'),
|
||||
... URIRef('http://purl.org/dc/terms/title'),
|
||||
... Literal("Someone's Homepage", lang='en'))]
|
||||
True
|
||||
|
||||
"""
|
||||
|
||||
# From: https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/parser.py
|
||||
|
||||
# NOTE: This code reads the entire JSON object into memory before parsing, but
|
||||
# we should consider streaming the input to deal with arbitrarily large graphs.
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
import warnings
|
||||
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
|
||||
|
||||
import rdflib.parser
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.parser import InputSource, URLInputSource
|
||||
from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef
|
||||
|
||||
from ..shared.jsonld.context import UNDEF, Context, Term
|
||||
from ..shared.jsonld.keys import (
|
||||
CONTEXT,
|
||||
GRAPH,
|
||||
ID,
|
||||
INCLUDED,
|
||||
INDEX,
|
||||
JSON,
|
||||
LANG,
|
||||
LIST,
|
||||
NEST,
|
||||
NONE,
|
||||
REV,
|
||||
SET,
|
||||
TYPE,
|
||||
VALUE,
|
||||
VOCAB,
|
||||
)
|
||||
from ..shared.jsonld.util import (
|
||||
_HAS_ORJSON,
|
||||
VOCAB_DELIMS,
|
||||
context_from_urlinputsource,
|
||||
json,
|
||||
orjson,
|
||||
source_to_json,
|
||||
)
|
||||
|
||||
__all__ = ["JsonLDParser", "to_rdf"]
|
||||
|
||||
# Term that Parser._key_to_graph substitutes when a key (or the id of the
# term it maps to) equals TYPE; its id is the rdf:type IRI.
TYPE_TERM = Term(str(RDF.type), TYPE, VOCAB) # type: ignore[call-arg]
|
||||
|
||||
# Fallback used by Parser.__init__ when allow_lists_of_lists is passed as None.
ALLOW_LISTS_OF_LISTS = True # NOTE: Not allowed in JSON-LD 1.0
|
||||
|
||||
|
||||
class JsonLDParser(rdflib.parser.Parser):
    """RDFLib parser plugin that feeds a JSON-LD document into a graph."""

    def __init__(self):
        super().__init__()

    def parse(
        self,
        source: InputSource,
        sink: Graph,
        version: float = 1.1,
        skolemize: bool = False,
        encoding: Optional[str] = "utf-8",
        base: Optional[str] = None,
        context: Optional[
            Union[
                List[Union[Dict[str, Any], str, None]],
                Dict[str, Any],
                str,
            ]
        ] = None,
        generalized_rdf: Optional[bool] = False,
        extract_all_scripts: Optional[bool] = False,
        **kwargs: Any,
    ) -> None:
        """Parse JSON-LD from a source document.

        The source document can be JSON or HTML with embedded JSON script
        elements (type attribute = "application/ld+json"). To process as HTML
        ``source.content_type`` must be set to "text/html" or
        "application/xhtml+xml".

        :param source: InputSource with JSON-formatted data (JSON or HTML)

        :param sink: Graph to receive the parsed triples

        :param version: parse as JSON-LD version, defaults to 1.1; a value
            that ``float()`` rejects with ``ValueError`` falls back to 1.1

        :param skolemize: forwarded to :func:`to_rdf`, where blank nodes are
            replaced by their skolemized form; defaults to False

        :param encoding: character encoding of the JSON (should be "utf-8"
            or "utf-16"), defaults to "utf-8"; other values only trigger a
            warning

        :param base: JSON-LD `Base IRI <https://www.w3.org/TR/json-ld/#base-iri>`_,
            defaults to None, in which case it is derived from the source's
            public or system id

        :param context: JSON-LD `Context <https://www.w3.org/TR/json-ld/#the-context>`_,
            defaults to None

        :param generalized_rdf: parse as `Generalized RDF <https://www.w3.org/TR/json-ld/#relationship-to-rdf>`_,
            defaults to False

        :param extract_all_scripts: if source is an HTML document then extract
            all script elements, defaults to False (extract only the first
            script element). This is ignored if ``source.system_id`` contains
            a fragment identifier, in which case only the script element with
            matching id attribute is extracted.

        """
        if encoding not in ("utf-8", "utf-16"):
            warnings.warn(
                "JSON should be encoded as unicode. Given encoding was: %s" % encoding
            )

        # Fall back to the document's own location when no base was supplied.
        base = base or sink.absolutize(
            source.getPublicId() or source.getSystemId() or ""
        )

        context_data = context
        looks_like_url_source = hasattr(source, "url") and hasattr(source, "links")
        if not context_data and looks_like_url_source:
            if TYPE_CHECKING:
                assert isinstance(source, URLInputSource)
            context_data = context_from_urlinputsource(source)

        try:
            version = float(version)
        except ValueError:
            version = 1.1

        # Get the optional fragment identifier
        try:
            fragment_id = URIRef(source.getSystemId()).fragment
        except Exception:
            fragment_id = None

        data, html_base = source_to_json(source, fragment_id, extract_all_scripts)
        if html_base is not None:
            base = URIRef(html_base, base=base)

        # NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be
        # context_aware. Keeping this check in case RDFLib is changed, or
        # someone passes something context_aware to this parser directly.
        conj_sink: Graph = (
            sink
            if sink.context_aware
            else ConjunctiveGraph(store=sink.store, identifier=sink.identifier)
        )

        to_rdf(
            data,
            conj_sink,
            base=base,
            context_data=context_data,
            version=version,
            generalized_rdf=bool(generalized_rdf),
            skolemize=skolemize,
        )
|
||||
|
||||
|
||||
def to_rdf(
    data: Any,
    dataset: Graph,
    base: Optional[str] = None,
    context_data: Optional[
        Union[
            List[Union[Dict[str, Any], str, None]],
            Dict[str, Any],
            str,
        ]
    ] = None,
    version: Optional[float] = None,
    generalized_rdf: bool = False,
    allow_lists_of_lists: Optional[bool] = None,
    skolemize: bool = False,
):
    """Load decoded JSON-LD ``data`` into ``dataset``.

    :param data: decoded JSON-LD (a ``dict`` or a ``list`` of nodes)
    :param dataset: graph that receives the triples
    :param base: base IRI handed to the initial :class:`Context`
    :param context_data: optional external context loaded before parsing
    :param version: JSON-LD version handed to the initial :class:`Context`
    :param generalized_rdf: forwarded to :class:`Parser`
    :param allow_lists_of_lists: forwarded to :class:`Parser`; ``None`` selects
        the module default
    :param skolemize: forwarded to :class:`Parser`
    :return: whatever :meth:`Parser.parse` returns for ``dataset``
    """
    active_context = Context(base=base, version=version)
    if context_data:
        active_context.load(context_data)

    converter = Parser(
        generalized_rdf=generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
        skolemize=skolemize,
    )
    return converter.parse(data, active_context, dataset)
|
||||
|
||||
|
||||
class Parser:
    """Turns decoded JSON-LD data into triples added to an RDFLib graph."""

    def __init__(
        self,
        generalized_rdf: bool = False,
        allow_lists_of_lists: Optional[bool] = None,
        skolemize: bool = False,
    ):
        """
        :param generalized_rdf: keep blank-node predicates and ids without a
            ``:`` instead of dropping them
        :param allow_lists_of_lists: permit a list as a list item; ``None``
            selects ``ALLOW_LISTS_OF_LISTS``
        :param skolemize: replace every created blank node by its skolem IRI
        """
        self.skolemize = skolemize
        self.generalized_rdf = generalized_rdf
        self.allow_lists_of_lists = (
            allow_lists_of_lists
            if allow_lists_of_lists is not None
            else ALLOW_LISTS_OF_LISTS
        )
        # Maps an id whose URIRef stringifies to "" to a stable replacement BNode.
        self.invalid_uri_to_bnode: dict[str, BNode] = {}

    def parse(self, data: Any, context: Context, dataset: Graph) -> Graph:
        """Add all nodes found in ``data`` to ``dataset``.

        A top-level ``dict`` may carry a local context, which is loaded into
        ``context``. Namespace bindings from the context are registered on
        ``dataset``.

        :return: the graph the top-level nodes were added to (the default
            context of a context-aware ``dataset``, otherwise ``dataset``)
        """
        topcontext = False
        resources: Union[Dict[str, Any], List[Any]]
        if isinstance(data, list):
            resources = data
        elif isinstance(data, dict):
            local_context = data.get(CONTEXT)
            if local_context:
                context.load(local_context, context.base)
                topcontext = True
            resources = data
        # type error: Subclass of "Dict[str, Any]" and "List[Any]" cannot exist: would have incompatible method signatures
        if not isinstance(resources, list):  # type: ignore[unreachable]
            resources = [resources]

        if context.vocab:
            dataset.bind(None, context.vocab)
        for name, term in context.terms.items():
            if term.id and term.id.endswith(VOCAB_DELIMS):
                dataset.bind(name, term.id)

        # type error: "Graph" has no attribute "default_context"
        graph = dataset.default_context if dataset.context_aware else dataset  # type: ignore[attr-defined]

        for node in resources:
            self._add_to_graph(dataset, graph, context, node, topcontext)

        return graph

    def _add_to_graph(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        node: Any,
        topcontext: bool = False,
    ) -> Optional[Node]:
        """Add the node object ``node`` and everything below it to ``graph``.

        :param topcontext: True when ``node``'s own context has already been
            loaded by :meth:`parse` and must not be applied again
        :return: the subject created for ``node``, or ``None`` when ``node``
            is not a node object or its id cannot be turned into an RDF term
        """
        if not isinstance(node, dict) or context.get_value(node):
            # type error: Return value expected
            return  # type: ignore[return-value]

        if CONTEXT in node and not topcontext:
            local_context = node[CONTEXT]
            if local_context:
                context = context.subcontext(local_context)
            else:
                context = Context(base=context.doc_base)

        # type error: Incompatible types in assignment (expression has type "Optional[Context]", variable has type "Context")
        context = context.get_context_for_type(node)  # type: ignore[assignment]

        id_val = context.get_id(node)

        if id_val is None:
            nested_id = self._get_nested_id(context, node)
            if nested_id is not None and len(nested_id) > 0:
                id_val = nested_id

        if isinstance(id_val, str):
            subj = self._to_rdf_id(context, id_val)
        else:
            subj = BNode()
            if self.skolemize:
                subj = subj.skolemize()

        if subj is None:
            return None

        # NOTE: crude way to signify that this node might represent a named graph
        no_id = id_val is None

        for key, obj in node.items():
            if key == CONTEXT or key in context.get_keys(ID):
                continue

            if key == REV or key in context.get_keys(REV):
                for rkey, robj in obj.items():
                    self._key_to_graph(
                        dataset,
                        graph,
                        context,
                        subj,
                        rkey,
                        robj,
                        reverse=True,
                        no_id=no_id,
                    )
            else:
                self._key_to_graph(dataset, graph, context, subj, key, obj, no_id=no_id)

        return subj

    # type error: Missing return statement
    def _get_nested_id(self, context: Context, node: Dict[str, Any]) -> Optional[str]:  # type: ignore[return]
        """Return the first string id found inside ``node``'s NEST entries.

        Only consulted for context version >= 1.1. Returns ``None``
        (implicitly) when no nested id exists.
        """
        for key, obj in node.items():
            if context.version >= 1.1 and key in context.get_keys(NEST):
                term = context.terms.get(key)
                if term and term.id is None:
                    continue
                objs = obj if isinstance(obj, list) else [obj]
                for obj in objs:
                    if not isinstance(obj, dict):
                        continue
                    id_val = context.get_id(obj)
                    if not id_val:
                        subcontext = context.get_context_for_term(
                            context.terms.get(key)
                        )
                        id_val = self._get_nested_id(subcontext, obj)
                    if isinstance(id_val, str):
                        return id_val

    def _key_to_graph(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        subj: Node,
        key: str,
        obj: Any,
        reverse: bool = False,
        no_id: bool = False,
    ) -> None:
        """Process one ``key: obj`` entry of the node identified by ``subj``.

        GRAPH, SET, INCLUDED and NEST keys recurse into their contents. Any
        other key is expanded to a predicate and one triple is added per
        resulting object.

        :param reverse: add ``(obj, pred, subj)`` instead of ``(subj, pred, obj)``
        :param no_id: the enclosing node had no id, so a GRAPH key does not
            open a named graph
        """
        if isinstance(obj, list):
            obj_nodes = obj
        else:
            obj_nodes = [obj]

        term = context.terms.get(key)
        if term:
            term_id = term.id
            if term.type == JSON:
                obj_nodes = [self._to_typed_json_value(obj)]
            elif LIST in term.container:
                obj_nodes = [self._expand_nested_list(obj_nodes)]
            elif isinstance(obj, dict):
                obj_nodes = self._parse_container(context, term, obj)
        else:
            term_id = None

        if TYPE in (key, term_id):
            term = TYPE_TERM

        if GRAPH in (key, term_id):
            if dataset.context_aware and not no_id:
                if TYPE_CHECKING:
                    assert isinstance(dataset, ConjunctiveGraph)
                # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Node"; expected "Union[IdentifiedNode, str, None]"
                subgraph = dataset.get_context(subj)  # type: ignore[arg-type]
            else:
                subgraph = graph
            for onode in obj_nodes:
                self._add_to_graph(dataset, subgraph, context, onode)
            return

        if SET in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        if INCLUDED in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        if context.version >= 1.1 and key in context.get_keys(NEST):
            term = context.terms.get(key)
            if term and term.id is None:
                return
            objs = obj if isinstance(obj, list) else [obj]
            for obj in objs:
                if not isinstance(obj, dict):
                    continue
                for nkey, nobj in obj.items():
                    # NOTE: we've already captured subject
                    if nkey in context.get_keys(ID):
                        continue
                    subcontext = context.get_context_for_type(obj)
                    # type error: Argument 3 to "_key_to_graph" of "Parser" has incompatible type "Optional[Context]"; expected "Context"
                    self._key_to_graph(dataset, graph, subcontext, subj, nkey, nobj)  # type: ignore[arg-type]
            return

        pred_uri = term.id if term else context.expand(key)

        context = context.get_context_for_term(term)

        # Flatten deep nested lists
        def flatten(n: Iterable[Any]) -> List[Any]:
            flattened = []
            for obj in n:
                if isinstance(obj, dict):
                    objs = context.get_set(obj)
                    if objs is not None:
                        obj = objs
                if isinstance(obj, list):
                    flattened += flatten(obj)
                    continue
                flattened.append(obj)
            return flattened

        obj_nodes = flatten(obj_nodes)

        if not pred_uri:
            return

        if term and term.reverse:
            reverse = not reverse

        pred: IdentifiedNode
        bid = self._get_bnodeid(pred_uri)
        if bid:
            # Blank-node predicates exist only in generalized RDF.
            if not self.generalized_rdf:
                return
            pred = BNode(bid)
            if self.skolemize:
                pred = pred.skolemize()
        else:
            pred = URIRef(pred_uri)

        for obj_node in obj_nodes:
            obj = self._to_object(dataset, graph, context, term, obj_node)
            if obj is None:
                continue
            if reverse:
                graph.add((obj, pred, subj))
            else:
                graph.add((subj, pred, obj))

    def _parse_container(
        self, context: Context, term: Term, obj: Dict[str, Any]
    ) -> List[Any]:
        """Expand the container map ``obj`` of ``term`` into a list of nodes.

        The shape of the result depends on which of LANG, GRAPH, ID, TYPE and
        INDEX appear in ``term.container``; without any of them ``[obj]`` is
        returned. Language-map values are returned as ``(value, lang)`` tuples.
        """
        if LANG in term.container:
            obj_nodes = []
            for lang, values in obj.items():
                if not isinstance(values, list):
                    values = [values]
                if lang in context.get_keys(NONE):
                    obj_nodes += values
                else:
                    for v in values:
                        obj_nodes.append((v, lang))
            return obj_nodes

        v11 = context.version >= 1.1

        if v11 and GRAPH in term.container and ID in term.container:
            return [
                (
                    dict({GRAPH: o})
                    if k in context.get_keys(NONE)
                    else dict({ID: k, GRAPH: o}) if isinstance(o, dict) else o
                )
                for k, o in obj.items()
            ]

        elif v11 and GRAPH in term.container and INDEX in term.container:
            return [dict({GRAPH: o}) for k, o in obj.items()]

        elif v11 and GRAPH in term.container:
            return [dict({GRAPH: obj})]

        elif v11 and ID in term.container:
            return [
                (
                    dict({ID: k}, **o)
                    if isinstance(o, dict) and k not in context.get_keys(NONE)
                    else o
                )
                for k, o in obj.items()
            ]

        elif v11 and TYPE in term.container:
            return [
                (
                    self._add_type(
                        context,
                        (
                            {ID: context.expand(o) if term.type == VOCAB else o}
                            if isinstance(o, str)
                            else o
                        ),
                        k,
                    )
                    if isinstance(o, (dict, str)) and k not in context.get_keys(NONE)
                    else o
                )
                for k, o in obj.items()
            ]

        elif INDEX in term.container:
            obj_nodes = []
            for key, nodes in obj.items():
                if not isinstance(nodes, list):
                    nodes = [nodes]
                for node in nodes:
                    if v11 and term.index and key not in context.get_keys(NONE):
                        # Property-valued index: record the map key on the node.
                        if not isinstance(node, dict):
                            node = {ID: node}
                        values = node.get(term.index, [])
                        if not isinstance(values, list):
                            values = [values]
                        values.append(key)
                        node[term.index] = values
                    obj_nodes.append(node)
            return obj_nodes

        return [obj]

    @staticmethod
    def _add_type(context: Context, o: Dict[str, Any], k: str) -> Dict[str, Any]:
        """Append type ``k`` to the types of node ``o`` (in place) and return ``o``."""
        otype = context.get_type(o) or []
        if otype and not isinstance(otype, list):
            otype = [otype]
        otype.append(k)
        o[TYPE] = otype
        return o

    def _to_object(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        term: Optional[Term],
        node: Any,
        inlist: bool = False,
    ) -> Optional[Node]:
        """Convert a JSON-LD value into the RDF term used as a triple's object.

        :param node: a ``(value, lang)`` tuple, a ``dict`` (value, list or
            node object) or a plain JSON scalar
        :param inlist: True when ``node`` is an item of a list
        :return: the resulting term, or ``None`` when the value must be skipped
        """
        if isinstance(node, tuple):
            value, lang = node
            if value is None:
                # type error: Return value expected
                return  # type: ignore[return-value]
            if lang and " " in lang:
                # type error: Return value expected
                return  # type: ignore[return-value]
            return Literal(value, lang=lang)

        if isinstance(node, dict):
            node_list = context.get_list(node)
            if node_list is not None:
                if inlist and not self.allow_lists_of_lists:
                    # type error: Return value expected
                    return  # type: ignore[return-value]
                listref = self._add_list(dataset, graph, context, term, node_list)
                if listref:
                    return listref

        else:  # expand compacted value
            if term and term.type:
                if term.type == JSON:
                    node = self._to_typed_json_value(node)
                elif node is None:
                    # type error: Return value expected
                    return  # type: ignore[return-value]
                elif term.type == ID and isinstance(node, str):
                    node = {ID: context.resolve(node)}
                elif term.type == VOCAB and isinstance(node, str):
                    node = {ID: context.expand(node) or context.resolve_iri(node)}
                else:
                    node = {TYPE: term.type, VALUE: node}
            else:
                if node is None:
                    # type error: Return value expected
                    return  # type: ignore[return-value]
                if isinstance(node, float):
                    return Literal(node, datatype=XSD.double)

                if term and term.language is not UNDEF:
                    lang = term.language
                else:
                    lang = context.language
                return Literal(node, lang=lang)

        lang = context.get_language(node)
        datatype = not lang and context.get_type(node) or None
        value = context.get_value(node)
        # type error: Unsupported operand types for in ("Optional[Any]" and "Generator[str, None, None]")
        if datatype in context.get_keys(JSON):  # type: ignore[operator]
            node = self._to_typed_json_value(value)
            datatype = context.get_type(node)
            value = context.get_value(node)

        if lang or context.get_key(VALUE) in node or VALUE in node:
            if value is None:
                return None
            if lang:
                if " " in lang:
                    # type error: Return value expected
                    return  # type: ignore[return-value]
                return Literal(value, lang=lang)
            elif datatype:
                return Literal(value, datatype=context.expand(datatype))
            else:
                return Literal(value)
        else:
            return self._add_to_graph(dataset, graph, context, node)

    def _to_rdf_id(self, context: Context, id_val: str) -> Optional[IdentifiedNode]:
        """Turn the id string ``id_val`` into a blank node or IRI.

        :return: ``None`` when the resolved id contains no ``:`` and
            generalized RDF is off
        """
        bid = self._get_bnodeid(id_val)
        if bid:
            b = BNode(bid)
            if self.skolemize:
                return b.skolemize()
            return b
        else:
            uri = context.resolve(id_val)
            if not self.generalized_rdf and ":" not in uri:
                return None
            node: IdentifiedNode = URIRef(uri)
            if not str(node):
                # Reuse one generated blank node per offending id value.
                if id_val not in self.invalid_uri_to_bnode:
                    self.invalid_uri_to_bnode[id_val] = BNode(secrets.token_urlsafe(20))
                node = self.invalid_uri_to_bnode[id_val]
            return node

    def _get_bnodeid(self, ref: str) -> Optional[str]:
        """Return the label of ``_:label``; ``None`` for anything else or an empty label."""
        if not ref.startswith("_:"):
            # type error: Return value expected
            return  # type: ignore[return-value]
        bid = ref.split("_:", 1)[-1]
        return bid or None

    def _add_list(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        term: Optional[Term],
        node_list: Any,
    ) -> IdentifiedNode:
        """Add ``node_list`` to ``graph`` as an ``rdf:first``/``rdf:rest`` chain.

        Items that are ``None`` or that do not convert to an RDF term are
        left out of the list.

        :return: the head cell of the list, or ``rdf:nil`` when no item
            produced a term
        """
        if not isinstance(node_list, list):
            node_list = [node_list]

        first_subj: Union[URIRef, BNode] = BNode()
        if self.skolemize and isinstance(first_subj, BNode):
            first_subj = first_subj.skolemize()

        rest: Union[URIRef, BNode, None]
        subj, rest = first_subj, None

        for node in node_list:
            if node is None:
                continue

            # Convert before linking: a cell may only be chained in once we
            # know it receives an rdf:first. Linking first would leave a cell
            # without rdf:first (and a self-referencing rdf:rest on the next
            # item) whenever an item converts to None.
            obj = self._to_object(dataset, graph, context, term, node, inlist=True)

            if obj is None:
                continue

            if rest:
                # type error: Statement is unreachable
                graph.add((subj, RDF.rest, rest))  # type: ignore[unreachable]
                subj = rest

            graph.add((subj, RDF.first, obj))
            rest = BNode()
            if self.skolemize and isinstance(rest, BNode):
                rest = rest.skolemize()

        if rest:
            # At least one item was emitted: terminate the chain.
            graph.add((subj, RDF.rest, RDF.nil))
            return first_subj
        else:
            return RDF.nil

    @staticmethod
    def _to_typed_json_value(value: Any) -> Dict[str, str]:
        """Wrap ``value`` as a value object typed ``rdf:JSON``.

        The value is serialized with sorted keys, by orjson when available and
        by the stdlib ``json`` module otherwise.
        """
        if _HAS_ORJSON:
            val_string: str = orjson.dumps(
                value,
                option=orjson.OPT_SORT_KEYS | orjson.OPT_NON_STR_KEYS,
            ).decode("utf-8")
        else:
            val_string = json.dumps(
                value, separators=(",", ":"), sort_keys=True, ensure_ascii=False
            )
        return {
            TYPE: RDF.JSON,
            VALUE: val_string,
        }

    @classmethod
    def _expand_nested_list(cls, obj_nodes: List[Any]) -> Dict[str, List[Any]]:
        """Recursively wrap ``obj_nodes`` and every nested list in ``{LIST: [...]}``."""
        result = [
            cls._expand_nested_list(o) if isinstance(o, list) else o for o in obj_nodes
        ]
        return {LIST: result}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
This is an rdflib plugin for parsing N-Quads files into Conjunctive
|
||||
graphs that can be used and queried. The store that backs the graph
|
||||
*must* be able to handle contexts.
|
||||
|
||||
>>> from rdflib import ConjunctiveGraph, URIRef, Namespace
|
||||
>>> g = ConjunctiveGraph()
|
||||
>>> data = open("test/data/nquads.rdflib/example.nquads", "rb")
|
||||
>>> g.parse(data, format="nquads") # doctest:+ELLIPSIS
|
||||
<Graph identifier=... (<class 'rdflib.graph.Graph'>)>
|
||||
>>> assert len(g.store) == 449
|
||||
>>> # There should be 16 separate contexts
|
||||
>>> assert len([x for x in g.store.contexts()]) == 16
|
||||
>>> # is the name of entity E10009 "Arco Publications"?
|
||||
>>> # (in graph http://bibliographica.org/entity/E10009)
|
||||
>>> # Looking for:
|
||||
>>> # <http://bibliographica.org/entity/E10009>
|
||||
>>> # <http://xmlns.com/foaf/0.1/name>
|
||||
>>> # "Arco Publications"
|
||||
>>> # <http://bibliographica.org/entity/E10009>
|
||||
>>> s = URIRef("http://bibliographica.org/entity/E10009")
|
||||
>>> FOAF = Namespace("http://xmlns.com/foaf/0.1/")
|
||||
>>> assert(g.value(s, FOAF.name).eq("Arco Publications"))
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from codecs import getreader
|
||||
from typing import Any, MutableMapping, Optional
|
||||
|
||||
from rdflib.exceptions import ParserError as ParseError
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.parser import InputSource
|
||||
|
||||
# Build up from the NTriples parser:
|
||||
from rdflib.plugins.parsers.ntriples import W3CNTriplesParser, r_tail, r_wspace
|
||||
from rdflib.term import BNode
|
||||
|
||||
__all__ = ["NQuadsParser"]
|
||||
|
||||
_BNodeContextType = MutableMapping[str, BNode]
|
||||
|
||||
|
||||
class NQuadsParser(W3CNTriplesParser):
    """N-Quads parser built on the N-Triples parser; adds the graph component."""

    # type error: Signature of "parse" incompatible with supertype "W3CNTriplesParser"
    def parse(  # type: ignore[override]
        self,
        inputsource: InputSource,
        sink: Graph,
        bnode_context: Optional[_BNodeContextType] = None,
        skolemize: bool = False,
        **kwargs: Any,
    ):
        """
        Parse inputsource as an N-Quads file.

        :type inputsource: `rdflib.parser.InputSource`
        :param inputsource: the source of N-Quads-formatted data
        :type sink: `rdflib.graph.Graph`
        :param sink: where to send parsed triples; its store must be
            context-aware
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
                              See `.W3CNTriplesParser.parse`
        :type skolemize: `bool`
        :param skolemize: stored on the parser as ``self.skolemize``
        :return: the `Dataset` wrapping ``sink``'s store that received the quads
        """
        assert (
            sink.store.context_aware
        ), "NQuadsParser must be given a context-aware store."

        # Set default_union to True to mimic ConjunctiveGraph behavior
        dataset = Dataset(store=sink.store, default_union=True)
        original_default = dataset.default_context  # the DEFAULT_DATASET_GRAPH_ID

        # Work out which graph receives statements without a graph label.
        if isinstance(sink, (Dataset, ConjunctiveGraph)):
            replacement_default = sink.default_context
        elif sink.identifier is None:
            replacement_default = None
        elif sink.identifier == original_default.identifier:
            replacement_default = sink
        else:
            replacement_default = dataset.get_context(sink.identifier)

        if replacement_default is not None:
            dataset.default_context = replacement_default
            # remove the original unused default graph
            dataset.remove_graph(original_default)

        # type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]")
        self.sink: Dataset = dataset  # type: ignore[assignment]
        self.skolemize = skolemize

        # Use the character stream when there is one, else decode the bytes.
        stream = inputsource.getCharacterStream()
        if not stream:
            byte_stream = inputsource.getByteStream()
            stream = getreader("utf-8")(byte_stream)

        if not hasattr(stream, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = stream
        self.buffer = ""
        while True:
            current_line = self.readline()
            self.line = current_line
            if current_line is None:
                break
            try:
                self.parseline(bnode_context)
            except ParseError as msg:
                # Report the untouched input line, not what is left of it.
                raise ParseError("Invalid line (%s):\n%r" % (msg, current_line))

        return self.sink

    def parseline(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
        """Parse the statement held in ``self.line`` and add it to the sink.

        Blank lines and ``#`` comment lines are ignored.

        :raises ParseError: if anything remains on the line after the
            terminating tail has been consumed
        """
        self.eat(r_wspace)
        if not self.line or self.line.startswith("#"):
            return  # The line is empty or a comment

        subject = self.subject(bnode_context)
        self.eat(r_wspace)

        predicate = self.predicate()
        self.eat(r_wspace)

        obj = self.object(bnode_context)
        self.eat(r_wspace)

        # Optional fourth component: an IRI or a blank node naming the graph.
        context = self.uriref() or self.nodeid(bnode_context)
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")

        # Must have a context aware store - add on a normal Graph
        # discards anything where the ctx != graph.identifier
        target = (
            self.sink.get_context(context) if context else self.sink.default_context
        )
        target.add((subject, predicate, obj))
|
||||
@@ -0,0 +1,385 @@
|
||||
"""\
|
||||
N-Triples Parser
|
||||
License: GPL 2, W3C, BSD, or MIT
|
||||
Author: Sean B. Palmer, inamidst.com
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import re
|
||||
from io import BytesIO, StringIO, TextIOBase
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Match,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Pattern,
|
||||
TextIO,
|
||||
Union,
|
||||
)
|
||||
|
||||
from rdflib.compat import _string_escape_map, decodeUnicodeEscape
|
||||
from rdflib.exceptions import ParserError as ParseError
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.term import BNode as bNode
|
||||
from rdflib.term import Literal, URIRef
|
||||
from rdflib.term import URIRef as URI # noqa: N814
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import typing_extensions as te
|
||||
|
||||
from rdflib.graph import Graph, _ObjectType, _PredicateType, _SubjectType
|
||||
|
||||
__all__ = [
|
||||
"unquote",
|
||||
"uriquote",
|
||||
"W3CNTriplesParser",
|
||||
"NTGraphSink",
|
||||
"NTParser",
|
||||
"DummySink",
|
||||
]
|
||||
|
||||
# Source fragment for an IRI reference in angle brackets; group 1 is the text
# between "<" and ">", which must contain a ":".
uriref = r'<([^:]+:[^\s"<>]*)>'
# Source fragment for a double-quoted string; group 1 is the still-escaped
# content (backslash escapes are skipped over, not decoded).
literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
# Optional literal suffix: either "@lang" (language tag captured) or
# "^^<datatype>" (datatype captured by the embedded uriref group).
litinfo = r"(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)|\^\^" + uriref + r")?"

# One input line: group 1 is the content before a CRLF, CR or LF terminator.
r_line = re.compile(r"([^\r\n]*)(?:\r\n|\r|\n)")
# Possibly empty run of spaces/tabs.
r_wspace = re.compile(r"[ \t]*")
# Non-empty run of spaces/tabs.
r_wspaces = re.compile(r"[ \t]+")
# Statement terminator: "." with optional blanks around it and an optional
# trailing "#" comment.
r_tail = re.compile(r"[ \t]*\.[ \t]*(#.*)?")
r_uriref = re.compile(uriref)
# Blank node label introduced by "_:"; group 1 is the label itself.
r_nodeid = re.compile(r"_:([A-Za-z0-9_:]([-A-Za-z0-9_:\.]*[-A-Za-z0-9_:])?)")
# Full literal: groups are (string content, language tag, datatype IRI).
r_literal = re.compile(literal + litinfo)

# Number of characters requested per read() call in W3CNTriplesParser.readline.
bufsiz = 2048
# When true, unquote() and uriquote() take their strict, hand-written paths.
validate = False
|
||||
|
||||
|
||||
class DummySink:
    """Fallback sink that counts the triples it receives and prints them."""

    def __init__(self):
        # Number of triples passed to ``triple`` so far.
        self.length = 0

    def triple(self, s, p, o):
        """Record one triple: bump ``length`` and print the three terms."""
        self.length = self.length + 1
        print(s, p, o)
|
||||
|
||||
|
||||
# Run of characters accepted verbatim by unquote() in validate mode:
# printable ASCII (0x20-0x7E) except the double quote and the backslash.
r_safe = re.compile(r"([\x20\x21\x23-\x5B\x5D-\x7E]+)")
# Single-character backslash escape; group 1 is the escaped character.
r_quot = re.compile(r"""\\([tbnrf"'\\])""")
# Numeric escape: \uXXXX (group 1) or \UXXXXXXXX (group 2), hex digits only.
r_uniquot = re.compile(r"\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8})")
|
||||
|
||||
|
||||
def unquote(s: str) -> str:
    """Decode the escape sequences of an N-Triples string.

    With the module-level ``validate`` flag off, decoding is delegated to
    ``decodeUnicodeEscape`` (or to the ``unicode-escape`` codec for
    non-``str`` input). With the flag on, the string is scanned piece by
    piece and anything that is neither a safe character run, a
    single-character escape nor a ``\\u``/``\\U`` escape is rejected.

    :raises ParseError: in validate mode, on an illegal escape, an illegal
        literal character, or a code point above 0x10FFFF.
    """
    if not validate:
        if isinstance(s, str):  # nquads
            return decodeUnicodeEscape(s)
        return s.decode("unicode-escape")  # type: ignore[unreachable]

    pieces = []
    rest = s
    while rest:
        safe_run = r_safe.match(rest)
        if safe_run:
            pieces.append(safe_run.group(1))
            rest = rest[safe_run.end() :]
            continue

        char_escape = r_quot.match(rest)
        if char_escape:
            pieces.append(_string_escape_map[char_escape.group(1)])
            # A simple escape is always a backslash plus one character.
            rest = rest[2:]
            continue

        numeric_escape = r_uniquot.match(rest)
        if not numeric_escape:
            if rest.startswith("\\"):
                raise ParseError("Illegal escape at: %s..." % rest[:10])
            raise ParseError("Illegal literal character: %r" % rest[0])

        rest = rest[numeric_escape.end() :]
        short_hex, long_hex = numeric_escape.groups()
        codepoint = int(short_hex or long_hex, 16)
        if codepoint > 0x10FFFF:
            raise ParseError("Disallowed codepoint: %08X" % codepoint)
        pieces.append(chr(codepoint))
    return "".join(pieces)
|
||||
|
||||
|
||||
r_hibyte = re.compile(r"([\x80-\xFF])")
|
||||
|
||||
|
||||
def uriquote(uri: str) -> str:
    """Percent-encode the characters matched by ``r_hibyte`` in *uri*.

    The encoding is only applied when the module-level ``validate`` flag is
    set; otherwise *uri* is returned unchanged.
    """

    def _percent_encode(match):
        # Render the matched character's code point as "%XX".
        return "%%%02X" % ord(match.group(1))

    if validate:
        return r_hibyte.sub(_percent_encode, uri)
    return uri
|
||||
|
||||
|
||||
_BNodeContextType = MutableMapping[str, bNode]
|
||||
|
||||
|
||||
class W3CNTriplesParser:
    """An N-Triples Parser.

    This is a legacy-style Triples parser for NTriples provided by W3C.
    Usage::

          p = W3CNTriplesParser(sink=MySink())
          sink = p.parse(f) # file; use parsestring for a string

    To define a context in which blank node identifiers refer to the same blank node
    across instances of W3CNTriplesParser, pass the same dict as ``bnode_context`` to each
    instance. By default, a new blank node context is created for each instance of
    `W3CNTriplesParser`.
    """

    __slots__ = ("_bnode_ids", "sink", "buffer", "file", "line", "skolemize")

    def __init__(
        self,
        sink: Optional[Union[DummySink, NTGraphSink]] = None,
        bnode_context: Optional[_BNodeContextType] = None,
    ):
        """
        :param sink: object whose ``triple(s, p, o)`` method receives every
            parsed statement; a `DummySink` is created when omitted.
        :param bnode_context: mapping from blank node labels to blank nodes
            used when `parse` is not given its own mapping; a fresh dict is
            created when omitted.
        """
        self.skolemize = False

        # ``is not None`` (rather than truthiness) so that an empty dict
        # supplied by the caller is kept and filled in place.
        if bnode_context is not None:
            self._bnode_ids = bnode_context
        else:
            self._bnode_ids = {}

        self.sink: Union[DummySink, NTGraphSink]
        if sink is not None:
            self.sink = sink
        else:
            self.sink = DummySink()

        self.buffer: Optional[str] = None
        self.file: Optional[Union[TextIO, codecs.StreamReader]] = None
        self.line: Optional[str] = ""

    def parse(
        self,
        f: Union[TextIO, IO[bytes], codecs.StreamReader],
        bnode_context: Optional[_BNodeContextType] = None,
        skolemize: bool = False,
    ) -> Union[DummySink, NTGraphSink]:
        """
        Parse f as an N-Triples file.

        :type f: :term:`file object`
        :param f: the N-Triples source
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``)
                              to `~rdflib.term.BNode` instances. An empty dict can be
                              passed in to define a distinct context for a given call to
                              `parse`.
        :param skolemize: when true, blank nodes are returned skolemized
                          instead of being mapped through ``bnode_context``.
        :return: the sink the parsed triples were sent to.
        :raises ParseError: if ``f`` has no ``read`` attribute or a line
                            cannot be parsed.
        """

        if not hasattr(f, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"):
            # someone still using a bytestream here?
            f = codecs.getreader("utf-8")(f)

        self.skolemize = skolemize
        self.file = f  # type: ignore[assignment]
        self.buffer = ""
        while True:
            self.line = self.readline()
            if self.line is None:
                break
            # parseline() consumes self.line as it goes, so keep the complete
            # line around for the error message.
            full_line = self.line
            try:
                self.parseline(bnode_context=bnode_context)
            except ParseError as err:
                raise ParseError("Invalid line: {}".format(full_line)) from err
        return self.sink

    def parsestring(self, s: Union[bytes, bytearray, str], **kwargs) -> None:
        """Parse s as an N-Triples string.

        Bytes input is decoded as UTF-8. Extra keyword arguments are passed
        on to `parse`.

        :raises ParseError: if ``s`` is not a ``str``, ``bytes`` or
            ``bytearray`` instance.
        """
        if not isinstance(s, (str, bytes, bytearray)):
            raise ParseError("Item to parse must be a string instance.")
        f: Union[codecs.StreamReader, StringIO]
        if isinstance(s, (bytes, bytearray)):
            f = codecs.getreader("utf-8")(BytesIO(s))
        else:
            f = StringIO(s)
        self.parse(f, **kwargs)

    def readline(self) -> Optional[str]:
        """Read an N-Triples line from buffered input.

        :return: the next line without its terminator, or ``None`` once the
            input is exhausted.
        """
        # N-Triples lines end in either CRLF, CR, or LF
        # Therefore, we can't just use f.readline()
        if not self.buffer:
            # type error: Item "None" of "Union[TextIO, StreamReader, None]" has no attribute "read"
            buffer = self.file.read(bufsiz)  # type: ignore[union-attr]
            if not buffer:
                return None
            self.buffer = buffer

        while True:
            m = r_line.match(self.buffer)
            if m:  # the more likely prospect
                self.buffer = self.buffer[m.end() :]
                return m.group(1)
            else:
                # No terminator buffered yet: pull in more input.
                # type error: Item "None" of "Union[TextIO, StreamReader, None]" has no attribute "read"
                buffer = self.file.read(bufsiz)  # type: ignore[union-attr]
                if not buffer and not self.buffer.isspace():
                    # Last line does not need to be terminated with a newline
                    buffer += "\n"
                elif not buffer:
                    return None
                self.buffer += buffer

    def parseline(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
        """Parse the current line as one triple and pass it to the sink.

        Empty lines and lines starting with ``#`` are ignored.

        :raises ParseError: if the line is not a well-formed statement.
        """
        self.eat(r_wspace)
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        subject = self.subject(bnode_context)
        self.eat(r_wspaces)

        predicate = self.predicate()
        self.eat(r_wspaces)

        object_ = self.object(bnode_context)
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage: {}".format(self.line))
        self.sink.triple(subject, predicate, object_)

    def peek(self, token: str) -> bool:
        """Return whether the unparsed part of the line starts with *token*."""
        return self.line.startswith(token)  # type: ignore[union-attr]

    def eat(self, pattern: Pattern[str]) -> Match[str]:
        """Match *pattern* at the start of the line and consume the match.

        :return: the match object.
        :raises ParseError: if the pattern does not match.
        """
        m = pattern.match(self.line)  # type: ignore[arg-type]
        if not m:
            raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line))
        self.line = self.line[m.end() :]  # type: ignore[index]
        return m

    def subject(self, bnode_context=None) -> Union[bNode, URIRef]:
        """Parse a subject term (IRI or blank node) or raise ``ParseError``."""
        # @@ Consider using dictionary cases
        subj = self.uriref() or self.nodeid(bnode_context)
        if not subj:
            raise ParseError("Subject must be uriref or nodeID")
        return subj

    def predicate(self) -> Union[bNode, URIRef]:
        """Parse a predicate term (IRI) or raise ``ParseError``."""
        pred = self.uriref()
        if not pred:
            raise ParseError("Predicate must be uriref")
        return pred

    def object(
        self, bnode_context: Optional[_BNodeContextType] = None
    ) -> Union[URI, bNode, Literal]:
        """Parse an object term (IRI, blank node or literal) or raise ``ParseError``."""
        objt = self.uriref() or self.nodeid(bnode_context) or self.literal()
        # Identity check on purpose: only the ``False`` returned by the term
        # parsers means "nothing matched".
        if objt is False:
            raise ParseError("Unrecognised object type")
        return objt

    def uriref(self) -> Union[te.Literal[False], URI]:
        """Parse an IRI reference if the line starts with ``<``, else return ``False``."""
        if self.peek("<"):
            uri = self.eat(r_uriref).group(1)
            uri = unquote(uri)
            uri = uriquote(uri)
            return URI(uri)
        return False

    def nodeid(
        self, bnode_context: Optional[_BNodeContextType] = None
    ) -> Union[te.Literal[False], bNode, URI]:
        """Parse a blank node if the line starts with ``_``, else return ``False``.

        In skolemize mode the result of ``bNode(label).skolemize()`` is
        returned. Otherwise the label is looked up in *bnode_context* (or in
        the instance-level mapping when none is given); unknown labels get a
        fresh blank node which is stored in the mapping.
        """
        if not self.peek("_"):
            return False

        bnode_id = self.eat(r_nodeid).group(1)
        if self.skolemize:
            return bNode(bnode_id).skolemize()

        # Fix for https://github.com/RDFLib/rdflib/issues/204
        if bnode_context is None:
            bnode_context = self._bnode_ids
        new_id = bnode_context.get(bnode_id, None)
        if new_id is not None:
            # Re-map to id specific to this doc
            return bNode(new_id)
        # Replace with freshly-generated document-specific BNode id
        bnode = bNode()
        # Store the mapping
        bnode_context[bnode_id] = bnode
        return bnode

    def literal(self) -> Union[te.Literal[False], Literal]:
        """Parse a literal if the line starts with ``"``, else return ``False``.

        :raises ParseError: if the literal is malformed or carries both a
            language tag and a datatype.
        """
        if not self.peek('"'):
            return False

        lit, lang, dtype = self.eat(r_literal).groups()
        # Unmatched optional groups are normalised to None.
        lang = lang or None
        if dtype:
            dtype = URI(uriquote(unquote(dtype)))
        else:
            dtype = None
        if lang and dtype:
            raise ParseError("Can't have both a language and a datatype")
        lit = unquote(lit)
        return Literal(lit, lang, dtype)
|
||||
|
||||
|
||||
class NTGraphSink:
    """Sink adapter that forwards every parsed triple to a graph's ``add``."""

    __slots__ = ("g",)

    def __init__(self, graph: Graph):
        # Destination that receives the triples.
        self.g = graph

    def triple(self, s: _SubjectType, p: _PredicateType, o: _ObjectType) -> None:
        """Add the statement ``(s, p, o)`` to the wrapped graph."""
        statement = (s, p, o)
        self.g.add(statement)
|
||||
|
||||
|
||||
class NTParser(Parser):
    """parser for the ntriples format, often stored with the .nt extension

    See http://www.w3.org/TR/rdf-testcases/#ntriples"""

    __slots__ = ()

    @classmethod
    def parse(cls, source: InputSource, sink: Graph, **kwargs: Any) -> None:
        """
        Parse the NT format

        :type source: `rdflib.parser.InputSource`
        :param source: the source of NT-formatted data
        :type sink: `rdflib.graph.Graph`
        :param sink: where to send parsed triples
        :param kwargs: Additional arguments to pass to `.W3CNTriplesParser.parse`
        """
        f: Union[TextIO, IO[bytes], codecs.StreamReader]
        f = source.getCharacterStream()
        if not f:
            b = source.getByteStream()
            # TextIOBase includes: StringIO and TextIOWrapper
            if isinstance(b, TextIOBase):
                # f is not really a ByteStream, but a CharacterStream
                f = b  # type: ignore[assignment]
            else:
                # since N-Triples 1.1 files can and should be utf-8 encoded
                f = codecs.getreader("utf-8")(b)
        parser = W3CNTriplesParser(NTGraphSink(sink))
        try:
            parser.parse(f, **kwargs)
        finally:
            # Release the stream even when parsing fails part-way through.
            f.close()
|
||||
@@ -0,0 +1,183 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from codecs import getreader
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Any, MutableMapping, Optional, Union
|
||||
|
||||
from rdflib.exceptions import ParserError as ParseError
|
||||
from rdflib.graph import Dataset
|
||||
from rdflib.parser import InputSource
|
||||
from rdflib.plugins.parsers.nquads import NQuadsParser
|
||||
|
||||
# Build up from the NTriples parser:
|
||||
from rdflib.plugins.parsers.ntriples import r_nodeid, r_tail, r_uriref, r_wspace
|
||||
from rdflib.term import BNode, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import typing_extensions as te
|
||||
|
||||
__all__ = ["RDFPatchParser", "Operation"]
|
||||
|
||||
_BNodeContextType = MutableMapping[str, BNode]
|
||||
|
||||
|
||||
class Operation(Enum):
    """
    Operation codes that may start a line of an RDF Patch document.

    Each member's value is the code as written in the patch file:

    ====================== ==== ==============================
    Member                 Code Meaning
    ====================== ==== ==============================
    ``AddTripleOrQuad``    A    Adds a triple or quad.
    ``DeleteTripleOrQuad`` D    Deletes a triple or quad.
    ``AddPrefix``          PA   Adds a prefix.
    ``DeletePrefix``       PD   Deletes a prefix.
    ``TransactionStart``   TX   Starts a transaction.
    ``TransactionCommit``  TC   Commits a transaction.
    ``TransactionAbort``   TA   Aborts a transaction.
    ``Header``             H    Specifies a header.
    ====================== ==== ==============================
    """

    # Data operations
    AddTripleOrQuad = "A"
    DeleteTripleOrQuad = "D"
    # Prefix operations
    AddPrefix = "PA"
    DeletePrefix = "PD"
    # Transaction markers
    TransactionStart = "TX"
    TransactionCommit = "TC"
    TransactionAbort = "TA"
    # Header line
    Header = "H"
|
||||
|
||||
|
||||
class RDFPatchParser(NQuadsParser):
    """Parser that applies the lines of an RDF Patch document to a dataset.

    Each line starts with an `Operation` code. Add/delete operations on
    triples or quads and add/delete operations on prefixes are applied to the
    sink; header and transaction lines are recognised but have no effect.
    """

    def parse(  # type: ignore[override]
        self,
        inputsource: InputSource,
        sink: Dataset,
        bnode_context: Optional[_BNodeContextType] = None,
        skolemize: bool = False,
        **kwargs: Any,
    ) -> Dataset:
        """
        Parse inputsource as an RDF Patch file.

        :type inputsource: `rdflib.parser.InputSource`
        :param inputsource: the source of RDF Patch formatted data
        :type sink: `rdflib.graph.Dataset`
        :param sink: where to send parsed data
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
                              See `.W3CNTriplesParser.parse`
        :return: the dataset (built on ``sink.store``) the patch was applied to.
        :raises ParseError: if the source is not file-like or a line is invalid.
        """
        assert sink.store.context_aware, (
            "RDFPatchParser must be given" " a context aware store."
        )
        # type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]")
        self.sink: Dataset = Dataset(store=sink.store)
        self.skolemize = skolemize

        source = inputsource.getCharacterStream()
        if not source:
            source = inputsource.getByteStream()
            source = getreader("utf-8")(source)

        if not hasattr(source, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = source
        self.buffer = ""
        while True:
            # Keep the untouched line: parsepatch() consumes self.line.
            self.line = raw_line = self.readline()
            if self.line is None:
                break
            try:
                self.parsepatch(bnode_context)
            except ParseError as msg:
                raise ParseError("Invalid line (%s):\n%r" % (msg, raw_line)) from msg
        return self.sink

    def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
        """Parse the current line and apply the operation it describes."""
        self.eat(r_wspace)
        # From spec: "No comments should be included (comments start # and run to end
        # of line)."
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        # if header, transaction, skip
        operation = self.operation()
        self.eat(r_wspace)

        if operation in [Operation.AddTripleOrQuad, Operation.DeleteTripleOrQuad]:
            self.add_or_remove_triple_or_quad(operation, bnode_context)
        elif operation == Operation.AddPrefix:
            self.add_prefix()
        elif operation == Operation.DeletePrefix:
            self.delete_prefix()

    def add_or_remove_triple_or_quad(
        self, operation, bnode_context: Optional[_BNodeContextType] = None
    ) -> None:
        """Parse the rest of the line as a triple or quad and add or remove it.

        :param operation: `Operation.AddTripleOrQuad` or
            `Operation.DeleteTripleOrQuad`; any other value parses the line
            but changes nothing.
        :raises ParseError: if text remains after the statement terminator.
        """
        self.eat(r_wspace)
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        subject = self.labeled_bnode() or self.subject(bnode_context)
        self.eat(r_wspace)

        predicate = self.predicate()
        self.eat(r_wspace)

        obj = self.labeled_bnode() or self.object(bnode_context)
        self.eat(r_wspace)

        # Optional fourth term naming the graph.
        context = self.labeled_bnode() or self.uriref() or self.nodeid(bnode_context)
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")
        # Must have a context aware store - add on a normal Graph
        # discards anything where the ctx != graph.identifier
        if operation == Operation.AddTripleOrQuad:
            if context:
                self.sink.get_context(context).add((subject, predicate, obj))
            else:
                self.sink.default_context.add((subject, predicate, obj))
        elif operation == Operation.DeleteTripleOrQuad:
            if context:
                self.sink.get_context(context).remove((subject, predicate, obj))
            else:
                self.sink.default_context.remove((subject, predicate, obj))

    def add_prefix(self):
        """Bind the prefix named on the current line to its namespace."""
        # Extract prefix and URI from the line
        prefix, ns, _ = self.line.replace('"', "").replace("'", "").split(" ")  # type: ignore[union-attr]
        ns_stripped = ns.strip("<>")
        self.sink.bind(prefix, ns_stripped)

    def delete_prefix(self):
        """Rebind the prefix named on the current line to ``None``."""
        prefix, _, _ = self.line.replace('"', "").replace("'", "").split(" ")  # type: ignore[union-attr]
        self.sink.namespace_manager.bind(prefix, None, replace=True)

    def operation(self) -> Operation:
        """Identify and consume the operation code at the start of the line.

        :raises ValueError: if the line does not start with a known code.
        """
        for op in Operation:
            if self.line.startswith(op.value):  # type: ignore[union-attr]
                self.eat_op(op.value)
                return op
        raise ValueError(
            f'Invalid or no Operation found in line: "{self.line}". Valid Operations '
            f"codes are {', '.join([op.value for op in Operation])}"
        )

    def eat_op(self, op: str) -> None:
        """Remove the operation code *op* from the start of the line.

        Only the exact prefix is removed. ``str.lstrip`` is not suitable
        here because it strips any run of the given characters.
        """
        line = self.line
        if line.startswith(op):  # type: ignore[union-attr]
            self.line = line[len(op) :]  # type: ignore[index]

    def nodeid(
        self, bnode_context: Optional[_BNodeContextType] = None
    ) -> Union[te.Literal[False], BNode, URIRef]:
        """Parse a ``_:label`` blank node, keeping the label as written.

        *bnode_context* is accepted for signature compatibility and not used.
        Returns ``False`` when the line does not start with ``_``.
        """
        if self.peek("_"):
            return BNode(self.eat(r_nodeid).group(1))
        return False

    def labeled_bnode(self):
        """Parse a blank node written as ``<_:label>``.

        Returns ``False`` when the line does not start with ``<_``.

        :raises ParseError: if the bracketed text is not a blank node label.
        """
        if self.peek("<_"):
            plain_uri = self.eat(r_uriref).group(1)
            label = r_nodeid.match(plain_uri)
            if label is None:
                raise ParseError("Invalid labeled blank node: <%s>" % plain_uri)
            return BNode(label.group(1))
        return False
|
||||
@@ -0,0 +1,651 @@
|
||||
"""
|
||||
An RDF/XML parser for RDFLib
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple
|
||||
from urllib.parse import urldefrag, urljoin
|
||||
from xml.sax import handler, make_parser, xmlreader
|
||||
from xml.sax.handler import ErrorHandler
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.exceptions import Error, ParserError
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, is_ncname
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.plugins.parsers.RDFVOC import RDFVOC
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# from xml.sax.expatreader import ExpatLocator
|
||||
from xml.sax.xmlreader import AttributesImpl, Locator
|
||||
|
||||
from rdflib.graph import _ObjectType, _SubjectType, _TripleType
|
||||
|
||||
__all__ = ["create_parser", "BagID", "ElementHandler", "RDFXMLHandler", "RDFXMLParser"]
|
||||
|
||||
# Alias under which the RDF vocabulary is referenced in this module.
RDFNS = RDFVOC

# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
# A mapping from unqualified terms to their qualified version.
UNQUALIFIED = {
    "about": RDFVOC.about,
    "ID": RDFVOC.ID,
    "type": RDFVOC.type,
    "resource": RDFVOC.resource,
    "parseType": RDFVOC.parseType,
}

# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
CORE_SYNTAX_TERMS = [
    RDFVOC.RDF,
    RDFVOC.ID,
    RDFVOC.about,
    RDFVOC.parseType,
    RDFVOC.resource,
    RDFVOC.nodeID,
    RDFVOC.datatype,
]

# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDFVOC.Description, RDFVOC.li]

# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
OLD_TERMS = [
    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"),
]

# Names rejected as node element URIs (checked in node_element_start).
NODE_ELEMENT_EXCEPTIONS = (
    CORE_SYNTAX_TERMS
    + [
        RDFVOC.li,
    ]
    + OLD_TERMS
)
# Attributes that identify a node element's subject; node_element_start skips
# them when turning attributes into triples.
NODE_ELEMENT_ATTRIBUTES = [RDFVOC.ID, RDFVOC.nodeID, RDFVOC.about]

# Names rejected as property element URIs (checked in property_element_start).
PROPERTY_ELEMENT_EXCEPTIONS = (
    CORE_SYNTAX_TERMS
    + [
        RDFVOC.Description,
    ]
    + OLD_TERMS
)
# Names rejected as property attribute URIs (checked in node_element_start).
PROPERTY_ATTRIBUTE_EXCEPTIONS = (
    CORE_SYNTAX_TERMS + [RDFVOC.Description, RDFVOC.li] + OLD_TERMS
)
# NOTE(review): presumably the attributes with special meaning on property
# elements; no use is visible in this part of the file - confirm.
PROPERTY_ELEMENT_ATTRIBUTES = [RDFVOC.ID, RDFVOC.resource, RDFVOC.nodeID]

XMLNS = "http://www.w3.org/XML/1998/namespace"
# (namespace, local name) keys used to look up xml:base and xml:lang in the
# attributes passed to startElementNS.
BASE = (XMLNS, "base")
LANG = (XMLNS, "lang")
|
||||
|
||||
|
||||
class BagID(URIRef):
    """A ``URIRef`` that also hands out successive ``rdf:_n`` member properties."""

    __slots__ = ["li"]

    def __init__(self, val):
        """
        :param val: the URI value; it is not used here, and the inherited
            ``__init__`` is called without arguments.
        """
        # The inherited __init__ is object.__init__, which rejects extra
        # arguments once __init__ is overridden, so do not forward ``val``.
        super().__init__()
        # Index of the last member property handed out by next_li().
        self.li = 0

    def next_li(self):
        """Advance the member counter and return the matching ``rdf:_n`` term."""
        self.li += 1
        # type error: Type expected within [...]
        return RDFNS["_%s" % self.li]  # type: ignore[misc]
|
||||
|
||||
|
||||
class ElementHandler:
    """Per-element parsing state kept on the RDF/XML handler's stack.

    ``start``, ``char`` and ``end`` hold the callbacks invoked for the
    element's start tag, character data and end tag; the remaining slots hold
    values computed while the element is processed.
    """

    __slots__ = [
        "start",
        "char",
        "end",
        "li",
        "id",
        "base",
        "subject",
        "predicate",
        "object",
        "list",
        "language",
        "datatype",
        "declared",
        "data",
    ]

    def __init__(self):
        self.start = None
        self.char = None
        self.end = None
        # Index of the last rdf:_n property handed out by next_li().
        self.li = 0
        self.id = None
        self.base = None
        self.subject = None
        # Initialised like every other slot so that reading it before a
        # property element sets it yields None instead of AttributeError.
        self.predicate = None
        self.object = None
        self.list = None
        self.language = None
        self.datatype = None
        self.declared = None
        self.data = None

    def next_li(self):
        """Advance the member counter and return the matching ``rdf:_n`` term."""
        self.li += 1
        return RDFVOC["_%s" % self.li]
|
||||
|
||||
|
||||
class RDFXMLHandler(handler.ContentHandler):
|
||||
def __init__(self, store: Graph):
|
||||
self.store = store
|
||||
self.preserve_bnode_ids = False
|
||||
self.reset()
|
||||
|
||||
def reset(self) -> None:
|
||||
document_element = ElementHandler()
|
||||
document_element.start = self.document_element_start
|
||||
document_element.end = lambda name, qname: None
|
||||
self.stack: List[Optional[ElementHandler]] = [
|
||||
None,
|
||||
document_element,
|
||||
]
|
||||
self.ids: Dict[str, int] = {} # remember IDs we have already seen
|
||||
self.bnode: Dict[str, Identifier] = {}
|
||||
self._ns_contexts: List[Dict[str, Optional[str]]] = [
|
||||
{}
|
||||
] # contains uri -> prefix dicts
|
||||
self._current_context: Dict[str, Optional[str]] = self._ns_contexts[-1]
|
||||
|
||||
# ContentHandler methods
|
||||
|
||||
def setDocumentLocator(self, locator: Locator):
|
||||
self.locator = locator
|
||||
|
||||
def startDocument(self) -> None:
|
||||
pass
|
||||
|
||||
def startPrefixMapping(self, prefix: Optional[str], namespace: str) -> None:
|
||||
self._ns_contexts.append(self._current_context.copy())
|
||||
self._current_context[namespace] = prefix
|
||||
self.store.bind(prefix, namespace or "", override=False)
|
||||
|
||||
def endPrefixMapping(self, prefix: Optional[str]) -> None:
|
||||
self._current_context = self._ns_contexts[-1]
|
||||
del self._ns_contexts[-1]
|
||||
|
||||
def startElementNS(
|
||||
self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl
|
||||
) -> None:
|
||||
stack = self.stack
|
||||
stack.append(ElementHandler())
|
||||
current = self.current
|
||||
parent = self.parent
|
||||
# type error: No overload for "get" of "AttributesImpl" matches tuple (str, str)
|
||||
base = attrs.get(BASE, None) # type: ignore[call-overload, unused-ignore]
|
||||
if base is not None:
|
||||
base, frag = urldefrag(base)
|
||||
if parent and parent.base:
|
||||
base = urljoin(parent.base, base)
|
||||
else:
|
||||
systemId = self.locator.getPublicId() or self.locator.getSystemId()
|
||||
if systemId:
|
||||
base = urljoin(systemId, base)
|
||||
else:
|
||||
if parent:
|
||||
base = parent.base
|
||||
if base is None:
|
||||
systemId = self.locator.getPublicId() or self.locator.getSystemId()
|
||||
if systemId:
|
||||
base, frag = urldefrag(systemId)
|
||||
current.base = base
|
||||
# type error: No overload for "get" of "AttributesImpl" matches tuple (str, str)
|
||||
language = attrs.get(LANG, None) # type: ignore[call-overload, unused-ignore]
|
||||
if language is None:
|
||||
if parent:
|
||||
language = parent.language
|
||||
current.language = language
|
||||
current.start(name, qname, attrs)
|
||||
|
||||
def endElementNS(self, name: Tuple[Optional[str], str], qname) -> None:
|
||||
self.current.end(name, qname)
|
||||
self.stack.pop()
|
||||
|
||||
def characters(self, content: str) -> None:
|
||||
char = self.current.char
|
||||
if char:
|
||||
char(content)
|
||||
|
||||
def ignorableWhitespace(self, content) -> None:
|
||||
pass
|
||||
|
||||
def processingInstruction(self, target, data) -> None:
|
||||
pass
|
||||
|
||||
def add_reified(self, sid: Identifier, spo: _TripleType):
|
||||
s, p, o = spo
|
||||
self.store.add((sid, RDF.type, RDF.Statement))
|
||||
self.store.add((sid, RDF.subject, s))
|
||||
self.store.add((sid, RDF.predicate, p))
|
||||
self.store.add((sid, RDF.object, o))
|
||||
|
||||
def error(self, message: str) -> NoReturn:
|
||||
locator = self.locator
|
||||
info = "%s:%s:%s: " % (
|
||||
locator.getSystemId(),
|
||||
locator.getLineNumber(),
|
||||
locator.getColumnNumber(),
|
||||
)
|
||||
raise ParserError(info + message)
|
||||
|
||||
def get_current(self) -> Optional[ElementHandler]:
|
||||
return self.stack[-2]
|
||||
|
||||
# Create a read only property called current so that self.current
|
||||
# give the current element handler.
|
||||
current = property(get_current)
|
||||
|
||||
def get_next(self) -> Optional[ElementHandler]:
|
||||
return self.stack[-1]
|
||||
|
||||
# Create a read only property that gives the element handler to be
|
||||
# used for the next element.
|
||||
next = property(get_next)
|
||||
|
||||
def get_parent(self) -> Optional[ElementHandler]:
|
||||
return self.stack[-3]
|
||||
|
||||
# Create a read only property that gives the current parent
|
||||
# element handler
|
||||
parent = property(get_parent)
|
||||
|
||||
def absolutize(self, uri: str) -> URIRef:
|
||||
# type error: Argument "allow_fragments" to "urljoin" has incompatible type "int"; expected "bool"
|
||||
result = urljoin(self.current.base, uri, allow_fragments=1) # type: ignore[arg-type]
|
||||
if uri and uri[-1] == "#" and result[-1] != "#":
|
||||
result = "%s#" % result
|
||||
return URIRef(result)
|
||||
|
||||
def convert(
|
||||
self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl
|
||||
) -> Tuple[URIRef, Dict[URIRef, str]]:
|
||||
if name[0] is None:
|
||||
# type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[Optional[str], str]")
|
||||
name = URIRef(name[1]) # type: ignore[assignment]
|
||||
else:
|
||||
# type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[Optional[str], str]")
|
||||
# type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Optional[str], str]"; expected "Iterable[str]"
|
||||
name = URIRef("".join(name)) # type: ignore[assignment, arg-type]
|
||||
atts = {}
|
||||
for n, v in attrs.items():
|
||||
# mypy error: mypy thinks n[0]==None is unreachable
|
||||
if n[0] is None:
|
||||
att = n[1] # type: ignore[unreachable, unused-ignore]
|
||||
else:
|
||||
att = "".join(n)
|
||||
if att.startswith(XMLNS) or att[0:3].lower() == "xml":
|
||||
pass
|
||||
elif att in UNQUALIFIED:
|
||||
# if not RDFNS[att] in atts:
|
||||
# type error: Variable "att" is not valid as a type
|
||||
atts[RDFNS[att]] = v # type: ignore[misc, valid-type]
|
||||
else:
|
||||
atts[URIRef(att)] = v
|
||||
# type error: Incompatible return value type (got "Tuple[Tuple[Optional[str], str], Dict[Any, Any]]", expected "Tuple[URIRef, Dict[URIRef, str]]")
|
||||
return name, atts # type: ignore[return-value]
|
||||
|
||||
def document_element_start(
|
||||
self, name: Tuple[str, str], qname, attrs: AttributesImpl
|
||||
) -> None:
|
||||
if name[0] and URIRef("".join(name)) == RDFVOC.RDF:
|
||||
next = self.next
|
||||
next.start = self.node_element_start
|
||||
next.end = self.node_element_end
|
||||
else:
|
||||
self.node_element_start(name, qname, attrs)
|
||||
# self.current.end = self.node_element_end
|
||||
# TODO... set end to something that sets start such that
|
||||
# another element will cause error
|
||||
|
||||
def node_element_start(
|
||||
self, name: Tuple[str, str], qname, attrs: AttributesImpl
|
||||
) -> None:
|
||||
# type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[str, str]")
|
||||
name, atts = self.convert(name, qname, attrs) # type: ignore[assignment]
|
||||
current = self.current
|
||||
absolutize = self.absolutize
|
||||
|
||||
next = self.next
|
||||
next.start = self.property_element_start
|
||||
next.end = self.property_element_end
|
||||
|
||||
if name in NODE_ELEMENT_EXCEPTIONS:
|
||||
# type error: Not all arguments converted during string formatting
|
||||
self.error("Invalid node element URI: %s" % name) # type: ignore[str-format]
|
||||
subject: _SubjectType
|
||||
if RDFVOC.ID in atts:
|
||||
if RDFVOC.about in atts or RDFVOC.nodeID in atts:
|
||||
self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
|
||||
|
||||
id = atts[RDFVOC.ID]
|
||||
if not is_ncname(id):
|
||||
self.error("rdf:ID value is not a valid NCName: %s" % id)
|
||||
subject = absolutize("#%s" % id)
|
||||
if subject in self.ids:
|
||||
self.error("two elements cannot use the same ID: '%s'" % subject)
|
||||
self.ids[subject] = 1 # IDs can only appear once within a document
|
||||
elif RDFVOC.nodeID in atts:
|
||||
if RDFVOC.ID in atts or RDFVOC.about in atts:
|
||||
self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
|
||||
nodeID = atts[RDFVOC.nodeID]
|
||||
if not is_ncname(nodeID):
|
||||
self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
|
||||
if self.preserve_bnode_ids is False:
|
||||
if nodeID in self.bnode:
|
||||
subject = self.bnode[nodeID]
|
||||
else:
|
||||
subject = BNode()
|
||||
self.bnode[nodeID] = subject
|
||||
else:
|
||||
subject = BNode(nodeID)
|
||||
elif RDFVOC.about in atts:
|
||||
if RDFVOC.ID in atts or RDFVOC.nodeID in atts:
|
||||
self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
|
||||
subject = absolutize(atts[RDFVOC.about])
|
||||
else:
|
||||
subject = BNode()
|
||||
|
||||
if name != RDFVOC.Description: # S1
|
||||
# error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str"
|
||||
self.store.add((subject, RDF.type, absolutize(name))) # type: ignore[arg-type]
|
||||
|
||||
object: _ObjectType
|
||||
language = current.language
|
||||
for att in atts:
|
||||
if not att.startswith(str(RDFNS)):
|
||||
predicate = absolutize(att)
|
||||
try:
|
||||
object = Literal(atts[att], language)
|
||||
except Error as e:
|
||||
# type error: Argument 1 to "error" of "RDFXMLHandler" has incompatible type "Optional[str]"; expected "str"
|
||||
self.error(e.msg) # type: ignore[arg-type]
|
||||
elif att == RDF.type: # S2
|
||||
predicate = RDF.type
|
||||
object = absolutize(atts[RDF.type])
|
||||
elif att in NODE_ELEMENT_ATTRIBUTES:
|
||||
continue
|
||||
elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: # S3
|
||||
self.error("Invalid property attribute URI: %s" % att)
|
||||
# type error: Statement is unreachable
|
||||
continue # type: ignore[unreachable] # for when error does not throw an exception
|
||||
else:
|
||||
predicate = absolutize(att)
|
||||
try:
|
||||
object = Literal(atts[att], language)
|
||||
except Error as e:
|
||||
# type error: Argument 1 to "error" of "RDFXMLHandler" has incompatible type "Optional[str]"; expected "str"
|
||||
self.error(e.msg) # type: ignore[arg-type]
|
||||
self.store.add((subject, predicate, object))
|
||||
|
||||
current.subject = subject
|
||||
|
||||
def node_element_end(self, name: Tuple[str, str], qname) -> None:
    """Close a node element and hand its subject to the enclosing frame.

    Repeated node elements are only allowed at the top level: if the
    parent frame already holds an object and the current frame is not
    ``self.stack[2]``, an error is reported through ``self.error``.  The
    current frame's subject is then stored as the parent's object.
    """
    enclosing = self.parent
    if enclosing.object and self.current != self.stack[2]:
        message = "Repeat node-elements inside property elements: %s" % "".join(name)
        self.error(message)

    enclosing.object = self.current.subject
|
||||
|
||||
def property_element_start(
    self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
    """Open a property element and decide how its content will be read.

    The element name becomes ``current.predicate`` (``rdf:li`` is numbered
    via ``current.next_li()``), an optional ``rdf:ID`` is stored in
    ``current.id``, and the handlers on ``self.next`` / ``current.char``
    are chosen from the attributes:

    * ``rdf:resource``: the object is the absolutized URI.
    * ``rdf:nodeID``: the object is a blank node, shared per label through
      ``self.bnode`` unless ``self.preserve_bnode_ids`` is set.
    * ``rdf:parseType``: ``"Resource"``, ``"Collection"`` or (any other
      value) an XML literal; this branch returns early.
    * otherwise: remaining attributes are treated as property attributes
      of a fresh blank node, or, if there are none, character data is
      collected in ``current.data`` for ``property_element_end``.

    Problems are reported through ``self.error``.
    """
    # type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[str, str]")
    name, atts = self.convert(name, qname, attrs)  # type: ignore[assignment]
    current = self.current
    absolutize = self.absolutize

    next = self.next
    object: Optional[_ObjectType] = None
    current.data = None
    current.list = None

    # type error: "Tuple[str, str]" has no attribute "startswith"
    if not name.startswith(str(RDFNS)):  # type: ignore[attr-defined]
        # type error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str"
        current.predicate = absolutize(name)  # type: ignore[arg-type]
    elif name == RDFVOC.li:
        current.predicate = current.next_li()
    elif name in PROPERTY_ELEMENT_EXCEPTIONS:
        # type error: Not all arguments converted during string formatting
        self.error("Invalid property element URI: %s" % name)  # type: ignore[str-format]
    else:
        # type error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str"
        current.predicate = absolutize(name)  # type: ignore[arg-type]

    id = atts.get(RDFVOC.ID, None)
    if id is not None:
        if not is_ncname(id):
            # Message fixed: it used to read "not a value NCName".
            self.error("rdf:ID value is not a valid NCName: %s" % id)
        current.id = absolutize("#%s" % id)
    else:
        current.id = None

    resource = atts.get(RDFVOC.resource, None)
    nodeID = atts.get(RDFVOC.nodeID, None)
    parse_type = atts.get(RDFVOC.parseType, None)
    if resource is not None and nodeID is not None:
        self.error("Property element cannot have both rdf:nodeID and rdf:resource")
    if resource is not None:
        object = absolutize(resource)
        next.start = self.node_element_start
        next.end = self.node_element_end
    elif nodeID is not None:
        if not is_ncname(nodeID):
            self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
        if self.preserve_bnode_ids is False:
            # Map each document-local label to one fresh blank node.
            if nodeID in self.bnode:
                object = self.bnode[nodeID]
            else:
                subject = BNode()
                self.bnode[nodeID] = subject
                object = subject
        else:
            object = subject = BNode(nodeID)
        next.start = self.node_element_start
        next.end = self.node_element_end
    else:
        if parse_type is not None:
            for att in atts:
                if att != RDFVOC.parseType and att != RDFVOC.ID:
                    # Message fixed: it used to read "now allowed here".
                    self.error("Property attr '%s' not allowed here" % att)
            if parse_type == "Resource":
                current.subject = object = BNode()
                current.char = self.property_element_char
                next.start = self.property_element_start
                next.end = self.property_element_end
            elif parse_type == "Collection":
                current.char = None
                # RDF.nil marks a collection with no items yet; see
                # list_node_element_end.
                object = current.list = RDF.nil  # BNode()
                # self.parent.subject
                next.start = self.node_element_start
                next.end = self.list_node_element_end
            else:  # if parse_type=="Literal":
                # All other values are treated as Literal
                # See: http://www.w3.org/TR/rdf-syntax-grammar/
                # parseTypeOtherPropertyElt
                object = Literal("", datatype=RDFVOC.XMLLiteral)
                current.char = self.literal_element_char
                current.declared = {XMLNS: "xml"}
                next.start = self.literal_element_start
                next.char = self.literal_element_char
                next.end = self.literal_element_end
            current.object = object
            return
        else:
            object = None
            current.char = self.property_element_char
            next.start = self.node_element_start
            next.end = self.node_element_end

    datatype = current.datatype = atts.get(RDFVOC.datatype, None)
    language = current.language
    if datatype is not None:
        # TODO: check that there are no atts other than datatype and id
        datatype = absolutize(datatype)
    else:
        for att in atts:
            if not att.startswith(str(RDFNS)):
                predicate = absolutize(att)
            elif att in PROPERTY_ELEMENT_ATTRIBUTES:
                continue
            elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
                self.error("""Invalid property attribute URI: %s""" % att)
                # Mirrors node_element_start: if error() ever returns, skip
                # the attribute instead of reusing a stale or unbound
                # ``predicate``.
                continue
            else:
                predicate = absolutize(att)

            o: _ObjectType
            if att == RDF.type:
                o = URIRef(atts[att])
            else:
                if datatype is not None:
                    # type error: Statement is unreachable
                    language = None  # type: ignore[unreachable]
                o = Literal(atts[att], language, datatype)

            if object is None:
                object = BNode()
            self.store.add((object, predicate, o))
    if object is None:
        # No object yet: collect character data for a literal.
        current.data = ""
        current.object = None
    else:
        current.data = None
        current.object = object
|
||||
|
||||
def property_element_char(self, data: str) -> None:
    """Append ``data`` to the current frame's text buffer.

    Nothing happens when ``current.data`` is ``None``, i.e. when the
    property element is not collecting literal text.
    """
    frame = self.current
    if frame.data is None:
        return
    frame.data += data
|
||||
|
||||
def property_element_end(self, name: Tuple[str, str], qname) -> None:
    """Close a property element and emit its statement.

    Collected character data with no object becomes a ``Literal`` (the
    language is dropped when a datatype is present).  A collection is
    terminated with ``rdf:rest rdf:nil`` unless it is empty.  If an object
    exists, ``(parent.subject, predicate, object)`` is added to the store
    and reified when the element carried an ``rdf:ID``.
    """
    frame = self.current

    if frame.data is not None and frame.object is None:
        lang = None if frame.datatype is not None else frame.language
        frame.object = Literal(frame.data, lang, frame.datatype)
        frame.data = None

    # ``next.end`` is the list handler only for parseType="Collection".
    if self.next.end == self.list_node_element_end and frame.object != RDF.nil:
        self.store.add((frame.list, RDF.rest, RDF.nil))

    if frame.object is not None:
        self.store.add((self.parent.subject, frame.predicate, frame.object))
        if frame.id is not None:
            statement = (self.parent.subject, frame.predicate, frame.object)
            self.add_reified(frame.id, statement)

    frame.subject = None
|
||||
|
||||
def list_node_element_end(self, name: Tuple[str, str], qname) -> None:
    """Close one item of a ``parseType="Collection"`` property.

    A new blank node holds the item via ``rdf:first``.  While the parent's
    ``list`` is still ``rdf:nil`` (no item seen yet) the node becomes the
    head of the list and the parent's object; otherwise it is linked from
    the previous cell with ``rdf:rest``.  Either way it becomes the
    parent's current ``list`` cell.
    """
    enclosing = self.parent
    item = self.current.subject
    is_first_item = enclosing.list == RDF.nil
    cell = BNode()
    # Removed between 20030123 and 20030905
    # self.store.add((cell, RDF.type, LIST))
    if is_first_item:
        enclosing.list = cell
        self.store.add((cell, RDF.first, item))
        enclosing.object = cell
        enclosing.char = None
    else:
        self.store.add((enclosing.list, RDF.rest, cell))
        self.store.add((cell, RDF.first, item))
        enclosing.list = cell
|
||||
|
||||
def literal_element_start(
    self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
    """Serialize the start tag of an element nested in an XML literal.

    Child elements keep being routed to the literal handlers.  The tag is
    written to ``current.object`` with the prefix found in
    ``self._current_context``; a namespace not yet listed in the copy of
    the parent's ``declared`` mapping gets an ``xmlns`` declaration.
    Attributes follow, their values quoted with ``quoteattr``.
    """
    frame = self.current
    upcoming = self.next
    upcoming.start = self.literal_element_start
    upcoming.char = self.literal_element_char
    upcoming.end = self.literal_element_end
    # Work on a copy so declarations made here stay local to this element.
    frame.declared = self.parent.declared.copy()

    namespace = name[0]
    if not namespace:
        frame.object = "<%s" % name[1]
    else:
        prefix = self._current_context[namespace]
        frame.object = ("<%s:%s" % (prefix, name[1])) if prefix else ("<%s" % name[1])
        if namespace not in frame.declared:
            frame.declared[namespace] = prefix
            if prefix:
                frame.object += ' xmlns:%s="%s"' % (prefix, namespace)
            else:
                frame.object += ' xmlns="%s"' % namespace

    for attr_name, attr_value in attrs.items():
        attr_namespace = attr_name[0]
        if attr_namespace:
            if attr_namespace not in frame.declared:
                frame.declared[attr_namespace] = self._current_context[attr_namespace]
            rendered = frame.declared[attr_namespace] + ":" + attr_name[1]
        else:
            rendered = attr_name[1]
        frame.object += " %s=%s" % (rendered, quoteattr(attr_value))
    frame.object += ">"
|
||||
|
||||
def literal_element_char(self, data: str) -> None:
    """Append XML-escaped character data to the literal being built."""
    frame = self.current
    frame.object = frame.object + escape(data)
|
||||
|
||||
def literal_element_end(self, name: Tuple[str, str], qname) -> None:
    """Write the end tag and fold this element into the parent's literal.

    The tag is prefixed only when the element has a namespace that
    ``self._current_context`` maps to a non-empty prefix.
    """
    namespace = name[0]
    prefix = self._current_context[namespace] if namespace else None
    if prefix:
        closing_tag = "</%s:%s>" % (prefix, name[1])
    else:
        closing_tag = "</%s>" % name[1]
    self.parent.object += self.current.object + closing_tag
|
||||
|
||||
|
||||
def create_parser(target: InputSource, store: Graph) -> xmlreader.XMLReader:
    """Return a namespace-aware SAX reader that feeds RDF/XML into ``store``.

    The reader gets a new ``RDFXMLHandler`` as content handler (with
    ``target`` installed as its document locator) and a default SAX
    ``ErrorHandler``.
    """
    reader = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        reader.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")  # type: ignore[attr-defined]
    except AttributeError:
        # Not present in Jython (at least)
        pass
    reader.setFeature(handler.feature_namespaces, 1)

    rdfxml_handler = RDFXMLHandler(store)
    # type error: Argument 1 to "setDocumentLocator" of "RDFXMLHandler" has incompatible type "InputSource"; expected "Locator"
    rdfxml_handler.setDocumentLocator(target)  # type: ignore[arg-type]
    # rdfxml_handler.setDocumentLocator(_Locator(self.url, self.parser))

    reader.setContentHandler(rdfxml_handler)
    reader.setErrorHandler(ErrorHandler())
    return reader
|
||||
|
||||
|
||||
class RDFXMLParser(Parser):
    """Parser plugin that reads RDF/XML from an input source into a graph."""

    def __init__(self):
        pass

    def parse(self, source: InputSource, sink: Graph, **args: Any) -> None:
        """Parse ``source`` into ``sink``.

        The only keyword argument looked at is ``preserve_bnode_ids``; when
        it is given (not ``None``) it is copied onto the content handler
        before parsing starts.
        """
        reader = create_parser(source, sink)
        self._parser = reader
        rdfxml_handler = reader.getContentHandler()

        keep_ids = args.get("preserve_bnode_ids")
        if keep_ids is not None:
            # type error: ContentHandler has no attribute "preserve_bnode_ids"
            rdfxml_handler.preserve_bnode_ids = keep_ids  # type: ignore[attr-defined, unused-ignore]

        # The handler and reader are used once, so no reset() is needed.
        reader.parse(source)
|
||||
@@ -0,0 +1,177 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, MutableSequence
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.parser import InputSource, Parser
|
||||
|
||||
from .notation3 import RDFSink, SinkParser
|
||||
|
||||
|
||||
def becauseSubGraph(*args, **kwargs):  # noqa: N802
    """Accept any arguments and do nothing.

    Installed as the parser's ``_reason2`` while a TriG graph block is
    being read.
    """
    return None
|
||||
|
||||
|
||||
class TrigSinkParser(SinkParser):
    """``SinkParser`` extended with TriG graph blocks.

    The methods follow the conventions visible in this class: each takes
    the input string and a start offset and returns the offset after what
    it consumed, or a negative number when nothing matched (or the input
    ended).  ``BadSyntax`` is expected to raise rather than return.
    """

    def directiveOrStatement(self, argstr: str, h: int) -> int:  # noqa: N802
        """Parse one graph block, SPARQL directive, directive or statement.

        The alternatives are tried in that order starting at offset ``h``.
        Directives and statements are passed through ``checkDot`` after
        matching.  Returns the new offset, or a negative value at EOF or
        when nothing matched.
        """
        i = self.skipSpace(argstr, h)
        if i < 0:
            return i  # EOF

        j = self.graph(argstr, i)
        if j >= 0:
            return j

        j = self.sparqlDirective(argstr, i)
        if j >= 0:
            return j

        j = self.directive(argstr, i)
        if j >= 0:
            return self.checkDot(argstr, j)

        j = self.statement(argstr, i)
        if j >= 0:
            return self.checkDot(argstr, j)

        return j

    def labelOrSubject(  # noqa: N802
        self, argstr: str, i: int, res: MutableSequence[Any]
    ) -> int:
        """Parse a graph label: a URI reference or an empty ``[]``.

        On success the parsed term is appended to ``res`` and the offset
        after it is returned.  Returns a negative value at EOF and ``-1``
        when no label starts at ``i``.
        """
        j = self.skipSpace(argstr, i)
        if j < 0:
            return j  # eof
        i = j

        j = self.uri_ref2(argstr, i, res)
        if j >= 0:
            return j

        if argstr[i] == "[":
            j = self.skipSpace(argstr, i + 1)
            if j < 0:
                self.BadSyntax(argstr, i, "Expected ] got EOF")
            if argstr[j] == "]":
                res.append(self.blankNode())
                return j + 1
        return -1

    def graph(self, argstr: str, i: int) -> int:
        """
        Parse trig graph, i.e.

           <urn:graphname> = { .. triples .. }

        return -1 if it doesn't look like a graph-decl
        raise Exception if it looks like a graph, but isn't.
        """

        need_graphid = False
        j = self.sparqlTok("GRAPH", argstr, i)  # optional GRAPH keyword
        if j >= 0:
            i = j
            need_graphid = True

        r: MutableSequence[Any] = []
        j = self.labelOrSubject(argstr, i, r)
        if j >= 0:
            graph = r[0]
            i = j
        elif need_graphid:
            self.BadSyntax(argstr, i, "GRAPH keyword must be followed by graph name")
        else:
            graph = self._store.graph.identifier  # hack

        j = self.skipSpace(argstr, i)
        if j < 0:
            self.BadSyntax(argstr, i, "EOF found when expected graph")

        if argstr[j : j + 1] == "=":  # optional = for legacy support
            i = self.skipSpace(argstr, j + 1)
            if i < 0:
                # ``i`` is negative here, so report the position of the "="
                # instead of indexing the input from the end.
                self.BadSyntax(argstr, j, "EOF found when expecting '{'")
        else:
            i = j

        if argstr[i : i + 1] != "{":
            return -1  # the node wasn't part of a graph

        j = i + 1

        if self._context is not None:
            self.BadSyntax(argstr, i, "Nested graphs are not allowed")

        # Save the parser state that is replaced while inside the block.
        oldParentContext = self._parentContext  # noqa: N806
        self._parentContext = self._context
        reason2 = self._reason2
        self._reason2 = becauseSubGraph
        # type error: Incompatible types in assignment (expression has type "Graph", variable has type "Optional[Formula]")
        self._context = self._store.newGraph(graph)  # type: ignore[assignment]

        while 1:
            i = self.skipSpace(argstr, j)
            if i < 0:
                # Report the last valid offset; ``i`` itself is negative.
                self.BadSyntax(argstr, j, "needed '}', found end.")

            if argstr[i : i + 1] == "}":
                j = i + 1
                break

            j = self.directiveOrStatement(argstr, i)
            if j < 0:
                self.BadSyntax(argstr, i, "expected statement or '}'")

        # Restore the state saved before the block.
        self._context = self._parentContext
        self._reason2 = reason2
        self._parentContext = oldParentContext
        # res.append(subj.close())    # No use until closed
        return j
|
||||
|
||||
|
||||
class TrigParser(Parser):
    """
    An RDFLib parser for TriG

    """

    def __init__(self):
        pass

    def parse(self, source: InputSource, graph: Graph, encoding: str = "utf-8") -> None:
        """Parse the TriG document in ``source`` into ``graph``'s store.

        ``graph`` is wrapped in a ``ConjunctiveGraph`` over the same store
        (with ``graph`` as its default context and sharing its namespace
        manager), which is handed to ``TrigSinkParser`` through an
        ``RDFSink``.  Prefix bindings collected by the parser are bound on
        that conjunctive graph afterwards.

        :param source: input source; its character stream is used when
            available, otherwise its byte stream.
        :param graph: target graph; its store must be context-aware.
        :param encoding: must be ``None`` or ``"utf-8"``.
        :raises Exception: for any other ``encoding``.
        :raises AssertionError: if the store is not context-aware.
        """
        if encoding not in [None, "utf-8"]:
            # The message used to be a tuple, so the ``%`` below raised
            # TypeError instead of this exception.
            raise Exception(
                "TriG files are always utf-8 encoded, I was passed: %s" % (encoding,)
            )

        # we're currently being handed a Graph, not a ConjunctiveGraph
        assert graph.store.context_aware, "TriG Parser needs a context-aware store!"

        conj_graph = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
        conj_graph.default_context = graph  # TODO: CG __init__ should have a
        # default_context arg
        # TODO: update N3Processor so that it can use conj_graph as the sink
        conj_graph.namespace_manager = graph.namespace_manager

        sink = RDFSink(conj_graph)

        baseURI = conj_graph.absolutize(  # noqa: N806
            source.getPublicId() or source.getSystemId() or ""
        )
        p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)

        stream = source.getCharacterStream()  # try to get str stream first
        if not stream:
            # fallback to get the bytes stream
            stream = source.getByteStream()
        p.loadStream(stream)

        for prefix, namespace in p._bindings.items():
            conj_graph.bind(prefix, namespace)
|
||||
@@ -0,0 +1,296 @@
|
||||
"""
|
||||
A TriX parser for RDFLib
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple
|
||||
from xml.sax import handler, make_parser
|
||||
from xml.sax.handler import ErrorHandler
|
||||
|
||||
from rdflib.exceptions import ParserError
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import Namespace
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.store import Store
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# from xml.sax.expatreader import ExpatLocator
|
||||
from xml.sax.xmlreader import AttributesImpl, Locator, XMLReader
|
||||
|
||||
__all__ = ["create_parser", "TriXHandler", "TriXParser"]
|
||||
|
||||
|
||||
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
|
||||
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
|
||||
|
||||
|
||||
class TriXHandler(handler.ContentHandler):
    """A SAX handler for TriX. See http://sw.nokia.com/trix/

    Parsing is driven by ``self.state``:

    * 0: outside the document element
    * 1: inside ``TriX``
    * 2: inside ``graph``
    * 3: inside the ``uri`` / ``id`` element that names the graph
    * 4: inside ``triple``

    Text is accumulated in ``self.chars``, which is cleared at every start
    tag.  Problems are reported by raising ``ParserError`` from ``error``.
    """

    lang: Optional[str]
    datatype: Optional[str]

    def __init__(self, store: Store):
        self.store = store
        self.preserve_bnode_ids = False
        # Stays None unless the SAX driver calls setDocumentLocator();
        # error() must work in either case.
        self.locator: Optional[Locator] = None
        self.reset()

    def reset(self) -> None:
        """Put the handler back into its pre-document state."""
        self.bnode: Dict[str, BNode] = {}
        self.graph: Optional[Graph] = None
        self.triple: Optional[List[Identifier]] = None
        self.state = 0
        self.lang = None
        self.datatype = None
        # Initialised here so characters() is safe before the first start
        # tag; previously the attribute only appeared in startElementNS.
        self.chars = ""

    # ContentHandler methods

    def setDocumentLocator(self, locator: Locator):
        self.locator = locator

    def startDocument(self) -> None:
        pass

    def startPrefixMapping(self, prefix: Optional[str], namespace: str) -> None:
        pass

    def endPrefixMapping(self, prefix: Optional[str]) -> None:
        pass

    def startElementNS(
        self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl
    ) -> None:
        """Advance the state machine for a start tag.

        Raises ``ParserError`` (through ``error``) for elements outside the
        TriX namespace, unknown elements, and elements that are not allowed
        in the current state.
        """
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS)
            )

        if name[1].lower() == "trix":
            if self.state == 0:
                self.state = 1
            else:
                self.error("Unexpected TriX element")

        elif name[1] == "graph":
            if self.state == 1:
                self.state = 2
            else:
                self.error("Unexpected graph element")

        elif name[1] == "uri":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of a triple
                pass
            else:
                self.error("Unexpected uri element")

        elif name[1] == "triple":
            if self.state == 2:
                if self.graph is None:
                    # anonymous graph, create one with random bnode id
                    self.graph = Graph(store=self.store)
                # start of a triple
                self.triple = []
                self.state = 4
            else:
                self.error("Unexpected triple element")

        elif name[1] == "typedLiteral":
            if self.state == 4:
                # part of triple
                self.lang = None
                self.datatype = None

                try:
                    self.lang = attrs.getValue((str(XMLNS), "lang"))  # type: ignore[arg-type, unused-ignore]
                except Exception:
                    # language not required - ignore
                    pass
                try:
                    self.datatype = attrs.getValueByQName("datatype")  # type: ignore[arg-type, unused-ignore]
                except KeyError:
                    self.error("No required attribute 'datatype'")
            else:
                self.error("Unexpected typedLiteral element")

        elif name[1] == "plainLiteral":
            if self.state == 4:
                # part of triple
                self.lang = None
                self.datatype = None
                try:
                    # type error: Argument 1 to "getValue" of "AttributesImpl" has incompatible type "Tuple[str, str]"; expected "str"
                    self.lang = attrs.getValue((str(XMLNS), "lang"))  # type: ignore[arg-type, unused-ignore]
                except Exception:
                    # language not required - ignore
                    pass

            else:
                self.error("Unexpected plainLiteral element")

        elif name[1] == "id":
            if self.state == 2:
                # the context uri
                self.state = 3

            elif self.state == 4:
                # part of triple
                pass
            else:
                self.error("Unexpected id element")

        else:
            self.error("Unknown element %s in TriX namespace" % name[1])

        # Each element starts with an empty text buffer.
        self.chars = ""

    def endElementNS(self, name: Tuple[Optional[str], str], qname) -> None:
        """Finish an element: name the graph, extend the triple or store it."""
        if TYPE_CHECKING:
            assert self.triple is not None
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS)
            )

        if name[1] == "uri":
            if self.state == 3:
                self.graph = Graph(
                    store=self.store, identifier=URIRef(self.chars.strip())
                )
                self.state = 2
            elif self.state == 4:
                self.triple += [URIRef(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never "
                    + "happen if the SAX parser ensures XML syntax correctness"
                )

        elif name[1] == "id":
            if self.state == 3:
                self.graph = Graph(
                    self.store, identifier=self.get_bnode(self.chars.strip())
                )
                self.state = 2
            elif self.state == 4:
                self.triple += [self.get_bnode(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never "
                    + "happen if the SAX parser ensures XML syntax correctness"
                )

        elif name[1] == "plainLiteral" or name[1] == "typedLiteral":
            if self.state == 4:
                self.triple += [
                    Literal(self.chars, lang=self.lang, datatype=self.datatype)
                ]
            else:
                self.error(
                    "This should never happen if the SAX parser "
                    + "ensures XML syntax correctness"
                )

        elif name[1] == "triple":
            if self.state == 4:
                if len(self.triple) != 3:
                    self.error(
                        "Triple has wrong length, got %d elements: %s"
                        % (len(self.triple), self.triple)
                    )
                # type error: Item "None" of "Optional[Graph]" has no attribute "add"
                # type error: Argument 1 to "add" of "Graph" has incompatible type "List[Identifier]"; expected "Tuple[Node, Node, Node]"
                self.graph.add(self.triple)  # type: ignore[union-attr, arg-type]
                # self.store.store.add(self.triple,context=self.graph)
                # self.store.addN([self.triple+[self.graph]])
                self.state = 2
            else:
                self.error(
                    "This should never happen if the SAX parser "
                    + "ensures XML syntax correctness"
                )

        elif name[1] == "graph":
            self.graph = None
            self.state = 1

        elif name[1].lower() == "trix":
            self.state = 0

        else:
            self.error("Unexpected close element")

    def get_bnode(self, label: str) -> BNode:
        """Return the blank node for ``label``.

        With ``preserve_bnode_ids`` a new ``BNode(label)`` is built on every
        call; otherwise one node per label is cached in ``self.bnode``.
        """
        if self.preserve_bnode_ids:
            bn = BNode(label)
        else:
            if label in self.bnode:
                bn = self.bnode[label]
            else:
                bn = BNode(label)
                self.bnode[label] = bn
        return bn

    def characters(self, content: str) -> None:
        self.chars += content

    def ignorableWhitespace(self, content) -> None:
        pass

    def processingInstruction(self, target, data) -> None:
        pass

    def error(self, message: str) -> NoReturn:
        """Raise ``ParserError``, prefixed with the position when known."""
        locator = self.locator
        if locator is None:
            # No locator was supplied; still raise the real error instead
            # of failing on the missing attribute.
            raise ParserError(message)
        info = "%s:%s:%s: " % (
            locator.getSystemId(),
            locator.getLineNumber(),
            locator.getColumnNumber(),
        )
        raise ParserError(info + message)
|
||||
|
||||
|
||||
def create_parser(store: Store) -> XMLReader:
    """Return a namespace-aware SAX reader that loads TriX into ``store``.

    The reader is given a new ``TriXHandler`` as content handler and a
    default SAX ``ErrorHandler``.
    """
    reader = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        # type error: "XMLReader" has no attribute "start_namespace_decl"
        reader.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")  # type: ignore[attr-defined]
    except AttributeError:
        # Not present in Jython (at least)
        pass
    reader.setFeature(handler.feature_namespaces, 1)

    trix_handler = TriXHandler(store)
    reader.setContentHandler(trix_handler)
    reader.setErrorHandler(ErrorHandler())
    return reader
|
||||
|
||||
|
||||
class TriXParser(Parser):
    """A parser for TriX. See http://sw.nokia.com/trix/"""

    def __init__(self):
        pass

    def parse(self, source: InputSource, sink: Graph, **args: Any) -> None:
        """Parse the TriX document in ``source`` into ``sink``'s store.

        The store must be context-aware (checked with an ``assert``).  The
        only keyword argument looked at is ``preserve_bnode_ids``; when it
        is given (not ``None``) it is copied onto the content handler
        before parsing starts.
        """
        assert (
            sink.store.context_aware
        ), "TriXParser must be given a context aware store."

        reader = create_parser(sink.store)
        self._parser = reader
        trix_handler = reader.getContentHandler()

        keep_ids = args.get("preserve_bnode_ids")
        if keep_ids is not None:
            # type error: ContentHandler has no attribute "preserve_bnode_ids"
            trix_handler.preserve_bnode_ids = keep_ids  # type: ignore[attr-defined, unused-ignore]

        # The handler and reader are used once, so no reset() is needed.
        reader.parse(source)
|
||||
@@ -0,0 +1,207 @@
|
||||
"""
|
||||
HextuplesSerializer RDF graph serializer for RDFLib.
|
||||
See <https://github.com/ontola/hextuples> for details about the format.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from typing import IO, Any, Callable, List, Optional, Type, Union, cast
|
||||
|
||||
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Literal, URIRef
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
__all__ = ["HextuplesSerializer"]
|
||||
|
||||
|
||||
class HextuplesSerializer(Serializer):
    """
    Serializes RDF graphs to Hextuples format.

    Each statement is written as one JSON array per line:
    ``[subject, predicate, value, datatype, language, graph]``.
    ``orjson`` is used when it is installed, otherwise the standard
    ``json`` module.
    """

    contexts: List[Union[Graph, IdentifiedNode]]
    dumps: Callable

    def __new__(cls, store: Union[Graph, Dataset, ConjunctiveGraph]):
        """Create the instance and set the constant JSON cells on ``cls``.

        With ``orjson`` the constants are ``orjson.Fragment`` objects
        holding the already-encoded JSON; otherwise they are plain strings.
        """
        if _HAS_ORJSON:
            cls.str_local_id: Union[str, Any] = orjson.Fragment(b'"localId"')
            cls.str_global_id: Union[str, Any] = orjson.Fragment(b'"globalId"')
            cls.empty: Union[str, Any] = orjson.Fragment(b'""')
            cls.lang_str: Union[str, Any] = orjson.Fragment(
                b'"' + RDF.langString.encode("utf-8") + b'"'
            )
            cls.xsd_string: Union[str, Any] = orjson.Fragment(
                b'"' + XSD.string.encode("utf-8") + b'"'
            )
        else:
            cls.str_local_id = "localId"
            cls.str_global_id = "globalId"
            cls.empty = ""
            cls.lang_str = f"{RDF.langString}"
            cls.xsd_string = f"{XSD.string}"
        # ``super(cls, cls)`` resolved back to this method for subclasses
        # and then failed on the missing ``store`` argument; the
        # zero-argument form always starts after this class in the MRO.
        return super().__new__(cls)

    def __init__(self, store: Union[Graph, Dataset, ConjunctiveGraph]):
        """Record the contexts to serialize and the kind of ``store``.

        For a ``Dataset`` / ``ConjunctiveGraph`` all of ``store.contexts()``
        are used, plus ``store.default_context`` when it is truthy; a plain
        ``Graph`` is its own single context.
        """
        self.default_context: Optional[Union[Graph, IdentifiedNode]]
        self.graph_type: Union[Type[Graph], Type[Dataset], Type[ConjunctiveGraph]]
        if isinstance(store, (Dataset, ConjunctiveGraph)):
            self.graph_type = (
                Dataset if isinstance(store, Dataset) else ConjunctiveGraph
            )
            self.contexts = list(store.contexts())
            if store.default_context:
                self.default_context = store.default_context
                self.contexts.append(store.default_context)
            else:
                self.default_context = None
        else:
            self.graph_type = Graph
            self.contexts = [store]
            self.default_context = None

        Serializer.__init__(self, store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = "utf-8",
        **kwargs: Any,
    ) -> None:
        """Write every serializable triple to ``stream``, one line each.

        :param stream: binary stream that receives UTF-8 encoded lines.
        :param base: ignored; a warning is issued when it is given.
        :param encoding: only ``None`` / ``"utf-8"`` are honoured; anything
            else triggers a warning and UTF-8 is used anyway.
        :raises Exception: if ``self.store.formula_aware`` is ``True``.
        """
        if base is not None:
            warnings.warn(
                "base has no meaning for Hextuples serialization. "
                "I will ignore this value"
            )

        if encoding not in [None, "utf-8"]:
            warnings.warn(
                f"Hextuples files are always utf-8 encoded. "
                f"I was passed: {encoding}, "
                "but I'm still going to use utf-8 anyway!"
            )

        if self.store.formula_aware is True:
            raise Exception(
                "Hextuple serialization can't (yet) handle formula-aware stores"
            )
        context: Union[Graph, IdentifiedNode]
        context_str: Union[bytes, str]
        for context in self.contexts:
            # Generate context string just once, because it doesn't change
            # for every triple in this context
            context_str = cast(
                Union[str, bytes],
                (
                    self.empty
                    if self.graph_type is Graph
                    else (
                        orjson.Fragment('"' + self._context_str(context) + '"')
                        if _HAS_ORJSON
                        else self._context_str(context)
                    )
                ),
            )
            for triple in context:
                hl = self._hex_line(triple, context_str)
                if hl is not None:
                    # orjson already returns bytes; json returns str.
                    stream.write(hl if _HAS_ORJSON else hl.encode())

    def _hex_line(self, triple, context_str: Union[bytes, str]):
        """Return the encoded line for ``triple``, or ``None`` to skip it.

        Triples whose subject is not a ``URIRef`` / ``BNode`` or whose
        object is not a ``URIRef`` / ``BNode`` / ``Literal`` are skipped.
        The result is ``bytes`` with ``orjson`` and ``str`` with ``json``,
        and ends with a newline in both cases.
        """
        if isinstance(
            triple[0], (URIRef, BNode)
        ):  # exclude QuotedGraph and other objects
            # value
            value = (
                triple[2]
                if isinstance(triple[2], Literal)
                else self._iri_or_bn(triple[2])
            )

            # datatype
            if isinstance(triple[2], URIRef):
                # datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#namedNode"
                datatype = self.str_global_id
            elif isinstance(triple[2], BNode):
                # datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#blankNode"
                datatype = self.str_local_id
            elif isinstance(triple[2], Literal):
                if triple[2].datatype is not None:
                    datatype = f"{triple[2].datatype}"
                else:
                    if triple[2].language is not None:  # language
                        datatype = self.lang_str
                    else:
                        datatype = self.xsd_string
            else:
                return None  # can't handle non URI, BN or Literal Object (QuotedGraph)

            # language
            if isinstance(triple[2], Literal):
                if triple[2].language is not None:
                    language = f"{triple[2].language}"
                else:
                    language = self.empty
            else:
                language = self.empty
            line_list = [
                self._iri_or_bn(triple[0]),
                triple[1],
                value,
                datatype,
                language,
                context_str,
            ]
            outline: Union[str, bytes]
            if _HAS_ORJSON:
                outline = orjson.dumps(line_list, option=orjson.OPT_APPEND_NEWLINE)
            else:
                outline = json.dumps(line_list) + "\n"
            return outline
        else:  # do not return anything for non-IRIs or BNs, e.g. QuotedGraph, Subjects
            return None

    def _iri_or_bn(self, i_):
        """Return the IRI string or blank-node N3 form, else ``None``."""
        if isinstance(i_, URIRef):
            return f"{i_}"
        elif isinstance(i_, BNode):
            return f"{i_.n3()}"
        else:
            return None

    def _context_str(self, context: Union[Graph, IdentifiedNode]) -> str:
        """Return the graph cell for ``context``.

        The result is ``""`` for the dataset default graph, for the
        recorded default context and when a plain ``Graph`` is being
        serialized; otherwise the identifier's IRI string or N3 form.
        """
        context_identifier: IdentifiedNode = (
            context.identifier if isinstance(context, Graph) else context
        )
        if context_identifier == DATASET_DEFAULT_GRAPH_ID:
            return ""
        if self.default_context is not None:
            if (
                isinstance(self.default_context, IdentifiedNode)
                and context_identifier == self.default_context
            ):
                return ""
            elif (
                isinstance(self.default_context, Graph)
                and context_identifier == self.default_context.identifier
            ):
                return ""
        if self.graph_type is Graph:
            # Only emit a context name when serializing a Dataset or ConjunctiveGraph
            return ""
        return (
            f"{context_identifier}"
            if isinstance(context_identifier, URIRef)
            else context_identifier.n3()
        )
|
||||
@@ -0,0 +1,433 @@
|
||||
"""
|
||||
This serialiser will output an RDF Graph as a JSON-LD formatted document. See:
|
||||
|
||||
http://json-ld.org/
|
||||
|
||||
Example usage::
|
||||
|
||||
>>> from rdflib import Graph
|
||||
>>> testrdf = '''
|
||||
... @prefix dc: <http://purl.org/dc/terms/> .
|
||||
... <http://example.org/about>
|
||||
... dc:title "Someone's Homepage"@en .
|
||||
... '''
|
||||
|
||||
>>> g = Graph().parse(data=testrdf, format='n3')
|
||||
|
||||
>>> print(g.serialize(format='json-ld', indent=2))
|
||||
[
|
||||
{
|
||||
"@id": "http://example.org/about",
|
||||
"http://purl.org/dc/terms/title": [
|
||||
{
|
||||
"@language": "en",
|
||||
"@value": "Someone's Homepage"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
"""
|
||||
|
||||
# From: https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/serializer.py
|
||||
|
||||
# NOTE: This code writes the entire JSON object into memory before serialising,
|
||||
# but we should consider streaming the output to deal with arbitrarily large
|
||||
# graphs.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Dict, List, Optional
|
||||
|
||||
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Graph, _ObjectType
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Identifier, Literal, URIRef
|
||||
|
||||
from ..shared.jsonld.context import UNDEF, Context
|
||||
from ..shared.jsonld.keys import CONTEXT, GRAPH, ID, LANG, LIST, SET, VOCAB
|
||||
from ..shared.jsonld.util import _HAS_ORJSON, json, orjson
|
||||
|
||||
__all__ = ["JsonLDSerializer", "from_rdf"]
|
||||
|
||||
|
||||
PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
|
||||
|
||||
|
||||
class JsonLDSerializer(Serializer):
    """Serializer plugin that writes the wrapped graph as a JSON-LD document."""

    def __init__(self, store: Graph):
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Convert ``self.store`` to JSON-LD and write the encoded bytes.

        :param stream: binary stream that receives the document.
        :param base: base IRI handed to ``from_rdf``.
        :param encoding: output encoding; defaults to ``"utf-8"``. A warning
            is issued for anything other than ``"utf-8"`` or ``"utf-16"``.
        :param kwargs: recognised keys are ``context``, ``use_native_types``,
            ``use_rdf_type`` and ``auto_compact`` (forwarded to ``from_rdf``)
            plus the JSON formatting options ``indent`` (default 2),
            ``separators`` (default ``(",", ": ")``), ``sort_keys``
            (default ``True``) and ``ensure_ascii`` (default ``False``).

        When orjson is available it is used for encoding: any non-``None``
        ``indent`` results in two-space indentation, ``separators`` is
        ignored and ``ensure_ascii`` only triggers a warning.
        """
        encoding = encoding or "utf-8"
        if encoding not in ("utf-8", "utf-16"):
            warnings.warn(
                "JSON should be encoded as unicode. " f"Given encoding was: {encoding}"
            )

        context_data = kwargs.get("context")
        # This used to be wrapped in a one-element tuple by a stray trailing
        # comma; a non-empty tuple is always truthy, so the option could
        # never be switched off.
        use_native_types = kwargs.get("use_native_types", False)
        use_rdf_type = kwargs.get("use_rdf_type", False)
        auto_compact = kwargs.get("auto_compact", False)

        indent = kwargs.get("indent", 2)
        separators = kwargs.get("separators", (",", ": "))
        sort_keys = kwargs.get("sort_keys", True)
        ensure_ascii = kwargs.get("ensure_ascii", False)

        obj = from_rdf(
            self.store,
            context_data,
            base,
            use_native_types,
            use_rdf_type,
            auto_compact=auto_compact,
        )
        if _HAS_ORJSON:
            option: int = orjson.OPT_NON_STR_KEYS
            if indent is not None:
                option |= orjson.OPT_INDENT_2
            if sort_keys:
                option |= orjson.OPT_SORT_KEYS
            if ensure_ascii:
                warnings.warn("Cannot use ensure_ascii with orjson")
            data_bytes = orjson.dumps(obj, option=option)
            if encoding != "utf-8":
                # orjson always produces UTF-8; honour the requested encoding
                # the same way the stdlib branch below does.
                data_bytes = data_bytes.decode("utf-8").encode(encoding, "replace")
            stream.write(data_bytes)
        else:
            data = json.dumps(
                obj,
                indent=indent,
                separators=separators,
                sort_keys=sort_keys,
                ensure_ascii=ensure_ascii,
            )
            stream.write(data.encode(encoding, "replace"))
|
||||
|
||||
|
||||
def from_rdf(
    graph,
    context_data=None,
    base=None,
    use_native_types=False,
    use_rdf_type=False,
    auto_compact=False,
    startnode=None,
    index=False,
):
    """Build the JSON-LD data structure for *graph*.

    When no context data is supplied and *auto_compact* is set, a context is
    derived from the graph's namespace bindings (bindings without a prefix
    and the XML namespace are left out). If the resulting context is active,
    the converted data is wrapped under the graph key when it is a list and
    the context data is attached under the context key.

    *startnode* and *index* are accepted but not used yet.
    """
    # TODO: docstring w. args and return value
    # TODO: support for index and startnode

    if auto_compact and not context_data:
        context_data = {
            pfx: str(ns)
            for pfx, ns in graph.namespaces()
            if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace"
        }

    if not isinstance(context_data, Context):
        context = Context(context_data, base=base)
    else:
        context = context_data
        context_data = context.to_dict()

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if not converter.context.active:
        return result

    if isinstance(result, list):
        result = {context.get_key(GRAPH): result}
    result[CONTEXT] = context_data
    return result
|
||||
|
||||
|
||||
class Converter:
    """Turn an RDF graph (or context-aware dataset) into JSON-LD data.

    Key names and IRI compaction are taken from the supplied ``Context``;
    the output is assembled from plain dicts and lists.
    """

    def __init__(self, context: Context, use_native_types: bool, use_rdf_type: bool):
        self.context = context
        # An active context switches native types on regardless of the flag.
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph: Graph):
        """Convert *graph* and return the resulting JSON-LD structure.

        For a context-aware graph every context with a ``URIRef`` identifier
        becomes its own named graph object, while the remaining contexts are
        merged into the default graph. The return value is a list of objects,
        or a single object when exactly one object was produced and the
        context is active.
        """
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            # type error: "Graph" has no attribute "contexts"
            all_contexts = list(graph.contexts())  # type: ignore[attr-defined]
            has_dataset_default_id = any(
                c.identifier == DATASET_DEFAULT_GRAPH_ID for c in all_contexts
            )
            if (
                has_dataset_default_id
                # type error: "Graph" has no attribute "default_context"
                and graph.default_context.identifier == DATASET_DEFAULT_GRAPH_ID  # type: ignore[attr-defined]
            ):
                default_graph = graph.default_context  # type: ignore[attr-defined]
            else:
                default_graph = Graph()
            graphs = [default_graph]
            default_graph_id = default_graph.identifier

            for g in all_contexts:
                if g in graphs:
                    continue
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    # NOTE(review): ``+=`` adds g's triples to default_graph in
                    # place; when default_graph is the input's own default
                    # context this looks like it modifies the input - confirm.
                    default_graph += g
        else:
            graphs = [graph]
            default_graph_id = graph.identifier

        context = self.context

        objs: List[Any] = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                if g.identifier != default_graph_id:
                    graphname = context.shrink_iri(g.identifier)
                    obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            # Fold into the first object when it carries the same graph id.
            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph: Graph):
        """Return the list of top-level node objects for *graph*."""
        nodemap: Dict[Any, Any] = {}

        for s in set(graph.subjects()):
            ## only iri:s and unreferenced (rest will be promoted to top if needed)
            if isinstance(s, URIRef) or (
                isinstance(s, BNode) and not any(graph.subjects(None, s))
            ):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph: Graph, s: IdentifiedNode, nodemap):
        """Create the node object for *s* and register it in *nodemap*.

        Returns the new node, or ``None`` when a node with the same id is
        already present in *nodemap*.
        """
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            # This does not seem right, this probably should be an error.
            node_id = None

        # used_as_object = any(graph.subjects(None, s))
        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        # Register before descending so recursive references find the node.
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            # type error: Argument 3 to "add_to_node" of "Converter" has incompatible type "Node"; expected "IdentifiedNode"
            # type error: Argument 4 to "add_to_node" of "Converter" has incompatible type "Node"; expected "Identifier"
            self.add_to_node(graph, s, p, o, node, nodemap)  # type: ignore[arg-type]

        return node

    def add_to_node(
        self,
        graph: Graph,
        s: IdentifiedNode,
        p: IdentifiedNode,
        o: Identifier,
        s_node: Dict[str, Any],
        nodemap,
    ):
        """Add the value of the triple ``(s, p, o)`` to the node dict *s_node*.

        The key and value shape are chosen from the context term matching the
        predicate (if any); repeated predicates accumulate into a list.
        """
        context = self.context

        # Only literals carry a language; bind the name up front so the
        # language-container branch below never reads an unbound local.
        language = None
        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            # Compare against None: the first list item may be a falsy
            # literal (0, false, ""), which must still count as a list.
            is_list_head = graph.value(o, RDF.first) is not None
            containers = [LIST, None] if is_list_head else [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    # type error: Argument 2 to "find_term" of "Context" has incompatible type "object"; expected "Union[str, Defined, None]"
                    # type error: Argument 3 to "find_term" of "Context" has incompatible type "Optional[str]"; expected "Union[Defined, str]"
                    term = context.find_term(str(p), coercion, container)  # type: ignore[arg-type]
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            # type error: "Identifier" has no attribute "language"
            elif term.language and o.language == term.language:  # type: ignore[attr-defined]
                node = str(o)
            # type error: Right operand of "and" is never evaluated
            elif context.language and (term.language is None and o.language is None):  # type: ignore[unreachable]
                node = str(o)  # type: ignore[unreachable]

            if LIST in term.container:
                node = [
                    self.type_coerce(v, term.type)
                    or self.to_raw_value(graph, s, v, nodemap)
                    for v in self.to_collection(graph, o)
                ]
            elif LANG in term.container and language:
                # Language map: {language: text} or {language: [text, ...]}.
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                if values is not None or SET in term.container:
                    if not isinstance(values, list):
                        # Start from an empty list when nothing is stored yet
                        # so that no ``None`` entry ends up in the output.
                        values = [] if values is None else [values]
                        value[language] = values
                    values.append(node)
                else:
                    value[language] = node
                return
            elif SET in term.container:
                use_set = True

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if not term and p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        value = s_node.get(p_key)
        if value:
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o: Identifier, coerce_type: str):
        """Return *o* coerced according to *coerce_type*, or ``None``.

        ``None`` signals that the coercion does not apply to this value.
        """
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(
        self, graph: Graph, s: IdentifiedNode, o: Identifier, nodemap: Dict[str, Any]
    ):
        """Return the JSON-LD value for object *o* when no term coercion applies.

        RDF lists become list objects, blank nodes and IRIs become id
        references (blank nodes are also processed as subjects), and literals
        become value objects or bare values depending on datatype, language
        and whether the context is active.
        """
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            # Reuse the collection already gathered instead of walking the
            # list a second time.
            return {
                context.list_key: [
                    self.to_raw_value(graph, s, lo, nodemap) for lo in coll
                ]
            }
        elif isinstance(o, BNode):
            embed = (
                False  # TODO: self.context.active or using startnode and only one ref
            )
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native and self.context.active:
                    return v
                return {
                    context.type_key: context.to_symbol(o.datatype),
                    context.value_key: v,
                }
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language, context.value_key: v}
            # type error: Right operand of "and" is never evaluated
            elif not context.active or context.language and not o.language:  # type: ignore[unreachable]
                return {context.value_key: v}
            else:
                return v

    def to_collection(self, graph: Graph, l_: Identifier):
        """Return the items of the RDF list starting at *l_*, or ``None``.

        ``None`` is returned when *l_* is not a well-formed list: it has no
        ``rdf:first``, a list node is a ``URIRef`` other than ``rdf:nil``, a
        node carries properties besides ``rdf:first``/``rdf:rest`` (and the
        ``rdf:type rdf:List`` statement), the chain is cyclic, or it does not
        end in ``rdf:nil``.
        """
        # Compare against None: a falsy first item (0, false, "") is still an
        # item and must not make the list look like a non-list.
        if l_ != RDF.nil and graph.value(l_, RDF.first) is None:
            return None
        list_nodes: List[Optional[_ObjectType]] = []
        chain = {l_}
        while l_:
            if l_ == RDF.nil:
                return list_nodes
            if isinstance(l_, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l_):
                if first is None and p == RDF.first:
                    first = o
                elif rest is None and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    return None
            list_nodes.append(first)
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Identifier")
            l_ = rest  # type: ignore[assignment]
            if l_ in chain:
                return None
            chain.add(l_)
|
||||
+326
@@ -0,0 +1,326 @@
|
||||
"""
|
||||
LongTurtle RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
|
||||
|
||||
This variant, longturtle as opposed to just turtle, makes some small format changes
|
||||
to turtle - the original turtle serializer. It:
|
||||
|
||||
* uses PREFIX instead of @prefix
|
||||
* uses BASE instead of @base
|
||||
* adds a new line at RDF.type, or 'a'
|
||||
* adds a newline and an indent for all triples with more than one object (object list)
|
||||
* adds a new line and ';' for the last triple in a set with '.'
|
||||
on the start of the next line
|
||||
* uses the default encoding (encode()) instead of "latin-1"
|
||||
|
||||
- Nicholas Car, 2023
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.compare import to_canonical_graph
|
||||
from rdflib.exceptions import Error
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
from .turtle import RecursiveSerializer
|
||||
|
||||
__all__ = ["LongTurtleSerializer"]
|
||||
|
||||
SUBJECT = 0
|
||||
VERB = 1
|
||||
OBJECT = 2
|
||||
|
||||
_GEN_QNAME_FOR_DT = False
|
||||
_SPACIOUS_OUTPUT = False
|
||||
|
||||
|
||||
class LongTurtleSerializer(RecursiveSerializer):
    """Turtle serializer variant that spreads statements over more lines.

    The graph is canonicalised and rebuilt from sorted N-Triples lines at
    construction time; output uses ``PREFIX``/``BASE`` directives.
    """

    short_name = "longturtle"
    indentString = " "

    def __init__(self, store):
        self._ns_rewrite = {}
        # Canonicalise, then round-trip through sorted N-Triples lines to get
        # a graph built in a fixed order.
        store = to_canonical_graph(store)
        content = store.serialize(format="application/n-triples")
        lines = content.split("\n")
        lines.sort()
        graph = Graph()
        graph.parse(
            data="\n".join(lines), format="application/n-triples", skolemize=True
        )
        graph = graph.de_skolemize()
        super().__init__(graph)
        self.keywords = {RDF.type: "a"}
        self.reset()
        self.stream = None
        self._spacious: bool = _SPACIOUS_OUTPUT

    def addNamespace(self, prefix, namespace):
        """Register *prefix* for *namespace* and return the prefix actually used."""
        # Turtle does not support prefixes that start with _
        # if they occur in the graph, rewrite to p_blah
        # this is more complicated since we need to make sure p_blah
        # does not already exist. And we register namespaces as we go, i.e.
        # we may first see a triple with prefix _9 - rewrite it to p_9
        # and then later find a triple with a "real" p_9 prefix

        # so we need to keep track of ns rewrites we made so far.

        if (prefix > "" and prefix[0] == "_") or self.namespaces.get(
            prefix, namespace
        ) != namespace:
            if prefix not in self._ns_rewrite:
                p = "p" + prefix
                while p in self.namespaces:
                    p = "p" + p
                self._ns_rewrite[prefix] = p

        prefix = self._ns_rewrite.get(prefix, prefix)

        super().addNamespace(prefix, namespace)
        return prefix

    def reset(self):
        """Clear per-run state, including the prefix rewrite table."""
        super().reset()
        self._shortNames = {}
        self._started = False
        self._ns_rewrite = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Write the graph to *stream*.

        :param base: base IRI to emit; falls back to the graph's own base.
        :param spacious: when given, overrides the stored spacious-output flag.

        *encoding* and extra keyword arguments are accepted but not read by
        this method.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()
        subjects_list = self.orderSubjects()

        self.startDocument()

        for subject in subjects_list:
            if self.isDone(subject):
                continue
            # Every statement that was written is followed by a newline.
            if self.statement(subject):
                self.write("\n")

        self.endDocument()

        self.base = None

    def preprocessTriple(self, triple):
        """Register the qnames used by *triple* before writing starts."""
        super().preprocessTriple(triple)
        for i, node in enumerate(triple):
            if node in self.keywords:
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):  # hmm - when is P ever a bnode?
            self._references[p] += 1

    def getQName(self, uri, gen_prefix=True):
        """Return ``prefix:local`` for *uri*, or ``None`` if no qname applies."""
        if not isinstance(uri, URIRef):
            return None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)

            if pfx is not None:
                parts = (pfx, uri, "")
            else:
                # nothing worked
                return None

        prefix, namespace, local = parts

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return "%s:%s" % (prefix, local)

    def startDocument(self):
        """Write the BASE line (if any) and the sorted PREFIX lines."""
        self._started = True
        ns_list = sorted(self.namespaces.items())

        if self.base:
            self.write(self.indent() + "BASE <%s>\n" % self.base)
        for prefix, uri in ns_list:
            self.write(self.indent() + "PREFIX %s: <%s>\n" % (prefix, uri))
        if ns_list and self._spacious:
            self.write("\n")

    def endDocument(self):
        """Write the trailing newline used in spacious mode."""
        if self._spacious:
            self.write("\n")

    def statement(self, subject):
        """Write all triples for *subject*; return whether anything was written."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject):
        """Write *subject* in the regular ``subject predicate-list .`` form."""
        self.write("\n" + self.indent())
        self.path(subject, SUBJECT)
        self.write("\n" + self.indent())
        self.predicateList(subject)
        self.write("\n.")
        return True

    def s_squared(self, subject):
        """Write an unreferenced blank-node subject using ``[]`` syntax."""
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write("\n" + self.indent() + "[]")
        self.predicateList(subject, newline=False)
        self.write("\n.")
        return True

    def path(self, node, position, newline=False):
        """Write *node* at *position*, inline where possible.

        :raises Error: if neither form could serialize the node.
        """
        if not (
            self.p_squared(node, position) or self.p_default(node, position, newline)
        ):
            raise Error("Cannot serialize node '%s'" % (node,))

    def p_default(self, node, position, newline=False):
        """Write the label of *node*, preceded by a space unless suppressed."""
        if position != SUBJECT and not newline:
            self.write(" ")
        self.write(self.label(node, position))
        return True

    def label(self, node, position):
        """Return the Turtle text for *node* at *position*."""
        if node == RDF.nil:
            return "()"
        if position is VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT),
            )
        else:
            node = self.relativize(node)

            return self.getQName(node, position == VERB) or node.n3()

    def p_squared(
        self,
        node,
        position,
    ):
        """Write a blank node inline as ``( ... )`` or ``[ ... ]``.

        Returns ``False`` without writing when *node* is not a blank node, was
        already serialized, is referenced more than once, or is in subject
        position.
        """
        if (
            not isinstance(node, BNode)
            or node in self._serialized
            or self._references[node] > 1
            or position == SUBJECT
        ):
            return False

        if self.isValidList(node):
            # this is a list
            self.depth += 2
            self.write(" (\n")
            self.depth -= 2
            self.doList(node)
            self.write("\n" + self.indent() + ")")
        else:
            # this is a Blank Node
            self.subjectDone(node)
            self.write("\n" + self.indent(1) + "[\n")
            self.depth += 1
            self.predicateList(node)
            self.depth -= 1
            self.write("\n" + self.indent(1) + "]")

        return True

    def isValidList(self, l_):
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.

        A chain of ``rdf:rest`` links that loops back on itself is rejected.
        """
        try:
            if self.store.value(l_, RDF.first) is None:
                return False
        except Exception:
            return False
        visited = set()
        while l_:
            # A cyclic rdf:rest chain never reaches rdf:nil; without this
            # guard the walk would not terminate.
            if l_ in visited:
                return False
            visited.add(l_)
            if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2:
                return False
            l_ = self.store.value(l_, RDF.rest)
        return True

    def doList(self, l_):
        """Write the items of the RDF list starting at *l_*, one per line."""
        i = 0
        while l_:
            item = self.store.value(l_, RDF.first)
            if item is not None:
                if i == 0:
                    self.write(self.indent(1))
                else:
                    self.write("\n" + self.indent(1))
                self.path(item, OBJECT, newline=True)
                self.subjectDone(l_)
            l_ = self.store.value(l_, RDF.rest)
            i += 1

    def predicateList(self, subject, newline=False):
        """Write every predicate of *subject* with its objects.

        Predicates are separated by `` ;`` plus a newline, and a final `` ;``
        is written after the last one.
        """
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if len(propList) == 0:
            return
        self.write(self.indent(1))
        self.verb(propList[0], newline=True)
        self.objectList(properties[propList[0]])
        for predicate in propList[1:]:
            self.write(" ;\n" + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])
        self.write(" ;")

    def verb(self, node, newline=False):
        """Write *node* in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects):
        """Write the objects of one predicate, comma separated."""
        count = len(objects)
        if count == 0:
            return
        # NOTE(review): ``cond and 0 or 1`` always evaluates to 1 because 0 is
        # falsy, so the depth is bumped even for a single object. Left as is:
        # the emitted indentation depends on it.
        depthmod = (count == 1) and 0 or 1
        self.depth += depthmod
        first_nl = False
        if count > 1:
            if not isinstance(objects[0], BNode):
                self.write("\n" + self.indent(1))
            else:
                self.write(" ")
            first_nl = True
        self.path(objects[0], OBJECT, newline=first_nl)
        for obj in objects[1:]:
            self.write(" ,")
            if not isinstance(obj, BNode):
                self.write("\n" + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod
|
||||
@@ -0,0 +1,91 @@
|
||||
"""
|
||||
Notation 3 (N3) RDF graph serializer for RDFLib.
|
||||
"""
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import OWL, Namespace
|
||||
from rdflib.plugins.serializers.turtle import OBJECT, SUBJECT, TurtleSerializer
|
||||
|
||||
__all__ = ["N3Serializer"]
|
||||
|
||||
SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")
|
||||
|
||||
|
||||
class N3Serializer(TurtleSerializer):
    """Turtle-based serializer that also writes graphs used as terms.

    A graph appearing in a triple is written as a ``{ ... }`` clause by a
    nested serializer whose ``parent`` is the enclosing serializer.
    """

    short_name = "n3"

    def __init__(self, store: Graph, parent=None):
        super().__init__(store)
        self.keywords.update({OWL.sameAs: "=", SWAP_LOG.implies: "=>"})
        self.parent = parent

    def reset(self):
        """Reset inherited state and the ``_stores`` mapping."""
        super().reset()
        self._stores = {}

    def endDocument(self):  # noqa: N802
        """Close the document; nested serializers leave that to the parent."""
        if self.parent:
            return
        super().endDocument()

    def indent(self, modifier=0):
        """Return this serializer's indent, extended by the parent's indent."""
        own_indent = super().indent(modifier)
        if self.parent is None:
            return own_indent
        return own_indent + self.parent.indent()  # modifier)

    def preprocessTriple(self, triple):  # noqa: N802
        """Preprocess *triple* and, recursively, triples of any graph term in it."""
        super().preprocessTriple(triple)
        for term in (triple[0], triple[1], triple[2]):
            if isinstance(term, Graph):
                for inner in term:
                    self.preprocessTriple(inner)

    def getQName(self, uri, gen_prefix=True):  # noqa: N802
        """Resolve a qname through the parent first, then through this serializer."""
        if self.parent is not None:
            from_parent = self.parent.getQName(uri, gen_prefix)
            if from_parent is not None:
                return from_parent
        return super().getQName(uri, gen_prefix)

    def statement(self, subject):
        """Write the statements for *subject*; ``False`` when it has no properties."""
        self.subjectDone(subject)
        if len(self.buildPredicateHash(subject)) == 0:
            return False
        if self.s_clause(subject):
            return True
        return super().statement(subject)

    def path(self, node, position, newline=False):
        """Write *node* as a clause if it is a graph, else as regular Turtle."""
        if self.p_clause(node, position):
            return
        super().path(node, position, newline)

    def s_clause(self, subject):
        """Write a graph-valued subject with its predicates; ``False`` otherwise."""
        if not isinstance(subject, Graph):
            return False
        self.write("\n" + self.indent())
        self.p_clause(subject, SUBJECT)
        self.predicateList(subject)
        self.write(" .")
        return True

    def p_clause(self, node, position):
        """Write a graph-valued node as ``{ ... }``; ``False`` for other nodes."""
        if not isinstance(node, Graph):
            return False
        self.subjectDone(node)
        if position is OBJECT:
            self.write(" ")
        self.write("{")
        self.depth += 1
        nested = N3Serializer(node, parent=self)
        # type error: Argument 1 to "serialize" of "TurtleSerializer" has incompatible type "Optional[IO[bytes]]"; expected "IO[bytes]"
        nested.serialize(self.stream)  # type: ignore[arg-type]
        self.depth -= 1
        self.write(self.indent() + "}")
        return True
|
||||
@@ -0,0 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.plugins.serializers.nt import _quoteLiteral
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import Literal
|
||||
|
||||
__all__ = ["NQuadsSerializer"]
|
||||
|
||||
|
||||
class NQuadsSerializer(Serializer):
    """Write every triple of every context of a context-aware store as a quad."""

    def __init__(self, store: Graph):
        if not store.context_aware:
            raise Exception(
                "NQuads serialization only makes " "sense for context-aware stores!"
            )

        super().__init__(store)
        self.store: ConjunctiveGraph

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write one quad line per triple to *stream*, then a final newline.

        *base* is not supported and a custom *encoding* is not honoured; both
        only produce a warning. Output always uses ``self.encoding``.
        """
        if base is not None:
            warnings.warn("NQuadsSerializer does not support base.")

        wants_custom_encoding = (
            encoding is not None and encoding.lower() != self.encoding.lower()
        )
        if wants_custom_encoding:
            warnings.warn(
                "NQuadsSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        encoding = self.encoding

        for context in self.store.contexts():
            for triple in context:
                row = _nq_row(triple, context.identifier)
                stream.write(row.encode(encoding, "replace"))
        stream.write(b"\n")
|
||||
|
||||
|
||||
def _nq_row(triple, context):
    """Format *triple* in graph *context* as a single N-Quads line.

    Literal objects are rendered with ``_quoteLiteral``; every other term,
    and the context, with its own ``n3()`` form.
    """
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    obj = triple[2]
    if isinstance(obj, Literal):
        object_text = _quoteLiteral(obj)
    else:
        object_text = obj.n3()
    return "%s %s %s %s .\n" % (
        subject_text,
        predicate_text,
        object_text,
        context.n3(),
    )
|
||||
@@ -0,0 +1,115 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import warnings
|
||||
from typing import IO, TYPE_CHECKING, Any, Optional, Tuple, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _TripleType
|
||||
|
||||
"""
|
||||
N-Triples RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
|
||||
format.
|
||||
"""
|
||||
|
||||
__all__ = ["NTSerializer"]
|
||||
|
||||
|
||||
class NTSerializer(Serializer):
    """
    Serializer plugin that writes a graph as N-Triples, one triple per line.
    """

    def __init__(self, store: Graph):
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = "utf-8",
        **kwargs: Any,
    ) -> None:
        """Write every triple of the store to *stream*.

        A *base* or an *encoding* other than ``"utf-8"`` is not honoured and
        only produces a warning; rows are encoded with ``str.encode()``
        defaults.
        """
        if base is not None:
            warnings.warn("NTSerializer does not support base.")
        if encoding != "utf-8":
            warnings.warn(
                "NTSerializer always uses UTF-8 encoding. "
                f"Given encoding was: {encoding}"
            )

        emit = stream.write
        for triple in self.store:
            row = _nt_row(triple)
            emit(row.encode())
|
||||
|
||||
|
||||
class NT11Serializer(NTSerializer):
    """
    Serializes RDF graphs to RDF 1.1 NTriples format.

    Behaves exactly like the nt serializer; output is UTF-8 encoded.
    """

    def __init__(self, store: Graph):
        # Ends up in Serializer.__init__ via NTSerializer; defaults to utf-8.
        super().__init__(store)
|
||||
|
||||
|
||||
def _nt_row(triple: _TripleType) -> str:
    """Format *triple* as a single N-Triples line, newline included.

    A literal object is rendered with ``_quoteLiteral``; every other term
    with its own ``n3()`` form.
    """
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    obj = triple[2]
    if isinstance(obj, Literal):
        object_text = _quoteLiteral(obj)
    else:
        object_text = obj.n3()
    return "%s %s %s .\n" % (subject_text, predicate_text, object_text)
|
||||
|
||||
|
||||
def _quoteLiteral(l_: Literal) -> str:  # noqa: N802
    """
    Render a literal for N-Triples output; a simpler version of
    term.Literal.n3().

    The quoted text is followed by ``@language`` or ``^^<datatype>`` when
    present. Raises ``Exception`` if the literal has both.
    """
    encoded = _quote_encode(l_)

    if l_.language:
        if l_.datatype:
            raise Exception("Literal has datatype AND language!")
        return "%s@%s" % (encoded, l_.language)

    if l_.datatype:
        return "%s^^<%s>" % (encoded, l_.datatype)

    return "%s" % encoded
|
||||
|
||||
|
||||
def _quote_encode(l_: str) -> str:
    """
    Return ``l_`` wrapped in double quotes with backslash, newline,
    double quote and carriage return backslash-escaped.
    """
    # One pass over the characters; each special character maps to its
    # escaped form, everything else is copied through unchanged.
    escapes = {"\\": "\\\\", "\n": "\\n", '"': '\\"', "\r": "\\r"}
    return '"%s"' % "".join(escapes.get(char, char) for char in l_)
|
||||
|
||||
|
||||
def _nt_unicode_error_resolver(
    err: UnicodeError,
) -> Tuple[Union[str, bytes], int]:
    """
    Do unicode char replaces as defined in https://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#ntrip_strings

    Each character of the offending slice ``err.object[err.start:err.end]``
    becomes ``\\uXXXX`` (code points up to 0xFFFF) or ``\\UXXXXXXXX``
    (anything larger). Returns the replacement text together with
    ``err.end``, the position at which processing resumes.
    """
    # type error: "UnicodeError" has no attribute "start"
    # type error: "UnicodeError" has no attribute "end"
    begin, resume_at = err.start, err.end  # type: ignore[attr-defined]

    escaped = []
    # type error: "UnicodeError" has no attribute "object"
    for char in err.object[begin:resume_at]:  # type: ignore[attr-defined]
        code_point = ord(char)
        if code_point > 0xFFFF:
            escaped.append("\\U%08X" % code_point)
        else:
            escaped.append("\\u%04X" % code_point)

    return "".join(escaped), resume_at
|
||||
|
||||
|
||||
codecs.register_error("_rdflib_nt_escape", _nt_unicode_error_resolver)
|
||||
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from rdflib import Dataset
|
||||
from rdflib.plugins.serializers.nquads import _nq_row
|
||||
from rdflib.plugins.serializers.nt import _nt_row
|
||||
from rdflib.serializer import Serializer
|
||||
|
||||
add_remove_methods = {"add": "A", "remove": "D"}
|
||||
|
||||
|
||||
class PatchSerializer(Serializer):
    """
    Creates an RDF patch file to add and remove triples/quads.
    Can either:
    - Create an add or delete patch for a single Dataset.
    - Create a patch to represent the difference between two Datasets.
    """

    def __init__(
        self,
        store: Dataset,
    ):
        self.store: Dataset = store
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Serialize the store to the given stream.
        :param stream: The stream to serialize to.
        :param base: The base URI to use for the serialization.
            Not supported; a warning is issued when it is given.
        :param encoding: The encoding to use for the serialization.
            Not honoured: output always uses ``self.encoding`` and a warning
            is issued when a different encoding is requested.
        :param kwargs: Additional keyword arguments.
        Supported keyword arguments:
        - operation: The operation to perform. Either 'add' or 'remove'.
        - target: The target Dataset to compare against.
        NB: Only one of 'operation' or 'target' should be provided.
        - header_id: The header ID to use.
        - header_prev: The previous header ID to use.
        :raises AssertionError: If ``operation`` is given but is neither
            'add' nor 'remove'.
        """
        operation = kwargs.get("operation")
        target = kwargs.get("target")
        header_id = kwargs.get("header_id")
        header_prev = kwargs.get("header_prev")
        if not header_id:
            header_id = f"uuid:{uuid4()}"

        if base is not None:
            warnings.warn("PatchSerializer does not support base.")
        # Compare the *requested* encoding before it is replaced by the
        # serializer's own; doing it the other way round made this warning
        # unreachable.
        if encoding is not None and encoding.lower() != self.encoding.lower():
            warnings.warn(
                "PatchSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        encoding = self.encoding

        def write_header():
            stream.write(f"H id <{header_id}> .\n".encode(encoding, "replace"))
            if header_prev:
                # Terminated with " ." like the "H id" and "TX" lines.
                stream.write(
                    f"H prev <{header_prev}> .\n".encode(encoding, "replace")
                )
            stream.write("TX .\n".encode(encoding, "replace"))

        def write_triples(contexts, op_code, use_passed_contexts=False):
            for context in contexts:
                if not use_passed_contexts:
                    # Re-resolve the context against this serializer's store.
                    context = self.store.get_context(context.identifier)
                for triple in context:
                    stream.write(
                        self._patch_row(triple, context.identifier, op_code).encode(
                            encoding, "replace"
                        )
                    )

        if operation:
            if operation not in add_remove_methods:
                # Raised explicitly (rather than via ``assert``) so that the
                # check also runs under ``python -O``; the exception type is
                # the one the former assert produced.
                raise AssertionError(f"Invalid operation: {operation}")
        elif not target:
            # No operation specified and no target specified
            # Fall back to default operation of "add" to prevent a no-op
            operation = "add"

        write_header()
        if operation:
            operation_code = add_remove_methods.get(operation)
            write_triples(self.store.contexts(), operation_code)
        elif target:
            to_add, to_remove = self._diff(target)
            write_triples(to_add.contexts(), "A", use_passed_contexts=True)
            write_triples(to_remove.contexts(), "D", use_passed_contexts=True)

        stream.write("TC .\n".encode(encoding, "replace"))

    def _diff(self, target):
        """Return ``(target - store, store - target)``: rows to add, rows to remove."""
        rows_to_add = target - self.store
        rows_to_remove = self.store - target
        return rows_to_add, rows_to_remove

    def _patch_row(self, triple, context_id, operation):
        """
        Build one patch line: the operation code followed by the triple as an
        N-Triples row (default context) or an N-Quads row (any other context).
        """
        if context_id == self.store.default_context.identifier:
            return f"{operation} {_nt_row(triple)}"
        else:
            return f"{operation} {_nq_row(triple, context_id)}"
|
||||
@@ -0,0 +1,391 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import xml.dom.minidom
|
||||
from typing import IO, Any, Dict, Generator, Optional, Set, Tuple
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.collection import Collection
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, RDFS, Namespace # , split_uri
|
||||
from rdflib.plugins.parsers.RDFVOC import RDFVOC
|
||||
from rdflib.plugins.serializers.xmlwriter import XMLWriter
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Identifier, Literal, Node, URIRef
|
||||
from rdflib.util import first, more_than
|
||||
|
||||
from .xmlwriter import ESCAPE_ENTITIES
|
||||
|
||||
__all__ = ["fix", "XMLSerializer", "PrettyXMLSerializer"]
|
||||
|
||||
|
||||
class XMLSerializer(Serializer):
    """
    Serializes a graph as RDF/XML, one ``rdf:Description`` element per subject.

    Attribute values taken from the graph or from caller arguments
    (``xml:base``, namespace URIs, ``xml:lang``, ``rdf:datatype``,
    ``rdf:about``, ``rdf:resource``) are written with ``quoteattr`` so that
    characters such as ``&``, ``<`` or ``"`` cannot break the document.
    """

    def __init__(self, store: Graph):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self) -> Generator[Tuple[str, URIRef], None, None]:
        """
        Yield ``(prefix, namespace)`` for every namespace used by a predicate
        of the store, plus the ``rdf`` namespace itself.
        """
        store = self.store
        nm = store.namespace_manager
        bindings: Dict[str, URIRef] = {}

        for predicate in set(store.predicates()):
            # type error: Argument 1 to "compute_qname_strict" of "NamespaceManager" has incompatible type "Node"; expected "str"
            prefix, namespace, name = nm.compute_qname_strict(predicate)  # type: ignore[arg-type]
            bindings[prefix] = URIRef(namespace)

        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")  # noqa: N806

        if "rdf" in bindings:
            assert bindings["rdf"] == RDFNS
        else:
            bindings["rdf"] = RDFNS

        yield from bindings.items()

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Write the store to ``stream`` as an RDF/XML document.

        :param stream: Binary stream receiving the encoded document.
        :param base: Base URI; when ``None`` the store's base is used if set.
        :param encoding: Not used; text is encoded with ``self.encoding``.
        :param kwargs: ``xml_base`` overrides the value written to the
            ``xml:base`` attribute.
        """
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base
        self.__stream = stream
        self.__serialized: Dict[Identifier, int] = {}
        encoding = self.encoding
        self.write = write = lambda uni: stream.write(uni.encode(encoding, "replace"))

        # startDocument
        write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)

        # startRDF
        write("<rdf:RDF\n")

        # If provided, write xml:base attribute for the RDF
        if "xml_base" in kwargs:
            write(" xml:base=%s\n" % quoteattr(str(kwargs["xml_base"])))
        elif self.base:
            write(" xml:base=%s\n" % quoteattr(str(self.base)))
        # TODO:
        # assert(
        #    namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
        bindings = sorted(self.__bindings())

        for prefix, namespace in bindings:
            if prefix:
                write(" xmlns:%s=%s\n" % (prefix, quoteattr(str(namespace))))
            else:
                write(" xmlns=%s\n" % quoteattr(str(namespace)))
        write(">\n")

        # write out triples by subject
        for subject in self.store.subjects():
            # type error: Argument 1 to "subject" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
            self.subject(subject, 1)  # type: ignore[arg-type]

        # endRDF
        write("</rdf:RDF>\n")

        # Drop the bookkeeping dict so that the memory can get garbage collected.
        del self.__serialized

    def subject(self, subject: Identifier, depth: int = 1) -> None:
        """
        Write the ``rdf:Description`` element for ``subject`` unless it has
        already been written during the current ``serialize`` call.
        """
        if subject in self.__serialized:
            return
        self.__serialized[subject] = 1

        if not isinstance(subject, (BNode, URIRef)):
            return

        write = self.write
        indent = " " * depth
        element_name = "rdf:Description"

        if isinstance(subject, BNode):
            write('%s<%s rdf:nodeID="%s"' % (indent, element_name, subject))
        else:
            uri = quoteattr(self.relativize(subject))
            write("%s<%s rdf:about=%s" % (indent, element_name, uri))

        if (subject, None, None) in self.store:
            write(">\n")

            for predicate, object in self.store.predicate_objects(subject):
                # type error: Argument 1 to "predicate" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
                # type error: Argument 2 to "predicate" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
                self.predicate(predicate, object, depth + 1)  # type: ignore[arg-type]
            write("%s</%s>\n" % (indent, element_name))

        else:
            write("/>\n")

    def predicate(
        self, predicate: Identifier, object: Identifier, depth: int = 1
    ) -> None:
        """
        Write one property element for ``predicate`` with ``object`` as its
        value: element text for a Literal, ``rdf:nodeID`` for a BNode and
        ``rdf:resource`` for anything else.
        """
        write = self.write
        indent = " " * depth
        qname = self.store.namespace_manager.qname_strict(predicate)

        if isinstance(object, Literal):
            attributes = ""

            if object.language:
                attributes += " xml:lang=%s" % quoteattr(str(object.language))

            if object.datatype:
                attributes += " rdf:datatype=%s" % quoteattr(str(object.datatype))

            write(
                "%s<%s%s>%s</%s>\n"
                % (indent, qname, attributes, escape(object, ESCAPE_ENTITIES), qname)
            )
        elif isinstance(object, BNode):
            write('%s<%s rdf:nodeID="%s"/>\n' % (indent, qname, object))
        else:
            write(
                "%s<%s rdf:resource=%s/>\n"
                % (indent, qname, quoteattr(self.relativize(object)))
            )
|
||||
|
||||
|
||||
XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
|
||||
XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
|
||||
OWL_NS = Namespace("http://www.w3.org/2002/07/owl#")
|
||||
|
||||
|
||||
# TODO:
|
||||
def fix(val: str) -> str:
    "strip off _: from nodeIDs... as they are not valid NCNames"
    # Only a leading "_:" is removed; any other value is returned untouched.
    return val[2:] if val.startswith("_:") else val
|
||||
|
||||
|
||||
class PrettyXMLSerializer(Serializer):
    """
    Serializes a graph as nested ("pretty") RDF/XML.

    Objects that are themselves subjects are written inline below the
    property that references them, up to ``max_depth`` levels of nesting.
    """

    def __init__(self, store: Graph, max_depth=3):
        super(PrettyXMLSerializer, self).__init__(store)
        self.forceRDFAbout: Set[URIRef] = set()
        # Default nesting depth for serialize(); a ``max_depth`` keyword
        # passed to serialize() takes precedence.
        self._default_max_depth = max_depth

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Write the store to ``stream`` as RDF/XML.

        :param stream: Binary stream handed to the ``XMLWriter``.
        :param base: Base URI; when ``None`` the store's base is used if set.
        :param encoding: Passed through to the ``XMLWriter``.
        :param kwargs: ``max_depth`` (must be greater than 0) limits inline
            nesting; ``xml_base`` overrides the ``xml:base`` attribute value.
        """
        self.__serialized: Dict[Identifier, int] = {}
        store = self.store
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif store.base is not None:
            self.base = store.base
        self.max_depth = kwargs.get("max_depth", self._default_max_depth)
        assert self.max_depth > 0, "max_depth must be greater than 0"

        self.nm = nm = store.namespace_manager
        self.writer = writer = XMLWriter(stream, nm, encoding)
        namespaces = {}

        possible: Set[Node] = set(store.predicates()).union(
            store.objects(None, RDF.type)
        )

        for predicate in possible:
            # type error: Argument 1 to "compute_qname_strict" of "NamespaceManager" has incompatible type "Node"; expected "str"
            prefix, namespace, local = nm.compute_qname_strict(predicate)  # type: ignore[arg-type]
            namespaces[prefix] = namespace

        namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

        writer.push(RDFVOC.RDF)

        if "xml_base" in kwargs:
            writer.attribute(XMLBASE, kwargs["xml_base"])
        elif self.base:
            writer.attribute(XMLBASE, self.base)

        writer.namespaces(namespaces.items())

        subject: IdentifiedNode
        # Write out subjects that can not be inline
        # type error: Incompatible types in assignment (expression has type "Node", variable has type "IdentifiedNode")
        for subject in store.subjects():  # type: ignore[assignment]
            if (None, None, subject) in store:
                # Referenced as an object somewhere: leave it for inlining,
                # unless it references itself.
                if (subject, None, subject) in store:
                    self.subject(subject, 1)
            else:
                self.subject(subject, 1)

        # write out anything that has not yet been reached
        # write out BNodes last (to ensure they can be inlined where possible)
        bnodes = set()

        # type error: Incompatible types in assignment (expression has type "Node", variable has type "IdentifiedNode")
        for subject in store.subjects():  # type: ignore[assignment]
            if isinstance(subject, BNode):
                bnodes.add(subject)
                continue
            self.subject(subject, 1)

        # now serialize only those BNodes that have not been serialized yet
        for bnode in bnodes:
            if bnode not in self.__serialized:
                # Must be ``bnode``: passing the leftover ``subject`` loop
                # variable re-visited an unrelated node and skipped this one.
                self.subject(bnode, 1)

        writer.pop(RDFVOC.RDF)
        stream.write("\n".encode("latin-1"))

        # Set to None so that the memory can get garbage collected.
        self.__serialized = None  # type: ignore[assignment]

    def subject(self, subject: Identifier, depth: int = 1):
        """
        Write the element describing ``subject``.

        A subject listed in ``forceRDFAbout`` is written as an empty
        ``rdf:Description`` carrying only ``rdf:about`` and is then removed
        from that set. Otherwise the subject is written, with all its
        properties, unless it has already been serialized.
        """
        store = self.store
        writer = self.writer

        if subject in self.forceRDFAbout:
            writer.push(RDFVOC.Description)
            writer.attribute(RDFVOC.about, self.relativize(subject))
            writer.pop(RDFVOC.Description)
            self.forceRDFAbout.remove(subject)  # type: ignore[arg-type]

        elif subject not in self.__serialized:
            self.__serialized[subject] = 1
            rdf_type = first(store.objects(subject, RDF.type))

            try:
                # type error: Argument 1 to "qname" of "NamespaceManager" has incompatible type "Optional[Node]"; expected "str"
                self.nm.qname(rdf_type)  # type: ignore[arg-type]
            except Exception:
                # No usable qname for the type (or no type at all): fall back
                # to a plain rdf:Description element.
                rdf_type = None

            element = rdf_type or RDFVOC.Description
            # type error: Argument 1 to "push" of "XMLWriter" has incompatible type "Node"; expected "str"
            writer.push(element)  # type: ignore[arg-type]

            if isinstance(subject, BNode):
                # The label is currently always written. The disabled
                # refinement was to write it only when the node is referenced
                # more than once:
                #     more_than(store.triples((None, None, subject)), 1)
                writer.attribute(RDFVOC.nodeID, fix(subject))
            else:
                writer.attribute(RDFVOC.about, self.relativize(subject))

            if (subject, None, None) in store:
                for predicate, object in store.predicate_objects(subject):
                    # The rdf:type used as element name is not repeated.
                    if not (predicate == RDF.type and object == rdf_type):
                        # type error: Argument 1 to "predicate" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                        # type error: Argument 2 to "predicate" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                        self.predicate(predicate, object, depth + 1)  # type: ignore[arg-type]

            # type error: Argument 1 to "pop" of "XMLWriter" has incompatible type "Node"; expected "Optional[str]"
            writer.pop(element)  # type: ignore[arg-type]

    def predicate(
        self, predicate: Identifier, object: Identifier, depth: int = 1
    ) -> None:
        """
        Write the property element for ``predicate`` with value ``object``.

        Literals become element content, already-serialized or description-less
        nodes become references, RDF lists become ``parseType="Collection"``
        and other nodes are nested inline while ``depth <= max_depth``.
        """
        writer = self.writer
        store = self.store
        writer.push(predicate)

        if isinstance(object, Literal):
            if object.language:
                writer.attribute(XMLLANG, object.language)

            if object.datatype == RDF.XMLLiteral and isinstance(
                object.value, xml.dom.minidom.Document
            ):
                writer.attribute(RDFVOC.parseType, "Literal")
                writer.text("")
                writer.stream.write(object)
            else:
                if object.datatype:
                    writer.attribute(RDFVOC.datatype, object.datatype)
                writer.text(object)

        elif (
            object in self.__serialized
            or not (object, None, None) in store  # noqa: E713
        ):
            if isinstance(object, BNode):
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDFVOC.nodeID, fix(object))
            else:
                writer.attribute(RDFVOC.resource, self.relativize(object))

        else:
            if first(store.objects(object, RDF.first)):  # may not have type
                # RDF.List

                self.__serialized[object] = 1

                # Warn that any assertions on object other than
                # RDF.first and RDF.rest are ignored... including RDF.List
                import warnings

                warnings.warn(
                    "Assertions on %s other than RDF.first " % repr(object)
                    + "and RDF.rest are ignored ... including RDF.List",
                    UserWarning,
                    stacklevel=2,
                )
                writer.attribute(RDFVOC.parseType, "Collection")

                col = Collection(store, object)

                for item in col:
                    if isinstance(item, URIRef):
                        self.forceRDFAbout.add(item)
                    # type error: Argument 1 to "subject" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                    self.subject(item)  # type: ignore[arg-type]

                    if not isinstance(item, URIRef):
                        # type error: Invalid index type "Node" for "Dict[Identifier, int]"; expected type "Identifier"
                        self.__serialized[item] = 1  # type: ignore[index]
            else:
                if first(
                    store.triples_choices(
                        # type error: Argument 1 to "triples_choices" of "Graph" has incompatible type "Tuple[Identifier, URIRef, List[URIRef]]"; expected "Union[Tuple[List[Node], Node, Node], Tuple[Node, List[Node], Node], Tuple[Node, Node, List[Node]]]"
                        (object, RDF.type, [OWL_NS.Class, RDFS.Class])  # type: ignore[arg-type]
                    )
                ) and isinstance(object, URIRef):
                    writer.attribute(RDFVOC.resource, self.relativize(object))

                elif depth <= self.max_depth:
                    self.subject(object, depth + 1)

                elif isinstance(object, BNode):
                    if (
                        object not in self.__serialized
                        and (object, None, None) in store
                        and len(list(store.subjects(object=object))) == 1
                    ):
                        # inline blank nodes if they haven't been serialized yet
                        # and are only referenced once (regardless of depth)
                        self.subject(object, depth + 1)
                    else:
                        writer.attribute(RDFVOC.nodeID, fix(object))

                else:
                    writer.attribute(RDFVOC.resource, self.relativize(object))

        writer.pop(predicate)
|
||||
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Trig RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TR/trig/> for syntax specification.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.plugins.serializers.turtle import TurtleSerializer
|
||||
from rdflib.term import BNode, Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _ContextType, _SubjectType
|
||||
|
||||
__all__ = ["TrigSerializer"]
|
||||
|
||||
|
||||
class TrigSerializer(TurtleSerializer):
    """
    TriG serializer: writes each non-empty context of the store as a
    ``{ ... }`` block, reusing the Turtle machinery for the statements inside.
    """

    short_name = "trig"
    indentString = 4 * " "

    def __init__(self, store: Union[Graph, ConjunctiveGraph]):
        self.default_context: Optional[Node]
        if store.context_aware:
            if TYPE_CHECKING:
                assert isinstance(store, ConjunctiveGraph)
            self.contexts = list(store.contexts())
            self.default_context = store.default_context.identifier
            if store.default_context:
                self.contexts.append(store.default_context)
        else:
            # A plain graph is treated as a single, unnamed-default context.
            self.contexts = [store]
            self.default_context = None

        super(TrigSerializer, self).__init__(store)

    def preprocess(self) -> None:
        """
        Run the Turtle preprocessing once per non-empty context and remember,
        for each context, its ordered subject list and its subject map.
        """
        for context in self.contexts:
            # do not write unnecessary prefix (ex: for an empty default graph)
            if len(context) == 0:
                continue
            self.store = context
            # Don't generate a new prefix for a graph URI if one already exists
            self.getQName(context.identifier, False)
            self._subjects = {}

            for triple in context:
                self.preprocessTriple(triple)

            for subject in self._subjects.keys():
                self._references[subject] += 1

            self._contexts[context] = (self.orderSubjects(), self._subjects)

    def reset(self) -> None:
        """Reset the inherited state and clear the per-context bookkeeping."""
        super(TrigSerializer, self).reset()
        self._contexts: Dict[
            _ContextType,
            Tuple[List[_SubjectType], Dict[_SubjectType, bool]],
        ] = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """
        Write all preprocessed contexts to ``stream``.

        :param stream: Binary stream the document is written to.
        :param base: Base URI; when ``None`` the store's base is used if set.
        :param encoding: Not read by this method.
        :param spacious: When not ``None``, stored as ``self._spacious``.
        :param kwargs: Accepted and ignored by this method.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()

        self.startDocument()

        for store, (ordered_subjects, subjects) in self._contexts.items():
            if not ordered_subjects:
                continue

            self._serialized = {}
            self.store = store
            self._subjects = subjects

            if self.default_context and store.identifier == self.default_context:
                self.write(self.indent() + "\n{")
            else:
                iri: Optional[str]
                if isinstance(store.identifier, BNode):
                    iri = store.identifier.n3()
                else:
                    # Show the full graph URI if a prefix for it doesn't already exist
                    iri = self.getQName(store.identifier, False)
                    if iri is None:
                        iri = store.identifier.n3()
                self.write(self.indent() + "\n%s {" % iri)

            self.depth += 1
            for subject in ordered_subjects:
                if self.isDone(subject):
                    continue
                # A blank line follows every statement that was written. (The
                # former ``firstTime`` flag was always cleared right before it
                # was tested, so it never suppressed anything.)
                if self.statement(subject):
                    self.write("\n")
            self.depth -= 1
            self.write("}\n")

        self.endDocument()
        stream.write("\n".encode("latin-1"))
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.namespace import Namespace
|
||||
from rdflib.plugins.serializers.xmlwriter import XMLWriter
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
__all__ = ["TriXSerializer"]
|
||||
|
||||
# TODO: Move this somewhere central
|
||||
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
|
||||
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
|
||||
|
||||
|
||||
class TriXSerializer(Serializer):
    """
    Serializes a context-aware store as TriX: one ``graph`` element per
    context, each containing one ``triple`` element per statement.
    """

    def __init__(self, store: Graph):
        super(TriXSerializer, self).__init__(store)
        if store.context_aware:
            return
        raise Exception(
            "TriX serialization only makes sense for context-aware stores"
        )

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Write the store to ``stream`` as a TriX document.

        :param stream: Binary stream handed to the ``XMLWriter``.
        :param base: Base URI; when ``None`` the store's base is used if set.
        :param encoding: Passed through to the ``XMLWriter``.
        :param kwargs: Accepted and ignored.
        :raises Exception: If the store is neither a ConjunctiveGraph nor a Graph.
        """
        namespace_manager = self.store.namespace_manager
        writer = XMLWriter(stream, namespace_manager, encoding, extra_ns={"": TRIXNS})
        self.writer = writer

        writer.push(TRIXNS["TriX"])
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is None and self.store.base is not None:
            base = self.store.base
        if base is not None:
            writer.attribute("http://www.w3.org/XML/1998/namespacebase", base)
        writer.namespaces()

        store = self.store
        if isinstance(store, ConjunctiveGraph):
            for subgraph in store.contexts():
                self._writeGraph(subgraph)
        elif isinstance(store, Graph):
            self._writeGraph(store)
        else:
            raise Exception(f"Unknown graph type: {type(self.store)}")

        writer.pop()
        stream.write(b"\n")

    def _writeGraph(self, graph):  # noqa: N802
        """Write one ``graph`` element: optional base, optional name, then its triples."""
        writer = self.writer
        writer.push(TRIXNS["graph"])
        if graph.base:
            writer.attribute("http://www.w3.org/XML/1998/namespacebase", graph.base)
        # Only URI-named graphs get a name element.
        if isinstance(graph.identifier, URIRef):
            writer.element(TRIXNS["uri"], content=str(graph.identifier))

        for statement in graph.triples((None, None, None)):
            self._writeTriple(statement)
        writer.pop()

    def _writeTriple(self, triple):  # noqa: N802
        """
        Write one ``triple`` element with a child per component: ``uri`` for a
        URIRef, ``id`` for a BNode, ``typedLiteral``/``plainLiteral`` for a
        Literal. Components of any other type produce no child element.
        """
        writer = self.writer
        writer.push(TRIXNS["triple"])
        for term in triple:
            text = str(term)
            if isinstance(term, URIRef):
                writer.element(TRIXNS["uri"], content=text)
                continue
            if isinstance(term, BNode):
                writer.element(TRIXNS["id"], content=text)
                continue
            if not isinstance(term, Literal):
                continue

            # A datatype takes precedence over a language tag.
            if term.datatype:
                writer.element(
                    TRIXNS["typedLiteral"],
                    content=text,
                    attributes={TRIXNS["datatype"]: str(term.datatype)},
                )
            elif term.language:
                writer.element(
                    TRIXNS["plainLiteral"],
                    content=text,
                    attributes={XMLNS["lang"]: str(term.language)},
                )
            else:
                writer.element(TRIXNS["plainLiteral"], content=text)
        writer.pop()
|
||||
@@ -0,0 +1,453 @@
|
||||
"""
|
||||
Turtle RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
DefaultDict,
|
||||
Dict,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
)
|
||||
|
||||
from rdflib.exceptions import Error
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, RDFS
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, Literal, Node, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _PredicateType, _SubjectType, _TripleType
|
||||
|
||||
__all__ = ["RecursiveSerializer", "TurtleSerializer"]
|
||||
|
||||
|
||||
class RecursiveSerializer(Serializer):
|
||||
topClasses = [RDFS.Class]
|
||||
predicateOrder = [RDF.type, RDFS.label]
|
||||
maxDepth = 10
|
||||
indentString = " "
|
||||
roundtrip_prefixes: Tuple[Any, ...] = ()
|
||||
|
||||
def __init__(self, store: Graph):
|
||||
super(RecursiveSerializer, self).__init__(store)
|
||||
self.stream: Optional[IO[bytes]] = None
|
||||
self.reset()
|
||||
|
||||
def addNamespace(self, prefix: str, uri: URIRef) -> None:
|
||||
if prefix in self.namespaces and self.namespaces[prefix] != uri:
|
||||
raise Exception(
|
||||
"Trying to override namespace prefix %s => %s, but it's already bound to %s"
|
||||
% (prefix, uri, self.namespaces[prefix])
|
||||
)
|
||||
self.namespaces[prefix] = uri
|
||||
|
||||
def checkSubject(self, subject: _SubjectType) -> bool:
|
||||
"""Check to see if the subject should be serialized yet"""
|
||||
if (
|
||||
(self.isDone(subject))
|
||||
or (subject not in self._subjects)
|
||||
or ((subject in self._topLevels) and (self.depth > 1))
|
||||
or (isinstance(subject, URIRef) and (self.depth >= self.maxDepth))
|
||||
):
|
||||
return False
|
||||
return True
|
||||
|
||||
def isDone(self, subject: _SubjectType) -> bool:
|
||||
"""Return true if subject is serialized"""
|
||||
return subject in self._serialized
|
||||
|
||||
def orderSubjects(self) -> List[_SubjectType]:
|
||||
seen: Dict[_SubjectType, bool] = {}
|
||||
subjects: List[_SubjectType] = []
|
||||
|
||||
for classURI in self.topClasses:
|
||||
members = list(self.store.subjects(RDF.type, classURI))
|
||||
members.sort()
|
||||
|
||||
subjects.extend(members)
|
||||
for member in members:
|
||||
self._topLevels[member] = True
|
||||
seen[member] = True
|
||||
|
||||
recursable = [
|
||||
(isinstance(subject, BNode), self._references[subject], subject)
|
||||
for subject in self._subjects
|
||||
if subject not in seen
|
||||
]
|
||||
|
||||
recursable.sort()
|
||||
subjects.extend([subject for (isbnode, refs, subject) in recursable])
|
||||
|
||||
return subjects
|
||||
|
||||
def preprocess(self) -> None:
|
||||
for triple in self.store.triples((None, None, None)):
|
||||
self.preprocessTriple(triple)
|
||||
|
||||
def preprocessTriple(self, spo: _TripleType) -> None:
|
||||
s, p, o = spo
|
||||
self._references[o] += 1
|
||||
self._subjects[s] = True
|
||||
|
||||
def reset(self) -> None:
|
||||
self.depth = 0
|
||||
# Typed none because nothing is using it ...
|
||||
self.lists: Dict[None, None] = {}
|
||||
self.namespaces: Dict[str, URIRef] = {}
|
||||
self._references: DefaultDict[Node, int] = defaultdict(int)
|
||||
self._serialized: Dict[_SubjectType, bool] = {}
|
||||
self._subjects: Dict[_SubjectType, bool] = {}
|
||||
self._topLevels: Dict[_SubjectType, bool] = {}
|
||||
|
||||
if self.roundtrip_prefixes:
|
||||
if hasattr(self.roundtrip_prefixes, "__iter__"):
|
||||
for prefix, ns in self.store.namespaces():
|
||||
if prefix in self.roundtrip_prefixes:
|
||||
self.addNamespace(prefix, ns)
|
||||
else:
|
||||
for prefix, ns in self.store.namespaces():
|
||||
self.addNamespace(prefix, ns)
|
||||
|
||||
def buildPredicateHash(
|
||||
self, subject: _SubjectType
|
||||
) -> Mapping[_PredicateType, List[Node]]:
|
||||
"""
|
||||
Build a hash key by predicate to a list of objects for the given
|
||||
subject
|
||||
"""
|
||||
properties: Dict[_PredicateType, List[Node]] = {}
|
||||
for s, p, o in self.store.triples((subject, None, None)):
|
||||
oList = properties.get(p, [])
|
||||
oList.append(o)
|
||||
properties[p] = oList
|
||||
return properties
|
||||
|
||||
def sortProperties(
|
||||
self, properties: Mapping[_PredicateType, List[Node]]
|
||||
) -> List[_PredicateType]:
|
||||
"""Take a hash from predicate uris to lists of values.
|
||||
Sort the lists of values. Return a sorted list of properties."""
|
||||
# Sort object lists
|
||||
for prop, objects in properties.items():
|
||||
objects.sort()
|
||||
|
||||
# Make sorted list of properties
|
||||
propList: List[_PredicateType] = []
|
||||
seen: Dict[_PredicateType, bool] = {}
|
||||
for prop in self.predicateOrder:
|
||||
if (prop in properties) and (prop not in seen):
|
||||
propList.append(prop)
|
||||
seen[prop] = True
|
||||
props = list(properties.keys())
|
||||
props.sort()
|
||||
for prop in props:
|
||||
if prop not in seen:
|
||||
propList.append(prop)
|
||||
seen[prop] = True
|
||||
return propList
|
||||
|
||||
def subjectDone(self, subject: _SubjectType) -> None:
    """Record ``subject`` in ``self._serialized`` so it is treated as already written."""
    self._serialized[subject] = True
|
||||
|
||||
def indent(self, modifier: int = 0) -> str:
    """Return ``self.indentString`` repeated ``self.depth + modifier`` times."""
    level = self.depth + modifier
    return level * self.indentString
|
||||
|
||||
def write(self, text: str) -> None:
    """Encode ``text`` with ``self.encoding`` and write the bytes to ``self.stream``.

    Characters the encoding cannot represent are substituted (``"replace"``
    error handler) instead of raising.
    """
    payload = text.encode(self.encoding, "replace")
    # type error: Item "None" of "Optional[IO[bytes]]" has no attribute "write"
    self.stream.write(payload)  # type: ignore[union-attr]
|
||||
|
||||
|
||||
# Position of a node within a triple, as passed to path()/label().
SUBJECT, VERB, OBJECT = 0, 1, 2

# Passed as ``gen_prefix`` when a prefixed name is requested for a literal's datatype.
_GEN_QNAME_FOR_DT = False
# Initial value of ``TurtleSerializer._spacious``.
_SPACIOUS_OUTPUT = False
|
||||
|
||||
|
||||
class TurtleSerializer(RecursiveSerializer):
    """Serializer that writes a graph in Turtle syntax.

    Subjects are written one statement at a time. Blank nodes that are used
    as an object, are referenced at most once and have not been written yet
    are inlined as ``[ ... ]``; well-formed RDF collections are written as
    ``( ... )``.
    """

    short_name = "turtle"
    indentString = " "

    def __init__(self, store: Graph):
        # Created before the base constructor runs; presumably the base class
        # can reach addNamespace(), which reads this mapping -- TODO confirm.
        self._ns_rewrite: Dict[str, str] = {}
        super(TurtleSerializer, self).__init__(store)
        # Predicates written as a bare keyword instead of a prefixed name.
        self.keywords: Dict[Node, str] = {RDF.type: "a"}
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    # type error: Return type "str" of "addNamespace" incompatible with return type "None" in supertype "RecursiveSerializer"
    def addNamespace(self, prefix: str, namespace: URIRef) -> str:  # type: ignore[override]
        """Register ``prefix`` for ``namespace`` and return the prefix actually used.

        Turtle does not support prefixes that start with ``_``; such a prefix,
        or one already bound to a different namespace, is rewritten to
        ``p<prefix>`` (with further ``p`` characters prepended until the name
        is free). Namespaces are registered as triples are seen, so a
        rewritten name such as ``p_9`` may later clash with a real ``p_9``
        prefix; ``self._ns_rewrite`` remembers the rewrites made so far.
        """
        if (prefix > "" and prefix[0] == "_") or self.namespaces.get(
            prefix, namespace
        ) != namespace:
            if prefix not in self._ns_rewrite:
                p = "p" + prefix
                while p in self.namespaces:
                    p = "p" + p
                self._ns_rewrite[prefix] = p

            prefix = self._ns_rewrite.get(prefix, prefix)

        super(TurtleSerializer, self).addNamespace(prefix, namespace)
        return prefix

    def reset(self) -> None:
        """Reset the base serializer state plus the Turtle-specific bookkeeping."""
        super(TurtleSerializer, self).reset()
        # typing as Dict[None, None] because nothing seems to be using it
        self._shortNames: Dict[None, None] = {}
        self._started = False
        self._ns_rewrite = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Write the whole graph to ``stream`` as Turtle.

        :param stream: binary stream that receives the encoded output.
        :param base: base IRI for this run; when ``None`` the graph's own
            ``base`` is used if it is set.
        :param encoding: accepted but not read by this method; ``write()``
            encodes with ``self.encoding``.
        :param spacious: when not ``None``, overrides ``self._spacious``.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        try:
            self.preprocess()
            subjects_list = self.orderSubjects()

            self.startDocument()

            for subject in subjects_list:
                if self.isDone(subject):
                    continue
                # NOTE: the original "firstTime" flag was always False by the
                # time it was tested, so a newline follows every statement.
                if self.statement(subject):
                    self.write("\n")

            self.endDocument()
            stream.write(b"\n")
        finally:
            # Always drop the per-call base so a failed run cannot leak it
            # into the next serialize() call on this instance.
            self.base = None

    def preprocessTriple(self, triple: _TripleType) -> None:
        """Register the namespaces needed by ``triple`` and count blank-node predicates."""
        super(TurtleSerializer, self).preprocessTriple(triple)
        for i, node in enumerate(triple):
            if i == VERB and node in self.keywords:
                # predicate is a keyword
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):  # hmm - when is P ever a bnode?
            self._references[p] += 1

    # TODO: Rename to get_pname
    def getQName(self, uri: Node, gen_prefix: bool = True) -> Optional[str]:
        """Return ``prefix:local`` for ``uri`` or ``None`` if it cannot be abbreviated.

        ``None`` is returned when ``uri`` is not a ``URIRef``, when no prefix
        can be determined, or when the local part ends with ``.``. As a side
        effect the namespace is registered through ``addNamespace``.
        """
        if not isinstance(uri, URIRef):
            return None

        parts = None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)

            if pfx is not None:
                parts = (pfx, uri, "")
            else:
                # nothing worked
                return None

        prefix, namespace, local = parts

        local = local.replace(r"(", r"\(").replace(r")", r"\)")

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return "%s:%s" % (prefix, local)

    def startDocument(self) -> None:
        """Write the ``@base`` line (if a base is set) and the sorted ``@prefix`` lines."""
        self._started = True
        ns_list = sorted(self.namespaces.items())

        if self.base:
            self.write(self.indent() + "@base <%s> .\n" % self.base)
        for prefix, uri in ns_list:
            self.write(self.indent() + "@prefix %s: <%s> .\n" % (prefix, uri))
        if ns_list and self._spacious:
            self.write("\n")

    def endDocument(self) -> None:
        """Write a trailing blank line when spacious output is enabled."""
        if self._spacious:
            self.write("\n")

    def statement(self, subject: _SubjectType) -> bool:
        """Mark ``subject`` as done and write its statement; return whether one was written."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject: _SubjectType) -> bool:
        """Write ``subject`` followed by its predicate list and a closing `` .``."""
        self.write("\n" + self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(" .")
        return True

    def s_squared(self, subject: _SubjectType) -> bool:
        """Write an unreferenced blank-node subject as ``[] ... .``.

        Returns ``False`` without writing when ``subject`` is not a blank
        node or is referenced somewhere.
        """
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write("\n" + self.indent() + "[]")
        self.predicateList(subject)
        self.write(" .")
        return True

    def path(self, node: Node, position: int, newline: bool = False) -> None:
        """Write ``node`` at ``position``, inlined if possible, else by label.

        :raises Error: if neither strategy reports success.
        """
        if not (
            self.p_squared(node, position, newline)
            or self.p_default(node, position, newline)
        ):
            raise Error("Cannot serialize node '%s'" % (node,))

    def p_default(self, node: Node, position: int, newline: bool = False) -> bool:
        """Write the label of ``node``, preceded by a space unless it starts a line."""
        if position != SUBJECT and not newline:
            self.write(" ")
        self.write(self.label(node, position))
        return True

    def label(self, node: Node, position: int) -> str:
        """Return the Turtle text for ``node`` when written at ``position``."""
        if node == RDF.nil:
            return "()"
        # Compare by value: identity comparison of ints is an implementation detail.
        if position == VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT),
            )
        else:
            node = self.relativize(node)  # type: ignore[type-var]

            return self.getQName(node, position == VERB) or node.n3()

    def p_squared(self, node: Node, position: int, newline: bool = False) -> bool:
        """Inline a blank node as ``( ... )`` or ``[ ... ]``.

        Returns ``False`` without writing when ``node`` is not a blank node,
        was already serialized, is referenced more than once, or is in
        subject position.
        """
        if (
            not isinstance(node, BNode)
            or node in self._serialized
            or self._references[node] > 1
            or position == SUBJECT
        ):
            return False

        if not newline:
            self.write(" ")

        if self.isValidList(node):
            # this is a list
            self.write("(")
            self.depth += 1
            self.doList(node)
            self.depth -= 1
            self.write(" )")
        else:
            self.subjectDone(node)
            # Net effect: the predicate list is written one level deeper.
            self.depth += 2
            self.write("[")
            self.depth -= 1
            self.predicateList(node, newline=False)
            self.write(" ]")
            self.depth -= 1

        return True

    def isValidList(self, l_: Node) -> bool:
        """Check whether ``l_`` heads a valid RDF list.

        Every list node must carry exactly two predicate/object pairs, i.e.
        no properties besides the list ones. A chain of ``rdf:rest`` links
        that revisits a node is rejected instead of being followed forever.
        """
        try:
            if self.store.value(l_, RDF.first) is None:
                return False
        except Exception:
            return False
        visited = set()
        while l_:
            if l_ in visited:
                # cyclic rdf:rest chain: not a well-formed list
                return False
            visited.add(l_)
            if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2:
                return False
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Node")
            l_ = self.store.value(l_, RDF.rest)  # type: ignore[assignment]
        return True

    def doList(self, l_: Node) -> None:
        """Write the items of the RDF list headed by ``l_`` and mark its nodes as done."""
        while l_:
            item = self.store.value(l_, RDF.first)
            if item is not None:
                self.path(item, OBJECT)
                self.subjectDone(l_)
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Node")
            l_ = self.store.value(l_, RDF.rest)  # type: ignore[assignment]

    def predicateList(self, subject: Node, newline: bool = False) -> None:
        """Write all predicates of ``subject`` with their objects, separated by `` ;``."""
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if len(propList) == 0:
            return
        self.verb(propList[0], newline=newline)
        self.objectList(properties[propList[0]])
        for predicate in propList[1:]:
            self.write(" ;\n" + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node: Node, newline: bool = False) -> None:
        """Write ``node`` in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects: Sequence[Node]) -> None:
        """Write ``objects`` separated by ``,``, one per line after the first."""
        count = len(objects)
        if count == 0:
            return
        # NOTE: the original expression ``(count == 1) and 0 or 1`` evaluates
        # to 1 for every count; the constant keeps the emitted indentation
        # byte-for-byte the same.
        depthmod = 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(",\n" + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod
|
||||
+128
@@ -0,0 +1,128 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
from typing import IO, TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.term import URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.namespace import Namespace, NamespaceManager
|
||||
|
||||
|
||||
__all__ = ["XMLWriter"]
|
||||
|
||||
# Extra replacements applied by XMLWriter.text() on top of the standard
# ``&``, ``<`` and ``>`` escaping. XML parsers normalise a literal carriage
# return to a line feed, so it is written as a character reference to
# survive a round trip.
ESCAPE_ENTITIES = {"\r": "&#13;"}


class XMLWriter:
    """Minimal streaming XML writer driven by push/pop of element URIs.

    Element and attribute names are given as URIs and turned into qualified
    names through ``extra_ns`` first and the namespace manager otherwise.
    """

    def __init__(
        self,
        stream: IO[bytes],
        namespace_manager: NamespaceManager,
        encoding: Optional[str] = None,
        decl: int = 1,
        extra_ns: Optional[Dict[str, Namespace]] = None,
    ):
        """Wrap ``stream`` in an encoding writer and optionally emit the XML declaration.

        :param stream: binary stream receiving the encoded document.
        :param namespace_manager: used for ``namespaces()`` and ``qname_strict()``.
        :param encoding: codec name; defaults to ``"utf-8"``.
        :param decl: when truthy, the ``<?xml ...?>`` declaration is written.
        :param extra_ns: additional prefix -> namespace bindings that take
            precedence over the namespace manager.
        """
        encoding = encoding or "utf-8"
        stream_writer = codecs.lookup(encoding).streamwriter
        # NOTE on type ignores: this is mainly because the variable is being re-used.
        # type error: Incompatible types in assignment (expression has type "StreamWriter", variable has type "IO[bytes]")
        self.stream = stream = stream_writer(stream)  # type: ignore[assignment]
        if decl:
            # type error: No overload variant of "write" of "IO" matches argument type "str"
            stream.write('<?xml version="1.0" encoding="%s"?>' % encoding)  # type: ignore[call-overload]
        self.element_stack: List[str] = []
        self.nm = namespace_manager
        self.extra_ns = extra_ns or {}
        # True when no start tag is waiting for its closing ">".
        self.closed = True
        # True once a child element has been closed inside the current element.
        self.parent = False

    def __get_indent(self) -> str:
        return " " * len(self.element_stack)

    indent = property(__get_indent)

    def __close_start_tag(self) -> None:
        """Write the pending ``>`` of the most recent start tag, if any."""
        if not self.closed:  # TODO:
            self.closed = True
            self.stream.write(">")

    def push(self, uri: str) -> None:
        """Open a new element named by ``uri``; its start tag stays open for attributes."""
        self.__close_start_tag()
        write = self.stream.write
        write("\n")
        write(self.indent)
        write("<%s" % self.qname(uri))
        self.element_stack.append(uri)
        self.closed = False
        self.parent = False

    def pop(self, uri: Optional[str] = None) -> None:
        """Close the innermost element.

        When ``uri`` is given it is asserted to equal the element being
        closed. An element that received no content is written as ``/>``.
        """
        top = self.element_stack.pop()
        if uri:
            assert uri == top
        write = self.stream.write
        if not self.closed:
            self.closed = True
            write("/>")
        else:
            if self.parent:
                write("\n")
                write(self.indent)
            write("</%s>" % self.qname(top))
        self.parent = True

    def element(
        self, uri: str, content: str, attributes: Optional[Dict[URIRef, str]] = None
    ) -> None:
        """Utility method for adding a complete simple element"""
        self.push(uri)
        for k, v in (attributes or {}).items():
            self.attribute(k, v)
        self.text(content)
        self.pop()

    def namespaces(
        self, namespaces: Optional[Iterable[Tuple[str, str]]] = None
    ) -> None:
        """Write ``xmlns`` declarations into the currently open start tag.

        Falls back to the namespace manager's bindings when ``namespaces`` is
        falsy; the ``extra_ns`` bindings are written afterwards.
        """
        if not namespaces:
            namespaces = self.nm.namespaces()

        write = self.stream.write
        write("\n")
        for prefix, namespace in namespaces:
            if prefix:
                write(' xmlns:%s="%s"\n' % (prefix, namespace))
            # Allow user-provided namespace bindings to prevail
            elif prefix not in self.extra_ns:
                write(' xmlns="%s"\n' % namespace)

        for prefix, namespace in self.extra_ns.items():
            if prefix:
                write(' xmlns:%s="%s"\n' % (prefix, namespace))
            else:
                write(' xmlns="%s"\n' % namespace)

    def attribute(self, uri: str, value: str) -> None:
        """Write one attribute, quoted with ``quoteattr``, into the open start tag."""
        write = self.stream.write
        write(" %s=%s" % (self.qname(uri), quoteattr(value)))

    def text(self, text: str) -> None:
        """Write character data, closing the pending start tag first.

        Text containing both ``<`` and ``>`` (and no ``]]>``) is wrapped in a
        CDATA section; everything else is escaped.
        """
        self.__close_start_tag()
        if "<" in text and ">" in text and "]]>" not in text:
            self.stream.write("<![CDATA[")
            self.stream.write(text)
            self.stream.write("]]>")
        else:
            self.stream.write(escape(text, ESCAPE_ENTITIES))

    def qname(self, uri: str) -> str:
        """Compute qname for a uri using our extra namespaces,
        or the given namespace manager"""

        for pre, ns in self.extra_ns.items():
            if uri.startswith(ns):
                if pre != "":
                    return ":".join([pre, uri[len(ns) :]])
                else:
                    return uri[len(ns) :]

        return self.nm.qname_strict(uri)
|
||||
+676
@@ -0,0 +1,676 @@
|
||||
"""
|
||||
Implementation of the JSON-LD Context structure. See:
|
||||
|
||||
http://json-ld.org/
|
||||
|
||||
"""
|
||||
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/context.py
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import namedtuple
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Collection,
|
||||
Dict,
|
||||
Generator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from urllib.parse import urljoin, urlsplit
|
||||
|
||||
from rdflib.namespace import RDF
|
||||
|
||||
from .errors import (
|
||||
INVALID_CONTEXT_ENTRY,
|
||||
INVALID_REMOTE_CONTEXT,
|
||||
RECURSIVE_CONTEXT_INCLUSION,
|
||||
)
|
||||
from .keys import (
|
||||
BASE,
|
||||
CONTAINER,
|
||||
CONTEXT,
|
||||
GRAPH,
|
||||
ID,
|
||||
IMPORT,
|
||||
INCLUDED,
|
||||
INDEX,
|
||||
JSON,
|
||||
LANG,
|
||||
LIST,
|
||||
NEST,
|
||||
NONE,
|
||||
PREFIX,
|
||||
PROPAGATE,
|
||||
PROTECTED,
|
||||
REV,
|
||||
SET,
|
||||
TYPE,
|
||||
VALUE,
|
||||
VERSION,
|
||||
VOCAB,
|
||||
)
|
||||
from .util import norm_url, source_to_json, split_iri
|
||||
|
||||
NODE_KEYS = {GRAPH, ID, INCLUDED, JSON, LIST, NEST, NONE, REV, SET, TYPE, VALUE, LANG}
|
||||
|
||||
|
||||
class Defined(int):
    """Integer subclass whose instances serve as sentinel values (see ``UNDEF``)."""
|
||||
|
||||
|
||||
UNDEF = Defined(0)
|
||||
|
||||
# From <https://tools.ietf.org/html/rfc3986#section-2.2>
|
||||
URI_GEN_DELIMS = (":", "/", "?", "#", "[", "]", "@")
|
||||
|
||||
_ContextSourceType = Union[
|
||||
List[Union[Dict[str, Any], str, None]], Dict[str, Any], str, None
|
||||
]
|
||||
|
||||
|
||||
class Context:
|
||||
def __init__(
    self,
    source: _ContextSourceType = None,
    base: Optional[str] = None,
    version: Optional[float] = 1.1,
):
    """Create an empty context and, when ``source`` is truthy, load it.

    :param source: context definition(s) handed to ``load``.
    :param base: base IRI; also remembered unchanged as ``doc_base``.
    :param version: processing version; a falsy value means ``1.1``.
    """
    self.version: float = version if version else 1.1
    self.language = None
    self.vocab: Optional[str] = None

    # Declared only; the first assignment goes through the ``base`` setter.
    self._base: Optional[str]
    self.base = base
    self.doc_base = base

    self.terms: Dict[str, Any] = {}
    # _alias maps NODE_KEY to list of aliases
    self._alias: Dict[str, List[str]] = {}
    self._lookup: Dict[Tuple[str, Any, Union[Defined, str], bool], Term] = {}
    self._prefixes: Dict[str, Any] = {}

    self.active = False
    self.parent: Optional[Context] = None
    self.propagate = True
    self._context_cache: Dict[str, Any] = {}

    if source:
        self.load(source)
|
||||
|
||||
@property
def base(self) -> Optional[str]:
    """The current base IRI (fragment removed), or ``None``."""
    return self._base


@base.setter
def base(self, base: Optional[str]):
    """Set the base IRI and derive ``_basedomain`` (``scheme://netloc``) from it.

    Any ``#fragment`` is dropped. Once ``_base`` exists, a non-``None`` value
    is resolved with ``resolve_iri`` (i.e. against the previous base); the
    very first assignment stores the value as given.
    """
    if base:
        base = base.partition("#")[0]

    resolved = base
    if base is not None and hasattr(self, "_base"):
        resolved = self.resolve_iri(base)
    self._base = resolved

    if base:
        self._basedomain = "%s://%s" % urlsplit(base)[0:2]
    else:
        self._basedomain = None
|
||||
|
||||
def subcontext(self, source: Any, propagate: bool = True) -> Context:
    """Derive a child context from ``source``.

    The child is built from this context, unless this context has
    ``propagate`` set to ``False``, in which case it is built from
    ``self.parent``.
    """
    # IMPROVE: to optimize, implement SubContext with parent fallback support
    origin = self if self.propagate is not False else self.parent
    # type error: Item "None" of "Optional[Context]" has no attribute "_subcontext"
    return origin._subcontext(source, propagate)  # type: ignore[union-attr]
|
||||
|
||||
def _subcontext(self, source: Any, propagate: bool) -> Context:
    """Create a child ``Context`` that inherits this one's state, then load ``source`` into it.

    Alias lists, terms, lookup table and prefixes are copied so the child can
    change them independently; the remote-context cache is shared.
    """
    child = Context(version=self.version)
    child.propagate = propagate
    child.parent = self
    for attr in ("language", "vocab", "base", "doc_base"):
        setattr(child, attr, getattr(self, attr))
    child._alias = {key: aliases[:] for key, aliases in self._alias.items()}
    for attr in ("terms", "_lookup", "_prefixes"):
        setattr(child, attr, getattr(self, attr).copy())
    child._context_cache = self._context_cache
    child.load(source)
    return child
|
||||
|
||||
def _clear(self) -> None:
    """Reset language, vocab, terms, aliases, lookup table and prefixes to their empty state.

    Also sets ``active`` to ``False`` and ``propagate`` to ``True``.
    """
    self.language = self.vocab = None
    self.terms, self._alias, self._lookup, self._prefixes = {}, {}, {}, {}
    self.active = False
    self.propagate = True
|
||||
|
||||
def get_context_for_term(self, term: Optional[Term]) -> Context:
    """Return the context to use below ``term``.

    A term whose ``context`` is not ``UNDEF`` gets a propagating child
    context built from it; otherwise this context itself is returned.
    """
    if not term or term.context is UNDEF:
        return self
    return self._subcontext(term.context, propagate=True)
|
||||
|
||||
def get_context_for_type(self, node: Any) -> Optional[Context]:
    """Return the type-scoped context for ``node`` if one applies.

    From version 1.1 on, the first type of ``node`` that names a known term
    is looked up; if that term has a truthy ``context`` a non-propagating
    subcontext is built from it. Otherwise the result is this context, or
    ``self.parent`` when ``self.propagate`` is ``False``.
    """
    if self.version >= 1.1:
        declared = self.get_type(node) if isinstance(node, dict) else None
        if isinstance(declared, list):
            type_names = declared
        elif declared:
            type_names = [declared]
        else:
            type_names = []

        typeterm = None
        for type_name in type_names:
            try:
                typeterm = self.terms.get(type_name)
            except TypeError:
                # extra lenience, triggers if type is set to a literal
                pass
            if typeterm is not None:
                break

        if typeterm and typeterm.context:
            scoped = self.subcontext(typeterm.context, propagate=False)
            if scoped:
                return scoped

    if self.propagate is False:
        return self.parent
    return self
|
||||
|
||||
def get_id(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``ID`` or one of its aliases."""
    return self._get(obj, ID)


def get_type(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``TYPE`` or one of its aliases."""
    return self._get(obj, TYPE)


def get_language(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``LANG`` or one of its aliases."""
    return self._get(obj, LANG)


def get_value(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``VALUE`` or one of its aliases."""
    return self._get(obj, VALUE)


def get_graph(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``GRAPH`` or one of its aliases."""
    return self._get(obj, GRAPH)


def get_list(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``LIST`` or one of its aliases."""
    return self._get(obj, LIST)


def get_set(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``SET`` or one of its aliases."""
    return self._get(obj, SET)


def get_rev(self, obj: Dict[str, Any]) -> Any:
    """Return the value stored in ``obj`` under ``REV`` or one of its aliases."""
    return self._get(obj, REV)
|
||||
|
||||
def _get(self, obj: Dict[str, Any], key: str) -> Any:
    """Return ``obj``'s value for the first alias of ``key`` present in it, else for ``key`` itself."""
    aliases = self._alias.get(key, [])
    chosen = next((alias for alias in aliases if alias in obj), key)
    return obj.get(chosen)
|
||||
|
||||
def get_key(self, key: str) -> str:
    """Return the preferred spelling of ``key``: the first name yielded by ``get_keys``."""
    # ``None`` is only reachable if ``get_keys`` yields nothing.
    return next(iter(self.get_keys(key)), None)  # type: ignore[arg-type,return-value]
|
||||
|
||||
def get_keys(self, key: str) -> Generator[str, None, None]:
    """Yield every alias registered for ``key``, followed by ``key`` itself."""
    yield from self._alias.get(key, ())
    yield key
|
||||
|
||||
lang_key = property(lambda self: self.get_key(LANG))
|
||||
id_key = property(lambda self: self.get_key(ID))
|
||||
type_key = property(lambda self: self.get_key(TYPE))
|
||||
value_key = property(lambda self: self.get_key(VALUE))
|
||||
list_key = property(lambda self: self.get_key(LIST))
|
||||
rev_key = property(lambda self: self.get_key(REV))
|
||||
graph_key = property(lambda self: self.get_key(GRAPH))
|
||||
|
||||
def add_term(
    self,
    name: str,
    idref: str,
    coercion: Union[Defined, str] = UNDEF,
    container: Union[Collection[Any], str, Defined] = UNDEF,
    index: Optional[Union[str, Defined]] = None,
    language: Optional[Union[str, Defined]] = UNDEF,
    reverse: bool = False,
    context: Any = UNDEF,
    prefix: Optional[bool] = None,
    protected: bool = False,
):
    """Define term ``name`` and index it for reverse lookup.

    Nothing is added when ``name`` is rejected by ``_accept_term`` or, from
    version 1.1 on, when an existing term of that name is protected.
    ``container`` is normalised to a set. When ``prefix`` is ``None`` (or the
    version is below 1.1) the term counts as a prefix if ``idref`` is a
    string ending in one of ``URI_GEN_DELIMS``.
    """
    if prefix is None or self.version < 1.1:
        prefix = isinstance(idref, str) and idref.endswith(URI_GEN_DELIMS)

    if not self._accept_term(name):
        return

    if self.version >= 1.1:
        existing = self.terms.get(name)
        if existing and existing.protected:
            return

    if isinstance(container, (list, set, tuple)):
        container = set(container)
    elif container is UNDEF:
        container = set()
    else:
        container = {container}

    term = Term(
        idref,
        name,
        coercion,
        container,
        index,
        language,
        reverse,
        context,
        prefix,
        protected,
    )
    self.terms[name] = term

    # Only these container kinds take part in the lookup key.  (INDEX, ID, GRAPH do not.)
    container_key: Union[Defined, str] = next(
        (kind for kind in (LIST, LANG, SET) if kind in container), UNDEF
    )
    self._lookup[(idref, coercion or language, container_key, reverse)] = term

    if term.prefix is True:
        self._prefixes[idref] = name
|
||||
|
||||
def find_term(
    self,
    idref: str,
    coercion: Optional[Union[str, Defined]] = None,
    container: Union[Defined, str] = UNDEF,
    language: Optional[str] = None,
    reverse: bool = False,
):
    """Find the best matching term for ``idref`` in the lookup table.

    Candidate keys are tried from most to least specific; the first truthy
    hit wins. The final fallback is the entry without coercion or container.
    ``coercion`` defaults to ``language`` when it is ``None``.
    """
    lookup = self._lookup

    if coercion is None:
        coercion = language

    candidates = []
    if coercion is not UNDEF:
        if container:
            candidates.append((idref, coercion, container, reverse))
        candidates.append((idref, coercion, UNDEF, reverse))

    if container:
        candidates.append((idref, coercion, container, reverse))
    elif language:
        candidates.append((idref, UNDEF, LANG, reverse))
    else:
        candidates.append((idref, coercion or UNDEF, SET, reverse))

    for candidate in candidates:
        found = lookup.get(candidate)
        if found:
            return found

    return lookup.get((idref, UNDEF, UNDEF, reverse))
|
||||
|
||||
def resolve(self, curie_or_iri: str) -> str:
    """Expand ``curie_or_iri`` (without vocab) and resolve it against the base.

    Blank node identifiers are returned as expanded; an expansion that
    contains a space yields ``""``.
    """
    expanded = self.expand(curie_or_iri, False)
    # type error: Argument 1 to "isblank" of "Context" has incompatible type "Optional[str]"; expected "str"
    if self.isblank(expanded):  # type: ignore[arg-type]
        # type error: Incompatible return value type (got "Optional[str]", expected "str")
        return expanded  # type: ignore[return-value]
    # type error: Unsupported right operand type for in ("Optional[str]")
    has_space = " " in expanded  # type: ignore[operator]
    # type error: Argument 1 to "resolve_iri" of "Context" has incompatible type "Optional[str]"; expected "str"
    return "" if has_space else self.resolve_iri(expanded)  # type: ignore[arg-type]
|
||||
|
||||
def resolve_iri(self, iri: str) -> str:
    """Return ``norm_url`` applied to the current base (``self._base``) and ``iri``."""
    current_base = self._base
    # type error: Argument 1 to "norm_url" has incompatible type "Optional[str]"; expected "str"
    return norm_url(current_base, iri)  # type: ignore[arg-type]
|
||||
|
||||
def isblank(self, ref: str) -> bool:
    """Return whether ``ref`` starts with the blank node prefix ``_:``."""
    is_blank = ref.startswith("_:")
    return is_blank
|
||||
|
||||
def expand(self, term_curie_or_iri: Any, use_vocab: bool = True) -> Optional[str]:
    """Expand a term, compact IRI or IRI.

    Non-string input is returned unchanged and a name rejected by
    ``_accept_term`` gives ``""``. With ``use_vocab`` a defined term expands
    to its id and an unprefixed term to ``vocab + term`` (``None`` without a
    vocab). A ``prefix:local`` form whose prefix is a prefix-capable term
    expands to that term's id plus ``local``; blank node identifiers are
    returned as is. Everything else is resolved with ``resolve_iri``.
    """
    if not isinstance(term_curie_or_iri, str):
        return term_curie_or_iri

    if not self._accept_term(term_curie_or_iri):
        return ""

    if use_vocab:
        known = self.terms.get(term_curie_or_iri)
        if known:
            return known.id

    is_term, pfx, local = self._prep_expand(term_curie_or_iri)
    if pfx == "_":
        return term_curie_or_iri

    if pfx is None:
        if is_term and use_vocab:
            return self.vocab + term_curie_or_iri if self.vocab else None
    else:
        ns = self.terms.get(pfx)
        if ns and ns.prefix and ns.id:
            return ns.id + local

    return self.resolve_iri(term_curie_or_iri)
|
||||
|
||||
def shrink_iri(self, iri: str) -> str:
    """Shorten ``iri`` using a known prefix or the base.

    A namespace with a registered prefix gives ``prefix:name``. Otherwise,
    when a base is set, the base itself becomes ``""`` and an IRI starting
    with ``_basedomain`` loses that leading part. Anything else is returned
    unchanged.
    """
    ns, name = split_iri(str(iri))
    pfx = self._prefixes.get(ns)
    if pfx:
        # type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Any, Optional[str]]"; expected "Iterable[str]"
        return ":".join((pfx, name))  # type: ignore[arg-type]
    if not self._base:
        return iri
    if str(iri) == self._base:
        return ""
    # type error: Argument 1 to "startswith" of "str" has incompatible type "Optional[str]"; expected "Union[str, Tuple[str, ...]]"
    if iri.startswith(self._basedomain):  # type: ignore[arg-type]
        # type error: Argument 1 to "len" has incompatible type "Optional[str]"; expected "Sized"
        return iri[len(self._basedomain) :]  # type: ignore[arg-type]
    return iri
|
||||
|
||||
def to_symbol(self, iri: str) -> Optional[str]:
    """Return the shortest known name for ``iri``.

    Tried in order: the name of a term found by ``find_term``, the local
    name when the namespace equals the vocab, ``prefix:name`` for a
    registered prefix, and finally the IRI string itself.
    """
    iri = str(iri)
    term = self.find_term(iri)
    if term:
        return term.name

    ns, name = split_iri(iri)
    if ns == self.vocab:
        return name

    pfx = self._prefixes.get(ns)
    if not pfx:
        return iri
    # type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Any, Optional[str]]"; expected "Iterable[str]"
    return ":".join((pfx, name))  # type: ignore[arg-type]
|
||||
|
||||
def load(
    self,
    source: _ContextSourceType,
    base: Optional[str] = None,
    referenced_contexts: Set[Any] = None,
):
    """Load one or more context definitions into this context.

    ``source`` (wrapped in a list if it is not one) is flattened by
    ``_prep_sources``; each resulting entry is then applied in order. A
    ``None`` entry clears the context, any other entry is read with
    ``_read_source``. Marks the context as active.
    """
    self.active = True
    prepared: List[Tuple[Optional[str], Union[Dict[str, Any], str, None]]] = []
    inputs = source if isinstance(source, list) else [source]
    referenced_contexts = referenced_contexts or set()
    self._prep_sources(base, inputs, prepared, referenced_contexts)
    for source_url, definition in prepared:
        if definition is None:
            self._clear()
            continue
        # type error: Argument 1 to "_read_source" of "Context" has incompatible type "Union[Dict[str, Any], str]"; expected "Dict[str, Any]"
        self._read_source(definition, source_url, referenced_contexts)  # type: ignore[arg-type]
|
||||
|
||||
def _accept_term(self, key: str) -> bool:
    """Return whether ``key`` may be used as a term name.

    Below version 1.1 everything is accepted. From 1.1 on, a name of the
    form ``@`` followed by an alphanumeric character is accepted only if it
    is one of ``NODE_KEYS``.
    """
    if self.version < 1.1:
        return True
    keyword_like = (
        bool(key) and len(key) > 1 and key[0] == "@" and key[1].isalnum()
    )
    return key in NODE_KEYS if keyword_like else True
|
||||
|
||||
def _prep_sources(
    self,
    base: Optional[str],
    inputs: Union[List[Union[Dict[str, Any], str, None]], List[str]],
    sources: List[Tuple[Optional[str], Union[Dict[str, Any], str, None]]],
    referenced_contexts: Set[str],
    in_source_url: Optional[str] = None,
):
    """Flatten ``inputs`` into ``(source_url, definition)`` pairs appended to ``sources``.

    String entries are fetched with ``_fetch_context`` (skipped when that
    returns ``None``), a dict with a ``CONTEXT`` entry is unwrapped, and
    lists are processed recursively. Every other entry is appended as is,
    paired with the URL it came from.
    """
    for entry in inputs:
        source_url = in_source_url
        new_base = base
        if isinstance(entry, str):
            source_url = entry
            source_doc_base = base or self.doc_base
            fetched = self._fetch_context(
                entry, source_doc_base, referenced_contexts
            )
            if fetched is None:
                continue
            if base:
                if TYPE_CHECKING:
                    # if base is not None, then source_doc_base won't be
                    # none due to how it is assigned.
                    assert source_doc_base is not None
                new_base = urljoin(source_doc_base, source_url)
            entry = fetched

        if isinstance(entry, dict) and CONTEXT in entry:
            entry = entry[CONTEXT]
            # type ignore: Incompatible types in assignment (expression has type "List[Union[Dict[str, Any], str, None]]", variable has type "Union[Dict[str, Any], str, None]")
            entry = entry if isinstance(entry, list) else [entry]  # type: ignore[assignment]

        if not isinstance(entry, list):
            sources.append((source_url, entry))
            continue
        # type error: Statement is unreachable
        self._prep_sources(  # type: ignore[unreachable]
            new_base, entry, sources, referenced_contexts, source_url
        )
|
||||
|
||||
def _fetch_context(
    self, source: str, base: Optional[str], referenced_contexts: Set[str]
):
    """Return the JSON document for the context at ``source``, using the cache.

    ``source`` is joined onto ``base`` first. The resulting URL is recorded
    in ``referenced_contexts``; meeting a URL that is already recorded raises
    ``RECURSIVE_CONTEXT_INCLUSION``. A freshly loaded, non-empty document
    without a ``CONTEXT`` entry raises ``INVALID_REMOTE_CONTEXT``.
    """
    # type error: Value of type variable "AnyStr" of "urljoin" cannot be "Optional[str]"
    url = urljoin(base, source)  # type: ignore[type-var]

    if url in referenced_contexts:
        raise RECURSIVE_CONTEXT_INCLUSION

    # type error: Argument 1 to "add" of "set" has incompatible type "Optional[str]"; expected "str"
    referenced_contexts.add(url)  # type: ignore[arg-type]

    try:
        return self._context_cache[url]
    except KeyError:
        pass

    # type error: Incompatible types in assignment (expression has type "Optional[Any]", variable has type "str")
    document, _ = source_to_json(url)
    if document and CONTEXT not in document:
        raise INVALID_REMOTE_CONTEXT

    # type error: Invalid index type "Optional[str]" for "Dict[str, Any]"; expected type "str"
    self._context_cache[url] = document  # type: ignore[index]

    return document
|
||||
|
||||
def _read_source(
    self,
    source: Dict[str, Any],
    source_url: Optional[str] = None,
    referenced_contexts: Optional[Set[str]] = None,
):
    """Apply one context definition to this context.

    :param source: the context definition (a mapping).
    :param source_url: URL the definition was loaded from, if any; a
        ``BASE`` entry is only honoured for definitions that were neither
        loaded from a URL nor use ``IMPORT``.
    :param referenced_contexts: URLs already visited, used for recursion
        detection when following ``IMPORT``.
    :raises: ``INVALID_CONTEXT_ENTRY`` when ``IMPORT`` is not a string or
        the imported document is not a mapping; ``INVALID_REMOTE_CONTEXT``
        when the imported context itself is not a mapping.
    """
    imports = source.get(IMPORT)
    if imports:
        if not isinstance(imports, str):
            raise INVALID_CONTEXT_ENTRY

        imported = self._fetch_context(
            imports, self.base, referenced_contexts or set()
        )
        if not isinstance(imported, dict):
            raise INVALID_CONTEXT_ENTRY

        imported_context = imported[CONTEXT]
        if not isinstance(imported_context, dict):
            raise INVALID_REMOTE_CONTEXT

        # Merge into a fresh dict: ``imported`` may be the object held in the
        # shared context cache, and updating it in place would leak this
        # source's entries into every later use of the imported context.
        source = {**imported_context, **source}

    self.vocab = source.get(VOCAB, self.vocab)
    self.version = source.get(VERSION, self.version)
    protected = source.get(PROTECTED, False)

    for key, value in source.items():
        if key in {VOCAB, VERSION, IMPORT, PROTECTED}:
            # handled above (or only meaningful while importing)
            continue
        elif key == PROPAGATE and isinstance(value, bool):
            self.propagate = value
        elif key == LANG:
            self.language = value
        elif key == BASE:
            if not source_url and not imports:
                self.base = value
        else:
            self._read_term(source, key, value, protected)
|
||||
|
||||
def _read_term(
    self,
    source: Dict[str, Any],
    name: str,
    dfn: Union[Dict[str, Any], str],
    protected: bool = False,
) -> None:
    """Register the term ``name`` from its definition ``dfn``.

    A dict definition is unpacked entry by entry (``@reverse``/``@id``,
    ``@type``, ``@container``, ...) and passed to ``add_term``. Any other
    definition is registered with just an id: a string is expanded with
    ``_rec_expand`` (after ``_accept_term`` approves it), anything else
    leaves the id as ``None``.

    Afterwards the alias table is maintained: a term whose id is one of
    ``NODE_KEYS`` is recorded as an alias of that key, otherwise ``name`` is
    dropped from every alias list.
    """
    idref = None
    if not isinstance(dfn, dict):
        if isinstance(dfn, str):
            if not self._accept_term(dfn):
                return
            idref = self._rec_expand(source, dfn)
        # type error: Argument 2 to "add_term" of "Context" has incompatible type "Optional[str]"; expected "str"
        self.add_term(name, idref, protected=protected)  # type: ignore[arg-type]
    else:
        # term = self._create_term(source, key, value)
        reverse_id = dfn.get(REV)
        protected = dfn.get(PROTECTED, protected)

        coercion = dfn.get(TYPE, UNDEF)
        if coercion and coercion not in (ID, TYPE, VOCAB):
            coercion = self._rec_expand(source, coercion)

        # A reverse id takes precedence over a plain @id.
        idref = reverse_id or dfn.get(ID, UNDEF)
        if idref == TYPE:
            idref = str(RDF.type)
            coercion = VOCAB
        elif idref is not UNDEF:
            idref = self._rec_expand(source, idref)
        elif ":" in name:
            idref = self._rec_expand(source, name)
        elif self.vocab:
            idref = self.vocab + name

        self.add_term(
            name,
            idref,
            coercion,
            dfn.get(CONTAINER, UNDEF),
            dfn.get(INDEX, UNDEF),
            dfn.get(LANG, UNDEF),
            bool(reverse_id),
            dfn.get(CONTEXT, UNDEF),
            dfn.get(PREFIX),
            protected=protected,
        )

    if idref in NODE_KEYS:
        self._alias.setdefault(idref, []).append(name)
        return

    # undo aliases that may have been inherited from parent context
    for aliases in self._alias.values():
        if name in aliases:
            aliases.remove(name)
|
||||
|
||||
def _rec_expand(
    self, source: Dict[str, Any], expr: Optional[str], prev: Optional[str] = None
) -> Optional[str]:
    """Expand ``expr`` repeatedly until it stops changing.

    Each step splits ``expr`` with ``_prep_expand`` and resolves either its
    prefix or the whole expression through ``_get_source_id`` (falling back
    to ``self.terms`` for prefixes). Expansion ends when the expression
    equals the previous step's input, is one of ``NODE_KEYS``, uses the
    current vocabulary as its prefix, or is a colon-free name that can be
    appended to ``self.vocab``.
    """
    if expr == prev or expr in NODE_KEYS:
        return expr

    # type error: Argument 1 to "_prep_expand" of "Context" has incompatible type "Optional[str]"; expected "str"
    _, prefix, remainder = self._prep_expand(expr)  # type: ignore[arg-type]

    if not prefix:
        resolved = self._get_source_id(source, remainder) or remainder
        if ":" not in resolved and self.vocab:
            return self.vocab + resolved
        return self._rec_expand(source, resolved, expr)

    namespace = self._get_source_id(source, prefix)
    if namespace is None:
        if prefix + ":" == self.vocab:
            return expr
        known = self.terms.get(prefix)
        if known:
            namespace = known.id

    # An unresolvable prefix leaves the expression as it is; the recursive
    # call then sees expr == prev and returns it.
    following = expr if namespace is None else namespace + remainder
    return self._rec_expand(source, following, expr)
|
||||
|
||||
def _prep_expand(self, expr: str) -> Tuple[bool, Optional[str], str]:
|
||||
if ":" not in expr:
|
||||
return True, None, expr
|
||||
pfx, local = expr.split(":", 1)
|
||||
if not local.startswith("//"):
|
||||
return False, pfx, local
|
||||
else:
|
||||
return False, None, expr
|
||||
|
||||
def _get_source_id(self, source: Dict[str, Any], key: str) -> Optional[str]:
|
||||
# .. from source dict or if already defined
|
||||
term = source.get(key)
|
||||
if term is None:
|
||||
dfn = self.terms.get(key)
|
||||
if dfn:
|
||||
term = dfn.id
|
||||
elif isinstance(term, dict):
|
||||
term = term.get(ID)
|
||||
return term
|
||||
|
||||
def _term_dict(self, term: Term) -> Union[Dict[str, Any], str]:
    """Build the context entry for ``term``.

    :return: the bare id when nothing but ``@id`` would be emitted,
        otherwise a dict with the applicable ``@type``, ``@container``,
        ``@language`` and ``@reverse``/``@id`` entries.
    """
    entry: Dict[str, Any] = {}
    if term.type != UNDEF:
        entry[TYPE] = self.shrink_iri(term.type)
    if term.container:
        entry[CONTAINER] = list(term.container)
    if term.language != UNDEF:
        entry[LANG] = term.language

    # The id is stored under @reverse for reverse terms, under @id otherwise.
    id_key = REV if term.reverse else ID
    entry[id_key] = term.id

    if len(entry) == 1 and ID in entry:
        return entry[ID]
    return entry
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
    """
    Returns a dictionary representation of the context that can be
    serialized to JSON.

    The result holds the inverted ``_prefixes`` mapping, one entry per term
    in ``_lookup`` (built with ``_term_dict``), and ``@base`` /
    ``@language`` when they are set.

    :return: a dictionary representation of the context.
    """
    result: Dict[str, Any] = {}
    for key, value in self._prefixes.items():
        result[value] = key
    for term in self._lookup.values():
        result[term.name] = self._term_dict(term)
    if self.base:
        result[BASE] = self.base
    if self.language:
        result[LANG] = self.language
    return result
|
||||
|
||||
|
||||
# Record describing one term of a context. The field order is significant
# because instances are also built positionally.
Term = namedtuple(
    "Term",
    [
        "id",
        "name",
        "type",
        "container",
        "index",
        "language",
        "reverse",
        "context",
        "prefix",
        "protected",
    ],
)

# The eight defaults apply to the last eight fields (type .. protected);
# ``id`` and ``name`` are always required.
Term.__new__.__defaults__ = (UNDEF, UNDEF, UNDEF, UNDEF, False, UNDEF, False, False)
|
||||
@@ -0,0 +1,9 @@
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/errors.py
|
||||
class JSONLDException(ValueError):  # noqa: N818
    """Exception type used for the JSON-LD error conditions of this package."""
|
||||
|
||||
|
||||
# http://www.w3.org/TR/json-ld-api/#idl-def-JsonLdErrorCode.{code-message}
|
||||
RECURSIVE_CONTEXT_INCLUSION = JSONLDException("recursive context inclusion")
|
||||
INVALID_REMOTE_CONTEXT = JSONLDException("invalid remote context")
|
||||
INVALID_CONTEXT_ENTRY = JSONLDException("invalid context entry")
|
||||
@@ -0,0 +1,24 @@
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/keys.py
|
||||
BASE = "@base"
|
||||
CONTAINER = "@container"
|
||||
CONTEXT = "@context"
|
||||
# DIRECTION = u'@direction'
|
||||
GRAPH = "@graph"
|
||||
ID = "@id"
|
||||
IMPORT = "@import"
|
||||
INCLUDED = "@included"
|
||||
INDEX = "@index"
|
||||
JSON = "@json"
|
||||
LANG = LANGUAGE = "@language"
|
||||
LIST = "@list"
|
||||
NEST = "@nest"
|
||||
NONE = "@none"
|
||||
PREFIX = "@prefix"
|
||||
PROPAGATE = "@propagate"
|
||||
PROTECTED = "@protected"
|
||||
REV = REVERSE = "@reverse"
|
||||
SET = "@set"
|
||||
TYPE = "@type"
|
||||
VALUE = "@value"
|
||||
VERSION = "@version"
|
||||
VOCAB = "@vocab"
|
||||
@@ -0,0 +1,355 @@
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/util.py
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import pathlib
|
||||
from html.parser import HTMLParser
|
||||
from io import StringIO, TextIOBase, TextIOWrapper
|
||||
from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, TextIO, Tuple, Union
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import json
|
||||
else:
|
||||
try:
|
||||
import json
|
||||
|
||||
assert json # workaround for pyflakes issue #13
|
||||
except ImportError:
|
||||
import simplejson as json
|
||||
|
||||
from posixpath import normpath, sep
|
||||
from typing import TYPE_CHECKING, cast
|
||||
from urllib.parse import urljoin, urlsplit, urlunsplit
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
|
||||
from rdflib.parser import (
|
||||
BytesIOWrapper,
|
||||
InputSource,
|
||||
PythonInputSource,
|
||||
StringInputSource,
|
||||
URLInputSource,
|
||||
create_input_source,
|
||||
)
|
||||
|
||||
|
||||
def source_to_json(
    source: Optional[
        Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath]
    ],
    fragment_id: Optional[str] = None,
    extract_all_scripts: Optional[bool] = False,
) -> Tuple[Union[Dict, List[Dict]], Any]:
    """Extract JSON from a source document.

    The source document can be JSON or HTML with embedded JSON script elements (type attribute = "application/ld+json").
    To process as HTML ``source.content_type`` must be set to "text/html" or "application/xhtml+xml".

    Every code path returns a 2-tuple; in particular the plain-``json``
    (no orjson) path for sources wrapping a Python string no longer returns
    the bare document.

    :param source: the input source document (JSON or HTML)

    :param fragment_id: if source is an HTML document then extract only the script element with matching id attribute, defaults to None

    :param extract_all_scripts: if source is an HTML document then extract all script elements (unless fragment_id is provided), defaults to False (extract only the first script element)

    :return: Tuple with the extracted JSON document and value of the HTML base element

    :raises ValueError: if the source offers neither a byte stream nor a character stream
    """

    if isinstance(source, PythonInputSource):
        # Already-parsed Python data: nothing to decode.
        return source.data, None

    if isinstance(source, StringInputSource):
        # A StringInputSource is assumed never to be an HTML-embedded JSON doc
        html_base: Any = None
        # We can get the original string from the StringInputSource
        # It's hidden in the BytesIOWrapper 'wrapped' attribute
        b_stream = source.getByteStream()
        original_string: Optional[str] = None
        json_dict: Union[Dict, List[Dict]]
        if isinstance(b_stream, BytesIOWrapper):
            wrapped_inner = cast(Union[str, StringIO, TextIOBase], b_stream.wrapped)
            if isinstance(wrapped_inner, str):
                original_string = wrapped_inner
            elif isinstance(wrapped_inner, StringIO):
                original_string = wrapped_inner.getvalue()
        if _HAS_ORJSON:
            if original_string is not None:
                json_dict = orjson.loads(original_string)
            elif isinstance(b_stream, BytesIOWrapper):
                # use the CharacterStream instead
                c_stream = source.getCharacterStream()
                json_dict = orjson.loads(c_stream.read())
            else:
                # orjson assumes its input is utf-8 encoded, so
                # don't bother to check source.getEncoding()
                json_dict = orjson.loads(b_stream.read())
        else:
            if original_string is not None:
                json_dict = json.loads(original_string)
            else:
                json_dict = json.load(source.getCharacterStream())
        return json_dict, html_base

    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    source = create_input_source(source, format="json-ld")
    try:
        content_type = source.content_type
    except (AttributeError, LookupError):
        content_type = None

    is_html = content_type is not None and content_type.lower() in (
        "text/html",
        "application/xhtml+xml",
    )
    if is_html:
        html_docparser: Optional[HTMLJSONParser] = HTMLJSONParser(
            fragment_id=fragment_id, extract_all_scripts=extract_all_scripts
        )
    else:
        html_docparser = None
    try:
        b_stream = source.getByteStream()
    except (AttributeError, LookupError):
        b_stream = None
    try:
        c_stream = source.getCharacterStream()
    except (AttributeError, LookupError):
        c_stream = None
    if b_stream is None and c_stream is None:
        raise ValueError(
            f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
        )
    try:
        b_encoding: Optional[str] = None if b_stream is None else source.getEncoding()
    except (AttributeError, LookupError):
        b_encoding = None
    underlying_string: Optional[str] = None
    if b_stream is not None and isinstance(b_stream, BytesIOWrapper):
        # Try to find an underlying wrapped Unicode string to use?
        wrapped_inner = b_stream.wrapped
        if isinstance(wrapped_inner, str):
            underlying_string = wrapped_inner
        elif isinstance(wrapped_inner, StringIO):
            underlying_string = wrapped_inner.getvalue()
    try:
        if is_html and html_docparser is not None:
            # Offload parsing to the HTMLJSONParser
            if underlying_string is not None:
                html_string: str = underlying_string
            elif c_stream is not None:
                html_string = c_stream.read()
            else:
                if TYPE_CHECKING:
                    assert b_stream is not None
                if b_encoding is None:
                    b_encoding = "utf-8"
                html_string = TextIOWrapper(b_stream, encoding=b_encoding).read()
            html_docparser.feed(html_string)
            json_dict, html_base = html_docparser.get_json(), html_docparser.get_base()
        elif _HAS_ORJSON:
            html_base = None
            if underlying_string is not None:
                json_dict = orjson.loads(underlying_string)
            elif (
                (b_stream is not None and isinstance(b_stream, BytesIOWrapper))
                or b_stream is None
            ) and c_stream is not None:
                # use the CharacterStream instead
                json_dict = orjson.loads(c_stream.read())
            else:
                if TYPE_CHECKING:
                    assert b_stream is not None
                # b_stream is not None
                json_dict = orjson.loads(b_stream.read())
        else:
            html_base = None
            if underlying_string is not None:
                # Assign instead of returning the bare document so this path
                # also yields the (json, html_base) tuple callers unpack.
                json_dict = json.loads(underlying_string)
            else:
                if c_stream is not None:
                    use_stream = c_stream
                else:
                    if TYPE_CHECKING:
                        assert b_stream is not None
                    # b_stream is not None
                    if b_encoding is None:
                        b_encoding = "utf-8"
                    use_stream = TextIOWrapper(b_stream, encoding=b_encoding)
                json_dict = json.load(use_stream)
        return json_dict, html_base
    finally:
        # Best-effort cleanup of whichever streams were opened.
        if b_stream is not None:
            try:
                b_stream.close()
            except AttributeError:
                pass
        if c_stream is not None:
            try:
                c_stream.close()
            except AttributeError:
                pass
|
||||
|
||||
|
||||
# Delimiters tried, in this order of preference, when splitting an IRI.
VOCAB_DELIMS = ("#", "/", ":")


def split_iri(iri: str) -> Tuple[str, Optional[str]]:
    """Split ``iri`` after the last occurrence of the first delimiter found.

    The delimiters in ``VOCAB_DELIMS`` are tried in order; the first one that
    occurs in ``iri`` decides the split.

    :return: ``(head, tail)`` where ``head`` ends with the delimiter, or
        ``(iri, None)`` when no delimiter occurs at all.
    """
    for delim in VOCAB_DELIMS:
        head, found, tail = iri.rpartition(delim)
        if found:
            return head + found, tail
    return iri, None
|
||||
|
||||
|
||||
def norm_url(base: str, url: str) -> str:
    """
    Resolve ``url`` against ``base`` and normalize the resulting path.

    A ``url`` that contains ``://`` or has a scheme is returned unchanged.
    For ``urn`` / ``urn-x`` bases the paths are joined by hand; otherwise the
    joined URL's path is passed through ``posixpath.normpath``. A trailing
    ``#`` on ``url`` is kept.

    >>> norm_url('http://example.org/', '/one')
    'http://example.org/one'
    >>> norm_url('http://example.org/', '/one#')
    'http://example.org/one#'
    >>> norm_url('http://example.org/one', 'two')
    'http://example.org/two'
    >>> norm_url('http://example.org/one/', 'two')
    'http://example.org/one/two'
    >>> norm_url('http://example.org/', 'http://example.net/one')
    'http://example.net/one'
    >>> norm_url('http://example.org/', 'http://example.org//one')
    'http://example.org//one'
    """
    if "://" in url:
        return url

    # Fix for URNs
    base_parts = urlsplit(base)
    url_parts = urlsplit(url)
    if url_parts.scheme:
        # Assume full URL
        return url

    if base_parts.scheme in ("urn", "urn-x"):
        # No scheme -> assume relative and join paths
        leading, _, trailing = base_parts.path.partition("/")
        joined_path = urljoin("/" + trailing, url_parts.path)
        fragment = f"#{url_parts.fragment}" if url_parts.fragment else ""
        result = f"{base_parts.scheme}:{leading}{joined_path}{fragment}"
    else:
        joined = urlsplit(urljoin(base, url))
        raw_path = joined[2]
        path = normpath(raw_path)
        if sep != "/":
            path = "/".join(path.split(sep))
        # normpath drops a trailing slash; put it back.
        if raw_path.endswith("/") and not path.endswith("/"):
            path += "/"
        result = urlunsplit(joined[0:2] + (path,) + joined[3:])

    if url.endswith("#") and not result.endswith("#"):
        result += "#"
    return result
|
||||
|
||||
|
||||
def context_from_urlinputsource(source: URLInputSource) -> Optional[str]:
    """
    Return the context URL advertised through an HTTP Link header, if any.

    Please note that JSON-LD documents served with the application/ld+json media type
    MUST have all context information, including references to external contexts,
    within the body of the document. Contexts linked via a
    http://www.w3.org/ns/json-ld#context HTTP Link Header MUST be
    ignored for such documents.

    :param source: input source exposing ``content_type``, ``url`` and
        (optionally) ``links``.
    :return: the link target resolved against ``source.url``, or ``None``
        when the document is ``application/ld+json``, the source has no
        ``links`` attribute, or no well-formed context link is present.
    """
    if source.content_type == "application/ld+json":
        return None

    try:
        # source.links is the new way of getting Link headers from URLInputSource
        links = source.links
    except AttributeError:
        return None

    for link in links:
        if ' rel="http://www.w3.org/ns/json-ld#context"' in link:
            # ``find`` (not ``index``) so that a header without a <...>
            # target is skipped instead of raising ValueError.
            start, end = link.find("<"), link.find(">")
            if -1 < start < end:
                # type error: Value of type variable "AnyStr" of "urljoin" cannot be "Optional[str]"
                return urljoin(source.url, link[start + 1 : end])  # type: ignore[type-var]
    return None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"json",
|
||||
"source_to_json",
|
||||
"split_iri",
|
||||
"norm_url",
|
||||
"context_from_urlinputsource",
|
||||
"orjson",
|
||||
"_HAS_ORJSON",
|
||||
]
|
||||
|
||||
|
||||
class HTMLJSONParser(HTMLParser):
    """HTML parser that collects JSON-LD embedded in ``<script>`` elements.

    Only scripts whose ``type`` attribute is ``application/ld+json`` are
    considered. The ``href`` of a ``<base>`` element is remembered as well.

    :param fragment_id: when given, scripts whose ``id`` attribute differs
        from it are skipped.
    :param extract_all_scripts: when false, only the first non-empty
        matching script is parsed.
    """

    def __init__(
        self,
        fragment_id: Optional[str] = None,
        extract_all_scripts: Optional[bool] = False,
    ):
        super().__init__()
        self.fragment_id = fragment_id
        self.json: List[Dict] = []
        # State for the element currently being read.
        self.contains_json = False
        self.fragment_id_does_not_match = False
        self.base = None
        self.extract_all_scripts = extract_all_scripts
        self.script_count = 0

    def handle_starttag(self, tag, attrs):
        self.contains_json = False
        self.fragment_id_does_not_match = False

        # Only set self.contains_json to True if the
        # type is 'application/ld+json'
        if tag == "script":
            for attr, value in attrs:
                if attr == "type" and value == "application/ld+json":
                    self.contains_json = True
                elif attr == "id" and self.fragment_id and value != self.fragment_id:
                    self.fragment_id_does_not_match = True

        elif tag == "base":
            for attr, value in attrs:
                if attr == "href":
                    self.base = value

    def handle_endtag(self, tag):
        # Leave "inside a JSON-LD script" state when the script closes, so
        # that text following </script> is never handed to the JSON parser.
        if tag == "script":
            self.contains_json = False
            self.fragment_id_does_not_match = False

    def handle_data(self, data):
        # Only do something when we know the context is a
        # script element containing application/ld+json

        if self.contains_json is True and self.fragment_id_does_not_match is False:

            if not self.extract_all_scripts and self.script_count > 0:
                return

            if data.strip() == "":
                # skip empty data elements
                return

            # Try to parse the json
            if _HAS_ORJSON:
                # orjson can load a unicode string
                # if that's the only thing we have,
                # its not worth encoding it to bytes
                parsed = orjson.loads(data)
            else:
                parsed = json.loads(data)

            # Add to the result document
            if isinstance(parsed, list):
                self.json.extend(parsed)
            else:
                self.json.append(parsed)

            self.script_count += 1

    def get_json(self) -> List[Dict]:
        """Return the JSON documents collected so far."""
        return self.json

    def get_base(self):
        """Return the last ``<base href=...>`` value seen, or ``None``."""
        return self.base
|
||||
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
SPARQL implementation for RDFLib
|
||||
|
||||
.. versionadded:: 4.0
|
||||
"""
|
||||
|
||||
from importlib.metadata import entry_points
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
SPARQL_LOAD_GRAPHS = True
|
||||
"""
|
||||
If True, using FROM <uri> and FROM NAMED <uri>
|
||||
will load/parse more data
|
||||
"""
|
||||
|
||||
|
||||
SPARQL_DEFAULT_GRAPH_UNION = True
|
||||
"""
|
||||
If True - the default graph in the RDF Dataset is the union of all
|
||||
named graphs (like RDFLib's ConjunctiveGraph)
|
||||
"""
|
||||
|
||||
|
||||
CUSTOM_EVALS = {}
|
||||
"""
|
||||
Custom evaluation functions
|
||||
|
||||
These must be functions taking (ctx, part) and raise
|
||||
NotImplementedError if they cannot handle a certain part
|
||||
"""
|
||||
|
||||
|
||||
PLUGIN_ENTRY_POINT = "rdf.plugins.sparqleval"
|
||||
|
||||
|
||||
from . import operators, parser, parserutils
|
||||
from .processor import prepareQuery, prepareUpdate, processUpdate
|
||||
|
||||
assert parser
|
||||
assert operators
|
||||
assert parserutils
|
||||
|
||||
|
||||
# Register every custom evaluation function advertised through the
# PLUGIN_ENTRY_POINT entry-point group.
all_entry_points = entry_points()
if hasattr(all_entry_points, "select"):
    _eval_entry_points = all_entry_points.select(group=PLUGIN_ENTRY_POINT)
else:
    # Prior to Python 3.10, this returns a dict instead of the selection interface
    if TYPE_CHECKING:
        assert isinstance(all_entry_points, dict)
    _eval_entry_points = all_entry_points.get(PLUGIN_ENTRY_POINT, [])

for ep in _eval_entry_points:
    CUSTOM_EVALS[ep.name] = ep.load()
|
||||
|
||||
__all__ = [
|
||||
"prepareQuery",
|
||||
"prepareUpdate",
|
||||
"processUpdate",
|
||||
"operators",
|
||||
"parser",
|
||||
"parserutils",
|
||||
"CUSTOM_EVALS",
|
||||
]
|
||||
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Aggregation functions
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Mapping,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.namespace import XSD
|
||||
from rdflib.plugins.sparql.datatypes import type_promotion
|
||||
from rdflib.plugins.sparql.evalutils import _eval, _val
|
||||
from rdflib.plugins.sparql.operators import numeric
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.plugins.sparql.sparql import FrozenBindings, NotBoundError, SPARQLTypeError
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
|
||||
class Accumulator:
    """abstract base class for different aggregation functions"""

    def __init__(self, aggregation: CompValue):
        # Provided by the concrete subclasses.
        self.get_value: Callable[[], Optional[Literal]]
        self.update: Callable[[FrozenBindings, Aggregator], None]
        self.var = aggregation.res
        self.expr = aggregation.vars
        distinct = aggregation.distinct
        if distinct:
            self.distinct = distinct
            # Values already consumed; consulted by use_row.
            self.seen: Set[Any] = set()
        else:
            # No DISTINCT: every row is accepted without evaluation.
            # type error: Cannot assign to a method
            self.use_row = self.dont_care  # type: ignore[method-assign]
            self.distinct = False

    def dont_care(self, row: FrozenBindings) -> bool:
        """skips distinct test"""
        return True

    def use_row(self, row: FrozenBindings) -> bool:
        """tests distinct with set"""
        evaluated = _eval(self.expr, row)
        return evaluated not in self.seen

    def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None:
        """sets final value in bindings"""
        final = self.get_value()
        # type error: Incompatible types in assignment (expression has type "Optional[Literal]", target has type "Identifier")
        bindings[self.var] = final  # type: ignore[assignment]
|
||||
|
||||
|
||||
class Counter(Accumulator):
    """Accumulator that counts the rows for which its expression is bound."""

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        self.value = 0
        if self.expr == "*":
            # cannot eval "*" => always use the full row
            # type error: Cannot assign to a method
            self.eval_row = self.eval_full_row  # type: ignore[assignment]

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        try:
            evaluated = self.eval_row(row)
        except NotBoundError:
            # skip UNDEF
            return
        self.value += 1
        if self.distinct:
            self.seen.add(evaluated)

    def get_value(self) -> Literal:
        return Literal(self.value)

    def eval_row(self, row: FrozenBindings) -> Identifier:
        return _eval(self.expr, row)

    def eval_full_row(self, row: FrozenBindings) -> FrozenBindings:
        return row

    def use_row(self, row: FrozenBindings) -> bool:
        try:
            evaluated = self.eval_row(row)
        except NotBoundError:
            # happens when counting zero optional nodes. See issue #2229
            return False
        return evaluated not in self.seen
|
||||
|
||||
|
||||
@overload
def type_safe_numbers(*args: int) -> Tuple[int, ...]: ...


@overload
def type_safe_numbers(
    *args: Union[Decimal, float, int]
) -> Tuple[Union[Decimal, float, int], ...]: ...


def type_safe_numbers(
    *args: Union[Decimal, float, int]
) -> Tuple[Union[Decimal, float, int], ...]:
    """Make ``args`` safe to combine arithmetically.

    Python refuses to mix ``float`` and ``Decimal`` operands, so when both
    kinds are present every argument is converted to ``float``. Otherwise
    the arguments are returned unchanged (this includes the case of
    ``Decimal`` values without any ``float``).

    :return: always a tuple, so the result can be iterated more than once.
    """
    has_float = any(isinstance(arg, float) for arg in args)
    has_decimal = any(isinstance(arg, Decimal) for arg in args)
    if has_float and has_decimal:
        return tuple(float(arg) for arg in args)
    return args
|
||||
|
||||
|
||||
class Sum(Accumulator):
    """Accumulator that adds up the numeric values of its expression."""

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        self.value = 0
        self.datatype: Optional[str] = None

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        try:
            term = _eval(self.expr, row)
            # The first value fixes the datatype; later ones are promoted.
            if self.datatype is None:
                promoted = term.datatype
            else:
                # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef"
                promoted = type_promotion(self.datatype, term.datatype)  # type: ignore[arg-type]
            self.datatype = promoted
            self.value = sum(type_safe_numbers(self.value, numeric(term)))
            if self.distinct:
                self.seen.add(term)
        except NotBoundError:
            # skip UNDEF
            return

    def get_value(self) -> Literal:
        return Literal(self.value, datatype=self.datatype)
|
||||
|
||||
|
||||
class Average(Accumulator):
    """Accumulator that computes the mean of its expression's numeric values."""

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        self.counter = 0
        self.sum = 0
        self.datatype: Optional[str] = None

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        try:
            term = _eval(self.expr, row)
            # Add first: a non-numeric value raises before any state changes.
            self.sum = sum(type_safe_numbers(self.sum, numeric(term)))
            if self.datatype is None:
                promoted = term.datatype
            else:
                # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef"
                promoted = type_promotion(self.datatype, term.datatype)  # type: ignore[arg-type]
            self.datatype = promoted
            if self.distinct:
                self.seen.add(term)
            self.counter += 1
        # skip UNDEF or BNode => SPARQLTypeError
        except (NotBoundError, SPARQLTypeError):
            return

    def get_value(self) -> Literal:
        if self.counter == 0:
            return Literal(0)
        if self.datatype in (XSD.float, XSD.double):
            mean = self.sum / self.counter
        else:
            mean = Decimal(self.sum) / Decimal(self.counter)
        return Literal(mean)
|
||||
|
||||
|
||||
class Extremum(Accumulator):
    """abstract base class for Minimum and Maximum"""

    def __init__(self, aggregation: CompValue):
        # Supplied by Minimum / Maximum.
        self.compare: Callable[[Any, Any], Any]
        super().__init__(aggregation)
        self.value: Any = None
        # DISTINCT would not change the value for MIN or MAX
        # type error: Cannot assign to a method
        self.use_row = self.dont_care  # type: ignore[method-assign]

    def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None:
        if self.value is None:
            # simply do not set if self.value is still None
            return
        bindings[self.var] = Literal(self.value)

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        try:
            candidate = _eval(self.expr, row)
            if self.value is None:
                self.value = candidate
            else:
                # self.compare is implemented by Minimum/Maximum
                self.value = self.compare(self.value, candidate)
        # skip UNDEF or BNode => SPARQLTypeError
        except (NotBoundError, SPARQLTypeError):
            return
|
||||
|
||||
|
||||
_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal)
|
||||
|
||||
|
||||
class Minimum(Extremum):
    """Extremum that keeps the smaller value, ordered by ``_val``."""

    def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT:
        return min((val1, val2), key=_val)
|
||||
|
||||
|
||||
class Maximum(Extremum):
    """Extremum that keeps the larger value, ordered by ``_val``."""

    def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT:
        return max((val1, val2), key=_val)
|
||||
|
||||
|
||||
class Sample(Accumulator):
    """takes the first eligible value"""

    def __init__(self, aggregation):
        super().__init__(aggregation)
        # DISTINCT would not change the value
        # type error: Cannot assign to a method
        self.use_row = self.dont_care  # type: ignore[method-assign]

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        try:
            sampled = _eval(self.expr, row)
        except NotBoundError:
            return
        # set the value now
        aggregator.bindings[self.var] = sampled
        # and skip this accumulator for future rows
        del aggregator.accumulators[self.var]

    def get_value(self) -> None:
        # set None if no value was set
        return None
|
||||
|
||||
|
||||
class GroupConcat(Accumulator):
    """Accumulator that joins the string forms of its values with a separator."""

    value: List[Literal]

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        # only GROUPCONCAT needs to have a list as accumulator
        self.value = []
        # A single space is used when no separator was given.
        self.separator = (
            " " if aggregation.separator is None else aggregation.separator
        )

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        try:
            term = _eval(self.expr, row)
        # skip UNDEF
        # NOTE: It seems like this is not the way undefined values occur, they
        # come through not as exceptions but as values. This is left here
        # however as it may occur in some cases.
        # TODO: Consider removing this.
        except NotBoundError:
            return
        # skip UNDEF
        if isinstance(term, NotBoundError):
            return
        self.value.append(term)
        if self.distinct:
            self.seen.add(term)

    def get_value(self) -> Literal:
        return Literal(self.separator.join(map(str, self.value)))
|
||||
|
||||
|
||||
class Aggregator:
    """combines different Accumulator objects"""

    # Maps the name of an aggregation to the accumulator class handling it.
    accumulator_classes = {
        "Aggregate_Count": Counter,
        "Aggregate_Sample": Sample,
        "Aggregate_Sum": Sum,
        "Aggregate_Avg": Average,
        "Aggregate_Min": Minimum,
        "Aggregate_Max": Maximum,
        "Aggregate_GroupConcat": GroupConcat,
    }

    def __init__(self, aggregations: List[CompValue]):
        self.bindings: Dict[Variable, Identifier] = {}
        self.accumulators: Dict[str, Accumulator] = {}
        for aggregation in aggregations:
            factory = self.accumulator_classes.get(aggregation.name)
            if factory is None:
                raise Exception("Unknown aggregate function " + aggregation.name)
            self.accumulators[aggregation.res] = factory(aggregation)

    def update(self, row: FrozenBindings) -> None:
        """update all own accumulators"""
        # SAMPLE accumulators may delete themselves
        # => iterate over a snapshot, not the live view
        snapshot = tuple(self.accumulators.values())
        for accumulator in snapshot:
            if accumulator.use_row(row):
                accumulator.update(row, self)

    def get_bindings(self) -> Mapping[Variable, Identifier]:
        """calculate and set last values"""
        for accumulator in self.accumulators.values():
            accumulator.set_value(self.bindings)
        return self.bindings
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Utility functions for supporting the XML Schema Datatypes hierarchy
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Set
|
||||
|
||||
from rdflib.namespace import XSD
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
# XSD datatypes enumerated by this module; the loop that builds
# ``_super_types`` iterates over this set.
XSD_DTs: Set[URIRef] = set(
    (
        XSD.integer,
        XSD.decimal,
        XSD.float,
        XSD.double,
        XSD.string,
        XSD.boolean,
        XSD.dateTime,
        XSD.nonPositiveInteger,
        XSD.negativeInteger,
        XSD.long,
        XSD.int,
        XSD.short,
        XSD.byte,
        XSD.nonNegativeInteger,
        XSD.unsignedLong,
        XSD.unsignedInt,
        XSD.unsignedShort,
        XSD.unsignedByte,
        XSD.positiveInteger,
        XSD.date,
    )
)

# adding dateTime datatypes

# Date/time datatypes.
XSD_DateTime_DTs = set((XSD.dateTime, XSD.date, XSD.time))

# Duration datatypes.
XSD_Duration_DTs = set((XSD.duration, XSD.dayTimeDuration, XSD.yearMonthDuration))
|
||||
|
||||
# Maps a datatype to the datatypes treated as its sub-types.
# Only xsd:integer has an entry.
_sub_types: Dict[URIRef, List[URIRef]] = {
    XSD.integer: [
        XSD.nonPositiveInteger,
        XSD.negativeInteger,
        XSD.long,
        XSD.int,
        XSD.short,
        XSD.byte,
        XSD.nonNegativeInteger,
        XSD.positiveInteger,
        XSD.unsignedLong,
        XSD.unsignedInt,
        XSD.unsignedShort,
        XSD.unsignedByte,
    ],
}

# Inverse of ``_sub_types``: maps each sub-type to its super-type.
_super_types: Dict[URIRef, URIRef] = {}
for superdt in XSD_DTs:
    # Datatypes without an entry in ``_sub_types`` contribute nothing.
    for subdt in _sub_types.get(superdt, []):
        _super_types[subdt] = superdt
|
||||
|
||||
# we only care about float, double, integer, decimal
# Lookup is ``_typePromotionMap[t1][t2]``: the outer key is the first operand
# datatype, the inner key the second, and the value the resulting datatype.
# Pairs of identical datatypes are absent; ``type_promotion`` returns early
# for them.
_typePromotionMap: Dict[URIRef, Dict[URIRef, URIRef]] = {
    XSD.float: {XSD.integer: XSD.float, XSD.decimal: XSD.float, XSD.double: XSD.double},
    XSD.double: {
        XSD.integer: XSD.double,
        XSD.float: XSD.double,
        XSD.decimal: XSD.double,
    },
    XSD.decimal: {
        XSD.integer: XSD.decimal,
        XSD.float: XSD.float,
        XSD.double: XSD.double,
    },
    XSD.integer: {
        XSD.decimal: XSD.decimal,
        XSD.float: XSD.float,
        XSD.double: XSD.double,
    },
}
|
||||
|
||||
|
||||
def type_promotion(t1: URIRef, t2: Optional[URIRef]) -> URIRef:
    """Return the datatype resulting from combining operands of *t1* and *t2*.

    When *t2* is ``None``, *t1* is returned untouched. Otherwise both
    datatypes are first replaced by their super-type (if one is registered);
    equal super-types are returned directly, anything else is looked up in
    the promotion table.

    Raises ``TypeError`` when the table has no entry for the pair.
    """
    if t2 is None:
        return t1

    left = _super_types.get(t1, t1)
    right = _super_types.get(t2, t2)
    if left == right:
        # matching super-types
        return left

    try:
        return _typePromotionMap[left][right]
    except KeyError:
        raise TypeError(
            "Operators cannot combine datatypes %s and %s" % (left, right)
        )
|
||||
@@ -0,0 +1,685 @@
|
||||
"""
|
||||
These methods recursively evaluate the SPARQL Algebra
|
||||
|
||||
evalQuery is the entry-point, it will setup context and
|
||||
return the SPARQLResult object
|
||||
|
||||
evalPart is called on each level and will delegate to the right method
|
||||
|
||||
A rdflib.plugins.sparql.sparql.QueryContext is passed along, keeping
|
||||
information needed for evaluation
|
||||
|
||||
A list of dicts (solution mappings) is returned, apart from GroupBy which may
|
||||
also return a dict of list of dicts
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import itertools
|
||||
import re
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Deque,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from pyparsing import ParseException
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql import CUSTOM_EVALS, parser
|
||||
from rdflib.plugins.sparql.aggregates import Aggregator
|
||||
from rdflib.plugins.sparql.evalutils import (
|
||||
_ebv,
|
||||
_eval,
|
||||
_fillTemplate,
|
||||
_join,
|
||||
_minus,
|
||||
_val,
|
||||
)
|
||||
from rdflib.plugins.sparql.parserutils import CompValue, value
|
||||
from rdflib.plugins.sparql.sparql import (
|
||||
AlreadyBound,
|
||||
FrozenBindings,
|
||||
FrozenDict,
|
||||
Query,
|
||||
QueryContext,
|
||||
SPARQLError,
|
||||
)
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.paths import Path
|
||||
|
||||
import json
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
_Triple = Tuple[Identifier, Identifier, Identifier]
|
||||
|
||||
|
||||
def evalBGP(
    ctx: QueryContext, bgp: List[_Triple]
) -> Generator[FrozenBindings, None, None]:
    """
    Evaluate a basic graph pattern.

    The first triple pattern is matched against the context graph; for every
    match the unbound positions are bound and the remaining patterns are
    evaluated recursively. An empty pattern yields the current solution.
    """
    if not bgp:
        yield ctx.solution()
        return

    s, p, o = bgp[0]
    remaining = bgp[1:]
    _s, _p, _o = ctx[s], ctx[p], ctx[o]

    # A child context is only needed when at least one position is unbound.
    has_unbound = None in (_s, _p, _o)

    # type error: Item "None" of "Optional[Graph]" has no attribute "triples"
    # type Argument 1 to "triples" of "Graph" has incompatible type "Tuple[Union[str, Path, None], Union[str, Path, None], Union[str, Path, None]]"; expected "Tuple[Optional[Node], Optional[Node], Optional[Node]]"
    for ss, sp, so in ctx.graph.triples((_s, _p, _o)):  # type: ignore[union-attr, arg-type]
        c = ctx.push() if has_unbound else ctx

        if _s is None:
            # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
            c[s] = ss  # type: ignore[assignment]

        try:
            # The same variable may occur in several positions; a conflicting
            # second binding means this triple is not a match.
            if _p is None:
                # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
                c[p] = sp  # type: ignore[assignment]
            if _o is None:
                # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
                c[o] = so  # type: ignore[assignment]
        except AlreadyBound:
            continue

        yield from evalBGP(c, remaining)
|
||||
|
||||
|
||||
def evalExtend(
    ctx: QueryContext, extend: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate an Extend (BIND) part.

    For each solution of the inner part the expression is evaluated and, on
    success, merged in under ``extend.var``. When evaluation produces a
    ``SPARQLError`` the solution is passed through unchanged.
    """
    # TODO: Deal with dict returned from evalPart from GROUP BY

    for solution in evalPart(ctx, extend.p):
        try:
            bound = _eval(extend.expr, solution.forget(ctx, _except=extend._vars))
            if isinstance(bound, SPARQLError):
                raise bound
            extended = solution.merge({extend.var: bound})
        except SPARQLError:
            extended = solution
        yield extended
|
||||
|
||||
|
||||
def evalLazyJoin(
    ctx: QueryContext, join: CompValue
) -> Generator[FrozenBindings, None, None]:
    """
    Join by pushing the bindings of the first part into the second.

    Each solution of ``join.p1`` is thawed into a context in which
    ``join.p2`` is evaluated, so the join happens implicitly and hopefully
    far fewer triples have to be evaluated.
    """
    for left in evalPart(ctx, join.p1):
        seeded = ctx.thaw(left)
        # merge, as some bindings may have been forgotten
        yield from (right.merge(left) for right in evalPart(seeded, join.p2))
|
||||
|
||||
|
||||
def evalJoin(ctx: QueryContext, join: CompValue) -> Generator[FrozenDict, None, None]:
    """Evaluate a Join part, lazily if ``join.lazy`` is set.

    In the non-lazy case the second operand is materialised into a set and
    joined with the solutions of the first operand.
    """
    # TODO: Deal with dict returned from evalPart from GROUP BY
    # only ever for join.p1

    if join.lazy:
        return evalLazyJoin(ctx, join)

    left = evalPart(ctx, join.p1)
    right = set(evalPart(ctx, join.p2))
    return _join(left, right)
|
||||
|
||||
|
||||
def evalUnion(ctx: QueryContext, union: CompValue) -> List[Any]:
    """Return the solutions of ``union.p1`` followed by those of ``union.p2``."""
    # The first branch is fully consumed before the second is evaluated.
    combined = list(evalPart(ctx, union.p1))
    combined.extend(evalPart(ctx, union.p2))
    return combined
|
||||
|
||||
|
||||
def evalMinus(ctx: QueryContext, minus: CompValue) -> Generator[FrozenDict, None, None]:
    """Evaluate a Minus part: solutions of ``p1`` filtered against the set of ``p2``."""
    kept = evalPart(ctx, minus.p1)
    removed = set(evalPart(ctx, minus.p2))
    return _minus(kept, removed)
|
||||
|
||||
|
||||
def evalLeftJoin(
    ctx: QueryContext, join: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate a LeftJoin (OPTIONAL) part.

    Every solution of ``join.p1`` is extended with the matching solutions of
    ``join.p2`` that satisfy ``join.expr``. When there is none, the bare left
    solution is yielded, subject to the re-check described below.
    """
    for left in evalPart(ctx, join.p1):
        matched = False
        seeded = ctx.thaw(left)
        for right in evalPart(seeded, join.p2):
            if _ebv(join.expr, right.forget(ctx)):
                matched = True
                yield right

        if matched:
            continue

        # we've cheated, the ctx above may contain
        # vars bound outside our scope
        # before we yield a solution without the OPTIONAL part
        # check that we would have had no OPTIONAL matches
        # even without prior bindings...
        p1_vars = join.p1._vars
        if p1_vars is None:
            yield left
            continue

        scoped = ctx.thaw(left.remember(p1_vars))
        if not any(_ebv(join.expr, b) for b in evalPart(scoped, join.p2)):
            yield left
|
||||
|
||||
|
||||
def evalFilter(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield the solutions of ``part.p`` for which ``part.expr`` is true.

    Unless ``part.no_isolated_scope`` is set, the expression is evaluated
    against the solution with the outer bindings forgotten (except
    ``part._vars``).
    """
    # TODO: Deal with dict returned from evalPart!
    for solution in evalPart(ctx, part.p):
        if part.no_isolated_scope:
            scope = solution
        else:
            scope = solution.forget(ctx, _except=part._vars)
        if _ebv(part.expr, scope):
            yield solution
|
||||
|
||||
|
||||
def evalGraph(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate a GRAPH part.

    If ``part.term`` resolves to a value, the inner pattern is evaluated
    against that one graph. Otherwise it is evaluated against every context
    of the dataset except the default one, and the graph identifier is joined
    in under ``part.term``. The graph of each yielded solution's context is
    reset to the graph that was active on entry.

    Raises ``Exception`` when the context has no dataset.
    """
    if ctx.dataset is None:
        raise Exception(
            "Non-conjunctive-graph doesn't know about "
            + "graphs. Try a query without GRAPH."
        )

    ctx = ctx.clone()
    graph: Union[str, Path, None, Graph] = ctx[part.term]
    prev_graph = ctx.graph

    if graph is not None:
        if TYPE_CHECKING:
            assert not isinstance(graph, Graph)
        # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Union[str, Path]"; expected "Union[Node, str, None]"
        c = ctx.pushGraph(ctx.dataset.get_context(graph))  # type: ignore[arg-type]
        for x in evalPart(c, part.p):
            x.ctx.graph = prev_graph
            yield x
        return

    for named_graph in ctx.dataset.contexts():
        # in SPARQL the default graph is NOT a named graph
        if named_graph == ctx.dataset.default_context:
            continue

        c = ctx.pushGraph(named_graph).push()
        graphSolution = [{part.term: named_graph.identifier}]
        for x in _join(evalPart(c, part.p), graphSolution):
            x.ctx.graph = prev_graph
            yield x
|
||||
|
||||
|
||||
def evalValues(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield one solution per VALUES row that does not clash with existing bindings.

    ``"UNDEF"`` entries are left unbound. A row whose values conflict with an
    existing binding is skipped.
    """
    for row in part.p.res:
        child = ctx.push()
        try:
            for var, val in row.items():
                if val != "UNDEF":
                    child[var] = val
        except AlreadyBound:
            continue
        else:
            yield child.solution()
|
||||
|
||||
|
||||
def evalMultiset(ctx: QueryContext, part: CompValue):
    """Evaluate a ToMultiSet part: inline VALUES data or any other inner part."""
    inner = part.p
    return evalValues(ctx, part) if inner.name == "values" else evalPart(ctx, inner)
|
||||
|
||||
|
||||
def evalPart(ctx: QueryContext, part: CompValue) -> Any:
    """Dispatch *part* to the evaluation function matching ``part.name``.

    Registered custom evaluation functions are tried first; one that raises
    ``NotImplementedError`` is skipped.

    Raises ``Exception`` for an unknown part name.
    """
    # try custom evaluation functions
    for custom_eval in CUSTOM_EVALS.values():
        try:
            return custom_eval(ctx, part)
        except NotImplementedError:
            pass  # the given custom function did not handle this part

    name = part.name

    if name == "BGP":
        # Reorder triples patterns by number of bound nodes in the current ctx
        # Do patterns with more bound nodes first
        def unbound_count(triple):
            return len([node for node in triple if ctx[node] is None])

        return evalBGP(ctx, sorted(part.triples, key=unbound_count))
    if name == "Filter":
        return evalFilter(ctx, part)
    if name == "Join":
        return evalJoin(ctx, part)
    if name == "LeftJoin":
        return evalLeftJoin(ctx, part)
    if name == "Graph":
        return evalGraph(ctx, part)
    if name == "Union":
        return evalUnion(ctx, part)
    if name == "ToMultiSet":
        return evalMultiset(ctx, part)
    if name == "Extend":
        return evalExtend(ctx, part)
    if name == "Minus":
        return evalMinus(ctx, part)

    if name == "Project":
        return evalProject(ctx, part)
    if name == "Slice":
        return evalSlice(ctx, part)
    if name == "Distinct":
        return evalDistinct(ctx, part)
    if name == "Reduced":
        return evalReduced(ctx, part)

    if name == "OrderBy":
        return evalOrderBy(ctx, part)
    if name == "Group":
        return evalGroup(ctx, part)
    if name == "AggregateJoin":
        return evalAggregateJoin(ctx, part)

    if name == "SelectQuery":
        return evalSelectQuery(ctx, part)
    if name == "AskQuery":
        return evalAskQuery(ctx, part)
    if name == "ConstructQuery":
        return evalConstructQuery(ctx, part)

    if name == "ServiceGraphPattern":
        return evalServiceQuery(ctx, part)

    if name == "DescribeQuery":
        return evalDescribeQuery(ctx, part)

    raise Exception("I dont know: %s" % name)
|
||||
|
||||
|
||||
def evalServiceQuery(ctx: QueryContext, part: CompValue):
    """Evaluate a SERVICE graph pattern against a remote SPARQL endpoint.

    The endpoint URL and the inner pattern are extracted from
    ``part.service_string``. The pattern is turned into a query string and
    sent to the endpoint, and every binding of the JSON result is yielded as
    a solution. Nothing is yielded when the service string does not match
    the expected ``service <url> { ... }`` shape.

    Raises ``Exception`` when the endpoint answers with a status other
    than 200.
    """
    match = re.match(
        "^service <(.*)>[ \n]*{(.*)}[ \n]*$",
        # type error: Argument 2 to "get" of "CompValue" has incompatible type "str"; expected "bool"  [arg-type]
        part.get("service_string", ""),  # type: ignore[arg-type]
        re.DOTALL | re.I,
    )
    if not match:
        return

    service_url = match.group(1)
    service_query = _buildQueryStringForServiceCall(ctx, match.group(2))

    query_settings = {"query": service_query, "output": "json"}
    headers = {
        "accept": "application/sparql-results+json",
        "user-agent": "rdflibForAnUser",
    }
    # GET is easier to cache so prefer that if the query is not too long
    if len(service_query) < 600:
        request = Request(service_url + "?" + urlencode(query_settings), headers=headers)
    else:
        request = Request(
            service_url,
            data=urlencode(query_settings).encode(),
            headers=headers,
        )

    # Read the payload inside the ``with`` so the connection is closed before
    # any solution is handed to the (possibly slow or abandoned) consumer.
    with urlopen(request) as response:
        if response.status != 200:
            raise Exception(
                "Service: %s responded with code: %s" % (service_url, response.status)
            )
        payload = response.read()

    json_dict = orjson.loads(payload) if _HAS_ORJSON else json.loads(payload)
    variables = json_dict["head"]["vars"]
    for r in json_dict["results"]["bindings"]:
        # type error: Argument 2 to "_yieldBindingsFromServiceCallResult" has incompatible type "str"; expected "Dict[str, Dict[str, str]]"
        yield from _yieldBindingsFromServiceCallResult(ctx, r, variables)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def _buildQueryStringForServiceCall(ctx: QueryContext, service_query: str) -> str:
    """
    Build a query string to be used by the service call.

    It is supposed to pass in the existing bound solutions.
    Re-adds prefixes if added and sets the base.
    Wraps it in select if needed.
    """
    try:
        parser.parseQuery(service_query)
    except ParseException:
        # This could be because we don't have a select around the service call.
        service_query = "SELECT REDUCED * WHERE {" + service_query + "}"
        # type error: Item "None" of "Optional[Prologue]" has no attribute "namespace_manager"
        for p in ctx.prologue.namespace_manager.store.namespaces():  # type: ignore[union-attr]
            service_query = "PREFIX " + p[0] + ":" + p[1].n3() + " " + service_query
        # re add the base if one was defined
        # type error: Item "None" of "Optional[Prologue]" has no attribute "base"
        base = ctx.prologue.base  # type: ignore[union-attr]
        if base is not None and len(base) > 0:
            service_query = "BASE <" + base + "> " + service_query

    # Pass the current variable bindings on to the service as a VALUES clause.
    sol = [v for v in ctx.solution() if isinstance(v, Variable)]
    if len(sol) > 0:
        variables = " ".join([v.n3() for v in sol])
        variables_bound = " ".join([ctx.get(v).n3() for v in sol])
        service_query = (
            service_query + "VALUES (" + variables + ") {(" + variables_bound + ")}"
        )
    return service_query
|
||||
|
||||
|
||||
def _yieldBindingsFromServiceCallResult(
    ctx: QueryContext, r: Dict[str, Dict[str, str]], variables: List[str]
) -> Generator[FrozenBindings, None, None]:
    """Convert one JSON result row *r* into a single solution.

    Variables missing from the row (or with an empty binding) stay unbound.

    Raises ``ValueError`` for a binding whose ``type`` is not one of
    ``uri``, ``literal``, ``typed-literal`` or ``bnode``.
    """
    res_dict: Dict[Variable, Identifier] = {}
    for var in variables:
        if not (var in r and r[var]):
            continue

        var_binding = r[var]
        var_type = var_binding["type"]
        term: Identifier
        if var_type == "uri":
            term = URIRef(var_binding["value"])
        elif var_type == "literal":
            term = Literal(
                var_binding["value"],
                datatype=var_binding.get("datatype"),
                lang=var_binding.get("xml:lang"),
            )
        # This is here because of
        # https://www.w3.org/TR/2006/NOTE-rdf-sparql-json-res-20061004/#variable-binding-results
        elif var_type == "typed-literal":
            term = Literal(
                var_binding["value"], datatype=URIRef(var_binding["datatype"])
            )
        elif var_type == "bnode":
            term = BNode(var_binding["value"])
        else:
            raise ValueError(f"invalid type {var_type!r} for variable {var!r}")
        res_dict[Variable(var)] = term

    yield FrozenBindings(ctx, res_dict)
|
||||
|
||||
|
||||
def evalGroup(ctx: QueryContext, group: CompValue):
    """
    Evaluate the part wrapped by a Group.

    See http://www.w3.org/TR/sparql11-query/#defn_algGroup
    """
    # grouping should be implemented by evalAggregateJoin
    grouped_part = group.p
    return evalPart(ctx, grouped_part)
|
||||
|
||||
|
||||
def evalAggregateJoin(
    ctx: QueryContext, agg: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate an AggregateJoin: aggregate the rows of ``agg.p`` per group.

    One ``Aggregator`` is created per distinct group key. Without a grouping
    expression a single aggregator (key ``True``) receives all rows. One
    solution is yielded per aggregator, or an empty solution when there is
    no aggregator at all.
    """
    rows = evalPart(ctx, agg.p)
    # p is always a Group, we always get a dict back

    group_expr = agg.p.expr
    aggregators: Dict[Any, Any] = collections.defaultdict(
        lambda: Aggregator(aggregations=agg.A)
    )

    if group_expr is not None:
        for row in rows:
            # determine right group aggregator for row
            key = tuple(_eval(e, row, False) for e in group_expr)
            aggregators[key].update(row)
    else:
        # no grouping, just COUNT in SELECT clause
        # get 1 aggregator for counting (created even when there are no rows)
        single = aggregators[True]
        for row in rows:
            single.update(row)

    # all rows are done; yield aggregated values
    for aggregator in aggregators.values():
        yield FrozenBindings(ctx, aggregator.get_bindings())

    # there were no matches
    if len(aggregators) == 0:
        yield FrozenBindings(ctx)
|
||||
|
||||
|
||||
def evalOrderBy(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Sort the solutions of ``part.p`` by the ORDER BY conditions.

    The conditions are applied from last to first, each as a separate
    ``sorted`` pass.
    """
    res = evalPart(ctx, part.p)

    for condition in reversed(part.expr):
        descending = bool(condition.order and condition.order == "DESC")

        def sort_key(row, _expr=condition.expr):
            return _val(value(row, _expr, variables=True))

        res = sorted(res, key=sort_key, reverse=descending)

    return res
|
||||
|
||||
|
||||
def evalSlice(ctx: QueryContext, slice: CompValue):
    """Apply OFFSET/LIMIT: lazily cut ``slice.length`` rows starting at ``slice.start``."""
    res = evalPart(ctx, slice.p)

    if slice.length is not None:
        stop = slice.start + slice.length
    else:
        # no LIMIT: run to the end
        stop = None
    return itertools.islice(res, slice.start, stop)
|
||||
|
||||
|
||||
def evalReduced(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Apply REDUCED to the solutions of ``part.p``.

    REDUCED is less strict than DISTINCT: duplicates are only dropped when
    the row is still held in a small buffer of recently seen rows. For
    sorted input this should give the same result as DISTINCT with limited
    extra memory and time per row.
    """
    # This implementation uses a most recently used strategy and a limited
    # buffer size. It relates to a LRU caching algorithm:
    # https://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used_.28LRU.29
    MAX = 1
    # TODO: add configuration or determine "best" size for most use cases
    # 0: No reduction
    # 1: compare only with the last row, almost no reduction with
    #    unordered incoming rows
    # N: The greater the buffer size the greater the reduction but more
    #    memory and time are needed

    # mixed data structure: set for lookup, deque for append/pop/remove
    recent = set()
    recency: Deque[Any] = collections.deque()

    for row in evalPart(ctx, part.p):
        if row not in recent:
            # row seems to be new
            yield row
            recent.add(row)
            if len(recent) > MAX:
                # drop the least recently used row from buffer
                recent.remove(recency.pop())
        else:
            # forget last position of row
            recency.remove(row)
        # put row to the front
        recency.appendleft(row)
|
||||
|
||||
|
||||
def evalDistinct(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield each distinct solution of ``part.p`` once, in first-seen order."""
    seen = set()
    for row in evalPart(ctx, part.p):
        if row in seen:
            continue
        yield row
        seen.add(row)
|
||||
|
||||
|
||||
def evalProject(ctx: QueryContext, project: CompValue):
    """Lazily project every solution of ``project.p`` onto ``project.PV``."""
    rows = evalPart(ctx, project.p)
    return (solution.project(project.PV) for solution in rows)
|
||||
|
||||
|
||||
def evalSelectQuery(
    ctx: QueryContext, query: CompValue
) -> Mapping[str, Union[str, List[Variable], Iterable[FrozenDict]]]:
    """Evaluate a SELECT query.

    Returns a mapping with ``type_`` (``"SELECT"``), ``bindings`` (the
    evaluated solutions) and ``vars_`` (the projected variables).
    """
    res: Dict[str, Union[str, List[Variable], Iterable[FrozenDict]]] = {
        "type_": "SELECT",
        "bindings": evalPart(ctx, query.p),
        "vars_": query.PV,
    }
    return res
|
||||
|
||||
|
||||
def evalAskQuery(ctx: QueryContext, query: CompValue) -> Mapping[str, Union[str, bool]]:
    """Evaluate an ASK query.

    Returns a mapping with ``type_`` (``"ASK"``) and ``askAnswer``, which is
    true as soon as the pattern produces a first solution.
    """
    # Only the first solution is requested; any() stops right after it.
    found = any(True for _ in evalPart(ctx, query.p))
    res: Dict[str, Union[bool, str]] = {"type_": "ASK", "askAnswer": found}
    return res
|
||||
|
||||
|
||||
def evalConstructQuery(
    ctx: QueryContext, query: CompValue
) -> Mapping[str, Union[str, Graph]]:
    """Evaluate a CONSTRUCT query.

    The template is filled once per solution and the resulting triples are
    added to a fresh graph. Returns a mapping with ``type_``
    (``"CONSTRUCT"``) and ``graph``.
    """
    # a construct-where query has no template: reuse the pattern's triples
    # (query->project->bgp ...)
    template = query.template or query.p.p.triples

    constructed = Graph()
    for solution in evalPart(ctx, query.p):
        constructed += _fillTemplate(template, solution)

    res: Dict[str, Union[str, Graph]] = {"type_": "CONSTRUCT", "graph": constructed}
    return res
|
||||
|
||||
|
||||
def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]:
    """Evaluate a DESCRIBE query.

    Collects the explicitly listed IRIs plus every value bound by the WHERE
    clause (if any) and gathers ``cbd`` output for each of them into a new
    graph. Returns a mapping with ``type_`` (``"DESCRIBE"``) and ``graph``.
    """
    # Create a result graph and bind namespaces from the graph being queried
    described = Graph()
    # type error: Item "None" of "Optional[Graph]" has no attribute "namespaces"
    for prefix, namespace in ctx.graph.namespaces():  # type: ignore[union-attr]
        described.bind(prefix, namespace)

    # Explicit IRIs may be provided to a DESCRIBE query.
    # If there is a WHERE clause, explicit IRIs may be provided in
    # addition to projected variables. Find those explicit IRIs and
    # prepare to describe them.
    to_describe = set()
    for term in query.PV:
        if isinstance(term, URIRef):
            to_describe.add(term)

    # If there is a WHERE clause, evaluate it then find the unique set of
    # resources to describe across all bindings and projected variables
    if query.p is not None:
        per_solution = [set(binding.values()) for binding in evalPart(ctx, query.p)]
        to_describe.update(*per_solution)

    # Get a CBD for all resources identified to describe
    for resource in to_describe:
        # type error: Item "None" of "Optional[Graph]" has no attribute "cbd"
        ctx.graph.cbd(resource, target_graph=described)  # type: ignore[union-attr]

    res: Dict[str, Union[str, Graph]] = {"type_": "DESCRIBE", "graph": described}
    return res
|
||||
|
||||
|
||||
def evalQuery(
    graph: Graph,
    query: Query,
    initBindings: Optional[Mapping[str, Identifier]] = None,
    base: Optional[str] = None,
) -> Mapping[Any, Any]:
    """
    Evaluate *query* against *graph* and return the result mapping.

    The keys of *initBindings* are converted to ``Variable`` and used as the
    initial bindings of the query context.

    .. caution::

        This method can access indirectly requested network endpoints, for
        example, query processing will attempt to access network endpoints
        specified in ``SERVICE`` directives.

        When processing untrusted or potentially malicious queries, measures
        should be taken to restrict network and file access.

        For information on available security measures, see the RDFLib
        :doc:`Security Considerations </security_considerations>`
        documentation.
    """
    main = query.algebra

    variable_bindings = {
        Variable(name): term for name, term in (initBindings or {}).items()
    }

    ctx = QueryContext(
        graph, initBindings=variable_bindings, datasetClause=main.datasetClause
    )
    ctx.prologue = query.prologue

    return evalPart(ctx, main)
|
||||
@@ -0,0 +1,188 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
from typing import (
|
||||
Any,
|
||||
DefaultDict,
|
||||
Generator,
|
||||
Iterable,
|
||||
Mapping,
|
||||
Set,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.plugins.sparql.operators import EBV
|
||||
from rdflib.plugins.sparql.parserutils import CompValue, Expr
|
||||
from rdflib.plugins.sparql.sparql import (
|
||||
FrozenBindings,
|
||||
FrozenDict,
|
||||
NotBoundError,
|
||||
QueryContext,
|
||||
SPARQLError,
|
||||
)
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
_ContextType = Union[FrozenBindings, QueryContext]
|
||||
_FrozenDictT = TypeVar("_FrozenDictT", bound=FrozenDict)
|
||||
|
||||
|
||||
def _diff(
|
||||
a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT], expr
|
||||
) -> Set[_FrozenDictT]:
|
||||
res = set()
|
||||
|
||||
for x in a:
|
||||
if all(not x.compatible(y) or not _ebv(expr, x.merge(y)) for y in b):
|
||||
res.add(x)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def _minus(
|
||||
a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT]
|
||||
) -> Generator[_FrozenDictT, None, None]:
|
||||
for x in a:
|
||||
if all((not x.compatible(y)) or x.disjointDomain(y) for y in b):
|
||||
yield x
|
||||
|
||||
|
||||
@overload
|
||||
def _join(
|
||||
a: Iterable[FrozenBindings], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenBindings, None, None]: ...
|
||||
|
||||
|
||||
@overload
|
||||
def _join(
|
||||
a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenDict, None, None]: ...
|
||||
|
||||
|
||||
def _join(
|
||||
a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenDict, None, None]:
|
||||
for x in a:
|
||||
for y in b:
|
||||
if x.compatible(y):
|
||||
yield x.merge(y)
|
||||
|
||||
|
||||
def _ebv(expr: Union[Literal, Variable, Expr], ctx: FrozenDict) -> bool:
    """
    Return true/false for the given expr

    Either the expr is itself true/false
    or evaluates to something, with the given ctx

    an error is false

    Raises ``Exception`` when *expr* is a ``CompValue`` that is not an
    ``Expr``.
    """
    try:
        return EBV(expr)
    except SPARQLError:
        # expr has no boolean value of its own; try to evaluate it below
        pass

    if isinstance(expr, Expr):
        try:
            return EBV(expr.eval(ctx))
        except SPARQLError:
            return False  # filter error == False
    # type error: Subclass of "Literal" and "CompValue" cannot exist: would have incompatible method signatures
    elif isinstance(expr, CompValue):  # type: ignore[unreachable]
        raise Exception("Weird - filter got a CompValue without evalfn! %r" % expr)
    elif isinstance(expr, Variable):
        try:
            return EBV(ctx[expr])
        except Exception:
            # Unbound variable or a value without a boolean interpretation.
            # Deliberately not a bare ``except`` so that KeyboardInterrupt
            # and SystemExit still propagate.
            return False
    return False
|
||||
|
||||
|
||||
@overload
def _eval(
    expr: Union[Literal, URIRef],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = ...,
) -> Union[Literal, URIRef]: ...


@overload
def _eval(
    expr: Union[Variable, Expr],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = ...,
) -> Union[Any, SPARQLError]: ...


def _eval(
    expr: Union[Literal, URIRef, Variable, Expr],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = True,
) -> Any:
    """Evaluate *expr* in *ctx*.

    Literals and URIRefs evaluate to themselves, an ``Expr`` through its
    ``eval`` method and a ``Variable`` through a lookup in *ctx*. For an
    unbound variable ``NotBoundError`` is raised, or ``None`` returned when
    *raise_not_bound_error* is false.

    Raises ``Exception`` for anything else.
    """
    if isinstance(expr, (Literal, URIRef)):
        return expr

    if isinstance(expr, Expr):
        return expr.eval(ctx)

    if isinstance(expr, Variable):
        try:
            return ctx[expr]
        except KeyError:
            if not raise_not_bound_error:
                return None
            raise NotBoundError("Variable %s is not bound" % expr)

    if isinstance(expr, CompValue):  # type: ignore[unreachable]
        raise Exception("Weird - _eval got a CompValue without evalfn! %r" % expr)

    raise Exception("Cannot eval thing: %s (%s)" % (expr, type(expr)))
|
||||
|
||||
|
||||
def _filter(
|
||||
a: Iterable[FrozenDict], expr: Union[Literal, Variable, Expr]
|
||||
) -> Generator[FrozenDict, None, None]:
|
||||
for c in a:
|
||||
if _ebv(expr, c):
|
||||
yield c
|
||||
|
||||
|
||||
def _fillTemplate(
    template: Iterable[Tuple[Identifier, Identifier, Identifier]],
    solution: _ContextType,
) -> Generator[Tuple[Identifier, Identifier, Identifier], None, None]:
    """
    For construct/deleteWhere and friends

    Fill a triple template with instantiated variables. Blank nodes of the
    template are replaced by fresh blank nodes (the same fresh node for
    repeated occurrences within this call). Triples with a position that
    ends up ``None`` are skipped.
    """
    # instantiate new bnodes for each solution
    fresh_bnodes: DefaultDict[BNode, BNode] = collections.defaultdict(BNode)

    for pattern in template:
        s, p, o = pattern
        looked_up = (solution.get(s), solution.get(p), solution.get(o))

        filled = tuple(
            fresh_bnodes[term] if isinstance(term, BNode) else bound
            for term, bound in zip(pattern, looked_up)
        )
        if any(term is None for term in filled):
            continue

        _s, _p, _o = filled
        yield (_s, _p, _o)
|
||||
|
||||
|
||||
_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal)
|
||||
|
||||
|
||||
def _val(v: _ValueT) -> Tuple[int, _ValueT]:
    """Utility for ordering things.

    Returns ``(rank, v)`` with rank 0 for a Variable, 1 for a BNode, 2 for a
    URIRef and 3 for a Literal. Any other value falls through and yields
    ``None``.
    """
    for rank, term_class in enumerate((Variable, BNode, URIRef, Literal)):
        if isinstance(v, term_class):
            return (rank, v)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
|
||||
NOTE: PyParsing setResultName/__call__ provides a very similar solution to this
|
||||
I didn't realise at the time of writing and I will remove a
|
||||
lot of this code at some point
|
||||
|
||||
Utility classes for creating an abstract-syntax tree out with pyparsing actions
|
||||
|
||||
Lets you label and group parts of parser production rules
|
||||
|
||||
For example:
|
||||
|
||||
# [5] BaseDecl ::= 'BASE' IRIREF
|
||||
BaseDecl = Comp('Base', Keyword('BASE') + Param('iri',IRIREF))
|
||||
|
||||
After parsing, this gives you back a CompValue object,
|
||||
which is a dict/object with the parameters specified.
|
||||
So you can access the parameters as attributes or as keys:
|
||||
|
||||
baseDecl.iri
|
||||
|
||||
Comp lets you set an evalFn that is bound to the eval method of
|
||||
the resulting CompValue
|
||||
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import OrderedDict
|
||||
from types import MethodType
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pyparsing import ParserElement, ParseResults, TokenConverter, originalTextFor
|
||||
|
||||
from rdflib.term import BNode, Identifier, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.plugins.sparql.sparql import FrozenBindings
|
||||
|
||||
|
||||
# This is an alternative
|
||||
|
||||
# Comp('Sum')( Param('x')(Number) + '+' + Param('y')(Number) )
|
||||
|
||||
|
||||
def value(
    ctx: FrozenBindings,
    val: Any,
    variables: bool = False,
    errors: bool = False,
) -> Any:
    """
    Evaluate ``val`` against the bindings in ``ctx``.

    * An ``Expr`` is evaluated with ``ctx``.
    * Any other ``CompValue`` is rejected with an exception.
    * A list is evaluated element by element.
    * A ``BNode`` or ``Variable`` is looked up in ``ctx``.
    * A single-element ``ParseResults`` is unwrapped and evaluated.
    * Anything else is returned unchanged.

    :param ctx: the bindings used for variable lookup.
    :param val: the thing to evaluate.
    :param variables: normally an unbound variable raises
        ``NotBoundError``; pass ``True`` to get the unbound variable back.
    :param errors: normally a ``SPARQLError`` found as the bound value is
        raised; pass ``True`` to get the error object back instead.
    """
    # Expr is tested before CompValue because every Expr is a CompValue.
    if isinstance(val, Expr):
        return val.eval(ctx)  # recurse?

    if isinstance(val, CompValue):
        raise Exception("What do I do with this CompValue? %s" % val)

    if isinstance(val, list):
        return [value(ctx, item, variables, errors) for item in val]

    if isinstance(val, (BNode, Variable)):
        bound = ctx.get(val)
        if isinstance(bound, SPARQLError) and not errors:
            raise bound
        if bound is not None:
            return bound

        # not bound
        if not variables:
            raise NotBoundError
        return val

    if isinstance(val, ParseResults) and len(val) == 1:
        return value(ctx, val[0], variables, errors)

    return val
|
||||
|
||||
|
||||
class ParamValue:
    """
    The result of parsing a Param.

    Only records the parameter name, its token(s) and the ``isList`` flag;
    all cleverness is in the CompValue.
    """

    def __init__(
        self, name: str, tokenList: Union[List[Any], ParseResults], isList: bool
    ):
        self.isList = isList
        self.name = name

        # A one-element list / ParseResults is stored as its only element.
        is_singleton = (
            isinstance(tokenList, (list, ParseResults)) and len(tokenList) == 1
        )
        self.tokenList = tokenList[0] if is_singleton else tokenList

    def __str__(self) -> str:
        return "Param(%s, %s)" % (self.name, self.tokenList)
|
||||
|
||||
|
||||
class Param(TokenConverter):
    """
    A pyparsing token for labelling a part of the parse-tree.

    When ``isList`` is true, repeated occurrences of the parameter have
    their values merged into a list by the enclosing ``Comp``.
    """

    def __init__(self, name: str, expr, isList: bool = False):
        self.isList = isList
        super().__init__(expr)
        self.setName(name)
        # Wrap whatever the inner expression matched in a ParamValue.
        self.addParseAction(self.postParse2)

    def postParse2(self, tokenList: Union[List[Any], ParseResults]) -> ParamValue:
        """Parse action: label the matched tokens with this Param's name."""
        return ParamValue(self.name, tokenList, self.isList)
|
||||
|
||||
|
||||
class ParamList(Param):
    """
    Shorthand for ``Param(name, expr, isList=True)``.
    """

    def __init__(self, name: str, expr):
        super().__init__(name, expr, True)
|
||||
|
||||
|
||||
# Unconstrained type variable used in the signature of CompValue._value.
_ValT = TypeVar("_ValT")
|
||||
|
||||
|
||||
class CompValue(OrderedDict):
    """
    The result of parsing a Comp.

    Any included Params are available as dict keys or as attributes.
    Reading an attribute that is neither an instance attribute nor a key
    yields ``None`` rather than raising ``AttributeError``.

    While an evaluation context is set on ``self.ctx`` (``Expr.eval`` sets
    it for the duration of a call), values read through ``[]``, ``get`` or
    attribute access are resolved against that context with ``value``.
    """

    def __init__(self, name: str, **values):
        OrderedDict.__init__(self)
        self.name = name
        self.update(values)

    def clone(self) -> CompValue:
        """Return a new ``CompValue`` with the same name and items."""
        return CompValue(self.name, **self)

    def __str__(self) -> str:
        return self.name + "_" + OrderedDict.__str__(self)

    def __repr__(self) -> str:
        return self.name + "_" + dict.__repr__(self)

    def _value(
        self, val: _ValT, variables: bool = False, errors: bool = False
    ) -> Union[_ValT, Any]:
        """Resolve ``val`` against ``self.ctx`` when a context is set.

        ``variables`` and ``errors`` are forwarded to ``value``; without a
        context ``val`` is returned unchanged.
        """
        # ``self.ctx`` resolves to None through __getattr__ when no context
        # has been assigned.
        if self.ctx is not None:
            # ``errors`` used to be dropped here, so get(..., errors=True)
            # behaved exactly like errors=False.
            return value(self.ctx, val, variables, errors)
        else:
            return val

    def __getitem__(self, a):
        return self._value(OrderedDict.__getitem__(self, a))

    # type error: Signature of "get" incompatible with supertype "dict"
    # type error: Signature of "get" incompatible with supertype "Mapping"  [override]
    def get(self, a, variables: bool = False, errors: bool = False):  # type: ignore[override]
        """Return the (resolved) value stored under ``a``.

        Unlike ``dict.get``, a missing key falls back to ``a`` itself, which
        is then resolved like any other value.
        """
        return self._value(OrderedDict.get(self, a, a), variables, errors)

    def __getattr__(self, a: str) -> Any:
        # Hack hack: OrderedDict relies on this
        if a in ("_OrderedDict__root", "_OrderedDict__end"):
            raise AttributeError()
        try:
            return self[a]
        except KeyError:
            # Unknown attributes deliberately read as None.
            return None

    if TYPE_CHECKING:
        # this is here because properties are dynamically set on CompValue
        def __setattr__(self, __name: str, __value: Any) -> None: ...
|
||||
|
||||
|
||||
class Expr(CompValue):
    """
    A CompValue that can be evaluated.

    The evaluation function is bound to the instance as a method, so it is
    called as ``evalfn(self, ctx)``.
    """

    def __init__(
        self,
        name: str,
        evalfn: Optional[Callable[[Any, Any], Any]] = None,
        **values,
    ):
        super().__init__(name, **values)
        self._evalfn = MethodType(evalfn, self) if evalfn else None

    # NOTE(review): the ``{}`` default is one dict shared by every call; it
    # is kept as-is because it is part of the existing signature.
    def eval(self, ctx: Any = {}) -> Union[SPARQLError, Any]:
        """Evaluate this expression in ``ctx``.

        ``self.ctx`` is set for the duration of the call (so that item and
        attribute reads are resolved against ``ctx``) and always reset to
        ``None`` afterwards.  A ``SPARQLError`` raised by the evaluation
        function is returned rather than propagated.
        """
        try:
            self.ctx: Optional[Union[Mapping, FrozenBindings]] = ctx
            # type error: "None" not callable
            return self._evalfn(ctx)  # type: ignore[misc]
        except SPARQLError as err:
            return err
        finally:
            self.ctx = None
|
||||
|
||||
|
||||
class Comp(TokenConverter):
    """
    A pyparsing token for grouping together things with a label.

    Any sub-tokens that are not Params will be ignored.

    Returns CompValue / Expr objects - depending on whether evalFn is set.
    """

    def __init__(self, name: str, expr: ParserElement):
        self.expr = expr
        TokenConverter.__init__(self, expr)
        self.setName(name)
        self.evalfn: Optional[Callable[[Any, Any], Any]] = None

    def postParse(
        self, instring: str, loc: int, tokenList: ParseResults
    ) -> Union[Expr, CompValue]:
        """Collect the ParamValue tokens of a match into a CompValue / Expr."""
        res: Union[Expr, CompValue]
        if self.evalfn:
            res = Expr(self.name)
            res._evalfn = MethodType(self.evalfn, res)
        else:
            res = CompValue(self.name)
            if self.name == "ServiceGraphPattern":
                # The pattern has already matched, so re-scan the input for
                # its original text.  The first hit is taken; more than one
                # occurrence is not handled yet.
                matcher = originalTextFor(self.expr)
                res["service_string"] = matcher.searchString(instring)[0][0]

        for tok in tokenList:
            if not isinstance(tok, ParamValue):
                continue
            if not tok.isList:
                res[tok.name] = tok.tokenList
                continue
            # List parameters accumulate every occurrence under one key.
            if tok.name not in res:
                res[tok.name] = []
            res[tok.name].append(tok.tokenList)
        return res

    def setEvalFn(self, evalfn: Callable[[Any, Any], Any]) -> Comp:
        """Set the evaluation function and return ``self`` for chaining."""
        self.evalfn = evalfn
        return self
|
||||
|
||||
|
||||
def prettify_parsetree(t: ParseResults, indent: str = "", depth: int = 0) -> str:
    """Render a pyparsing result as an indented, multi-line string.

    The positional tokens are rendered first, followed by the named results
    in sorted order.

    :param t: the parse results to render.
    :param indent: prefix written at the start of every line.
    :param depth: nesting level of ``t``.
    """
    pieces: List[str] = [
        _prettify_sub_parsetree(token, indent, depth + 1) for token in t.asList()
    ]
    for key, val in sorted(t.items()):
        pieces.append("%s%s- %s:\n" % (indent, " " * depth, key))
        pieces.append(_prettify_sub_parsetree(val, indent, depth + 1))
    return "".join(pieces)
|
||||
|
||||
|
||||
def _prettify_sub_parsetree(
    t: Union[Identifier, CompValue, set, list, dict, Tuple, bool, None],
    indent: str = "",
    depth: int = 0,
) -> str:
    """Render one node of a parse tree as indented text.

    Dicts are rendered key by key (a ``CompValue`` additionally gets a
    ``> name:`` header line), lists element by element, and anything else
    as a single ``- repr`` line.
    """
    pieces: List[str] = []
    if isinstance(t, dict):
        # CompValue is itself a dict; only the header line is specific to it.
        if isinstance(t, CompValue):
            pieces.append("%s%s> %s:\n" % (indent, " " * depth, t.name))
        for key, val in t.items():
            pieces.append("%s%s- %s:\n" % (indent, " " * (depth + 1), key))
            pieces.append(_prettify_sub_parsetree(val, indent, depth + 2))
    elif isinstance(t, list):
        pieces.extend(
            _prettify_sub_parsetree(item, indent, depth + 1) for item in t
        )
    else:
        pieces.append("%s%s- %r\n" % (indent, " " * depth, t))
    return "".join(pieces)
|
||||
|
||||
|
||||
# hurrah for circular imports
|
||||
from rdflib.plugins.sparql.sparql import NotBoundError, SPARQLError # noqa: E402
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Code for tying SPARQL Engine into RDFLib
|
||||
|
||||
These should be automatically registered with RDFLib
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Mapping, Optional, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate
|
||||
from rdflib.plugins.sparql.evaluate import evalQuery
|
||||
from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
|
||||
from rdflib.plugins.sparql.sparql import Query, Update
|
||||
from rdflib.plugins.sparql.update import evalUpdate
|
||||
from rdflib.query import Processor, Result, UpdateProcessor
|
||||
from rdflib.term import Identifier
|
||||
|
||||
|
||||
def prepareQuery(
    queryString: str,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> Query:
    """
    Parse and translate a SPARQL Query.

    :param queryString: the query text.
    :param initNs: initial namespace bindings; an empty mapping is used
        when omitted.
    :param base: passed to the translation step as the base.
    :return: the translated query, with the arguments it was built from
        recorded on ``_original_args``.
    """
    namespaces = {} if initNs is None else initNs
    parsed = parseQuery(queryString)
    query = translateQuery(parsed, base, namespaces)
    query._original_args = (queryString, namespaces, base)
    return query
|
||||
|
||||
|
||||
def prepareUpdate(
    updateString: str,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> Update:
    """
    Parse and translate a SPARQL Update.

    :param updateString: the update request text.
    :param initNs: initial namespace bindings; an empty mapping is used
        when omitted.
    :param base: passed to the translation step as the base.
    :return: the translated update, with the arguments it was built from
        recorded on ``_original_args``.
    """
    namespaces = {} if initNs is None else initNs
    parsed = parseUpdate(updateString)
    update = translateUpdate(parsed, base, namespaces)
    update._original_args = (updateString, namespaces, base)
    return update
|
||||
|
||||
|
||||
def processUpdate(
    graph: Graph,
    updateString: str,
    initBindings: Optional[Mapping[str, Identifier]] = None,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> None:
    """
    Process a SPARQL Update Request.

    Parses and translates ``updateString`` and evaluates it against
    ``graph``.  Returns nothing on success or raises exceptions on error.
    """
    # ``initNs`` is handed to the translator as given (possibly None).
    update = translateUpdate(parseUpdate(updateString), base, initNs)
    evalUpdate(graph, update, initBindings)
|
||||
|
||||
|
||||
class SPARQLResult(Result):
    """A ``Result`` populated from a mapping of evaluation results."""

    def __init__(self, res: Mapping[str, Any]):
        # "type_" is mandatory; every other entry is optional and defaults
        # to None when absent.
        super().__init__(res["type_"])
        lookup = res.get
        self.vars = lookup("vars_")
        # type error: Incompatible types in assignment (expression has type "Optional[Any]", variable has type "MutableSequence[Mapping[Variable, Identifier]]")
        self.bindings = lookup("bindings")  # type: ignore[assignment]
        self.askAnswer = lookup("askAnswer")
        self.graph = lookup("graph")
|
||||
|
||||
|
||||
class SPARQLUpdateProcessor(UpdateProcessor):
    """Update processor that evaluates SPARQL Update requests on a graph."""

    def __init__(self, graph):
        self.graph = graph

    def update(
        self,
        strOrQuery: Union[str, Update],
        initBindings: Optional[Mapping[str, Identifier]] = None,
        initNs: Optional[Mapping[str, Any]] = None,
    ) -> None:
        """
        Evaluate an update against this processor's graph.

        A string is parsed and translated first (using ``initNs``); an
        already prepared ``Update`` is evaluated as given.

        .. caution::

           This method can access indirectly requested network endpoints, for
           example, query processing will attempt to access network endpoints
           specified in ``SERVICE`` directives.

           When processing untrusted or potentially malicious queries, measures
           should be taken to restrict network and file access.

           For information on available security measures, see the RDFLib
           :doc:`Security Considerations </security_considerations>`
           documentation.
        """
        update = strOrQuery
        if isinstance(update, str):
            update = translateUpdate(parseUpdate(update), initNs=initNs)

        return evalUpdate(self.graph, update, initBindings)
|
||||
|
||||
|
||||
class SPARQLProcessor(Processor):
    """Query processor that evaluates SPARQL queries on a graph."""

    def __init__(self, graph):
        self.graph = graph

    # NOTE on type error: the supertype's signature does not accept a base
    # argument and the position of the DEBUG argument is different.
    # type error: Signature of "query" incompatible with supertype "Processor"
    def query(  # type: ignore[override]
        self,
        strOrQuery: Union[str, Query],
        initBindings: Optional[Mapping[str, Identifier]] = None,
        initNs: Optional[Mapping[str, Any]] = None,
        base: Optional[str] = None,
        DEBUG: bool = False,
    ) -> Mapping[str, Any]:
        """
        Evaluate a query with the given initial bindings, and initial
        namespaces. The given base is used to resolve relative URIs in
        the query and will be overridden by any BASE given in the query.

        A string is parsed and translated first; an already prepared
        ``Query`` is evaluated as given.

        .. caution::

           This method can access indirectly requested network endpoints, for
           example, query processing will attempt to access network endpoints
           specified in ``SERVICE`` directives.

           When processing untrusted or potentially malicious queries, measures
           should be taken to restrict network and file access.

           For information on available security measures, see the RDFLib
           :doc:`Security Considerations </security_considerations>`
           documentation.
        """
        query = strOrQuery
        if isinstance(query, str):
            query = translateQuery(parseQuery(query), base, initNs)

        return evalQuery(self.graph, query, initBindings, base)
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
Parsers and serializers for SPARQL Result formats
|
||||
"""
|
||||
+104
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
|
||||
This module implements a parser and serializer for the CSV SPARQL result
|
||||
formats
|
||||
|
||||
http://www.w3.org/TR/sparql11-results-csv-tsv/
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import csv
|
||||
from io import BufferedIOBase, TextIOBase
|
||||
from typing import IO, Dict, List, Optional, Union, cast
|
||||
|
||||
from rdflib.plugins.sparql.processor import SPARQLResult
|
||||
from rdflib.query import Result, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
|
||||
class CSVResultParser(ResultParser):
    """Parser for SELECT results in the SPARQL CSV result format."""

    def __init__(self):
        self.delim = ","

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """Read a header row of variable names followed by one row per solution."""
        result = Result("SELECT")

        # A zero-length read reveals whether the source yields bytes or text.
        if isinstance(source.read(0), bytes):
            # if reading from source returns bytes do utf-8 decoding
            # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]")
            source = codecs.getreader("utf-8")(source)  # type: ignore[assignment]

        rows = csv.reader(source, delimiter=self.delim)
        result.vars = [Variable(name) for name in next(rows)]
        result.bindings = [self.parseRow(row, result.vars) for row in rows]

        return result

    def parseRow(
        self, row: List[str], v: List[Variable]
    ) -> Dict[Variable, Union[BNode, URIRef, Literal]]:
        """Pair the variables with the converted cells, dropping empty cells."""
        terms = [self.convertTerm(cell) for cell in row]
        return {var: term for var, term in zip(v, terms) if term is not None}

    def convertTerm(self, t: str) -> Optional[Union[BNode, URIRef, Literal]]:
        """Map a CSV cell to a term; an empty cell means unbound."""
        if t == "":
            return None
        if t.startswith("_:"):
            return BNode(t)  # or generate new IDs?
        if t.startswith(("http://", "https://")):  # TODO: more?
            return URIRef(t)
        return Literal(t)
|
||||
|
||||
|
||||
class CSVResultSerializer(ResultSerializer):
    """Serializer writing SELECT results in the SPARQL CSV result format."""

    def __init__(self, result: SPARQLResult):
        super().__init__(result)

        self.delim = ","
        if result.type != "SELECT":
            raise Exception("CSVSerializer can only serialize select query results")

    def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs) -> None:
        """Write a header row of variables, then one row per binding.

        csv.writer produces text, so a stream that is not a text stream is
        wrapped in a codec writer for ``encoding``.
        """
        # TODO: Find a better solution for all this casting
        writable_stream = cast(Union[TextIOBase, BufferedIOBase], stream)
        string_stream: TextIOBase
        if isinstance(writable_stream, TextIOBase):
            string_stream = writable_stream
        else:
            byte_stream = cast(BufferedIOBase, writable_stream)
            string_stream = cast(TextIOBase, codecs.getwriter(encoding)(byte_stream))

        writer = csv.writer(string_stream, delimiter=self.delim)

        variables = self.result.vars
        writer.writerow([self.serializeTerm(var, encoding) for var in variables])  # type: ignore[union-attr]
        for binding in self.result.bindings:
            writer.writerow(
                [self.serializeTerm(binding.get(var), encoding) for var in variables]  # type: ignore[union-attr]
            )

    def serializeTerm(
        self, term: Optional[Identifier], encoding: str
    ) -> Union[str, Identifier]:
        """Return the cell for ``term``: empty for unbound, ``_:`` prefix for bnodes."""
        if term is None:
            return ""
        if isinstance(term, BNode):
            return f"_:{term}"
        return term
|
||||
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Optional
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.query import Result, ResultParser
|
||||
|
||||
|
||||
class GraphResultParser(ResultParser):
    """Parser that loads an RDF document as the graph of a result."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str]) -> Result:  # type: ignore[override]
        """Parse ``source`` into a fresh graph, using ``content_type`` as the format."""
        # Always reported as CONSTRUCT (it could equally be a DESCRIBE result).
        result = Result("CONSTRUCT")
        graph = Graph()
        result.graph = graph
        graph.parse(source, format=content_type)

        return result
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
"""A Serializer for SPARQL results in JSON:
|
||||
|
||||
http://www.w3.org/TR/rdf-sparql-json-res/
|
||||
|
||||
Bits and pieces borrowed from:
|
||||
http://projects.bigasterisk.com/sparqlhttp/
|
||||
|
||||
Authors: Drew Perttula, Gunnar Aastrand Grimnes
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import IO, Any, Dict, Mapping, MutableSequence, Optional
|
||||
|
||||
from rdflib.query import Result, ResultException, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
# orjson is optional: when it cannot be imported, _HAS_ORJSON is False and
# the code in this module uses the standard-library json module instead.
try:
    import orjson

    _HAS_ORJSON = True
except ImportError:
    orjson = None  # type: ignore[assignment, unused-ignore]
    _HAS_ORJSON = False
|
||||
|
||||
|
||||
class JSONResultParser(ResultParser):
    """Parser for SPARQL results in the JSON result format."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """Read all of ``source`` and build a ``JSONResult`` from it.

        :raises ResultException: when orjson is in use and fails to parse
            the input.  NOTE(review): with the stdlib ``json`` fallback the
            decoder's own exception propagates instead; this asymmetry is
            kept so that existing callers see the same exception types.
        """
        inp = source.read()
        if _HAS_ORJSON:
            try:
                loaded = orjson.loads(inp)
            except Exception as e:
                # Chain the original error so the root cause stays visible.
                raise ResultException(f"Failed to parse result: {e}") from e
        else:
            # json.loads is given text, so decode byte input first.
            if isinstance(inp, bytes):
                inp = inp.decode("utf-8")
            loaded = json.loads(inp)
        return JSONResult(loaded)
|
||||
|
||||
|
||||
class JSONResultSerializer(ResultSerializer):
    """Serializer writing ASK and SELECT results in the SPARQL JSON format."""

    def __init__(self, result: Result):
        ResultSerializer.__init__(self, result)

    # type error: Signature of "serialize" incompatible with supertype "ResultSerializer"
    def serialize(self, stream: IO, encoding: Optional[str] = None) -> None:  # type: ignore[override]
        """Write the result to ``stream`` as JSON.

        :param stream: the output stream; both byte and text streams are
            accepted when ``encoding`` is given (bytes are tried first, with
            a fallback to text if the stream rejects them).
        :param encoding: when ``None`` text is written.  With orjson the
            bytes are always UTF-8, whatever encoding is requested.
        :raises ResultException: when orjson fails to serialize the result.
        """
        res: Dict[str, Any] = {}
        if self.result.type == "ASK":
            res["head"] = {}
            res["boolean"] = self.result.askAnswer
        else:
            # select
            res["results"] = {}
            res["head"] = {}
            res["head"]["vars"] = self.result.vars
            res["results"]["bindings"] = [
                self._bindingToJSON(x) for x in self.result.bindings
            ]

        if _HAS_ORJSON:
            try:
                r_bytes = orjson.dumps(res, option=orjson.OPT_NON_STR_KEYS)
            except Exception as e:
                # Chain the original error so the root cause stays visible.
                raise ResultException(f"Failed to serialize result: {e}") from e
            if encoding is not None:
                # Note, orjson will always write utf-8 even if
                # encoding is specified as something else.
                try:
                    stream.write(r_bytes)
                except (TypeError, ValueError):
                    stream.write(r_bytes.decode("utf-8"))
            else:
                stream.write(r_bytes.decode("utf-8"))
        else:
            r_str = json.dumps(res, allow_nan=False, ensure_ascii=False)
            if encoding is not None:
                try:
                    stream.write(r_str.encode(encoding))
                except (TypeError, ValueError):
                    stream.write(r_str)
            else:
                stream.write(r_str)

    def _bindingToJSON(self, b: Mapping[Variable, Identifier]) -> Dict[Variable, Any]:
        """Convert one solution to its JSON form, leaving out unbound terms."""
        res = {}
        for var in b:
            # Convert each term once and reuse the result.
            j = termToJSON(self, b[var])
            if j is not None:
                res[var] = j
        return res
|
||||
|
||||
|
||||
class JSONResult(Result):
    """A ``Result`` built from a decoded SPARQL JSON results document."""

    def __init__(self, json: Dict[str, Any]):
        self.json = json

        # A "boolean" member marks an ASK result, "results" a SELECT result.
        if "boolean" in json:
            type_ = "ASK"
        elif "results" in json:
            type_ = "SELECT"
        else:
            raise ResultException("No boolean or results in json!")

        Result.__init__(self, type_)

        if type_ == "ASK":
            self.askAnswer = bool(json["boolean"])
            return

        self.bindings = self._get_bindings()
        self.vars = [Variable(name) for name in json["head"]["vars"]]

    def _get_bindings(self) -> MutableSequence[Mapping[Variable, Identifier]]:
        """Convert every JSON solution into a ``{Variable: term}`` mapping."""
        return [
            {Variable(name): parseJsonTerm(term) for name, term in row.items()}
            for row in self.json["results"]["bindings"]
        ]
|
||||
|
||||
|
||||
def parseJsonTerm(d: Dict[str, str]) -> Identifier:
    """Build the rdflib term (Literal, URIRef, BNode) for a json-format dict.

    input is like:
      { 'type': 'uri', 'value': 'http://famegame.com/2006/01/username' }
      { 'type': 'literal', 'value': 'drewp' }

    :raises NotImplementedError: for a ``type`` other than ``uri``,
        ``literal``, ``typed-literal`` or ``bnode``.
    """
    kind = d["type"]
    if kind == "uri":
        return URIRef(d["value"])
    if kind == "literal":
        return Literal(d["value"], datatype=d.get("datatype"), lang=d.get("xml:lang"))
    if kind == "typed-literal":
        return Literal(d["value"], datatype=URIRef(d["datatype"]))
    if kind == "bnode":
        return BNode(d["value"])
    raise NotImplementedError("json term type %r" % kind)
|
||||
|
||||
|
||||
def termToJSON(
    self: JSONResultSerializer, term: Optional[Identifier]
) -> Optional[Dict[str, str]]:
    """Return the SPARQL JSON dict for ``term``, or ``None`` for ``None``.

    :raises ResultException: when ``term`` is not a URIRef, Literal or BNode.
    """
    if term is None:
        return None

    if isinstance(term, URIRef):
        return {"type": "uri", "value": str(term)}

    if isinstance(term, Literal):
        encoded = {"type": "literal", "value": str(term)}
        # Datatype and language are only emitted when present.
        if term.datatype is not None:
            encoded["datatype"] = str(term.datatype)
        if term.language is not None:
            encoded["xml:lang"] = term.language
        return encoded

    if isinstance(term, BNode):
        return {"type": "bnode", "value": str(term)}

    raise ResultException("Unknown term type: %s (%s)" % (term, type(term)))
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, MutableMapping, Optional, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, Namespace
|
||||
from rdflib.query import Result, ResultParser
|
||||
from rdflib.term import Node, Variable
|
||||
|
||||
# Namespace of the result-set vocabulary that RDFResult reads results from.
RS = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/result-set#")
|
||||
|
||||
|
||||
class RDFResultParser(ResultParser):
    """Parser producing an ``RDFResult`` from a stream or a graph."""

    def parse(self, source: Union[IO, Graph], **kwargs: Any) -> Result:
        """Build an ``RDFResult`` from ``source``, forwarding ``kwargs``."""
        result = RDFResult(source, **kwargs)
        return result
|
||||
|
||||
|
||||
class RDFResult(Result):
    """A ``Result`` read from an RDF graph.

    A graph containing an ``rs:ResultSet`` node is read as an ASK result
    (when that node has an ``rs:boolean``) or as a SELECT result;
    any other graph is treated as the graph of a CONSTRUCT result.
    """

    def __init__(self, source: Union[IO, Graph], **kwargs: Any):
        if isinstance(source, Graph):
            graph = source
        else:
            # ``kwargs`` are only used when the source still has to be parsed.
            graph = Graph()
            graph.parse(source, **kwargs)

        rs = graph.value(predicate=RDF.type, object=RS.ResultSet)
        # there better be only one :)

        if rs is None:
            type_ = "CONSTRUCT"

            # use a new graph
            g = Graph()
            g += graph
        else:
            askAnswer = graph.value(rs, RS.boolean)
            type_ = "SELECT" if askAnswer is None else "ASK"

        Result.__init__(self, type_)

        if type_ == "SELECT":
            # type error: Argument 1 to "Variable" has incompatible type "Node"; expected "str"
            self.vars = [Variable(v) for v in graph.objects(rs, RS.resultVariable)]  # type: ignore[arg-type]

            self.bindings = []

            for solution in graph.objects(rs, RS.solution):
                sol: MutableMapping[Variable, Optional[Node]] = {}
                for binding in graph.objects(solution, RS.binding):
                    bound_value = graph.value(binding, RS.value)
                    # type error: Argument 1 to "Variable" has incompatible type "Optional[Node]"; expected "str"
                    sol[Variable(graph.value(binding, RS.variable))] = bound_value  # type: ignore[arg-type]
                # error: Argument 1 to "append" of "list" has incompatible type "MutableMapping[Variable, Optional[Node]]"; expected "Mapping[Variable, Identifier]"
                self.bindings.append(sol)  # type: ignore[arg-type]
        elif type_ == "ASK":
            # type error: Item "Node" of "Optional[Node]" has no attribute "value"
            # type error: Item "None" of "Optional[Node]" has no attribute "value"
            self.askAnswer = askAnswer.value  # type: ignore[union-attr]
            # type error: Item "Node" of "Optional[Node]" has no attribute "value"
            # type error: Item "None" of "Optional[Node]" has no attribute "value"
            if askAnswer.value is None:  # type: ignore[union-attr]
                raise Exception("Malformed boolean in ask answer!")
        elif type_ == "CONSTRUCT":
            self.graph = g
|
||||
+105
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
This implements the Tab Separated SPARQL Result Format
|
||||
|
||||
It is implemented with pyparsing, reusing the elements from the SPARQL Parser
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import typing
|
||||
from typing import IO, Union
|
||||
|
||||
from pyparsing import (
|
||||
FollowedBy,
|
||||
LineEnd,
|
||||
Literal,
|
||||
Optional,
|
||||
ParserElement,
|
||||
Suppress,
|
||||
ZeroOrMore,
|
||||
)
|
||||
|
||||
from rdflib.plugins.sparql.parser import (
|
||||
BLANK_NODE_LABEL,
|
||||
IRIREF,
|
||||
LANGTAG,
|
||||
STRING_LITERAL1,
|
||||
STRING_LITERAL2,
|
||||
BooleanLiteral,
|
||||
NumericLiteral,
|
||||
Var,
|
||||
)
|
||||
from rdflib.plugins.sparql.parserutils import Comp, CompValue, Param
|
||||
from rdflib.query import Result, ResultParser
|
||||
from rdflib.term import BNode, URIRef
|
||||
from rdflib.term import Literal as RDFLiteral
|
||||
|
||||
ParserElement.setDefaultWhitespaceChars(" \n")
|
||||
|
||||
|
||||
# Either of the two string-literal productions imported from the SPARQL parser.
String = STRING_LITERAL1 | STRING_LITERAL2

# A literal: a string optionally followed, without intervening whitespace,
# by a language tag or by "^^" and a datatype IRI.  The parts are labelled
# "string", "lang" and "datatype" on the resulting "literal" CompValue.
RDFLITERAL = Comp(
    "literal",
    Param("string", String)
    + Optional(
        Param("lang", LANGTAG.leaveWhitespace())
        | Literal("^^").leaveWhitespace() + Param("datatype", IRIREF).leaveWhitespace()
    ),
)

# Sentinel produced for an empty cell; TSVResultParser.convertTerm maps it to None.
NONE_VALUE = object()

# An empty cell matches when the next thing is a line end or a tab; the
# lookahead itself consumes nothing.
EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

# A row is one or more tab-separated cells, each empty or a term.
ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()

# The header is one or more tab-separated variables.
HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()
|
||||
|
||||
|
||||
class TSVResultParser(ResultParser):
    """Parser for SELECT results in the tab separated SPARQL result format."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"  [override]
    def parse(self, source: IO, content_type: typing.Optional[str] = None) -> Result:  # type: ignore[override]
        """Read a header line of variables, then one line per solution.

        Lines that are empty once the newline is removed are skipped.
        """
        # A zero-length read reveals whether the source yields bytes or text.
        if isinstance(source.read(0), bytes):
            # if reading from source returns bytes do utf-8 decoding
            # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]")
            source = codecs.getreader("utf-8")(source)  # type: ignore[assignment]

        result = Result("SELECT")

        header = source.readline()
        result.vars = list(HEADER.parseString(header.strip(), parseAll=True))
        result.bindings = []

        # readline() returns an empty value at end of input.
        while line := source.readline():
            line = line.strip("\n")
            if line == "":
                continue

            cells = ROW.parseString(line, parseAll=True)
            # type error: Generator has incompatible item type "object"; expected "Identifier"
            result.bindings.append(dict(zip(result.vars, (self.convertTerm(x) for x in cells))))  # type: ignore[misc]

        return result

    def convertTerm(
        self, t: Union[object, RDFLiteral, BNode, CompValue, URIRef]
    ) -> typing.Optional[Union[object, BNode, URIRef, RDFLiteral]]:
        """Turn a parsed cell into a term.

        The empty-cell sentinel becomes ``None``, a "literal" CompValue
        becomes an RDF literal, and any other non-CompValue token is
        returned unchanged.
        """
        if t is NONE_VALUE:
            return None
        if not isinstance(t, CompValue):
            return t
        if t.name == "literal":
            return RDFLiteral(t.string, lang=t.lang, datatype=t.datatype)
        raise Exception("I dont know how to handle this: %s" % (t,))
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import StringIO
|
||||
from typing import IO, List, Optional, Union
|
||||
|
||||
from rdflib.namespace import NamespaceManager
|
||||
from rdflib.query import ResultSerializer
|
||||
from rdflib.term import BNode, Literal, URIRef, Variable
|
||||
|
||||
|
||||
def _termString(
    t: Optional[Union[URIRef, Literal, BNode]],
    namespace_manager: Optional[NamespaceManager],
) -> str:
    """Return the text shown for ``t`` in a result table.

    ``None`` is shown as ``"-"``.  Without a namespace manager the term's
    ``n3()`` form is used; with one, URIs and literal datatypes are passed
    through ``namespace_manager.normalizeUri``.
    """
    if t is None:
        return "-"
    if not namespace_manager:
        return t.n3()
    if isinstance(t, URIRef):
        return namespace_manager.normalizeUri(t)
    if isinstance(t, BNode):
        return t.n3()
    if isinstance(t, Literal):
        return t._literal_n3(qname_callback=namespace_manager.normalizeUri)
    # Any other kind of term falls through here and yields None.
|
||||
|
||||
|
||||
class TXTResultSerializer(ResultSerializer):
    """
    A write-only QueryResult serializer for text/ascii tables
    """

    def serialize(
        self,
        stream: IO,
        encoding: str = "utf-8",
        *,
        namespace_manager: Optional[NamespaceManager] = None,
        **kwargs,
    ) -> None:
        """
        Write a text table of the query results to ``stream``.

        The table is written as bytes in ``encoding`` first; if the stream
        rejects bytes it is written as text instead.

        :raises Exception: if the result is not a SELECT result.
        """

        def centered(text, width):
            """
            Center ``text`` in ``width`` columns; an odd leftover space goes
            on the right.
            """
            half, odd = divmod(width - len(text), 2)
            return " " * half + text + " " * (half + odd)

        if self.result.type != "SELECT":
            raise Exception("Can only pretty print SELECT results!")

        buffer = StringIO()
        if not self.result:
            buffer.write("(no results)\n")
        else:
            keys: List[Variable] = self.result.vars  # type: ignore[assignment]
            # NOTE on type error: each row can be more types than _termString
            # expects because a result can be of several kinds.
            table = [
                [_termString(row[key], namespace_manager) for key in keys]  # type: ignore[index, arg-type, call-overload]
                for row in self.result
            ]

            # Column widths come from the cell contents only.
            maxlen = [0] * len(keys)
            for cells in table:
                for col, cell in enumerate(cells):
                    maxlen[col] = max(maxlen[col], len(cell))

            header = "|".join(
                [centered(key, maxlen[col]) for col, key in enumerate(keys)]
            )
            buffer.write(header + "\n")
            buffer.write("-" * (len(maxlen) + sum(maxlen)) + "\n")
            for cells in sorted(table):
                padded = [cell.ljust(width) for width, cell in zip(maxlen, cells)]
                buffer.write("|".join(padded) + "\n")

        text_val = buffer.getvalue()
        try:
            stream.write(text_val.encode(encoding))
        except (TypeError, ValueError):
            stream.write(text_val)
|
||||
+301
@@ -0,0 +1,301 @@
|
||||
"""A Parser for SPARQL results in XML:
|
||||
|
||||
http://www.w3.org/TR/rdf-sparql-XMLres/
|
||||
|
||||
Bits and pieces borrowed from:
|
||||
http://projects.bigasterisk.com/sparqlhttp/
|
||||
|
||||
Authors: Drew Perttula, Gunnar Aastrand Grimnes
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import xml.etree.ElementTree as xml_etree # noqa: N813
|
||||
from io import BytesIO
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
BinaryIO,
|
||||
Dict,
|
||||
Optional,
|
||||
Sequence,
|
||||
TextIO,
|
||||
Tuple,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
from xml.dom import XML_NAMESPACE
|
||||
from xml.sax.saxutils import XMLGenerator
|
||||
from xml.sax.xmlreader import AttributesNSImpl
|
||||
|
||||
from rdflib.query import Result, ResultException, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
# lxml is optional: when it imports, XMLResult parses with it, otherwise the
# standard-library ElementTree is used.
try:
    # https://adamj.eu/tech/2021/12/29/python-type-hints-optional-imports/
    import lxml.etree as lxml_etree

    FOUND_LXML = True
except ImportError:
    FOUND_LXML = False

SPARQL_XML_NAMESPACE = "http://www.w3.org/2005/sparql-results#"
# The namespace in ElementTree's "{uri}localname" tag notation.
RESULTS_NS_ET = "{%s}" % SPARQL_XML_NAMESPACE

log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XMLResultParser(ResultParser):
    """Result parser plugin that builds an ``XMLResult`` from a stream."""

    # TODO FIXME: content_type should be a keyword only arg.
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """Parse ``source`` and return the resulting ``XMLResult``.

        ``content_type`` is forwarded to ``XMLResult`` rather than dropped.
        """
        return XMLResult(source, content_type)
|
||||
|
||||
|
||||
class XMLResult(Result):
    """A ``Result`` populated from a SPARQL results XML document."""

    def __init__(self, source: IO, content_type: Optional[str] = None):
        """Parse ``source`` as either an ASK or a SELECT result.

        :param source: Text or binary stream holding the XML document.
        :param content_type: Accepted for interface compatibility; not used.
        :raises ResultException: If the document has neither a ``boolean``
            nor a ``results`` element in the SPARQL results namespace.
        """
        parser_encoding: Optional[str] = None
        if hasattr(source, "encoding"):
            if TYPE_CHECKING:
                assert isinstance(source, TextIO)
            # Text stream: re-encode to UTF-8 bytes and tell the parser so.
            parser_encoding = "utf-8"
            source_str = source.read()
            source = BytesIO(source_str.encode(parser_encoding))
        else:
            if TYPE_CHECKING:
                assert isinstance(source, BinaryIO)

        if FOUND_LXML:
            lxml_parser = lxml_etree.XMLParser(huge_tree=True, encoding=parser_encoding)
            tree = cast(
                xml_etree.ElementTree,
                lxml_etree.parse(source, parser=lxml_parser),
            )
        else:
            xml_parser = xml_etree.XMLParser(encoding=parser_encoding)
            tree = xml_etree.parse(source, parser=xml_parser)

        boolean = tree.find(RESULTS_NS_ET + "boolean")
        results = tree.find(RESULTS_NS_ET + "results")

        if boolean is not None:
            type_ = "ASK"
        elif results is not None:
            type_ = "SELECT"
        else:
            raise ResultException("No RDF result-bindings or boolean answer found!")

        Result.__init__(self, type_)

        if type_ == "SELECT":
            self.bindings = []
            for result in results:  # type: ignore[union-attr]
                if result.tag != f"{RESULTS_NS_ET}result":
                    # This is here because with lxml this also gets comments,
                    # not just elements. Also this should not operate on non
                    # "result" elements.
                    continue
                r = {}
                for binding in result:
                    if binding.tag != f"{RESULTS_NS_ET}binding":
                        # This is here because with lxml this also gets
                        # comments, not just elements. Also this should not
                        # operate on non "binding" elements.
                        continue
                    # type error: error: Argument 1 to "Variable" has incompatible type "Union[str, None, Any]"; expected "str"
                    # NOTE on type error: Element.get() can return None, and
                    # this will invariably fail if passed into Variable
                    # constructor as value
                    r[Variable(binding.get("name"))] = parseTerm(binding[0])  # type: ignore[arg-type] # FIXME
                self.bindings.append(r)

            self.vars = [
                # type error: Argument 1 to "Variable" has incompatible type "Optional[str]"; expected "str"
                # NOTE on type error: Element.get() can return None, and this
                # will invariably fail if passed into Variable constructor as
                # value
                Variable(x.get("name"))  # type: ignore[arg-type] # FIXME
                for x in tree.findall(
                    "./%shead/%svariable" % (RESULTS_NS_ET, RESULTS_NS_ET)
                )
            ]

        else:
            # An empty <boolean/> element has text None; treat it as "not true"
            # instead of failing with AttributeError.
            self.askAnswer = (boolean.text or "").lower().strip() == "true"  # type: ignore[union-attr]
|
||||
|
||||
|
||||
def parseTerm(element: xml_etree.Element) -> Union[URIRef, Literal, BNode]:
    """rdflib object (Literal, URIRef, BNode) for the given
    elementtree element

    :param element: A ``literal``, ``uri`` or ``bnode`` element in the SPARQL
        results namespace.
    :raises TypeError: If the element's tag is none of those three.
    """
    tag, text = element.tag, element.text
    if tag == RESULTS_NS_ET + "literal":
        if text is None:
            # An empty <literal/> element is the empty string literal.
            text = ""
        datatype = None
        lang = None
        datatype_attr = element.get("datatype")
        if datatype_attr:
            # A non-empty datatype takes precedence; xml:lang is then ignored.
            datatype = URIRef(datatype_attr)
        else:
            lang = element.get("{%s}lang" % XML_NAMESPACE) or None

        return Literal(text, datatype=datatype, lang=lang)
    elif tag == RESULTS_NS_ET + "uri":
        # type error: Argument 1 to "URIRef" has incompatible type "Optional[str]"; expected "str"
        return URIRef(text)  # type: ignore[arg-type]
    elif tag == RESULTS_NS_ET + "bnode":
        return BNode(text)
    else:
        raise TypeError("unknown binding type %r" % element)
|
||||
|
||||
|
||||
class XMLResultSerializer(ResultSerializer):
    """Serializes a ``Result`` as SPARQL results XML through ``SPARQLXMLWriter``."""

    def __init__(self, result: Result):
        ResultSerializer.__init__(self, result)

    def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Write ``self.result`` to ``stream``.

        An ASK result produces an empty head and a boolean element; any other
        result type produces the variable header followed by one ``result``
        element per entry in ``self.result.bindings``.
        """
        writer = SPARQLXMLWriter(stream, encoding)
        if self.result.type == "ASK":
            writer.write_header([])
            # type error: Argument 1 to "write_ask" of "SPARQLXMLWriter" has incompatible type "Optional[bool]"; expected "bool"
            writer.write_ask(self.result.askAnswer)  # type: ignore[arg-type]
        else:
            # type error: Argument 1 to "write_header" of "SPARQLXMLWriter" has incompatible type "Optional[List[Variable]]"; expected "Sequence[Variable]"
            writer.write_header(self.result.vars)  # type: ignore[arg-type]
            writer.write_results_header()
            for b in self.result.bindings:
                writer.write_start_result()
                for key, val in b.items():
                    writer.write_binding(key, val)

                writer.write_end_result()

        writer.close()
|
||||
|
||||
|
||||
# TODO: Rewrite with ElementTree?
|
||||
class SPARQLXMLWriter:
    """
    Python saxutils-based SPARQL XML Writer

    The constructor opens the document and the root ``sparql`` element;
    ``close`` ends them. In between, callers are expected to emit the header,
    then either an ASK boolean or a ``results`` section.
    """

    def __init__(self, output: IO, encoding: str = "utf-8"):
        writer = XMLGenerator(output, encoding)
        writer.startDocument()
        writer.startPrefixMapping("", SPARQL_XML_NAMESPACE)
        writer.startPrefixMapping("xml", XML_NAMESPACE)
        self.writer = writer
        self._output = output
        self._encoding = encoding
        self._results = False
        # Initialised here so that calling write_end_result/write_binding
        # before write_start_result trips the assertion rather than raising
        # AttributeError.
        self._resultStarted = False
        self._start("sparql")

    def _start(
        self,
        name: str,
        attr_vals: Optional[Dict[Tuple[Optional[str], str], str]] = None,
        attr_qnames: Optional[Dict[Tuple[Optional[str], str], str]] = None,
    ) -> None:
        """Open element ``name`` in the SPARQL results namespace."""
        # Un-namespaced attributes are keyed with (None, localname), which the
        # type stubs for AttributesNSImpl do not accept; hence the ignore.
        self.writer.startElementNS(
            (SPARQL_XML_NAMESPACE, name),
            name,
            AttributesNSImpl(attr_vals or {}, attr_qnames or {}),  # type: ignore[arg-type, unused-ignore]
        )

    def _end(self, name: str) -> None:
        """Close element ``name`` in the SPARQL results namespace."""
        self.writer.endElementNS((SPARQL_XML_NAMESPACE, name), name)

    def write_header(self, allvarsL: Sequence[Variable]) -> None:
        """Write the ``head`` element with one ``variable`` child per name."""
        self._start("head")
        for var in allvarsL:
            self._start("variable", {(None, "name"): str(var)}, {(None, "name"): "name"})
            self._end("variable")
        self._end("head")

    def write_ask(self, val: bool) -> None:
        """Write the ``boolean`` element holding ``str(val).lower()``."""
        self._start("boolean")
        self.writer.characters(str(val).lower())
        self._end("boolean")

    def write_results_header(self) -> None:
        """Open the ``results`` element; ``close`` will end it."""
        self._start("results")
        self._results = True

    def write_start_result(self) -> None:
        """Open a ``result`` element."""
        self._start("result")
        self._resultStarted = True

    def write_end_result(self) -> None:
        """Close the ``result`` element opened by ``write_start_result``."""
        assert self._resultStarted
        self._end("result")
        self._resultStarted = False

    def write_binding(self, name: Variable, val: Identifier) -> None:
        """Write one ``binding`` element for ``name`` holding the term ``val``.

        :raises Exception: If ``val`` is not a URIRef, BNode or Literal.
        """
        assert self._resultStarted

        self._start("binding", {(None, "name"): str(name)}, {(None, "name"): "name"})

        if isinstance(val, URIRef):
            self._start("uri")
            self.writer.characters(val)
            self._end("uri")
        elif isinstance(val, BNode):
            self._start("bnode")
            self.writer.characters(val)
            self._end("bnode")
        elif isinstance(val, Literal):
            attr_vals: Dict[Tuple[Optional[str], str], str] = {}
            attr_qnames: Dict[Tuple[Optional[str], str], str] = {}
            # A language tag takes precedence; the datatype is written only
            # when no language is set.
            if val.language:
                attr_vals[(XML_NAMESPACE, "lang")] = val.language
                attr_qnames[(XML_NAMESPACE, "lang")] = "xml:lang"
            elif val.datatype:
                attr_vals[(None, "datatype")] = val.datatype
                attr_qnames[(None, "datatype")] = "datatype"

            self._start("literal", attr_vals, attr_qnames)
            self.writer.characters(val)
            self._end("literal")
        else:
            raise Exception("Unsupported RDF term: %s" % val)

        self._end("binding")

    def close(self) -> None:
        """End ``results`` (if it was opened), the root element and the document."""
        if self._results:
            self._end("results")
        self._end("sparql")
        self.writer.endDocument()
|
||||
@@ -0,0 +1,499 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import datetime
|
||||
import itertools
|
||||
import typing as t
|
||||
from collections.abc import Mapping, MutableMapping
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Container,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
import rdflib.plugins.sparql
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.namespace import NamespaceManager
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.paths import Path
|
||||
|
||||
|
||||
_AnyT = TypeVar("_AnyT")
|
||||
|
||||
|
||||
class SPARQLError(Exception):
    """Base class of the SPARQL exceptions defined in this module."""

    def __init__(self, msg: Optional[str] = None):
        # msg is always passed on, so args is (None,) when no message is given.
        Exception.__init__(self, msg)
|
||||
|
||||
|
||||
class NotBoundError(SPARQLError):
    """A ``SPARQLError`` subclass; by its name, signals an unbound variable."""

    def __init__(self, msg: Optional[str] = None):
        SPARQLError.__init__(self, msg)
|
||||
|
||||
|
||||
class AlreadyBound(SPARQLError):  # noqa: N818
    """Raised when trying to bind a variable that is already bound!"""

    def __init__(self):
        # Takes no message; the base class is initialised with its default.
        SPARQLError.__init__(self)
|
||||
|
||||
|
||||
class SPARQLTypeError(SPARQLError):
    """A ``SPARQLError`` subclass whose message argument is required."""

    def __init__(self, msg: Optional[str]):
        SPARQLError.__init__(self, msg)
|
||||
|
||||
|
||||
class Bindings(MutableMapping):
    """
    A single level of a stack of variable-value bindings.
    Each dict keeps a reference to the dict below it,
    any failed lookup is propagated back

    In python 3.3 this could be a collections.ChainMap
    """

    def __init__(self, outer: Optional["Bindings"] = None, d=()):
        """
        :param outer: The level below this one, consulted when a key is not
            bound at this level.
        :param d: Initial bindings for this level; anything ``dict()`` accepts.
        """
        self._d: Dict[str, str] = dict(d)
        self.outer = outer

    def __getitem__(self, key: str) -> str:
        """Return the innermost binding of ``key``, searching outer levels.

        :raises KeyError: If no level binds ``key``.
        """
        if key in self._d:
            return self._d[key]

        if self.outer is None:
            raise KeyError(key)
        return self.outer[key]

    def __contains__(self, key: Any) -> bool:
        try:
            self[key]
            return True
        except KeyError:
            return False

    def __setitem__(self, key: str, value: Any) -> None:
        # Always binds at this level; outer levels are never modified.
        self._d[key] = value

    def __delitem__(self, key: str) -> None:
        raise Exception("DelItem is not implemented!")

    def __len__(self) -> int:
        """Sum of the sizes of all levels (a key bound at two levels counts twice)."""
        i = 0
        d: Optional[Bindings] = self
        while d is not None:
            i += len(d._d)
            d = d.outer
        return i

    def __iter__(self) -> Generator[str, None, None]:
        """Yield the keys of this level first, then those of each outer level."""
        d: Optional[Bindings] = self
        while d is not None:
            yield from d._d
            d = d.outer

    def __str__(self) -> str:
        # The previous implementation handed (key, value) tuples to str.join,
        # which raises TypeError; format each pair explicitly instead.
        return "Bindings({" + ", ".join(f"{k!r}: {self[k]!r}" for k in self) + "})"

    def __repr__(self) -> str:
        return str(self)
|
||||
|
||||
|
||||
class FrozenDict(Mapping):
    """
    An immutable hashable dict

    Taken from http://stackoverflow.com/a/2704866/81121
    """

    def __init__(self, *args: Any, **kwargs: Any):
        """Accepts the same arguments as ``dict``."""
        self._d: Dict[Identifier, Identifier] = dict(*args, **kwargs)
        self._hash: Optional[int] = None

    def __iter__(self):
        return iter(self._d)

    def __len__(self) -> int:
        return len(self._d)

    def __getitem__(self, key: Identifier) -> Identifier:
        return self._d[key]

    def __hash__(self) -> int:
        # It would have been simpler and maybe more obvious to
        # use hash(tuple(sorted(self._d.items()))) from this discussion
        # so far, but this solution is O(n). I don't know what kind of
        # n we are going to run into, but sometimes it's hard to resist the
        # urge to optimize when it will gain improved algorithmic performance.
        if self._hash is None:
            # Accumulate locally so the cached value is only ever complete.
            acc = 0
            for key, value in self.items():
                acc ^= hash(key)
                acc ^= hash(value)
            self._hash = acc
        return self._hash

    def project(self, vars: Container[Variable]) -> FrozenDict:
        """Return a new FrozenDict with only the items whose key is in ``vars``."""
        return FrozenDict(x for x in self.items() if x[0] in vars)

    def disjointDomain(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        """Return True when ``self`` and ``other`` share no key."""
        return set(self).isdisjoint(other)

    def compatible(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        """Return True unless some key present in both maps to different values."""
        for k in self:
            try:
                if self[k] != other[k]:
                    return False
            except KeyError:
                # A key missing from either side cannot conflict.
                pass

        return True

    def merge(self, other: t.Mapping[Identifier, Identifier]) -> FrozenDict:
        """Return a new FrozenDict of both mappings; ``other`` wins on shared keys."""
        res = FrozenDict(itertools.chain(self.items(), other.items()))

        return res

    def __str__(self) -> str:
        return str(self._d)

    def __repr__(self) -> str:
        return repr(self._d)
|
||||
|
||||
|
||||
class FrozenBindings(FrozenDict):
    """A ``FrozenDict`` of bindings that also carries its ``QueryContext``."""

    def __init__(self, ctx: QueryContext, *args, **kwargs):
        """
        :param ctx: The query context these bindings belong to.
        :param args: Passed on to ``FrozenDict``.
        :param kwargs: Passed on to ``FrozenDict``.
        """
        FrozenDict.__init__(self, *args, **kwargs)
        self.ctx = ctx

    def __getitem__(self, key: Union[Identifier, str]) -> Identifier:
        """Resolve ``key`` against these bindings.

        A key that is not a ``Node`` is first wrapped in ``Variable``. Keys
        that are neither ``BNode`` nor ``Variable`` are returned as they are.
        Otherwise the value comes from these bindings, falling back to the
        context's ``initBindings``.
        """
        if not isinstance(key, Node):
            key = Variable(key)

        if not isinstance(key, (BNode, Variable)):
            return key

        if key not in self._d:
            # type error: Value of type "Optional[Dict[Variable, Identifier]]" is not indexable
            # type error: Invalid index type "Union[BNode, Variable]" for "Optional[Dict[Variable, Identifier]]"; expected type "Variable"
            return self.ctx.initBindings[key]  # type: ignore[index]
        else:
            return self._d[key]

    def project(self, vars: Container[Variable]) -> FrozenBindings:
        """Return new bindings with only the items whose key is in ``vars``."""
        return FrozenBindings(self.ctx, (x for x in self.items() if x[0] in vars))

    def merge(self, other: t.Mapping[Identifier, Identifier]) -> FrozenBindings:
        """Return new bindings of both mappings; ``other`` wins on shared keys."""
        res = FrozenBindings(self.ctx, itertools.chain(self.items(), other.items()))
        return res

    @property
    def now(self) -> datetime.datetime:
        return self.ctx.now

    @property
    def bnodes(self) -> t.Mapping[Identifier, BNode]:
        return self.ctx.bnodes

    @property
    def prologue(self) -> Optional[Prologue]:
        return self.ctx.prologue

    def forget(
        self, before: QueryContext, _except: Optional[Container[Variable]] = None
    ) -> FrozenBindings:
        """
        return a frozen dict only of bindings made in self
        since before
        """
        if not _except:
            _except = []

        # bindings from initBindings are never forgotten
        return FrozenBindings(
            self.ctx,
            (
                x
                for x in self.items()
                if (
                    x[0] in _except
                    # type error: Unsupported right operand type for in ("Optional[Dict[Variable, Identifier]]")
                    or x[0] in self.ctx.initBindings  # type: ignore[operator]
                    or before[x[0]] is None
                )
            ),
        )

    def remember(self, these) -> FrozenBindings:
        """
        return a frozen dict only of bindings in these
        """
        return FrozenBindings(self.ctx, (x for x in self.items() if x[0] in these))
|
||||
|
||||
|
||||
class QueryContext:
    """
    Query context - passed along when evaluating the query
    """

    def __init__(
        self,
        graph: Optional[Graph] = None,
        bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None,
        initBindings: Optional[Mapping[str, Identifier]] = None,
        datasetClause=None,
    ):
        """
        :param graph: The graph to query. A ``Dataset`` or
            ``ConjunctiveGraph`` is kept as the context's dataset.
        :param bindings: Initial variable bindings, copied into a fresh
            ``Bindings`` level.
        :param initBindings: Bindings supplied by the caller; kept on the
            context and also added to ``bindings``.
        :param datasetClause: When given together with a dataset, items
            with ``default`` / ``named`` attributes that select the graphs to
            query.
        """
        self.initBindings = initBindings
        self.bindings = Bindings(d=bindings or [])
        if initBindings:
            self.bindings.update(initBindings)

        self.graph: Optional[Graph]
        self._dataset: Optional[Union[Dataset, ConjunctiveGraph]]
        if isinstance(graph, (Dataset, ConjunctiveGraph)):
            if datasetClause:
                # Build a fresh dataset holding only the requested graphs.
                self._dataset = Dataset()
                self.graph = Graph()
                for d in datasetClause:
                    if d.default:
                        from_graph = graph.get_context(d.default)
                        self.graph += from_graph
                        if not from_graph:
                            # Nothing known locally under that name; try loading.
                            self.load(d.default, default=True)
                    elif d.named:
                        namedGraphs = Graph(
                            store=self.dataset.store, identifier=d.named
                        )
                        from_named_graphs = graph.get_context(d.named)
                        namedGraphs += from_named_graphs
                        if not from_named_graphs:
                            self.load(d.named, default=False)
            else:
                self._dataset = graph
                if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
                    self.graph = self.dataset
                else:
                    self.graph = self.dataset.default_context
        else:
            self._dataset = None
            self.graph = graph

        self.prologue: Optional[Prologue] = None
        self._now: Optional[datetime.datetime] = None

        self.bnodes: t.MutableMapping[Identifier, BNode] = collections.defaultdict(
            BNode
        )

    @property
    def now(self) -> datetime.datetime:
        """The UTC time of first access, cached for the life of this context."""
        if self._now is None:
            self._now = datetime.datetime.now(datetime.timezone.utc)
        return self._now

    def clone(
        self, bindings: Optional[Union[FrozenBindings, Bindings, List[Any]]] = None
    ) -> QueryContext:
        """Return a new context sharing this one's graph, prologue and bnodes.

        :param bindings: Bindings for the clone; when falsy, this context's
            bindings are used.
        """
        r = QueryContext(
            self._dataset if self._dataset is not None else self.graph,
            bindings or self.bindings,
            initBindings=self.initBindings,
        )
        r.prologue = self.prologue
        r.graph = self.graph
        r.bnodes = self.bnodes
        return r

    @property
    def dataset(self) -> ConjunctiveGraph:
        """The current dataset.

        :raises Exception: If this context operates on a single graph.
        """
        if self._dataset is None:
            raise Exception(
                "You performed a query operation requiring "
                + "a dataset (i.e. ConjunctiveGraph), but "
                + "operating currently on a single graph."
            )
        return self._dataset

    def load(
        self,
        source: URIRef,
        default: bool = False,
        into: Optional[Identifier] = None,
        **kwargs: Any,
    ) -> None:
        """
        Load data from the source into the query context.

        :param source: The source to load from.
        :param default: If `True`, triples from the source will be added
            to the default graph, otherwise it will be loaded into a
            graph with ``source`` URI as its name.
        :param into: The name of the graph to load the data into. If
            `None`, the source URI will be used as the name of the
            graph.
        :param kwargs: Keyword arguments to pass to
            :meth:`rdflib.graph.Graph.parse`.
        """

        def _load(graph, source):
            """Parse ``source`` into ``graph``, trying each format in turn."""
            last_error: Optional[Exception] = None
            for fmt in ("turtle", "xml", "n3", "nt"):
                try:
                    return graph.parse(source, format=fmt, **kwargs)
                except Exception as err:
                    # Deliberate best effort: remember the failure and try
                    # the next format.
                    last_error = err
            raise Exception(
                "Could not load %s as either RDF/XML, N3 or NTriples" % source
            ) from last_error

        if not rdflib.plugins.sparql.SPARQL_LOAD_GRAPHS:
            # we are not loading - if we already know the graph
            # being "loaded", just add it to the default-graph
            if default:
                # Unsupported left operand type for + ("None")
                self.graph += self.dataset.get_context(source)  # type: ignore[operator]
        else:
            if default:
                _load(self.graph, source)
            else:
                if into is None:
                    into = source
                _load(self.dataset.get_context(into), source)

    def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
        """Return the binding of ``key``, or ``None`` when it is unbound.

        Keys that are neither ``BNode`` nor ``Variable`` are returned as is.
        """
        # in SPARQL BNodes are just labels
        if not isinstance(key, (BNode, Variable)):
            return key
        try:
            return self.bindings[key]
        except KeyError:
            return None

    def get(self, key: str, default: Optional[Any] = None) -> Any:
        """Return ``self[key]``.

        NOTE(review): ``__getitem__`` already turns a missing key into
        ``None`` instead of raising ``KeyError``, so ``default`` is never
        returned here. Left unchanged because callers may rely on ``None``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def solution(self, vars: Optional[Iterable[Variable]] = None) -> FrozenBindings:
        """
        Return a static copy of the current variable bindings as dict
        """
        if vars:
            return FrozenBindings(
                self, ((k, v) for k, v in self.bindings.items() if k in vars)
            )
        else:
            return FrozenBindings(self, self.bindings.items())

    def __setitem__(self, key: str, value: str) -> None:
        """Bind ``key`` to ``value``.

        :raises AlreadyBound: If ``key`` is already bound to a different value.
        """
        if key in self.bindings and self.bindings[key] != value:
            raise AlreadyBound()

        self.bindings[key] = value

    def pushGraph(self, graph: Optional[Graph]) -> QueryContext:
        """Return a clone of this context whose active graph is ``graph``."""
        r = self.clone()
        r.graph = graph
        return r

    def push(self) -> QueryContext:
        """Return a clone with a new bindings level stacked on the current one."""
        r = self.clone(Bindings(self.bindings))
        return r

    def clean(self) -> QueryContext:
        """Return ``self.clone([])``."""
        return self.clone([])

    def thaw(self, frozenbindings: FrozenBindings) -> QueryContext:
        """
        Create a new read/write query context from the given solution
        """
        c = self.clone(frozenbindings)

        return c
|
||||
|
||||
|
||||
class Prologue:
    """
    A class for holding prefixing bindings and base URI information
    """

    def __init__(self) -> None:
        self.base: Optional[str] = None
        self.namespace_manager = NamespaceManager(Graph())  # ns man needs a store

    def resolvePName(self, prefix: Optional[str], localname: Optional[str]) -> URIRef:
        """Expand a prefixed name to a full URI.

        A missing prefix or local name is treated as the empty string.

        :raises Exception: If the prefix has no namespace bound to it.
        """
        ns = self.namespace_manager.store.namespace(prefix or "")
        if ns is None:
            raise Exception("Unknown namespace prefix : %s" % prefix)
        return URIRef(ns + (localname or ""))

    def bind(self, prefix: Optional[str], uri: Any) -> None:
        """Bind ``prefix`` to ``uri``, replacing any existing binding."""
        self.namespace_manager.bind(prefix, uri, replace=True)

    def absolutize(
        self, iri: Optional[Union[CompValue, str]]
    ) -> Optional[Union[CompValue, str]]:
        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        TODO: Move resolving URIs to pre-processing
        """

        if isinstance(iri, CompValue):
            if iri.name == "pname":
                return self.resolvePName(iri.prefix, iri.localname)
            if iri.name == "literal":
                # type error: Argument "datatype" to "Literal" has incompatible type "Union[CompValue, Identifier, None]"; expected "Optional[str]"
                return Literal(
                    iri.string, lang=iri.lang, datatype=self.absolutize(iri.datatype)  # type: ignore[arg-type]
                )
        elif isinstance(iri, URIRef) and ":" not in iri:
            # A URIRef without a colon is resolved against the base.
            return URIRef(iri, base=self.base)

        # Everything else (including other CompValues) is returned unchanged.
        return iri
|
||||
|
||||
|
||||
class Query:
    """
    A parsed and translated query
    """

    def __init__(self, prologue: Prologue, algebra: CompValue):
        self.prologue = prologue
        self.algebra = algebra
        # Declared for type checkers only; nothing is assigned here.
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]
|
||||
|
||||
|
||||
class Update:
    """
    A parsed and translated update
    """

    def __init__(self, prologue: Prologue, algebra: List[CompValue]):
        self.prologue = prologue
        self.algebra = algebra
        # Declared for type checkers only; nothing is assigned here.
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]
|
||||
@@ -0,0 +1,353 @@
|
||||
"""
|
||||
|
||||
Code for carrying out Update Operations
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterator, Mapping, Optional, Sequence
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql.evaluate import evalBGP, evalPart
|
||||
from rdflib.plugins.sparql.evalutils import _fillTemplate, _join
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.plugins.sparql.sparql import FrozenDict, QueryContext, Update
|
||||
from rdflib.term import Identifier, URIRef, Variable
|
||||
|
||||
|
||||
def _graphOrDefault(ctx: QueryContext, g: str) -> Optional[Graph]:
|
||||
if g == "DEFAULT":
|
||||
return ctx.graph
|
||||
else:
|
||||
return ctx.dataset.get_context(g)
|
||||
|
||||
|
||||
def _graphAll(ctx: QueryContext, g: str) -> Sequence[Graph]:
|
||||
"""
|
||||
return a list of graphs
|
||||
"""
|
||||
if g == "DEFAULT":
|
||||
# type error: List item 0 has incompatible type "Optional[Graph]"; expected "Graph"
|
||||
return [ctx.graph] # type: ignore[list-item]
|
||||
elif g == "NAMED":
|
||||
return [
|
||||
# type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
|
||||
c
|
||||
for c in ctx.dataset.contexts()
|
||||
if c.identifier != ctx.graph.identifier # type: ignore[union-attr]
|
||||
]
|
||||
elif g == "ALL":
|
||||
return list(ctx.dataset.contexts())
|
||||
else:
|
||||
return [ctx.dataset.get_context(g)]
|
||||
|
||||
|
||||
def evalLoad(ctx: QueryContext, u: CompValue) -> None:
    """
    http://www.w3.org/TR/sparql11-update/#load

    Loads ``u.iri`` into the graph named ``u.graphiri`` when that is set,
    otherwise into the default graph.
    """

    if TYPE_CHECKING:
        assert isinstance(u.iri, URIRef)

    if u.graphiri:
        ctx.load(u.iri, default=False, into=u.graphiri)
    else:
        ctx.load(u.iri, default=True)
|
||||
|
||||
|
||||
def evalCreate(ctx: QueryContext, u: CompValue) -> None:
    """
    http://www.w3.org/TR/sparql11-update/#create

    Not implemented: this always raises. A non-empty target graph is
    reported as already existing; otherwise the generic "not implemented"
    error is raised.
    """
    g = ctx.dataset.get_context(u.graphiri)
    if len(g) > 0:
        raise Exception("Graph %s already exists." % g.identifier)
    raise Exception("Create not implemented!")
|
||||
|
||||
|
||||
def evalClear(ctx: QueryContext, u: CompValue) -> None:
    """
    http://www.w3.org/TR/sparql11-update/#clear

    Removes every triple from each graph selected by ``u.graphiri``.
    """
    for g in _graphAll(ctx, u.graphiri):
        g.remove((None, None, None))
|
||||
|
||||
|
||||
def evalDrop(ctx: QueryContext, u: CompValue) -> None:
    """
    http://www.w3.org/TR/sparql11-update/#drop

    When the dataset's store reports ``graph_aware``, each graph designated
    by ``u.graphiri`` is removed through ``store.remove_graph``; otherwise
    the operation falls back to ``evalClear``.
    """
    if not ctx.dataset.store.graph_aware:
        evalClear(ctx, u)
        return

    for doomed in _graphAll(ctx, u.graphiri):
        ctx.dataset.store.remove_graph(doomed)
|
||||
|
||||
|
||||
def evalInsertData(ctx: QueryContext, u: CompValue) -> None:
    """
    http://www.w3.org/TR/sparql11-update/#insertData

    Add ``u.triples`` to the context's current graph, then add each entry
    of ``u.quads`` to the graph obtained from the dataset for its key.
    """
    # plain triples go into the graph held by the context
    default_graph = ctx.graph
    default_graph += u.triples

    # u.quads is a dict of graphURI=>[triples]
    for graph_name in u.quads:
        # get_context is typed for identifiers/strings; the quad keys are
        # not narrowed here, hence the ignore.
        named_graph = ctx.dataset.get_context(graph_name)  # type: ignore[arg-type]
        named_graph += u.quads[graph_name]
|
||||
|
||||
|
||||
def evalDeleteData(ctx: QueryContext, u: CompValue) -> None:
    """
    http://www.w3.org/TR/sparql11-update/#deleteData

    Subtract ``u.triples`` from the context's current graph, then subtract
    each entry of ``u.quads`` from the graph obtained from the dataset for
    its key.
    """
    # plain triples are removed from the graph held by the context
    default_graph = ctx.graph
    default_graph -= u.triples

    # u.quads is a dict of graphURI=>[triples]
    for graph_name in u.quads:
        # get_context is typed for identifiers/strings; the quad keys are
        # not narrowed here, hence the ignore.
        named_graph = ctx.dataset.get_context(graph_name)  # type: ignore[arg-type]
        named_graph -= u.quads[graph_name]
|
||||
|
||||
|
||||
def evalDeleteWhere(ctx: QueryContext, u: CompValue) -> None:
    """
    http://www.w3.org/TR/sparql11-update/#deleteWhere

    The patterns in ``u.triples`` and ``u.quads`` serve both as the WHERE
    pattern and as the delete template: solutions are computed first, then
    each solution is used to instantiate the patterns, and the resulting
    triples are subtracted from the corresponding graphs.
    """

    # Solutions for the plain triples, evaluated against the incoming context.
    solutions: Iterator[FrozenDict] = evalBGP(ctx, u.triples)

    # Combine with the solutions of every quad pattern, each evaluated in a
    # context pushed onto the graph fetched for that quad key.
    for graph_term in u.quads:
        pattern_graph = ctx.dataset.get_context(graph_term)
        graph_ctx = ctx.pushGraph(pattern_graph)
        solutions = _join(solutions, list(evalBGP(graph_ctx, u.quads[graph_term])))

    for bindings in solutions:
        default_graph = ctx.graph
        default_graph -= _fillTemplate(u.triples, bindings)

        for graph_term in u.quads:
            # the target graph is looked up through the solution, so a
            # quad key may be resolved per solution
            target_graph = ctx.dataset.get_context(bindings.get(graph_term))
            target_graph -= _fillTemplate(u.quads[graph_term], bindings)
|
||||
|
||||
|
||||
def evalModify(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a ``Modify`` operation (DELETE and/or INSERT templates driven
    by a WHERE clause).

    Three phases, in this order:

    1. build the context used to evaluate ``u.where`` from the USING /
       USING NAMED clauses or, in their absence, from the WITH clause;
    2. evaluate ``u.where`` in that context;
    3. switch back to the context the templates apply to and, for each
       solution, subtract the filled delete templates and add the filled
       insert templates.
    """
    outer_ctx = ctx

    # Using replaces the dataset for evaluating the where-clause
    if u.using:
        default_replaced = False
        for using in u.using:
            if using.default:
                if not default_replaced:
                    # replace current default graph (once, on the first
                    # default USING clause)
                    scratch = Graph()
                    ctx = ctx.pushGraph(scratch)
                    default_replaced = True

                ctx.load(using.default, default=True)

            elif using.named:
                named_source = using.named
                ctx.load(named_source, default=False)

    # "The WITH clause provides a convenience for when an operation
    # primarily refers to a single graph. If a graph name is specified
    # in a WITH clause, then - for the purposes of evaluating the
    # WHERE clause - this will define an RDF Dataset containing a
    # default graph with the specified name, but only in the absence
    # of USING or USING NAMED clauses. In the presence of one or more
    # graphs referred to in USING clauses and/or USING NAMED clauses,
    # the WITH clause will be ignored while evaluating the WHERE
    # clause."
    if not u.using and u.withClause:
        with_graph = ctx.dataset.get_context(u.withClause)
        ctx = ctx.pushGraph(with_graph)

    solutions = evalPart(ctx, u.where)

    if u.using:
        if default_replaced:
            ctx = outer_ctx  # restore original default graph
        if u.withClause:
            # WITH was ignored for the WHERE clause above, but it still
            # selects the graph the templates operate on.
            with_graph = ctx.dataset.get_context(u.withClause)
            ctx = ctx.pushGraph(with_graph)

    for bindings in solutions:
        default_graph: Optional[Graph] = ctx.graph
        if u.delete:
            # ctx.graph is Optional and _fillTemplate yields a generator,
            # neither of which the declared operator types accept.
            default_graph -= _fillTemplate(u.delete.triples, bindings)  # type: ignore[operator]

            for graph_term, template in u.delete.quads.items():
                target_graph = ctx.dataset.get_context(bindings.get(graph_term))
                target_graph -= _fillTemplate(template, bindings)

        if u.insert:
            # same typing caveat as for the delete branch
            default_graph += _fillTemplate(u.insert.triples, bindings)  # type: ignore[operator]

            for graph_term, template in u.insert.quads.items():
                target_graph = ctx.dataset.get_context(bindings.get(graph_term))
                target_graph += _fillTemplate(template, bindings)
|
||||
|
||||
|
||||
def evalAdd(ctx: QueryContext, u: CompValue) -> None:
    """
    Add all triples from the source graph to the destination graph.

    ``u.graph`` is unpacked as a ``(source, destination)`` pair; each side
    is resolved with ``_graphOrDefault``. Nothing happens when both sides
    carry the same identifier.

    http://www.w3.org/TR/sparql11-update/#add
    """
    source_ref, target_ref = u.graph

    source = _graphOrDefault(ctx, source_ref)
    target = _graphOrDefault(ctx, target_ref)

    # _graphOrDefault is typed Optional[Graph], hence the ignores below.
    if source.identifier == target.identifier:  # type: ignore[union-attr]
        return

    target += source  # type: ignore[operator]
|
||||
|
||||
|
||||
def evalMove(ctx: QueryContext, u: CompValue) -> None:
    """
    Move the contents of the source graph into the destination graph:

    1. remove all triples from the destination
    2. add all triples from the source to the destination
    3. remove the source (``store.remove_graph`` when the store reports
       ``graph_aware``, otherwise by removing all of its triples)

    ``u.graph`` is unpacked as a ``(source, destination)`` pair. Nothing
    happens when both sides carry the same identifier.

    http://www.w3.org/TR/sparql11-update/#move
    """

    source_ref, target_ref = u.graph

    source = _graphOrDefault(ctx, source_ref)
    target = _graphOrDefault(ctx, target_ref)

    # _graphOrDefault is typed Optional[Graph], hence the ignores below.
    if source.identifier == target.identifier:  # type: ignore[union-attr]
        return

    target.remove((None, None, None))  # type: ignore[union-attr]
    target += source  # type: ignore[operator]

    if ctx.dataset.store.graph_aware:
        ctx.dataset.store.remove_graph(source)  # type: ignore[arg-type]
    else:
        source.remove((None, None, None))  # type: ignore[union-attr]
|
||||
|
||||
|
||||
def evalCopy(ctx: QueryContext, u: CompValue) -> None:
    """
    Copy the contents of the source graph into the destination graph:

    1. remove all triples from the destination
    2. add all triples from the source to the destination

    ``u.graph`` is unpacked as a ``(source, destination)`` pair. Nothing
    happens when both sides carry the same identifier.

    http://www.w3.org/TR/sparql11-update/#copy
    """

    source_ref, target_ref = u.graph

    source = _graphOrDefault(ctx, source_ref)
    target = _graphOrDefault(ctx, target_ref)

    # _graphOrDefault is typed Optional[Graph], so the identifier access
    # below needs an ignore.
    if source.identifier == target.identifier:  # type: ignore[union-attr]
        return

    # Optional[Graph] again: no "remove" attribute / "+" operand on None.
    target.remove((None, None, None))  # type: ignore[union-attr]
    target += source  # type: ignore[operator]
|
||||
|
||||
|
||||
def evalUpdate(
    graph: Graph,
    update: Update,
    initBindings: Optional[Mapping[str, Identifier]] = None,
) -> None:
    """
    Evaluate every operation of a parsed SPARQL Update request against
    ``graph``.

    http://www.w3.org/TR/sparql11-update/#updateLanguage

    'A request is a sequence of operations [...] Implementations MUST
    ensure that operations of a single request are executed in a
    fashion that guarantees the same effects as executing them in
    lexical order.

    Operations all result either in success or failure.

    If multiple operations are present in a single request, then a
    result of failure from any operation MUST abort the sequence of
    operations, causing the subsequent operations to be ignored.'

    This will return None on success and raise Exceptions on error.
    An operation flagged ``silent`` has its ``Exception`` suppressed and
    evaluation continues with the next operation; interpreter-level
    signals such as ``KeyboardInterrupt`` and ``SystemExit`` are never
    suppressed.

    :param graph: the graph the update is applied to; it is handed to a
        fresh ``QueryContext`` for every operation.
    :param update: the parsed update; its ``algebra`` is iterated in order.
    :param initBindings: optional initial bindings; keys are wrapped in
        ``Variable`` before being handed to the context.
    :raises Exception: whatever a non-silent operation raises, or
        "Unknown update operation" for an unrecognised operation name.

    .. caution::

        This method can access indirectly requested network endpoints, for
        example, query processing will attempt to access network endpoints
        specified in ``SERVICE`` directives.

        When processing untrusted or potentially malicious queries, measures
        should be taken to restrict network and file access.

        For information on available security measures, see the RDFLib
        :doc:`Security Considerations </security_considerations>`
        documentation.

    """

    # Operation name -> evaluator. Built per call so the module-level
    # functions are looked up at call time.
    handlers = {
        "Load": evalLoad,
        "Clear": evalClear,
        "Drop": evalDrop,
        "Create": evalCreate,
        "Add": evalAdd,
        "Move": evalMove,
        "Copy": evalCopy,
        "InsertData": evalInsertData,
        "DeleteData": evalDeleteData,
        "DeleteWhere": evalDeleteWhere,
        "Modify": evalModify,
    }

    # Normalise the binding keys once, up front: every operation then sees
    # the same bindings and keys are not re-wrapped for each operation.
    bindings = {Variable(k): v for k, v in (initBindings or {}).items()}

    for u in update.algebra:
        # each operation gets its own context and its own copy of the bindings
        ctx = QueryContext(graph, initBindings=dict(bindings))
        ctx.prologue = u.prologue

        try:
            handler = handlers.get(u.name)
            if handler is None:
                raise Exception("Unknown update operation: %s" % (u,))
            handler(ctx, u)
        except Exception:
            # SILENT suppresses operation failures only; a bare ``except``
            # here would also swallow KeyboardInterrupt / SystemExit.
            if not u.silent:
                raise
|
||||
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
This package contains modules for additional RDFLib stores
|
||||
"""
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user